4 use lib "$FindBin::Bin/../modules";
6 use Constants qw($BASEDIR $MAXPHRASE $DATADIR @SEARCH_EXCLUDE @SEARCH_INCLUDE $SEARCH_LEVEL);
10 use BSE::WebUtil 'refresh_to_admin';
11 my $in_cgi = exists $ENV{REQUEST_METHOD};
13 #eval "use CGI::Carp qw(fatalsToBrowser)";
16 my $cfg = BSE::Cfg->new;
17 my $urlbase = $cfg->entryVar('site', 'url');
19 my $stopwords = "$DATADIR/stopwords.txt";
22 open STOP, "< $stopwords"
23 or die "Cannot open $stopwords: $!";
24 chomp(my @stopwords = <STOP>);
25 tr/\r//d for @stopwords; # just in case
27 @stopwords{@stopwords} = (1) x @stopwords;
30 my $articles = Articles->new;
32 # scores depending on where the term appears
33 # these may need some tuning
34 # preferably, keep these to single digits
42 file_displayName => 2,
47 for my $name (keys %scores) {
48 my $score = $cfg->entry('search index scores', $name);
49 if (defined($score) && $score =~ /^\d+$/) {
50 $scores{$name} = $score;
54 # if the level of the article is higher than this, store it's parentid
56 my $max_level = $SEARCH_LEVEL;
58 # key is phrase, value is hashref with $id -> $sectionid
63 #print Dumper(\%index);
65 my $dh = BSE::DB->single;
66 my $dropIndex = $dh->stmt('dropIndex')
67 or die "No dropIndex member in BSE::DB";
68 my $insertIndex = $dh->stmt('insertIndex')
69 or die "No insertIndex member in BSE::DB";
72 or die "Could not drop search index ",$dropIndex->errstr;
74 for my $key (sort keys %index) {
75 my $word = $index{$key};
76 # sort by reverse score so that if we overflow the field we
77 # get the highest scoring matches
78 my @ids = sort { $word->{$b}[1] <=> $word->{$a}[1] } keys %$word;
79 my @sections = map { $_->[0] } @$word{@ids};
80 my @scores = map { $_->[1] } @$word{@ids};
81 #my @sections = map { $_->[0] } values %{$index{$key}};
82 #my @scores = map { $_->[1] } values %{$index{$key}};
84 $insertIndex->execute($key, "@ids", "@sections", "@scores")
85 or die "Cannot insert into index: ", $insertIndex->errstr;
89 refresh_to_admin($cfg, "/cgi-bin/admin/menu.pl");
96 @dont_search{@SEARCH_EXCLUDE} = @SEARCH_EXCLUDE;
97 @do_search{@SEARCH_INCLUDE} = @SEARCH_INCLUDE;
98 INDEX: until ($articles->EOF) {
102 my $article = $articles->getNext;
103 next unless ($article->{listed} || $article->{flags} =~ /I/);
104 next if $article->{flags} =~ /[CN]/;
105 my $section = $article;
106 while ($section->{parentid} >= 1) {
107 $section = $articles->getByPkey($section->{parentid});
108 next INDEX if $section->{flags} =~ /C/;
110 my $id = $article->{id};
111 my $indexas = $article->{level} > $max_level ? $article->{parentid} : $id;
112 my $sectionid = $section->{id};
113 eval "use $article->{generator}";
115 my $gen = $article->{generator}->new(top=>$article, cfg=>$cfg);
116 next unless $gen->visible($article) or $do_search{$sectionid};
118 next if $dont_search{$sectionid};
120 for my $field (sort { $scores{$b} <=> $scores{$a} } keys %scores) {
122 next unless $scores{$field};
125 if (exists $article->{$field}) {
126 $text = $article->{$field};
129 if ($field =~ /^file_(.*)/) {
131 @files = $article->files unless $got_files++;
132 $text = join "\n", map $_->{$file_field}, @files;
135 #next if $text =~ m!^\<html\>!i; # I don't know how to do this (yet)
136 if ($field eq 'body') {
137 $gen->remove_block($articles, [], \$text);
138 $text =~ s/[abi]\[([^\]]+)\]/$1/g;
141 next unless defined $text;
144 for my $para (split /\n/, $text) {
145 my @words = split /\W+/, $para;
148 for my $word (@words) {
149 if ($stopwords{lc $word}) {
150 process($indexas, $sectionid, $scores{$field}, @buffer) if @buffer;
154 push(@buffer, $word);
157 process($indexas, $sectionid, $scores{$field}, @buffer) if @buffer;
164 my ($id, $sectionid, $score, @words) = @_;
166 for (my $start = 0; $start < @words; ++$start) {
167 my $end = $start + $MAXPHRASE-1;
168 $end = $#words if $end > $#words;
170 for my $phrase (map { "@words[$start..$_]" } $start..$end) {
171 if (!exists $index{lc $phrase}{$id}
172 || $score > $index{lc $phrase}{$id}[1]) {
173 $index{lc $phrase}{$id} = [ $sectionid, $score ];
175 if (!exists $index{$phrase}{$id}
176 || $score > $index{$phrase}{$id}[1]) {
177 $index{$phrase}{$id} = [ $sectionid, $score ];