optional case-insensitivity for searching
[bse.git] / site / cgi-bin / modules / BSE / Index.pm
CommitLineData
3f015bd2
TC
1package BSE::Index;
2use strict;
3use Time::HiRes qw(time);
289f5a78 4use Constants qw(@SEARCH_EXCLUDE @SEARCH_INCLUDE);
e0ed81d7 5use BSE::TB::Articles;
3f015bd2 6
5d2a441a 7our $VERSION = "1.007";
3f015bd2 8
# Built-in score for each field that can be indexed, used by new() when
# the caller does not supply its own scores.  A field whose score is
# false (0) is skipped when articles are scanned.
my %default_scores =
  (
   # article text
   title            => 5,
   pageTitle        => 5,
   keyword          => 4,
   author           => 4,
   body             => 3,

   # fields gathered from the article's attached files
   file_displayName => 2,
   file_description => 2,
   file_notes       => 1,

   # not indexed unless given a non-zero score
   summary          => 0,
   description      => 0,
   product_code     => 0,
  );
23
# Construct a new index builder.
#
# Options are name => value pairs, all optional:
#   scores    - hash ref of field name => score.  When missing (or
#               false) it is built from %default_scores, with each
#               entry overridable from the [search index scores]
#               section of the config.
#   max_level - when false, taken from the [search].level config entry,
#               falling back to $Constants::SEARCH_LEVEL.
# Any other options are stored on the object as supplied; other methods
# in this file read verbose, error and note.
#
# The time of construction is always stored in {start}, replacing any
# value the caller passed.
sub new {
  my ($class, %opts) = @_;

  my $cfg = BSE::Cfg->single;

  if (!$opts{scores}) {
    my %scores;
    for my $name (keys %default_scores) {
      $scores{$name} =
        $cfg->entry("search index scores", $name, $default_scores{$name});
    }
    $opts{scores} = \%scores;
  }

  $opts{start} = time;

  $opts{max_level} = $cfg->entry("search", "level", $Constants::SEARCH_LEVEL)
    unless $opts{max_level};

  return bless \%opts, $class;
}
40
# Return the back-end indexer object, creating and caching it in
# {indexer} on first use.
#
# The class to use comes from the [search].indexer config entry
# (default BSE::Index::BSE) and is loaded on demand.  The new object is
# given the config, this object's scores and verbose settings, and the
# [search].case_sensitive setting (default "context") as "case".
#
# Dies if the indexer module can't be loaded, or if
# [search].case_sensitive isn't one of none, context or controlled.
sub indexer {
  my ($self) = @_;

  return $self->{indexer} if $self->{indexer};

  my $cfg = BSE::Cfg->single;
  my $indexer_class = $cfg->entry('search', 'indexer', 'BSE::Index::BSE');
  (my $indexer_file = $indexer_class . ".pm") =~ s!::!/!g;
  require $indexer_file;

  my $case_sensitivity = $cfg->entry('search', 'case_sensitive', 'context');
  # anchored with \A and \z rather than ^ and $: "$" also matches just
  # before a trailing newline, which would let a value such as
  # "context\n" through and on to the indexer
  $case_sensitivity =~ /\A(?:none|context|controlled)\z/
    or die "[search].case_sensitive must be none, context or controlled\n";

  $self->{indexer} = $indexer_class->new
    (
     cfg => $cfg,
     scores => $self->{scores},
     verbose => $self->{verbose},
     case => $case_sensitivity,
    );

  return $self->{indexer};
}
64
# Rebuild the search index: start the indexer, scan the articles with
# make_index(), then finish the indexer, noting progress via vnote().
#
# Returns 1 on success.  If any step dies the error is reported through
# _error() and an empty list/undef is returned.
sub do_index {
  my ($self) = @_;

  my $indexer = $self->indexer;

  # The block returns an explicit true value so failure is detected from
  # eval's result rather than from $@ alone: $@ can be empty after a
  # failure (cleared during unwinding on older perls, or a false
  # exception value), which would otherwise be reported as success.
  my $ok = eval {
    $self->vnote("s1::Starting index");
    $indexer->start_index();
    $self->vnote("s2::Starting article scan");
    $self->make_index();
    $self->vnote("f2::Populating search index");
    $indexer->end_index();
    $self->vnote("f1::Indexing complete");
    1;
  };
  unless ($ok) {
    my $error = $@ || "unknown error\n";
    $self->_error("Indexing error: $error");
    return;
  }

  return 1;
}
84
# Scan every article and hand the indexable ones to the indexer.
#
# An article is skipped when:
#  - it can't be loaded, or its should_index method returns false
#  - its generator reports it as not visible, unless its section id is
#    listed in @SEARCH_INCLUDE
#  - its section id is listed in @SEARCH_EXCLUDE
#  - the generator can't supply the real article (reported via error())
#
# For the remaining articles the text of each field with a true score
# is collected and passed to the indexer's process_article(), along with
# the article, its section and the id of the article it is indexed as.
#
# Dies if an article's generator class fails to load.
sub make_index {
  my ($self) = @_;

  my %dont_search = map { $_ => $_ } @SEARCH_EXCLUDE;
  my %do_search   = map { $_ => $_ } @SEARCH_INCLUDE;

  $self->vnote("s::Loading article ids");
  my @ids = BSE::TB::Articles->allids;
  my $count = scalar @ids;
  $self->vnote("c:$count:$count articles to index");

  my $cfg = BSE::Cfg->single;
  my $indexer = $self->indexer;

 ARTICLE:
  for my $article_id (@ids) {
    my $article = BSE::TB::Articles->getByPkey($article_id)
      or next ARTICLE;
    $article->should_index
      or next ARTICLE;

    # find the section
    my $section = $article->section;
    my $id = $article->{id};
    my $indexas = $article->indexed_as;
    my $sectionid = $section->{id};

    # NOTE(review): this string-evals a class name stored on the
    # article; confirm generator values can only come from a trusted
    # source.
    eval "use $article->{generator}";
    die $@ if $@;
    my $gen = $article->{generator}->new(top=>$article, cfg=>$cfg);

    next ARTICLE
      unless $gen->visible($article) or $do_search{$sectionid};
    next ARTICLE
      if $dont_search{$sectionid};

    # from here on $article is whatever the generator considers the
    # real article; $section and $indexas still come from the original
    $article = $gen->get_real_article($article);
    unless ($article) {
      $self->error("$id:Full article for $id not found");
      next ARTICLE;
    }

    $self->vnote("i:$id:Indexing '$article->{title}'");

    # only fields with a true score, highest score first
    my $scores = $self->{scores};
    my @wanted =
      sort { $scores->{$b} <=> $scores->{$a} }
      grep { $scores->{$_} }
      keys %$scores;

    my @files;
    my $files_loaded;
    my %fields;
  FIELD:
    for my $field (@wanted) {
      my $text;
      if (exists $article->{$field}) {
        $text = $article->{$field};
      }
      elsif ($field =~ /^file_(.*)/) {
        # file_* fields are built from the same-named field of every
        # file attached to the article; the files are fetched at most
        # once per article
        my $file_field = $1;
        unless ($files_loaded) {
          @files = $article->files;
          $files_loaded = 1;
        }
        $text = join "\n", map $_->{$file_field}, @files;
      }

      #next if $text =~ m!^\<html\>!i; # I don't know how to do this (yet)

      # strip out markup
      if ($field eq 'body') {
        $gen->remove_block("BSE::TB::Articles", [], \$text);
        # reduce a[...], b[...] and i[...] to their bracketed content
        $text =~ s/[abi]\[([^\]]+)\]/$1/g;
      }

      defined $text
        or next FIELD;

      $fields{$field} = $text;
    }

    $indexer->process_article($article, $section, $indexas->id, \%fields);
  }

  $self->vnote("f::Article scan complete");
}
157
# Report an error without aborting: the message parts are passed to
# _error(), preceded by the elapsed time and an ":e:" marker.
sub error {
  my $self = shift;

  return $self->_error($self->_time_passed, ":e:", @_);
}
163
# Deliver an error message.
#
# If the object was given an error callback ({error}) it is called with
# the message parts unchanged and its result is returned.  Otherwise the
# parts are joined and printed to STDERR.
sub _error {
  my ($self, @error) = @_;

  if (my $handler = $self->{error}) {
    return $handler->(@error);
  }

  # Messages built by error() carry no line terminator, so without this
  # consecutive errors would run together on one line of STDERR output.
  my $text = join '', @error;
  $text .= "\n" if length($text) && $text !~ /\n\z/;

  print STDERR $text;
}
174
# Return the time elapsed since {start} was recorded, formatted with
# three decimal places.
sub _time_passed {
  my $self = shift;

  my $elapsed = time() - $self->{start};

  return sprintf "%.3f", $elapsed;
}
180
# Emit a progress note: the message parts are passed to _note(),
# preceded by the elapsed time and a ":" separator.
sub vnote {
  my $self = shift;

  return $self->_note($self->_time_passed, ":", @_);
}
186
# Pass the message parts to the note callback ({note}), if the object
# was given one; otherwise do nothing.
sub _note {
  my ($self, @msg) = @_;

  my $handler = $self->{note};

  return $handler && $handler->(@msg);
}