optional case-insensitivity for searching
[bse.git] / site / cgi-bin / modules / BSE / UI / Search.pm
CommitLineData
e58486b7
TC
1package BSE::UI::Search;
2use strict;
3use base 'BSE::UI::Dispatch';
e0ed81d7 4use BSE::TB::Articles;
e58486b7
TC
5use BSE::DB;
6use Constants qw(:search);
7use Carp;
8use BSE::Cfg;
9use BSE::Template;
3f9c8a96 10use BSE::Util::HTML qw':default popup_menu';
9d77df06 11use BSE::Util::Tags qw(tag_article);
e58486b7
TC
12use BSE::Request;
13
5d2a441a 14our $VERSION = "1.006";
cb7fd78d 15
e58486b7
TC
16my %actions =
17 (
18 search => 1,
19 );
20
21sub actions { \%actions }
22
23sub default_action { 'search' }
24
25sub req_search {
26 my ($class, $req) = @_;
27
28 my $cfg = $req->cfg;
88fa9cd6 29
e58486b7 30 my $cgi = $req->cgi;
88fa9cd6
AO
31 my $results_per_page = int($cgi->param('pp') || 10);
32 $results_per_page >= 1 or $results_per_page = 10;
e58486b7
TC
33 my $words = $cgi->param('q');
34 my $section = $cgi->param('s');
35 my $date = $cgi->param('d');
36 my $admin = $cgi->param('admin') ? 1 : 0;
37 my $match_all = $cgi->param('match_all');
38 $section = '' if !defined $section;
39 $date = 'ar' if ! defined $date;
40 my @results;
41 my @terms; # terms as parsed by the search engine
42 my $case_sensitive;
43 if (defined $words && length $words) {
5d2a441a
TC
44 my $case = $cfg->entry('search', 'case_sensitive', 'context');
45 if ($case eq 'context') {
46 $case_sensitive = $words ne lc $words;
47 }
48 elsif ($case eq 'controlled') {
49 $case_sensitive = $cfg->param('c') || 0;
50 }
51 else {
52 $case_sensitive = 0;
53 }
54 @results = getSearchResult($req, $words, $section, $date, \@terms, $match_all,
55 $case_sensitive);
e58486b7
TC
56 }
57 else {
58 $words = ''; # so we don't return junk for the form default
59 }
60
61 my $page_count = int((@results + $results_per_page - 1)/$results_per_page);
62
63 my $page_number = $cgi->param('page') || 1;
64 $page_number = $page_count if $page_number > $page_count;
65
66 my @articles;
67 if (@results) {
68 my $articles_start = ($page_number-1) * $results_per_page;
69 my $articles_end = $articles_start + $results_per_page-1;
70 $articles_end = $#results if $articles_end >= @results;
71
72 if ($cfg->entry('search', 'keep_inaccessible')) {
73 for my $entry (@results[$articles_start..$articles_end]) {
e0ed81d7 74 my $article = BSE::TB::Articles->getByPkey($entry->[0])
e58486b7
TC
75 or die "Cannot retrieve article $entry->[0]\n";
76 push(@articles, $article);
77 }
78 }
79 else {
80 my %remove; # used later to remove the inaccessible from @results;
81 # we need to check accessiblity on each article
82 my $index = 0;
83 my $seen = 0;
84 while ($index < @results && $seen <= $articles_end) {
85 my $id = $results[$index][0];
e0ed81d7 86 my $article = BSE::TB::Articles->getByPkey($id)
e58486b7
TC
87 or die "Cannot retrieve article $id\n";
88 if ($req->siteuser_has_access($article)) {
89 if ($seen >= $articles_start) {
90 push @articles, $article;
91 }
92 ++$seen;
93 }
94 else {
95 $remove{$id} = 1;
96 }
97 ++$index;
98 }
99 @results = grep !$remove{$_->[0]}, @results;
100 }
101 }
102
cb2a09ac 103 my %index_as;
e58486b7
TC
104 for my $article (@articles) {
105 my $generator = $article->{generator};
106 eval "use $generator";
107 my $gen = $generator->new(top=>$article, cfg=>$cfg);
108 $article = $gen->get_real_article($article);
cb2a09ac 109 $index_as{$article->id} = [ $article->others_indexed_as_myself ];
e58486b7
TC
110 }
111
112 $page_count = int((@results + $results_per_page - 1)/$results_per_page);
113
114 # make an array of hashes (to preserve order)
115 my %excluded;
116 @excluded{@SEARCH_EXCLUDE} = @SEARCH_EXCLUDE;
117 my %included;
118 @included{@SEARCH_INCLUDE} = @SEARCH_INCLUDE;
119 my @sections = map { { $_->{id} => $_->{title} } }
120 sort { $b->{displayOrder} <=> $a->{displayOrder} }
121 grep { ($_->{listed} || $included{$_->{id}})
122 && !$excluded{$_->{id}} }
e0ed81d7 123 BSE::TB::Articles->getBy('level', 1);
e58486b7
TC
124 unshift(@sections, { ""=>$SEARCH_ALL });
125 my %sections = map { %$_ } @sections;
126 # now a list of values ( in the correct order
127 @sections = map { keys %$_ } @sections;
128
129 my %scores = map @$_, @results;
130
131 my $max_score = 0;
132 for my $score (values %scores) {
133 $score > $max_score and $max_score = $score;
134 }
135
136 my %highlight_prefix;
137 my %highlight_suffix;
138 for my $type (qw(keyword author pageTitle file_displayName
139 file_description file_notes summary description product_code)) {
140 $highlight_prefix{$type} =
947d7c09 141 $cfg->entry('search highlight', "${type}_prefix", $cfg->entry('search highlight', "prefix", "<b>"));
e58486b7 142 $highlight_suffix{$type} =
947d7c09 143 $cfg->entry('search highlight', "${type}_suffix", $cfg->entry('search highlight', "suffix", "</b>"));
e58486b7
TC
144 }
145
146 my $page_num_iter = 0;
147
148 my $article_index = -1;
149 my $result_seq = ($page_number-1) * $results_per_page;
150 my $excerpt;
151 my %match_tags;
152 my $words_re_str = '\b('.join('|', map quotemeta, @terms).')';
153 my $highlight_partial = $cfg->entryBool('search', 'highlight_partial', 1);
154 $words_re_str .= '\b' unless $highlight_partial;
155 my $words_re = qr/$words_re_str/i;
156 my @files;
157 my $file_index;
158 my $current_result;
159 my %acts;
160 %acts =
161 (
162 $req->dyn_user_tags(),
163 iterate_results =>
164 sub {
165 ++$result_seq;
166 ++$article_index;
167 if ($article_index < @articles) {
168 $current_result = $articles[$article_index];
cb2a09ac
TC
169 my @excerpts;
170 for my $check (@{$index_as{$current_result->id}}) {
171 my $found_child = 0;
172 my $tmp = excerpt($cfg, $admin, $case_sensitive, $check, \$found_child, \@terms);
173 push @excerpts, $tmp if $found_child;
174 }
e58486b7 175 my $found = 0;
cb2a09ac
TC
176 my $first_excerpt = excerpt($cfg, $admin, $case_sensitive, $current_result, \$found, \@terms);
177 unshift @excerpts, $first_excerpt if $found || !@excerpts;
178 $excerpt = join " ", @excerpts;
179
e58486b7 180 $req->set_article(result => $current_result);
cb2a09ac
TC
181
182 %match_tags = ();
e58486b7 183 for my $field (qw/pageTitle summary keyword description author product_code/) {
cb2a09ac
TC
184 for my $check ($current_result, @{$index_as{$current_result->id}}) {
185 my $value = $check->{$field};
186 defined $value or $value = '';
187 $value =~ s!$words_re!$highlight_prefix{$field}$1$highlight_suffix{$field}!g
188 or $value = '';
189 $match_tags{$field} .= " $value";
190 }
e58486b7 191 }
cb2a09ac
TC
192 s/^\s+// for values %match_tags;
193
e58486b7
TC
194 # match files
195 @files = ();
cb2a09ac 196 for my $file (map $_->files, $current_result, @{$index_as{$current_result->id}}) {
e58486b7
TC
197 my $found;
198 my %fileout;
199 for my $field (qw(displayName description notes)) {
200 my $prefix = $highlight_prefix{"file_$field"};
201 my $suffix = $highlight_suffix{"file_$field"};
202 $fileout{$field. "_matched"} = $file->{$field} =~ /$words_re/;
203 ++$found if ($fileout{$field} = $file->{$field})
204 =~ s!$words_re!$prefix$1$suffix!g;
205 }
206 if ($found) {
207 $fileout{notes_excerpt} =
208 excerpt($cfg, $admin, $case_sensitive, $current_result, \$found, \@terms, 'file_notes', $file->{notes});
209 push @files, [ \%fileout, $file ];
210 }
211 }
212
213 return 1;
214 }
215 else {
216 $req->set_article(result => undef);
217
218 return 0;
219 }
220 },
221 result =>
222 sub {
223 my $arg = shift;
224 if ($arg eq 'score') {
225 return sprintf("%.1f", 100.0 * $scores{$current_result->{id}} / $max_score);
226 }
9d77df06 227 return tag_article($current_result, $cfg, $arg);
e58486b7
TC
228 },
229 date =>
230 sub {
231 my ($func, $args) = split ' ', $_[0];
232 use POSIX 'strftime';
233 exists $acts{$func}
234 or return "** $func not found for date **";
235 my $date = $acts{$func}->($args)
236 or return '';
237 my ($year, $month, $day) = $date =~ /(\d+)\D+(\d+)\D+(\d+)/;
238 $year -= 1900;
239 --$month;
240 return strftime('%d-%b-%Y', 0, 0, 0, $day, $month, $year, 0, 0);
241 },
242 keywords => sub { $match_tags{keyword} },
243 author => sub { $match_tags{author} },
244 pageTitle => sub { $match_tags{pageTitle} },
245 match_summary => sub { $match_tags{summary} },
246 description => sub { $match_tags{description} },
247 product_code => sub { $match_tags{product_code} },
248
249 ifMatchfiles => sub { @files },
250 matchfile_count => sub { @files },
3d0a97d9 251 matchfile_index => sub { $file_index },
e58486b7
TC
252 iterate_matchfiles_reset => sub { $file_index = -1 },
253 iterate_matchfiles => sub { ++$file_index < @files },
254 matchfile =>
255 sub {
256 my ($args) = @_;
257 $file_index < @files or return '';
258 my $file_entry = $files[$file_index];
259 # already html escaped
260 exists $file_entry->[0]{$args} and return $file_entry->[0]{$args};
261
262 my $value = $file_entry->[1]{$args};
263 defined $value or return '';
264
265 escape_html($value);
266 },
267
268 ifResults => sub { scalar @results; },
269 ifSearch => sub { defined $words and length $words },
270 dateSelected => sub { $_[0] eq $date ? 'selected="selected"' : '' },
271 excerpt =>
272 sub {
273 return $excerpt;
274 },
275 articleurl =>
276 sub {
9d77df06 277 return $admin ? $current_result->{admin} : $current_result->link($cfg);
e58486b7
TC
278 },
279 count => sub { scalar @results },
280 multiple => sub { @results != 1 },
281 terms => sub { escape_html($words) },
282 resultSeq => sub { $result_seq },
4751ce20 283 list => sub { popup_menu(-name=>'s', -id => 'search_s',
e58486b7
TC
284 -values=>\@sections,
285 -labels=>\%sections,
286 -default=>$section) },
287
288 # result pages
289 iterate_pages =>
290 sub {
291 return ++$page_num_iter <= $page_count;
292 },
293 page => sub { $page_num_iter },
294 ifCurrentSearchPage =>
295 sub { $page_num_iter == $page_number },
296 pageurl =>
297 sub {
3f9c8a96
TC
298 my $work_words = $words;
299 $ENV{SCRIPT_NAME} . "?q=" . escape_uri($work_words) .
e58486b7
TC
300 "&amp;s=" . escape_uri($section) .
301 "&amp;d=" . escape_uri($date) .
3f9c8a96
TC
302 "&amp;page=".$page_num_iter .
303 "&amp;pp=$results_per_page";
e58486b7
TC
304 },
305 highlight_result =>
306 [ \&tag_highlight_result, \$current_result, $cfg, $words_re ],
4b1dcb69 307 admin_search => $admin,
e58486b7
TC
308 );
309
310 my $template = $cgi->param('embed') ? 'include/search_results' : 'search';
d060e02f 311 my $result = $req->dyn_response($template, \%acts);
e58486b7
TC
312 %acts = (); # remove any circular refs
313
314 return $result;
315}
316
317sub tag_highlight_result {
318 my ($rcurrent_result, $cfg, $words_re, $arg) = @_;
319
320 $$rcurrent_result
321 or return "** highlight_result must be in results iterator **";
322
323 my $text = $$rcurrent_result->{$arg};
324 defined $text or return '';
325
326 $text = escape_html($text);
327
947d7c09
AO
328 my $prefix = $cfg->entry('search highlight', "${arg}_prefix", $cfg->entry('search highlight', "prefix", "<b>"));
329 my $suffix = $cfg->entry('search highlight', "${arg}_suffix", $cfg->entry('search highlight', "suffix", "</b>"));
e58486b7
TC
330
331 $text =~ s/$words_re/$prefix$1$suffix/g;
332
333 $text;
334}
335
336sub getSearchResult {
5d2a441a 337 my ($req, $words, $section, $date, $terms, $match_all, $match_case) = @_;
e58486b7
TC
338
339 my $cfg = $req->cfg;
340 my $searcher_class = $cfg->entry('search', 'searcher', 'BSE::Search::BSE');
341 (my $searcher_file = $searcher_class . '.pm') =~ s!::!/!g;;
342 require $searcher_file;
5d2a441a
TC
343 my $case_sensitivity = $cfg->entry('search', 'case_sensitive', 'context');
344 my $searcher = $searcher_class->new(cfg => $cfg, case => $case_sensitivity);
345 return $searcher->search($words, $section, $date, $terms, $match_all, $req, $match_case);
e58486b7
TC
346}
347
348my %gens;
349
350sub excerpt {
351 my ($cfg, $admin, $case_sensitive, $article, $found, $terms, $type, $text) = @_;
352
353 my $generator = $article->{generator};
354
355 $generator =~ /\S/ or confess "generator for $article->{id} is blank";
356
357 eval "use $generator";
358 confess "Cannot use $generator: $@" if $@;
359
360 $gens{$generator} ||= $generator->new(admin=>$admin, cfg=>$cfg, top=>$article);
361
cb2a09ac 362 return $gens{$generator}->excerpt($article, $found, $case_sensitive, $terms, $type, $text, 1);
e58486b7
TC
363}
364
3651;