fast CGI search support
[bse.git] / site / cgi-bin / modules / BSE / UI / Search.pm
CommitLineData
e58486b7
TC
1package BSE::UI::Search;
2use strict;
3use base 'BSE::UI::Dispatch';
4use Articles;
5use BSE::DB;
6use Constants qw(:search);
7use Carp;
8use BSE::Cfg;
9use BSE::Template;
10use DevHelp::HTML qw':default popup_menu';
11use BSE::Util::Tags;
12use BSE::Request;
13
# Table of the actions this module implements; each supported action
# name maps to a true value.
my %actions = ( search => 1 );

# Returns a reference to the table of supported actions.
sub actions {
  return \%actions;
}

# Returns the name of the default action.
sub default_action {
  return 'search';
}
22
# Handle the "search" action: run the configured search engine over the
# submitted words, work out which results belong on the requested page,
# and render the results through the search template.
#
# CGI parameters read:
#   q         - the search words
#   s         - section to restrict the search to ('' when absent)
#   d         - date selector ('ar' when absent)
#   admin     - when true, result links use the article's admin field
#   match_all - passed through to the search engine
#   page      - 1-based results page; anything that isn't a positive
#               integer is treated as page 1
#   embed     - when true, use the include/search_results template
#
# Returns the value of $req->response() for the selected template.
sub req_search {
  my ($class, $req) = @_;

  my $cfg = $req->cfg;

  my $results_per_page = 10;

  my $cgi = $req->cgi;
  my $words = $cgi->param('q');
  my $section = $cgi->param('s');
  my $date = $cgi->param('d');
  my $admin = $cgi->param('admin') ? 1 : 0;
  my $match_all = $cgi->param('match_all');
  $section = '' if !defined $section;
  $date = 'ar' if ! defined $date;
  my @results;
  my @terms; # terms as parsed by the search engine
  my $case_sensitive;
  if (defined $words && length $words) {
    # any upper case in the query makes the excerpt matching case sensitive
    $case_sensitive = $words ne lc $words;
    @results = getSearchResult($req, $words, $section, $date, \@terms, $match_all);
  }
  else {
    $words = ''; # so we don't return junk for the form default
  }

  my $page_count = int((@results + $results_per_page - 1)/$results_per_page);

  # the page number comes from the browser: a negative or non-numeric
  # value would produce a negative start index below and select the
  # wrong results, so only accept a positive integer
  my $page_number = $cgi->param('page');
  defined $page_number && $page_number =~ /\A[0-9]+\z/ && $page_number > 0
    or $page_number = 1;
  $page_number = $page_count if $page_number > $page_count;
  # $page_count is zero when nothing was found; stay on page 1 so the
  # result sequence numbers don't start out negative
  $page_number = 1 if $page_number < 1;

  my @articles;
  if (@results) {
    my $articles_start = ($page_number-1) * $results_per_page;
    my $articles_end = $articles_start + $results_per_page-1;
    $articles_end = $#results if $articles_end >= @results;

    if ($cfg->entry('search', 'keep_inaccessible')) {
      # no access filtering: the page is a straight slice of the results
      for my $entry (@results[$articles_start..$articles_end]) {
        my $article = Articles->getByPkey($entry->[0])
          or die "Cannot retrieve article $entry->[0]\n";
        push(@articles, $article);
      }
    }
    else {
      my %remove; # used later to remove the inaccessible from @results;
      # we need to check accessibility on each article
      my $index = 0;
      # $seen counts the accessible articles found so far; scanning
      # stops once the requested page has been filled, so results past
      # that point are never access checked
      my $seen = 0;
      while ($index < @results && $seen <= $articles_end) {
        my $id = $results[$index][0];
        my $article = Articles->getByPkey($id)
          or die "Cannot retrieve article $id\n";
        if ($req->siteuser_has_access($article)) {
          if ($seen >= $articles_start) {
            push @articles, $article;
          }
          ++$seen;
        }
        else {
          $remove{$id} = 1;
        }
        ++$index;
      }
      @results = grep !$remove{$_->[0]}, @results;
    }
  }

  # let each article's generator substitute the article to display;
  # $article aliases the element of @articles, so this replaces it in place
  for my $article (@articles) {
    my $generator = $article->{generator};
    eval "use $generator";
    # report a load failure here instead of failing obscurely in new()
    confess "Cannot use $generator: $@" if $@;
    my $gen = $generator->new(top=>$article, cfg=>$cfg);
    $article = $gen->get_real_article($article);
  }

  # inaccessible results may have been removed above, so count pages again
  $page_count = int((@results + $results_per_page - 1)/$results_per_page);

  # make an array of hashes (to preserve order)
  my %excluded;
  @excluded{@SEARCH_EXCLUDE} = @SEARCH_EXCLUDE;
  my %included;
  @included{@SEARCH_INCLUDE} = @SEARCH_INCLUDE;
  # the unary plus makes the inner braces an unambiguous hash constructor
  my @sections = map { +{ $_->{id} => $_->{title} } }
    sort { $b->{displayOrder} <=> $a->{displayOrder} }
      grep { ($_->{listed} || $included{$_->{id}})
               && !$excluded{$_->{id}} }
        Articles->getBy('level', 1);
  unshift(@sections, { ""=>$SEARCH_ALL });
  my %sections = map { %$_ } @sections;
  # now a list of values (in the correct order)
  @sections = map { keys %$_ } @sections;

  # each result is flattened into the hash, pairing its first element
  # (the article id) with its second (the score)
  my %scores = map @$_, @results;

  my $max_score = 0;
  for my $score (values %scores) {
    $score > $max_score and $max_score = $score;
  }

  # highlight markup is configurable per field, defaulting to <b>...</b>
  my %highlight_prefix;
  my %highlight_suffix;
  for my $type (qw(keyword author pageTitle file_displayName
                   file_description file_notes summary description product_code)) {
    $highlight_prefix{$type} =
      $cfg->entry('search highlight', "${type}_prefix", "<b>");
    $highlight_suffix{$type} =
      $cfg->entry('search highlight', "${type}_suffix", "</b>");
  }

  my $page_num_iter = 0;

  my $article_index = -1;
  my $result_seq = ($page_number-1) * $results_per_page;
  my $excerpt;
  my %match_tags;
  my $highlight_partial = $cfg->entryBool('search', 'highlight_partial', 1);
  # an empty alternation would match the empty string at every word
  # boundary and wrap nothing in highlight markup, so drop empty terms
  # and fall back to a pattern that can never match when none are left
  my @highlight_terms = grep { defined && length } @terms;
  my $words_re;
  if (@highlight_terms) {
    my $words_re_str = '\b('.join('|', map quotemeta, @highlight_terms).')';
    $words_re_str .= '\b' unless $highlight_partial;
    $words_re = qr/$words_re_str/i;
  }
  else {
    $words_re = qr/(?!)/;
  }
  my @files;
  my $file_index;
  my $current_result;
  my %acts;
  %acts =
    (
     $req->dyn_user_tags(),
     # step to the next article on this page and prepare its excerpt,
     # highlighted fields and matching files
     iterate_results =>
     sub {
       ++$result_seq;
       ++$article_index;
       if ($article_index < @articles) {
         $current_result = $articles[$article_index];
         my $found = 0;
         $excerpt = excerpt($cfg, $admin, $case_sensitive, $current_result, \$found, \@terms);

         $req->set_article(result => $current_result);

         # a field that doesn't contain any of the terms yields ''
         for my $field (qw/pageTitle summary keyword description author product_code/) {
           my $value = $current_result->{$field};
           defined $value or $value = '';
           $value =~ s!$words_re!$highlight_prefix{$field}$1$highlight_suffix{$field}!g
             or $value = '';
           $match_tags{$field} = $value;
         }

         # match files
         @files = ();
         for my $file ($current_result->files) {
           my $found;
           my %fileout;
           for my $field (qw(displayName description notes)) {
             my $prefix = $highlight_prefix{"file_$field"};
             my $suffix = $highlight_suffix{"file_$field"};
             $fileout{$field. "_matched"} = $file->{$field} =~ /$words_re/;
             # copy the field, highlight the copy, and count the file as
             # found if anything was highlighted
             ++$found if ($fileout{$field} = $file->{$field})
               =~ s!$words_re!$prefix$1$suffix!g;
           }
           if ($found) {
             $fileout{notes_excerpt} =
               excerpt($cfg, $admin, $case_sensitive, $current_result, \$found, \@terms, 'file_notes', $file->{notes});
             push @files, [ \%fileout, $file ];
           }
         }

         return 1;
       }
       else {
         $req->set_article(result => undef);

         return 0;
       }
     },
     # a field of the current result, or with "score" its score as a
     # percentage of the best score
     result =>
     sub {
       my $arg = shift;
       if ($arg eq 'score') {
         # avoid dividing by zero when no result has a positive score
         $max_score > 0
           or return sprintf("%.1f", 0);
         # the displayed article may not be the one that was scored
         my $score = $scores{$current_result->{id}} || 0;
         return sprintf("%.1f", 100.0 * $score / $max_score);
       }
       return escape_html($current_result->{$arg});
     },
     # format the value of another tag as a date, eg. "result lastModified"
     date =>
     sub {
       my ($func, $args) = split ' ', $_[0];
       use POSIX 'strftime';
       exists $acts{$func}
         or return "** $func not found for date **";
       my $date = $acts{$func}->($args)
         or return '';
       # without three numbers there is nothing sensible to format
       my ($year, $month, $day) = $date =~ /(\d+)\D+(\d+)\D+(\d+)/
         or return '';
       $year -= 1900;
       --$month;
       return strftime('%d-%b-%Y', 0, 0, 0, $day, $month, $year, 0, 0);
     },
     keywords => sub { $match_tags{keyword} },
     author => sub { $match_tags{author} },
     pageTitle => sub { $match_tags{pageTitle} },
     match_summary => sub { $match_tags{summary} },
     description => sub { $match_tags{description} },
     product_code => sub { $match_tags{product_code} },

     ifMatchfiles => sub { @files },
     matchfile_count => sub { @files },
     iterate_matchfiles_reset => sub { $file_index = -1 },
     iterate_matchfiles => sub { ++$file_index < @files },
     matchfile =>
     sub {
       my ($args) = @_;
       $file_index < @files or return '';
       my $file_entry = $files[$file_index];
       # highlighted values carry markup, so they are returned as they are.
       # NOTE(review): the text under the markup is not escaped anywhere in
       # this sub - confirm file fields can never contain unsafe HTML
       exists $file_entry->[0]{$args} and return $file_entry->[0]{$args};

       my $value = $file_entry->[1]{$args};
       defined $value or return '';

       escape_html($value);
     },

     ifResults => sub { scalar @results; },
     ifSearch => sub { defined $words and length $words },
     dateSelected => sub { $_[0] eq $date ? 'selected="selected"' : '' },
     excerpt =>
     sub {
       return $excerpt;
     },
     articleurl =>
     sub {
       my $field = $admin ? 'admin' : 'link';
       return $articles[$article_index]{$field};
     },
     count => sub { scalar @results },
     multiple => sub { @results != 1 },
     terms => sub { escape_html($words) },
     resultSeq => sub { $result_seq },
     list => sub { popup_menu(-name=>'s', -id => 's',
                              -values=>\@sections,
                              -labels=>\%sections,
                              -default=>$section) },

     # result pages
     iterate_pages =>
     sub {
       return ++$page_num_iter <= $page_count;
     },
     page => sub { $page_num_iter },
     ifCurrentSearchPage =>
     sub { $page_num_iter == $page_number },
     pageurl =>
     sub {
       $ENV{SCRIPT_NAME} . "?q=" . escape_uri($words) .
         "&amp;s=" . escape_uri($section) .
           "&amp;d=" . escape_uri($date) .
             "&amp;page=".$page_num_iter;
     },
     highlight_result =>
     [ \&tag_highlight_result, \$current_result, $cfg, $words_re ],
     admin => $admin,
    );

  my $template = $cgi->param('embed') ? 'include/search_results' : 'search';
  my $result = $req->response($template, \%acts);
  %acts = (); # remove any circular refs

  return $result;
}
288
# Tag handler: return one field of the current search result, HTML
# escaped, with every match of the search terms wrapped in the highlight
# markup configured for that field.
#
#   $rcurrent_result - reference to the variable holding the current result
#   $cfg             - config object, queried for "<field>_prefix" and
#                      "<field>_suffix" in [search highlight]
#   $words_re        - compiled regexp capturing a matched term in $1
#   $arg             - name of the field to return
#
# Returns an error string when there is no current result, and '' when
# the field is undefined.
sub tag_highlight_result {
  my ($rcurrent_result, $cfg, $words_re, $arg) = @_;

  my $current = $$rcurrent_result;
  unless ($current) {
    return "** highlight_result must be in results iterator **";
  }

  my $raw = $current->{$arg};
  return '' unless defined $raw;

  # escape before highlighting so the inserted markup is left intact
  my $marked = escape_html($raw);

  my $open = $cfg->entry('search highlight', "${arg}_prefix", "<b>");
  my $close = $cfg->entry('search highlight', "${arg}_suffix", "</b>");

  $marked =~ s/$words_re/$open$1$close/g;

  return $marked;
}
307
# Run a search through the search engine class named by the "searcher"
# key in the [search] config section (default BSE::Search::BSE).
#
# The class is loaded on demand, constructed with the config object, and
# its search() method is called with the words, section, date, terms
# reference, match_all flag and the request. Whatever search() returns is
# returned to the caller unchanged.
sub getSearchResult {
  my ($req, $words, $section, $date, $terms, $match_all) = @_;

  my $cfg = $req->cfg;
  my $searcher_class = $cfg->entry('search', 'searcher', 'BSE::Search::BSE');

  # turn Some::Class into Some/Class.pm for require
  my $searcher_file = "$searcher_class.pm";
  $searcher_file =~ s{::}{/}g;
  require $searcher_file;

  my $searcher = $searcher_class->new(cfg => $cfg);
  my @search_args = ($words, $section, $date, $terms, $match_all, $req);

  return $searcher->search(@search_args);
}
318
# Cache of generator objects, keyed by generator class and admin flag.
# The admin flag is part of the key because it is passed to the
# constructor; keying on the class alone would hand a generator built for
# one mode to a later call made in the other mode in a long running process.
my %gens;

# Build the excerpt for a search result using the article's generator.
#
#   $cfg            - config object, passed to the generator constructor
#   $admin          - admin flag, passed to the generator constructor
#   $case_sensitive - passed through to the generator's excerpt()
#   $article        - the article; its generator field names the class
#   $found          - reference passed through to the generator's excerpt()
#   $terms          - array ref of search terms
#   $type, $text    - optional, passed through to the generator's excerpt()
#
# Returns whatever the generator's excerpt() method returns.
# Dies with a stack trace (confess) if the generator name is blank or
# malformed, or the generator class cannot be loaded.
sub excerpt {
  my ($cfg, $admin, $case_sensitive, $article, $found, $terms, $type, $text) = @_;

  my $generator = $article->{generator};

  defined $generator && $generator =~ /\S/
    or confess "generator for $article->{id} is blank";

  # the name is interpolated into a string eval below, so accept nothing
  # but a plain package name
  $generator =~ /\A\w+(?:::\w+)*\z/
    or confess "Cannot use $generator: not a valid class name";

  eval "use $generator";
  confess "Cannot use $generator: $@" if $@;

  my $cache_key = join '/', $generator, ($admin ? 1 : 0);
  $gens{$cache_key} ||= $generator->new(admin=>$admin, cfg=>$cfg, top=>$article);

  return $gens{$cache_key}->excerpt($article, $found, $case_sensitive, $terms, $type, $text);
}
335
3361;