add a host config parameter for S3 storages
[bse.git] / site / cgi-bin / modules / BSE / UI / Search.pm
CommitLineData
e58486b7
TC
1package BSE::UI::Search;
2use strict;
3use base 'BSE::UI::Dispatch';
e0ed81d7 4use BSE::TB::Articles;
e58486b7
TC
5use BSE::DB;
6use Constants qw(:search);
7use Carp;
8use BSE::Cfg;
9use BSE::Template;
3f9c8a96 10use BSE::Util::HTML qw':default popup_menu';
9d77df06 11use BSE::Util::Tags qw(tag_article);
e58486b7
TC
12use BSE::Request;
13
cb2a09ac 14our $VERSION = "1.005";
cb7fd78d 15
e58486b7
TC
16my %actions =
17 (
18 search => 1,
19 );
20
21sub actions { \%actions }
22
23sub default_action { 'search' }
24
25sub req_search {
26 my ($class, $req) = @_;
27
28 my $cfg = $req->cfg;
88fa9cd6 29
e58486b7 30 my $cgi = $req->cgi;
88fa9cd6
AO
31 my $results_per_page = int($cgi->param('pp') || 10);
32 $results_per_page >= 1 or $results_per_page = 10;
e58486b7
TC
33 my $words = $cgi->param('q');
34 my $section = $cgi->param('s');
35 my $date = $cgi->param('d');
36 my $admin = $cgi->param('admin') ? 1 : 0;
37 my $match_all = $cgi->param('match_all');
38 $section = '' if !defined $section;
39 $date = 'ar' if ! defined $date;
40 my @results;
41 my @terms; # terms as parsed by the search engine
42 my $case_sensitive;
43 if (defined $words && length $words) {
44 $case_sensitive = $words ne lc $words;
45 @results = getSearchResult($req, $words, $section, $date, \@terms, $match_all);
46 }
47 else {
48 $words = ''; # so we don't return junk for the form default
49 }
50
51 my $page_count = int((@results + $results_per_page - 1)/$results_per_page);
52
53 my $page_number = $cgi->param('page') || 1;
54 $page_number = $page_count if $page_number > $page_count;
55
56 my @articles;
57 if (@results) {
58 my $articles_start = ($page_number-1) * $results_per_page;
59 my $articles_end = $articles_start + $results_per_page-1;
60 $articles_end = $#results if $articles_end >= @results;
61
62 if ($cfg->entry('search', 'keep_inaccessible')) {
63 for my $entry (@results[$articles_start..$articles_end]) {
e0ed81d7 64 my $article = BSE::TB::Articles->getByPkey($entry->[0])
e58486b7
TC
65 or die "Cannot retrieve article $entry->[0]\n";
66 push(@articles, $article);
67 }
68 }
69 else {
70 my %remove; # used later to remove the inaccessible from @results;
71 # we need to check accessiblity on each article
72 my $index = 0;
73 my $seen = 0;
74 while ($index < @results && $seen <= $articles_end) {
75 my $id = $results[$index][0];
e0ed81d7 76 my $article = BSE::TB::Articles->getByPkey($id)
e58486b7
TC
77 or die "Cannot retrieve article $id\n";
78 if ($req->siteuser_has_access($article)) {
79 if ($seen >= $articles_start) {
80 push @articles, $article;
81 }
82 ++$seen;
83 }
84 else {
85 $remove{$id} = 1;
86 }
87 ++$index;
88 }
89 @results = grep !$remove{$_->[0]}, @results;
90 }
91 }
92
cb2a09ac 93 my %index_as;
e58486b7
TC
94 for my $article (@articles) {
95 my $generator = $article->{generator};
96 eval "use $generator";
97 my $gen = $generator->new(top=>$article, cfg=>$cfg);
98 $article = $gen->get_real_article($article);
cb2a09ac 99 $index_as{$article->id} = [ $article->others_indexed_as_myself ];
e58486b7
TC
100 }
101
102 $page_count = int((@results + $results_per_page - 1)/$results_per_page);
103
104 # make an array of hashes (to preserve order)
105 my %excluded;
106 @excluded{@SEARCH_EXCLUDE} = @SEARCH_EXCLUDE;
107 my %included;
108 @included{@SEARCH_INCLUDE} = @SEARCH_INCLUDE;
109 my @sections = map { { $_->{id} => $_->{title} } }
110 sort { $b->{displayOrder} <=> $a->{displayOrder} }
111 grep { ($_->{listed} || $included{$_->{id}})
112 && !$excluded{$_->{id}} }
e0ed81d7 113 BSE::TB::Articles->getBy('level', 1);
e58486b7
TC
114 unshift(@sections, { ""=>$SEARCH_ALL });
115 my %sections = map { %$_ } @sections;
116 # now a list of values ( in the correct order
117 @sections = map { keys %$_ } @sections;
118
119 my %scores = map @$_, @results;
120
121 my $max_score = 0;
122 for my $score (values %scores) {
123 $score > $max_score and $max_score = $score;
124 }
125
126 my %highlight_prefix;
127 my %highlight_suffix;
128 for my $type (qw(keyword author pageTitle file_displayName
129 file_description file_notes summary description product_code)) {
130 $highlight_prefix{$type} =
947d7c09 131 $cfg->entry('search highlight', "${type}_prefix", $cfg->entry('search highlight', "prefix", "<b>"));
e58486b7 132 $highlight_suffix{$type} =
947d7c09 133 $cfg->entry('search highlight', "${type}_suffix", $cfg->entry('search highlight', "suffix", "</b>"));
e58486b7
TC
134 }
135
136 my $page_num_iter = 0;
137
138 my $article_index = -1;
139 my $result_seq = ($page_number-1) * $results_per_page;
140 my $excerpt;
141 my %match_tags;
142 my $words_re_str = '\b('.join('|', map quotemeta, @terms).')';
143 my $highlight_partial = $cfg->entryBool('search', 'highlight_partial', 1);
144 $words_re_str .= '\b' unless $highlight_partial;
145 my $words_re = qr/$words_re_str/i;
146 my @files;
147 my $file_index;
148 my $current_result;
149 my %acts;
150 %acts =
151 (
152 $req->dyn_user_tags(),
153 iterate_results =>
154 sub {
155 ++$result_seq;
156 ++$article_index;
157 if ($article_index < @articles) {
158 $current_result = $articles[$article_index];
cb2a09ac
TC
159 my @excerpts;
160 for my $check (@{$index_as{$current_result->id}}) {
161 my $found_child = 0;
162 my $tmp = excerpt($cfg, $admin, $case_sensitive, $check, \$found_child, \@terms);
163 push @excerpts, $tmp if $found_child;
164 }
e58486b7 165 my $found = 0;
cb2a09ac
TC
166 my $first_excerpt = excerpt($cfg, $admin, $case_sensitive, $current_result, \$found, \@terms);
167 unshift @excerpts, $first_excerpt if $found || !@excerpts;
168 $excerpt = join " ", @excerpts;
169
e58486b7 170 $req->set_article(result => $current_result);
cb2a09ac
TC
171
172 %match_tags = ();
e58486b7 173 for my $field (qw/pageTitle summary keyword description author product_code/) {
cb2a09ac
TC
174 for my $check ($current_result, @{$index_as{$current_result->id}}) {
175 my $value = $check->{$field};
176 defined $value or $value = '';
177 $value =~ s!$words_re!$highlight_prefix{$field}$1$highlight_suffix{$field}!g
178 or $value = '';
179 $match_tags{$field} .= " $value";
180 }
e58486b7 181 }
cb2a09ac
TC
182 s/^\s+// for values %match_tags;
183
e58486b7
TC
184 # match files
185 @files = ();
cb2a09ac 186 for my $file (map $_->files, $current_result, @{$index_as{$current_result->id}}) {
e58486b7
TC
187 my $found;
188 my %fileout;
189 for my $field (qw(displayName description notes)) {
190 my $prefix = $highlight_prefix{"file_$field"};
191 my $suffix = $highlight_suffix{"file_$field"};
192 $fileout{$field. "_matched"} = $file->{$field} =~ /$words_re/;
193 ++$found if ($fileout{$field} = $file->{$field})
194 =~ s!$words_re!$prefix$1$suffix!g;
195 }
196 if ($found) {
197 $fileout{notes_excerpt} =
198 excerpt($cfg, $admin, $case_sensitive, $current_result, \$found, \@terms, 'file_notes', $file->{notes});
199 push @files, [ \%fileout, $file ];
200 }
201 }
202
203 return 1;
204 }
205 else {
206 $req->set_article(result => undef);
207
208 return 0;
209 }
210 },
211 result =>
212 sub {
213 my $arg = shift;
214 if ($arg eq 'score') {
215 return sprintf("%.1f", 100.0 * $scores{$current_result->{id}} / $max_score);
216 }
9d77df06 217 return tag_article($current_result, $cfg, $arg);
e58486b7
TC
218 },
219 date =>
220 sub {
221 my ($func, $args) = split ' ', $_[0];
222 use POSIX 'strftime';
223 exists $acts{$func}
224 or return "** $func not found for date **";
225 my $date = $acts{$func}->($args)
226 or return '';
227 my ($year, $month, $day) = $date =~ /(\d+)\D+(\d+)\D+(\d+)/;
228 $year -= 1900;
229 --$month;
230 return strftime('%d-%b-%Y', 0, 0, 0, $day, $month, $year, 0, 0);
231 },
232 keywords => sub { $match_tags{keyword} },
233 author => sub { $match_tags{author} },
234 pageTitle => sub { $match_tags{pageTitle} },
235 match_summary => sub { $match_tags{summary} },
236 description => sub { $match_tags{description} },
237 product_code => sub { $match_tags{product_code} },
238
239 ifMatchfiles => sub { @files },
240 matchfile_count => sub { @files },
3d0a97d9 241 matchfile_index => sub { $file_index },
e58486b7
TC
242 iterate_matchfiles_reset => sub { $file_index = -1 },
243 iterate_matchfiles => sub { ++$file_index < @files },
244 matchfile =>
245 sub {
246 my ($args) = @_;
247 $file_index < @files or return '';
248 my $file_entry = $files[$file_index];
249 # already html escaped
250 exists $file_entry->[0]{$args} and return $file_entry->[0]{$args};
251
252 my $value = $file_entry->[1]{$args};
253 defined $value or return '';
254
255 escape_html($value);
256 },
257
258 ifResults => sub { scalar @results; },
259 ifSearch => sub { defined $words and length $words },
260 dateSelected => sub { $_[0] eq $date ? 'selected="selected"' : '' },
261 excerpt =>
262 sub {
263 return $excerpt;
264 },
265 articleurl =>
266 sub {
9d77df06 267 return $admin ? $current_result->{admin} : $current_result->link($cfg);
e58486b7
TC
268 },
269 count => sub { scalar @results },
270 multiple => sub { @results != 1 },
271 terms => sub { escape_html($words) },
272 resultSeq => sub { $result_seq },
4751ce20 273 list => sub { popup_menu(-name=>'s', -id => 'search_s',
e58486b7
TC
274 -values=>\@sections,
275 -labels=>\%sections,
276 -default=>$section) },
277
278 # result pages
279 iterate_pages =>
280 sub {
281 return ++$page_num_iter <= $page_count;
282 },
283 page => sub { $page_num_iter },
284 ifCurrentSearchPage =>
285 sub { $page_num_iter == $page_number },
286 pageurl =>
287 sub {
3f9c8a96
TC
288 my $work_words = $words;
289 $ENV{SCRIPT_NAME} . "?q=" . escape_uri($work_words) .
e58486b7
TC
290 "&amp;s=" . escape_uri($section) .
291 "&amp;d=" . escape_uri($date) .
3f9c8a96
TC
292 "&amp;page=".$page_num_iter .
293 "&amp;pp=$results_per_page";
e58486b7
TC
294 },
295 highlight_result =>
296 [ \&tag_highlight_result, \$current_result, $cfg, $words_re ],
4b1dcb69 297 admin_search => $admin,
e58486b7
TC
298 );
299
300 my $template = $cgi->param('embed') ? 'include/search_results' : 'search';
d060e02f 301 my $result = $req->dyn_response($template, \%acts);
e58486b7
TC
302 %acts = (); # remove any circular refs
303
304 return $result;
305}
306
307sub tag_highlight_result {
308 my ($rcurrent_result, $cfg, $words_re, $arg) = @_;
309
310 $$rcurrent_result
311 or return "** highlight_result must be in results iterator **";
312
313 my $text = $$rcurrent_result->{$arg};
314 defined $text or return '';
315
316 $text = escape_html($text);
317
947d7c09
AO
318 my $prefix = $cfg->entry('search highlight', "${arg}_prefix", $cfg->entry('search highlight', "prefix", "<b>"));
319 my $suffix = $cfg->entry('search highlight', "${arg}_suffix", $cfg->entry('search highlight', "suffix", "</b>"));
e58486b7
TC
320
321 $text =~ s/$words_re/$prefix$1$suffix/g;
322
323 $text;
324}
325
326sub getSearchResult {
327 my ($req, $words, $section, $date, $terms, $match_all) = @_;
328
329 my $cfg = $req->cfg;
330 my $searcher_class = $cfg->entry('search', 'searcher', 'BSE::Search::BSE');
331 (my $searcher_file = $searcher_class . '.pm') =~ s!::!/!g;;
332 require $searcher_file;
333 my $searcher = $searcher_class->new(cfg => $cfg);
334 return $searcher->search($words, $section, $date, $terms, $match_all, $req);
335}
336
337my %gens;
338
339sub excerpt {
340 my ($cfg, $admin, $case_sensitive, $article, $found, $terms, $type, $text) = @_;
341
342 my $generator = $article->{generator};
343
344 $generator =~ /\S/ or confess "generator for $article->{id} is blank";
345
346 eval "use $generator";
347 confess "Cannot use $generator: $@" if $@;
348
349 $gens{$generator} ||= $generator->new(admin=>$admin, cfg=>$cfg, top=>$article);
350
cb2a09ac 351 return $gens{$generator}->excerpt($article, $found, $case_sensitive, $terms, $type, $text, 1);
e58486b7
TC
352}
353
3541;