add a host config parameter for S3 storages
[bse.git] / site / cgi-bin / modules / BSE / Search / BSE.pm
CommitLineData
aec300cd
TC
1package BSE::Search::BSE;
2use strict;
3use Constants qw(:search);
4
cb7fd78d
TC
5our $VERSION = "1.000";
6
aec300cd
TC
7use base 'BSE::Search::Base';
8
9sub new {
10 my ($class, %opts) = @_;
11
12 return bless \%opts, $class;
13}
14
15sub get_term_matches {
16 my ($term, $allow_wc, $section) = @_;
17
18 my $dh = BSE::DB->single;
19
20 my $sth;
21 if ($SEARCH_AUTO_WILDCARD && $allow_wc) {
22 $sth = $dh->stmt('searchIndexWC');
23 $sth->execute($term."%")
24 or die "Could not execute search: ",$sth->errstr;
25 }
26 else {
27 $sth = $dh->stmt('searchIndex');
28 $sth->execute($term)
29 or die "Could not execute search: ",$sth->errstr;
30 }
31
32 my %matches;
33 while (my $row = $sth->fetchrow_arrayref) {
34 # skip any results that contain spaces if our term doesn't
35 # contain spaces. This loses wildcard matches which hit
36 # phrase entries
37 next if $term !~ /\s/ && $row->[0] =~ /\s/;
38 my @ids = split ' ', $row->[1];
39 my @scores = split ' ', $row->[3];
40 if ($section) {
41 # only for the section requested
42 my @sections = split ' ', $row->[2];
43 my @keep = grep { $sections[$_] == $section && $ids[$_] } 0..$#sections;
44 @ids = @ids[@keep];
45 @scores = @scores[@keep];
46 }
47 for my $index (0 .. $#ids) {
48 $matches{$ids[$index]} += $scores[$index];
49 }
50 }
51
52 return map [ $_, $matches{$_} ], keys %matches;
53}
54
55sub search {
d1ca9873 56 my ($self, $words, $section, $date, $terms, $match_all, $req) = @_;
aec300cd
TC
57
58 # canonical form
59 $words =~ s/^\s+|\s+$//g;
60
61 # array of [ term, unquoted, required, weight ]
62 my @terms;
63 my @exclude;
64 while (1) {
65 if ($words =~ /\G\s*-"([^"]+)"/gc
66 || $words =~ /\G\s*-'([^\']+)'/gc) {
67 push @exclude, [ $1, 0 ];
68 }
69 elsif ($words =~ /\G\s*\+"([^"]+)"/gc
70 || $words =~ /\G\s*\+'([^\']+)'/gc) {
71 push @terms, [ $1, 0, 1, 1 ];
72 }
73 elsif ($words =~ /\G\s*"([^"]+)"/gc
74 || $words =~ /\G\s*'([^']+)'/gc) {
75 push(@terms, [ $1, 0, 0, 1 ]);
76 }
77 elsif ($words =~ /\G\s*-(\S+)/gc) {
78 push @exclude, [ $1, 1 ];
79 }
80 elsif ($words =~ /\G\s*\+(\S+)/gc) {
81 push(@terms, [ $1, 1, 1, 1 ]);
82 }
83 elsif ($words =~ /\G\s*(\S+)/gc) {
84 push(@terms, [ $1, 1, 0, 1 ]);
85 }
86 else {
87 last;
88 }
89 }
90
91 @terms or return;
92
d1ca9873
TC
93 if ($match_all) {
94 for my $term (@terms) {
95 $term->[2] = 1;
96 }
97 }
98
aec300cd
TC
99 # if the user entered a plain multi-word phrase
100 if ($words !~ /["'+-]/ && $words =~ /\s/) {
101 # treat it as if they entered it in quotes as well
102 # giving articles with that phrase an extra score
103 push(@terms, [ $words, 0, 0, 0.1 ]);
104 }
105
106 # disable wildcarding for short terms
107 for my $term (@terms) {
108 if ($term->[1] && length($term->[0]) < $SEARCH_WILDCARD_MIN) {
109 $term->[1] = 0;
110 }
111 }
112
113 my %scores;
114 my %terms;
115 for my $term (grep !$_->[2], @terms) {
116 my @matches = get_term_matches($term->[0], $term->[1], $section);
117 for my $match (@matches) {
118 $scores{$match->[0]} += $match->[1] * $term->[3];
119 }
120 }
121 my @required = grep $_->[2], @terms;
122 my %delete; # has of id to 1
123 if (@required) {
124 my %match_required;
125 for my $term (@required) {
126 my @matches = get_term_matches($term->[0], $term->[1], $section);
127 for my $match (@matches) {
128 $scores{$match->[0]} += $match->[1];
129 ++$match_required{$match->[0]};
130 }
131 }
132 for my $id (keys %scores) {
133 if (!$match_required{$id} || $match_required{$id} != @required) {
134 ++$delete{$id};
135 }
136 }
137 }
138 for my $term (@exclude) {
139 my @matches = get_term_matches($term->[0], $term->[1], $section);
140 ++$delete{$_->[0]} for @matches;
141 }
142
143 delete @scores{keys %delete};
144
145 return () if !keys %scores;
146
147 # make sure we match the other requirements
148 my $sql = "select id from article where ";
149 $sql .= "(".join(" or ", map "id = $_", keys %scores).")";
150 my $now = _sql_date(time);
151 my $oneday = 24 * 3600;
152 SWITCH: for ($date) {
153 $_ eq 'ar' # been released
154 && do {
74b21f6d 155 $sql .= " and $now between \"release\" and expire";
aec300cd
TC
156 last SWITCH;
157 };
158 /^r(\d+)$/ # released in last N days
159 && do {
74b21f6d 160 $sql .= " and \"release\" > "._sql_date(time - $oneday * $1);
aec300cd
TC
161 last SWITCH;
162 };
163 /^e(\d+)$/ # expired in last N days
164 && do {
165 $sql .= " and expire > " . _sql_date(time - $oneday * $1)
166 ." and expire <= $now";
167 last SWITCH;
168 };
169 /^m(\d+)$/ # modified in last N days
170 && do {
171 $sql .= " and lastModified > " . _sql_date(time - $oneday * $1);
172 last SWITCH;
173 };
174 $_ eq 'ae'
175 && do {
176 $sql .= " and expire < $now";
177 last SWITCH;
178 };
179 }
180
d1ca9873
TC
181 $sql .= " order by title";
182
aec300cd
TC
183 my $dh = BSE::DB->single;
184
185 my $sth = $dh->{dbh}->prepare($sql)
186 or die "Error preparing $sql: ",$dh->{dbh}->errstr;
187
188 $sth->execute()
189 or die "Cannot execute $sql: ",$sth->errstr;
190
191 my @ids;
192 my $row;
193 push(@ids, $row->[0]) while $row = $sth->fetchrow_arrayref;
194
195 @ids = sort { $scores{$b} <=> $scores{$a} } @ids;
196
197 @$terms = map $_->[0], @terms;
198
199 return map [ $_, $scores{$_} ], @ids;
200}
201
202sub _sql_date {
203 my ($time) = @_;
204 use POSIX qw(strftime);
205
206 strftime("'%Y-%m-%d %H:%M'", localtime $time);
207}
208
2091;