optional case-insensitivity for searching
[bse.git] / site / cgi-bin / modules / BSE / Search / BSE.pm
CommitLineData
aec300cd
TC
1package BSE::Search::BSE;
2use strict;
3use Constants qw(:search);
4
5d2a441a 5our $VERSION = "1.001";
cb7fd78d 6
aec300cd
TC
7use base 'BSE::Search::Base';
8
9sub new {
10 my ($class, %opts) = @_;
11
12 return bless \%opts, $class;
13}
14
15sub get_term_matches {
16 my ($term, $allow_wc, $section) = @_;
17
18 my $dh = BSE::DB->single;
19
20 my $sth;
21 if ($SEARCH_AUTO_WILDCARD && $allow_wc) {
22 $sth = $dh->stmt('searchIndexWC');
23 $sth->execute($term."%")
24 or die "Could not execute search: ",$sth->errstr;
25 }
26 else {
27 $sth = $dh->stmt('searchIndex');
28 $sth->execute($term)
29 or die "Could not execute search: ",$sth->errstr;
30 }
31
32 my %matches;
33 while (my $row = $sth->fetchrow_arrayref) {
34 # skip any results that contain spaces if our term doesn't
35 # contain spaces. This loses wildcard matches which hit
36 # phrase entries
37 next if $term !~ /\s/ && $row->[0] =~ /\s/;
38 my @ids = split ' ', $row->[1];
39 my @scores = split ' ', $row->[3];
40 if ($section) {
41 # only for the section requested
42 my @sections = split ' ', $row->[2];
43 my @keep = grep { $sections[$_] == $section && $ids[$_] } 0..$#sections;
44 @ids = @ids[@keep];
45 @scores = @scores[@keep];
46 }
47 for my $index (0 .. $#ids) {
48 $matches{$ids[$index]} += $scores[$index];
49 }
50 }
51
52 return map [ $_, $matches{$_} ], keys %matches;
53}
54
55sub search {
5d2a441a 56 my ($self, $words, $section, $date, $terms, $match_all, $req, $match_case) = @_;
aec300cd
TC
57
58 # canonical form
59 $words =~ s/^\s+|\s+$//g;
60
5d2a441a
TC
61 $words = lc $words unless $match_case;
62
aec300cd
TC
63 # array of [ term, unquoted, required, weight ]
64 my @terms;
65 my @exclude;
66 while (1) {
67 if ($words =~ /\G\s*-"([^"]+)"/gc
68 || $words =~ /\G\s*-'([^\']+)'/gc) {
69 push @exclude, [ $1, 0 ];
70 }
71 elsif ($words =~ /\G\s*\+"([^"]+)"/gc
72 || $words =~ /\G\s*\+'([^\']+)'/gc) {
73 push @terms, [ $1, 0, 1, 1 ];
74 }
75 elsif ($words =~ /\G\s*"([^"]+)"/gc
76 || $words =~ /\G\s*'([^']+)'/gc) {
77 push(@terms, [ $1, 0, 0, 1 ]);
78 }
79 elsif ($words =~ /\G\s*-(\S+)/gc) {
80 push @exclude, [ $1, 1 ];
81 }
82 elsif ($words =~ /\G\s*\+(\S+)/gc) {
83 push(@terms, [ $1, 1, 1, 1 ]);
84 }
85 elsif ($words =~ /\G\s*(\S+)/gc) {
86 push(@terms, [ $1, 1, 0, 1 ]);
87 }
88 else {
89 last;
90 }
91 }
92
93 @terms or return;
94
d1ca9873
TC
95 if ($match_all) {
96 for my $term (@terms) {
97 $term->[2] = 1;
98 }
99 }
100
aec300cd
TC
101 # if the user entered a plain multi-word phrase
102 if ($words !~ /["'+-]/ && $words =~ /\s/) {
103 # treat it as if they entered it in quotes as well
104 # giving articles with that phrase an extra score
105 push(@terms, [ $words, 0, 0, 0.1 ]);
106 }
107
108 # disable wildcarding for short terms
109 for my $term (@terms) {
110 if ($term->[1] && length($term->[0]) < $SEARCH_WILDCARD_MIN) {
111 $term->[1] = 0;
112 }
113 }
114
115 my %scores;
116 my %terms;
117 for my $term (grep !$_->[2], @terms) {
118 my @matches = get_term_matches($term->[0], $term->[1], $section);
119 for my $match (@matches) {
120 $scores{$match->[0]} += $match->[1] * $term->[3];
121 }
122 }
123 my @required = grep $_->[2], @terms;
124 my %delete; # has of id to 1
125 if (@required) {
126 my %match_required;
127 for my $term (@required) {
128 my @matches = get_term_matches($term->[0], $term->[1], $section);
129 for my $match (@matches) {
130 $scores{$match->[0]} += $match->[1];
131 ++$match_required{$match->[0]};
132 }
133 }
134 for my $id (keys %scores) {
135 if (!$match_required{$id} || $match_required{$id} != @required) {
136 ++$delete{$id};
137 }
138 }
139 }
140 for my $term (@exclude) {
141 my @matches = get_term_matches($term->[0], $term->[1], $section);
142 ++$delete{$_->[0]} for @matches;
143 }
144
145 delete @scores{keys %delete};
146
147 return () if !keys %scores;
148
149 # make sure we match the other requirements
150 my $sql = "select id from article where ";
151 $sql .= "(".join(" or ", map "id = $_", keys %scores).")";
152 my $now = _sql_date(time);
153 my $oneday = 24 * 3600;
154 SWITCH: for ($date) {
155 $_ eq 'ar' # been released
156 && do {
74b21f6d 157 $sql .= " and $now between \"release\" and expire";
aec300cd
TC
158 last SWITCH;
159 };
160 /^r(\d+)$/ # released in last N days
161 && do {
74b21f6d 162 $sql .= " and \"release\" > "._sql_date(time - $oneday * $1);
aec300cd
TC
163 last SWITCH;
164 };
165 /^e(\d+)$/ # expired in last N days
166 && do {
167 $sql .= " and expire > " . _sql_date(time - $oneday * $1)
168 ." and expire <= $now";
169 last SWITCH;
170 };
171 /^m(\d+)$/ # modified in last N days
172 && do {
173 $sql .= " and lastModified > " . _sql_date(time - $oneday * $1);
174 last SWITCH;
175 };
176 $_ eq 'ae'
177 && do {
178 $sql .= " and expire < $now";
179 last SWITCH;
180 };
181 }
182
d1ca9873
TC
183 $sql .= " order by title";
184
aec300cd
TC
185 my $dh = BSE::DB->single;
186
187 my $sth = $dh->{dbh}->prepare($sql)
188 or die "Error preparing $sql: ",$dh->{dbh}->errstr;
189
190 $sth->execute()
191 or die "Cannot execute $sql: ",$sth->errstr;
192
193 my @ids;
194 my $row;
195 push(@ids, $row->[0]) while $row = $sth->fetchrow_arrayref;
196
197 @ids = sort { $scores{$b} <=> $scores{$a} } @ids;
198
199 @$terms = map $_->[0], @terms;
200
201 return map [ $_, $scores{$_} ], @ids;
202}
203
204sub _sql_date {
205 my ($time) = @_;
206 use POSIX qw(strftime);
207
208 strftime("'%Y-%m-%d %H:%M'", localtime $time);
209}
210
2111;