bump to 0.15_36 for release
[bse.git] / site / cgi-bin / modules / BSE / Search / BSE.pm
CommitLineData
aec300cd
TC
1package BSE::Search::BSE;
2use strict;
3use Constants qw(:search);
4
5use base 'BSE::Search::Base';
6
7sub new {
8 my ($class, %opts) = @_;
9
10 return bless \%opts, $class;
11}
12
13sub get_term_matches {
14 my ($term, $allow_wc, $section) = @_;
15
16 my $dh = BSE::DB->single;
17
18 my $sth;
19 if ($SEARCH_AUTO_WILDCARD && $allow_wc) {
20 $sth = $dh->stmt('searchIndexWC');
21 $sth->execute($term."%")
22 or die "Could not execute search: ",$sth->errstr;
23 }
24 else {
25 $sth = $dh->stmt('searchIndex');
26 $sth->execute($term)
27 or die "Could not execute search: ",$sth->errstr;
28 }
29
30 my %matches;
31 while (my $row = $sth->fetchrow_arrayref) {
32 # skip any results that contain spaces if our term doesn't
33 # contain spaces. This loses wildcard matches which hit
34 # phrase entries
35 next if $term !~ /\s/ && $row->[0] =~ /\s/;
36 my @ids = split ' ', $row->[1];
37 my @scores = split ' ', $row->[3];
38 if ($section) {
39 # only for the section requested
40 my @sections = split ' ', $row->[2];
41 my @keep = grep { $sections[$_] == $section && $ids[$_] } 0..$#sections;
42 @ids = @ids[@keep];
43 @scores = @scores[@keep];
44 }
45 for my $index (0 .. $#ids) {
46 $matches{$ids[$index]} += $scores[$index];
47 }
48 }
49
50 return map [ $_, $matches{$_} ], keys %matches;
51}
52
53sub search {
54 my ($self, $words, $section, $date, $terms, $req) = @_;
55
56 # canonical form
57 $words =~ s/^\s+|\s+$//g;
58
59 # array of [ term, unquoted, required, weight ]
60 my @terms;
61 my @exclude;
62 while (1) {
63 if ($words =~ /\G\s*-"([^"]+)"/gc
64 || $words =~ /\G\s*-'([^\']+)'/gc) {
65 push @exclude, [ $1, 0 ];
66 }
67 elsif ($words =~ /\G\s*\+"([^"]+)"/gc
68 || $words =~ /\G\s*\+'([^\']+)'/gc) {
69 push @terms, [ $1, 0, 1, 1 ];
70 }
71 elsif ($words =~ /\G\s*"([^"]+)"/gc
72 || $words =~ /\G\s*'([^']+)'/gc) {
73 push(@terms, [ $1, 0, 0, 1 ]);
74 }
75 elsif ($words =~ /\G\s*-(\S+)/gc) {
76 push @exclude, [ $1, 1 ];
77 }
78 elsif ($words =~ /\G\s*\+(\S+)/gc) {
79 push(@terms, [ $1, 1, 1, 1 ]);
80 }
81 elsif ($words =~ /\G\s*(\S+)/gc) {
82 push(@terms, [ $1, 1, 0, 1 ]);
83 }
84 else {
85 last;
86 }
87 }
88
89 @terms or return;
90
91 # if the user entered a plain multi-word phrase
92 if ($words !~ /["'+-]/ && $words =~ /\s/) {
93 # treat it as if they entered it in quotes as well
94 # giving articles with that phrase an extra score
95 push(@terms, [ $words, 0, 0, 0.1 ]);
96 }
97
98 # disable wildcarding for short terms
99 for my $term (@terms) {
100 if ($term->[1] && length($term->[0]) < $SEARCH_WILDCARD_MIN) {
101 $term->[1] = 0;
102 }
103 }
104
105 my %scores;
106 my %terms;
107 for my $term (grep !$_->[2], @terms) {
108 my @matches = get_term_matches($term->[0], $term->[1], $section);
109 for my $match (@matches) {
110 $scores{$match->[0]} += $match->[1] * $term->[3];
111 }
112 }
113 my @required = grep $_->[2], @terms;
114 my %delete; # has of id to 1
115 if (@required) {
116 my %match_required;
117 for my $term (@required) {
118 my @matches = get_term_matches($term->[0], $term->[1], $section);
119 for my $match (@matches) {
120 $scores{$match->[0]} += $match->[1];
121 ++$match_required{$match->[0]};
122 }
123 }
124 for my $id (keys %scores) {
125 if (!$match_required{$id} || $match_required{$id} != @required) {
126 ++$delete{$id};
127 }
128 }
129 }
130 for my $term (@exclude) {
131 my @matches = get_term_matches($term->[0], $term->[1], $section);
132 ++$delete{$_->[0]} for @matches;
133 }
134
135 delete @scores{keys %delete};
136
137 return () if !keys %scores;
138
139 # make sure we match the other requirements
140 my $sql = "select id from article where ";
141 $sql .= "(".join(" or ", map "id = $_", keys %scores).")";
142 my $now = _sql_date(time);
143 my $oneday = 24 * 3600;
144 SWITCH: for ($date) {
145 $_ eq 'ar' # been released
146 && do {
147 $sql .= " and $now between release and expire";
148 last SWITCH;
149 };
150 /^r(\d+)$/ # released in last N days
151 && do {
152 $sql .= " and release > "._sql_date(time - $oneday * $1);
153 last SWITCH;
154 };
155 /^e(\d+)$/ # expired in last N days
156 && do {
157 $sql .= " and expire > " . _sql_date(time - $oneday * $1)
158 ." and expire <= $now";
159 last SWITCH;
160 };
161 /^m(\d+)$/ # modified in last N days
162 && do {
163 $sql .= " and lastModified > " . _sql_date(time - $oneday * $1);
164 last SWITCH;
165 };
166 $_ eq 'ae'
167 && do {
168 $sql .= " and expire < $now";
169 last SWITCH;
170 };
171 }
172
173 my $dh = BSE::DB->single;
174
175 my $sth = $dh->{dbh}->prepare($sql)
176 or die "Error preparing $sql: ",$dh->{dbh}->errstr;
177
178 $sth->execute()
179 or die "Cannot execute $sql: ",$sth->errstr;
180
181 my @ids;
182 my $row;
183 push(@ids, $row->[0]) while $row = $sth->fetchrow_arrayref;
184
185 @ids = sort { $scores{$b} <=> $scores{$a} } @ids;
186
187 @$terms = map $_->[0], @terms;
188
189 return map [ $_, $scores{$_} ], @ids;
190}
191
192sub _sql_date {
193 my ($time) = @_;
194 use POSIX qw(strftime);
195
196 strftime("'%Y-%m-%d %H:%M'", localtime $time);
197}
198
1991;