]> git.imager.perl.org - bse.git/blame - site/cgi-bin/modules/BSE/Search/BSE.pm
commit for 0.15_53 release
[bse.git] / site / cgi-bin / modules / BSE / Search / BSE.pm
CommitLineData
aec300cd
TC
1package BSE::Search::BSE;
2use strict;
3use Constants qw(:search);
4
5use base 'BSE::Search::Base';
6
7sub new {
8 my ($class, %opts) = @_;
9
10 return bless \%opts, $class;
11}
12
13sub get_term_matches {
14 my ($term, $allow_wc, $section) = @_;
15
16 my $dh = BSE::DB->single;
17
18 my $sth;
19 if ($SEARCH_AUTO_WILDCARD && $allow_wc) {
20 $sth = $dh->stmt('searchIndexWC');
21 $sth->execute($term."%")
22 or die "Could not execute search: ",$sth->errstr;
23 }
24 else {
25 $sth = $dh->stmt('searchIndex');
26 $sth->execute($term)
27 or die "Could not execute search: ",$sth->errstr;
28 }
29
30 my %matches;
31 while (my $row = $sth->fetchrow_arrayref) {
32 # skip any results that contain spaces if our term doesn't
33 # contain spaces. This loses wildcard matches which hit
34 # phrase entries
35 next if $term !~ /\s/ && $row->[0] =~ /\s/;
36 my @ids = split ' ', $row->[1];
37 my @scores = split ' ', $row->[3];
38 if ($section) {
39 # only for the section requested
40 my @sections = split ' ', $row->[2];
41 my @keep = grep { $sections[$_] == $section && $ids[$_] } 0..$#sections;
42 @ids = @ids[@keep];
43 @scores = @scores[@keep];
44 }
45 for my $index (0 .. $#ids) {
46 $matches{$ids[$index]} += $scores[$index];
47 }
48 }
49
50 return map [ $_, $matches{$_} ], keys %matches;
51}
52
53sub search {
d1ca9873 54 my ($self, $words, $section, $date, $terms, $match_all, $req) = @_;
aec300cd
TC
55
56 # canonical form
57 $words =~ s/^\s+|\s+$//g;
58
59 # array of [ term, unquoted, required, weight ]
60 my @terms;
61 my @exclude;
62 while (1) {
63 if ($words =~ /\G\s*-"([^"]+)"/gc
64 || $words =~ /\G\s*-'([^\']+)'/gc) {
65 push @exclude, [ $1, 0 ];
66 }
67 elsif ($words =~ /\G\s*\+"([^"]+)"/gc
68 || $words =~ /\G\s*\+'([^\']+)'/gc) {
69 push @terms, [ $1, 0, 1, 1 ];
70 }
71 elsif ($words =~ /\G\s*"([^"]+)"/gc
72 || $words =~ /\G\s*'([^']+)'/gc) {
73 push(@terms, [ $1, 0, 0, 1 ]);
74 }
75 elsif ($words =~ /\G\s*-(\S+)/gc) {
76 push @exclude, [ $1, 1 ];
77 }
78 elsif ($words =~ /\G\s*\+(\S+)/gc) {
79 push(@terms, [ $1, 1, 1, 1 ]);
80 }
81 elsif ($words =~ /\G\s*(\S+)/gc) {
82 push(@terms, [ $1, 1, 0, 1 ]);
83 }
84 else {
85 last;
86 }
87 }
88
89 @terms or return;
90
d1ca9873
TC
91 if ($match_all) {
92 for my $term (@terms) {
93 $term->[2] = 1;
94 }
95 }
96
aec300cd
TC
97 # if the user entered a plain multi-word phrase
98 if ($words !~ /["'+-]/ && $words =~ /\s/) {
99 # treat it as if they entered it in quotes as well
100 # giving articles with that phrase an extra score
101 push(@terms, [ $words, 0, 0, 0.1 ]);
102 }
103
104 # disable wildcarding for short terms
105 for my $term (@terms) {
106 if ($term->[1] && length($term->[0]) < $SEARCH_WILDCARD_MIN) {
107 $term->[1] = 0;
108 }
109 }
110
111 my %scores;
112 my %terms;
113 for my $term (grep !$_->[2], @terms) {
114 my @matches = get_term_matches($term->[0], $term->[1], $section);
115 for my $match (@matches) {
116 $scores{$match->[0]} += $match->[1] * $term->[3];
117 }
118 }
119 my @required = grep $_->[2], @terms;
120 my %delete; # has of id to 1
121 if (@required) {
122 my %match_required;
123 for my $term (@required) {
124 my @matches = get_term_matches($term->[0], $term->[1], $section);
125 for my $match (@matches) {
126 $scores{$match->[0]} += $match->[1];
127 ++$match_required{$match->[0]};
128 }
129 }
130 for my $id (keys %scores) {
131 if (!$match_required{$id} || $match_required{$id} != @required) {
132 ++$delete{$id};
133 }
134 }
135 }
136 for my $term (@exclude) {
137 my @matches = get_term_matches($term->[0], $term->[1], $section);
138 ++$delete{$_->[0]} for @matches;
139 }
140
141 delete @scores{keys %delete};
142
143 return () if !keys %scores;
144
145 # make sure we match the other requirements
146 my $sql = "select id from article where ";
147 $sql .= "(".join(" or ", map "id = $_", keys %scores).")";
148 my $now = _sql_date(time);
149 my $oneday = 24 * 3600;
150 SWITCH: for ($date) {
151 $_ eq 'ar' # been released
152 && do {
153 $sql .= " and $now between release and expire";
154 last SWITCH;
155 };
156 /^r(\d+)$/ # released in last N days
157 && do {
158 $sql .= " and release > "._sql_date(time - $oneday * $1);
159 last SWITCH;
160 };
161 /^e(\d+)$/ # expired in last N days
162 && do {
163 $sql .= " and expire > " . _sql_date(time - $oneday * $1)
164 ." and expire <= $now";
165 last SWITCH;
166 };
167 /^m(\d+)$/ # modified in last N days
168 && do {
169 $sql .= " and lastModified > " . _sql_date(time - $oneday * $1);
170 last SWITCH;
171 };
172 $_ eq 'ae'
173 && do {
174 $sql .= " and expire < $now";
175 last SWITCH;
176 };
177 }
178
d1ca9873
TC
179 $sql .= " order by title";
180
aec300cd
TC
181 my $dh = BSE::DB->single;
182
183 my $sth = $dh->{dbh}->prepare($sql)
184 or die "Error preparing $sql: ",$dh->{dbh}->errstr;
185
186 $sth->execute()
187 or die "Cannot execute $sql: ",$sth->errstr;
188
189 my @ids;
190 my $row;
191 push(@ids, $row->[0]) while $row = $sth->fetchrow_arrayref;
192
193 @ids = sort { $scores{$b} <=> $scores{$a} } @ids;
194
195 @$terms = map $_->[0], @terms;
196
197 return map [ $_, $scores{$_} ], @ids;
198}
199
200sub _sql_date {
201 my ($time) = @_;
202 use POSIX qw(strftime);
203
204 strftime("'%Y-%m-%d %H:%M'", localtime $time);
205}
206
2071;