version bump modules affected by generate move
[bse.git] / site / cgi-bin / modules / BSE / Index.pm
CommitLineData
3f015bd2
TC
1package BSE::Index;
2use strict;
3use Time::HiRes qw(time);
289f5a78 4use Constants qw(@SEARCH_EXCLUDE @SEARCH_INCLUDE);
3f015bd2
TC
5use Articles;
6
496ba394 7our $VERSION = "1.004";
3f015bd2 8
# Built-in score for each indexable field.  new() uses these as the
# fallback values when reading the "search index scores" config section.
# make_index() skips any field whose final score is false (e.g. 0) and
# treats field names starting with "file_" as attributes of an article's
# attached files.
my %default_scores =
  (
   title            => 5,
   body             => 3,
   keyword          => 4,
   pageTitle        => 5,
   author           => 4,
   file_displayName => 2,
   file_description => 2,
   file_notes       => 1,
   summary          => 0,
   description      => 0,
   product_code     => 0,
  );
23
# Construct an index driver.
#
# Options (all optional, stored on the object as given):
#   scores    - hashref of field name => score.  When not supplied (or
#               false), each field in %default_scores is looked up in the
#               "search index scores" config section, falling back to the
#               built-in default for that field.
#   max_level - used by make_index() to decide whether an article is
#               indexed under its own id or its parent's; when false it is
#               read from the [search] "level" config entry, defaulting to
#               $Constants::SEARCH_LEVEL.
#   note, error, verbose - read by _note(), _error() and indexer().
#
# The construction time is recorded in {start} for _time_passed().
sub new {
  my ($class, %opts) = @_;

  my $cfg = BSE::Cfg->single;

  if (!$opts{scores}) {
    my %scores;
    for my $name (keys %default_scores) {
      $scores{$name} =
        $cfg->entry("search index scores", $name, $default_scores{$name});
    }
    $opts{scores} = \%scores;
  }

  $opts{start} = time();

  $opts{max_level} = $cfg->entry("search", "level", $Constants::SEARCH_LEVEL)
    unless $opts{max_level};

  return bless \%opts, $class;
}
40
# Return the back-end indexer object, creating and caching it on first use.
#
# The class name comes from the [search] "indexer" config entry and
# defaults to BSE::Index::BSE; the class is loaded on demand and
# constructed with the config object plus this object's scores and
# verbose settings.
sub indexer {
  my ($self) = @_;

  return $self->{indexer} if $self->{indexer};

  my $cfg = BSE::Cfg->single;
  my $class = $cfg->entry('search', 'indexer', 'BSE::Index::BSE');

  # Foo::Bar -> Foo/Bar.pm so require can be given a run-time name
  my $file = $class . ".pm";
  $file =~ s!::!/!g;
  require $file;

  my %args =
    (
     cfg => $cfg,
     scores => $self->{scores},
     verbose => $self->{verbose},
    );
  $self->{indexer} = $class->new(%args);

  return $self->{indexer};
}
60
# Run a complete index rebuild: start the indexer, scan the articles with
# make_index(), then tell the indexer to finish.
#
# Progress is reported through vnote().  Any exception raised during those
# steps is caught and reported through _error() as "Indexing error: ...".
#
# Returns 1 on success; on failure returns an empty list (undef in scalar
# context).
sub do_index {
  my ($self) = @_;

  my $indexer = $self->indexer;

  # Failure is detected from the eval's own return value rather than from
  # the truth of $@: $@ can be empty or false even though the block died
  # (cleared by a destructor during unwinding on older perls, or an
  # exception object that is false in boolean context), which would
  # otherwise be reported as a successful run.
  my $ok = eval {
    $self->vnote("s1::Starting index");
    $indexer->start_index();
    $self->vnote("s2::Starting article scan");
    $self->make_index();
    $self->vnote("f2::Populating search index");
    $indexer->end_index();
    $self->vnote("f1::Indexing complete");
    1;
  };
  unless ($ok) {
    my $err = $@;
    $err = "unknown error" unless defined $err && length $err;
    $self->_error("Indexing error: $err");
    return;
  }

  return 1;
}
80
# Scan every article and hand the indexable text of each one to the
# indexer's process_article() method.
#
# An article is skipped when it cannot be loaded, when its should_index
# method is false, when its generator reports it not visible (unless its
# section is listed in @SEARCH_INCLUDE), or when its section is listed in
# @SEARCH_EXCLUDE.  Articles deeper than {max_level} are indexed under
# their parent's id.
#
# Only fields with a true score are collected.  A field that is not a key
# of the article and is named file_XXX is built from the XXX attribute of
# the article's files, joined with newlines.
#
# Dies if a generator class name is malformed or the class cannot be
# loaded.
sub make_index {
  my ($self) = @_;

  my %dont_search;
  my %do_search;
  @dont_search{@SEARCH_EXCLUDE} = @SEARCH_EXCLUDE;
  @do_search{@SEARCH_INCLUDE} = @SEARCH_INCLUDE;
  $self->vnote("s::Loading article ids");
  my @ids = Articles->allids;
  my $count = @ids;
  $self->vnote("c:$count:$count articles to index");
  my $cfg = BSE::Cfg->single;
  my $indexer = $self->indexer;

  # The set of scored fields is the same for every article, so filter and
  # order it (highest score first) once instead of once per article.
  my $scores = $self->{scores};
  my @score_fields = sort { $scores->{$b} <=> $scores->{$a} }
    grep { $scores->{$_} } keys %$scores;

 ARTICLE: for my $pkey (@ids) {
    my $article = Articles->getByPkey($pkey)
      or next ARTICLE;
    next ARTICLE unless $article->should_index;

    # find the section
    my $section = $article->section;
    my $id = $article->{id};
    my $indexas = $article->{level} > $self->{max_level} ? $article->{parentid} : $id;
    my $sectionid = $section->{id};

    # Load the generator by file name instead of string-eval'ing a "use"
    # statement built from article data; the name is checked first so it
    # can only ever name a module.
    my $gen_class = $article->{generator};
    defined $gen_class && $gen_class =~ /\A\w+(?:::\w+)*\z/
      or die "Invalid generator class '"
        . (defined $gen_class ? $gen_class : '') . "' for article $id\n";
    (my $gen_file = $gen_class . ".pm") =~ s!::!/!g;
    require $gen_file;

    my $gen = $gen_class->new(top=>$article, cfg=>$cfg);
    next ARTICLE unless $gen->visible($article) or $do_search{$sectionid};

    next ARTICLE if $dont_search{$sectionid};

    $article = $gen->get_real_article($article);

    unless ($article) {
      $self->error("$id:Full article for $id not found");
      next ARTICLE;
    }

    $self->vnote("i:$id:Indexing '$article->{title}'");

    my %fields;
    my @files;
    my $got_files;
    for my $field (@score_fields) {
      my $text;
      if (exists $article->{$field}) {
        $text = $article->{$field};
      }
      elsif ($field =~ /^file_(.*)/) {
        my $file_field = $1;
        # fetch the file list at most once per article
        @files = $article->files unless $got_files++;
        $text = join "\n", map $_->{$file_field}, @files;
      }

      # nothing to index for this field; checked before the markup
      # stripping below so that never runs on an undefined value
      next unless defined $text;

      #next if $text =~ m!^\<html\>!i; # I don't know how to do this (yet)
      if ($field eq 'body') {
        # strip out markup
        $gen->remove_block("Articles", [], \$text);
        # unwrap a[...], b[...] and i[...] leaving just the bracketed text
        $text =~ s/[abi]\[([^\]]+)\]/$1/g;
      }

      $fields{$field} = $text;
    }
    $indexer->process_article($article, $section, $indexas, \%fields);
  }
  $self->vnote("f::Article scan complete");
}
153
# Report an error message: the elapsed time and the marker ":e:" are
# placed in front of the supplied message parts and the result is passed
# to _error().
sub error {
  my ($self, @msg) = @_;

  my @report = ($self->_time_passed, ":e:", @msg);

  return $self->_error(@report);
}
159
# Deliver error text: to the {error} callback when one was supplied,
# otherwise printed as-is to STDERR.
sub _error {
  my ($self, @error) = @_;

  my $handler = $self->{error};
  return $handler->(@error) if $handler;

  print STDERR @error;
}
170
# Seconds elapsed since the object's {start} time, formatted with three
# decimal places.
sub _time_passed {
  my ($self) = @_;

  my $elapsed = time() - $self->{start};

  return sprintf('%.3f', $elapsed);
}
176
# Emit a progress note: the elapsed time and a ":" separator are placed in
# front of the supplied message parts and the result is passed to _note().
sub vnote {
  my ($self, @msg) = @_;

  my @stamped = ($self->_time_passed, ":", @msg);

  return $self->_note(@stamped);
}
182
# Pass note text to the {note} callback, if one was supplied; otherwise
# the note is dropped.
sub _note {
  my ($self, @msg) = @_;

  my $callback = $self->{note};

  return $callback && $callback->(@msg);
}