make subscription/isubscription into object tags
[bse.git] / site / cgi-bin / modules / BSE / Index.pm
CommitLineData
3f015bd2
TC
1package BSE::Index;
2use strict;
3use Time::HiRes qw(time);
4use Constants qw($BASEDIR $MAXPHRASE $DATADIR @SEARCH_EXCLUDE @SEARCH_INCLUDE $SEARCH_LEVEL);
5use Articles;
6
7our $VERSION = "1.000";
8
# Default weight given to each indexable field.  Keys beginning with
# "file_" name fields of the files attached to an article rather than
# fields of the article itself.  A weight of zero disables the field:
# make_index() skips any field whose score is false.
my %scores = (
    # article fields
    title            => 5,
    pageTitle        => 5,
    keyword          => 4,
    author           => 4,
    body             => 3,

    # attached file fields
    file_displayName => 2,
    file_description => 2,
    file_notes       => 1,

    # known fields that are not indexed by default
    summary          => 0,
    description      => 0,
    product_code     => 0,
);
23
# Create a new index builder.
#
# Options are taken as key => value pairs and become the object itself.
# Keys read elsewhere in this package:
#   scores    - hash ref of field => weight (default: the built-in table)
#   max_level - article level above which an article is indexed under
#               its parent id (default: $SEARCH_LEVEL)
#   verbose   - passed through to the indexer backend
#   error     - code ref called with error message parts
#   note      - code ref called with progress message parts
#
# The construction time is stored in {start}, replacing any value the
# caller supplied, so messages can report elapsed time.
sub new {
    my ($class, %opts) = @_;

    $opts{scores}    = \%scores      unless $opts{scores};
    $opts{max_level} = $SEARCH_LEVEL unless $opts{max_level};
    $opts{start}     = time;

    return bless \%opts, $class;
}
33
# Return the indexer backend object, creating and caching it on first use.
#
# The backend class name is read from the "indexer" key of the [search]
# configuration section, defaulting to BSE::Index::BSE.  The class is
# loaded on demand and constructed with the configuration object, the
# score table and the verbose setting.
sub indexer {
    my ($self) = @_;

    return $self->{indexer} if $self->{indexer};

    my $cfg   = BSE::Cfg->single;
    my $class = $cfg->entry('search', 'indexer', 'BSE::Index::BSE');

    # turn Some::Class into Some/Class.pm for require
    my $file = "$class.pm";
    $file =~ s!::!/!g;
    require $file;

    my %args = (
        cfg     => $cfg,
        scores  => $self->{scores},
        verbose => $self->{verbose},
    );

    return $self->{indexer} = $class->new(%args);
}
53
# Rebuild the search index from scratch.
#
# Starts the indexer backend, scans all articles via make_index() and
# then asks the backend to finish the index, emitting a progress note
# before and after each stage.
#
# Returns 1 on success.  If any stage dies the failure is reported
# through _error() and an empty list / undef is returned.
#
# Note that obtaining the backend via indexer() happens outside the
# eval, so a failure to load the backend class propagates to the caller.
sub do_index {
    my ($self) = @_;

    my $indexer = $self->indexer;

    # Detect failure from the eval's return value rather than from $@:
    # $@ can be reset before we get to look at it (for example by an
    # eval inside a destructor that runs while the stack unwinds), which
    # would make a failed run look like a success.
    my $ok = eval {
        $self->vnote("s1::Starting index");
        $indexer->start_index();
        $self->vnote("s2::Starting article scan");
        $self->make_index();
        $self->vnote("f2::Populating search index");
        $indexer->end_index();
        $self->vnote("f1::Indexing complete");
        1;
    };
    unless ($ok) {
        my $error = $@ || "unknown error";
        $self->_error("Indexing error: $error");
        return;
    }

    return 1;
}
73
# Scan every article and pass the searchable ones to the indexer backend.
#
# An article is skipped when any of the following holds:
#   - it cannot be loaded;
#   - it is neither listed nor flagged "I";
#   - it is not linked;
#   - it carries flag "C" or "N", or any ancestor carries flag "C";
#   - one of its ancestors cannot be loaded (reported via error());
#   - its generator says it is not visible and its top-level section is
#     not in @SEARCH_INCLUDE;
#   - its top-level section is in @SEARCH_EXCLUDE;
#   - the generator cannot supply the real article (reported via error()).
#
# Articles deeper than {max_level} are indexed under their parent's id.
#
# Dies if an article's generator class name is malformed or the class
# cannot be loaded; do_index() traps that and reports it.
sub make_index {
    my ($self) = @_;

    my %dont_search;
    my %do_search;
    @dont_search{@SEARCH_EXCLUDE} = @SEARCH_EXCLUDE;
    @do_search{@SEARCH_INCLUDE} = @SEARCH_INCLUDE;

    $self->vnote("s::Loading article ids");
    my @ids = Articles->allids;
    my $count = @ids;
    $self->vnote("c:$count:$count articles to index");

    my $cfg = BSE::Cfg->single;
    my $indexer = $self->indexer;

    # Use the score table this object was built with, which is also the
    # one handed to the indexer backend, so both agree on which fields
    # are enabled.
    my $scores = $self->{scores} || \%scores;

  INDEX:
    for my $id (@ids) {
        my $article = Articles->getByPkey($id);
        next unless $article;
        next unless ($article->{listed} || $article->{flags} =~ /I/);
        next unless $article->is_linked;
        next if $article->{flags} =~ /[CN]/;

        # walk up the tree to find the top-level section
        my $section = $article;
        while ($section->{parentid} >= 1) {
            my $parent_id = $section->{parentid};
            $section = Articles->getByPkey($parent_id);
            unless ($section) {
                # without this check a missing parent would be silently
                # autovivified into an empty section
                $self->error("$id:Parent article $parent_id for $id not found");
                next INDEX;
            }
            next INDEX if $section->{flags} =~ /C/;
        }

        my $indexas = $article->{level} > $self->{max_level}
            ? $article->{parentid} : $id;
        my $sectionid = $section->{id};

        # Load the generator class by file name instead of evaluating a
        # string built from stored data; the name is checked first so it
        # can only ever map to a module path.
        my $generator = $article->{generator};
        defined $generator && $generator =~ /\A\w+(?:::\w+)*\z/
            or die "Article $id has an invalid generator class\n";
        (my $generator_file = "$generator.pm") =~ s!::!/!g;
        require $generator_file;

        my $gen = $generator->new(top => $article, cfg => $cfg);
        next unless $gen->visible($article) or $do_search{$sectionid};

        next if $dont_search{$sectionid};

        $article = $gen->get_real_article($article);

        unless ($article) {
            $self->error("$id:Full article for $id not found");
            next;
        }

        $self->vnote("i:$id:Indexing '$article->{title}'");

        my $fields = $self->_article_fields($article, $gen, $scores);
        $indexer->process_article($article, $section, $indexas, $fields);
    }
    $self->vnote("f::Article scan complete");
}

# Collect the text of every enabled field of $article.
#
# $scores maps field names to weights; fields with a false weight are
# ignored.  Returns a hash ref of field name => text containing only the
# fields that produced a defined value.
sub _article_fields {
    my ($self, $article, $gen, $scores) = @_;

    my %fields;
    my @files;
    my $got_files;
    for my $field (sort { $scores->{$b} <=> $scores->{$a} } keys %$scores) {
        next unless $scores->{$field};

        my $text;
        if (exists $article->{$field}) {
            $text = $article->{$field};
        }
        elsif ($field =~ /^file_(.*)/) {
            # file_* fields come from the article's attached files, which
            # are fetched at most once per article
            my $file_field = $1;
            @files = $article->files unless $got_files++;
            $text = join "\n", map $_->{$file_field}, @files;
        }

        if ($field eq 'body') {
            # NOTE(review): remove_block() presumably strips embedded
            # block markup from the body - confirm against the generator
            $gen->remove_block("Articles", [], \$text);
            # reduce a[...], b[...] and i[...] markup to its content
            $text =~ s/[abi]\[([^\]]+)\]/$1/g;
        }

        next unless defined $text;

        $fields{$field} = $text;
    }

    return \%fields;
}
151
# Report an error encountered while indexing.
#
# The message parts are passed on to _error() preceded by the elapsed
# time since construction and the marker ":e:".
sub error {
    my ($self, @msg) = @_;

    my $elapsed = $self->_time_passed;
    $self->_error($elapsed, ":e:", @msg);
}
157
# Deliver error message parts: to the {error} callback when one was
# supplied, otherwise to STDERR.
sub _error {
    my ($self, @error) = @_;

    my $handler = $self->{error};
    return $handler->(@error) if $handler;

    print STDERR @error;
}
168
# Return the number of seconds elapsed since {start}, formatted with
# three decimal places.
sub _time_passed {
    my ($self) = @_;

    my $elapsed = time() - $self->{start};

    return sprintf "%.3f", $elapsed;
}
174
# Emit a progress note.
#
# The message parts are passed on to _note() preceded by the elapsed
# time since construction and a ":" separator.
sub vnote {
    my ($self, @msg) = @_;

    my $elapsed = $self->_time_passed;
    $self->_note($elapsed, ":", @msg);
}
180
# Pass progress message parts to the {note} callback, if one was
# supplied; otherwise do nothing.
sub _note {
    my ($self, @msg) = @_;

    my $callback = $self->{note};
    $callback->(@msg) if $callback;
}