the XLS parser now identifies it can't parse XLS
[bse.git] / site / cgi-bin / modules / BSE / Importer / Target / Article.pm
CommitLineData
d415d0ba 1package BSE::Importer::Target::Article;
3709451d 2use strict;
d415d0ba 3use base 'BSE::Importer::Target::Base';
3709451d
TC
4use BSE::API qw(bse_make_article bse_add_image bse_add_step_parent);
5use Articles;
6use Products;
7use OtherParents;
8
57e4a9c7 9our $VERSION = "1.003";
d415d0ba
TC
10
11=head1 NAME
12
13BSE::Importer::Target::Article - import target for articles.
14
15=head1 SYNOPSIS
16
17 [import profile foo]
18 ...
19 ; these are the defaults
20 codes=0
21 code_field=linkAlias
22 parent=-1
23 ignore_missing=1
24 reset_images=0
25 reset_steps=0
26
27 # done by the importer
28 my $target = BSE::Importer::Target::Article->new
29 (importer => $importer, opts => \%opts)
30 ...
31 $target->start($imp);
32 # for each row:
33 $target->row($imp, \%entry, \@parents);
34
35
36=head1 DESCRIPTION
37
38Provides a target for importing BSE articles.
39
57e4a9c7
TC
40C<update_only> profiles must provide a mapping for one of C<id> or
41C<linkAlias>.
42
43Non-C<update_only> profiles must provide a mapping for C<title>.
d415d0ba
TC
44
45=head1 CONFIGURATION
46
47The following extra configuration can be set in the import profile:
48
49=over
50
51=item *
52
53C<codes> - set to true to use the configured C<code_field> to update
57e4a9c7
TC
54existing articles rather than creating new articles. This is forced
55on when the import profile enables C<update_only>.
d415d0ba
TC
56
57=item *
58
59C<code_field> - the field to use to identify existing articles.
60Default: C<linkAlias> for article imports.
61
62=item *
63
64C<parent> - the base of the tree of parent articles to create the
65parent tree under.
66
67=item *
68
69C<ignore_missing> - set to 0 to error on missing image files.
70Default: 1.
71
72=item *
73
74C<reset_images> - set to true to delete all images from an article
75before adding the imported images.
76
77=item *
78
79C<reset_steps> - set to true to delete all step parents from an
80article before adding the imported steps.
81
82=back
83
84=head1 SPECIAL FIELDS
85
86The following fields are used to import extra information into
87articles:
88
89=over
90
91=item *
92
93C<< imageI<index>_I<field> >> - used to import images,
94eg. C<image1_file> to specify the image file. Note: images are not
95replaced unless C<reset_images> is set. I<index> is a number from 1
96to 10, I<field> can be any of C<file>, C<alt>, C<name>, C<url>,
97C<storage>, with the C<file> entry being required.
98
99=item *
100
101C<< stepI<index> >> - specify step parents for the article. This can
102either be the article id or the article link alias.
103
104=item *
105
106C<tags> - this is split on C</> to set the tags for the article.
107
108=back
109
110=head1 METHODS
111
112=over
113
114=item new()
115
116Create a new article import target. Follows the protocol specified by
117L<BSE::Importer::Target::Base>.
118
119=cut
cb7fd78d 120
3709451d
TC
sub new {
  my ($class, %opts) = @_;

  # The base class builds the object; we then layer the article specific
  # configuration on top of it.
  my $self = $class->SUPER::new(%opts);

  my $importer = delete $opts{importer};

  $self->{use_codes} = $importer->cfg_entry('codes', 0);
  my $map = $importer->maps;

  if ($importer->update_only) {
    # An update_only profile must map at least one key column; the first
    # mapped key becomes the default code field.
    my ($def_code) = grep { $map->{$_} } $self->key_fields;
    unless (defined $def_code) {
      die "No key field (", join(",", $self->key_fields),
	") mapping found\n";
    }

    $self->{code_field} = $importer->cfg_entry("code_field", $def_code);

    # updates are always matched by code
    $self->{use_codes} = 1;
  }
  else {
    # new articles can't be created without a title
    unless (defined $map->{title}) {
      die "No title mapping found\n";
    }

    $self->{code_field} =
      $importer->cfg_entry("code_field", $self->default_code_field);
  }

  $self->{parent} = $importer->cfg_entry("parent", $self->default_parent);

  # matching by code is impossible if the code column itself isn't mapped
  my $code_field = $self->{code_field};
  if ($self->{use_codes} && !defined $map->{$code_field}) {
    die "No $code_field mapping found with 'codes' enabled\n";
  }

  # remaining simple options, each with its default
  my @simple_options =
    (
     [ ignore_missing => 1 ],
     [ reset_images   => 0 ],
     [ reset_steps    => 0 ],
    );
  for my $option (@simple_options) {
    my ($name, $default) = @$option;
    $self->{$name} = $importer->cfg_entry($name, $default);
  }

  return $self;
}
167
d415d0ba
TC
168=item start()
169
170Start import processing.
171
172=cut
173
3709451d
TC
sub start {
  my ($self) = @_;

  # Reset every piece of per-run state.
  #
  # parent_cache - children of each parent id, filled by _find_parent()
  # catseen      - ids of the parents already recorded in {parents};
  #                this must be cleared together with {parents},
  #                otherwise a re-used target would never record a
  #                parent it saw during a previous run
  # leaves       - articles created or updated by row()
  # parents      - parent articles created or used by _find_parent()
  $self->{parent_cache} = {};
  $self->{catseen} = {};
  $self->{leaves} = [];
  $self->{parents} = [];
}
181
d415d0ba 182=item row()
3709451d 183
d415d0ba 184Process a row of data.
3709451d 185
d415d0ba 186=cut
0cca6ce6 187
3709451d
TC
sub row {
  my ($self, $importer, $entry, $parents) = @_;

  # $entry   - hash ref of field name => value for this row
  # $parents - array ref of parent titles, walked from $self->{parent} down
  #
  # Dies with a newline terminated message on any row level problem.

  $self->xform_entry($importer, $entry);

  # an update_only row only moves the article when parents were supplied
  if (!$importer->update_only || @$parents) {
    $entry->{parentid} = $self->_find_parent($importer, $self->{parent}, @$parents);
  }

  my $leaf;
  if ($self->{use_codes}) {
    my $leaf_id = $entry->{$self->{code_field}};

    # use the accessor like every other update_only check, and don't
    # match against an undefined code
    if ($importer->update_only) {
      defined $leaf_id && $leaf_id =~ /\S/
	or die "$self->{code_field} blank for update_only profile\n";
    }

    $leaf = $self->find_leaf($leaf_id, $importer);
  }
  if ($leaf) {
    # existing article: overwrite its fields with the imported values
    @{$leaf}{keys %$entry} = values %$entry;
    $leaf->save;
    $importer->info("Updated $leaf->{id}: ".$leaf->title);
    if ($self->{reset_images}) {
      $leaf->remove_images($importer->cfg);
      $importer->info(" $leaf->{id}: Reset images");
    }
    if ($self->{reset_steps}) {
      my @steps = OtherParents->getBy(childId => $leaf->{id});
      for my $step (@steps) {
	$step->remove;
      }
    }
  }
  elsif (!$importer->update_only) {
    # no match (or not matching by code): create a new article
    $leaf = $self->make_leaf
      (
       $importer,
       cfg => $importer->cfg,
       %$entry
      );
    $importer->info("Added $leaf->{id}: $entry->{title}");
  }
  else {
    die "No leaf found for $entry->{$self->{code_field}} for update_only profile\n";
  }

  # image1_file .. image10_file, with optional imageN_alt etc
  for my $image_index (1 .. 10) {
    my $file = $entry->{"image${image_index}_file"};
    $file
      or next;
    my $full_file = $importer->find_file($file);

    unless ($full_file) {
      $self->{ignore_missing}
	and next;
      die "File '$file' not found for image$image_index\n";
    }

    my %opts = ( file => $full_file );
    for my $key (qw/alt name url storage/) {
      my $fkey = "image${image_index}_$key";
      $entry->{$fkey}
	and $opts{$key} = $entry->{$fkey};
    }

    my %errors;
    my $im = bse_add_image($importer->cfg, $leaf, %opts,
			   errors => \%errors);
    $im
      or die join(", ",map "$_: $errors{$_}", keys %errors), "\n";
    $importer->info(" $leaf->{id}: Add image '$file'");
  }

  # step1 .. step10, each either an article id or a link alias
  for my $step_index (1 .. 10) {
    my $step_id = $entry->{"step$step_index"};
    $step_id
      or next;
    my $step;
    if ($step_id =~ /^\d+$/) {
      $step = Articles->getByPkey($step_id);
    }
    else {
      $step = Articles->getBy(linkAlias => $step_id);
    }
    $step
      or die "Cannot find stepparent with id $step_id\n";

    bse_add_step_parent($importer->cfg, child => $leaf, parent => $step);
  }
  $self->fill_leaf($importer, $leaf, %$entry);
  push @{$self->{leaves}}, $leaf;

  $importer->event(endrow => { leaf => $leaf });
}
282
d415d0ba
TC
283=item xform_entry()
284
285Called by row() to perform any extra data transformation needed.
286
287Currently this forces a non-blank, non-newline title, and defaults the
288values of C<summary>, C<description> and C<body> to the title.
289
290=cut
291
sub xform_entry {
  my ($self, $importer, $entry) = @_;

  # Validate the title when the row supplies one.  Both messages end in
  # a newline so the importer reports them without a file/line suffix.
  if (exists $entry->{title}) {
    defined $entry->{title} && $entry->{title} =~ /\S/
      or die "title blank\n";

    $entry->{title} =~ /\n/
      and die "Title may not contain newlines\n";
  }

  # Newly created articles default their text fields to the title;
  # update_only rows must leave fields that weren't supplied alone.
  unless ($importer->update_only) {
    for my $field (qw(summary description body)) {
      $entry->{$field}
	or $entry->{$field} = $entry->{title};
    }
  }
}
311
312=item children_of()
313
314Utility method to find the children of a given article.
315
316=cut
317
sub children_of {
  my $self = shift;
  my ($parent_id) = @_;

  # delegate straight to the article collection
  return Articles->children($parent_id);
}
323
324=item make_parent()
325
326Create a parent article.
327
328Overridden in the product importer to create catalogs.
329
330=cut
331
sub make_parent {
  my $self = shift;
  my $importer = shift;
  my %article_opts = @_;

  # parents of plain articles are just articles themselves
  return bse_make_article(%article_opts);
}
337
338=item find_leaf()
339
340Find a leaf article based on the supplied code.
341
342=cut
343
sub find_leaf {
  my ($self, $leaf_id, $importer) = @_;

  # $leaf_id  - the value of the configured code field for this row
  # $importer - the importer, used to emit the find_leaf event
  #
  # Returns the matching article, or nothing when no article matches.

  # a missing code can never identify an article
  defined $leaf_id
    or return;

  $leaf_id =~ s/\A\s+//;
  $leaf_id =~ s/\s+\z//;

  # Nor can a blank one - looking it up could match any record whose
  # code field happens to be empty, which row() would then overwrite.
  length $leaf_id
    or return;

  # check the record itself rather than the count returned by the list
  # assignment, so a lone undef result is also treated as "not found"
  my ($leaf) = Articles->getBy($self->{code_field}, $leaf_id);
  $leaf
    or return;

  $importer->event(find_leaf => { id => $leaf_id, leaf => $leaf });

  return $leaf;
}
357
358=item make_leaf()
359
360Create an article based on the imported data.
361
362Overridden in the product importer to create products.
363
364=cut
365
sub make_leaf {
  my ($self, $importer, %entry) = @_;

  # create the article, then let event listeners know about it
  my $article = bse_make_article(%entry);

  my %event_args = ( leaf => $article );
  $importer->event("make_leaf", \%event_args);

  return $article;
}
375
376=item fill_leaf()
377
378Fill the article some more.
379
380Currently sets the tags.
381
382Overridden by the product target to set product options and tiered
383pricing.
384
385=cut
386
sub fill_leaf {
  my ($self, $importer, $leaf, %entry) = @_;

  # nothing more to do unless the row supplied tags
  my $tag_text = $entry{tags};
  return 1 unless $tag_text;

  # tags arrive as a single "/" separated string
  my @tags = split '/', $tag_text;
  my $error;
  $leaf->set_tags(\@tags, \$error)
    or die "Error setting tags: $error";

  return 1;
}
400
401=item _find_parent()
402
403Find a parent article.
404
405This method calls itself recursively to work down a tree of parents.
406
407=cut
408
3709451d
TC
sub _find_parent {
  my ($self, $importer, $parent, @parents) = @_;

  # no more titles to resolve - $parent is the final parent id
  return $parent unless @parents;

  # load (once) the existing children of this parent
  my $cache = $self->{parent_cache};
  $cache->{$parent} ||= [ $self->children_of($parent) ];
  my $siblings = $cache->{$parent};

  # look for an existing child with this title, ignoring case
  my $title = shift @parents;
  my $cat;
  for my $candidate (@$siblings) {
    if (lc $candidate->{title} eq lc $title) {
      $cat = $candidate;
      last;
    }
  }

  if (!$cat) {
    # not found, create it under the current parent
    my %opts =
      (
       cfg => $importer->cfg,
       parentid => $parent,
       title => $title,
       body => $title,
      );
    if ($self->{catalog_template}) {
      $opts{template} = $self->{catalog_template};
    }
    $cat = $self->make_parent($importer, %opts);
    $importer->info("Add parent $cat->{id}: $title");
    push @$siblings, $cat;
  }

  # record each parent only once per run
  my $cat_id = $cat->{id};
  if (!$self->{catseen}{$cat_id}) {
    $self->{catseen}{$cat_id} = 1;
    push @{$self->{parents}}, $cat;
  }

  # continue down the tree with the remaining titles
  return $self->_find_parent($importer, $cat_id, @parents);
}
444
d415d0ba
TC
445=item default_parent()
446
447Return the default parent id.
448
449Overridden by the product target to return the shop id.
450
451=cut
452
3709451d
TC
sub default_parent {
  # plain articles default to the top of the article tree
  return -1;
}
454
d415d0ba
TC
455=item default_code_field()
456
457Return the default code field.
458
57e4a9c7 459Overridden by the product target to return the C<product_code> field.
d415d0ba
TC
460
461=cut
462
3709451d
TC
sub default_code_field {
  # articles are identified by their link alias unless configured otherwise
  return "linkAlias";
}
464
d415d0ba
TC
465=item leaves()
466
467Return the leaf articles created or modified by the import run.
468
469=cut
470
3709451d
TC
sub leaves {
  my ($self) = @_;

  # flatten the stored array ref into a list for the caller
  return @{ $self->{leaves} };
}
474
d415d0ba
TC
475=item parents()
476
477Return the parent articles created or used by the import run.
478
479=cut
480
3709451d
TC
sub parents {
  my ($self) = @_;

  # flatten the stored array ref into a list for the caller
  return @{ $self->{parents} };
}
484
57e4a9c7
TC
485=item key_fields()
486
487Columns that can act as keys.
488
489=cut
490
sub key_fields {
  # in order of preference; the first one mapped by the profile is used
  return ("id", "linkAlias");
}
494
3709451d 4951;
d415d0ba
TC
496
497=back
498
499=head1 AUTHOR
500
501Tony Cook <tony@develop-help.com>
502
503=cut