don't abort an article import just because one file failed
[bse.git] / site / cgi-bin / modules / BSE / Importer / Target / Article.pm
CommitLineData
d415d0ba 1package BSE::Importer::Target::Article;
3709451d 2use strict;
d415d0ba 3use base 'BSE::Importer::Target::Base';
3709451d
TC
4use BSE::API qw(bse_make_article bse_add_image bse_add_step_parent);
5use Articles;
6use Products;
7use OtherParents;
8
000015bb 9our $VERSION = "1.005";
d415d0ba
TC
10
11=head1 NAME
12
13BSE::Importer::Target::Article - import target for articles.
14
15=head1 SYNOPSIS
16
17 [import profile foo]
18 ...
19 ; these are the defaults
20 codes=0
21 code_field=linkAlias
22 parent=-1
23 ignore_missing=1
24 reset_images=0
3f58d535 25 reset_files=0
d415d0ba
TC
26 reset_steps=0
27
28 # done by the importer
29 my $target = BSE::Importer::Target::Article->new
30 (importer => $importer, opts => \%opts)
31 ...
32 $target->start($imp);
33 # for each row:
34 $target->row($imp, \%entry, \@parents);
35
36
37=head1 DESCRIPTION
38
39Provides a target for importing BSE articles.
40
57e4a9c7
TC
41C<update_only> profiles must provide a mapping for one of C<id> or
42C<linkAlias>.
43
44Non-C<update_only> profiles must provide a mapping for C<title>.
d415d0ba
TC
45
46=head1 CONFIGURATION
47
48The following extra configuration can be set in the import profile:
49
50=over
51
52=item *
53
54C<codes> - set to true to use the configured C<code_field> to update
57e4a9c7
TC
55existing articles rather than creating new articles. This is forced
56on when the import profile enables C<update_only>.
d415d0ba
TC
57
58=item *
59
60C<code_field> - the field to use to identify existing articles.
61Default: C<linkAlias> for article imports.
62
63=item *
64
65C<parent> - the base of the tree of parent articles to create the
66parent tree under.
67
68=item *
69
3f58d535
TC
70C<ignore_missing> - set to 0 to error on missing image or article
71files. Default: 1.
d415d0ba
TC
72
73=item *
74
75C<reset_images> - set to true to delete all images from an article
76before adding the imported images.
77
78=item *
79
3f58d535
TC
80C<reset_files> - set to true to delete all files from an article
81before adding the imported files.
82
83=item *
84
d415d0ba
TC
85C<reset_steps> - set to true to delete all step parents from an
86article before adding the imported steps.
87
88=back
89
90=head1 SPECIAL FIELDS
91
92The following fields are used to import extra information into
93articles:
94
95=over
96
97=item *
98
99C<< imageI<index>_I<field> >> - used to import images,
100eg. C<image1_file> to specify the image file. Note: images are not
101replaced unless C<reset_images> is set. I<index> is a number from 1
102to 10, I<field> can be any of C<file>, C<alt>, C<name>, C<url>,
103C<storage>, with the C<file> entry being required.
104
105=item *
106
107C<< stepI<index> >> - specify step parents for the article. This can
108either be the article id or the article link alias.
109
110=item *
111
112C<tags> - this is split on C</> to set the tags for the article.
113
114=back
115
116=head1 METHODS
117
118=over
119
120=item new()
121
122Create a new article import target. Follows the protocol specified by
123L<BSE::Importer::Target::Base>.
124
125=cut
cb7fd78d 126
3709451d
TC
sub new {
  my ($class, %opts) = @_;

  # The base class builds the object from the full option set; the importer
  # is only needed here to read this target's profile configuration.
  my $self = $class->SUPER::new(%opts);
  my $importer = delete $opts{importer};

  $self->{use_codes} = $importer->cfg_entry('codes', 0);

  my $map = $importer->maps;
  my $default_code;
  if ($importer->update_only) {
    # an update_only profile must map at least one key column; the first
    # mapped key field becomes the default code field
    my @mapped = grep { $map->{$_} } $self->key_fields;
    @mapped
      or die "No key field (", join(",", $self->key_fields),
        ") mapping found\n";

    $default_code = $mapped[0];

    # update_only always matches existing articles by code
    $self->{use_codes} = 1;
  }
  else {
    # new articles can't be created without a title
    defined $map->{title}
      or die "No title mapping found\n";

    $default_code = $self->default_code_field;
  }
  $self->{code_field} = $importer->cfg_entry("code_field", $default_code);

  $self->{parent} = $importer->cfg_entry("parent", $self->default_parent);

  die "No $self->{code_field} mapping found with 'codes' enabled\n"
    if $self->{use_codes} && !defined $map->{ $self->{code_field} };

  # simple profile settings, each paired with its default
  my @settings =
    (
     [ ignore_missing => 1 ],
     [ reset_images   => 0 ],
     [ reset_files    => 0 ],
     [ reset_steps    => 0 ],
    );
  for my $setting (@settings) {
    my ($name, $default) = @$setting;
    $self->{$name} = $importer->cfg_entry($name, $default);
  }

  return $self;
}
174
d415d0ba
TC
175=item start()
176
177Start import processing.
178
179=cut
180
3709451d
TC
sub start {
  my ($self) = @_;

  # Reset all per-run state so the target starts from a clean slate.
  $self->{parent_cache} = {};   # parent id => array ref of child articles
  $self->{leaves} = [];         # leaf articles handled by row()

  # catseen is the "already recorded" set _find_parent() uses to keep
  # {parents} free of duplicates; it must be cleared along with {parents},
  # otherwise parents seen in an earlier run would never be listed again.
  $self->{catseen} = {};
  $self->{parents} = [];        # parent articles handled by _find_parent()
}
188
d415d0ba 189=item row()
3709451d 190
d415d0ba 191Process a row of data.
3709451d 192
d415d0ba 193=cut
0cca6ce6 194
3709451d
TC
sub row {
  my ($self, $importer, $entry, $parents) = @_;

  # Import one row: $entry is a hash ref of mapped field values and
  # $parents an array ref of parent titles to find/create above the
  # article.  Dies with a message on any error that should fail the row.

  # validate and default the basic fields before anything is written
  $self->xform_entry($importer, $entry);

  # an update_only row leaves the article where it is unless parent
  # titles were supplied
  if (!$importer->update_only || @$parents) {
    $entry->{parentid} = $self->_find_parent($importer, $self->{parent}, @$parents);
  }

  my $leaf;
  if ($self->{use_codes}) {
    my $leaf_id = $entry->{$self->{code_field}};

    # use the update_only accessor, like every other check in this class,
    # and don't pattern match against a missing (undef) code
    if ($importer->update_only) {
      defined($leaf_id) && $leaf_id =~ /\S/
        or die "$self->{code_field} blank for update_only profile\n";
    }

    $leaf = $self->find_leaf($leaf_id, $importer);
  }
  if ($leaf) {
    # existing article: overwrite its fields with the imported values
    @{$leaf}{keys %$entry} = values %$entry;
    $leaf->save;
    $importer->info("Updated $leaf->{id}: ".$leaf->title);
    if ($self->{reset_images}) {
      $leaf->remove_images($importer->cfg);
      $importer->info(" $leaf->{id}: Reset images");
    }
    if ($self->{reset_files}) {
      $leaf->remove_files($importer->cfg);
      $importer->info(" $leaf->{id}: Reset files");
    }
    if ($self->{reset_steps}) {
      my @steps = OtherParents->getBy(childId => $leaf->{id});
      for my $step (@steps) {
        $step->remove;
      }
    }
  }
  elsif (!$importer->update_only) {
    # no existing article matched: create one
    $leaf = $self->make_leaf
      (
       $importer,
       cfg => $importer->cfg,
       %$entry
      );
    $importer->info("Added $leaf->{id}: $entry->{title}");
  }
  else {
    die "No leaf found for $entry->{$self->{code_field}} for update_only profile\n";
  }

  # image1_file .. image10_file, each with optional alt/name/url/storage
  for my $image_index (1 .. 10) {
    my $file = $entry->{"image${image_index}_file"};
    $file
      or next;
    my $full_file = $importer->find_file($file);

    unless ($full_file) {
      # a missing image is only an error when ignore_missing is off
      $self->{ignore_missing}
        and next;
      die "File '$file' not found for image$image_index\n";
    }

    my %opts = ( file => $full_file );
    for my $key (qw/alt name url storage/) {
      my $fkey = "image${image_index}_$key";
      $entry->{$fkey}
        and $opts{$key} = $entry->{$fkey};
    }

    my %errors;
    my $im = bse_add_image($importer->cfg, $leaf, %opts,
                           errors => \%errors);
    $im
      or die join(", ",map "$_: $errors{$_}", keys %errors), "\n";
    $importer->info(" $leaf->{id}: Add image '$file'");
  }

  $self->_add_files($importer, $entry, $leaf);

  # step1 .. step10: all digits is treated as an article id, anything else
  # as a link alias
  for my $step_index (1 .. 10) {
    my $step_id = $entry->{"step$step_index"};
    $step_id
      or next;
    my $step;
    if ($step_id =~ /^\d+$/) {
      $step = Articles->getByPkey($step_id);
    }
    else {
      $step = Articles->getBy(linkAlias => $step_id);
    }
    $step
      or die "Cannot find stepparent with id $step_id\n";

    bse_add_step_parent($importer->cfg, child => $leaf, parent => $step);
  }

  $self->fill_leaf($importer, $leaf, %$entry);
  push @{$self->{leaves}}, $leaf;

  $importer->event(endrow => { leaf => $leaf });
}
294
3f58d535
TC
sub _add_files {
  my ($self, $importer, $entry, $leaf) = @_;

  # Add or update the article files described by the file1_* .. file10_*
  # fields of $entry.  A failure on one file is reported as a warning and
  # does not stop the remaining files (or the row) from being processed.

  # existing files with a non-blank name, so a fileN_name field can select
  # a file to update rather than adding a new one
  my %named_files = map { $_->name => $_ } grep $_->name ne '', $leaf->files;

  for my $file_index (1 .. 10) {
    my %opts;

    # collect whichever metadata fields were supplied for this index
    my $found = 0;
    for my $key (qw/name displayName storage description forSale download requireUser notes hide_from_list category/) {
      my $fkey = "file${file_index}_$key";
      if (defined $entry->{$fkey}) {
        $opts{$key} = $entry->{$fkey};
        $found = 1;
      }
    }

    my $filename = $entry->{"file${file_index}_file"};
    if ($filename) {
      my $full_file = $importer->find_file($filename);

      unless ($full_file) {
        # a missing file is only an error when ignore_missing is off
        $self->{ignore_missing}
          and next;
        die "File '$filename' not found for file$file_index\n";
      }

      $opts{filename} = $full_file;
      $found = 1;
    }

    # nothing at all supplied for this index
    $found
      or next;

    my $file;
    if ($opts{name}) {
      $file = $named_files{$opts{name}};
    }

    # metadata with neither an existing file to update nor content to add
    if (!$file && !$opts{filename}) {
      $importer->warn("No file${file_index}_file supplied but other file${file_index}_* field supplied");
      next;
    }

    # default the display name to the last path component of the filename
    if ($filename && !$opts{displayName}) {
      unless (($opts{displayName}) = $filename =~ /([^\\\/:]+)$/) {
        $importer->warn("Cannot create displayName for $filename");
        next;
      }
    }

    eval {
      if ($file) {
        my @warnings;
        $file->update
          (
           _actor => $importer->actor,
           _warnings => \@warnings,
           %opts,
          );

        # pass on anything update() left in @warnings instead of silently
        # dropping it (presumably message strings - confirm against the
        # file class)
        $importer->warn(" $leaf->{id}: $_") for @warnings;

        $importer->info(" $leaf->{id}: Update file '".$file->displayName ."'");
      }
      else {
        # this dies on failure
        $file = $leaf->add_file
          (
           $importer->cfg,
           %opts,
           store => 1,
          );

        $importer->info(" $leaf->{id}: Add file '$filename'");
      }
      1;
    } or do {
      # report the failure and carry on with the next file
      $importer->warn($@);
    };
  }
}
375
d415d0ba
TC
376=item xform_entry()
377
378Called by row() to perform any extra data transformation needed.
379
380Currently this forces a non-blank, non-newline title, and defaults the
381values of C<summary>, C<description> and C<body> to the title.
382
383=cut
384
sub xform_entry {
  my ($self, $importer, $entry) = @_;

  # a supplied title must have visible content and fit on one line
  if (exists $entry->{title}) {
    my $title = $entry->{title};
    die "title blank\n"
      unless $title =~ /\S/;
    die "Title may not contain newlines"
      if $title =~ /\n/;
  }

  # update_only profiles leave the remaining fields alone
  return if $importer->update_only;

  # otherwise an empty text field falls back to the title
  for my $field (qw(summary description body)) {
    $entry->{$field} ||= $entry->{title};
  }
}
404
405=item children_of()
406
407Utility method to find the children of a given article.
408
409=cut
410
sub children_of {
  my ($self, $parent_id) = @_;

  # delegate to the article collection; the result is returned in the
  # caller's context
  return Articles->children($parent_id);
}
416
417=item make_parent()
418
419Create a parent article.
420
421Overridden in the product importer to create catalogs.
422
423=cut
424
sub make_parent {
  my ($self, $importer, %fields) = @_;

  # a parent is just an ordinary article built from the supplied fields;
  # $importer is accepted but not used here
  my $parent = bse_make_article(%fields);

  return $parent;
}
430
431=item find_leaf()
432
433Find a leaf article based on the supplied code.
434
435=cut
436
sub find_leaf {
  my ($self, $leaf_id, $importer) = @_;

  # Look up an existing article whose code_field equals $leaf_id, ignoring
  # leading and trailing whitespace.  Returns the article, or nothing when
  # there is no usable code or no match.  Fires a find_leaf event on the
  # importer when a lookup succeeds.

  defined $leaf_id
    or return;

  $leaf_id =~ s/\A\s+//;
  $leaf_id =~ s/\s+\z//;

  # A blank code identifies nothing.  Looking it up would match whichever
  # article happens to have an empty code field, and the caller would then
  # overwrite that unrelated article.
  length $leaf_id
    or return;

  # list assignment in scalar context: an empty result from getBy means
  # there was no match
  my ($leaf) = Articles->getBy($self->{code_field}, $leaf_id)
    or return;

  $importer->event(find_leaf => { id => $leaf_id, leaf => $leaf });

  return $leaf;
}
450
451=item make_leaf()
452
453Create an article based on the imported data.
454
455Overridden in the product importer to create products.
456
457=cut
458
sub make_leaf {
  my ($self, $importer, %fields) = @_;

  # build the article from the imported fields, then announce it on the
  # importer via a make_leaf event
  my $article = bse_make_article(%fields);
  $importer->event(make_leaf => { leaf => $article });

  return $article;
}
468
469=item fill_leaf()
470
471Fill the article some more.
472
473Currently sets the tags.
474
475Overridden by the product target to set product options and tiered
476pricing.
477
478=cut
479
sub fill_leaf {
  my ($self, $importer, $leaf, %entry) = @_;

  # nothing to do unless the row supplied a non-empty tags value
  my $tag_text = $entry{tags};
  return 1
    unless $tag_text;

  # tags arrive as a single "/" separated string
  my @tags = split m{/}, $tag_text;
  my $error;
  $leaf->set_tags(\@tags, \$error)
    or die "Error setting tags: $error";

  return 1;
}
493
494=item _find_parent()
495
496Find a parent article.
497
498This method calls itself recursively to work down a tree of parents.
499
500=cut
501
3709451d
TC
sub _find_parent {
  my ($self, $importer, $parent, @parents) = @_;

  # Walk down the list of parent titles one level at a time, finding or
  # creating each article under the previous one.  With no titles left the
  # current $parent id is the answer.
  my $cache = $self->{parent_cache};
  for my $title (@parents) {
    # load the children of this level once per run
    $cache->{$parent} ||= [ $self->children_of($parent) ];
    my $siblings = $cache->{$parent};

    # titles are matched case-insensitively
    my $wanted = lc $title;
    my ($cat) = grep { lc($_->{title}) eq $wanted } @$siblings;

    if (!$cat) {
      my %opts =
        (
         cfg => $importer->cfg,
         parentid => $parent,
         title => $title,
         body => $title,
        );
      if ($self->{catalog_template}) {
        $opts{template} = $self->{catalog_template};
      }
      $cat = $self->make_parent($importer, %opts);
      $importer->info("Add parent $cat->{id}: $title");
      push @$siblings, $cat;
    }

    # record each parent only once for parents()
    if (!$self->{catseen}{$cat->{id}}) {
      $self->{catseen}{$cat->{id}} = 1;
      push @{$self->{parents}}, $cat;
    }

    # descend to the next level
    $parent = $cat->{id};
  }

  return $parent;
}
537
d415d0ba
TC
538=item default_parent()
539
540Return the default parent id.
541
542Overridden by the product target to return the shop id.
543
544=cut
545
3709451d
TC
sub default_parent {
  # default used for the "parent" profile setting
  return -1;
}
547
d415d0ba
TC
548=item default_code_field()
549
550Return the default code field.
551
57e4a9c7 552Overridden by the product target to return the C<product_code> field.
d415d0ba
TC
553
554=cut
555
3709451d
TC
sub default_code_field {
  # default used for the "code_field" profile setting
  return "linkAlias";
}
557
d415d0ba
TC
558=item leaves()
559
560Return the leaf articles created or modified by the import run.
561
562=cut
563
3709451d
TC
sub leaves {
  my ($self) = @_;

  # the articles row() pushed onto {leaves}, as a list
  return @{ $self->{leaves} };
}
567
d415d0ba
TC
568=item parents()
569
570Return the parent articles created or used by the import run.
571
572=cut
573
3709451d
TC
sub parents {
  my ($self) = @_;

  # the articles _find_parent() pushed onto {parents}, as a list
  return @{ $self->{parents} };
}
577
57e4a9c7
TC
578=item key_fields()
579
580Columns that can act as keys.
581
582=cut
583
sub key_fields {
  # new() accepts the first of these that has a column mapping as the key
  # for an update_only profile
  return ('id', 'linkAlias');
}
587
3709451d 5881;
d415d0ba
TC
589
590=back
591
592=head1 AUTHOR
593
594Tony Cook <tony@develop-help.com>
595
596=cut