prevent the importer overwriting primary key fields
[bse.git] / site / cgi-bin / modules / BSE / Importer / Target / Article.pm
CommitLineData
d415d0ba 1package BSE::Importer::Target::Article;
3709451d 2use strict;
d415d0ba 3use base 'BSE::Importer::Target::Base';
3709451d 4use BSE::API qw(bse_make_article bse_add_image bse_add_step_parent);
e0ed81d7 5use BSE::TB::Articles;
10dd37f9 6use BSE::TB::Products;
0c2d3f00 7use BSE::TB::OtherParents;
3709451d 8
f5203422 9our $VERSION = "1.013";
d415d0ba
TC
10
11=head1 NAME
12
13BSE::Importer::Target::Article - import target for articles.
14
15=head1 SYNOPSIS
16
17 [import profile foo]
18 ...
19 ; these are the defaults
20 codes=0
21 code_field=linkAlias
22 parent=-1
23 ignore_missing=1
24 reset_images=0
3f58d535 25 reset_files=0
d415d0ba
TC
26 reset_steps=0
27
28 # done by the importer
29 my $target = BSE::Importer::Target::Article->new
30 (importer => $importer, opts => \%opts)
31 ...
32 $target->start($imp);
33 # for each row:
34 $target->row($imp, \%entry, \@parents);
35
36
37=head1 DESCRIPTION
38
39Provides a target for importing BSE articles.
40
57e4a9c7
TC
41C<update_only> profiles must provide a mapping for one of C<id> or
42C<linkAlias>.
43
44Non-C<update_only> profiles must provide a mapping for C<title>.
d415d0ba
TC
45
46=head1 CONFIGURATION
47
48The following extra configuration can be set in the import profile:
49
50=over
51
52=item *
53
54C<codes> - set to true to use the configured C<code_field> to update
57e4a9c7
TC
55existing articles rather than creating new articles. This is forced
56on when the import profile enables C<update_only>.
d415d0ba
TC
57
58=item *
59
60C<code_field> - the field to use to identify existing articles.
61Default: C<linkAlias> for article imports.
62
63=item *
64
65C<parent> - the base of the tree of parent articles to create the
66parent tree under.
67
68=item *
69
3f58d535
TC
70C<ignore_missing> - set to 0 to error on missing image or article
71files. Default: 1.
d415d0ba
TC
72
73=item *
74
75C<reset_images> - set to true to delete all images from an article
76before adding the imported images.
77
78=item *
79
3f58d535
TC
80C<reset_files> - set to true to delete all files from an article
81before adding the imported files.
82
83=item *
84
d415d0ba
TC
85C<reset_steps> - set to true to delete all step parents from an
86article before adding the imported steps.
87
88=back
89
90=head1 SPECIAL FIELDS
91
92The following fields are used to import extra information into
93articles:
94
95=over
96
97=item *
98
99C<< imageI<index>_I<field> >> - used to import images,
100eg. C<image1_file> to specify the image file. Note: images are not
101replaced unless C<reset_images> is set. I<index> is a number from 1
102to 10, I<field> can be any of C<file>, C<alt>, C<name>, C<url>,
103C<storage>, with the C<file> entry being required.
104
105=item *
106
107C<< stepI<index> >> - specify step parents for the article. This can
108either be the article id or the article link alias.
109
110=item *
111
112C<tags> - this is split on C</> to set the tags for the article.
113
114=back
115
116=head1 METHODS
117
118=over
119
120=item new()
121
122Create a new article import target. Follows the protocol specified by
123L<BSE::Importer::Target::Base>.
124
125=cut
cb7fd78d 126
3709451d
TC
# Build the target from the importer's profile configuration.
#
# Reads the codes, code_field, parent, ignore_missing, reset_images,
# reset_files and reset_steps entries via $importer->cfg_entry and
# stores them on the object.
#
# Dies when an update_only profile maps none of key_fields(), when a
# creating profile has no title mapping, or when codes are in use but
# the code field itself has no mapping.
sub new {
  my ($class, %opts) = @_;

  my $self = $class->SUPER::new(%opts);

  my $importer = delete $opts{importer};

  $self->{use_codes} = $importer->cfg_entry('codes', 0);
  my $map = $importer->maps;

  if ($importer->update_only) {
    # the first key field with a mapping is the default code field
    my ($def_code) = grep { $map->{$_} } $self->key_fields;

    unless (defined $def_code) {
      die "No key field (", join(",", $self->key_fields),
        ") mapping found\n";
    }

    $self->{code_field} = $importer->cfg_entry("code_field", $def_code);

    # update_only always matches existing articles by code
    $self->{use_codes} = 1;
  }
  else {
    unless (defined $map->{title}) {
      die "No title mapping found\n";
    }

    $self->{code_field} = $importer->cfg_entry("code_field", $self->default_code_field);
  }

  $self->{parent} = $importer->cfg_entry("parent", $self->default_parent);

  if ($self->{use_codes}) {
    defined $map->{$self->{code_field}}
      or die "No $self->{code_field} mapping found with 'codes' enabled\n";
  }

  $self->{ignore_missing} = $importer->cfg_entry("ignore_missing", 1);
  for my $flag (qw(reset_images reset_files reset_steps)) {
    $self->{$flag} = $importer->cfg_entry($flag, 0);
  }

  return $self;
}
174
d415d0ba
TC
175=item start()
176
177Start import processing.
178
179=cut
180
3709451d
TC
# Reset the per-run state: the cache of children per parent, the list
# of leaves, the list of parents, and the set of parent ids already
# recorded in that list.
sub start {
  my ($self) = @_;

  # catseen guards pushes onto {parents} in _find_parent(); it has to be
  # cleared together with {parents}, otherwise a parent seen in an
  # earlier run would never be reported by parents() again
  $self->{catseen} = {};
  $self->{parent_cache} = {};
  $self->{leaves} = [];
  $self->{parents} = [];
}
188
d415d0ba 189=item row()
3709451d 190
d415d0ba 191Process a row of data.
3709451d 192
d415d0ba 193=cut
0cca6ce6 194
3709451d
TC
# Import one row.
#
# $entry is the hash of mapped field values for the row and $parents
# the list of parent titles to place the article under.
#
# The entry is transformed by xform_entry(), an existing article is
# looked up when codes are in use, and the article is then either
# updated in place or, for profiles that are not update_only, created
# with make_leaf().  Afterwards image1..image10, the file fields,
# step1..step10 and fill_leaf() are applied, the article is recorded in
# {leaves} and an "endrow" event is sent to the importer.
#
# Dies on a blank code or unknown article for update_only profiles, on
# missing image files unless ignore_missing is set, on image errors and
# on unknown step parents.
sub row {
  my ($self, $importer, $entry, $parents) = @_;

  $self->xform_entry($importer, $entry);

  # parentid is only set for creating profiles, or when the row
  # supplied parent titles
  if (!$importer->update_only || @$parents) {
    $entry->{parentid} = $self->_find_parent($importer, $self->{parent}, @$parents);
  }

  my $leaf;
  if ($self->{use_codes}) {
    my $leaf_id = $entry->{$self->{code_field}};

    # use the accessor, as everywhere else in this module, and treat a
    # missing code the same as a blank one
    if ($importer->update_only) {
      defined($leaf_id) && $leaf_id =~ /\S/
        or die "$self->{code_field} blank for update_only profile\n";
    }

    # NOTE(review): for profiles that are not update_only a blank code
    # is still passed to find_leaf() - confirm the lookup cannot match
    # an unrelated article with an empty code field
    $leaf = $self->find_leaf($leaf_id, $importer);
  }
  if ($leaf) {
    # make sure id, articleId etc aren't overwritten
    delete @$entry{$self->primary_key_fields};
    @{$leaf}{keys %$entry} = values %$entry;
    $leaf->mark_modified(actor => $importer->actor);
    $leaf->save;
    $importer->info("Updated $leaf->{id}: ".$leaf->title);
    if ($self->{reset_images}) {
      $leaf->remove_images($importer->cfg);
      $importer->info(" $leaf->{id}: Reset images");
    }
    if ($self->{reset_files}) {
      $leaf->remove_files($importer->cfg);
      $importer->info(" $leaf->{id}: Reset files");
    }
    if ($self->{reset_steps}) {
      my @steps = BSE::TB::OtherParents->getBy(childId => $leaf->{id});
      for my $step (@steps) {
        $step->remove;
      }
    }
  }
  elsif (!$importer->update_only) {
    # default the audit fields to the importing actor's logon, if the
    # actor is an object
    $entry->{createdBy} ||= ref($importer->actor) ? $importer->actor->logon : "";
    $entry->{lastModifiedBy} ||= ref($importer->actor) ? $importer->actor->logon : "";
    $self->validate_make_leaf($importer, $entry);
    $leaf = $self->make_leaf
      (
       $importer,
       cfg => $importer->cfg,
       %$entry
      );
    $importer->info("Added $leaf->{id}: $entry->{title}");
  }
  else {
    die "No leaf found for $entry->{$self->{code_field}} for update_only profile\n";
  }

  # image1_file .. image10_file, each with optional alt/name/url/storage
  for my $image_index (1 .. 10) {
    my $file = $entry->{"image${image_index}_file"};
    $file
      or next;
    my $full_file = $importer->find_file($file);

    unless ($full_file) {
      $self->{ignore_missing}
        and next;
      die "File '$file' not found for image$image_index\n";
    }

    my %opts =
      (
       file => $full_file,
       display_name => $file,
      );
    for my $key (qw/alt name url storage/) {
      my $fkey = "image${image_index}_$key";
      $entry->{$fkey}
        and $opts{$key} = $entry->{$fkey};
    }

    my %errors;
    my $im = bse_add_image($importer->cfg, $leaf, %opts,
                           errors => \%errors);
    $im
      or die join(", ",map "$_: $errors{$_}", keys %errors), "\n";
    $importer->info(" $leaf->{id}: Add image '$file'");
  }

  $self->_add_files($importer, $entry, $leaf);

  # step1 .. step10: all-digit values are looked up by id, anything
  # else by linkAlias
  for my $step_index (1 .. 10) {
    my $step_id = $entry->{"step$step_index"};
    $step_id
      or next;
    my $step;
    if ($step_id =~ /^\d+$/) {
      $step = BSE::TB::Articles->getByPkey($step_id);
    }
    else {
      $step = BSE::TB::Articles->getBy(linkAlias => $step_id);
    }
    $step
      or die "Cannot find stepparent with id $step_id\n";

    bse_add_step_parent($importer->cfg, child => $leaf, parent => $step);
  }
  $self->fill_leaf($importer, $leaf, %$entry);
  push @{$self->{leaves}}, $leaf;

  $importer->event(endrow => { leaf => $leaf });
}
304
3f58d535
TC
# Apply the file1_* .. file10_* fields of a row to the article $leaf.
#
# For each index the metadata fields and the optional fileN_file are
# gathered.  When fileN_name matches the name of a file already on the
# article that file is updated, otherwise a new file is added.
# Failures while updating or adding are reported through
# $importer->warn rather than aborting the row; a missing source file
# dies unless ignore_missing is set.
sub _add_files {
  my ($self, $importer, $entry, $leaf) = @_;

  # files already on the article, keyed by their non-empty name
  my %named_files;
  for my $existing ($leaf->files) {
    $existing->name ne ''
      or next;
    $named_files{$existing->name} = $existing;
  }

  my @meta_keys = qw/name displayName storage description forSale download requireUser notes hide_from_list category/;

 FILE:
  for my $file_index (1 .. 10) {
    # pick up every metadata column the row defines for this index
    my %opts;
    for my $key (@meta_keys) {
      my $value = $entry->{"file${file_index}_$key"};
      defined $value
        and $opts{$key} = $value;
    }
    my $found = keys %opts ? 1 : 0;

    my $filename = $entry->{"file${file_index}_file"};
    if ($filename) {
      my $full_file = $importer->find_file($filename);

      if (!$full_file) {
        next FILE if $self->{ignore_missing};
        die "File '$filename' not found for file$file_index\n";
      }

      $opts{filename} = $full_file;
      $found = 1;
    }

    # nothing at all supplied for this index
    next FILE unless $found;

    my $file = $opts{name} ? $named_files{$opts{name}} : undef;

    # metadata alone only makes sense for an existing file
    if (!$file && !$opts{filename}) {
      $importer->warn("No file${file_index}_file supplied but other file${file_index}_* field supplied");
      next FILE;
    }

    if ($filename && !$opts{displayName}) {
      # default the display name to the last path component
      my ($base_name) = $filename =~ /([^\\\/:]+)$/;
      unless (defined $base_name) {
        $importer->warn("Cannot create displayName for $filename");
        next FILE;
      }
      $opts{displayName} = $base_name;
    }

    my $done = eval {
      if ($file) {
        my @warnings;
        $file->update
          (
           _actor => $importer->actor,
           _warnings => \@warnings,
           %opts,
          );

        $importer->info(" $leaf->{id}: Update file '".$file->displayName ."'");
      }
      else {
        # this dies on failure
        $file = $leaf->add_file
          (
           $importer->cfg,
           %opts,
           store => 1,
          );

        $importer->info(" $leaf->{id}: Add file '$filename'");
      }
      1;
    };
    $done
      or $importer->warn($@);
  }
}
385
d415d0ba
TC
386=item xform_entry()
387
388Called by row() to perform any extra data transformation needed.
389
390Currently this forces a non-blank, non-newline title, and defaults the
391values of C<summary>, C<description> and C<body> to the title.
392
393=cut
394
# Normalize a row's entry hash in place before it is imported.
#
# - a supplied title must contain a non-space character and no newline,
#   otherwise this dies
# - unless the profile is update_only, false summary, description and
#   body values are replaced with the title
# - every character other than A-Z, a-z, 0-9, "_" and "-" is removed
#   from a defined linkAlias
sub xform_entry {
  my ($self, $importer, $entry) = @_;

  if (exists $entry->{title}) {
    # an undefined title is as unusable as a blank one
    defined($entry->{title}) && $entry->{title} =~ /\S/
      or die "title blank\n";

    # trailing newline keeps file/line out of the reported message,
    # like the other row errors in this module
    $entry->{title} =~ /\n/
      and die "Title may not contain newlines\n";
  }
  unless ($importer->update_only) {
    for my $field (qw(summary description body)) {
      $entry->{$field}
        or $entry->{$field} = $entry->{title};
    }
  }

  if (defined $entry->{linkAlias}) {
    $entry->{linkAlias} =~ tr/A-Za-z0-9_-//cd;
  }
}
418
419=item children_of()
420
421Utility method to find the children of a given article.
422
423=cut
424
# Return whatever BSE::TB::Articles->children() yields for $parent.
sub children_of {
  my $self = shift;
  my ($parent) = @_;

  return BSE::TB::Articles->children($parent);
}
430
431=item make_parent()
432
433Create a parent article.
434
435Overridden in the product importer to create catalogs.
436
437=cut
438
# Create a parent article from the supplied fields using
# bse_make_article() and return the result.  $importer is accepted but
# not used here.
sub make_parent {
  my $self = shift;
  my $importer = shift;
  my %entry = @_;

  return bse_make_article(%entry);
}
444
445=item find_leaf()
446
447Find a leaf article based on the supplied code.
448
449=cut
450
# Look up an article whose configured code field equals $leaf_id, with
# leading and trailing whitespace removed from the id first.
#
# Returns nothing when the lookup yields no result; otherwise sends a
# "find_leaf" event to the importer and returns the first result.
sub find_leaf {
  my ($self, $leaf_id, $importer) = @_;

  # trim the code before matching
  for ($leaf_id) {
    s/\A\s+//;
    s/\s+\z//;
  }

  my @found = BSE::TB::Articles->getBy($self->{code_field}, $leaf_id);
  @found
    or return;
  my $leaf = $found[0];

  $importer->event(find_leaf => { id => $leaf_id, leaf => $leaf });

  return $leaf;
}
464
465=item make_leaf()
466
467Create an article based on the imported data.
468
469Overridden in the product importer to create products.
470
471=cut
472
# Create the article for a row with bse_make_article(), send a
# "make_leaf" event carrying the new article to the importer, and
# return the article.
sub make_leaf {
  my $self = shift;
  my $importer = shift;
  my %entry = @_;

  my $leaf = bse_make_article(%entry);
  $importer->event(make_leaf => { leaf => $leaf });

  return $leaf;
}
482
483=item fill_leaf()
484
485Fill the article some more.
486
487Currently sets the tags.
488
489Overridden by the product target to set product options and tiered
490pricing.
491
492=cut
493
# Apply the remaining row data to the article $leaf.
#
# A true "tags" value is split on "/" and passed to $leaf->set_tags;
# this dies with the error set_tags reports when it returns false.
# Returns 1.
sub fill_leaf {
  my ($self, $importer, $leaf, %entry) = @_;

  if ($entry{tags}) {
    my @tags = split m{/}, $entry{tags};
    my $error;
    unless ($leaf->set_tags(\@tags, \$error)) {
      # trailing newline keeps file/line out of the reported message,
      # like the other row errors in this module
      die "Error setting tags: $error\n";
    }
  }

  return 1;
}
507
508=item _find_parent()
509
510Find a parent article.
511
512This method calls itself recursively to work down a tree of parents.
513
514=cut
515
3709451d
TC
# Resolve a chain of parent titles below $parent to an article id.
#
# With no titles left, $parent itself is returned.  Otherwise the first
# title is matched case-insensitively against the children of $parent
# (fetched once and kept in {parent_cache}); a missing child is created
# with make_parent().  Each parent article is recorded once in
# {parents}, and the method recurses with that child's id and the
# remaining titles.
sub _find_parent {
  my ($self, $importer, $parent, @parents) = @_;

  return $parent
    unless @parents;

  my $cache = $self->{parent_cache};
  # load the children of this parent on first use only
  $cache->{$parent} ||= [ $self->children_of($parent) ];
  my $siblings = $cache->{$parent};

  my $title = shift @parents;
  my ($cat) = grep { lc($_->{title}) eq lc($title) } @$siblings;
  if (!$cat) {
    my %opts =
      (
       cfg => $importer->cfg,
       parentid => $parent,
       title => $title,
       body => $title,
      );
    if ($self->{catalog_template}) {
      $opts{template} = $self->{catalog_template};
    }
    $cat = $self->make_parent($importer, %opts);
    $importer->info("Add parent $cat->{id}: $title");

    # keep the cache in step with the newly created child
    push @$siblings, $cat;
  }

  # record each parent only once
  if (!$self->{catseen}{$cat->{id}}) {
    $self->{catseen}{$cat->{id}} = 1;
    push @{$self->{parents}}, $cat;
  }

  return $self->_find_parent($importer, $cat->{id}, @parents);
}
551
d415d0ba
TC
552=item default_parent()
553
554Return the default parent id.
555
556Overridden by the product target to return the shop id.
557
558=cut
559
3709451d
TC
# Parent id used when the profile does not configure "parent".
sub default_parent {
  return -1;
}
561
d415d0ba
TC
562=item default_code_field()
563
564Return the default code field.
565
57e4a9c7 566Overridden by the product target to return the C<product_code> field.
d415d0ba
TC
567
568=cut
569
3709451d
TC
# Code field used when the profile does not configure "code_field".
sub default_code_field {
  return "linkAlias";
}
571
d415d0ba
TC
572=item leaves()
573
574Return the leaf articles created or modified by the import run.
575
576=cut
577
3709451d
TC
# Return the contents of the {leaves} list.
sub leaves {
  my ($self) = @_;

  return @{ $self->{leaves} };
}
581
d415d0ba
TC
582=item parents()
583
584Return the parent articles created or used by the import run.
585
586=cut
587
3709451d
TC
# Return the contents of the {parents} list.
sub parents {
  my ($self) = @_;

  return @{ $self->{parents} };
}
591
57e4a9c7
TC
592=item key_fields()
593
594Columns that can act as keys.
595
596=cut
597
# Names of the columns that may identify an existing article, in the
# order new() checks them for a mapping.
sub key_fields {
  return ('id', 'linkAlias');
}
601
1455f602
TC
602=item validate_make_leaf
603
604Perform validation only needed on creation
605
606=cut
607
# Creation-time check: dies when the entry has a non-empty linkAlias
# and BSE::TB::Articles->getBy finds an article with that linkAlias.
sub validate_make_leaf {
  my ($self, $importer, $entry) = @_;

  my $alias = $entry->{linkAlias};
  if (defined $alias && $alias ne '') {
    my $other = BSE::TB::Articles->getBy(linkAlias => $alias);
    if ($other) {
      die "Duplicate linkAlias value with article ", $other->id, "\n";
    }
  }
}
617
f5203422
TC
618=item primary_key_fields
619
620Fields we can't modify (or initialize) since the database generates
621them.
622
623=cut
624
# Names of the fields row() removes from an entry before copying it
# onto an existing article.
sub primary_key_fields {
  return ('id');
}
628
3709451d 6291;
d415d0ba
TC
630
631=back
632
633=head1 AUTHOR
634
635Tony Cook <tony@develop-help.com>
636
637=cut