allow the state variable to be replaced properly for imageclean
[bse.git] / site / cgi-bin / modules / BSE / Importer / Target / Article.pm
CommitLineData
d415d0ba 1package BSE::Importer::Target::Article;
3709451d 2use strict;
d415d0ba 3use base 'BSE::Importer::Target::Base';
3709451d 4use BSE::API qw(bse_make_article bse_add_image bse_add_step_parent);
e0ed81d7 5use BSE::TB::Articles;
10dd37f9 6use BSE::TB::Products;
0c2d3f00 7use BSE::TB::OtherParents;
3709451d 8
0c2d3f00 9our $VERSION = "1.011";
d415d0ba
TC
10
11=head1 NAME
12
13BSE::Importer::Target::Article - import target for articles.
14
15=head1 SYNOPSIS
16
17 [import profile foo]
18 ...
19 ; these are the defaults
20 codes=0
21 code_field=linkAlias
22 parent=-1
23 ignore_missing=1
24 reset_images=0
3f58d535 25 reset_files=0
d415d0ba
TC
26 reset_steps=0
27
28 # done by the importer
29 my $target = BSE::Importer::Target::Article->new
30 (importer => $importer, opts => \%opts)
31 ...
32 $target->start($imp);
33 # for each row:
34 $target->row($imp, \%entry, \@parents);
35
36
37=head1 DESCRIPTION
38
39Provides a target for importing BSE articles.
40
57e4a9c7
TC
41C<update_only> profiles must provide a mapping for one of C<id> or
42C<linkAlias>.
43
44Non-C<update_only> profiles must provide a mapping for C<title>.
d415d0ba
TC
45
46=head1 CONFIGURATION
47
48The following extra configuration can be set in the import profile:
49
50=over
51
52=item *
53
54C<codes> - set to true to use the configured C<code_field> to update
57e4a9c7
TC
55existing articles rather than creating new articles. This is forced
56on when the import profile enables C<update_only>.
d415d0ba
TC
57
58=item *
59
60C<code_field> - the field to use to identify existing articles.
61Default: C<linkAlias> for article imports.
62
63=item *
64
65C<parent> - the base of the tree of parent articles to create the
66parent tree under.
67
68=item *
69
3f58d535
TC
70C<ignore_missing> - set to 0 to error on missing image or article
71files. Default: 1.
d415d0ba
TC
72
73=item *
74
75C<reset_images> - set to true to delete all images from an article
76before adding the imported images.
77
78=item *
79
3f58d535
TC
80C<reset_files> - set to true to delete all files from an article
81before adding the imported files.
82
83=item *
84
d415d0ba
TC
85C<reset_steps> - set to true to delete all step parents from an
86article before adding the imported steps.
87
88=back
89
90=head1 SPECIAL FIELDS
91
92The following fields are used to import extra information into
93articles:
94
95=over
96
97=item *
98
99C<< imageI<index>_I<field> >> - used to import images,
100eg. C<image1_file> to specify the image file. Note: images are not
101replaced unless C<reset_images> is set. I<index> is a number from 1
102to 10, I<field> can be any of C<file>, C<alt>, C<name>, C<url>,
103C<storage>, with the C<file> entry being required.
104
105=item *
106
107C<< stepI<index> >> - specify step parents for the article. This can
108either be the article id or the article link alias.
109
110=item *
111
112C<tags> - this is split on C</> to set the tags for the article.
113
114=back
115
116=head1 METHODS
117
118=over
119
120=item new()
121
122Create a new article import target. Follows the protocol specified by
123L<BSE::Importer::Target::Base>.
124
125=cut
cb7fd78d 126
3709451d
TC
sub new {
  my ($class, %opts) = @_;

  my $self = $class->SUPER::new(%opts);

  my $importer = delete $opts{importer};

  # 'codes' may be forced on below for update_only profiles
  $self->{use_codes} = $importer->cfg_entry('codes', 0);
  my $map = $importer->maps;

  if ($importer->update_only) {
    # the first mapped key field becomes the default code field
    my ($def_code) = grep { $map->{$_} } $self->key_fields;
    defined $def_code
      or die "No key field (" . join(",", $self->key_fields)
        . ") mapping found\n";

    $self->{code_field} = $importer->cfg_entry("code_field", $def_code);
    $self->{use_codes} = 1;
  }
  else {
    # new articles can only be created when a title is mapped
    die "No title mapping found\n"
      unless defined $map->{title};

    $self->{code_field} =
      $importer->cfg_entry("code_field", $self->default_code_field);
  }

  $self->{parent} = $importer->cfg_entry("parent", $self->default_parent);

  my $code_field = $self->{code_field};
  if ($self->{use_codes} && !defined $map->{$code_field}) {
    die "No $self->{code_field} mapping found with 'codes' enabled\n";
  }

  # remaining profile switches, as name => default pairs, read in order
  my @switches =
    (
     ignore_missing => 1,
     reset_images   => 0,
     reset_files    => 0,
     reset_steps    => 0,
    );
  while (my ($name, $default) = splice @switches, 0, 2) {
    $self->{$name} = $importer->cfg_entry($name, $default);
  }

  return $self;
}
174
d415d0ba
TC
175=item start()
176
177Start import processing.
178
179=cut
180
3709451d
TC
sub start {
  my ($self) = @_;

  # Per-run state: children cached per parent id, and the articles
  # touched by this run.
  $self->{parent_cache} = {};
  $self->{leaves} = [];
  # _find_parent() uses catseen to de-duplicate the parents list, so it
  # must be cleared together with that list; otherwise a second run on
  # the same target would never re-record parents seen by the first.
  $self->{catseen} = {};
  $self->{parents} = [];
}
188
d415d0ba 189=item row()
3709451d 190
d415d0ba 191Process a row of data.
3709451d 192
d415d0ba 193=cut
0cca6ce6 194
3709451d
TC
sub row {
  my ($self, $importer, $entry, $parents) = @_;

  $self->xform_entry($importer, $entry);

  # For update_only profiles the parent is only set when the row
  # actually supplied parent titles.
  if (!$importer->update_only || @$parents) {
    $entry->{parentid} = $self->_find_parent($importer, $self->{parent}, @$parents);
  }

  my $leaf;
  if ($self->{use_codes}) {
    my $leaf_id = $entry->{$self->{code_field}};

    # use the accessor, as everywhere else in this module, rather than
    # reaching into the importer's hash
    if ($importer->update_only) {
      defined($leaf_id) && $leaf_id =~ /\S/
        or die "$self->{code_field} blank for update_only profile\n";
    }

    $leaf = $self->find_leaf($leaf_id, $importer);
  }
  if ($leaf) {
    # existing article: copy every imported field onto it and save
    @{$leaf}{keys %$entry} = values %$entry;
    $leaf->mark_modified(actor => $importer->actor);
    $leaf->save;
    $importer->info("Updated $leaf->{id}: ".$leaf->title);
    if ($self->{reset_images}) {
      $leaf->remove_images($importer->cfg);
      $importer->info(" $leaf->{id}: Reset images");
    }
    if ($self->{reset_files}) {
      $leaf->remove_files($importer->cfg);
      $importer->info(" $leaf->{id}: Reset files");
    }
    if ($self->{reset_steps}) {
      my @steps = BSE::TB::OtherParents->getBy(childId => $leaf->{id});
      for my $step (@steps) {
        $step->remove;
      }
    }
  }
  elsif (!$importer->update_only) {
    # new article: default the audit fields from the importing actor,
    # when the actor is an object
    $entry->{createdBy} ||= ref $importer->actor ? $importer->actor->logon : "";
    $entry->{lastModifiedBy} ||= ref $importer->actor ? $importer->actor->logon : "";
    $self->validate_make_leaf($importer, $entry);
    $leaf = $self->make_leaf
      (
       $importer,
       cfg => $importer->cfg,
       %$entry
      );
    $importer->info("Added $leaf->{id}: $entry->{title}");
  }
  else {
    die "No leaf found for $entry->{$self->{code_field}} for update_only profile\n";
  }

  # image1_file .. image10_file, each with optional alt/name/url/storage
  for my $image_index (1 .. 10) {
    my $file = $entry->{"image${image_index}_file"};
    $file
      or next;
    my $full_file = $importer->find_file($file);

    unless ($full_file) {
      $self->{ignore_missing}
        and next;
      die "File '$file' not found for image$image_index\n";
    }

    my %opts = ( file => $full_file );
    for my $key (qw/alt name url storage/) {
      my $fkey = "image${image_index}_$key";
      $entry->{$fkey}
        and $opts{$key} = $entry->{$fkey};
    }

    my %errors;
    my $im = bse_add_image($importer->cfg, $leaf, %opts,
                           errors => \%errors);
    $im
      or die join(", ",map "$_: $errors{$_}", keys %errors), "\n";
    $importer->info(" $leaf->{id}: Add image '$file'");
  }
  $self->_add_files($importer, $entry, $leaf);

  # step1 .. step10: all-digit values are looked up as article ids,
  # anything else as a link alias
  for my $step_index (1 .. 10) {
    my $step_id = $entry->{"step$step_index"};
    $step_id
      or next;
    my $step;
    if ($step_id =~ /^\d+$/) {
      $step = BSE::TB::Articles->getByPkey($step_id);
    }
    else {
      $step = BSE::TB::Articles->getBy(linkAlias => $step_id);
    }
    $step
      or die "Cannot find stepparent with id $step_id\n";

    bse_add_step_parent($importer->cfg, child => $leaf, parent => $step);
  }
  $self->fill_leaf($importer, $leaf, %$entry);
  push @{$self->{leaves}}, $leaf;

  $importer->event(endrow => { leaf => $leaf });
}
298
3f58d535
TC
# Import file1_* .. file10_* fields from $entry as files attached to
# $leaf.  A row whose fileN_name matches an existing named file updates
# that file; otherwise a new file is added, which requires fileN_file.
# Failures for an individual file are reported through $importer->warn
# and do not abort the row.
sub _add_files {
  my ($self, $importer, $entry, $leaf) = @_;

  # existing files with a non-empty name, indexed by that name
  my %named_files;
  for my $existing ($leaf->files) {
    my $name = $existing->name;
    defined($name) && $name ne ''
      or next;
    $named_files{$name} = $existing;
  }

  for my $file_index (1 .. 10) {
    my %opts;

    my $found = 0;
    for my $key (qw/name displayName storage description forSale download requireUser notes hide_from_list category/) {
      my $fkey = "file${file_index}_$key";
      if (defined $entry->{$fkey}) {
        $opts{$key} = $entry->{$fkey};
        $found = 1;
      }
    }

    my $filename = $entry->{"file${file_index}_file"};
    if ($filename) {
      my $full_file = $importer->find_file($filename);

      unless ($full_file) {
        $self->{ignore_missing}
          and next;
        die "File '$filename' not found for file$file_index\n";
      }

      $opts{filename} = $full_file;
      $found = 1;
    }

    # nothing supplied for this index
    $found
      or next;

    my $file;
    if ($opts{name}) {
      $file = $named_files{$opts{name}};
    }

    if (!$file && !$opts{filename}) {
      $importer->warn("No file${file_index}_file supplied but other file${file_index}_* field supplied");
      next;
    }

    # default the display name to the final path component of the
    # supplied file name
    if ($filename && !$opts{displayName}) {
      unless (($opts{displayName}) = $filename =~ /([^\\\/:]+)$/) {
        $importer->warn("Cannot create displayName for $filename");
        next;
      }
    }

    eval {
      if ($file) {
        my @warnings;
        $file->update
          (
           _actor => $importer->actor,
           _warnings => \@warnings,
           %opts,
          );

        # NOTE(review): assumes update() pushes printable messages onto
        # the _warnings array; previously these were collected and then
        # silently discarded.
        $importer->warn($_) for @warnings;

        $importer->info(" $leaf->{id}: Update file '".$file->displayName ."'");
      }
      else {
        # this dies on failure
        $file = $leaf->add_file
          (
           $importer->cfg,
           %opts,
           store => 1,
          );

        $importer->info(" $leaf->{id}: Add file '$filename'");
      }
      1;
    } or do {
      $importer->warn($@);
    };
  }
}
379
d415d0ba
TC
380=item xform_entry()
381
Called by row() to perform any extra data transformation needed.
383
384Currently this forces a non-blank, non-newline title, and defaults the
385values of C<summary>, C<description> and C<body> to the title.
386
387=cut
388
sub xform_entry {
  my ($self, $importer, $entry) = @_;

  # a supplied title must contain a non-space character and no newline
  if (exists $entry->{title}) {
    $entry->{title} =~ /\S/
      or die "title blank\n";

    # trailing newline keeps the file/line suffix out of the message,
    # matching every other row error in this module
    $entry->{title} =~ /\n/
      and die "Title may not contain newlines\n";
  }

  # only when creating: empty text fields default to the title
  unless ($importer->update_only) {
    for my $field (qw(summary description body)) {
      $entry->{$field} ||= $entry->{title};
    }
  }

  # strip everything but letters, digits, underscore and hyphen
  if (defined $entry->{linkAlias}) {
    $entry->{linkAlias} =~ tr/A-Za-z0-9_-//cd;
  }
}
412
413=item children_of()
414
415Utility method to find the children of a given article.
416
417=cut
418
sub children_of {
  my $self = shift;
  my ($parent_id) = @_;

  # delegate to the articles table; the caller's context is passed through
  return BSE::TB::Articles->children($parent_id);
}
424
425=item make_parent()
426
427Create a parent article.
428
429Overridden in the product importer to create catalogs.
430
431=cut
432
sub make_parent {
  my $self = shift;
  my $importer = shift;
  my %article_fields = @_;

  # the importer is accepted but not used by this implementation
  return bse_make_article(%article_fields);
}
438
439=item find_leaf()
440
Find a leaf article based on the supplied code.
442
443=cut
444
sub find_leaf {
  my ($self, $leaf_id, $importer) = @_;

  # trim surrounding whitespace from the supplied code
  for ($leaf_id) {
    s/\A\s+//;
    s/\s+\z//;
  }

  my @matches = BSE::TB::Articles->getBy($self->{code_field}, $leaf_id);
  @matches
    or return;
  my $leaf = $matches[0];

  # notify the importer of the match
  $importer->event(find_leaf => { id => $leaf_id, leaf => $leaf });

  return $leaf;
}
458
459=item make_leaf()
460
461Create an article based on the imported data.
462
463Overridden in the product importer to create products.
464
465=cut
466
sub make_leaf {
  my $self = shift;
  my $importer = shift;
  my %article_fields = @_;

  my $article = bse_make_article(%article_fields);

  # notify the importer of the newly created article
  $importer->event(make_leaf => { leaf => $article });

  return $article;
}
476
477=item fill_leaf()
478
479Fill the article some more.
480
481Currently sets the tags.
482
483Overridden by the product target to set product options and tiered
484pricing.
485
486=cut
487
sub fill_leaf {
  my ($self, $importer, $leaf, %entry) = @_;

  # nothing to do unless the row supplied tags
  my $tag_text = $entry{tags}
    or return 1;

  # tags arrive as a single "/" separated string
  my @tag_names = split m{/}, $tag_text;
  my $error;
  $leaf->set_tags(\@tag_names, \$error)
    or die "Error setting tags: $error";

  return 1;
}
501
502=item _find_parent()
503
504Find a parent article.
505
506This method calls itself recursively to work down a tree of parents.
507
508=cut
509
3709451d
TC
sub _find_parent {
  my ($self, $importer, $parent, @parents) = @_;

  # no titles left to resolve: $parent is the article we were after
  return $parent
    unless @parents;

  # children of each parent are fetched once and then reused
  my $cache = $self->{parent_cache};
  my $kids = $cache->{$parent} ||= [ $self->children_of($parent) ];

  # first child whose title matches, ignoring case
  my $title = shift @parents;
  my $wanted = lc $title;
  my $cat;
  for my $kid (@$kids) {
    if (lc($kid->{title}) eq $wanted) {
      $cat = $kid;
      last;
    }
  }

  if (!$cat) {
    # no such child yet: create it and remember it in the cache
    my %opts =
      (
       cfg => $importer->cfg,
       parentid => $parent,
       title => $title,
       body => $title,
      );
    if ($self->{catalog_template}) {
      $opts{template} = $self->{catalog_template};
    }
    $cat = $self->make_parent($importer, %opts);
    $importer->info("Add parent $cat->{id}: $title");
    push @$kids, $cat;
  }

  # record each parent only once for parents()
  my $cat_id = $cat->{id};
  if (!$self->{catseen}{$cat_id}) {
    $self->{catseen}{$cat_id} = 1;
    push @{$self->{parents}}, $cat;
  }

  # descend one level with the remaining titles
  return $self->_find_parent($importer, $cat_id, @parents);
}
545
d415d0ba
TC
546=item default_parent()
547
548Return the default parent id.
549
550Overridden by the product target to return the shop id.
551
552=cut
553
3709451d
TC
sub default_parent {
  # id used as the base of the parent tree when the profile sets none
  return -1;
}
555
d415d0ba
TC
556=item default_code_field()
557
558Return the default code field.
559
57e4a9c7 560Overridden by the product target to return the C<product_code> field.
d415d0ba
TC
561
562=cut
563
3709451d
TC
sub default_code_field {
  # field used to identify existing articles when the profile sets none
  return "linkAlias";
}
565
d415d0ba
TC
566=item leaves()
567
568Return the leaf articles created or modified by the import run.
569
570=cut
571
3709451d
TC
sub leaves {
  my ($self) = @_;

  # flattened list of the leaf articles collected by row()
  return @{ $self->{leaves} };
}
575
d415d0ba
TC
576=item parents()
577
578Return the parent articles created or used by the import run.
579
580=cut
581
3709451d
TC
sub parents {
  my ($self) = @_;

  # flattened list of the parent articles collected by _find_parent()
  return @{ $self->{parents} };
}
585
57e4a9c7
TC
586=item key_fields()
587
588Columns that can act as keys.
589
590=cut
591
sub key_fields {
  # candidate key columns, in the order new() checks them
  return ('id', 'linkAlias');
}
595
1455f602
TC
596=item validate_make_leaf
597
598Perform validation only needed on creation
599
600=cut
601
sub validate_make_leaf {
  my ($self, $importer, $entry) = @_;

  # only a non-empty linkAlias needs checking
  my $alias = $entry->{linkAlias};
  return
    unless defined $alias && $alias ne '';

  # refuse to create an article whose alias is already in use
  my $other = BSE::TB::Articles->getBy(linkAlias => $alias);
  if ($other) {
    die "Duplicate linkAlias value with article ", $other->id, "\n";
  }
}
611
3709451d 6121;
d415d0ba
TC
613
614=back
615
616=head1 AUTHOR
617
618Tony Cook <tony@develop-help.com>
619
620=cut