use the supplied name of the image file for working out the stored name
[bse.git] / site / cgi-bin / modules / BSE / Importer / Target / Article.pm
CommitLineData
d415d0ba 1package BSE::Importer::Target::Article;
3709451d 2use strict;
d415d0ba 3use base 'BSE::Importer::Target::Base';
3709451d 4use BSE::API qw(bse_make_article bse_add_image bse_add_step_parent);
e0ed81d7 5use BSE::TB::Articles;
10dd37f9 6use BSE::TB::Products;
0c2d3f00 7use BSE::TB::OtherParents;
3709451d 8
9ef99d41 9our $VERSION = "1.012";
d415d0ba
TC
10
11=head1 NAME
12
13BSE::Importer::Target::Article - import target for articles.
14
15=head1 SYNOPSIS
16
17 [import profile foo]
18 ...
19 ; these are the defaults
20 codes=0
21 code_field=linkAlias
22 parent=-1
23 ignore_missing=1
24 reset_images=0
3f58d535 25 reset_files=0
d415d0ba
TC
26 reset_steps=0
27
28 # done by the importer
29 my $target = BSE::Importer::Target::Article->new
30 (importer => $importer, opts => \%opts)
31 ...
32 $target->start($imp);
33 # for each row:
34 $target->row($imp, \%entry, \@parents);
35
36
37=head1 DESCRIPTION
38
39Provides a target for importing BSE articles.
40
57e4a9c7
TC
41C<update_only> profiles must provide a mapping for one of C<id> or
42C<linkAlias>.
43
44Non-C<update_only> profiles must provide a mapping for C<title>.
d415d0ba
TC
45
46=head1 CONFIGURATION
47
48The following extra configuration can be set in the import profile:
49
50=over
51
52=item *
53
54C<codes> - set to true to use the configured C<code_field> to update
57e4a9c7
TC
55existing articles rather than creating new articles. This is forced
56on when the import profile enables C<update_only>.
d415d0ba
TC
57
58=item *
59
60C<code_field> - the field to use to identify existing articles.
61Default: C<linkAlias> for article imports.
62
63=item *
64
65C<parent> - the base of the tree of parent articles to create the
66parent tree under.
67
68=item *
69
3f58d535
TC
70C<ignore_missing> - set to 0 to error on missing image or article
71files. Default: 1.
d415d0ba
TC
72
73=item *
74
75C<reset_images> - set to true to delete all images from an article
76before adding the imported images.
77
78=item *
79
3f58d535
TC
80C<reset_files> - set to true to delete all files from an article
81before adding the imported files.
82
83=item *
84
d415d0ba
TC
85C<reset_steps> - set to true to delete all step parents from an
86article before adding the imported steps.
87
88=back
89
90=head1 SPECIAL FIELDS
91
92The following fields are used to import extra information into
93articles:
94
95=over
96
97=item *
98
99C<< imageI<index>_I<field> >> - used to import images,
100eg. C<image1_file> to specify the image file. Note: images are not
101replaced unless C<reset_images> is set. I<index> is a number from 1
102to 10, I<field> can be any of C<file>, C<alt>, C<name>, C<url>,
103C<storage>, with the C<file> entry being required.
104
105=item *
106
107C<< stepI<index> >> - specify step parents for the article. This can
108either be the article id or the article link alias.
109
110=item *
111
112C<tags> - this is split on C</> to set the tags for the article.
113
114=back
115
116=head1 METHODS
117
118=over
119
=item new()

Create a new article import target.  Follows the protocol specified by
L<BSE::Importer::Target::Base>.

Reads the C<codes>, C<code_field>, C<parent>, C<ignore_missing>,
C<reset_images>, C<reset_files> and C<reset_steps> settings from the
importer.

Dies if an C<update_only> profile maps none of the key_fields(), if a
non-C<update_only> profile has no C<title> mapping, or if codes are in
use and the code field has no mapping.

=cut

sub new {
  my ($class, %opts) = @_;

  my $self = $class->SUPER::new(%opts);

  my $importer = delete $opts{importer};

  $self->{use_codes} = $importer->cfg_entry('codes', 0);
  my $map = $importer->maps;

  my $default_code;
  if ($importer->update_only) {
    # the first mapped key field becomes the default code field
    ($default_code) = grep { $map->{$_} } $self->key_fields;
    unless (defined $default_code) {
      die "No key field (", join(",", $self->key_fields),
        ") mapping found\n";
    }

    # update_only rows are always matched against existing articles
    $self->{use_codes} = 1;
  }
  else {
    die "No title mapping found\n"
      unless defined $map->{title};

    $default_code = $self->default_code_field;
  }
  $self->{code_field} = $importer->cfg_entry("code_field", $default_code);

  $self->{parent} = $importer->cfg_entry("parent", $self->default_parent);

  my $code_field = $self->{code_field};
  if ($self->{use_codes} && !defined $map->{$code_field}) {
    die "No $self->{code_field} mapping found with 'codes' enabled\n";
  }

  # simple settings: name and default value
  my @settings =
    (
     [ ignore_missing => 1 ],
     [ reset_images   => 0 ],
     [ reset_files    => 0 ],
     [ reset_steps    => 0 ],
    );
  for my $setting (@settings) {
    my ($name, $default) = @$setting;
    $self->{$name} = $importer->cfg_entry($name, $default);
  }

  return $self;
}
174
d415d0ba
TC
=item start()

Start import processing.

Resets the per-run state: the cache of child articles used by
_find_parent(), the record of which parents have already been noted,
and the lists returned by leaves() and parents().

=cut

sub start {
  my ($self) = @_;

  $self->{parent_cache} = {};
  $self->{leaves} = [];
  # forget which parents an earlier run recorded, otherwise they would
  # never be added to the fresh parents list below
  $self->{catseen} = {};
  $self->{parents} = [];
}
188
=item row()

Process a row of data.

Called with the importer, a hash reference of the row's field values
and an array reference of parent titles.

The entry is passed through xform_entry(), then the article is either
found by its code (when codes are in use) and updated, or created with
make_leaf().  Images (C<image1_file> .. C<image10_file>), files and
step parents (C<step1> .. C<step10>) from the entry are then added,
fill_leaf() is called and an C<endrow> event is sent to the importer.

Dies if an C<update_only> row has a blank code or matches no article,
if a file is missing and C<ignore_missing> is off, if an image cannot
be added, or if a step parent cannot be found.

=cut

sub row {
  my ($self, $importer, $entry, $parents) = @_;

  $self->xform_entry($importer, $entry);

  # an update_only row only gets a parent when a parent path is supplied
  if (!$importer->update_only || @$parents) {
    $entry->{parentid} = $self->_find_parent($importer, $self->{parent}, @$parents);
  }

  my $leaf;
  if ($self->{use_codes}) {
    my $leaf_id = $entry->{$self->{code_field}};

    # use the accessor, as everywhere else, rather than reaching into
    # the importer's hash
    if ($importer->update_only) {
      defined $leaf_id && $leaf_id =~ /\S/
        or die "$self->{code_field} blank for update_only profile\n";
    }

    # NOTE(review): for non-update_only profiles a blank code is still
    # looked up; presumably that can match an article whose code field
    # is blank - confirm whether that is intended.
    $leaf = $self->find_leaf($leaf_id, $importer);
  }
  if ($leaf) {
    # copy every imported field onto the existing article
    @{$leaf}{keys %$entry} = values %$entry;
    $leaf->mark_modified(actor => $importer->actor);
    $leaf->save;
    $importer->info("Updated $leaf->{id}: ".$leaf->title);
    if ($self->{reset_images}) {
      $leaf->remove_images($importer->cfg);
      $importer->info(" $leaf->{id}: Reset images");
    }
    if ($self->{reset_files}) {
      $leaf->remove_files($importer->cfg);
      $importer->info(" $leaf->{id}: Reset files");
    }
    if ($self->{reset_steps}) {
      my @steps = BSE::TB::OtherParents->getBy(childId => $leaf->{id});
      for my $step (@steps) {
        $step->remove;
      }
    }
  }
  elsif (!$importer->update_only) {
    $entry->{createdBy} ||= ref $importer->actor ? $importer->actor->logon : "";
    $entry->{lastModifiedBy} ||= ref $importer->actor ? $importer->actor->logon : "";
    $self->validate_make_leaf($importer, $entry);
    $leaf = $self->make_leaf
      (
       $importer,
       cfg => $importer->cfg,
       %$entry
      );
    $importer->info("Added $leaf->{id}: $entry->{title}");
  }
  else {
    die "No leaf found for $entry->{$self->{code_field}} for update_only profile\n";
  }

  for my $image_index (1 .. 10) {
    my $file = $entry->{"image${image_index}_file"};
    $file
      or next;
    my $full_file = $importer->find_file($file);

    unless ($full_file) {
      $self->{ignore_missing}
        and next;
      die "File '$file' not found for image$image_index\n";
    }

    # display_name carries the name as supplied in the import data
    my %opts =
      (
       file => $full_file,
       display_name => $file,
      );
    for my $key (qw/alt name url storage/) {
      my $fkey = "image${image_index}_$key";
      $entry->{$fkey}
        and $opts{$key} = $entry->{$fkey};
    }

    my %errors;
    my $im = bse_add_image($importer->cfg, $leaf, %opts,
                           errors => \%errors);
    # sorted so the message does not depend on hash ordering
    $im
      or die join(", ", map "$_: $errors{$_}", sort keys %errors), "\n";
    $importer->info(" $leaf->{id}: Add image '$file'");
  }

  $self->_add_files($importer, $entry, $leaf);

  for my $step_index (1 .. 10) {
    my $step_id = $entry->{"step$step_index"};
    $step_id
      or next;
    my $step;
    # all digits is an article id, anything else a link alias
    if ($step_id =~ /^\d+$/) {
      $step = BSE::TB::Articles->getByPkey($step_id);
    }
    else {
      $step = BSE::TB::Articles->getBy(linkAlias => $step_id);
    }
    $step
      or die "Cannot find stepparent with id $step_id\n";

    bse_add_step_parent($importer->cfg, child => $leaf, parent => $step);
  }
  $self->fill_leaf($importer, $leaf, %$entry);
  push @{$self->{leaves}}, $leaf;

  $importer->event(endrow => { leaf => $leaf });
}
302
3f58d535
TC
=item _add_files()

Add or update the article files described by the C<file1_*>
.. C<file10_*> fields of the entry.

Called with the importer, the entry hash reference and the leaf
article.  A file whose C<name> matches an existing named file of the
article is updated, otherwise a new file is added, which requires the
C<< fileI<index>_file >> field.  If no C<displayName> is supplied it
is taken from the last path component of the supplied file name.

Failures to add or update a file are reported as importer warnings
rather than aborting the row.  Dies only if a file is missing and
C<ignore_missing> is off.

=cut

sub _add_files {
  my ($self, $importer, $entry, $leaf) = @_;

  # existing files that can be matched by name
  my %named_files = map { $_->name => $_ } grep $_->name ne '', $leaf->files;

  for my $file_index (1 .. 10) {
    my %opts;

    my $found = 0;
    for my $key (qw/name displayName storage description forSale download requireUser notes hide_from_list category/) {
      my $fkey = "file${file_index}_$key";
      if (defined $entry->{$fkey}) {
        $opts{$key} = $entry->{$fkey};
        $found = 1;
      }
    }

    my $filename = $entry->{"file${file_index}_file"};
    if ($filename) {
      my $full_file = $importer->find_file($filename);

      unless ($full_file) {
        $self->{ignore_missing}
          and next;
        die "File '$filename' not found for file$file_index\n";
      }

      $opts{filename} = $full_file;
      $found = 1;
    }

    # nothing at all supplied for this index
    $found
      or next;

    my $file;
    if ($opts{name}) {
      $file = $named_files{$opts{name}};
    }

    # a new file needs content
    if (!$file && !$opts{filename}) {
      $importer->warn("No file${file_index}_file supplied but other file${file_index}_* field supplied");
      next;
    }

    if ($filename && !$opts{displayName}) {
      # default the display name to the last path component
      unless (($opts{displayName}) = $filename =~ /([^\\\/:]+)$/) {
        $importer->warn("Cannot create displayName for $filename");
        next;
      }
    }

    eval {
      if ($file) {
        my @warnings;
        $file->update
          (
           _actor => $importer->actor,
           _warnings => \@warnings,
           %opts,
          );

        # pass on anything the update collected instead of dropping it;
        # assumes the entries are message strings - TODO confirm
        $importer->warn($_) for @warnings;

        $importer->info(" $leaf->{id}: Update file '".$file->displayName ."'");
      }
      else {
        # this dies on failure
        $file = $leaf->add_file
          (
           $importer->cfg,
           %opts,
           store => 1,
          );

        $importer->info(" $leaf->{id}: Add file '$filename'");
      }
      1;
    } or do {
      $importer->warn($@);
    };
  }
}
383
d415d0ba
TC
=item xform_entry()

Called by row() to perform any extra data transformation needed.

If the entry has a C<title> it must be non-blank and contain no
newlines, otherwise this dies.  Unless the profile is C<update_only>,
empty C<summary>, C<description> and C<body> values default to the
title.  Any characters other than letters, digits, C<_> and C<-> are
removed from a defined C<linkAlias>.

=cut

sub xform_entry {
  my ($self, $importer, $entry) = @_;

  if (exists $entry->{title}) {
    die "title blank\n"
      unless $entry->{title} =~ /\S/;

    die "Title may not contain newlines"
      if $entry->{title} =~ /\n/;
  }

  if (!$importer->update_only) {
    # new articles fall back to the title for their text fields
    for my $field (qw(summary description body)) {
      $entry->{$field} ||= $entry->{title};
    }
  }

  if (defined $entry->{linkAlias}) {
    # keep only the characters permitted here
    $entry->{linkAlias} =~ tr/A-Za-z0-9_-//cd;
  }
}
416
=item children_of()

Utility method to find the children of a given article.

Passes the supplied parent through to C<< BSE::TB::Articles->children >>
and returns whatever that returns.

=cut

sub children_of {
  my ($self, $parent_id) = @_;

  return BSE::TB::Articles->children($parent_id);
}
428
=item make_parent()

Create a parent article.

Called with the importer followed by the article fields as key/value
pairs, which are handed to bse_make_article().  Returns the result of
that call.

Overridden in the product importer to create catalogs.

=cut

sub make_parent {
  my $self = shift;
  my $importer = shift;
  my %article = @_;

  return bse_make_article(%article);
}
442
=item find_leaf()

Find a leaf article based on the supplied code.

Leading and trailing whitespace is removed from the code before it is
looked up in the configured code field.  If an article is found a
C<find_leaf> event is sent to the importer and the article is
returned, otherwise nothing is returned.

=cut

sub find_leaf {
  my ($self, $leaf_id, $importer) = @_;

  # trim the local copy of the code
  for ($leaf_id) {
    s/\A\s+//;
    s/\s+\z//;
  }

  my @found = BSE::TB::Articles->getBy($self->{code_field}, $leaf_id);
  return
    unless @found;
  my $leaf = $found[0];

  $importer->event(find_leaf => { id => $leaf_id, leaf => $leaf });

  return $leaf;
}
462
=item make_leaf()

Create an article based on the imported data.

Called with the importer followed by the article fields as key/value
pairs.  The article is created with bse_make_article(), a C<make_leaf>
event is sent to the importer, and the new article is returned.

Overridden in the product importer to create products.

=cut

sub make_leaf {
  my ($self, $importer, %entry) = @_;

  my $article = bse_make_article(%entry);
  $importer->event(make_leaf => { leaf => $article });

  return $article;
}
480
=item fill_leaf()

Fill the article some more.

Currently sets the tags: a non-empty C<tags> value is split on C</>
and passed to the article's set_tags() method.  Dies if set_tags()
fails.  Returns true.

Overridden by the product target to set product options and tiered
pricing.

=cut

sub fill_leaf {
  my ($self, $importer, $leaf, %entry) = @_;

  # nothing to do without tags
  return 1
    unless $entry{tags};

  my @tags = split '/', $entry{tags};
  my $error;
  $leaf->set_tags(\@tags, \$error)
    or die "Error setting tags: $error";

  return 1;
}
505
=item _find_parent()

Find a parent article.

Called with the importer, the id of the article to start from and the
remaining parent titles.  The first title is matched, ignoring case,
against the children of the starting article; if there is no match a
new parent is created with make_parent().  Each parent used is
recorded once for parents().

This method calls itself recursively to work down a tree of parents,
and returns the id of the last parent, or the starting id if no titles
are supplied.

=cut

sub _find_parent {
  my ($self, $importer, $parent, @parents) = @_;

  # nothing left to descend into
  return $parent
    unless @parents;

  # the children of each parent are only fetched once
  my $cache = $self->{parent_cache};
  $cache->{$parent} ||= [ $self->children_of($parent) ];
  my $siblings = $cache->{$parent};

  my ($title, @rest) = @parents;
  my $wanted = lc $title;
  my $cat;
  for my $kid (@$siblings) {
    if (lc($kid->{title}) eq $wanted) {
      $cat = $kid;
      last;
    }
  }

  if (!$cat) {
    my %opts =
      (
       cfg => $importer->cfg,
       parentid => $parent,
       title => $title,
       body => $title,
      );
    if ($self->{catalog_template}) {
      $opts{template} = $self->{catalog_template};
    }
    $cat = $self->make_parent($importer, %opts);
    $importer->info("Add parent $cat->{id}: $title");
    push @$siblings, $cat;
  }

  # record each parent only once
  my $cat_id = $cat->{id};
  if (!$self->{catseen}{$cat_id}) {
    $self->{catseen}{$cat_id} = 1;
    push @{$self->{parents}}, $cat;
  }

  return $self->_find_parent($importer, $cat_id, @rest);
}
549
d415d0ba
TC
=item default_parent()

Return the default parent id, C<-1>.

Overridden by the product target to return the shop id.

=cut

sub default_parent {
  return -1;
}
559
d415d0ba
TC
=item default_code_field()

Return the default code field, C<linkAlias>.

Overridden by the product target to return the C<product_code> field.

=cut

sub default_code_field {
  return "linkAlias";
}
569
d415d0ba
TC
=item leaves()

Return the leaf articles created or modified by the import run.

Returns an empty list if no run has been started.

=cut

sub leaves {
  my ($self) = @_;

  # the list does not exist until start() is called
  return @{ $self->{leaves} || [] };
}
579
d415d0ba
TC
=item parents()

Return the parent articles created or used by the import run.

Returns an empty list if no run has been started.

=cut

sub parents {
  my ($self) = @_;

  # the list does not exist until start() is called
  return @{ $self->{parents} || [] };
}
589
57e4a9c7
TC
=item key_fields()

Columns that can act as keys: C<id> and C<linkAlias>, in that order.

=cut

sub key_fields {
  return ('id', 'linkAlias');
}
599
1455f602
TC
=item validate_make_leaf

Perform validation only needed on creation.

Dies if the entry has a non-empty C<linkAlias> that is already used by
another article.

=cut

sub validate_make_leaf {
  my ($self, $importer, $entry) = @_;

  my $alias = $entry->{linkAlias};
  if (defined $alias && length $alias) {
    my $other = BSE::TB::Articles->getBy(linkAlias => $alias);
    $other
      and die "Duplicate linkAlias value with article ", $other->id, "\n";
  }
}
615
3709451d 6161;
d415d0ba
TC
617
618=back
619
620=head1 AUTHOR
621
622Tony Cook <tony@develop-help.com>
623
624=cut