]> git.imager.perl.org - bse.git/blame - site/cgi-bin/modules/BSE/Importer.pm
fix has_tags([$names]) and add has_tag($name)/has_tag($tag_object)
[bse.git] / site / cgi-bin / modules / BSE / Importer.pm
CommitLineData
3709451d
TC
1package BSE::Importer;
2use strict;
3use Config;
4
3eed7da4 5our $VERSION = "1.008";
d415d0ba
TC
6
7=head1 NAME
8
9BSE::Importer - generic import framework
10
11=head1 SYNOPSIS
12
13 [import profile foo]
14 map_title=1
15 map_linkAlias=2
16 set_template=common/default.tmpl
17 xform_customInt1 = int(rand 100)
18
19 use BSE::Importer;
20
21 my $profiles = BSE::Importer->profiles($cfg);
22 my $imp = BSE::Importer->new(cfg => $cfg, profile => $name);
23 $imp->process($filename);
24
25=head1 CONFIGURATION
26
27=head2 [import profiles]
28
29This can be used to provide display names for the defined profiles.
30
31Each key is a profile id and the value is the display name.
32
33=head2 [import profile I<name>]
34
35Defines an import profile, with the following keys:
36
37=over
38
39=item *
40
41C<< map_I<field> >> - defines which column number in the source which
42be mapped to the specificed field. The value must be numeric.
43
44=item *
45
46C<< set_I<field> >> - set the value of the given field to a specific
47value.
48
49=item *
50
51C<< xform_I<field> >> - perl code to transform other input values to
52the value of the specified field.
53
54=item *
55
56C<cat1>, C<cat2>, C<cat3> - the chain of catalog names leading to a
57product.
58
59=item *
60
61C<file_path> - PATH format list of directories to search for attached
62files such as images.
63
64=item *
65
66C<source> - the source file type, the source module name is this value
67with C<BSE::Importer::Source::> prepended, so a value of C<XLS> will use the
68C<BSE::Importer::Source::XLS> module.
69
70=item *
71
72C<target> - the target object type, the target module name is this
73value with C<BSE::Importer::Target::> prepended, so a value of
74C<Product> will use the C<BSE::Importer::Target::Product> module.
75
57e4a9c7
TC
76=item *
77
78C<update_only> - if true, the profile will only update existing
79records. This may change which fields are required.
80
d415d0ba
TC
81=back
82
83The source and target module may include their own configuration in
84this section.
85
86=head1 CLASS METHODS
87
88=over
89
90=item new()
91
92BSE::Importer->new(profile => $profile, ...)
93
94Create a new importer. Parameters are:
95
96=over
97
98=item *
99
100C<profile> - the import profile to process
101
102=item *
103
104C<cfg> - the BSE::Cfg object to use for configuration
105
106=item *
107
108C<callback> - a sub ref to call for messages generated during
109processing.
110
57e4a9c7
TC
111=item *
112
113C<listen> - a hashref of event handlers.
114
3f58d535
TC
115=item *
116
117C<actor> - an actor name suitable for audit logging.
118
d415d0ba
TC
119=back
120
121If the profile is invalid, new() with die with a newline terminated
122error message.
123
124=cut
cb7fd78d 125
3709451d
TC
126sub new {
127 my ($class, %opts) = @_;
128
d415d0ba 129 my $cfg = delete $opts{cfg} || BSE::Cfg->single;
3709451d
TC
130 my $profile = delete $opts{profile}
131 or die "Missing profile option\n";
132
133
134 my $self = bless
135 {
136 cfg => $cfg,
137 profile => $profile,
138 section => "import profile $profile",
139 callback => scalar(delete $opts{callback}),
3eed7da4 140 actor => $opts{actor} || "U",
3709451d
TC
141 }, $class;
142
143 # field mapping
144 my $section = $self->section;
145 my %ids = $cfg->entriesCS($section);
146 keys %ids
147 or die "No entries found for profile $profile\n";
148
149 my %map;
150 for my $map (grep /^map_\w+$/, keys %ids) {
151 (my $out = $map) =~ s/^map_//;
152 my $in = $ids{$map};
153 $in =~ /^\d+$/
154 or die "Mapping for $out not numeric\n";
155 $map{$out} = $in;
156 }
157 $self->{map} = \%map;
158
159 my %set;
160 for my $set (grep /^set_\w+$/, keys %ids) {
161 (my $out = $set) =~ s/^set_//;
162 $set{$out} = $ids{$set};
163 }
164 $self->{set} = \%set;
165
166 my %xform;
167 for my $xform (grep /^xform_\w+$/, keys %ids) {
168 (my $out = $xform) =~ s/^xform_//;
169 $map{$out}
170 or die "Xform for $out but no mapping\n";
df2663f0
TC
171 my $code = <<EOS;
172sub { (local \$_, my \$product) = \@_;
173#line 1 "Xform $xform code"
174$ids{$xform};
175return \$_
176}
177EOS
3709451d
TC
178 my $sub = eval $code;
179 $sub
180 or die "Compilation error for $xform code: $@\n";
181 $xform{$out} = $sub;
182 }
183 $self->{xform} = \%xform;
184
185 my @cats;
186 for my $cat (qw/cat1 cat2 cat3/) {
187 my $col = $ids{$cat};
188 $col and push @cats, $col;
189 }
190 $self->{cats} = \@cats;
191
192 my $file_path = $self->cfg_entry('file_path', delete $opts{file_path});
193 defined $file_path or $file_path = '';
194 my @file_path = split /$Config{path_sep}/, $file_path;
195 if ($opts{file_path}) {
196 unshift @file_path,
197 map
198 {
199 split /$Config{path_sep}/, $_
200 }
201 @{$opts{file_path}};
202 }
203 $self->{file_path} = \@file_path;
204
57e4a9c7 205 $self->{update_only} = $self->cfg_entry('update_only', 0);
3709451d
TC
206
207 my $source_type = $self->cfg_entry("source", "XLS");
d415d0ba 208 $self->{source_class} = "BSE::Importer::Source::$source_type";
3709451d
TC
209
210 $self->_do_require($self->{source_class});
211 $self->{source} = $self->{source_class}->new
212 (
213 importer => $self,
214 opts => \%opts,
215 );
216
217 my $target_type = $self->cfg_entry("target", "Product");
d415d0ba 218 $self->{target_class} = "BSE::Importer::Target::$target_type";
3709451d
TC
219 $self->_do_require($self->{target_class});
220 $self->{target} = $self->{target_class}->new
221 (
222 importer => $self,
223 opts => \%opts,
224 );
225
3709451d
TC
226 return $self;
227}
228
d415d0ba
TC
229=item profiles()
230
231Return a hashref mapping profile names to display names.
232
233=cut
234
3709451d
TC
235sub profiles {
236 my ($class, $cfg) = @_;
237
d415d0ba
TC
238 $cfg ||= BSE::Cfg->single;
239
3709451d
TC
240 my %ids = $cfg->entries("import profiles");
241 return \%ids;
242}
243
d415d0ba 244=back
3709451d 245
d415d0ba 246=head1 OBJECT METHODS
3709451d 247
d415d0ba 248=head2 Processing
3709451d 249
d415d0ba 250=over
3709451d 251
d415d0ba 252=item process()
3709451d 253
d415d0ba 254 $imp->process($filename);
3709451d 255
d415d0ba
TC
256Process the specified file, importing the data.
257
258Note that while the current source treats the argument as a filename,
259future sources may treat it as a URL or pretty much anything else.
260
261=cut
3709451d
TC
262
263sub process {
264 my ($self, @source_info) = @_;
265
266 $self->{target}->start($self);
267 $self->{source}->each_row($self, @source_info);
268}
269
d415d0ba
TC
270=item errors()
271
272Valid after process() is called, return a list of errors encountered
273during processing.
274
275=cut
276
277sub errors {
278 $_[0]{errors}
279 and return @{$_[0]{errors}};
280
281 return;
282}
283
284=item leaves()
285
286Valid after process() is called, return a list of created imported
287objects.
288
289=cut
290
291sub leaves {
292 return $_[0]{target}->leaves;
293}
294
295=item parents()
296
297Valid after process() is called, return a list of synthesized parent
298objects (if any).
299
300=cut
301
302sub parents {
303 return $_[0]{target}->parents;
304}
305
e0946a86
TC
306=item set_callback()
307
308Replace the callback sub reference.
309
310=cut
311
312sub set_callback {
313 my ($self, $callback) = @_;
314
315 $self->{callback} = $callback;
316}
317
d415d0ba
TC
318=back
319
320=head2 Internal
321
322These are for use my sources and targets.
323
324=over
325
326=item row()
327
328 $imp->row($source)
329
330Called by the source to process each row.
331
332=cut
333
3709451d
TC
334sub row {
335 my ($self, $source) = @_;
336
337 eval {
338 my %entry = %{$self->{set}};
339
340 # load from mapping
341 my $non_blank = 0;
342 for my $col (keys %{$self->{map}}) {
343 $entry{$col} = $source->get_column($self->{map}{$col});
344 $non_blank ||= $entry{$col} =~ /\S/;
345 }
346 $non_blank
347 or return;
348 for my $col (keys %{$self->{xform}}) {
349 $entry{$col} = $self->{xform}{$col}->($entry{$col}, \%entry);
350 }
351 my @parents;
352 for my $cat (@{$self->{cats}}) {
353 my $value = $source->get_column($cat);
354 defined $value && $value =~ /\S/
355 and push @parents, $value;
356 }
57e4a9c7 357 $self->event(row => { entry => \%entry, parents => \@parents });
3709451d
TC
358 $self->{target}->row($self, \%entry, \@parents);
359 };
360 if ($@) {
361 my $error = $source->rowid . ": $@";
362 $error =~ s/\n\z//;
363 $error =~ tr/\n/ /s;
364 push @{$self->{errors}}, $error;
365 $self->warn("Error: $error");
57e4a9c7 366 $self->event(error => { msg => $error });
3709451d
TC
367 }
368}
369
d415d0ba 370=item _do_require()
3709451d 371
d415d0ba
TC
372Load a module by module name and perform a default import.
373
374=cut
3709451d
TC
375
376sub _do_require {
377 my ($self, $class) = @_;
378
379 (my $file = $class . ".pm") =~ s!::!/!g;
380
381 require $file;
382 $file->import;
383
384 1;
385}
386
d415d0ba
TC
387=item info()
388
389 $imp->info(@msg)
390
391Called by various parts of the system to produce informational messages.
392
393=cut
394
3709451d
TC
395sub info {
396 my ($self, @msg) = @_;
397
398 $self->{callback}
399 and $self->{callback}->("@msg");
400}
401
d415d0ba
TC
402=item warn()
403
404 $imp->warn(@msg);
405
406Called by various parts of the system to produce warning messaged for
407the current row.
408
409=cut
410
3709451d
TC
411sub warn {
412 my ($self, @msg) = @_;
413
414 $self->{callback}
415 and $self->{callback}->($self->{source}->rowid, ": @msg");
416}
417
57e4a9c7
TC
418=item event()
419
420Called by various parts of the system to report events. These are
421intended for tools.
422
423=cut
424
425sub event {
426 my ($self, $event, $args) = @_;
427
428 if ($self->{listen}{$event}) {
429 $self->{listen}{$event}->($event, $args);
430 }
431}
432
d415d0ba
TC
433=item find_file()
434
435 my $fullname = $imp->find_file($filename)
436
437Search the configured file search path for C<$filename> and return the
438full path to the file.
439
440Returns an empty list on failure.
441
442=cut
443
3709451d
TC
444sub find_file {
445 my ($self, $file) = @_;
446
447 for my $path (@{$self->{file_path}}) {
448 my $full = "$path/$file";
449 -f $full and return $full;
450 }
451
452 return;
453}
454
d415d0ba
TC
455=item section()
456
457Return the configuration section for the profile.
458
459=cut
460
461sub section {
462 my ($self) = @_;
463
464 return "import profile $self->{profile}";
3709451d
TC
465}
466
d415d0ba
TC
467=item maps()
468
469Return a hash reference mapping field names to column numbers.
470
471=cut
472
473sub maps {
474 $_[0]{map};
475}
476
477=item cfg()
478
479Return the BSE::Cfg object used to configure the importer.
480
481=cut
482
483sub cfg {
484 $_[0]{cfg};
485}
486
487=item profile()
488
489Return the profile name.
490
491=cut
492
493sub profile {
494 $_[0]{profile};
495}
496
497=item cfg_entry()
498
499 my $value = $imp->cfg_entry($key, $default)
500
501Return the specified config value from the section for this profile.
502
503=cut
504
505sub cfg_entry {
506 my ($self, $key, $default) = @_;
507
508 return $self->{cfg}->entry($self->{section}, $key, $default);
3709451d
TC
509}
510
57e4a9c7
TC
511=item update_only
512
513Returns true if only performing updates.
514
515=cut
516
517sub update_only {
518 $_[0]{update_only};
519}
520
3f58d535
TC
521=item actor
522
523The actor supplied to new.
524
525=cut
526
527sub actor {
3eed7da4 528 $_[0]{actor};
3f58d535
TC
529}
530
3709451d 5311;
d415d0ba
TC
532
533=back
534
535=head1 SEE ALSO
536
537L<BSE::Importer::Source::Base>, L<BSE::Importer::Source::XLS>,
6b801f1a
TC
538L<BSE::Importer::Source::CSV>, L<BSE::Importer::Target::Base>,
539L<BSE::Importer::Target::Article>, L<BSE::Importer::Target::Product>,
d415d0ba
TC
540
541=head1 AUTHOR
542
543Tony Cook <tony@develop-help.com>
544
545=cut
546