]>
Commit | Line | Data |
---|---|---|
3709451d TC |
1 | package BSE::Importer; |
2 | use strict; | |
3 | use Config; | |
4 | ||
3eed7da4 | 5 | our $VERSION = "1.008"; |
d415d0ba TC |
6 | |
7 | =head1 NAME | |
8 | ||
9 | BSE::Importer - generic import framework | |
10 | ||
11 | =head1 SYNOPSIS | |
12 | ||
13 | [import profile foo] | |
14 | map_title=1 | |
15 | map_linkAlias=2 | |
16 | set_template=common/default.tmpl | |
17 | xform_customInt1 = int(rand 100) | |
18 | ||
19 | use BSE::Importer; | |
20 | ||
21 | my $profiles = BSE::Importer->profiles($cfg); | |
22 | my $imp = BSE::Importer->new(cfg => $cfg, profile => $name); | |
23 | $imp->process($filename); | |
24 | ||
25 | =head1 CONFIGURATION | |
26 | ||
27 | =head2 [import profiles] | |
28 | ||
29 | This can be used to provide display names for the defined profiles. | |
30 | ||
31 | Each key is a profile id and the value is the display name. | |
32 | ||
33 | =head2 [import profile I<name>] | |
34 | ||
35 | Defines an import profile, with the following keys: | |
36 | ||
37 | =over | |
38 | ||
39 | =item * | |
40 | ||
41 | C<< map_I<field> >> - defines which column number in the source which | |
42 | be mapped to the specificed field. The value must be numeric. | |
43 | ||
44 | =item * | |
45 | ||
46 | C<< set_I<field> >> - set the value of the given field to a specific | |
47 | value. | |
48 | ||
49 | =item * | |
50 | ||
51 | C<< xform_I<field> >> - perl code to transform other input values to | |
52 | the value of the specified field. | |
53 | ||
54 | =item * | |
55 | ||
56 | C<cat1>, C<cat2>, C<cat3> - the chain of catalog names leading to a | |
57 | product. | |
58 | ||
59 | =item * | |
60 | ||
61 | C<file_path> - PATH format list of directories to search for attached | |
62 | files such as images. | |
63 | ||
64 | =item * | |
65 | ||
66 | C<source> - the source file type, the source module name is this value | |
67 | with C<BSE::Importer::Source::> prepended, so a value of C<XLS> will use the | |
68 | C<BSE::Importer::Source::XLS> module. | |
69 | ||
70 | =item * | |
71 | ||
72 | C<target> - the target object type, the target module name is this | |
73 | value with C<BSE::Importer::Target::> prepended, so a value of | |
74 | C<Product> will use the C<BSE::Importer::Target::Product> module. | |
75 | ||
57e4a9c7 TC |
76 | =item * |
77 | ||
78 | C<update_only> - if true, the profile will only update existing | |
79 | records. This may change which fields are required. | |
80 | ||
d415d0ba TC |
81 | =back |
82 | ||
83 | The source and target module may include their own configuration in | |
84 | this section. | |
85 | ||
86 | =head1 CLASS METHODS | |
87 | ||
88 | =over | |
89 | ||
90 | =item new() | |
91 | ||
92 | BSE::Importer->new(profile => $profile, ...) | |
93 | ||
94 | Create a new importer. Parameters are: | |
95 | ||
96 | =over | |
97 | ||
98 | =item * | |
99 | ||
100 | C<profile> - the import profile to process | |
101 | ||
102 | =item * | |
103 | ||
104 | C<cfg> - the BSE::Cfg object to use for configuration | |
105 | ||
106 | =item * | |
107 | ||
108 | C<callback> - a sub ref to call for messages generated during | |
109 | processing. | |
110 | ||
57e4a9c7 TC |
111 | =item * |
112 | ||
113 | C<listen> - a hashref of event handlers. | |
114 | ||
3f58d535 TC |
115 | =item * |
116 | ||
117 | C<actor> - an actor name suitable for audit logging. | |
118 | ||
d415d0ba TC |
119 | =back |
120 | ||
121 | If the profile is invalid, new() with die with a newline terminated | |
122 | error message. | |
123 | ||
124 | =cut | |
cb7fd78d | 125 | |
3709451d TC |
126 | sub new { |
127 | my ($class, %opts) = @_; | |
128 | ||
d415d0ba | 129 | my $cfg = delete $opts{cfg} || BSE::Cfg->single; |
3709451d TC |
130 | my $profile = delete $opts{profile} |
131 | or die "Missing profile option\n"; | |
132 | ||
133 | ||
134 | my $self = bless | |
135 | { | |
136 | cfg => $cfg, | |
137 | profile => $profile, | |
138 | section => "import profile $profile", | |
139 | callback => scalar(delete $opts{callback}), | |
3eed7da4 | 140 | actor => $opts{actor} || "U", |
3709451d TC |
141 | }, $class; |
142 | ||
143 | # field mapping | |
144 | my $section = $self->section; | |
145 | my %ids = $cfg->entriesCS($section); | |
146 | keys %ids | |
147 | or die "No entries found for profile $profile\n"; | |
148 | ||
149 | my %map; | |
150 | for my $map (grep /^map_\w+$/, keys %ids) { | |
151 | (my $out = $map) =~ s/^map_//; | |
152 | my $in = $ids{$map}; | |
153 | $in =~ /^\d+$/ | |
154 | or die "Mapping for $out not numeric\n"; | |
155 | $map{$out} = $in; | |
156 | } | |
157 | $self->{map} = \%map; | |
158 | ||
159 | my %set; | |
160 | for my $set (grep /^set_\w+$/, keys %ids) { | |
161 | (my $out = $set) =~ s/^set_//; | |
162 | $set{$out} = $ids{$set}; | |
163 | } | |
164 | $self->{set} = \%set; | |
165 | ||
166 | my %xform; | |
167 | for my $xform (grep /^xform_\w+$/, keys %ids) { | |
168 | (my $out = $xform) =~ s/^xform_//; | |
169 | $map{$out} | |
170 | or die "Xform for $out but no mapping\n"; | |
df2663f0 TC |
171 | my $code = <<EOS; |
172 | sub { (local \$_, my \$product) = \@_; | |
173 | #line 1 "Xform $xform code" | |
174 | $ids{$xform}; | |
175 | return \$_ | |
176 | } | |
177 | EOS | |
3709451d TC |
178 | my $sub = eval $code; |
179 | $sub | |
180 | or die "Compilation error for $xform code: $@\n"; | |
181 | $xform{$out} = $sub; | |
182 | } | |
183 | $self->{xform} = \%xform; | |
184 | ||
185 | my @cats; | |
186 | for my $cat (qw/cat1 cat2 cat3/) { | |
187 | my $col = $ids{$cat}; | |
188 | $col and push @cats, $col; | |
189 | } | |
190 | $self->{cats} = \@cats; | |
191 | ||
192 | my $file_path = $self->cfg_entry('file_path', delete $opts{file_path}); | |
193 | defined $file_path or $file_path = ''; | |
194 | my @file_path = split /$Config{path_sep}/, $file_path; | |
195 | if ($opts{file_path}) { | |
196 | unshift @file_path, | |
197 | map | |
198 | { | |
199 | split /$Config{path_sep}/, $_ | |
200 | } | |
201 | @{$opts{file_path}}; | |
202 | } | |
203 | $self->{file_path} = \@file_path; | |
204 | ||
57e4a9c7 | 205 | $self->{update_only} = $self->cfg_entry('update_only', 0); |
3709451d TC |
206 | |
207 | my $source_type = $self->cfg_entry("source", "XLS"); | |
d415d0ba | 208 | $self->{source_class} = "BSE::Importer::Source::$source_type"; |
3709451d TC |
209 | |
210 | $self->_do_require($self->{source_class}); | |
211 | $self->{source} = $self->{source_class}->new | |
212 | ( | |
213 | importer => $self, | |
214 | opts => \%opts, | |
215 | ); | |
216 | ||
217 | my $target_type = $self->cfg_entry("target", "Product"); | |
d415d0ba | 218 | $self->{target_class} = "BSE::Importer::Target::$target_type"; |
3709451d TC |
219 | $self->_do_require($self->{target_class}); |
220 | $self->{target} = $self->{target_class}->new | |
221 | ( | |
222 | importer => $self, | |
223 | opts => \%opts, | |
224 | ); | |
225 | ||
3709451d TC |
226 | return $self; |
227 | } | |
228 | ||
d415d0ba TC |
229 | =item profiles() |
230 | ||
231 | Return a hashref mapping profile names to display names. | |
232 | ||
233 | =cut | |
234 | ||
3709451d TC |
235 | sub profiles { |
236 | my ($class, $cfg) = @_; | |
237 | ||
d415d0ba TC |
238 | $cfg ||= BSE::Cfg->single; |
239 | ||
3709451d TC |
240 | my %ids = $cfg->entries("import profiles"); |
241 | return \%ids; | |
242 | } | |
243 | ||
d415d0ba | 244 | =back |
3709451d | 245 | |
d415d0ba | 246 | =head1 OBJECT METHODS |
3709451d | 247 | |
d415d0ba | 248 | =head2 Processing |
3709451d | 249 | |
d415d0ba | 250 | =over |
3709451d | 251 | |
d415d0ba | 252 | =item process() |
3709451d | 253 | |
d415d0ba | 254 | $imp->process($filename); |
3709451d | 255 | |
d415d0ba TC |
256 | Process the specified file, importing the data. |
257 | ||
258 | Note that while the current source treats the argument as a filename, | |
259 | future sources may treat it as a URL or pretty much anything else. | |
260 | ||
261 | =cut | |
3709451d TC |
262 | |
263 | sub process { | |
264 | my ($self, @source_info) = @_; | |
265 | ||
266 | $self->{target}->start($self); | |
267 | $self->{source}->each_row($self, @source_info); | |
268 | } | |
269 | ||
d415d0ba TC |
270 | =item errors() |
271 | ||
272 | Valid after process() is called, return a list of errors encountered | |
273 | during processing. | |
274 | ||
275 | =cut | |
276 | ||
277 | sub errors { | |
278 | $_[0]{errors} | |
279 | and return @{$_[0]{errors}}; | |
280 | ||
281 | return; | |
282 | } | |
283 | ||
284 | =item leaves() | |
285 | ||
286 | Valid after process() is called, return a list of created imported | |
287 | objects. | |
288 | ||
289 | =cut | |
290 | ||
291 | sub leaves { | |
292 | return $_[0]{target}->leaves; | |
293 | } | |
294 | ||
295 | =item parents() | |
296 | ||
297 | Valid after process() is called, return a list of synthesized parent | |
298 | objects (if any). | |
299 | ||
300 | =cut | |
301 | ||
302 | sub parents { | |
303 | return $_[0]{target}->parents; | |
304 | } | |
305 | ||
e0946a86 TC |
306 | =item set_callback() |
307 | ||
308 | Replace the callback sub reference. | |
309 | ||
310 | =cut | |
311 | ||
312 | sub set_callback { | |
313 | my ($self, $callback) = @_; | |
314 | ||
315 | $self->{callback} = $callback; | |
316 | } | |
317 | ||
d415d0ba TC |
318 | =back |
319 | ||
320 | =head2 Internal | |
321 | ||
322 | These are for use my sources and targets. | |
323 | ||
324 | =over | |
325 | ||
326 | =item row() | |
327 | ||
328 | $imp->row($source) | |
329 | ||
330 | Called by the source to process each row. | |
331 | ||
332 | =cut | |
333 | ||
3709451d TC |
334 | sub row { |
335 | my ($self, $source) = @_; | |
336 | ||
337 | eval { | |
338 | my %entry = %{$self->{set}}; | |
339 | ||
340 | # load from mapping | |
341 | my $non_blank = 0; | |
342 | for my $col (keys %{$self->{map}}) { | |
343 | $entry{$col} = $source->get_column($self->{map}{$col}); | |
344 | $non_blank ||= $entry{$col} =~ /\S/; | |
345 | } | |
346 | $non_blank | |
347 | or return; | |
348 | for my $col (keys %{$self->{xform}}) { | |
349 | $entry{$col} = $self->{xform}{$col}->($entry{$col}, \%entry); | |
350 | } | |
351 | my @parents; | |
352 | for my $cat (@{$self->{cats}}) { | |
353 | my $value = $source->get_column($cat); | |
354 | defined $value && $value =~ /\S/ | |
355 | and push @parents, $value; | |
356 | } | |
57e4a9c7 | 357 | $self->event(row => { entry => \%entry, parents => \@parents }); |
3709451d TC |
358 | $self->{target}->row($self, \%entry, \@parents); |
359 | }; | |
360 | if ($@) { | |
361 | my $error = $source->rowid . ": $@"; | |
362 | $error =~ s/\n\z//; | |
363 | $error =~ tr/\n/ /s; | |
364 | push @{$self->{errors}}, $error; | |
365 | $self->warn("Error: $error"); | |
57e4a9c7 | 366 | $self->event(error => { msg => $error }); |
3709451d TC |
367 | } |
368 | } | |
369 | ||
d415d0ba | 370 | =item _do_require() |
3709451d | 371 | |
d415d0ba TC |
372 | Load a module by module name and perform a default import. |
373 | ||
374 | =cut | |
3709451d TC |
375 | |
376 | sub _do_require { | |
377 | my ($self, $class) = @_; | |
378 | ||
379 | (my $file = $class . ".pm") =~ s!::!/!g; | |
380 | ||
381 | require $file; | |
382 | $file->import; | |
383 | ||
384 | 1; | |
385 | } | |
386 | ||
d415d0ba TC |
387 | =item info() |
388 | ||
389 | $imp->info(@msg) | |
390 | ||
391 | Called by various parts of the system to produce informational messages. | |
392 | ||
393 | =cut | |
394 | ||
3709451d TC |
395 | sub info { |
396 | my ($self, @msg) = @_; | |
397 | ||
398 | $self->{callback} | |
399 | and $self->{callback}->("@msg"); | |
400 | } | |
401 | ||
d415d0ba TC |
402 | =item warn() |
403 | ||
404 | $imp->warn(@msg); | |
405 | ||
406 | Called by various parts of the system to produce warning messaged for | |
407 | the current row. | |
408 | ||
409 | =cut | |
410 | ||
3709451d TC |
411 | sub warn { |
412 | my ($self, @msg) = @_; | |
413 | ||
414 | $self->{callback} | |
415 | and $self->{callback}->($self->{source}->rowid, ": @msg"); | |
416 | } | |
417 | ||
57e4a9c7 TC |
418 | =item event() |
419 | ||
420 | Called by various parts of the system to report events. These are | |
421 | intended for tools. | |
422 | ||
423 | =cut | |
424 | ||
425 | sub event { | |
426 | my ($self, $event, $args) = @_; | |
427 | ||
428 | if ($self->{listen}{$event}) { | |
429 | $self->{listen}{$event}->($event, $args); | |
430 | } | |
431 | } | |
432 | ||
d415d0ba TC |
433 | =item find_file() |
434 | ||
435 | my $fullname = $imp->find_file($filename) | |
436 | ||
437 | Search the configured file search path for C<$filename> and return the | |
438 | full path to the file. | |
439 | ||
440 | Returns an empty list on failure. | |
441 | ||
442 | =cut | |
443 | ||
3709451d TC |
444 | sub find_file { |
445 | my ($self, $file) = @_; | |
446 | ||
447 | for my $path (@{$self->{file_path}}) { | |
448 | my $full = "$path/$file"; | |
449 | -f $full and return $full; | |
450 | } | |
451 | ||
452 | return; | |
453 | } | |
454 | ||
d415d0ba TC |
455 | =item section() |
456 | ||
457 | Return the configuration section for the profile. | |
458 | ||
459 | =cut | |
460 | ||
461 | sub section { | |
462 | my ($self) = @_; | |
463 | ||
464 | return "import profile $self->{profile}"; | |
3709451d TC |
465 | } |
466 | ||
d415d0ba TC |
467 | =item maps() |
468 | ||
469 | Return a hash reference mapping field names to column numbers. | |
470 | ||
471 | =cut | |
472 | ||
473 | sub maps { | |
474 | $_[0]{map}; | |
475 | } | |
476 | ||
477 | =item cfg() | |
478 | ||
479 | Return the BSE::Cfg object used to configure the importer. | |
480 | ||
481 | =cut | |
482 | ||
483 | sub cfg { | |
484 | $_[0]{cfg}; | |
485 | } | |
486 | ||
487 | =item profile() | |
488 | ||
489 | Return the profile name. | |
490 | ||
491 | =cut | |
492 | ||
493 | sub profile { | |
494 | $_[0]{profile}; | |
495 | } | |
496 | ||
497 | =item cfg_entry() | |
498 | ||
499 | my $value = $imp->cfg_entry($key, $default) | |
500 | ||
501 | Return the specified config value from the section for this profile. | |
502 | ||
503 | =cut | |
504 | ||
505 | sub cfg_entry { | |
506 | my ($self, $key, $default) = @_; | |
507 | ||
508 | return $self->{cfg}->entry($self->{section}, $key, $default); | |
3709451d TC |
509 | } |
510 | ||
57e4a9c7 TC |
511 | =item update_only |
512 | ||
513 | Returns true if only performing updates. | |
514 | ||
515 | =cut | |
516 | ||
517 | sub update_only { | |
518 | $_[0]{update_only}; | |
519 | } | |
520 | ||
3f58d535 TC |
521 | =item actor |
522 | ||
523 | The actor supplied to new. | |
524 | ||
525 | =cut | |
526 | ||
527 | sub actor { | |
3eed7da4 | 528 | $_[0]{actor}; |
3f58d535 TC |
529 | } |
530 | ||
3709451d | 531 | 1; |
d415d0ba TC |
532 | |
533 | =back | |
534 | ||
535 | =head1 SEE ALSO | |
536 | ||
537 | L<BSE::Importer::Source::Base>, L<BSE::Importer::Source::XLS>, | |
6b801f1a TC |
538 | L<BSE::Importer::Source::CSV>, L<BSE::Importer::Target::Base>, |
539 | L<BSE::Importer::Target::Article>, L<BSE::Importer::Target::Product>, | |
d415d0ba TC |
540 | |
541 | =head1 AUTHOR | |
542 | ||
543 | Tony Cook <tony@develop-help.com> | |
544 | ||
545 | =cut | |
546 |