support importing from CSV files
authorTony Cook <tony@develop-help.com>
Thu, 28 Mar 2013 00:37:42 +0000 (11:37 +1100)
committerTony Cook <tony@develop-help.com>
Mon, 8 Apr 2013 00:54:35 +0000 (10:54 +1000)
MANIFEST
site/cgi-bin/modules/BSE/Importer.pm
site/cgi-bin/modules/BSE/Importer/Source/CSV.pm [new file with mode: 0644]
t/130-importer/000-load.t [new file with mode: 0644]
t/130-importer/010-csv.t [new file with mode: 0644]
t/data/importer/basic.csv [new file with mode: 0644]

index 1f5d99b..8148985 100644 (file)
--- a/MANIFEST
+++ b/MANIFEST
@@ -109,6 +109,7 @@ site/cgi-bin/modules/BSE/ImageHandler/Flash.pm
 site/cgi-bin/modules/BSE/ImageHandler/Img.pm
 site/cgi-bin/modules/BSE/Importer.pm
 site/cgi-bin/modules/BSE/Importer/Source/Base.pm
+site/cgi-bin/modules/BSE/Importer/Source/CSV.pm
 site/cgi-bin/modules/BSE/Importer/Source/XLS.pm
 site/cgi-bin/modules/BSE/Importer/Target/Article.pm
 site/cgi-bin/modules/BSE/Importer/Target/Base.pm
@@ -902,6 +903,8 @@ t/120-thumb/10scale.t
 t/120-thumb/data/scale40x30.png
 t/120-thumb/data/scale40x30fill.png
 t/120-thumb/data/simple.png
+t/130-importer/000-load.t
+t/130-importer/010-csv.t
 t/900-kwalitee/010-strict-warn.t
 t/900-kwalitee/020-checktemplates.t
 t/900-kwalitee/030-messages.t
@@ -913,6 +916,7 @@ t/cfg/cfg/99end.cfg
 t/cfg/isafile.cfg
 t/cfg/t/varinc.cfg
 t/data/govhouse.jpg
+t/data/importer/basic.csv
 t/data/known_pod_issues.txt
 t/data/t101.jpg
 t/t000load.t
index 9da93f5..8ab71c8 100644 (file)
@@ -2,7 +2,7 @@ package BSE::Importer;
 use strict;
 use Config;
 
-our $VERSION = "1.003";
+our $VERSION = "1.005";
 
 =head1 NAME
 
@@ -484,8 +484,8 @@ sub cfg_entry {
 =head1 SEE ALSO
 
 L<BSE::Importer::Source::Base>, L<BSE::Importer::Source::XLS>,
-L<BSE::Importer::Target::Base>, L<BSE::Importer::Target::Article>,
-L<BSE::Importer::Target::Product>,
+L<BSE::Importer::Source::CSV>, L<BSE::Importer::Target::Base>,
+L<BSE::Importer::Target::Article>, L<BSE::Importer::Target::Product>,
 
 =head1 AUTHOR
 
diff --git a/site/cgi-bin/modules/BSE/Importer/Source/CSV.pm b/site/cgi-bin/modules/BSE/Importer/Source/CSV.pm
new file mode 100644 (file)
index 0000000..4b715d5
--- /dev/null
@@ -0,0 +1,165 @@
+package BSE::Importer::Source::CSV;
+use strict;
+use base 'BSE::Importer::Source::Base';
+use Text::CSV;
+
+our $VERSION = "1.000";
+
+my @text_csv_options = qw(quote_char escape_char sep_char binary allow_loose_quotes allow_loose_escapes allow_whitespace);
+
+my @escape_options = qw(quote_char escape_char sep_char);
+
+=head1 NAME
+
+BSE::Importer::Source::CSV - import source for CSV files.
+
+=head1 SYNOPSIS
+
+   [import profile foo]
+   source=CSV
+   ; these are the defaults
+   skiplines=1
+   binary=1
+   quote_char="
+   sep_char=,
+   escape_char="
+   allow_loose_quotes=0
+   allow_loose_escaped=0
+   allow_whitespace=0
+   encoding=utf-8
+
+=head1 DESCRIPTION
+
+Uses CSV (comma separated values) text files a data source.
+
+=head1 CONFIGURATION
+
+The following extra configuration can be set in the profile's
+configuration, (mostly better described in L<Text::CSV>.
+
+=over
+
+=item *
+
+C<skiplines> - the number of lines for skip at the top, eg. for column
+headings.  Default: 1.
+
+=item *
+
+C<binary> - whether the file should be treated as binary.  The default
+is typically correct. Default: 1.
+
+=item *
+
+C<sep_char> - the separator character between columns.  Default: ",".
+To use tab:
+
+  sep_char=\t
+
+=item *
+
+C<allow_whitespace> - set to true to ignore whitespace around the
+separator.  Default: 0.
+
+=item *
+
+C<encoding> - the character encoding to use in the input text.
+Default: "utf-8".
+
+=item *
+
+C<quote_char> - the character used for quoting fields containing
+blanks or separators.  Default: '"'.
+
+=item *
+
+C<escape_char> - the character used for quote escapes.  Default: '"'.
+
+=back
+
+=cut
+
+sub new {
+  my ($class, %opts) = @_;
+
+  my $self = $class->SUPER::new(%opts);
+
+  my $importer = delete $opts{importer};
+  my $opts = delete $opts{opts};
+
+  $self->{skiplines} = $importer->cfg_entry('skiplines', 1);
+
+  $self->{encoding} = $importer->cfg_entry('encoding', 'utf-8');
+  my %csv_opts = ( binary => 1 );
+  for my $opt (@text_csv_options) {
+    my $val = $importer->cfg_entry($opt);
+    defined $val and $csv_opts{$opt} = $val;
+  }
+
+  for my $opt (@escape_options) {
+    if (defined (my $val = $csv_opts{$opt})) {
+      if ($val =~ /\A\\(?:[nrtfbae]|x[0-9a-fA-F]{2}|[0-7]{1,3}|c.)\z/) {
+       $csv_opts{$opt} = eval '"' . $val . '"';
+      }
+    }
+  }
+
+  $self->{csv_opts} = \%csv_opts;
+
+  return $self;
+}
+
+sub each_row {
+  my ($self, $importer, $filename) = @_;
+
+  open my $fh, "<encoding($self->{encoding})", $filename
+    or die "Cannot open file $filename: $!\n";
+
+  my $csv = Text::CSV->new($self->{csv_opts})
+    or die "Cannot use CSV: ", Text::CSV->error_diag(), "\n";
+
+  for my $line (1 .. $self->{skiplines}) {
+    my $row = $csv->getline($fh);
+    if (!$row) {
+      if ($csv->eof) {
+       die "Ran out of rows reading the headers\n";
+      }
+      else {
+       die "Error reading header rows: ", $csv->error_diag, "\n";
+      }
+    }
+  }
+
+  while (my $row = $csv->getline($fh)) {
+    $self->{row} = $row;
+    $self->{line} = $.;
+    $importer->row($self);
+  }
+}
+
+sub get_column {
+  my ($self, $colnum) = @_;
+
+  $colnum > @{$self->{row}}
+    and return '';
+
+  return $self->{row}[$colnum-1];
+}
+
+sub rowid {
+  my $self = shift;
+
+  return "Line " . $self->{line};
+}
+
+1;
+
+=head1 AUTHOR
+
+Tony Cook <tony@develop-help.com>
+
+=head1 SEE ALSO
+
+L<BSE::Importer>
+
+=cut
diff --git a/t/130-importer/000-load.t b/t/130-importer/000-load.t
new file mode 100644 (file)
index 0000000..2b20e31
--- /dev/null
@@ -0,0 +1,20 @@
+#!perl -w
+use strict;
+use Test::More tests => 7;
+
+use_ok("BSE::Importer");
+use_ok("BSE::Importer::Target::Base");
+use_ok("BSE::Importer::Source::Base");
+use_ok("BSE::Importer::Target::Article");
+use_ok("BSE::Importer::Target::Product");
+SKIP: {
+  eval "require Spreadsheet::ParseExcel;"
+    or skip "Cannot load Spreadsheet::ParseExcel", 1;
+  use_ok("BSE::Importer::Source::Base");
+}
+SKIP: {
+  eval "require Text::CSV;"
+    or skip "Cannot load Text::CSV", 1;
+  use_ok("BSE::Importer::Source::CSV");
+}
+
diff --git a/t/130-importer/010-csv.t b/t/130-importer/010-csv.t
new file mode 100644 (file)
index 0000000..4175117
--- /dev/null
@@ -0,0 +1,79 @@
+#!perl -w
+use strict;
+use Test::More tests => 6;
+use BSE::Cfg;
+BEGIN {
+  eval "require Text::CSV;"
+    or plan skip_all => "Text::CSV not available";
+}
+use BSE::Importer::Source::CSV;
+
+{
+  my $cfg = BSE::Cfg->new_from_text(text => <<CFG);
+[import profile test]
+source=CSV
+CFG
+  my $importer = DummyImporter->new(columns => 3, cfg => $cfg);
+  my $src = BSE::Importer::Source::CSV->new
+    (
+     importer => $importer,
+     opts => { profile => "test", cfg => $cfg },
+    );
+  ok($src, "make a CSV source");
+
+  $src->each_row($importer, "t/data/importer/basic.csv");
+  is_deeply($importer->{rows},
+      [
+       [ "Line 2", 1, 2, 3 ],
+       [ "Line 3", qw(abc def hij) ],
+      ], "check data read");
+}
+
+{
+  my $cfg = BSE::Cfg->new_from_text(text => <<'CFG');
+[import profile test]
+source=CSV
+sep_char=\t
+escape_char=\r
+quote_char=\f
+CFG
+  my $importer = DummyImporter->new(columns => 3, cfg => $cfg);
+  my $src = BSE::Importer::Source::CSV->new
+    (
+     importer => $importer,
+     opts => { profile => "test", cfg => $cfg },
+    );
+  ok($src, "make a CSV source with escapes");
+  is($src->{csv_opts}{sep_char}, "\t", "check sep_char escaping");
+  is($src->{csv_opts}{escape_char}, "\r", "check escape_char escaping");
+  is($src->{csv_opts}{quote_char}, "\f", "check quote_char escaping");
+}
+
+package DummyImporter;
+
+sub new {
+  my ($class, %opts) = @_;
+  $opts{rows} = [];
+
+  return bless \%opts, $class;
+}
+
+sub row {
+  my ($self, $src) = @_;
+
+  my @row = $src->rowid;
+  for my $col (1 .. $self->{columns}) {
+    push @row, $src->get_column($col);
+  }
+  push @{$self->{rows}}, \@row;
+}
+
+sub cfg_entry {
+  my ($self, $key, $def) = @_;
+
+  $self->{cfg}->entry('import profile test', $key, $def);
+}
+
+sub profile {
+  "test";
+}
diff --git a/t/data/importer/basic.csv b/t/data/importer/basic.csv
new file mode 100644 (file)
index 0000000..5f6d423
--- /dev/null
@@ -0,0 +1,3 @@
+skipped,skipped,skipped,
+1,2,3
+"abc","def",hij