[Koha-patches] [PATCH 5/9] MT2116: Addons to the CSV export

Tue Feb 16 21:46:53 CET 2010

From: Matthias Meusburger <matthias.meusburger at biblibre.com>

Adds the ability to customize CSV exports through the use of a YAML file.

The following customizations are available:
  - Preprocessing
  - Postprocessing
  - Field-by-field processing

The YAML file should be stored in the tools/csv-profiles/ directory and
named after the id of the CSV profile you want to customize.
An example file is provided in that directory.
---
 C4/Record.pm                    |   80 ++++++++++++++++++++++++++++++++++----
 basket/downloadcart.pl          |   31 ++++++++-------
 opac/opac-downloadcart.pl       |   30 ++++++++-------
 opac/opac-downloadshelf.pl      |   30 +++++++++------
 tools/csv-profiles/example.yaml |   15 +++++++
 virtualshelves/downloadshelf.pl |   30 +++++++++------
 6 files changed, 155 insertions(+), 61 deletions(-)
 create mode 100644 tools/csv-profiles/example.yaml

diff --git a/C4/Record.pm b/C4/Record.pm
index 7c3fcc5..65f3f7a 100644
--- a/C4/Record.pm
+++ b/C4/Record.pm
@@ -32,6 +32,7 @@ use XML::LibXML;
 use C4::Biblio; #marc2bibtex
 use C4::Csv; #marc2csv
 use C4::Koha; #marc2csv
+use YAML; #marcrecords2csv
 use Text::CSV::Encoded; #marc2csv
 
 use vars qw($VERSION @ISA @EXPORT);
@@ -52,7 +53,6 @@ $VERSION = 3.00;
   &marc2modsxml
   &marc2bibtex
   &marc2csv
-
   &html2marcxml
   &html2marc
   &changeEncoding
@@ -328,11 +328,59 @@ sub marc2endnote {
 	
 }
 
-=head2 marc2csv - Convert from UNIMARC to CSV
+=head2 marc2csv - Convert several records from UNIMARC to CSV
+Pre and postprocessing can be done through a YAML file
+
+=over 4
+
+my ($csv) = marc2csv($biblios, $csvprofileid);
+
+Returns a CSV scalar
+
+=over 2
+
+C<$biblios> - a reference to a list of biblionumbers
+
+C<$csvprofileid> - the id of the CSV profile to use for the export (see export_format.export_format_id and the GetCsvProfiles function in C4::Csv)
+
+=back
+
+=back
+
+=cut
+sub marc2csv {
+    my ($biblios, $id) = @_;
+    my $output;
+    my $csv = Text::CSV::Encoded->new();
+
+    # Getting yaml file
+    my $configfile = "../tools/csv-profiles/$id.yaml";
+    my ($preprocess, $postprocess, $fieldprocessing);
+    if (-e $configfile){
+        ($preprocess,$postprocess, $fieldprocessing) = YAML::LoadFile($configfile);
+    }
+
+    warn $fieldprocessing;
+    # Preprocessing
+    eval $preprocess if ($preprocess);
+
+    my $firstpass = 1;
+    foreach my $biblio (@$biblios) {
+	$output .= marcrecord2csv($biblio, $id, $firstpass, $csv, $fieldprocessing) ;
+	$firstpass = 0;
+    }
+
+    # Postprocessing
+    eval $postprocess if ($postprocess);
+
+    return $output;
+}
+
+=head2 marcrecord2csv - Convert a single record from UNIMARC to CSV
 
 =over 4
 
-my ($csv) = marc2csv($record, $csvprofileid, $header);
+my ($output) = marcrecord2csv($biblio, $csvprofileid, $header, $csv, $fieldprocessing);
 
 Returns a CSV scalar
 
@@ -344,6 +392,8 @@ C<$csvprofileid> - the id of the CSV profile to use for the export (see export_f
 
 C<$header> - true if the headers are to be printed (typically at first pass)
 
+C<$csv> - an already initialised Text::CSV::Encoded object
+
 =back
 
 =back
@@ -351,8 +401,8 @@ C<$header> - true if the headers are to be printed (typically at first pass)
 =cut
 
 
-sub marc2csv {
-    my ($biblio, $id, $header) = @_;
+sub marcrecord2csv {
+    my ($biblio, $id, $header, $csv, $fieldprocessing) = @_;
     my $output;
 
     # Getting the record
@@ -377,9 +427,8 @@ sub marc2csv {
     if ($fieldseparator eq '\t') { $fieldseparator = "\t" }
     if ($subfieldseparator eq '\t') { $subfieldseparator = "\t" }
 
-    # Init CSV
-    my $csv = Text::CSV::Encoded->new({ sep_char => $csvseparator });
-    $csv = $csv->encoding_out($encoding) if ($encoding ne 'utf8');
+    $csv->encoding_out($encoding) if ($encoding ne 'utf8');
+    $csv->sep_char($csvseparator);
 
     # Getting the marcfields
     my $marcfieldslist = $profile->{marcfields};
@@ -460,7 +509,20 @@ sub marc2csv {
 	} else {
 	    my @fields = ($record->field($marcfield));
 	    my $authvalues = GetKohaAuthorisedValuesFromField($marcfield, undef, $frameworkcode, undef);
-	    push (@fieldstab, join($fieldseparator, map((defined $authvalues->{$_->as_string}) ? $authvalues->{$_->as_string} : $_->as_string, @fields)));  		
+
+	    my @valuesarray;
+	    foreach (@fields) {
+		my $value;
+
+		# Getting authorised value
+		$value = defined $authvalues->{$_->as_string} ? $authvalues->{$_->as_string} : $_->as_string;
+
+		# Field processing
+		eval $fieldprocessing if ($fieldprocessing);
+
+		push @valuesarray, $value;
+	    }
+	    push (@fieldstab, join($fieldseparator, @valuesarray)); 
 	 }
     };
 
diff --git a/basket/downloadcart.pl b/basket/downloadcart.pl
index 4be46dd..5c3ff79 100755
--- a/basket/downloadcart.pl
+++ b/basket/downloadcart.pl
@@ -57,21 +57,24 @@ if ($bib_list && $format) {
     my $marcflavour         = C4::Context->preference('marcflavour');
     my $output;
 
-    # retrieve biblios from shelf
-    my $firstpass = 1;
-    foreach my $biblio (@bibs) {
-
-	my $record = GetMarcBiblio($biblio);
-
-	switch ($format) {
-	    case "iso2709" { $output .= $record->as_usmarc(); }
-	    case "ris"     { $output .= marc2ris($record); }
-	    case "bibtex"  { $output .= marc2bibtex($record, $biblio); }
-	    # We're in the case of a csv profile (firstpass is used for headers printing) :
-            case /^\d+$/   { $output .= marc2csv($biblio, $format, $firstpass); }
-	}
-        $firstpass = 0;
+    # CSV   
+    if ($format =~ /^\d+$/) {
+
+        $output = marc2csv(\@bibs, $format);
+    
+    # Other formats
+    } else {
+
+	foreach my $biblio (@bibs) {
 
+	    my $record = GetMarcBiblio($biblio);
+
+	    switch ($format) {
+		case "iso2709" { $output .= $record->as_usmarc(); }
+		case "ris"     { $output .= marc2ris($record); }
+		case "bibtex"  { $output .= marc2bibtex($record, $biblio); }
+	    }
+	}
     }
 
     # If it was a CSV export we change the format after the export so the file extension is fine
diff --git a/opac/opac-downloadcart.pl b/opac/opac-downloadcart.pl
index 241f879..3e7096b 100755
--- a/opac/opac-downloadcart.pl
+++ b/opac/opac-downloadcart.pl
@@ -57,21 +57,23 @@ if ($bib_list && $format) {
     my $marcflavour         = C4::Context->preference('marcflavour');
     my $output;
 
-    # retrieve biblios from shelf
-    my $firstpass = 1;
-    foreach my $biblio (@bibs) {
-
-	my $record = GetMarcBiblio($biblio);
-
-	switch ($format) {
-	    case "iso2709" { $output .= $record->as_usmarc(); }
-	    case "ris"     { $output .= marc2ris($record); }
-	    case "bibtex"  { $output .= marc2bibtex($record, $biblio); }
-	    # We're in the case of a csv profile (firstpass is used for headers printing) :
-            case /^\d+$/   { $output .= marc2csv($biblio, $format, $firstpass); }
-	}
-        $firstpass = 0;
+    # CSV   
+    if ($format =~ /^\d+$/) {
+
+        $output = marc2csv(\@bibs, $format);
+
+    # Other formats
+    } else {
+	foreach my $biblio (@bibs) {
 
+	    my $record = GetMarcBiblio($biblio);
+
+	    switch ($format) {
+		case "iso2709" { $output .= $record->as_usmarc(); }
+		case "ris"     { $output .= marc2ris($record); }
+		case "bibtex"  { $output .= marc2bibtex($record, $biblio); }
+	    }
+	}
     }
 
     # If it was a CSV export we change the format after the export so the file extension is fine
diff --git a/opac/opac-downloadshelf.pl b/opac/opac-downloadshelf.pl
index a03a79b..50b1927 100755
--- a/opac/opac-downloadshelf.pl
+++ b/opac/opac-downloadshelf.pl
@@ -57,21 +57,27 @@ if ($shelfid && $format) {
     my $marcflavour         = C4::Context->preference('marcflavour');
     my $output;
 
-    # retrieve biblios from shelf
-    my $firstpass = 1;
-    foreach my $biblio (@$items) {
-	my $biblionumber = $biblio->{biblionumber};
+   # CSV 
+    if ($format =~ /^\d+$/) {
+        my @biblios;
+        foreach (@$items) {
+            push @biblios, $_->{biblionumber};
+        }
+        $output = marc2csv(\@biblios, $format);
+            
+    # Other formats
+    } else {
+	foreach my $biblio (@$items) {
+	    my $biblionumber = $biblio->{biblionumber};
 
-	my $record = GetMarcBiblio($biblionumber);
+	    my $record = GetMarcBiblio($biblionumber);
 
-	switch ($format) {
-	    case "iso2709" { $output .= $record->as_usmarc(); }
-	    case "ris"     { $output .= marc2ris($record); }
-	    case "bibtex"  { $output .= marc2bibtex($record, $biblionumber); }
-	    # We're in the case of a csv profile (firstpass is used for headers printing) :
-	    case /^\d+$/   { $output .= marc2csv($biblionumber, $format, $firstpass); }
+	    switch ($format) {
+		case "iso2709" { $output .= $record->as_usmarc(); }
+		case "ris"     { $output .= marc2ris($record); }
+		case "bibtex"  { $output .= marc2bibtex($record, $biblionumber); }
+	    }
 	}
-	$firstpass = 0;
     }
 
     # If it was a CSV export we change the format after the export so the file extension is fine
diff --git a/tools/csv-profiles/example.yaml b/tools/csv-profiles/example.yaml
new file mode 100644
index 0000000..9c2e506
--- /dev/null
+++ b/tools/csv-profiles/example.yaml
@@ -0,0 +1,15 @@
+# CSV Export custom processing example
+# (should be named after the id of the CSV profile: 1.yaml for example)
+--- |
+# preprocessing
+# Deactivate quoting
+$csv->quote_char('');
+--- |
+# postprocessing
+# Adding a trailer at the end of the CSV file
+$output .= "This is a trailer";
+--- |
+# field processing
+# Adding "Word " before each 001
+if ($marcfield eq '001') { $value = 'Word ' . $value; }
+---
diff --git a/virtualshelves/downloadshelf.pl b/virtualshelves/downloadshelf.pl
index f75d2d4..1d3e87c 100755
--- a/virtualshelves/downloadshelf.pl
+++ b/virtualshelves/downloadshelf.pl
@@ -57,21 +57,27 @@ if ($shelfid && $format) {
     my $marcflavour         = C4::Context->preference('marcflavour');
     my $output;
 
-    # retrieve biblios from shelf
-    my $firstpass = 1;
-    foreach my $biblio (@$items) {
-	my $biblionumber = $biblio->{biblionumber};
+    # CSV 
+    if ($format =~ /^\d+$/) {
+	my @biblios;
+	foreach (@$items) {
+	    push @biblios, $_->{biblionumber};
+	}
+	$output = marc2csv(\@biblios, $format);
+
+    # Other formats
+    } else {
+	foreach my $biblio (@$items) {
+	    my $biblionumber = $biblio->{biblionumber};
 
-	my $record = GetMarcBiblio($biblionumber);
+	    my $record = GetMarcBiblio($biblionumber);
 
-	switch ($format) {
-	    case "iso2709" { $output .= $record->as_usmarc(); }
-	    case "ris"     { $output .= marc2ris($record); }
-	    case "bibtex"  { $output .= marc2bibtex($record, $biblionumber); }
-	    # We're in the case of a csv profile (firstpass is used for headers printing) :
-	    case /^\d+$/   { $output .= marc2csv($biblionumber, $format, $firstpass); }
+	    switch ($format) {
+		case "iso2709" { $output .= $record->as_usmarc(); }
+		case "ris"     { $output .= marc2ris($record); }
+		case "bibtex"  { $output .= marc2bibtex($record, $biblionumber); }
+	    }
 	}
-	$firstpass = 0;
     }
 
     # If it was a CSV export we change the format after the export so the file extension is fine
-- 
1.6.3.3