[Koha-patches] [PATCH] Results of CSV module analysis, testing various CSV modules and configs.

Fri Nov 7 22:49:59 CET 2008

The upshot is that despite appearances, we should NEVER use Text::CSV::Unicode
or any CSV module in non-binary mode.  Text::CSV works in binary mode
by using Text::CSV_XS, so the behavior is the same for both (in binary).
---
 t/other/Text_CSV_Various.t |   68 ++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 68 insertions(+), 0 deletions(-)
 create mode 100755 t/other/Text_CSV_Various.t

diff --git a/t/other/Text_CSV_Various.t b/t/other/Text_CSV_Various.t
new file mode 100755
index 0000000..a01f158
--- /dev/null
+++ b/t/other/Text_CSV_Various.t
@@ -0,0 +1,68 @@
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+
+use Test::More tests => 25;
+BEGIN {    # Startup: print the explanatory banner, extend @INC, and load the three CSV modules being compared.
+    diag "
+This test demonstrates why Koha uses the CSV parser and configuration it does.
+Specifically, the test is for Unicode compliance in text parsing and data.
+This test requires other modules that Koha doesn't actually use, in order to compare.
+Therefore, running this test is not necessary to test your Koha installation.
+
+";
+	use FindBin;                     # provides $FindBin::Bin, the directory holding this script
+	use lib $FindBin::Bin;           # add that directory to @INC
+	use_ok('Text::CSV');             # each use_ok() is counted as one test in the plan
+	use_ok('Text::CSV_XS');
+	use_ok('Text::CSV::Unicode');    # loaded only for comparison; the banner notes Koha does not use every module here
+}
+
+sub pretty_line {    # Banner line: the given phrase centred in '#' filler, or a solid rule of '#' when called with no argument.
+	my $width = 54;                                       # target banner width in characters
+	return "#" x $width . "\n" unless @_;                 # no phrase supplied: full-width rule
+	my $label = sprintf "  %s  ", shift;                  # phrase with two spaces of padding on each side
+	my $fill  = "#" x (($width - length($label)) / 2);    # 'x' truncates a fractional count, so odd leftovers give a banner one column short
+	return join '', $fill, $label, $fill, "\n";
+}
+
+my ($csv, $bin, %parsers);    # scratch handles, plus the registry mapping a display label to a parser object
+
+for my $class (qw(Text::CSV Text::CSV_XS Text::CSV::Unicode)) {    # one default and one binary instance per module
+    ok($csv = $class->new(),            sprintf('%s->new()', $class));
+    ok($bin = $class->new({binary=>1}), sprintf('%s->new({binary=>1})', $class));
+    $parsers{$class}            = $csv if $csv;    # register only parsers that were actually constructed
+    $parsers{"$class (binary)"} = $bin if $bin;
+}
+
+my $lines = [    # sample records: a description, the non-ASCII character itself, and a six-field CSV line containing it
+    {description=>"010D: LATIN SMALL LETTER C WITH CARON",     character=>'č', line=>'field1,second field,field3,do_we_have_a_č_problem?, f!fth field ,lastfield'},
+    {description=>"0117: LATIN SMALL LETTER E WITH DOT ABOVE", character=>'ė', line=>'field1,second field,field3,do_we_have_a_ė_problem?, f!fth field ,lastfield'},
+];
+# 010D: č LATIN SMALL LETTER C WITH CARON
+# 0117: ė LATIN SMALL LETTER E WITH DOT ABOVE
+diag sprintf("Testing %d lines with  %d parsers.", scalar @$lines, scalar keys %parsers);    # one-line summary of the run
+for my $label (sort keys %parsers) {    # announce every registered parser in sorted label order
+    my $version = $parsers{$label}->version || '?';    # '?' stands in when version() returns a false value
+    print "Testing parser $label version $version\n";
+}
+my $i = 0;    # 1-based counter of sample lines, used only in the banners
+LINE: foreach (@$lines) {    # for each sample record: print two banners, then parse its line with every parser
+    print pretty_line("Line " . ++$i);
+    print pretty_line($_->{description} . ': ' . $_->{character});
+    foreach my $key (sort keys %parsers) {    # sorted labels keep the test order stable between runs
+        my $parser = $parsers{$key};
+        my ($status,$count,@fields);
+        ok($status = $parser->parse($_->{line}), "parse ($key)");
+        if ($status) {    # the field-count test only runs after a successful parse, so the executed test total depends on parser behaviour
+            @fields = $parser->fields;
+            ok(($count = scalar(@fields)) == 6, "Number of fields ($count of 6)");
+            my $j = 0;
+            foreach my $f (@fields) {    # dump each parsed field for visual inspection
+                print "\t field " . ++$j . ": $f\n";
+            }
+        }
+    }
+}
+diag "done.\n";
-- 
1.5.5.GIT