[Koha-patches] [PATCH] Results of CSV module analysis, testing various CSV modules and configs.
Joe Atzberger
joe.atzberger at liblime.com
Fri Nov 7 22:49:59 CET 2008
The upshot is that despite appearances, we should NEVER use Text::CSV::Unicode
or any CSV module in non-binary mode. Text::CSV works in binary mode
by using Text::CSV_XS, so the behavior is the same for both (in binary).
---
t/other/Text_CSV_Various.t | 68 ++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 68 insertions(+), 0 deletions(-)
create mode 100755 t/other/Text_CSV_Various.t
diff --git a/t/other/Text_CSV_Various.t b/t/other/Text_CSV_Various.t
new file mode 100755
index 0000000..a01f158
--- /dev/null
+++ b/t/other/Text_CSV_Various.t
@@ -0,0 +1,68 @@
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+
+use Test::More tests => 25;
+BEGIN {    # compile-time setup: print the purpose of this test, then load the three CSV modules being compared
+    diag "
+This test demonstrates why Koha uses the CSV parser and configuration it does.
+Specifically, the test is for Unicode compliance in text parsing and data.
+This test requires other modules that Koha doesn't actually use, in order to compare.
+Therefore, running this test is not necessary to test your Koha installation.
+
+";
+    use FindBin;
+    use lib $FindBin::Bin;    # add this script's own directory to the module search path
+    use_ok('Text::CSV');             # each use_ok counts as one test in the plan
+    use_ok('Text::CSV_XS');
+    use_ok('Text::CSV::Unicode');
+}
+
+sub pretty_line {    # banner line: 54 '#' when called without arguments, else the phrase centred and '#'-padded to 54 columns
+    my $max    = 54;
+    my $phrase = @_ ? " " . shift() . " " : "";    # the phrase gets one blank on each side
+    my $pad    = $max - length($phrase);
+    $pad = 0 if $pad < 0;    # over-long phrase: emit it unpadded instead of passing a negative count to "x"
+    return "#" x int($pad / 2) . $phrase . "#" x ($pad - int($pad / 2)) . "\n";    # an odd spare column goes on the right
+}
+
+my ($csv, $bin, %parsers);    # %parsers maps a display label to each parser object that could be constructed
+
+for my $class (qw(Text::CSV Text::CSV_XS Text::CSV::Unicode)) {    # build a default and a binary-mode parser per module
+    ok($csv = $class->new(),            $class . '->new()');
+    ok($bin = $class->new({binary=>1}), $class . '->new({binary=>1})');
+    $parsers{$class}            = $csv if $csv;    # only successfully constructed parsers are kept
+    $parsers{"$class (binary)"} = $bin if $bin;
+}
+
+my $lines = [    # sample CSV rows: six fields each, with one multi-byte UTF-8 character inside the fourth field
+    {description=>"010D: LATIN SMALL LETTER C WITH CARON", character=>'č', line=>'field1,second field,field3,do_we_have_a_č_problem?, f!fth field ,lastfield'},
+    {description=>"0117: LATIN SMALL LETTER E WITH DOT ABOVE", character=>'ė', line=>'field1,second field,field3,do_we_have_a_ė_problem?, f!fth field ,lastfield'},
+];
+# 010D: č LATIN SMALL LETTER C WITH CARON
+# 0117: ė LATIN SMALL LETTER E WITH DOT ABOVE
+diag sprintf("Testing %d lines with %d parsers.", scalar @$lines, scalar keys %parsers);
+for my $label (sort keys %parsers) {    # list every parser under test, in sorted label order
+    my $version = $parsers{$label}->version || '?';    # '?' stands in when no version is reported
+    print "Testing parser $label version $version\n";
+}
+my $i = 0;
+LINE: foreach my $sample (@$lines) {    # feed every sample line to every parser, in sorted label order
+    print pretty_line("Line " . ++$i);
+    print pretty_line($sample->{description} . ': ' . $sample->{character});
+    foreach my $key (sort keys %parsers) {
+        my $parser = $parsers{$key};
+        my ($status, $count, @fields);    # parse result, number of fields, the parsed fields
+        ok($status = $parser->parse($sample->{line}), "parse ($key)");
+        if ($status) {    # the field-count test only runs after a successful parse
+            @fields = $parser->fields;
+            ok(($count = scalar(@fields)) == 6, "Number of fields ($count of 6)");
+            my $j = 0;
+            foreach my $f (@fields) {
+                print "\t field " . ++$j . ": $f\n";
+            }
+        }
+    }
+}
+diag "done.\n";
--
1.5.5.GIT
More information about the Koha-patches
mailing list