[Koha-patches] [PATCH] Bug 2500 Correcting incorrect splitting of cutter numbers

Chris Nighswonger cnighswonger at foundations.edu
Tue Sep 15 15:27:13 CEST 2009


This patch does two things to improve the call number splitting algorithms:

1. It makes changes to ensure that cutter numbers are split correctly in ddcns

2. It moves custom/fiction/biography call number splitting to a separate algorithm.
    Before they were incorrectly placed in ddcns.

This patch also modifies the call number splitting tests to accept call numbers from the
command line to allow quick testing of any give call number against a particular algorithm.
---
 C4/Labels/Label.pm    |   46 +++++++++++++++--------------------
 t/Labels_split_ccn.t  |   63 +++++++++++++++++++++++++++++++++++++++++++++++++
 t/Labels_split_ddcn.t |   53 +++++++++++++++++++++++++++++-----------
 t/Labels_split_lccn.t |   48 +++++++++++++++++++++++++++++-------
 4 files changed, 159 insertions(+), 51 deletions(-)
 create mode 100755 t/Labels_split_ccn.t

diff --git a/C4/Labels/Label.pm b/C4/Labels/Label.pm
index 9cf5026..c3ef20b 100644
--- a/C4/Labels/Label.pm
+++ b/C4/Labels/Label.pm
@@ -6,6 +6,7 @@ use warnings;
 use Text::Wrap;
 use Algorithm::CheckDigits;
 use Text::CSV_XS;
+use Data::Dumper;
 
 use C4::Context;
 use C4::Debug;
@@ -133,9 +134,9 @@ sub _split_ddcn {
     $_ = $ddcn;
     s/\///g;   # in theory we should be able to simply remove all segmentation markers and arrive at the correct call number...
     my (@parts) = m/
-        ^([a-zA-Z-]+(?:$possible_decimal)?) # R220.3            # BIO   # first example will require extra splitting
+        ^([-a-zA-Z]*\s?(?:$possible_decimal)?) # R220.3  CD-ROM 787.87 # will require extra splitting
         \s+
-        (.+)                               # H2793Z H32 c.2   # R5c.1   # everything else (except bracketing spaces)
+        (.+)                               # H2793Z H32 c.2 EAS # everything else (except bracketing spaces)
         \s*
         /x;
     unless (scalar @parts)  {
@@ -143,41 +144,34 @@ sub _split_ddcn {
         push @parts, $_;     # if no match, just push the whole string.
     }
 
-    if ($parts[ 0] =~ /^([a-zA-Z]+)($possible_decimal)$/) {
+    if ($parts[0] =~ /^([-a-zA-Z]+)\s?($possible_decimal)$/) {
           shift @parts;         # pull off the mathching first element, like example 1
         unshift @parts, $1, $2; # replace it with the two pieces
     }
 
     push @parts, split /\s+/, pop @parts;   # split the last piece into an arbitrary number of pieces at spaces
-
-    if ($parts[-1] !~ /^.*\d-\d.*$/ && $parts[-1] =~ /^(.*\d+)(\D.*)$/) {
-         pop @parts;            # pull off the mathching last element, like example 2
-        push @parts, $1, $2;    # replace it with the two pieces
-    }
-
     $debug and print STDERR "split_ddcn array: ", join(" | ", @parts), "\n";
     return @parts;
 }
 
-sub _split_fcn {
+## NOTE: Custom call number types go here. It may be necessary to create additional splitting algorithms if some custom call numbers
+##      cannot be made to work here. Presently this splits standard non-ddcn, non-lccn fiction and biography call numbers.
+
+sub _split_ccn {
     my ($fcn) = @_;
-    my @fcn_split = ();
-    # Split fiction call numbers based on spaces
-    SPLIT_FCN:
-    while ($fcn) {
-        if ($fcn =~ m/([A-Za-z0-9]+\.?[0-9]?)(\W?).*?/x) {
-            push (@fcn_split, $1);
-            $fcn = $';
-        }
-        else {
-            last SPLIT_FCN;     # No match, break out of the loop
-        }
+    my @parts = ();
+    # Split call numbers based on spaces
+    push @parts, split /\s+/, $fcn;   # split the call number into an arbitrary number of pieces at spaces
+    if ($parts[-1] !~ /^.*\d-\d.*$/ && $parts[-1] =~ /^(.*\d+)(\D.*)$/) {
+        pop @parts;            # pull off the matching last element
+        push @parts, $1, $2;    # replace it with the two pieces
     }
-    unless (scalar @fcn_split) {
+    unless (scalar @parts) {
         warn sprintf('regexp failed to match string: %s', $_);
-        push (@fcn_split, $_);
+        push (@parts, $_);
     }
-    return @fcn_split;
+    $debug and print STDERR "split_ccn array: ", join(" | ", @parts), "\n";
+    return @parts;
 }
 
 sub _get_barcode_data {
@@ -414,11 +408,11 @@ sub draw_label_text {
         if ((grep {$field->{'code'} =~ m/$_/} @callnumber_list) and ($self->{'printing_type'} eq 'BIB') and ($self->{'callnum_split'})) { # If the field contains the call number, we do some sp
             if ($cn_source eq 'lcc') {
                 @label_lines = _split_lccn($field_data);
-                @label_lines = _split_fcn($field_data) if !@label_lines;    # If it was not a true lccn, try it as a fiction call number
+                @label_lines = _split_ccn($field_data) if !@label_lines;    # If it was not a true lccn, try it as a custom call number
                 push (@label_lines, $field_data) if !@label_lines;         # If it was not that, send it on unsplit
             } elsif ($cn_source eq 'ddc') {
                 @label_lines = _split_ddcn($field_data);
-                @label_lines = _split_fcn($field_data) if !@label_lines;
+                @label_lines = _split_ccn($field_data) if !@label_lines;
                 push (@label_lines, $field_data) if !@label_lines;
             } else {
                 warn sprintf('Call number splitting failed for: %s. Please add this call number to bug #2500 at bugs.koha.org', $field_data);
diff --git a/t/Labels_split_ccn.t b/t/Labels_split_ccn.t
new file mode 100755
index 0000000..82248ba
--- /dev/null
+++ b/t/Labels_split_ccn.t
@@ -0,0 +1,63 @@
+#!/usr/bin/perl
+#
+# This file is part of Koha.
+#
+# Koha is free software; you can redistribute it and/or modify it under the
+# terms of the GNU General Public License as published by the Free Software
+# Foundation; either version 2 of the License, or (at your option) any later
+# version.
+#
+# Koha is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+# A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
+# Suite 330, Boston, MA  02111-1307 USA
+#
+# for context, see http://bugs.koha.org
+
+use strict;
+use warnings;
+
+use Test::More;
+
+BEGIN {
+    our $ccns = {};
+    if ($ARGV[0]) {
+        BAIL_OUT("USAGE: perl Labels_split_ccn.t 'BIO JP2 R5c.1' 'BIO,JP2,R5c.1'") unless $ARGV[1];
+        $ccns = {$ARGV[0] => [split (/,/,$ARGV[1])],};
+    }
+    else {
+        $ccns = {
+            'BIO JP2 R5c.1'         => [qw(BIO JP2 R5 c.1)],
+            'FIC GIR J5c.1'         => [qw(FIC GIR J5 c.1)],
+            'J DAR G7c.11'          => [qw( J  DAR G7 c.11)],
+            'MP3-CD F PARKER'       => [qw(MP3-CD F PARKER)],
+        };
+    }
+    my $test_num = 1;
+    foreach (keys(%$ccns)) {
+        my $split_num += scalar(@{$ccns->{$_}});
+        $test_num += 2 * $split_num;
+        $test_num += 4;
+    }
+    plan tests => $test_num;
+    use_ok('C4::Labels::Label');
+    use vars qw($ccns);
+}
+
+foreach my $ccn (sort keys %$ccns) {
+    my (@parts, @expected);
+    ok($ccn, "ddcn: $ccn");
+    ok(@expected = @{$ccns->{$ccn}}, "split expected to produce " . scalar(@expected) . " pieces");
+    ok(@parts = C4::Labels::Label::_split_ccn($ccn), "C4::Labels::Label::_split_ccn($ccn)");
+    ok(scalar(@expected) == scalar(@parts), sprintf("%d of %d pieces produced", scalar(@parts), scalar(@expected)));
+    my $i = 0;
+    foreach my $unit (@expected) {
+        my $part;
+        ok($part = $parts[$i], "($ccn)[$i] populated: " . (defined($part) ? $part : 'UNDEF'));
+        ok((defined($part) and $part eq $unit),     "($ccn)[$i]   matches: $unit");
+        $i++;
+    }
+}
diff --git a/t/Labels_split_ddcn.t b/t/Labels_split_ddcn.t
index 26aad55..f70b29f 100755
--- a/t/Labels_split_ddcn.t
+++ b/t/Labels_split_ddcn.t
@@ -1,32 +1,56 @@
 #!/usr/bin/perl
 #
+# This file is part of Koha.
+#
+# Koha is free software; you can redistribute it and/or modify it under the
+# terms of the GNU General Public License as published by the Free Software
+# Foundation; either version 2 of the License, or (at your option) any later
+# version.
+#
+# Koha is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+# A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
+# Suite 330, Boston, MA  02111-1307 USA
+#
 # for context, see http://bugs.koha.org
 
 use strict;
 use warnings;
 
-use Test::More tests => 82;
+use Test::More;
 
 BEGIN {
-    use_ok('C4::Labels');
+    our $ddcns = {};
+    if ($ARGV[0]) {
+        BAIL_OUT("USAGE: perl Labels_split_ddcn.t '621.3828 J28l' '621.3828,J28l'") unless $ARGV[1];
+        $ddcns = {$ARGV[0] => [split (/,/,$ARGV[1])],};
+    }
+    else {
+        $ddcns = {
+            'R220.3 H2793Z H32 c.2' => [qw(R 220.3 H2793Z H32 c.2)],
+            'CD-ROM 787.87 EAS'     => [qw(CD-ROM 787.87 EAS)],
+            '252.051 T147 v.1-2'    => [qw(252.051 T147 v.1-2)],
+        };
+    }
+    my $test_num = 1;
+    foreach (keys(%$ddcns)) {
+        my $split_num += scalar(@{$ddcns->{$_}});
+        $test_num += 2 * $split_num;
+        $test_num += 4;
+    }
+    plan tests => $test_num;
+    use_ok('C4::Labels::Label');
+    use vars qw($ddcns);
 }
-ok(defined C4::Labels::split_ddcn, 'C4::Labels::split_ddcn defined');
-
-my $ddcns = {
-    'BIO JP2 R5c.1'         => [qw(BIO JP2 R5 c.1 )],
-    'FIC GIR J5c.1'         => [qw(FIC GIR J5 c.1 )],
-    'J DAR G7c.11'          => [qw( J  DAR G7 c.11)],
-    'R220.3 H2793Z H32 c.2' => [qw(R 220.3 H2793Z H32 c.2)],
-    'CD-ROM 787.87 EAS'     => [qw(CD-ROM 787.87 EAS)],
-    'MP3-CD F PARKER'       => [qw(MP3-CD F PARKER)],
-    '252.051 T147 v.1-2'    => [qw(252.051 T147 v.1-2)],
-};
 
 foreach my $ddcn (sort keys %$ddcns) {
     my (@parts, @expected);
     ok($ddcn, "ddcn: $ddcn");
     ok(@expected = @{$ddcns->{$ddcn}}, "split expected to produce " . scalar(@expected) . " pieces");
-    ok(@parts = C4::Labels::split_ddcn($ddcn), "C4::Labels::split_ddcn($ddcn)");
+    ok(@parts = C4::Labels::Label::_split_ddcn($ddcn), "C4::Labels::Label::_split_ddcn($ddcn)");
     ok(scalar(@expected) == scalar(@parts), sprintf("%d of %d pieces produced", scalar(@parts), scalar(@expected)));
     my $i = 0;
     foreach my $unit (@expected) {
@@ -36,4 +60,3 @@ foreach my $ddcn (sort keys %$ddcns) {
         $i++;
     }
 }
-
diff --git a/t/Labels_split_lccn.t b/t/Labels_split_lccn.t
index 1893e9d..f29a113 100755
--- a/t/Labels_split_lccn.t
+++ b/t/Labels_split_lccn.t
@@ -1,28 +1,56 @@
 #!/usr/bin/perl
 #
+# This file is part of Koha.
+#
+# Koha is free software; you can redistribute it and/or modify it under the
+# terms of the GNU General Public License as published by the Free Software
+# Foundation; either version 2 of the License, or (at your option) any later
+# version.
+#
+# Koha is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+# A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
+# Suite 330, Boston, MA  02111-1307 USA
+#
 # for context, see http://bugs.koha.org/cgi-bin/bugzilla/show_bug.cgi?id=2691
 
 use strict;
 use warnings;
 
-use Test::More tests => 44;
+use Test::More;
 
 BEGIN {
-    use_ok('C4::Labels');
+    our $lccns = {};
+    if ($ARGV[0]) {
+        BAIL_OUT("USAGE: perl Labels_split_lccn.t 'HE 8700.7 .P6 T44 1983' 'HE,8700.7,.P6,T44,1983'") unless $ARGV[1];
+        $lccns = {$ARGV[0] => [split (/,/,$ARGV[1])],};
+    }
+    else {
+        $lccns = {
+            'HE8700.7 .P6T44 1983' => [qw(HE 8700.7 .P6 T44 1983)],
+            'BS2545.E8 H39 1996'   => [qw(BS 2545 .E8 H39 1996)],
+            'NX512.S85 A4 2006'    => [qw(NX 512 .S85 A4 2006)],
+        };
+    }
+    my $test_num = 1;
+    foreach (keys(%$lccns)) {
+        my $split_num += scalar(@{$lccns->{$_}});
+        $test_num += 2 * $split_num;
+        $test_num += 4;
+    }
+    plan tests => $test_num;
+    use_ok('C4::Labels::Label');
+    use vars qw($lccns);
 }
-ok(defined C4::Labels::split_lccn, 'C4::Labels::split_lccn defined');
-
-my $lccns = {
-    'HE8700.7 .P6T44 1983' => [qw(HE 8700.7 .P6 T44 1983)],
-    'BS2545.E8 H39 1996'   => [qw(BS 2545 .E8 H39 1996)],
-    'NX512.S85 A4 2006'    => [qw(NX 512 .S85 A4 2006)],
-};
 
 foreach my $lccn (sort keys %$lccns) {
     my (@parts, @expected);
     ok($lccn, "lccn: $lccn");
     ok(@expected = @{$lccns->{$lccn}}, "split expected to produce " . scalar(@expected) . " pieces");
-    ok(@parts = C4::Labels::split_lccn($lccn), "C4::Labels::split_lccn($lccn)");
+    ok(@parts = C4::Labels::Label::_split_lccn($lccn), "C4::Labels::Label::_split_lccn($lccn)");
     ok(scalar(@expected) == scalar(@parts), sprintf("%d of %d pieces produced", scalar(@parts), scalar(@expected)));
     my $i = 0;
     foreach my $unit (@expected) {
-- 
1.6.0.4




More information about the Koha-patches mailing list