[Koha-patches] [PATCH] [SIGNED-OFF] Bug 6752: Be stricter with utf-8 encoding of output

Frédéric Demians f.demians at tamil.fr
Thu Sep 1 13:56:59 CEST 2011


From: Colin Campbell <colin.campbell at ptfs-europe.com>

Use :encoding(UTF-8) rather than :utf8 for stricter encoding. Marking
output as ':utf8' only flags the data as utf8; using ':encoding(UTF-8)'
also checks that it is valid UTF-8 (see binmode in perlfunc for more
details). In accordance with the robustness principle, input filehandles
have not been changed, as code may make the undocumented assumption that
invalid utf-8 is present in the input. Fixes errors reported by
t/00-testcritic.t. Where feasible, some filehandles have been made
lexical rather than reusing global filehandle variables.
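
For reviewers who want to see the difference in action, here is a small
standalone sketch (not part of the patch; the temporary file and variable
names are purely illustrative). It demonstrates the input side of the same
distinction, where the lack of validation is easiest to observe: ':utf8'
merely flags the data as UTF-8 and, as binmode in perlfunc documents, does
not check it, while ':encoding(UTF-8)' routes it through Encode, which
warns about byte sequences that do not map to Unicode.

    #!/usr/bin/perl
    use strict;
    use warnings;
    use File::Temp qw(tempfile);

    # Write a deliberately malformed UTF-8 byte sequence to a scratch file.
    my ($raw, $path) = tempfile();
    binmode $raw, ':raw';
    print {$raw} "\xC3\x28";    # 0xC3 must be followed by a continuation byte
    close $raw;

    # ':utf8' only flags the incoming data as UTF-8; per perlfunc it does
    # not check the bytes for validity, so the malformed sequence slips in.
    open my $lax, '<:utf8', $path or die "cannot open $path: $!";
    my $unchecked = <$lax>;
    close $lax;

    # ':encoding(UTF-8)' validates via Encode: a sequence that does not map
    # to Unicode triggers a warning and is replaced with a substitution
    # character instead of being passed through silently.
    open my $strict, '<:encoding(UTF-8)', $path or die "cannot open $path: $!";
    my $checked = <$strict>;
    close $strict;

The patch applies the same strict layer to output handles, so invalid data
is caught at the point where it is written rather than propagating further.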

Signed-off-by: Frédéric Demians <f.demians at tamil.fr>
---
 admin/aqplan.pl                                    |    2 +-
 authorities/authorities-list.pl                    |    2 +-
 cataloguing/ysearch.pl                             |    2 +-
 circ/overdue.pl                                    |    2 +-
 circ/ypattrodue-attr-search-authvalue.pl           |    2 +-
 circ/ysearch.pl                                    |    2 +-
 docs/CAS/CASProxy/examples/koha_webservice.pl      |    2 +-
 misc/cronjobs/MARC21_parse_test.pl                 |    2 +-
 misc/cronjobs/overdue_notices.pl                   |    2 +-
 .../22_to_30/export_Authorities_xml.pl             |    6 ++--
 misc/migration_tools/bulkmarcimport.pl             |    2 +-
 misc/migration_tools/rebuild_zebra.pl              |   28 ++++++++++----------
 misc/sax_parser_test.pl                            |    2 +-
 misc/translator/xgettext.pl                        |    2 +-
 opac/ilsdi.pl                                      |    2 +-
 opac/oai.pl                                        |    2 +-
 reports/guided_reports.pl                          |    2 +-
 reports/serials_stats.pl                           |    2 +-
 svc/bib                                            |    2 +-
 svc/new_bib                                        |    2 +-
 t/db_dependent/lib/KohaTest.pm                     |    6 ++--
 tools/export.pl                                    |    2 +-
 22 files changed, 39 insertions(+), 39 deletions(-)

diff --git a/admin/aqplan.pl b/admin/aqplan.pl
index 81418ff..b9917fd 100755
--- a/admin/aqplan.pl
+++ b/admin/aqplan.pl
@@ -463,7 +463,7 @@ output_html_with_http_headers $input, $cookie, $template->output;
 sub _print_to_csv {
     my ( $header, $results ) = @_;
 
-    binmode STDOUT, ":utf8";
+    binmode STDOUT, ':encoding(UTF-8)';
 
     my $csv = Text::CSV_XS->new(
         {   sep_char     => $del,
diff --git a/authorities/authorities-list.pl b/authorities/authorities-list.pl
index eec3233..2b15856 100755
--- a/authorities/authorities-list.pl
+++ b/authorities/authorities-list.pl
@@ -4,7 +4,7 @@ use warnings;
 use C4::Context;
 use C4::AuthoritiesMarc;
 use utf8;
-use open qw( :std :utf8 );
+use open qw[ :std :encoding(UTF-8) ];
 
 my $dbh=C4::Context->dbh;
 my $datatypes_query = $dbh->prepare(<<ENDSQL);
diff --git a/cataloguing/ysearch.pl b/cataloguing/ysearch.pl
index 5cd0205..40ac979 100755
--- a/cataloguing/ysearch.pl
+++ b/cataloguing/ysearch.pl
@@ -39,7 +39,7 @@ my $field = $input->param('field');
 # Prevent from disclosing data
 die() unless ($table eq "biblioitems"); 
 
-binmode STDOUT, ":utf8";
+binmode STDOUT, ':encoding(UTF-8)';
 print $input->header( -type => 'text/plain', -charset => 'UTF-8' );
 
 my ( $auth_status, $sessionID ) = check_cookie_auth( $input->cookie('CGISESSID'), { cataloguing => '*' } );
diff --git a/circ/overdue.pl b/circ/overdue.pl
index 18851a6..86e7b01 100755
--- a/circ/overdue.pl
+++ b/circ/overdue.pl
@@ -351,7 +351,7 @@ if ($noreport) {
     }
 
     if ($op eq 'csv') {
-        binmode(STDOUT, ":utf8");
+        binmode STDOUT, ':encoding(UTF-8)';
         my $csv = build_csv(\@overduedata);
         print $input->header(-type => 'application/vnd.sun.xml.calc',
                              -encoding    => 'utf-8',
diff --git a/circ/ypattrodue-attr-search-authvalue.pl b/circ/ypattrodue-attr-search-authvalue.pl
index 5ff1cd9..a7a950f 100755
--- a/circ/ypattrodue-attr-search-authvalue.pl
+++ b/circ/ypattrodue-attr-search-authvalue.pl
@@ -32,7 +32,7 @@ $attrcode =~ s|^/||;
 my ( $auth_status, $sessionID ) = check_cookie_auth( $input->cookie('CGISESSID'), { circulate => '*' } );
 exit 0 if $auth_status ne "ok";
 
-binmode STDOUT, ":utf8";
+binmode STDOUT, ':encoding(UTF-8)';
 print $input->header( -type => 'text/plain', -charset => 'UTF-8' );
 
 print STDERR ">> CALLING $0 (attrcode=$attrcode, query=$query)\n" if $debug;
diff --git a/circ/ysearch.pl b/circ/ysearch.pl
index f8fc52a..26e2def 100755
--- a/circ/ysearch.pl
+++ b/circ/ysearch.pl
@@ -33,7 +33,7 @@ use C4::Auth qw/check_cookie_auth/;
 my $input   = new CGI;
 my $query   = $input->param('query');
 
-binmode STDOUT, ":utf8";
+binmode STDOUT, ':encoding(UTF-8)';
 print $input->header(-type => 'text/plain', -charset => 'UTF-8');
 
 my ($auth_status, $sessionID) = check_cookie_auth($input->cookie('CGISESSID'), { circulate => '*' });
diff --git a/docs/CAS/CASProxy/examples/koha_webservice.pl b/docs/CAS/CASProxy/examples/koha_webservice.pl
index cb161f7..95b8bd3 100755
--- a/docs/CAS/CASProxy/examples/koha_webservice.pl
+++ b/docs/CAS/CASProxy/examples/koha_webservice.pl
@@ -34,7 +34,7 @@ The Proxy Ticket, needed for check_api_auth, that will try to make the CAS Serve
 use utf8;
 use strict;
 use warnings;
-binmode(STDOUT, ":utf8");
+binmode STDOUT, ':encoding(UTF-8)';
 
 use C4::Auth qw(check_api_auth);
 use C4::Output;
diff --git a/misc/cronjobs/MARC21_parse_test.pl b/misc/cronjobs/MARC21_parse_test.pl
index f607cd9..3510f2b 100755
--- a/misc/cronjobs/MARC21_parse_test.pl
+++ b/misc/cronjobs/MARC21_parse_test.pl
@@ -24,7 +24,7 @@ use MARC::Record;
 use MARC::File::XML;
 use MARC::File::USMARC;
 
-use open OUT => ':utf8';
+use open OUT => ':encoding(UTF-8)';
 
 use Getopt::Long qw(:config auto_help auto_version);
 use Pod::Usage;
diff --git a/misc/cronjobs/overdue_notices.pl b/misc/cronjobs/overdue_notices.pl
index 37774b5..1fb9592 100755
--- a/misc/cronjobs/overdue_notices.pl
+++ b/misc/cronjobs/overdue_notices.pl
@@ -323,7 +323,7 @@ if (@branchcodes) {
 # these are the fields that will be substituted into <<item.content>>
 my @item_content_fields = split( /,/, $itemscontent );
 
-binmode( STDOUT, ":utf8" );
+binmode STDOUT, ':encoding(UTF-8)';
 
 
 our $csv;       # the Text::CSV_XS object
diff --git a/misc/migration_tools/22_to_30/export_Authorities_xml.pl b/misc/migration_tools/22_to_30/export_Authorities_xml.pl
index 70647ee..42d2e5c 100755
--- a/misc/migration_tools/22_to_30/export_Authorities_xml.pl
+++ b/misc/migration_tools/22_to_30/export_Authorities_xml.pl
@@ -23,7 +23,7 @@ $rq->execute;
 #ATTENTION : Mettre la base en utf8 auparavant.
 #BEWARE : Set database into utf8 before.
 while (my ($authid)=$rq->fetchrow){
-open FILEOUTPUT,">:utf8", "./$filename/$authid.xml" or die "unable to open $filename";
+open my $fileoutput, '>:encoding(UTF-8)', "./$filename/$authid.xml" or die "unable to open $filename";
   my $record=AUTHgetauthority($dbh,$authid);
   if (! utf8::is_utf8($record)) {
     utf8::decode($record);
@@ -44,7 +44,7 @@ open FILEOUTPUT,">:utf8", "./$filename/$authid.xml" or die "unable to open $file
      # } else {
 #    $record->encoding( 'UTF-8' );
 #  }
-  print FILEOUTPUT $record->as_xml();
-close FILEOUPUT;
+  print {$fileoutput} $record->as_xml();
+close $fileoutput;
 
 }
diff --git a/misc/migration_tools/bulkmarcimport.pl b/misc/migration_tools/bulkmarcimport.pl
index 4f738e8..70d5eb7 100755
--- a/misc/migration_tools/bulkmarcimport.pl
+++ b/misc/migration_tools/bulkmarcimport.pl
@@ -30,7 +30,7 @@ use Getopt::Long;
 use IO::File;
 use Pod::Usage;
 
-binmode(STDOUT, ":utf8");
+binmode STDOUT, ':encoding(UTF-8)';
 my ( $input_marc_file, $number, $offset) = ('',0,0);
 my ($version, $delete, $test_parameter, $skip_marc8_conversion, $char_encoding, $verbose, $commit, $fk_off,$format,$biblios,$authorities,$keepids,$match, $isbn_check, $logfile);
 my ($sourcetag,$sourcesubfield,$idmapfl);
diff --git a/misc/migration_tools/rebuild_zebra.pl b/misc/migration_tools/rebuild_zebra.pl
index dadf43f..52a577c 100755
--- a/misc/migration_tools/rebuild_zebra.pl
+++ b/misc/migration_tools/rebuild_zebra.pl
@@ -309,7 +309,7 @@ sub export_marc_records_from_sth {
     my ($record_type, $sth, $directory, $as_xml, $noxml, $nosanitize) = @_;
 
     my $num_exported = 0;
-    open (OUT, ">:utf8 ", "$directory/exported_records") or die $!;
+    open my $fh, '>:encoding(UTF-8)', "$directory/exported_records" or die $!;
     my $i = 0;
     my ( $itemtag, $itemsubfield ) = GetMarcFromKohaField("items.itemnumber",'');
     while (my ($record_number) = $sth->fetchrow_array) {
@@ -339,7 +339,7 @@ sub export_marc_records_from_sth {
                 }
             }
             if ( $marcxml ) {
-                print OUT $marcxml if $marcxml;
+                print {$fh} $marcxml if $marcxml;
                 $num_exported++;
             }
             next;
@@ -352,7 +352,7 @@ sub export_marc_records_from_sth {
             # to care, though, at least if you're using the GRS-1 filter.  It does
             # care if you're using the DOM filter, which requires valid XML file(s).
             eval {
-                print OUT ($as_xml) ? $marc->as_xml_record(C4::Context->preference('marcflavour')) : $marc->as_usmarc();
+                print {$fh} ($as_xml) ? $marc->as_xml_record(C4::Context->preference('marcflavour')) : $marc->as_usmarc();
                 $num_exported++;
             };
             if ($@) {
@@ -361,7 +361,7 @@ sub export_marc_records_from_sth {
         }
     }
     print "\nRecords exported: $num_exported\n" if ( $verbose_logging );
-    close OUT;
+    close $fh;
     return $num_exported;
 }
 
@@ -369,7 +369,7 @@ sub export_marc_records_from_list {
     my ($record_type, $entries, $directory, $as_xml, $noxml, $records_deleted) = @_;
 
     my $num_exported = 0;
-    open (OUT, ">:utf8 ", "$directory/exported_records") or die $!;
+    open my $fh, '>:encoding(UTF-8)', "$directory/exported_records" or die $!;
     my $i = 0;
 
     # Skip any deleted records. We check for this anyway, but this reduces error spam
@@ -386,12 +386,12 @@ sub export_marc_records_from_list {
             # strung together with no single root element.  zebraidx doesn't seem
             # to care, though, at least if you're using the GRS-1 filter.  It does
             # care if you're using the DOM filter, which requires valid XML file(s).
-            print OUT ($as_xml) ? $marc->as_xml_record(C4::Context->preference('marcflavour')) : $marc->as_usmarc();
+            print {$fh} ($as_xml) ? $marc->as_xml_record(C4::Context->preference('marcflavour')) : $marc->as_usmarc();
             $num_exported++;
         }
     }
     print "\nRecords exported: $num_exported\n" if ( $verbose_logging );
-    close OUT;
+    close $fh;
     return $num_exported;
 }
 
@@ -399,7 +399,7 @@ sub generate_deleted_marc_records {
     my ($record_type, $entries, $directory, $as_xml) = @_;
 
     my $records_deleted = {};
-    open (OUT, ">:utf8 ", "$directory/exported_records") or die $!;
+    open my $fh, '>:encoding(UTF-8)', "$directory/exported_records" or die $!;
     my $i = 0;
     foreach my $record_number (map { $_->{biblio_auth_number} } @$entries ) {
         print "\r$i" unless ($i++ %100 or !$verbose_logging);
@@ -415,12 +415,12 @@ sub generate_deleted_marc_records {
             fix_unimarc_100($marc);
         }
 
-        print OUT ($as_xml) ? $marc->as_xml_record(C4::Context->preference("marcflavour")) : $marc->as_usmarc();
+        print {$fh} ($as_xml) ? $marc->as_xml_record(C4::Context->preference("marcflavour")) : $marc->as_usmarc();
 
         $records_deleted->{$record_number} = 1;
     }
     print "\nRecords exported: $i\n" if ( $verbose_logging );
-    close OUT;
+    close $fh;
     return $records_deleted;
     
 
@@ -830,8 +830,8 @@ if ($authorities) {
     # AUTHORITIES : copying mandatory files
     #
     unless (-f C4::Context->zebraconfig('authorityserver')->{config}) {
-    open ZD,">:utf8 ",C4::Context->zebraconfig('authorityserver')->{config};
-    print ZD "
+    open my $zd, '>:encoding(UTF-8)', C4::Context->zebraconfig('authorityserver')->{config};
+    print {$zd} "
 # generated by KOHA/misc/migration_tools/rebuild_zebra.pl 
 profilePath:\${srcdir:-.}:$authorityserverdir/tab/:$tabdir/tab/:\${srcdir:-.}/tab/
 
@@ -975,8 +975,8 @@ if ($biblios) {
     # BIBLIOS : copying mandatory files
     #
     unless (-f C4::Context->zebraconfig('biblioserver')->{config}) {
-    open ZD,">:utf8 ",C4::Context->zebraconfig('biblioserver')->{config};
-    print ZD "
+    open my $zd, '>:encoding(UTF-8)', C4::Context->zebraconfig('biblioserver')->{config};
+    print {$zd} "
 # generated by KOHA/misc/migrtion_tools/rebuild_zebra.pl 
 profilePath:\${srcdir:-.}:$biblioserverdir/tab/:$tabdir/tab/:\${srcdir:-.}/tab/
 
diff --git a/misc/sax_parser_test.pl b/misc/sax_parser_test.pl
index b2e5974..fb56de1 100755
--- a/misc/sax_parser_test.pl
+++ b/misc/sax_parser_test.pl
@@ -9,7 +9,7 @@ use Encode;
 my $parser = XML::SAX::ParserFactory->parser(
 Handler => MySAXHandler->new
 );
-binmode STDOUT, ":utf8";
+binmode STDOUT, ':encoding(UTF-8)';
 print "\x{65}\x{301}\n";
 $parser->parse_string(encode_utf8("<xml>\x{65}\x{301}</xml>"));
 $parser->parse_string("<xml>\xEF\xBB\xBF\x{65}\x{301}</xml>");
diff --git a/misc/translator/xgettext.pl b/misc/translator/xgettext.pl
index 99e9612..f7a940b 100755
--- a/misc/translator/xgettext.pl
+++ b/misc/translator/xgettext.pl
@@ -351,7 +351,7 @@ if (defined $output && $output ne '-') {
     print STDERR "$0: Outputting to STDOUT...\n" if $verbose_p;
     open(OUTPUT, ">&STDOUT");
 }
-binmode( OUTPUT, ":utf8" );
+binmode OUTPUT, ':encoding(UTF-8)';
 
 if (defined $files_from) {
     print STDERR "$0: Opening input file list \"$files_from\"\n" if $verbose_p;
diff --git a/opac/ilsdi.pl b/opac/ilsdi.pl
index 48fd7ba..419c0f2 100755
--- a/opac/ilsdi.pl
+++ b/opac/ilsdi.pl
@@ -228,7 +228,7 @@ if ( $service and any { $service eq $_ } @services ) {
 }
 
 # Output XML by passing the hashref to XMLOut
-binmode(STDOUT, ":utf8");
+binmode STDOUT, ':encoding(UTF-8)';
 print CGI::header('-type'=>'text/xml', '-charset'=>'utf-8');
 print XMLout(
     $out,
diff --git a/opac/oai.pl b/opac/oai.pl
index 038b6d1..2ef3f28 100755
--- a/opac/oai.pl
+++ b/opac/oai.pl
@@ -41,7 +41,7 @@ else {
     );
 }
 
-binmode( STDOUT, ":utf8" );
+binmode STDOUT, ':encoding(UTF-8)';
 my $repository = C4::OAI::Repository->new();
 
 # __END__ Main Prog
diff --git a/reports/guided_reports.pl b/reports/guided_reports.pl
index f5667a2..27377e0 100755
--- a/reports/guided_reports.pl
+++ b/reports/guided_reports.pl
@@ -539,7 +539,7 @@ elsif ($phase eq 'Run this report'){
 }
 
 elsif ($phase eq 'Export'){
-    binmode STDOUT, ':utf8';
+    binmode STDOUT, ':encoding(UTF-8)';
 
 	# export results to tab separated text or CSV
 	my $sql    = $input->param('sql');  # FIXME: use sql from saved report ID#, not new user-supplied SQL!
diff --git a/reports/serials_stats.pl b/reports/serials_stats.pl
index 640ef60..88918cc 100755
--- a/reports/serials_stats.pl
+++ b/reports/serials_stats.pl
@@ -103,7 +103,7 @@ if($do_it){
         $template->param(datas => \@datas,
                          do_it => 1);
     }else{
-        binmode STDOUT, ':utf8';
+        binmode STDOUT, ':encoding(UTF-8)';
         print $input->header(-type => 'application/vnd.sun.xml.calc',
                          -encoding => 'utf-8',
                              -name => "$basename.csv",
diff --git a/svc/bib b/svc/bib
index 29121ca..4808559 100755
--- a/svc/bib
+++ b/svc/bib
@@ -27,7 +27,7 @@ use C4::Biblio;
 use XML::Simple;
 
 my $query = new CGI;
-binmode STDOUT, ":utf8";
+binmode STDOUT, ':encoding(UTF-8)';
 
 my ($status, $cookie, $sessionID) = check_api_auth($query, { editcatalogue => 'edit_catalogue'} );
 unless ($status eq "ok") {
diff --git a/svc/new_bib b/svc/new_bib
index b84eaea..dba1fc7 100755
--- a/svc/new_bib
+++ b/svc/new_bib
@@ -28,7 +28,7 @@ use XML::Simple;
 use C4::Charset;
 
 my $query = new CGI;
-binmode STDOUT, ":utf8";
+binmode STDOUT, ':encoding(UTF-8)';
 
 my ($status, $cookie, $sessionID) = check_api_auth($query, { editcatalogue => 'edit_catalogue'} );
 unless ($status eq "ok") {
diff --git a/t/db_dependent/lib/KohaTest.pm b/t/db_dependent/lib/KohaTest.pm
index 70c963d..d8cf495 100644
--- a/t/db_dependent/lib/KohaTest.pm
+++ b/t/db_dependent/lib/KohaTest.pm
@@ -625,11 +625,11 @@ sub reindex_marc {
         mkdir "$directory/$record_type";
         my $sth = $dbh->prepare($record_type eq "biblio" ? "SELECT marc FROM biblioitems" : "SELECT marc FROM auth_header");
         $sth->execute();
-        open OUT, ">:utf8", "$directory/$record_type/records";
+        open my $out, '>:encoding(UTF-8)', "$directory/$record_type/records";
         while (my ($blob) = $sth->fetchrow_array) {
-            print OUT $blob;
+            print {$out} $blob;
         }
-        close OUT;
+        close $out;
         my $zebra_server = "${record_type}server";
         my $zebra_config  = C4::Context->zebraconfig($zebra_server)->{'config'};
         my $zebra_db_dir  = C4::Context->zebraconfig($zebra_server)->{'directory'};
diff --git a/tools/export.pl b/tools/export.pl
index 4a09868..a39f080 100755
--- a/tools/export.pl
+++ b/tools/export.pl
@@ -56,7 +56,7 @@ my ($template, $loggedinuser, $cookie)
 	}
 
 if ($op eq "export") {
-    binmode(STDOUT,":utf8");
+    binmode STDOUT, ':encoding(UTF-8)';
 	print $query->header(   -type => 'application/octet-stream', 
                             -charset => 'utf-8',
                             -attachment=>$filename);
-- 
1.7.6.1


