[Koha-patches] [PATCH] [SIGNED-OFF] Bug 5859: Allow user to set encoding for import

Nicole C. Engard nengard at bywatersolutions.com
Mon Apr 18 00:15:51 CEST 2011


From: Jared Camins-Esakov <jcamins at bywatersolutions.com>

Rather than having options for MARC21 and UNIMARC in the "Character encoding"
dropdown, the user should be able to select the appropriate character encoding.
The default retains the current behavior, which is to allow the system to guess
which character encoding is in use. However, it should be noted that this guess
is almost always wrong for non-UTF-8 records with non-ASCII characters. Specifying
a character set is much more reliable if you're not using UTF-8.

Rebased to use Template::Toolkit instead of HTML::Template::Pro.

Signed-off-by: Jared Camins-Esakov <jcamins at bywatersolutions.com>
Signed-off-by: Nicole C. Engard <nengard at bywatersolutions.com>
---
 C4/ImportBatch.pm                                  |   11 +++++++----
 .../prog/en/modules/tools/stage-marc-import.tt     |    9 ++-------
 tools/stage-marc-import.pl                         |    8 ++++----
 3 files changed, 13 insertions(+), 15 deletions(-)

diff --git a/C4/ImportBatch.pm b/C4/ImportBatch.pm
index 145e5d9..b6db406 100644
--- a/C4/ImportBatch.pm
+++ b/C4/ImportBatch.pm
@@ -210,7 +210,7 @@ sub ModBiblioInBatch {
 =head2 BatchStageMarcRecords
 
   ($batch_id, $num_records, $num_items, @invalid_records) = 
-    BatchStageMarcRecords($marc_flavor, $marc_records, $file_name, 
+    BatchStageMarcRecords($encoding, $marc_records, $file_name, 
                           $comments, $branch_code, $parse_items,
                           $leave_as_staging, 
                           $progress_interval, $progress_callback);
@@ -218,7 +218,7 @@ sub ModBiblioInBatch {
 =cut
 
 sub  BatchStageMarcRecords {
-    my $marc_flavor = shift;
+    my $encoding = shift;
     my $marc_records = shift;
     my $file_name = shift;
     my $comments = shift;
@@ -258,13 +258,16 @@ sub  BatchStageMarcRecords {
             &$progress_callback($rec_num);
         }
         my ($marc_record, $charset_guessed, $char_errors) =
-            MarcToUTF8Record($marc_blob, C4::Context->preference("marcflavour"));
+            MarcToUTF8Record($marc_blob, C4::Context->preference("marcflavour"), $encoding);
+
+        $encoding = $charset_guessed unless $encoding;
+
         my $import_record_id;
         if (scalar($marc_record->fields()) == 0) {
             push @invalid_records, $marc_blob;
         } else {
             $num_valid++;
-            $import_record_id = AddBiblioToBatch($batch_id, $rec_num, $marc_record, $marc_flavor, int(rand(99999)), 0);
+            $import_record_id = AddBiblioToBatch($batch_id, $rec_num, $marc_record, $encoding, int(rand(99999)), 0);
             if ($parse_items) {
                 my @import_items_ids = AddItemsToImportBiblio($batch_id, $import_record_id, $marc_record, 0);
                 $num_items += scalar(@import_items_ids);
diff --git a/koha-tmpl/intranet-tmpl/prog/en/modules/tools/stage-marc-import.tt b/koha-tmpl/intranet-tmpl/prog/en/modules/tools/stage-marc-import.tt
index f44181e..fc6db51 100644
--- a/koha-tmpl/intranet-tmpl/prog/en/modules/tools/stage-marc-import.tt
+++ b/koha-tmpl/intranet-tmpl/prog/en/modules/tools/stage-marc-import.tt
@@ -93,13 +93,8 @@ function CheckForm(f) {
 		
 	</li>
 	<li>
-		<label for="syntax">Character encoding: </label>
-        [% IF ( UNIMARC ) %]
-            <select name="syntax" id="syntax"><option value="MARC21">MARC21</option><option value="UNIMARC" selected="selected">UNIMARC</option></select>
-        [% ELSE %]
-            <select name="syntax" id="syntax"><option value="MARC21" selected="selected">MARC21</option><option value="UNIMARC">UNIMARC</option></select>
-        [% END %]
-		
+		<label for="encoding">Character encoding: </label>
+            <select name="encoding" id="encoding"><option value="" selected="selected">Default</option><option value="utf8">UTF-8</option><option value="MARC-8">MARC 8</option><option value="ISO_5426">ISO 5426</option><option value="ISO_6937">ISO 6937</option><option value="ISO_8859-1">ISO 8859-1</option><option value="EUC-KR">EUC-KR</option></select>
 	</li>
 </ol></fieldset>
   <fieldset class="rows">
diff --git a/tools/stage-marc-import.pl b/tools/stage-marc-import.pl
index b113332..745a36a 100755
--- a/tools/stage-marc-import.pl
+++ b/tools/stage-marc-import.pl
@@ -55,7 +55,7 @@ my $nomatch_action = $input->param('nomatch_action');
 my $parse_items = $input->param('parse_items');
 my $item_action = $input->param('item_action');
 my $comments = $input->param('comments');
-my $syntax = $input->param('syntax');
+my $encoding = $input->param('encoding');
 my ($template, $loggedinuser, $cookie)
 	= get_template_and_user({template_name => "tools/stage-marc-import.tmpl",
 					query => $input,
@@ -130,10 +130,10 @@ if ($completedJobID) {
     }
 
     # FIXME branch code
-    my ($batch_id, $num_valid, $num_items, @import_errors) = BatchStageMarcRecords($syntax, $marcrecord, $filename, 
-                                                                                   $comments, '', $parse_items, 0,
-                                                                                   50, staging_progress_callback($job, $dbh));
+    my ($batch_id, $num_valid, $num_items, @import_errors) = BatchStageMarcRecords($encoding, $marcrecord, $filename, $comments, '', $parse_items, 0, 50, staging_progress_callback($job, $dbh));
+
     $dbh->commit();
+
     my $num_with_matches = 0;
     my $checked_matches = 0;
     my $matcher_failed = 0;
-- 
1.7.2.3



More information about the Koha-patches mailing list