[Koha-patches] [PATCH] Bug 5859: Allow user to set encoding for import

Jared Camins-Esakov jcamins at bywatersolutions.com
Tue Apr 5 14:04:26 CEST 2011


Rather than having options for MARC21 and UNIMARC in the "Character encoding"
dropdown, the user should be able to select the appropriate character encoding.
The default retains the current behavior, which is to allow the system to guess
which character encoding is in use. However, it should be noticed that this is
almost always wrong for non-UTF8 records with non-ASCII characters. Specifying
a character set is much more reliable if you're not using UTF-8.
---
 C4/ImportBatch.pm                                  |   11 +++++++----
 .../prog/en/modules/tools/stage-marc-import.tmpl   |    8 ++------
 tools/stage-marc-import.pl                         |    8 ++++----
 3 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/C4/ImportBatch.pm b/C4/ImportBatch.pm
index 145e5d9..b6db406 100644
--- a/C4/ImportBatch.pm
+++ b/C4/ImportBatch.pm
@@ -210,7 +210,7 @@ sub ModBiblioInBatch {
 =head2 BatchStageMarcRecords
 
   ($batch_id, $num_records, $num_items, @invalid_records) = 
-    BatchStageMarcRecords($marc_flavor, $marc_records, $file_name, 
+    BatchStageMarcRecords($encoding, $marc_records, $file_name, 
                           $comments, $branch_code, $parse_items,
                           $leave_as_staging, 
                           $progress_interval, $progress_callback);
@@ -218,7 +218,7 @@ sub ModBiblioInBatch {
 =cut
 
 sub  BatchStageMarcRecords {
-    my $marc_flavor = shift;
+    my $encoding = shift;
     my $marc_records = shift;
     my $file_name = shift;
     my $comments = shift;
@@ -258,13 +258,16 @@ sub  BatchStageMarcRecords {
             &$progress_callback($rec_num);
         }
         my ($marc_record, $charset_guessed, $char_errors) =
-            MarcToUTF8Record($marc_blob, C4::Context->preference("marcflavour"));
+            MarcToUTF8Record($marc_blob, C4::Context->preference("marcflavour"), $encoding);
+
+        $encoding = $charset_guessed unless $encoding;
+
         my $import_record_id;
         if (scalar($marc_record->fields()) == 0) {
             push @invalid_records, $marc_blob;
         } else {
             $num_valid++;
-            $import_record_id = AddBiblioToBatch($batch_id, $rec_num, $marc_record, $marc_flavor, int(rand(99999)), 0);
+            $import_record_id = AddBiblioToBatch($batch_id, $rec_num, $marc_record, $encoding, int(rand(99999)), 0);
             if ($parse_items) {
                 my @import_items_ids = AddItemsToImportBiblio($batch_id, $import_record_id, $marc_record, 0);
                 $num_items += scalar(@import_items_ids);
diff --git a/koha-tmpl/intranet-tmpl/prog/en/modules/tools/stage-marc-import.tmpl b/koha-tmpl/intranet-tmpl/prog/en/modules/tools/stage-marc-import.tmpl
index 8660db0..a1f0b79 100644
--- a/koha-tmpl/intranet-tmpl/prog/en/modules/tools/stage-marc-import.tmpl
+++ b/koha-tmpl/intranet-tmpl/prog/en/modules/tools/stage-marc-import.tmpl
@@ -96,12 +96,8 @@ function CheckForm(f) {
 		
 	</li>
 	<li>
-		<label for="syntax">Character encoding: </label>
-        <!-- TMPL_IF name="UNIMARC" -->
-            <select name="syntax" id="syntax"><option value="MARC21">MARC21</option><option value="UNIMARC" selected="selected">UNIMARC</option></select>
-        <!-- TMPL_ELSE -->
-            <select name="syntax" id="syntax"><option value="MARC21" selected="selected">MARC21</option><option value="UNIMARC">UNIMARC</option></select>
-        <!-- /TMPL_IF -->
+		<label for="encoding">Character encoding: </label>
+        <select name="encoding" id="encoding"><option value="" selected="selected">Default</option><option value="utf8">UTF-8</option><option value="MARC-8">MARC 8</option><option value="ISO_5426">ISO 5426</option><option value="ISO_6937">ISO 6937</option><option value=ISO_8859-1">ISO 8859-1</option><option value="EUC-KR">EUC-KR</option></select>
 		
 	</li>
 </ol></fieldset>
diff --git a/tools/stage-marc-import.pl b/tools/stage-marc-import.pl
index b113332..745a36a 100755
--- a/tools/stage-marc-import.pl
+++ b/tools/stage-marc-import.pl
@@ -55,7 +55,7 @@ my $nomatch_action = $input->param('nomatch_action');
 my $parse_items = $input->param('parse_items');
 my $item_action = $input->param('item_action');
 my $comments = $input->param('comments');
-my $syntax = $input->param('syntax');
+my $encoding = $input->param('encoding');
 my ($template, $loggedinuser, $cookie)
 	= get_template_and_user({template_name => "tools/stage-marc-import.tmpl",
 					query => $input,
@@ -130,10 +130,10 @@ if ($completedJobID) {
     }
 
     # FIXME branch code
-    my ($batch_id, $num_valid, $num_items, @import_errors) = BatchStageMarcRecords($syntax, $marcrecord, $filename, 
-                                                                                   $comments, '', $parse_items, 0,
-                                                                                   50, staging_progress_callback($job, $dbh));
+    my ($batch_id, $num_valid, $num_items, @import_errors) = BatchStageMarcRecords($encoding, $marcrecord, $filename, $comments, '', $parse_items, 0, 50, staging_progress_callback($job, $dbh));
+
     $dbh->commit();
+
     my $num_with_matches = 0;
     my $checked_matches = 0;
     my $matcher_failed = 0;
-- 
1.7.2.3



More information about the Koha-patches mailing list