[Koha-patches] [PATCH 1/8] Take care of encoding

paul.poulain at biblibre.com paul.poulain at biblibre.com
Wed Jan 19 21:55:55 CET 2011


From: Henri-Damien LAURENT <henridamien.laurent at biblibre.com>

---
 misc/migration_tools/bulkmarcimport.pl |   54 +++++++++++++++++++++++++++++---
 1 files changed, 49 insertions(+), 5 deletions(-)

diff --git a/misc/migration_tools/bulkmarcimport.pl b/misc/migration_tools/bulkmarcimport.pl
index 3bb9ed8..2b968d5 100755
--- a/misc/migration_tools/bulkmarcimport.pl
+++ b/misc/migration_tools/bulkmarcimport.pl
@@ -63,8 +63,50 @@ GetOptions(
 $biblios=!$authorities||$biblios;
 
 if ($version || ($input_marc_file eq '')) {
-    pod2usage( -verbose => 2 );
-    exit;
+    print <<EOF;    # show usage; the ';' must end the print here, else the 'exit' after the heredoc is parsed as part of it
+Small script to import bibliographic records into Koha.
+
+Parameters:
+  h      this version/help screen
+  file   /path/to/file/to/dump: the file to import
+  v      verbose mode. 1 means "some infos", 2 means "MARC dumping"
+  fk     Turn off foreign key checks during import.
+  n      the number of records to import. If missing, all the file is imported
+  o      file offset before importing, ie number of records to skip.
+  commit the number of records to wait before performing a 'commit' operation
+  l file logs actions done for each record and their status into file
+  t      test mode: parses the file, saying what he would do, but doing nothing.
+  s      skip automatic conversion of MARC-8 to UTF-8.  This option is 
+         provided for debugging.
+  c      the characteristic MARC flavour. At the moment, only MARC21 and 
+         UNIMARC are supported. MARC21 by default.
+  d      delete EVERYTHING related to biblio in koha-DB before import. Tables:
+         biblio, biblioitems, items
+  m      format, MARCXML or ISO2709 (defaults to ISO2709)
+  yaml file  format a yaml file with ids
+  keepids field store ids in field (usefull for authorities, where 001 contains the authid for Koha, that can contain a very valuable info for authorities coming from LOC or BNF. useless for biblios probably)
+  x      source bib tag for reporting the source bib number
+  y      source subfield for reporting the source bib number
+  idmap  file for the koha bib and source id
+  keepids store ids in 009 (usefull for authorities, where 001 contains the authid for Koha, that can contain a very valuable info for authorities coming from LOC or BNF. useless for biblios probably)
+  b|biblios type of import : bibliographic records
+  a|authorities type of import : authority records
+  match  matchindex,fieldtomatch matchpoint to use to deduplicate
+          fieldtomatch can be either 001 to 999 
+                       or field and list of subfields as such 100abcde
+  i|isbn if set, a search will be done on isbn, and, if the same isbn is found, the biblio is not added. It's another
+         method to deduplicate. 
+         match & i can be both set.
+IMPORTANT: don't use this script before you've entered and checked your MARC 
+           parameters tables twice (or more!). Otherwise, the import won't work 
+           correctly and you will get invalid data.
+
+SAMPLE: 
+  \$ export KOHA_CONF=/etc/koha.conf
+  \$ perl misc/migration_tools/bulkmarcimport.pl -d -commit 1000 \\
+    -file /home/jmf/koha.mrc -n 3000
+EOF
+    exit;    # help was requested or no input file was given: stop after printing usage
 }
 
 if (defined $idmapfl) {
@@ -195,6 +237,7 @@ RECORD: while (  ) {
             next RECORD;            
         }
     }
+    SetUTF8Flag($record);
     my $isbn;
     # remove trailing - in isbn (only for biblios, of course)
     if ($biblios) {
@@ -221,16 +264,17 @@ RECORD: while (  ) {
        my $server=($authorities?'authorityserver':'biblioserver');
        my ($error, $results,$totalhits)=C4::Search::SimpleSearch( $query, 0, 3, [$server] );
        die "unable to search the database for duplicates : $error" if (defined $error);
-       #warn "$query $server : $totalhits";
+       $debug && warn "$query $server : $totalhits";
        if ($results && scalar(@$results)==1){
            my $marcrecord = MARC::File::USMARC::decode($results->[0]);
+           SetUTF8Flag($marcrecord);
 	   	   $id=GetRecordId($marcrecord,$tagid,$subfieldid);
        } 
        elsif  ($results && scalar(@$results)>1){
-       $debug && warn "more than one match for $query";
+          $debug && warn "more than one match for $query";
        } 
        else {
-       $debug && warn "nomatch for $query";
+          $debug && warn "nomatch for $query";
        }
     }
 	my $originalid;
-- 
1.7.1



More information about the Koha-patches mailing list