[Koha-patches] [PATCH 1/8] Take care of encoding
paul.poulain at biblibre.com
paul.poulain at biblibre.com
Wed Jan 19 21:55:55 CET 2011
From: Henri-Damien LAURENT <henridamien.laurent at biblibre.com>
---
misc/migration_tools/bulkmarcimport.pl | 54 +++++++++++++++++++++++++++++---
1 files changed, 49 insertions(+), 5 deletions(-)
diff --git a/misc/migration_tools/bulkmarcimport.pl b/misc/migration_tools/bulkmarcimport.pl
index 3bb9ed8..2b968d5 100755
--- a/misc/migration_tools/bulkmarcimport.pl
+++ b/misc/migration_tools/bulkmarcimport.pl
@@ -63,8 +63,50 @@ GetOptions(
$biblios=!$authorities||$biblios;
if ($version || ($input_marc_file eq '')) {
- pod2usage( -verbose => 2 );
- exit;
+ print <<EOF;  # usage text; the ';' here ends the print so the 'exit;' after the heredoc parses as its own statement
+Small script to import bibliographic records into Koha.
+
+Parameters:
+ h this version/help screen
+ file /path/to/file/to/dump: the file to import
+ v verbose mode. 1 means "some infos", 2 means "MARC dumping"
+ fk Turn off foreign key checks during import.
+ n the number of records to import. If missing, all the file is imported
+ o file offset before importing, ie number of records to skip.
+ commit the number of records to wait before performing a 'commit' operation
+ l file logs actions done for each record and their status into file
+ t test mode: parses the file, saying what he would do, but doing nothing.
+ s skip automatic conversion of MARC-8 to UTF-8. This option is
+ provided for debugging.
+ c the characteristic MARC flavour. At the moment, only MARC21 and
+ UNIMARC are supported. MARC21 by default.
+ d delete EVERYTHING related to biblio in koha-DB before import. Tables:
+ biblio, biblioitems, titems
+ m format, MARCXML or ISO2709 (defaults to ISO2709)
+ yaml file format a yaml file with ids
+ keepids field store ids in field (usefull for authorities, where 001 contains the authid for Koha, that can contain a very valuable info for authorities coming from LOC or BNF. useless for biblios probably)
+ x source bib tag for reporting the source bib number
+ y source subfield for reporting the source bib number
+ idmap file for the koha bib and source id
+ keepids store ids in 009 (usefull for authorities, where 001 contains the authid for Koha, that can contain a very valuable info for authorities coming from LOC or BNF. useless for biblios probably)
+ b|biblios type of import : bibliographic records
+ a|authorities type of import : authority records
+ match matchindex,fieldtomatch matchpoint to use to deduplicate
+ fieldtomatch can be either 001 to 999
+ or field and list of subfields as such 100abcde
+ i|isbn if set, a search will be done on isbn, and, if the same isbn is found, the biblio is not added. It's another
+ method to deduplicate.
+ match & i can be both set.
+IMPORTANT: don't use this script before you've entered and checked your MARC
+ parameters tables twice (or more!). Otherwise, the import won't work
+ correctly and you will get invalid data.
+
+SAMPLE:
+ \$ export KOHA_CONF=/etc/koha.conf
+ \$ perl misc/migration_tools/bulkmarcimport.pl -d -commit 1000 \\
+ -file /home/jmf/koha.mrc -n 3000
+EOF
+exit;
}
if (defined $idmapfl) {
@@ -195,6 +237,7 @@ RECORD: while ( ) {
next RECORD;
}
}
+ SetUTF8Flag($record);
my $isbn;
# remove trailing - in isbn (only for biblios, of course)
if ($biblios) {
@@ -221,16 +264,17 @@ RECORD: while ( ) {
my $server=($authorities?'authorityserver':'biblioserver');
my ($error, $results,$totalhits)=C4::Search::SimpleSearch( $query, 0, 3, [$server] );
die "unable to search the database for duplicates : $error" if (defined $error);
- #warn "$query $server : $totalhits";
+ $debug && warn "$query $server : $totalhits";
if ($results && scalar(@$results)==1){
my $marcrecord = MARC::File::USMARC::decode($results->[0]);
+ SetUTF8Flag($marcrecord);
$id=GetRecordId($marcrecord,$tagid,$subfieldid);
}
elsif ($results && scalar(@$results)>1){
- $debug && warn "more than one match for $query";
+ $debug && warn "more than one match for $query";
}
else {
- $debug && warn "nomatch for $query";
+ $debug && warn "nomatch for $query";
}
}
my $originalid;
--
1.7.1
More information about the Koha-patches
mailing list