[Koha-cvs] koha/misc export_marc_biblios.pl build_authorit... [rel_TG]

Tumer Garip tgarip at neu.edu.tr
Mon Apr 2 02:52:00 CEST 2007


CVSROOT:	/sources/koha
Module name:	koha
Branch:		rel_TG
Changes by:	Tumer Garip <tgarip1957>	07/04/02 00:52:00

Modified files:
	misc           : export_marc_biblios.pl 
Added files:
	misc           : build_authorities.pl bulkauthimport_marc.pl 
	                 bulkbiblioimport_marc.pl 
	                 bulkitemsimport_marc.pl bulkkohaimport_xml.pl 
	                 export_marc_authorities.pl 
	misc/migration_tools: build_marc_items.pl 
	                      separate_items_from_biblios.pl 

Log message:
	Utilities to upgrade from rel2_2 DB and create new separated biblio+holdings marc db

CVSWeb URLs:
http://cvs.savannah.gnu.org/viewcvs/koha/misc/export_marc_biblios.pl?cvsroot=koha&only_with_tag=rel_TG&r1=1.1.2.1&r2=1.1.2.2
http://cvs.savannah.gnu.org/viewcvs/koha/misc/build_authorities.pl?cvsroot=koha&only_with_tag=rel_TG&rev=1.1.2.1
http://cvs.savannah.gnu.org/viewcvs/koha/misc/bulkauthimport_marc.pl?cvsroot=koha&only_with_tag=rel_TG&rev=1.1.2.1
http://cvs.savannah.gnu.org/viewcvs/koha/misc/bulkbiblioimport_marc.pl?cvsroot=koha&only_with_tag=rel_TG&rev=1.1.2.1
http://cvs.savannah.gnu.org/viewcvs/koha/misc/bulkitemsimport_marc.pl?cvsroot=koha&only_with_tag=rel_TG&rev=1.1.2.1
http://cvs.savannah.gnu.org/viewcvs/koha/misc/bulkkohaimport_xml.pl?cvsroot=koha&only_with_tag=rel_TG&rev=1.1.2.1
http://cvs.savannah.gnu.org/viewcvs/koha/misc/export_marc_authorities.pl?cvsroot=koha&only_with_tag=rel_TG&rev=1.1.2.1
http://cvs.savannah.gnu.org/viewcvs/koha/misc/migration_tools/build_marc_items.pl?cvsroot=koha&only_with_tag=rel_TG&rev=1.1.2.1
http://cvs.savannah.gnu.org/viewcvs/koha/misc/migration_tools/separate_items_from_biblios.pl?cvsroot=koha&only_with_tag=rel_TG&rev=1.1.2.1

Patches:
Index: export_marc_biblios.pl
===================================================================
RCS file: /sources/koha/koha/misc/Attic/export_marc_biblios.pl,v
retrieving revision 1.1.2.1
retrieving revision 1.1.2.2
diff -u -b -r1.1.2.1 -r1.1.2.2
--- export_marc_biblios.pl	26 Mar 2007 22:38:10 -0000	1.1.2.1
+++ export_marc_biblios.pl	2 Apr 2007 00:51:59 -0000	1.1.2.2
@@ -1,5 +1,5 @@
 #!/usr/bin/perl
-## This script allows you to export a rel_2_2 bibliographic db in 
+## This script allows you to export a authorities db in 
 #MARC21 format from the command line.
 #
 use strict;

Index: build_authorities.pl
===================================================================
RCS file: build_authorities.pl
diff -N build_authorities.pl
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ build_authorities.pl	2 Apr 2007 00:51:59 -0000	1.1.2.1
@@ -0,0 +1,250 @@
+#!/usr/bin/perl
+# script that rebuild thesaurus from biblio table.
+
+use strict;
+
+# Koha modules used
+use MARC::File::XML;
+use MARC::File::USMARC;
+use MARC::Record;
+use MARC::Batch;
+use C4::Context;
+use C4::Biblio;
+use C4::AuthoritiesMarc;
+use Time::HiRes qw(gettimeofday);
+use Encode;
+use Getopt::Long;
+use Data::Dumper;
+my ( $input_marc_file, $number) = ('',0);
+my ($version, $verbose, $delete, $confirm, $howmany);
+GetOptions(
+    'h' => \$version,
+    'd' => \$delete,
+    'v' => \$verbose,
+    'c' => \$confirm,
+# this $howmany parameter & other commented code was here to enable incremental building of the authorities, but it does not work well.
+# 	'n:s' => \$howmany,
+);
+
+if ($version || (!$confirm)) {
+	print <<EOF
+Script to recreate a authority tables into Koha from biblios
+parameters :
+\th : this version/help screen
+\tc : confirm. this script run without -c shows this help, pls run it with -c to execute it
+\tv : verbose mode.
+\td : delete the thesaurus before doing work. This deleting is smart enough to delete only the categories to rebuild. However, it is quite slow. Don''t be surprised...
+
+BEFORE RUNNING this script, you MUST edit it & adapt the %whattodo hash to fit your needs. It contains :
+* as key, the code of the authority to be created. It's the one you've choosen (or will choose) in Koha >> parameters >> thesaurus structure >> add). It can be whatever you want. NP/CO/NG/TI/NC in CVS refers to UNIMARC french RAMEAU category codes.
+* in values a sub-hash with the following values :
+\ttaglist : the list of MARC tags using this authority
+\tkey : the list of MARC subfields used as key for authority. 2 entries in biblio having the same key will be considered as the same.
+\tother : the list of MARC subfields not used as key, but to be copied in authority.
+\tauthtag : the field in authority that will be reported in biblio. Remember that all subfields in tag "authtag" will be reported in the same subfield of the biblio (in MARC tags that are in "taglist")
+
+
+Any warning will be stored in the warnings.log file.
+EOF
+;#'
+die;
+}
+
+my $dbh = C4::Context->dbh;
+
+
+my %whattodo = (AUTH =>	{
+				# the list of MARC tags using this authority
+				taglist	=> "100|700",
+				# the list of MARC subfields used as key for authority. 2 entries in biblio having the same key will be considered as the same.
+				key		=> "a|d",
+				# the list of MARC subfields not used as key, but to be copied in authority.
+				other	=> "",
+				# the field in authority that will be reported in biblio. Remember that all subfields in tag "authtag" will be reported in the same subfield of the biblio (in MARC tags that are in "taglist")
+				authtag => "100",
+			},
+		
+		CORP =>	{taglist	=> "110|710",
+				key		=> "a|b",
+				other	=> "",
+				authtag => "110",
+			},
+		ESUB =>	{	taglist	=> "650|651|655|656|657",
+				key		=> "a|x|v|y|z",
+				other	=> "",
+				authtag => "150",
+			},
+		TSUB =>	{	taglist	=> "690",
+				key		=> "a|x|v|y|z",
+				other	=> "",
+				authtag => "150",
+			},
+	
+		);
+my %authorities;
+
+open WARNING_FILE,">:utf8","warnings.log";
+
+my $field_list;
+my $category_list;
+foreach (keys %whattodo) {
+	$field_list .= $whattodo{$_}->{taglist}.'|';
+	$category_list.= "'".$_."',"
+}
+chop $field_list;
+
+if ($delete) {
+
+	print "deleting AUTHORITIES \n";
+	$dbh->do("delete from auth_header where authtypecode in ($category_list)");
+# 	die;
+}
+my $existing=$dbh->prepare("select authid,authtypecode from  auth_header where authtypecode=?");
+my $delsth=$dbh->prepare("delete from auth_header where authid=?");
+my $starttime = gettimeofday;
+my $i=1;
+my $z=1;
+foreach my $DOauthtype (keys %whattodo) {
+$existing->execute($DOauthtype);
+my $modified;
+my $alreadydone;
+my $totalskipped;
+print "reading authorities.. \n";
+while (my ($authid,$authtypecode) = $existing->fetchrow) {
+	my $authrecord = XMLgetauthorityhash($dbh,$authid);
+my $DOauthtag = $whattodo{$DOauthtype}->{authtag};
+my $DOkey = $whattodo{$DOauthtype}->{key};
+
+my $authPrimaryKey;
+			
+					foreach my $sub(split '\|',$DOkey) {
+					my $term=XML_readline_onerecord($authrecord,"","",$DOauthtag,$sub);
+					$term=~s/^\s+|\s+$//g ;
+					$term=~ s/(\.|\?|\;|\=|\/|\\|\:|\!|,|\-|\"|\(|\)|\[|\]|\{|\}|\/)//g;
+					$term=~s/\s\s/\s/g;
+					$authPrimaryKey .= join('|',$term)."|" if $term;
+					}
+					
+				$authPrimaryKey=uc($authPrimaryKey) if $authPrimaryKey;
+				if (!$authorities{$DOauthtype}->{$authPrimaryKey} && $authPrimaryKey) {
+					$authorities{$DOauthtype}->{$authPrimaryKey}->{authid} = $authid;
+					$authorities{$DOauthtype}->{$authPrimaryKey}->{record} = $authrecord;
+					$authorities{$DOauthtype}->{$authPrimaryKey}->{modified} = 0;
+					$z++;
+				} 
+			
+}#while authid
+}#foreach authtype in authorities
+print "received authorities $z \n";
+$|=1; # flushes output
+
+my $sth = $dbh->prepare("select biblionumber from biblio");
+$sth->execute;
+
+
+my $modified;
+my $alreadydone;
+my $totalskipped;
+while (my ($biblionumber) = $sth->fetchrow) {
+	my $record = XMLgetbibliohash($dbh,$biblionumber);
+	$modified=0;
+	$i++;
+	
+	print " $i in ".(gettimeofday-$starttime)." s\n" unless ($i % 100);
+	
+	my $totdone=0;
+		foreach my $DOauthtype (keys %whattodo) {
+			my $DOtaglist = $whattodo{$DOauthtype}->{taglist};
+			my $DOkey = $whattodo{$DOauthtype}->{key};
+			my $DOother = $whattodo{$DOauthtype}->{other};
+			my $DOauthtag = $whattodo{$DOauthtype}->{authtag};
+				# try to find the authority in 
+				# build the "key"
+				my $authPrimaryKey;
+				foreach my $sub(split '\|',$DOkey) {
+					my $term=XML_readline_onerecord($record,"","",$DOauthtag,$sub);
+					$term=~s/^\s+|\s+$//g ;
+					$term=~ s/(\.|\?|\;|\=|\/|\\|\:|\!|,|\-|\"|\(|\)|\[|\]|\{|\}|\/)//g;
+					$term=~s/\s\s/\s/g;
+					$authPrimaryKey .= join('|',$term)."|" if $term;
+				}##foreach $DOkey
+				
+					$authPrimaryKey=uc($authPrimaryKey) if $authPrimaryKey;
+
+				
+				# if authority exist, check it can't be completed by subfields not previously seen.
+				# otherwise, create if with whatever available.
+				if ($authorities{$DOauthtype}->{$authPrimaryKey} &&  $authPrimaryKey) {
+					# check that the existing authority has all the datas. Otherwise, add them, but don't modify already parsed biblios.
+					# at the end of the script, all authorities will be updated. So, the "merge_authority.pl" tool can be used to update all biblios.
+					foreach my $subfieldtotest (split '\|',$DOother) {
+							my $existsubauth=XML_readline_onerecord($authorities{$DOauthtype}->{$authPrimaryKey}->{record},"","",$DOauthtag,$subfieldtotest);
+							my $existsub=XML_readline_onerecord($record,"","",$DOauthtag,$subfieldtotest);
+							$existsub=Encode::encode('utf8',$existsub);
+							$existsubauth=Encode::encode('utf8',$existsubauth);
+							if ($existsubauth ne $existsub && $existsub && $existsubauth) {
+								print WARNING_FILE "========\nERROR ON $i $subfieldtotest authorities seems to differ, can't choose between : \n".$existsubauth." \n====== AND ======\n ".$existsub."\n=======\n";
+								print "W";
+							}
+							#
+							if (!$existsubauth && $existsub) {
+								XML_writeline($authorities{$DOauthtype}->{$authPrimaryKey}->{record},"",$existsub,"",$DOauthtag,$subfieldtotest);
+								
+								$authorities{$DOauthtype}->{$authPrimaryKey}->{modified} = 1;
+							}
+						
+					}#each subfieltotest
+				} elsif( $authPrimaryKey) {
+					my $authrecord = "<record><leader>     nz||a22     o||4500</leader><controlfield tag='001'></controlfield><datafield tag='100' ind1='' ind2='' code='a'></datafield></record>";##dummyrecord
+					$authrecord=XML_xml2hash_onerecord($authrecord);
+					my $authfield;
+					foreach my $sub (split '\|',$DOkey) {
+					my $existsub=XML_readline_onerecord($record,"","",$DOauthtag,$sub);
+					$existsub=Encode::encode('utf8',$existsub);						
+					XML_writeline($authrecord,"",$existsub,"",$DOauthtag,$sub) if $existsub;
+					}
+					foreach my $sub(split '\|',$DOother) {
+					my $existsub=XML_readline_onerecord($record,"","",$DOauthtag,$sub);							
+					$existsub=Encode::encode('utf8',$existsub);
+					XML_writeline($authrecord,"",$existsub,"",$DOauthtag,$sub) if $existsub;
+				
+					}
+					my $authid = AUTHaddauthority($dbh,$authrecord,'',$DOauthtype);
+					print "AUTHORITY $authid  added \n";
+					$authorities{$DOauthtype}->{$authPrimaryKey}->{authid} = $authid;
+					$authorities{$DOauthtype}->{$authPrimaryKey}->{record} = $authrecord;
+					$authorities{$DOauthtype}->{$authPrimaryKey}->{modified} = 0;
+				XML_writeline($record,"authid",$authid,"biblios");
+				$modified++;
+				}
+				
+			
+		}
+	
+#
+# NC
+#
+# OK, done, now store modified biblio if it has been modified
+	if ($modified) {
+		NEWnewbiblio($dbh,$record);
+		print "$modified";
+	} else {
+		# if $totalskipped is not null, we are in a biblio that has no authorities entry, but inside an already done part of the job
+			print "*";
+	}
+}
+
+#
+# now, parse authorities & modify them if they have been modified/completed by a subfield not existing on the 1st biblio using this authority.
+#
+foreach my $authtype (keys %whattodo) {
+	foreach my $authentry (keys %{$authorities{$authtype}}) {
+		print "AUTH : $authentry\n" if $authorities{$authtype}->{$authentry}->{modified};
+		
+		AUTHaddauthority($dbh,$authorities{$authtype}->{$authentry}->{record},$authorities{$authtype}->{$authentry}->{authid},$authtype) if $authorities{$authtype}->{$authentry}->{modified};
+	}
+}
+#
+my $timeneeded = gettimeofday - $starttime;
+print "$i entries done in $timeneeded seconds (".($i/$timeneeded)." per second)\n";
+close WARNING_FILE;
\ No newline at end of file

Index: bulkauthimport_marc.pl
===================================================================
RCS file: bulkauthimport_marc.pl
diff -N bulkauthimport_marc.pl
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ bulkauthimport_marc.pl	2 Apr 2007 00:51:59 -0000	1.1.2.1
@@ -0,0 +1,82 @@
+#!/usr/bin/perl
+# small script that import an iso2709 file into koha 2.0
+
+use strict;
+
+# Koha modules used
+use MARC::File::USMARC;
+use MARC::File::XML;
+use MARC::Record;
+use MARC::Batch;
+use C4::Biblio;
+use C4::Context;
+use C4::AuthoritiesMarc;
+use Time::HiRes qw(gettimeofday);
+
+use Getopt::Long;
+my $input_marc_file ="");
+my ($version, $delete, $test_parameter,$auth, $verbose);
+GetOptions(
+    'file:s'    => \$input_marc_file,
+     'h:s'    =>\$version,
+    'd' => \$delete,
+    't' => \$test_parameter,
+    'auth:s' => \$auth,
+    'v:s' => \$verbose,
+);
+
+if ($version || ($input_marc_file eq '')) {
+	print <<EOF
+small script to import an iso2709 file into Koha.
+parameters :
+\th : this version/help screen
+\tfile /path/to/file/to/import : the file to import
+\tv : verbose mode. 1 means "some infos", 2 means "MARC dumping"
+\tt : test mode : parses the file, saying what he would do, but doing nothing.
+\tauth : Authority type. If not specified attempts to read it from record
+\d : delete EVERYTHING related to authorities in koha-DB before import   :
+NOTE: If auhority files contains authid's they will be retained and the same numbered authorities will be replaced
+
+IMPORTANT : don't use this script before you've entered and checked twice (or more) your  MARC parameters tables.
+If you fail this, the import won't work correctly and you will get invalid datas.
+
+SAMPLE : ./bulkauthimport.pl -file /home/paul/koha.dev/local/npl -v 1
+EOF
+;#'
+die;
+}
+
+my $dbh = C4::Context->dbh;
+
+if ($delete) {
+	print "deleting authorities\n";
+	$dbh->do("truncate table auth_header");
+	
+}
+if ($test_parameter) {
+	print "TESTING MODE ONLY\n    DOING NOTHING\n===============\n";
+}
+
+my $starttime = gettimeofday;
+my $batch = MARC::Batch->new( 'USMARC', $input_marc_file );
+$batch->warnings_off();
+$batch->strict_off();
+my $i=0;
+while ( my $record = $batch->next() ) {
+	$i++;
+my $xml=MARC::File::XML::record($record);
+my $xmlhash=XML_xml2hash_onerecord($xml);
+	 $auth=XML_readline_onerecord($xmlhash,"authtypecode","authorities") unless $auth;
+	my $authid=XML_readline_onerecord($xmlhash,"authid","authorities")
+	if (!$auth||$auth eq""){
+	print "Records do not have authoritytype define with -auth parameter";
+	die;
+	}
+	## now, create authority with AUTHadd call.
+	unless ($test_parameter) {
+		$authid = AUTHaddauthority($dbh,$xmlhash,$authid,$authtypecode);
+		warn "ADDED authority NB $authid in DB\n" if $verbose;
+	}
+}
+my $timeneeded = gettimeofday - $starttime;
+print "$i MARC record done in $timeneeded seconds";

Index: bulkbiblioimport_marc.pl
===================================================================
RCS file: bulkbiblioimport_marc.pl
diff -N bulkbiblioimport_marc.pl
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ bulkbiblioimport_marc.pl	2 Apr 2007 00:51:59 -0000	1.1.2.1
@@ -0,0 +1,76 @@
+#!/usr/bin/perl
+# small script that import an iso2709 file into koha 2.0
+
+use strict;
+
+# Koha modules used
+use MARC::File::USMARC;
+use MARC::File::XML;
+use MARC::Record;
+use MARC::Batch;
+use C4::Biblio;
+use C4::Context;
+use Time::HiRes qw(gettimeofday);
+
+use Getopt::Long;
+my ($input_marc_file= "");
+my ($version, $delete, $test_parameter,$frameworkcode, $verbose);
+GetOptions(
+    'file:s'    => \$input_marc_file,
+     'h:s'    =>\$version,
+    'd' => \$delete,
+    't' => \$test_parameter,
+    'frame:s' => \$frameworkcode,
+    'v:s' => \$verbose,
+);
+
+if ($version || ($input_marc_file eq '')) {
+	print <<EOF
+small script to import an iso2709 file into Koha.
+parameters :
+\th : this version/help screen
+\tfile /path/to/file/to/import : the file to import
+\tv : verbose mode. 1 means "some infos", 2 means "MARC dumping"
+\tt : test mode : parses the file, saying what he would do, but doing nothing.
+\tframe : Frameworkcode. If not specified attempts to read it from record
+\d : delete EVERYTHING related to biblios in koha-DB before import   :
+NOTE: If  files contains biblionumbers they will be retained and the same numbered biblios will be replaced
+
+IMPORTANT : don't use this script before you've entered and checked twice (or more) your  MARC parameters tables.
+If you fail this, the import won't work correctly and you will get invalid datas.
+
+SAMPLE : ./bulkbiblioimport_marc.pl -file /home/paul/koha.dev/local/npl/biblios.mrc -v 1
+EOF
+;#'
+die;
+}
+
+my $dbh = C4::Context->dbh;
+
+if ($delete) {
+	print "deleting biblio\n";
+	$dbh->do("truncate table biblio");
+	
+}
+if ($test_parameter) {
+	print "TESTING MODE ONLY\n    DOING NOTHING\n===============\n";
+}
+
+my $starttime = gettimeofday;
+my $batch = MARC::Batch->new( 'USMARC', $input_marc_file );
+$batch->warnings_off();
+$batch->strict_off();
+my $i=0;
+while ( my $record = $batch->next() ) {
+	$i++;
+my $xml=MARC::File::XML::record($record);
+my $xmlhash=XML_xml2hash_onerecord($xml);
+
+	## now, create authority with NEWnew call.
+	unless ($test_parameter) {
+		my $biblionumber =NEWnewbiblio($dbh,$xmlhash,$frameworkcode);
+		warn "ADDED biblionumber NB $biblionumber in DB\n" if $verbose;
+	}
+}
+my $timeneeded = gettimeofday - $starttime;
+print "$i MARC record done in $timeneeded seconds";

Index: bulkitemsimport_marc.pl
===================================================================
RCS file: bulkitemsimport_marc.pl
diff -N bulkitemsimport_marc.pl
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ bulkitemsimport_marc.pl	2 Apr 2007 00:51:59 -0000	1.1.2.1
@@ -0,0 +1,78 @@
+#!/usr/bin/perl
+# small script that import an iso2709 file into koha 2.0
+
+use strict;
+
+# Koha modules used
+use MARC::File::USMARC;
+use MARC::File::XML;
+use MARC::Record;
+use MARC::Batch;
+use C4::Biblio;
+use C4::Context;
+use Time::HiRes qw(gettimeofday);
+
+use Getopt::Long;
+my $input_marc_file = '';
+my ($version, $delete, $test_parameter, $verbose);
+GetOptions(
+    'file:s'    => \$input_marc_file,
+     'h:s'    =>\$version,
+    'd' => \$delete,
+    't' => \$test_parameter,
+    'v:s' => \$verbose,
+);
+
+if ($version || ($input_marc_file eq '')) {
+	print <<EOF
+small script to import an iso2709 file into Koha.
+parameters :
+\th : this version/help screen
+\tfile /path/to/file/to/import : the file to import
+\tv : verbose mode. 1 means "some infos", 2 means "MARC dumping"
+\tt : test mode : parses the file, saying what he would do, but doing nothing.
+\d : delete EVERYTHING related to authorities in koha-DB before import   :
+NOTE: If items files do not contain biblionumbers they will not be imported
+
+IMPORTANT : don't use this script before you've entered and checked twice (or more) your  MARC parameters tables.
+If you fail this, the import won't work correctly and you will get invalid datas.
+
+SAMPLE : ./bulkitemsimport_marc.pl -file /home/paul/koha.dev/local/npl/biblios.mrc -v 1
+EOF
+;#'
+die;
+}
+
+my $dbh = C4::Context->dbh;
+
+if ($delete) {
+	print "deleting items\n";
+	$dbh->do("truncate table items");
+	
+}
+if ($test_parameter) {
+	print "TESTING MODE ONLY\n    DOING NOTHING\n===============\n";
+}
+
+my $starttime = gettimeofday;
+my $batch = MARC::Batch->new( 'USMARC', $input_marc_file );
+$batch->warnings_off();
+$batch->strict_off();
+my $i=0;
+while ( my $record = $batch->next() ) {
+	$i++;
+my $xml=MARC::File::XML::record($record);
+my $xmlhash=XML_xml2hash_onerecord($xml);
+my $biblionumber=XML_readline_onerecord($xmlhash,"biblionumber","holdings");
+   if (!$biblionumber){
+     print "NO biblionumber in record cannot continue";
+     die;
+   }
+	## now, create authority with NEWnew call.
+	unless ($test_parameter) {
+		my $itemnumber = NEWnewitem($dbh,$xmlhash,$biblionumber);
+		warn "ADDED itemnumber NB $itemnumber in DB\n" if $verbose;
+	}
+}
+my $timeneeded = gettimeofday - $starttime;
+print "$i MARC record done in $timeneeded seconds";

Index: bulkkohaimport_xml.pl
===================================================================
RCS file: bulkkohaimport_xml.pl
diff -N bulkkohaimport_xml.pl
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ bulkkohaimport_xml.pl	2 Apr 2007 00:51:59 -0000	1.1.2.1
@@ -0,0 +1,89 @@
+#!/usr/bin/perl
+# small script that import an iso2709 file into koha 2.0
+
+use strict;
+
+# Koha modules used
+
+use C4::Biblio;
+use C4::Context;
+use Time::HiRes qw(gettimeofday);
+
+use Getopt::Long;
+my $input_marc_file= "";
+my ($version, $delete, $test_parameter,$frameworkcode, $verbose);
+GetOptions(
+    'file:s'    => \$input_marc_file,
+     'h:s'    =>\$version,
+    'd:s' => \$delete,
+    't' => \$test_parameter,
+    'frame:s' => \$frameworkcode,
+    'v:s' => \$verbose,
+);
+
+if ($version || ($input_marc_file eq '')) {
+	print <<EOF
+small script to import an iso2709 file into Koha.
+parameters :
+\th : this version/help screen
+\tfile /path/to/file/to/import : the file to import
+\tv : verbose mode. 1 means "some infos", 2 means "MARC dumping"
+\tt : test mode : parses the file, saying what he would do, but doing nothing.
+\tframe : Frameworkcode. If not specified attempts to read it from record
+\d : delete EVERYTHING related to biblios in koha-DB before import   :
+NOTE: If  files contains biblionumbers they will be retained and the same numbered biblios will be replaced
+
+IMPORTANT : don't use this script before you've entered and checked twice (or more) your  MARC parameters tables.
+If you fail this, the import won't work correctly and you will get invalid datas.
+
+SAMPLE : ./bulkbiblioimport_marc.pl -file /home/paul/koha.dev/local/npl/biblios.mrc -v 1
+EOF
+;#'
+die;
+}
+
+my $dbh = C4::Context->dbh;
+
+if ($delete) {
+	print "deleting biblio\n";
+	$dbh->do("truncate table biblio");
+	print "deleting items\n";
+	$dbh->do("truncate table items");
+	
+}
+if ($test_parameter) {
+	print "TESTING MODE ONLY\n    DOING NOTHING\n===============\n";
+}
+
+my $starttime = gettimeofday;
+open INPUT, "<:utf8","$input_marc_file" || print "no infile $input_marc_file";
+my $i=0;
+my $xml;
+ while ( <INPUT> ) {
+if (m/\<kohacollection\>/ || m/\<\/kohacollection\>/){next;}
+
+ $xml.=$_;
+	if (m/\<\/koharecord\>/){
+	$xml=createrecord($xml);
+	}#koharecord
+}#while
+close(INPUT);
+my $timeneeded = gettimeofday - $starttime;
+print "$i KOHA records done in $timeneeded seconds";
+
+sub createrecord{
+my $xmlin=shift;
+my $xmlhash=XML_xml2hash($xmlin);
+	my ($biblio, at items)=XML_separate($xmlhash);
+	## now, create biblios with NEWnew call.
+	unless ($test_parameter) {
+	$i++;
+		my $biblionumber = NEWnewbiblio($dbh,$biblio,$frameworkcode);
+		print "ADDED biblionumber NB $biblionumber in DB\n" if $verbose;
+		foreach my $item (@items){
+		my $itemnumber = NEWnewitem($dbh,$item,$biblionumber);
+		print "ADDED itemnumber NB $itemnumber in DB\n" if $verbose;
+		}
+	}##test
+	return "";
+}
\ No newline at end of file

Index: export_marc_authorities.pl
===================================================================
RCS file: export_marc_authorities.pl
diff -N export_marc_authorities.pl
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ export_marc_authorities.pl	2 Apr 2007 00:51:59 -0000	1.1.2.1
@@ -0,0 +1,34 @@
+#!/usr/bin/perl
+## This script allows you to export a authorities db in 
+#MARC21 format from the command line.
+#
+use strict;
+
+use C4::Auth;
+use MARC::Record;
+use MARC::File::USMARC;
+use MARC::File::XML;
+use Getopt::Long;
+my  $out_marc_file;
+
+GetOptions(
+    'file:s'    => \$out_marc_file,
+   
+);
+my $record;
+open(OUT,">:utf8", $out_marc_file) or die $!;
+
+	
+my $dbh=C4::Context->dbh;
+	my $sth;
+		$sth=$dbh->prepare("select marcxml from auth_header  order by authid ");
+		$sth->execute();
+	
+	while (my ($xml) = $sth->fetchrow) {
+	eval{
+	 $record=MARC::Record->new_from_xml( $xml,"UTF-8");
+	};
+	if ($@){next;}
+		print OUT $record->as_usmarc;
+	}
+close(OUT);

Index: migration_tools/build_marc_items.pl
===================================================================
RCS file: migration_tools/build_marc_items.pl
diff -N migration_tools/build_marc_items.pl
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ migration_tools/build_marc_items.pl	2 Apr 2007 00:51:59 -0000	1.1.2.1
@@ -0,0 +1,112 @@
+#!/usr/bin/perl 
+#-----------------------------------
+# Script Name: build_marc_items.pl
+# Script Version: 4.1.0
+# Date:  01/04/2007
+##I Utility function to export items from a rel2_2 as separete marc records
+##Writen by Tumer Garip tgarip at neu.edu.tr
+
+
+
+use strict;
+
+use C4::Context;
+use C4::Biblio;
+use MARC::Record;
+use MARC::File::USMARC;
+use MARC::Field;
+my $dbh=C4::Context->dbh;
+use Time::HiRes qw(gettimeofday);
+use Getopt::Long;
+my $outitems;
+GetOptions(
+     'outitems:s' => \$outitems,
+);
+if (!$outitems ) {
+	print <<EOF
+parameters :
+\toutitems : file to create for items marc from a rel2_2 DB
+
+IMPORTANT : this script has a mapping structure to map items to marc. 
+It assumes the default mapping this version installs.
+ Change as necessary to match what you will be defining or defined if you like.
+
+SAMPLE : ./build_marc_items.pl  -outitems holdings.mrc
+EOF
+;#'
+die;
+}
+##Adjust this mapping list to your own needs### IMPORTANT
+my %mapping_list = (	
+	 itemnumber           =>'001',     biblionumber     => '004',
+            multivolumepart      => '952i',
+            barcode          => '952p',
+            booksellerid         =>'952e',     dateaccessioned  => '008',
+            homebranch           => '952a',     holdingbranch    => '952b',
+            price                => '952u',     replacementprice => '952v',
+            replacementpricedate =>'952w' , datelastseen     => '005',
+            multivolume          => '952j',     stack            =>'952f',
+            itemlost             => '9521',     wthdrawn         =>'9520',
+            paidfor              => '952r',     itemnotes        => '952z',
+            itemcallnumber       =>'952o',      notforloan       => '952y',
+            location             =>'952g',     Cutterextra      =>'952m',
+	);
+
+open(OUTITEMS,">:utf8","$outitems") ;
+my $starttime = gettimeofday;
+my $sth=$dbh->prepare("SELECT * FROM items  order by itemnumber");
+$sth->execute;
+
+
+my $b=0;
+my $timeneeded;
+while (my $data = $sth->fetchrow_hashref) {	 
+my $record=MARC::Record->new();
+my %prevtag;
+my $addedfield;
+foreach my $key (keys %mapping_list){
+  if($data->{$key}){
+my $newtag=substr($mapping_list{$key},0,3);
+my $newsub=substr($mapping_list{$key},3,1);
+	if ($key eq 'datelastseen'){
+	my $datelastseen=$data->{$key};
+	$datelastseen=~s /\-//g;
+	$datelastseen.="000000.0"; ###MARC field 005 requires this
+	$data->{$key}=$datelastseen;
+	}elsif($key eq 'dateaccessioned'){
+	my $dateaccessioned=$data->{$key};
+		$dateaccessioned=~s /\-//g;
+		$dateaccessioned=substr($dateaccessioned,2,6);
+		$dateaccessioned.="s        xxu||||| |||| 00| 0 xxx d";
+	$data->{$key}=$dateaccessioned;## MARC 008 requires this
+	}
+	   
+		if ($newsub && !$prevtag{$newtag}){
+		$addedfield=MARC::Field->new($newtag,"","",$newsub=>$data->{$key});
+		$record->insert_fields_ordered($addedfield) ;
+		}elsif (!$newsub && !$prevtag{$newtag}){
+		$addedfield=MARC::Field->new($newtag,$data->{$key});
+		$record->insert_fields_ordered($addedfield) ;
+		}elsif($prevtag{$newtag}){
+		$record->field($newtag)->update($newsub=>$data->{$key});
+		}## a subfield exists
+	
+	$prevtag{$newtag}=1;
+    }
+
+
+	
+}##foreach $key
+$b++;
+## Now print out
+$record->leader('     nx||a22     1i|4500');
+print OUTITEMS $record->as_usmarc;	
+}##while 
+
+close(OUTITEMS);
+	
+	$timeneeded = gettimeofday - $starttime ;
+	print "$b items in $timeneeded s\n" ;
+
+
+$dbh->disconnect();

Index: migration_tools/separate_items_from_biblios.pl
===================================================================
RCS file: migration_tools/separate_items_from_biblios.pl
diff -N migration_tools/separate_items_from_biblios.pl
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ migration_tools/separate_items_from_biblios.pl	2 Apr 2007 00:51:59 -0000	1.1.2.1
@@ -0,0 +1,63 @@
+#!/usr/bin/perl
+# script that separate old KOHA rel2 marc records into biblios and holdings records
+#  Written by TG on 10/04/2006
+use strict;
+
+# Koha modules used
+
+use C4::Context;
+use C4::Biblio;
+use MARC::Record;
+use MARC::File::USMARC;
+use MARC::Batch;
+use Time::HiRes qw(gettimeofday);
+use Getopt::Long;
+my ($outbiblios,$input_marc_file);
+GetOptions(
+    'file:s'    => \$input_marc_file,
+     'outbiblio:s' => \$outbiblios,
+     
+);
+if ($outbiblios || ($input_marc_file eq '')) {
+	print <<EOF
+parameters :
+
+\tfile /path/to/file/to/marc : the marc file to parse
+\toutbiblio : file to create for biblio only marcs
+
+NOTE: this script assumes items to be at tag 952 change as necessary to match yours
+
+SAMPLE : ./separate_items_from_biblios.pl -file exportedmarc.mrc -outbiblio biblios.mrc
+EOF
+;#'
+die;
+}
+
+#####CHANGE THESE AS APPROPRIATE TO EXISTING RECORDS
+my $itemtag="952";
+
+#############
+
+open(OUTBIBLIO,">$outbiblio") ;
+my $starttime = gettimeofday;
+my $timeneeded;
+
+my $i=0;
+my $batch = MARC::Batch->new( 'USMARC', $input_marc_file );
+$batch->warnings_off();
+$batch->strict_off();
+my $i=0;
+
+while ( my $record = $batch->next() ) {
+my @itemfields=$record->field('$itemtag);
+	foreach my $itemfield(@itemfields){
+	$record->delete_field($itemfield);
+	}
+$i++;
+print OUTBIBLIO $record;
+}
+close OUTBIBLIO;
+$timeneeded = gettimeofday - $starttime ;
+	warn "$i records in $timeneeded s\n" ;
+
+END;





More information about the Koha-cvs mailing list