[Koha-cvs] CVS: koha/misc/migration_tools build6xx.pl,NONE,1.1.2.1 buildEDITORS.pl,NONE,1.1.2.1

Paul POULAIN tipaul at users.sourceforge.net
Tue Jan 11 16:13:39 CET 2005


Update of /cvsroot/koha/koha/misc/migration_tools
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv16469/misc/migration_tools

Added Files:
      Tag: rel_2_2
	build6xx.pl buildEDITORS.pl 
Log Message:
new migration tools :
* build6xx : script that builds the 6xx tags in the authority list. Note that this script is a framework: it MUST be tuned.
* buildEDITORS : script that builds the EDITORS authority file from biblios. Useful for the unimarc_210c and unimarc_225a plugins. Run it without parameters to get help.

--- NEW FILE ---
#!/usr/bin/perl
# script that rebuilds the thesaurus from the biblio tables.

# delete  FROM  `marc_subfield_table`  WHERE tag =  "606" AND subfieldcode = 9;
use strict;

# Koha modules used
use MARC::File::USMARC;
use MARC::Record;
use MARC::Batch;
use C4::Context;
use C4::Biblio;
use C4::AuthoritiesMarc;
use Time::HiRes qw(gettimeofday);

use Getopt::Long;
# Declared with defaults; no command-line switch below writes to them.
my $input_marc_file = '';
my $number          = 0;

# Targets of the command-line switches. Each one stays undef unless the
# matching switch is present on the command line.
my $version;          # -h : show the help text
my $verbose;          # -v
my $test_parameter;   # -t : test mode
my $field;            # not bound to any switch
my $delete;           # -d : delete the existing entries first
my $category;         # -c [value] : thesaurus category (value is optional)
my $subfields;        # -s [value] : optional value

# Getopt::Long specification => variable receiving the value.
my @option_spec = (
	'h'   => \$version,
	'd'   => \$delete,
	't'   => \$test_parameter,
	's:s' => \$subfields,
	'v'   => \$verbose,
	'c:s' => \$category,
);
GetOptions(@option_spec);

# Show the help text and stop when -h is given, or when the mandatory -c
# switch is missing or has an empty value (it acts as the confirmation that
# the 6xx authorities really must be rebuilt).
if ($version || !defined($category) || $category eq '') {
	print <<EOF;
small script to recreate a authority table into Koha.
parameters :
\tc : thesaurus category. Can be filled with anything, the NC is hardcoded. But mandatory to confirm that you want to rebuild 6xx
\td : delete every entry of the selected category before doing work.

SAMPLES :
 ./build6xx.pl -c NC -d 
EOF
	# Explicitly requested help is a success; a missing -c is a usage error.
	exit( $version ? 0 : 1 );
}

# Database handle used by everything below.
my $dbh = C4::Context->dbh;

# Split the -s value into its "##nnn##x" chunks (empty list when -s is not
# given). Nothing else in this script reads @subf.
my @subf = defined $subfields ? $subfields =~ /(##\d\d\d##.)/g : ();

# -d : wipe the NC authorities and the $9 links pointing to them.
# Test mode (-t) must not touch the database, so the deletion is skipped then.
if ($delete) {
	if ($test_parameter) {
		print "test mode : thesaurus NOT deleted\n";
	} else {
		print "deleting thesaurus\n";
		my $del1 = $dbh->prepare("delete from auth_subfield_table where authid=?");
		my $del2 = $dbh->prepare("delete from auth_word where authid=?");
		my $sth = $dbh->prepare("select authid from auth_header where authtypecode='NC'");
		$sth->execute;
		# remove the subfields and index words of every NC authority ...
		while (my ($authid) = $sth->fetchrow) {
			$del1->execute($authid);
			$del2->execute($authid);
		}
		# ... then the authority headers and the 606$9 links in the biblios
		$dbh->do("delete from auth_header where authtypecode='NC'");
		$dbh->do("delete from marc_subfield_table where tag='606' and subfieldcode='9'");
		$dbh->do("delete from marc_word where tagsubfield='6069'");
	}
}

if ($test_parameter) {
	print "TESTING MODE ONLY\n    DOING NOTHING\n===============\n";
}
$|=1; # flushes output
my $starttime = gettimeofday;
# every biblio of the catalogue, walked by the main loop
my $sth = $dbh->prepare("select bibid from marc_biblio");
$sth->execute;
my $i=1;          # 1-based number of the biblio being processed
my %alreadydone;  # normalised heading => authid of the authority created for it

# search biblios to "connect" to an authority with any number of $x (limited to 4 $x in this script)
#
# Builds the query returning the 606$a subfields whose value is the first
# placeholder, in biblios that also hold one 'x' subfield per extra
# placeholder. marc_subfield_table is joined to itself once per $x (aliases
# m2, m3, ...), each alias on its OWN bibid. The hand-written versions of the
# 3 and 4 $x queries joined m3 on m4.bibid, a copy/paste slip.
my $biblios_sql = sub {
	my ($x_count) = @_;
	my @aliases = map { "m$_" } 2 .. $x_count + 1;
	my $sql = "select distinct m1.bibid,m1.tag,m1.tagorder,m1.subfieldorder from marc_subfield_table as m1";
	$sql .= " left join marc_subfield_table as $_ on m1.bibid=$_.bibid" for @aliases;
	$sql .= " where m1.tag in (606) and m1.subfieldcode='a'";
	$sql .= " and $_.subfieldcode='x'" for @aliases;
	# placeholders, in order : the $a value, then each $x value
	$sql .= " and m1.subfieldvalue=?";
	$sql .= " and $_.subfieldvalue=?" for @aliases;
	return $sql;
};
my $sthBIBLIOS     = $dbh->prepare($biblios_sql->(0));
my $sthBIBLIOSx    = $dbh->prepare($biblios_sql->(1));
my $sthBIBLIOSxx   = $dbh->prepare($biblios_sql->(2));
my $sthBIBLIOSxxx  = $dbh->prepare($biblios_sql->(3));
my $sthBIBLIOSxxxx = $dbh->prepare($biblios_sql->(4));

# Builds the key used to recognise a heading : the $a value followed by every
# $x value, joined with " -- ", with the accented letters listed below
# replaced by their plain letter and the whole string uppercased.
# The hash entry contains all $x and the $a in a single string, so that
# "$a something $x else" and "$a something else" stay different headings.
sub _heading_key {
	my ($heading, @subdivisions) = @_;
	my $key = join(' -- ', $heading, @subdivisions);
	# remove the accents : all the same for mysql, but NOT for perl hashes !
	# without those lines, tôt is not tot and patée is not patee
	$key =~ s/é|ê|è/e/g;
	$key =~ s/â|à/a/g;
	$key =~ s/î/i/g;
	$key =~ s/ô/o/g;
	$key =~ s/ù|û/u/g;
	# uppercase all, in case of typing error.
	return uc($key);
}

# Runs $sth_biblios with @values ($a value, then the $x values) and adds a $9
# subfield holding $authid to every returned 606$a, provided that the biblio
# really has a 606 field whose key equals $hashentry. That check is needed
# because the SQL alone does not look at the number of $x in the field.
sub _link_biblios {
	my ($dbh, $sth_biblios, $authid, $hashentry, @values) = @_;
	$sth_biblios->execute(@values);
	while (my ($bibid,$tag,$tagorder,$subfieldorder) = $sth_biblios->fetchrow) {
		my $inbiblio = MARCgetbiblio($dbh,$bibid);
		my $isOK = 0;
		# loop in each 606 field and rebuild its key like for $hashentry
		foreach my $in606 ($inbiblio->field('606')) {
			my $inEntry = _heading_key(scalar($in606->subfield('a')), $in606->subfield('x'));
			if ($inEntry eq $hashentry) {
				$isOK = 1;
				last;
			}
		}
		# it's confirmed that we must add the $9 subfield for this biblio
		C4::Biblio::MARCaddsubfield($dbh,$bibid,$tag,'',$tagorder,9,$subfieldorder,$authid) if $isOK;
	}
	return;
}

# Statement handles indexed by the number of $x they expect (0 to 4).
my @sth_by_x_count = ($sthBIBLIOS, $sthBIBLIOSx, $sthBIBLIOSxx, $sthBIBLIOSxxx, $sthBIBLIOSxxxx);

# loop through each biblio
while (my ($bibid) = $sth->fetchrow) {
	my $record = MARCgetbiblio($dbh,$bibid);
	my $timeneeded = gettimeofday - $starttime;
	print "$i in $timeneeded s\n" unless ($i % 50);
	# only the in-memory copy is changed : $record is never written back here
	foreach my $field ($record->field(995)) {
		$record->delete_field($field);
	}
	# search the 606 field(s)
	foreach my $field ($record->field("606")) {
		my @x = $field->subfield('x');
		foreach my $authentry ($field->subfield("a")) {
			my $hashentry = _heading_key($authentry, @x);
			if ($alreadydone{$hashentry}) {
				# heading already handled while processing an earlier biblio
				print ".";
				next;
			}
			print "*";
			if ($test_parameter) {
				# test mode : remember the heading but write nothing
				$alreadydone{$hashentry} = 1;
				next;
			}
			#create authority.
			my $authorityRecord = MARC::Record->new();
			my $newfield = MARC::Field->new(250,'','','a' => "".$authentry);
			$newfield->add_subfields('x' => $_) foreach @x;
			$newfield->add_subfields('z' => $_) foreach $field->subfield('z');
			$authorityRecord->insert_fields_ordered($newfield);
			my $authid = AUTHaddauthority($dbh,$authorityRecord,'','NC');
			$alreadydone{$hashentry} = $authid;
			# we have the authority number, now we update all biblios that use this authority...
			if (@x < @sth_by_x_count) {
				_link_biblios($dbh, $sth_by_x_count[scalar @x], $authid, $hashentry, $authentry, @x);
			} else {
				# too many $x, not handled, warn the developer that tries to migrate
				print "warning : ".scalar(@x).' $x values in one 606 field, biblios not linked'."\n";
			}
		}
	}
	$i++;
}
my $timeneeded = gettimeofday - $starttime;
# $i starts at 1 and is incremented after each biblio, so it is one too high here
my $done = $i - 1;
print "$done entries done in $timeneeded seconds (".($timeneeded ? $done/$timeneeded : 0)." per second)\n";

--- NEW FILE ---
#!/usr/bin/perl
# script that rebuilds the EDITORS authorities

use strict;

# Koha modules used
use MARC::File::USMARC;
use MARC::Record;
use MARC::Batch;
use C4::Context;
use C4::Biblio;
use C4::AuthoritiesMarc;
use Time::HiRes qw(gettimeofday);

use Getopt::Long;
# Declared with defaults; no command-line switch below writes to them.
my $input_marc_file = '';
my $number          = 0;

# Targets of the command-line switches. Each one stays undef unless the
# matching switch is present on the command line.
my $version;          # -h : show the help text
my $verbose;          # -v
my $test_parameter;   # -t : test mode
my $confirm;          # -c : confirmation flag
my $delete;           # -d : delete the existing EDITORS first

# Getopt::Long specification => variable receiving the flag.
my @option_spec = (
	'h' => \$version,
	'd' => \$delete,
	't' => \$test_parameter,
	'v' => \$verbose,
	'c' => \$confirm,
);
GetOptions(@option_spec);

# The script only works when -c is given; with -h, or without -c, it prints
# the help screen below and stops.
my $show_help = ($version || !$confirm);
if ($show_help) {
	my $usage = <<EOF;
small script to recreate a authority table into Koha.
This will parse all your biblios to recreate isbn / editor / collections for the unimarc_210c and unimarc_225a plugins.

Remember those plugins will work only if you have an EDITORS authority type, with
\t200a being the first 2 parts of an ISBN
\t200b being the editor name
\t200c (repeatable) being the series title

parameters :
\t-c : confirmation flag. the script will run only with this flag. Otherwise, it will just show this help screen.
\t-d : delete existing EDITORS before rebuilding them
\t-t : test parameters : run the script but don't create really the EDITORS
EOF
	print $usage;
	exit;
}

# Database handle used by everything below.
my $dbh = C4::Context->dbh;

# -d : remove every existing EDITORS authority before rebuilding them.
# Test mode (-t) must not touch the database, so the deletion is skipped then.
if ($delete) {
	if ($test_parameter) {
		print "test mode : EDITORS NOT deleted\n";
	} else {
		print "deleting EDITORS\n";
		my $del1 = $dbh->prepare("delete from auth_subfield_table where authid=?");
		my $del2 = $dbh->prepare("delete from auth_word where authid=?");
		my $sth = $dbh->prepare("select authid from auth_header where authtypecode='EDITORS'");
		$sth->execute;
		# remove the subfields and index words of every EDITORS authority ...
		while (my ($authid) = $sth->fetchrow) {
			$del1->execute($authid);
			$del2->execute($authid);
		}
		# ... then the authority headers themselves
		$dbh->do("delete from auth_header where authtypecode='EDITORS'");
	}
}

if ($test_parameter) {
	print "TESTING MODE ONLY\n    DOING NOTHING\n===============\n";
}
$|=1; # flushes output
my $starttime = gettimeofday;
my $sth = $dbh->prepare("select bibid from marc_biblio");
$sth->execute;
my $i=1;          # not used by the active code of this script
my %alreadydone;  # not used by the active code of this script
my $counter = 0;  # number of biblios with an ISBN processed so far
# ISBN start (segment 1 . segment 2) => { editors => first non-empty 210$c seen,
#                                         collections => { 225$a => occurrences } }
my %hash;

# Returns the leading segment of $digits. It tries $first_len characters, then
# one more character per entry of @limits, and keeps the first prefix whose
# numeric value is <= the matching limit. When no limit matches, the prefix is
# one character longer than the last one tried.
sub _leading_segment {
	my ($digits, $first_len, @limits) = @_;
	my $len = $first_len;
	foreach my $limit (@limits) {
		my $prefix = substr($digits, 0, $len);
		return $prefix if $prefix <= $limit;
		$len++;
	}
	return substr($digits, 0, $len);
}

while (my ($bibid) = $sth->fetchrow) {
	my $record = MARCgetbiblio($dbh,$bibid);
	my $isbnField = $record->field('010');
	next unless $isbnField;
	my $isbn=$isbnField->subfield('a');
	# a 010 field without $a carries no ISBN : nothing to learn from this biblio
	next unless (defined $isbn and length $isbn);

	# NOTE(review): the substr-based split assumes the ISBN is stored without
	# hyphens -- confirm against the data being migrated.
	# segment 1 : 1 to 5 characters
	my $seg1 = _leading_segment($isbn, 1, 7, 94, 995, 9989);
	# segment 2 : 2 to 7 characters taken after segment 1. The 6-character
	# bound is 949999 ; the former 9499999 could never be exceeded by 6
	# characters, which made the 7-character case unreachable.
	my $x = substr($isbn, length($seg1));
	my $seg2 = _leading_segment($x, 2, 19, 699, 8399, 89999, 949999);

	$counter++;
	print ".";
	my $timeneeded = gettimeofday - $starttime;
	print "$counter in $timeneeded s\n" unless ($counter % 50);

	# editor name : 210$c
	my $field210 = $record->field('210');
	my $editor = $field210 ? $field210->subfield('c') : undef;

	# series title : 225$a
	my $field225 = $record->field('225');
	my $collection = $field225 ? $field225->subfield('a') : undef;

	# warn only when the editor really is empty (the former condition also
	# fired for every record as soon as -v was given)
	print "WARNING : editor empty for ".$record->as_formatted unless $editor;

	# keep the first editor found for this ISBN start, count every collection
	$hash{$seg1.$seg2}->{editors} = $editor unless ($hash{$seg1.$seg2}->{editors});
	$hash{$seg1.$seg2}->{collections}->{$collection}++ if $collection;
}

# One EDITORS authority per ISBN start : 200$a = ISBN start, 200$b = editor,
# 200$c = every collection seen for that start, each followed by "|".
foreach my $isbnstart (sort keys %hash) {
	print "$isbnstart -- ".$hash{$isbnstart}->{editors} if $verbose;
	my $collections = $hash{$isbnstart}->{collections} || {};
	my $seriestitlelist;
	foreach my $collection (sort keys %$collections) {
		print " CC $collection : ".$collections->{$collection} if $verbose;
		$seriestitlelist.=$collection."|";
	}
	my $authorityRecord = MARC::Record->new();
	# "". forces a defined (possibly empty) string for each subfield value
	my $newfield = MARC::Field->new(200,'','','a' => "".$isbnstart,
												'b' => "".$hash{$isbnstart}->{editors},
												'c' => "".$seriestitlelist);
	$authorityRecord->insert_fields_ordered($newfield);
	# -t : run everything but don't really create the EDITORS
	unless ($test_parameter) {
		my $authid=AUTHaddauthority($dbh,$authorityRecord,'','EDITORS');
	}

# 	print $authorityRecord->as_formatted."\n";
	print "\n" if $verbose;
}
exit;

# 	my $timeneeded = gettimeofday - $starttime;
# 	print "$i in $timeneeded s\n" unless ($i % 50);
# 	foreach my $field ($record->field(995)) {
# 		$record->delete_field($field);
# 	}
# 	my $totdone=0;
# 	my $authid;
# 	foreach my $fieldnumber (('710','711','712')) {
# 		foreach my $field ($record->field($fieldnumber)) {
# 	# 		print "=>".$field->as_formatted."\n";
# 			foreach my $authentry ($field->subfield("a")) {
# 				my $hashentry = $authentry;
# 				# la particularité de ce script là, c'est que l'entrée dans la table d'autorité est $a -- $b (et pas $x -- $x -- $x -- $a comme pour les autorités NC)
# 				# si nécessaire, compléter avec le $c (n'existe pas dans le fichier que j'ai migré avec cette moulinette
# 				# supprimer les accents, certaines entrées sont sans, d'autres avec !
# 				# mysql ne différencie pas, mais les hash perl oui !
# 				$hashentry =~ s/é|ê|è/e/g;
# 				$hashentry =~ s/â|à/a/g;
# 				$hashentry =~ s/î/i/g;
# 				$hashentry =~ s/ô/o/g;
# 				$hashentry =~ s/ù|û/u/g;
# 				$hashentry = uc($hashentry);
# 				print "==>$hashentry" if $hashentry =~ /.*ETATS.*/;
# 				$totdone++;
# 				if ($alreadydone{$hashentry}) {
# 					$authid = $alreadydone{$hashentry};
# 					print ".";
# 				} else {
# 					print "*";
# 					#create authority.
# 					my $authorityRecord = MARC::Record->new();
# 					my $newfield = MARC::Field->new(210,'','','a' => "".$authentry, 
# 												'b' => "".$field->subfield('b'),
# 												'c' => "".$field->subfield('c'),
# 												);
# 					$authorityRecord->insert_fields_ordered($newfield);
# 					$authid=AUTHaddauthority($dbh,$authorityRecord,'','CO');
# 					$alreadydone{$hashentry} = $authid;
# 					# OK, on garde la notice d'autorité, on cherche les notices biblio et on les met à jour...
# 					if ($fieldnumber eq '710') {
# 						$sthBIBLIOS710->execute($authentry);
# 						while (my ($bibid,$tag,$tagorder,$subfieldorder) = $sthBIBLIOS710->fetchrow) {
# 							my $inbiblio = MARCgetbiblio($dbh,$bibid);
# 							my $isOK = 0;
# 							foreach my $in7xx ($inbiblio->field($fieldnumber)) {
# 								# !!!!! ici, il faut reconstruire l'entrée de la table de hachage comme ci dessus
# 								# sinon, 
# 								my $inEntry = $in7xx->subfield('a');
# 								$inEntry =~ s/é|ê|è/e/g;
# 								$inEntry =~ s/â|à/a/g;
# 								$inEntry =~ s/î/i/g;
# 								$inEntry =~ s/ô/o/g;
# 								$inEntry =~ s/ù|û/u/g;
# 								$inEntry = uc($inEntry);
# 								$isOK=1 if $inEntry eq $hashentry;
# 							}
# 							C4::Biblio::MARCaddsubfield($dbh,$bibid,$tag,'',$tagorder,9,$subfieldorder,$authid) if $isOK;
# 						}
# 					}
# 					if ($fieldnumber eq '711') {
# 						$sthBIBLIOS711->execute($authentry);
# 						while (my ($bibid,$tag,$tagorder,$subfieldorder) = $sthBIBLIOS711->fetchrow) {
# 							my $inbiblio = MARCgetbiblio($dbh,$bibid);
# 							my $isOK = 0;
# 							foreach my $in7xx ($inbiblio->field($fieldnumber)) {
# 								# !!!!! ici, il faut reconstruire l'entrée de la table de hachage comme ci dessus
# 								# sinon, 
# 								my $inEntry = $in7xx->subfield('a');
# 								$inEntry =~ s/é|ê|è/e/g;
# 								$inEntry =~ s/â|à/a/g;
# 								$inEntry =~ s/î/i/g;
# 								$inEntry =~ s/ô/o/g;
# 								$inEntry =~ s/ù|û/u/g;
# 								$inEntry = uc($inEntry);
# 								$isOK=1 if $inEntry eq $hashentry;
# 							}
# 							C4::Biblio::MARCaddsubfield($dbh,$bibid,$tag,'',$tagorder,9,$subfieldorder,$authid) if $isOK;
# 						}
# 					}
# 					if ($fieldnumber eq '712') {
# 						$sthBIBLIOS712->execute($authentry);
# 						while (my ($bibid,$tag,$tagorder,$subfieldorder) = $sthBIBLIOS712->fetchrow) {
# 							my $inbiblio = MARCgetbiblio($dbh,$bibid);
# 							my $isOK = 0;
# 							foreach my $in7xx ($inbiblio->field($fieldnumber)) {
# 								# !!!!! ici, il faut reconstruire l'entrée de la table de hachage comme ci dessus
# 								# sinon, 
# 								my $inEntry = $in7xx->subfield('a');
# 								$inEntry =~ s/é|ê|è/e/g;
# 								$inEntry =~ s/â|à/a/g;
# 								$inEntry =~ s/î/i/g;
# 								$inEntry =~ s/ô/o/g;
# 								$inEntry =~ s/ù|û/u/g;
# 								$inEntry = uc($inEntry);
# 								$isOK=1 if $inEntry eq $hashentry;
# 							}
# 							C4::Biblio::MARCaddsubfield($dbh,$bibid,$tag,'',$tagorder,9,$subfieldorder,$authid) if $isOK;
# 						}
# 					}
# 				}
# 			}
# 		}
# 	}
# 	$i++;
# }
# my $timeneeded = gettimeofday - $starttime;
# print "$i entries done in $timeneeded seconds (".($i/$timeneeded)." per second)\n";





More information about the Koha-cvs mailing list