[Koha-patches] [PATCH] New script generating authorities from biblio records
Frederic Demians
f.demians at tamil.fr
Wed Oct 15 16:27:12 CEST 2008
/misc/migration_tools/auth_from_biblio.pl allow to:
* extract authorities from biblio records and place
them in a text file
* load authorities into auth_header table
* update $9 subfield in biblio records
Run the script to get detailed explanations
---
misc/migration_tools/auth-marc21.conf | 14 ++
misc/migration_tools/auth-unimarc.conf | 29 +++
misc/migration_tools/auth_from_biblio.pl | 335 ++++++++++++++++++++++++++++++
3 files changed, 378 insertions(+), 0 deletions(-)
create mode 100644 misc/migration_tools/auth-marc21.conf
create mode 100644 misc/migration_tools/auth-unimarc.conf
create mode 100755 misc/migration_tools/auth_from_biblio.pl
diff --git a/misc/migration_tools/auth-marc21.conf b/misc/migration_tools/auth-marc21.conf
new file mode 100644
index 0000000..2b7d979
--- /dev/null
+++ b/misc/migration_tools/auth-marc21.conf
@@ -0,0 +1,14 @@
+---
+authcode: PERSO_NAME
+authtag: 100
+authletters: abd
+bibliotags:
+ - 100
+ - 700
+ - 693
+---
+authcode: TOPIC_TERM
+authtag: 150
+authletters: a
+bibliotags:
+ - 650
diff --git a/misc/migration_tools/auth-unimarc.conf b/misc/migration_tools/auth-unimarc.conf
new file mode 100644
index 0000000..1048cfe
--- /dev/null
+++ b/misc/migration_tools/auth-unimarc.conf
@@ -0,0 +1,29 @@
+---
+authcode: NP
+authletters: abcd
+authtag: 200
+bibliotags:
+ - 700
+ - 701
+ - 702
+---
+authcode: CO
+authletters: abcd
+authtag: 210
+bibliotags:
+ - 710
+ - 711
+ - 712
+---
+authcode: SAUT
+authletters: a
+authtag: 200
+bibliotags:
+ - 600
+---
+authcode: SNC
+authletters: a
+authtag: 250
+bibliotags:
+ - 606
+
diff --git a/misc/migration_tools/auth_from_biblio.pl b/misc/migration_tools/auth_from_biblio.pl
new file mode 100755
index 0000000..34b7866
--- /dev/null
+++ b/misc/migration_tools/auth_from_biblio.pl
@@ -0,0 +1,335 @@
+#!/usr/bin/perl
+
+#
+# This software is placed under the gnu General Public License, v2
+# (http://www.gnu.org/licenses/gpl.html)
+#
+# Copyright 2008 Tamil s.a.r.l.
+#
+
+
+
+package C4::Authority::Handler;
+
+use strict;
+use warnings;
+use diagnostics;
+use Carp;
+use MARC::Record;
+use C4::Context;
+use C4::Biblio;
+use C4::AuthoritiesMarc;
+use File::Temp qw( tempfile );
+use YAML::Syck;
+use List::Util qw( first );
+
+
+sub new {
+ my $self = {};
+ my $class = shift;
+ $self->{AUTHORITIES} = shift;
+ bless $self, $class;
+ return $self;
+}
+
+
+
+sub create_from_biblio {
+ my $self = shift;
+ my $target_file = shift;
+ my @authorities = @{ $self->{AUTHORITIES} };
+
+ my ($fh_tmp_file, $tmp_file) = tempfile( $target_file . '.XXXX' );
+ binmode( $fh_tmp_file, ":utf8" );
+
+ print <<EOS;
+Create authorities from biblio records
+ source: Koha DB
+ target: $target_file
+ temporary file: $tmp_file
+EOS
+
+ my $dbh = C4::Context->dbh;
+ my $count;
+ my $sth = $dbh->prepare( "SELECT biblionumber FROM biblio" );
+ $sth->execute();
+ while ( my ($biblionumber) = $sth->fetchrow ) {# loop on all biblio records
+ ++$count;
+ my $record = GetMarcBiblio( $biblionumber );
+ foreach my $authority ( @authorities ) { # loop on all authority types
+ #print "authority: ", $authority->{authcode}, "\n";
+ my @bibliotags = @{$authority->{bibliotags}};
+ foreach my $tag ( @bibliotags ) {
+ # loop on all biblio tags related to the current authority
+ foreach my $field ( $record->field( $tag ) ) {
+ # All field repetitions
+ my $concat = '';
+ foreach my $subfield ( $field->subfields() ) {
+ my ($letter, $value) = @$subfield;
+ #chop $value;
+ $value =~ s/^\s+//;
+ $value =~ s/\s+$//;
+ $value =~ /([\w ,']+)/;
+ $value = $1;
+ $value =~ s/^\s+//;
+ $value =~ s/\s+$//;
+ if ( $authority->{authletters} =~ /$letter/ ) {
+ $concat .= "\t" if $concat;
+ $concat .= "$letter|$value";
+ }
+ }
+ print $fh_tmp_file $authority->{authcode}, "\t$concat\n"
+ if $concat;
+ }
+ }
+ }
+ }
+ close $fh_tmp_file;
+ print " bibios count: $count\n";
+
+ print <<EOS;
+Sort and de-duplicate
+ source: $tmp_file
+ target: $target_file
+EOS
+ system( "sort -f $tmp_file | uniq -i >$target_file" );
+
+}
+
+
+sub load_from_file {
+ my $self = shift;
+ my $source_file = shift;
+ my $truncate = shift;
+ my @authorities = @{ $self->{AUTHORITIES} };
+
+ print <<"EOS";
+Load authorities into Koha from a file
+ source: $source_file
+ target: Koha DB
+EOS
+
+ my $dbh = C4::Context->dbh;
+
+ if ( $truncate ) {
+ print "Truncate table: auth_header\n";
+ $dbh->do( "truncate auth_header" );
+ }
+
+ open my $file_handle, "<:utf8", $source_file
+ or croak "Can't open authorities file: $source_file";
+ while ( <$file_handle> ) {
+ chop;
+ my ($authcode, $sub) = /(\w+)\t(.*)/;
+ my (@subfields) = split /\t|\|/, $sub;
+ #print "$auth_code => $sub\n";
+ #print "tbl : ", @subfields, "\n";
+ #print "size: ", $#subfields, "\n";
+ my $authority = first { $_->{authcode} eq $authcode } @authorities;
+ next if !$authority;
+ #print "<$authcode>:", "0:",$subfields[0], " - 1:",$subfields[1], " => $tag\n";
+ if ( $#subfields > 0 ) {
+ my $record = MARC::Record->new();
+ my $leader = $record->leader();
+ substr($leader, 5, 3) = 'naa';
+ substr($leader, 9, 1) = 'a'; # encodage utf8
+ $record->encoding( 'UTF-8' );
+ $record->leader($leader);
+ $record->append_fields( MARC::Field->new(
+ $authority->{authtag}, '', '', @subfields));
+ #print $record->as_formatted(), "\n->$authcode\n";
+ my ($authid) = AddAuthority($record, 0, $authcode);
+ }
+ }
+ close $file_handle;
+}
+
+
+sub link_biblio_to {
+ my $self = shift;
+ my @authorities = @{ $self->{AUTHORITIES} };
+
+ print "Linking biblio record to authorities\n";
+
+ my $zconn = C4::Context->Zconn( "authorityserver" );
+ my $dbh = C4::Context->dbh;
+ my $sth = $dbh->prepare( "SELECT biblionumber FROM biblio" );
+ $sth->execute();
+ while ( my ($biblionumber) = $sth->fetchrow ) { # loop on all biblio records
+ print "Notice #$biblionumber\n";
+ my $record = GetMarcBiblio( $biblionumber );
+ foreach my $authority ( @authorities ) { # loop on all authority types
+ foreach my $tag ( @{ $authority->{bibliotags} } ) {
+ # loop on all biblio tags related to the current authority
+ FIELD:
+ foreach my $field ( $record->field( $tag ) ) {
+ # All field repetitions
+ my $concat = '';
+ SUBFIELD:
+ foreach my $subfield ( $field->subfields() ) {
+ my ($letter, $value) = @$subfield;
+ $value =~ s/^\s+//;
+ $value =~ s/\s+$//;
+ $value =~ /([\w ,']+)/;
+ $value = $1;
+ next SUBFIELD if !$value;
+ if ( $authority->{authletters} =~ /$letter/ ) {
+ $concat = '@and ' . $concat if $concat;
+ $concat .= ' @attr 1=Heading @attr 6=3 "' .$value .'"';
+ }
+ }
+ next FIELD if !$concat;
+ my $query
+ = '@and @attr 1=authtype '
+ . $authority->{ authcode } . ' ' . $concat;
+ #print "$query\n";
+ my $rs = $zconn->search_pqf( $query );
+ #print "result set size: ", $rs->size(), "\n";
+ if ( $rs->size() >= 1 ) {
+ my $auth = $rs->record(0);
+ my $m = new_from_usmarc MARC::Record( $auth->raw() );
+ my $id = $m->field('001')->data();
+ #print "ID: $id\n";
+ my @ns = ();
+ push( @ns, '9', $id );
+ for ( $field->subfields() ) {
+ my ($letter, $value) = @$_;
+ push( @ns, $letter, $value ) if $letter ne '9';
+ }
+ $field->replace_with( new MARC::Field(
+ $field->tag, $field->indicator(1), $field->indicator(2),
+ @ns ) );
+ }
+ else {
+ print "ERROR: authority not found\n";
+ }
+ $rs->destroy();
+ }
+ }
+ }
+ #print $record->as_formatted(), "\n";
+ ModBiblio( $record, $biblionumber );
+ }
+}
+
+
+
+package main;
+
+use strict;
+use warnings;
+use diagnostics;
+use YAML::Syck;
+use Pod::Usage;
+use Getopt::Long;
+
+binmode( STDOUT, ":utf8" );
+
+my $verbose = 0;
+my $help = 0;
+my $truncate = 0;
+GetOptions(
+ 'verbose' => \$verbose,
+ 'help' => \$help,
+ 'truncate' => \$truncate,
+);
+
+usage() if $help;
+
+my $command = @ARGV ? shift : '';
+if ($command eq 'create_from_biblio') {
+ if ( $#ARGV != 1 ) {
+ usage();
+ }
+ my $conf_file = shift;
+ my $authorities_file = shift;
+ my @authorities = LoadFile( $conf_file );
+ #print $#ARGV,"\n";
+ my $handler = C4::Authority::Handler->new( \@authorities );
+ $handler->create_from_biblio( $authorities_file );
+}
+elsif ( $command eq 'load_from_file' ) {
+ if ( $#ARGV != 1 ) {
+ usage();
+ }
+ my $conf_file = shift;
+ my $authorities_file = shift;
+ my @authorities = LoadFile( $conf_file );
+ my $handler = C4::Authority::Handler->new( \@authorities );
+ $handler->load_from_file( $authorities_file, $truncate );
+}
+elsif ( $command eq 'link_biblio_to' ) {
+ if ( $#ARGV != 0 ) {
+ usage();
+ }
+ my $conf_file = shift;
+ my @authorities = LoadFile( $conf_file );
+ my $handler = C4::Authority::Handler->new( \@authorities );
+ $handler->link_biblio_to();
+}
+else {
+ usage();
+}
+
+
+sub usage {
+ pod2usage( -verbose => 2 );
+}
+
+
+
+=head1 NAME
+
+auth_from_biblio.pl - Generate authorities from bibliographic records
+
+=head1 USAGE
+
+=over
+
+=item auth_from_biblio.pl create_from_biblio F<auth.conf> F<authorities.txt>
+
+Creates a text file F<authorities.txt> containing authorities extracted
+from Koha biblio records using F<auth.conf> authorities configuration.
+
+=item auth_from_biblio.pl [--truncate] load_from_file F<auth.conf>
+F<authorities.txt>
+
+Load authorities from F<authorities.txt> into Koha using F<auth.conf>
+authorities configuration. With --truncate, auth_head Koha table is
+truncated before loading new authorities. After this processing,
+authorities have to be indexed with Zebra in order to be
+searchable.
+
+=item auth_from_biblio.pl link_biblio_to F<auth.conf>
+
+Link biblio records fields with authorities via $9 subfield.
+Authorities must have been indexed with Zebra.
+After this processing, it is necessary to
+index bibliographic records with Zebra.
+
+=back
+
+=head1 CONFIGURATION
+
+Authorities configuration file looks like that:
+
+ ---
+ authcode: NP
+ authletters: abcd
+ authtag: 200
+ bibliotags:
+ - 700
+ - 701
+ - 702
+ ---
+ authcode: CO
+ authletters: abcd
+ authtag: 210
+ bibliotags:
+ - 710
+ - 711
+ - 712
+ ---
+
+=cut
+
--
1.5.5.GIT
More information about the Koha-patches
mailing list