[Koha-patches] [PATCH] New script generating authorities from biblio records

Frederic Demians f.demians at tamil.fr
Wed Oct 15 16:27:12 CEST 2008


/misc/migration_tools/auth_from_biblio.pl allow to:

  * extract authorities from biblio records and place
    them in a text file
  * load authorities into auth_header table
  * update $9 subfield in biblio records

Run the script to get detailed explanations
---
 misc/migration_tools/auth-marc21.conf    |   14 ++
 misc/migration_tools/auth-unimarc.conf   |   29 +++
 misc/migration_tools/auth_from_biblio.pl |  335 ++++++++++++++++++++++++++++++
 3 files changed, 378 insertions(+), 0 deletions(-)
 create mode 100644 misc/migration_tools/auth-marc21.conf
 create mode 100644 misc/migration_tools/auth-unimarc.conf
 create mode 100755 misc/migration_tools/auth_from_biblio.pl

diff --git a/misc/migration_tools/auth-marc21.conf b/misc/migration_tools/auth-marc21.conf
new file mode 100644
index 0000000..2b7d979
--- /dev/null
+++ b/misc/migration_tools/auth-marc21.conf
@@ -0,0 +1,14 @@
+--- 
+authcode: PERSO_NAME
+authtag: 100
+authletters: abd
+bibliotags: 
+  - 100
+  - 700
+  - 693
+--- 
+authcode: TOPIC_TERM
+authtag: 150
+authletters: a
+bibliotags: 
+  - 650
diff --git a/misc/migration_tools/auth-unimarc.conf b/misc/migration_tools/auth-unimarc.conf
new file mode 100644
index 0000000..1048cfe
--- /dev/null
+++ b/misc/migration_tools/auth-unimarc.conf
@@ -0,0 +1,29 @@
+--- 
+authcode: NP
+authletters: abcd
+authtag: 200
+bibliotags: 
+  - 700
+  - 701
+  - 702
+--- 
+authcode: CO
+authletters: abcd
+authtag: 210
+bibliotags: 
+  - 710
+  - 711
+  - 712
+--- 
+authcode: SAUT
+authletters: a
+authtag: 200
+bibliotags: 
+  - 600
+--- 
+authcode: SNC
+authletters: a
+authtag: 250
+bibliotags: 
+  - 606
+
diff --git a/misc/migration_tools/auth_from_biblio.pl b/misc/migration_tools/auth_from_biblio.pl
new file mode 100755
index 0000000..34b7866
--- /dev/null
+++ b/misc/migration_tools/auth_from_biblio.pl
@@ -0,0 +1,335 @@
+#!/usr/bin/perl
+
+#
+# This software is placed under the gnu General Public License, v2 
+# (http://www.gnu.org/licenses/gpl.html)
+#
+# Copyright 2008 Tamil s.a.r.l.
+#
+
+
+
+package C4::Authority::Handler;
+
+use strict;
+use warnings;
+use diagnostics;
+use Carp;
+use MARC::Record;
+use C4::Context;
+use C4::Biblio;
+use C4::AuthoritiesMarc;
+use File::Temp qw( tempfile );
+use YAML::Syck;
+use List::Util qw( first );
+
+
+sub new {    
+    my $self = {};
+    my $class = shift;
+    $self->{AUTHORITIES} = shift;
+    bless $self, $class;
+    return $self;
+}
+
+
+
+sub create_from_biblio {
+    my $self        = shift;
+    my $target_file = shift;
+    my @authorities = @{ $self->{AUTHORITIES} }; 
+
+    my ($fh_tmp_file, $tmp_file) = tempfile( $target_file . '.XXXX' );
+    binmode( $fh_tmp_file, ":utf8" );
+
+    print <<EOS;
+Create authorities from biblio records
+    source:         Koha DB
+    target:         $target_file
+    temporary file: $tmp_file
+EOS
+
+    my $dbh = C4::Context->dbh;
+    my $count;
+    my $sth = $dbh->prepare( "SELECT biblionumber FROM biblio" );
+    $sth->execute();
+    while ( my ($biblionumber) = $sth->fetchrow ) {# loop on all biblio records
+        ++$count;
+        my $record = GetMarcBiblio( $biblionumber );
+        foreach my $authority ( @authorities ) { # loop on all authority types
+            #print "authority: ", $authority->{authcode}, "\n";
+            my @bibliotags = @{$authority->{bibliotags}};
+            foreach my $tag ( @bibliotags ) { 
+                # loop on all biblio tags related to the current authority
+                foreach my $field ( $record->field( $tag ) ) {
+                    # All field repetitions
+                    my $concat = '';
+                    foreach my $subfield ( $field->subfields() ) {
+                        my ($letter, $value) = @$subfield;
+                        #chop $value;
+                        $value =~ s/^\s+//;
+                        $value =~ s/\s+$//;
+                    	$value =~ /([\w ,']+)/;
+                    	$value = $1;
+                        $value =~ s/^\s+//;
+                        $value =~ s/\s+$//;
+                        if ( $authority->{authletters} =~ /$letter/ ) {
+                            $concat .= "\t" if $concat;
+                            $concat .= "$letter|$value";   
+                        }
+                    }
+                    print $fh_tmp_file $authority->{authcode}, "\t$concat\n" 
+                        if $concat;
+                }
+            }
+        }
+    }
+    close $fh_tmp_file;
+    print "    bibios count:   $count\n";
+
+    print <<EOS;
+Sort and de-duplicate
+    source:         $tmp_file
+    target:         $target_file
+EOS
+    system( "sort -f $tmp_file | uniq -i >$target_file" );
+
+}
+
+
+sub load_from_file {
+    my $self = shift;
+    my $source_file = shift;
+    my $truncate = shift;
+    my @authorities = @{ $self->{AUTHORITIES} }; 
+
+    print <<"EOS";
+Load authorities into Koha from a file
+    source: $source_file
+    target: Koha DB
+EOS
+    
+    my $dbh = C4::Context->dbh;
+
+    if ( $truncate ) {
+        print "Truncate table: auth_header\n";
+        $dbh->do( "truncate auth_header" );
+    }
+
+    open my $file_handle, "<:utf8", $source_file 
+        or croak "Can't open authorities file: $source_file"; 
+    while ( <$file_handle> ) {
+        chop;
+        my ($authcode, $sub) = /(\w+)\t(.*)/;
+        my (@subfields) = split /\t|\|/, $sub; 
+        #print "$auth_code => $sub\n";
+        #print "tbl : ", @subfields, "\n";
+        #print "size: ", $#subfields, "\n";
+        my $authority = first { $_->{authcode} eq $authcode } @authorities;
+        next if !$authority;
+        #print "<$authcode>:", "0:",$subfields[0], " - 1:",$subfields[1], " => $tag\n";
+      	if ( $#subfields > 0 ) {
+            my $record = MARC::Record->new();
+            my $leader = $record->leader();
+            substr($leader, 5, 3) = 'naa';
+            substr($leader, 9, 1) = 'a';    # encodage utf8
+            $record->encoding( 'UTF-8' );
+            $record->leader($leader);
+            $record->append_fields( MARC::Field->new(
+                $authority->{authtag}, '', '', @subfields));
+            #print $record->as_formatted(), "\n->$authcode\n";
+            my ($authid) = AddAuthority($record, 0, $authcode);
+    	}
+    }
+    close $file_handle;
+}
+
+
+sub link_biblio_to {
+    my $self = shift;
+    my @authorities = @{ $self->{AUTHORITIES} }; 
+
+    print "Linking biblio record to authorities\n";
+
+    my $zconn = C4::Context->Zconn( "authorityserver" );
+    my $dbh = C4::Context->dbh;
+    my $sth = $dbh->prepare( "SELECT biblionumber FROM biblio" );
+    $sth->execute();
+    while ( my ($biblionumber) = $sth->fetchrow ) { # loop on all biblio records
+        print "Notice #$biblionumber\n";
+        my $record = GetMarcBiblio( $biblionumber );
+        foreach my $authority ( @authorities ) { # loop on all authority types
+            foreach my $tag ( @{ $authority->{bibliotags} } ) { 
+                # loop on all biblio tags related to the current authority
+                FIELD:
+                foreach my $field ( $record->field( $tag ) ) {
+                    # All field repetitions
+                    my $concat = '';
+                    SUBFIELD:
+                    foreach my $subfield ( $field->subfields() ) {
+                        my ($letter, $value) = @$subfield;
+                        $value =~ s/^\s+//;
+                        $value =~ s/\s+$//;
+                    	$value =~ /([\w ,']+)/;
+                    	$value = $1;
+			            next SUBFIELD if !$value;
+			            if ( $authority->{authletters} =~ /$letter/ ) {
+                            $concat = '@and ' . $concat if $concat;
+                            $concat .= ' @attr 1=Heading @attr 6=3 "' .$value .'"';   
+                        }
+                    }
+		            next FIELD if !$concat;
+                    my $query 
+                        = '@and @attr 1=authtype ' 
+                          . $authority->{ authcode } . ' ' . $concat;
+                    #print "$query\n";
+                    my $rs = $zconn->search_pqf( $query );
+                    #print "result set size: ", $rs->size(), "\n";
+                    if ( $rs->size() >= 1 ) {
+                        my $auth = $rs->record(0);
+                        my $m = new_from_usmarc MARC::Record( $auth->raw() );
+                        my $id = $m->field('001')->data();
+                        #print "ID: $id\n";
+                        my @ns = ();
+                        push( @ns, '9', $id );
+                        for ( $field->subfields() ) {
+                            my ($letter, $value) = @$_;
+                            push( @ns, $letter, $value ) if $letter ne '9';
+                        }
+                        $field->replace_with( new MARC::Field(
+                            $field->tag, $field->indicator(1), $field->indicator(2),
+                            @ns ) );
+                    }
+                    else {
+                        print "ERROR: authority not found\n";
+                    }
+                    $rs->destroy();
+                }
+            }
+        }
+        #print $record->as_formatted(), "\n";
+        ModBiblio( $record, $biblionumber );
+    }
+}
+
+
+
+package main;
+
+use strict;
+use warnings;
+use diagnostics;
+use YAML::Syck;
+use Pod::Usage;
+use Getopt::Long;
+
+binmode( STDOUT, ":utf8" );
+
+my $verbose     = 0;
+my $help        = 0;
+my $truncate    = 0;
+GetOptions( 
+    'verbose'   => \$verbose,
+    'help'      => \$help,
+    'truncate'  => \$truncate,
+);
+
+usage() if $help;          
+
+my $command = @ARGV ? shift : '';
+if ($command eq 'create_from_biblio') {
+    if ( $#ARGV != 1 ) {
+        usage();
+    }
+    my $conf_file = shift;
+    my $authorities_file = shift;
+    my @authorities = LoadFile( $conf_file );
+    #print $#ARGV,"\n";
+    my $handler = C4::Authority::Handler->new( \@authorities );
+    $handler->create_from_biblio( $authorities_file );
+}
+elsif ( $command eq 'load_from_file' ) {
+    if ( $#ARGV != 1 ) {
+        usage();
+    }
+    my $conf_file = shift;
+    my $authorities_file = shift;
+    my @authorities = LoadFile( $conf_file );
+    my $handler = C4::Authority::Handler->new( \@authorities );
+    $handler->load_from_file( $authorities_file, $truncate );
+}
+elsif ( $command eq 'link_biblio_to' ) {
+   if ( $#ARGV != 0 ) {
+        usage();
+    }
+    my $conf_file = shift;
+    my @authorities = LoadFile( $conf_file );
+    my $handler = C4::Authority::Handler->new( \@authorities );
+    $handler->link_biblio_to(); 
+}
+else {
+    usage();
+}
+
+
+sub usage {
+    pod2usage( -verbose => 2 );
+} 
+
+
+
+=head1 NAME
+
+auth_from_biblio.pl - Generate authorities from bibliographic records
+
+=head1 USAGE
+
+=over
+
+=item auth_from_biblio.pl create_from_biblio F<auth.conf> F<authorities.txt>
+
+Creates a text file F<authorities.txt> containing authorities extracted
+from Koha biblio records using F<auth.conf> authorities configuration.
+
+=item auth_from_biblio.pl [--truncate] load_from_file F<auth.conf> 
+F<authorities.txt>
+
+Load authorities from F<authorities.txt> into Koha using F<auth.conf>
+authorities configuration. With --truncate, auth_head Koha table is
+truncated before loading new authorities. After this processing,
+authorities have to be indexed with Zebra in order to be
+searchable.
+
+=item auth_from_biblio.pl link_biblio_to F<auth.conf>
+
+Link biblio records fields with authorities via $9 subfield. 
+Authorities must have been indexed with Zebra. 
+After this processing, it is necessary to
+index bibliographic records with Zebra.
+
+=back
+     
+=head1 CONFIGURATION
+    
+Authorities configuration file looks like that:
+
+ --- 
+ authcode: NP
+ authletters: abcd
+ authtag: 200
+ bibliotags: 
+   - 700
+   - 701
+   - 702
+ --- 
+ authcode: CO
+ authletters: abcd
+ authtag: 210
+ bibliotags: 
+   - 710
+   - 711
+   - 712
+ --- 
+
+=cut
+
-- 
1.5.5.GIT




More information about the Koha-patches mailing list