[Koha-patches] [PATCH] Bug 7286: fix rebuild_zebra.pl to add rebuild_zebra_sliced.zsh

Stéphane Delaune stephane.delaune at biblibre.com
Fri Dec 2 18:16:15 CET 2011


Signed-off-by: Stéphane Delaune <stephane.delaune at biblibre.com>
---
 misc/migration_tools/rebuild_zebra.pl         |   54 ++++++++++-
 misc/migration_tools/rebuild_zebra_sliced.zsh |  127 +++++++++++++++++++++++++
 2 files changed, 176 insertions(+), 5 deletions(-)
 create mode 100755 misc/migration_tools/rebuild_zebra_sliced.zsh

diff --git a/misc/migration_tools/rebuild_zebra.pl b/misc/migration_tools/rebuild_zebra.pl
index fa87f22..8264121 100755
--- a/misc/migration_tools/rebuild_zebra.pl
+++ b/misc/migration_tools/rebuild_zebra.pl
@@ -34,6 +34,10 @@ my $want_help;
 my $as_xml;
 my $process_zebraqueue;
 my $do_not_clear_zebraqueue;
+my $item_limit;
+my $min;
+my $where;
+my $ofset;
 my $verbose_logging;
 my $zebraidx_log_opt = " -v none,fatal,warn ";
 my $result = GetOptions(
@@ -51,6 +55,10 @@ my $result = GetOptions(
 	'x'				=> \$as_xml,
     'y'             => \$do_not_clear_zebraqueue,
     'z'             => \$process_zebraqueue,
+    'l:i'           => \$item_limit,
+    'where:s'       => \$where,
+    'min:i'         => \$min,
+    'ofset:i'       => \$ofset,
     'v'             => \$verbose_logging,
 );
 
@@ -78,6 +86,12 @@ if ( !$as_xml and $nosanitize ) {
     die $msg;
 }
 
+if ( $nosanitize and $item_limit ) {    # -l is documented as not working with -nosanitize
+    my $msg = "Cannot specify both -l and -nosanitize\n";
+    $msg .= "Please do '$0 --help' to see usage.\n";
+    die $msg;
+}
+
 if ($process_zebraqueue and ($skip_export or $reset)) {
     my $msg = "Cannot specify -r or -s if -z is specified\n";
     $msg   .= "Please do '$0 --help' to see usage.\n";
@@ -294,13 +308,21 @@ sub select_all_records {
 }
 
 sub select_all_authorities {
-    my $sth = $dbh->prepare("SELECT authid FROM auth_header");
+    my $strsth = qq{SELECT authid FROM auth_header};
+    $strsth .= qq{ WHERE $where } if ($where);    # --where is spliced in verbatim as the SQL condition
+    $strsth .= qq{ ORDER BY authid};    # fixed order so that LIMIT slices are repeatable
+    $strsth .= ' LIMIT ' . join( ',', $ofset ? ( $min || 0, $ofset ) : $min ) if ( $min || $ofset );    # -ofset given: LIMIT min(or 0),ofset; only -min: LIMIT min
+    my $sth = $dbh->prepare($strsth);
     $sth->execute();
     return $sth;
 }
 
 sub select_all_biblios {
-    my $sth = $dbh->prepare("SELECT biblionumber FROM biblioitems ORDER BY biblionumber");
+    my $strsth = qq{SELECT biblionumber FROM biblioitems};
+    $strsth .= qq{ WHERE $where } if ($where);    # --where is spliced in verbatim as the SQL condition
+    $strsth .= qq{ ORDER BY biblionumber};    # ordering the pre-patch query had; keeps LIMIT slices repeatable
+    $strsth .= ' LIMIT ' . join( ',', $ofset ? ( $min || 0, $ofset ) : $min ) if ( $min || $ofset );    # -ofset given: LIMIT min(or 0),ofset; only -min: LIMIT min
+    my $sth = $dbh->prepare($strsth);
     $sth->execute();
     return $sth;
 }
@@ -406,6 +428,7 @@ sub generate_deleted_marc_records {
         my $marc = MARC::Record->new();
         if ($record_type eq 'biblio') {
             fix_biblio_ids($marc, $record_number, $record_number);
+            fix_biblio_items( $marc ) if $item_limit;
         } else {
             fix_authority_id($marc, $record_number);
         }
@@ -431,10 +454,14 @@ sub get_corrected_marc_record {
 
     if (defined $marc) {
         fix_leader($marc);
-        if ($record_type eq 'authority') {
-            fix_authority_id($marc, $record_number);
+        if ( $record_type eq 'biblio' ) {
+            my $succeeded = fix_biblio_ids( $marc, $record_number );
+            fix_biblio_items( $marc ) if $item_limit;
+            return unless $succeeded;
+        } else {
+            fix_authority_id( $marc, $record_number );
         }
-        if (C4::Context->preference("marcflavour") eq "UNIMARC") {
+        if ( C4::Context->preference("marcflavour") eq "UNIMARC" ) {
             fix_unimarc_100($marc);
         }
     }
@@ -498,6 +525,18 @@ sub fix_leader {
     $marc->leader(substr($leader, 0, 24));
 }
 
+sub fix_biblio_items {
+    my ($marc) = @_;    # record object, trimmed in place
+
+    # Item fields are looked up under the tag GetMarcFromKohaField returns for items.itemnumber.
+    my ($item_tag) = GetMarcFromKohaField( 'items.itemnumber', '' );
+    my @item_fields = $marc->field($item_tag);
+
+    # Keep the first $item_limit item fields; delete every later one.
+    my @surplus = grep { $_ >= $item_limit } 0 .. $#item_fields;
+    $marc->delete_field( $item_fields[$_] ) for @surplus;
+}
+
 sub fix_biblio_ids {
     # FIXME - it is essential to ensure that the biblionumber is present,
     #         otherwise, Zebra will choke on the record.  However, this
@@ -635,9 +674,14 @@ Parameters:
                             the same records - specify -y to override this.  
                             Cannot be used with -z.
 
+    -l                      set a maximum number of exported items per biblio.
+                            Doesn't work with -nosanitize.
+
     -v                      increase the amount of logging.  Normally only 
                             warnings and errors from the indexing are shown.
 
+    -min   1234                 minimum biblionumber 
+    -ofset 1243                 count biblios to process
     -munge-config           Deprecated option to try
                             to fix Zebra config files.
     --help or -h            show this message.
diff --git a/misc/migration_tools/rebuild_zebra_sliced.zsh b/misc/migration_tools/rebuild_zebra_sliced.zsh
new file mode 100755
index 0000000..1e9554d
--- /dev/null
+++ b/misc/migration_tools/rebuild_zebra_sliced.zsh
@@ -0,0 +1,127 @@
+#!/usr/bin/zsh
+
+# Copyright 2011 BibLibre SARL
+# This file is part of Koha.
+#
+# Koha is free software; you can redistribute it and/or modify it under the
+# terms of the GNU General Public License as published by the Free Software
+# Foundation; either version 2 of the License, or (at your option) any later
+# version.
+#
+# Koha is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+# A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with Koha; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+INCREMENT=$1      # how many records to try at once
+BIBLIOSTART=$2    # record number the loop starts on
+BIBLIOEND=$3      # record number the loop stops before
+
+#echo " | $INCREMENT , $BIBLIOSTART , $BIBLIOEND | ";
+# this script rebuilds the zebra index recursively
+# BIBLIOSTART is the record number to start on
+# BIBLIOEND is the record number to end on
+# INCREMENT specifies how many records we must try at once
+# At the end of each "chunk", this script checks if the indexing process has been OK
+# if it hasn't, the slice is split in 10, and the reindexing is called again on each smaller chunk
+# if the increment goes to 1, it means we tried to reindex 1 by 1, and the failing indexings point to bad records
+
+# the logs are stored in a directory called logs/ located next to this script (it is created if missing)
+
+# once the script has finished, just type :
+#grep -l "previous transaction" `ls rebuild1.*.err`
+# the result contains all the biblios that have not been indexed
+# WARNING : the numbers are not the biblionumber but the record number, they can be reached by :
+# SELECT biblionumber FROM biblio LIMIT YourNumberHere,1;
+
+# EXAMPLE to run the script on an 800 000 biblios database :
+# ./rebuild_zebra_sliced.zsh 10000 0 800000
+# will reindex the DB, starting with chunks of 10k biblios (INCREMENT must be a power of 10)
+#if { grep -E "previous transaction" `dirname $0`/logs/rebuild$INCREMENT.$i.err } ; then
+
+lastbiblionumber=`perl -e '#!/usr/bin/perl
+use C4::Context;
+my $dbh = C4::Context->dbh;
+my $querylastbiblionumber = "SELECT max(biblionumber) FROM biblio;";
+my $sthlastbiblionumber   = $dbh->prepare($querylastbiblionumber);
+$sthlastbiblionumber->execute();
+my ( $lastbiblionumber ) = $sthlastbiblionumber->fetchrow_array; print $lastbiblionumber;'`
+#echo $lastbiblionumber;
+let "maxbiblionumber = $lastbiblionumber + 1"
+if [ $# = 2 ]
+then
+    BIBLIOEND=$lastbiblionumber
+elif [ $# = 1 ]
+then
+    BIBLIOSTART=0
+    BIBLIOEND=$lastbiblionumber
+elif [ $# = 0 ]
+then
+    INCREMENT=10000
+    BIBLIOSTART=0
+    BIBLIOEND=$lastbiblionumber
+fi
+if [[ $INCREMENT =~ ^10*$ ]]
+then
+else
+    echo "The first argument (INCREMENT) must be 1 or a multiple of 10"
+    exit 2
+fi
+if [[ $BIBLIOSTART =~ ^[0-9]*$ ]]
+then
+else
+    echo "The second argument (BIBLIOSTART) must be an integer"
+    exit 2
+fi
+if [[ $BIBLIOEND =~ ^[0-9]*$ ]]
+then
+else
+    echo "The third argument (BIBLIOEND) must be an integer"
+    exit 2
+fi
+if [ $BIBLIOSTART -lt $BIBLIOEND ]
+then
+else
+    echo "The second argument (BIBLIOSTART) must be lower than the third argument (BIBLIOEND)"
+    exit 2
+fi
+if [ $BIBLIOEND -lt $maxbiblionumber ]
+then
+else
+    echo "end"
+    exit 1
+fi
+    # The script re-invokes itself on failing slices; only the outermost run may
+    # empty logs/, otherwise every recursion would erase the logs written so far.
+    logdir=`dirname $0`/logs
+    mkdir -p $logdir
+    if [[ -z $REBUILD_ZEBRA_SLICED_NESTED ]]
+    then
+        rm -f $logdir/*(N)
+    fi
+    for ((i=$BIBLIOSTART ; i<$BIBLIOEND ; i=i+$INCREMENT))
+    do
+        echo "I = " $i "with increment " $INCREMENT
+        `dirname $0`/rebuild_zebra.pl -b -v -x -nosanitize -d /tmp/rebuild -k -ofset $INCREMENT -min $i > $logdir/rebuild$INCREMENT.$i.log 2> $logdir/rebuild$INCREMENT.$i.err
+        if (($INCREMENT >1 ));
+        then
+            if { grep -q "previous transaction" $logdir/rebuild$INCREMENT.$i.err } ;
+            then
+                echo "I must split $i (increment $INCREMENT) because previous transaction didn't reach commit"
+                ((subincrement=$INCREMENT/10))
+                ((newBIBLIOEND=$i+$INCREMENT))
+                REBUILD_ZEBRA_SLICED_NESTED=1 $0 $subincrement $i $newBIBLIOEND
+            elif { ! grep -q "Records: $INCREMENT" $logdir/rebuild$INCREMENT.$i.err } ;
+            then
+                echo "I must split $i (increment $INCREMENT) because index was uncomplete, less than $INCREMENT records indexed"
+                ((subincrement=$INCREMENT/10))
+                ((newBIBLIOEND=$i+$INCREMENT))
+                REBUILD_ZEBRA_SLICED_NESTED=1 $0 $subincrement $i $newBIBLIOEND
+            fi
+        fi
+    done
+exit 0
-- 
1.7.0.4



More information about the Koha-patches mailing list