[Koha-patches] [PATCH] Bug 3301 - Speed up rebuild_zebra script

Frédéric Demians f.demians at tamil.fr
Sun Jun 7 09:40:05 CEST 2009


With this patch, rebuild_zebra can re-index a whole Koha DB
quickly:

  rebuild_zebra -r -a -b -marcxml

Biblio/authority records are dumped directly into a file
from the marcxml field without being transformed into
MARC::Record objects and corrected.
---
 misc/migration_tools/rebuild_zebra.pl |   25 +++++++++++++++++++++----
 1 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/misc/migration_tools/rebuild_zebra.pl b/misc/migration_tools/rebuild_zebra.pl
index e6ad84e..11e4cc1 100755
--- a/misc/migration_tools/rebuild_zebra.pl
+++ b/misc/migration_tools/rebuild_zebra.pl
@@ -17,6 +17,7 @@ use C4::AuthoritiesMarc;
 $|=1; # flushes output
 
 my $directory;
+my $marcxml;
 my $skip_export;
 my $keep_export;
 my $reset;
@@ -36,6 +37,7 @@ my $result = GetOptions(
     'reset'         => \$reset,
     's'             => \$skip_export,
     'k'             => \$keep_export,
+    'marcxml'       => \$marcxml,
     'b'             => \$biblios,
     'noxml'         => \$noxml,
     'w'             => \$noshadow,
@@ -119,13 +121,13 @@ if ($do_munge) {
 }
 
 if ($authorities) {
-    index_records('authority', $directory, $skip_export, $process_zebraqueue, $as_xml, $noxml, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt);
+    index_records('authority', $directory, $skip_export, $process_zebraqueue, $as_xml || $marcxml, $noxml, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt);
 } else {
     print "skipping authorities\n" if ( $verbose_logging );
 }
 
 if ($biblios) {
-    index_records('biblio', $directory, $skip_export, $process_zebraqueue, $as_xml, $noxml, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt);
+    index_records('biblio', $directory, $skip_export, $process_zebraqueue, $as_xml || $marcxml, $noxml, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt);
 } else {
     print "skipping biblios\n" if ( $verbose_logging );
 }
@@ -186,7 +188,7 @@ sub index_records {
             mark_zebraqueue_batch_done($entries);
         } else {
             my $sth = select_all_records($record_type);
-            $num_records_exported = export_marc_records_from_sth($record_type, $sth, "$directory/$record_type", $as_xml, $noxml);
+            $num_records_exported = export_marc_records_from_sth($record_type, $sth, "$directory/$record_type", $as_xml, $noxml, $marcxml);
             unless ($do_not_clear_zebraqueue) {
                 mark_all_zebraqueue_done($record_type);
             }
@@ -270,7 +272,7 @@ sub select_all_biblios {
 }
 
 sub export_marc_records_from_sth {
-    my ($record_type, $sth, $directory, $as_xml, $noxml) = @_;
+    my ($record_type, $sth, $directory, $as_xml, $noxml, $marcxml) = @_;
 
     my $num_exported = 0;
     open (OUT, ">:utf8 ", "$directory/exported_records") or die $!;
@@ -278,6 +280,16 @@ sub export_marc_records_from_sth {
     while (my ($record_number) = $sth->fetchrow_array) {
         print "." if ( $verbose_logging );
         print "\r$i" unless ($i++ %100 or !$verbose_logging);
+        if ( $marcxml ) {
+            my $marcxml = $record_type eq 'biblio'
+                          ? GetXmlBiblio( $record_number )
+                          : GetAuthorityXML( $record_number );
+            if ( $marcxml ) {
+                print OUT $marcxml if $marcxml;
+                $num_exported++;
+            }
+            next;
+        }
         my ($marc) = get_corrected_marc_record($record_type, $record_number, $noxml);
         if (defined $marc) {
             # FIXME - when more than one record is exported and $as_xml is true,
@@ -548,6 +560,11 @@ Parameters:
     -x                      export and index as xml instead of is02709 (biblios only).
                             use this if you might have records > 99,999 chars,
 							
+    -marcxml                export biblio/authority records directly from DB marcxml
+                            field without sanitizing records. It speed up
+                            dump process but could fail if DB contains badly
+                            encoded records,
+
     -w                      skip shadow indexing for this batch
 
     -y                      do NOT clear zebraqueue after indexing; normally,
-- 
1.5.6.5




More information about the Koha-patches mailing list