[Koha-patches] [PATCH] Bug 3301 - Speed up rebuild_zebra script
Frédéric Demians
f.demians at tamil.fr
Sun Jun 7 09:40:05 CEST 2009
With this patch, rebuild_zebra can re-index a whole Koha DB
quickly:
rebuild_zebra -r -a -b -marcxml
Biblio/authority records are dumped directly into a file
from the marcxml field without being transformed into
a MARC::Record object and corrected.
---
misc/migration_tools/rebuild_zebra.pl | 25 +++++++++++++++++++++----
1 files changed, 21 insertions(+), 4 deletions(-)
diff --git a/misc/migration_tools/rebuild_zebra.pl b/misc/migration_tools/rebuild_zebra.pl
index e6ad84e..11e4cc1 100755
--- a/misc/migration_tools/rebuild_zebra.pl
+++ b/misc/migration_tools/rebuild_zebra.pl
@@ -17,6 +17,7 @@ use C4::AuthoritiesMarc;
$|=1; # flushes output
my $directory;
+my $marcxml;
my $skip_export;
my $keep_export;
my $reset;
@@ -36,6 +37,7 @@ my $result = GetOptions(
'reset' => \$reset,
's' => \$skip_export,
'k' => \$keep_export,
+ 'marcxml' => \$marcxml,
'b' => \$biblios,
'noxml' => \$noxml,
'w' => \$noshadow,
@@ -119,13 +121,13 @@ if ($do_munge) {
}
if ($authorities) {
- index_records('authority', $directory, $skip_export, $process_zebraqueue, $as_xml, $noxml, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt);
+ index_records('authority', $directory, $skip_export, $process_zebraqueue, $as_xml || $marcxml, $noxml, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt);
} else {
print "skipping authorities\n" if ( $verbose_logging );
}
if ($biblios) {
- index_records('biblio', $directory, $skip_export, $process_zebraqueue, $as_xml, $noxml, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt);
+ index_records('biblio', $directory, $skip_export, $process_zebraqueue, $as_xml || $marcxml, $noxml, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt);
} else {
print "skipping biblios\n" if ( $verbose_logging );
}
@@ -186,7 +188,7 @@ sub index_records {
mark_zebraqueue_batch_done($entries);
} else {
my $sth = select_all_records($record_type);
- $num_records_exported = export_marc_records_from_sth($record_type, $sth, "$directory/$record_type", $as_xml, $noxml);
+ $num_records_exported = export_marc_records_from_sth($record_type, $sth, "$directory/$record_type", $as_xml, $noxml, $marcxml);
unless ($do_not_clear_zebraqueue) {
mark_all_zebraqueue_done($record_type);
}
@@ -270,7 +272,7 @@ sub select_all_biblios {
}
sub export_marc_records_from_sth {
- my ($record_type, $sth, $directory, $as_xml, $noxml) = @_;
+ my ($record_type, $sth, $directory, $as_xml, $noxml, $marcxml) = @_;
my $num_exported = 0;
open (OUT, ">:utf8 ", "$directory/exported_records") or die $!;
@@ -278,6 +280,16 @@ sub export_marc_records_from_sth {
while (my ($record_number) = $sth->fetchrow_array) {
print "." if ( $verbose_logging );
print "\r$i" unless ($i++ %100 or !$verbose_logging);
+ if ( $marcxml ) {
+ my $marcxml = $record_type eq 'biblio'
+ ? GetXmlBiblio( $record_number )
+ : GetAuthorityXML( $record_number );
+ if ( $marcxml ) {
+ print OUT $marcxml if $marcxml;
+ $num_exported++;
+ }
+ next;
+ }
my ($marc) = get_corrected_marc_record($record_type, $record_number, $noxml);
if (defined $marc) {
# FIXME - when more than one record is exported and $as_xml is true,
@@ -548,6 +560,11 @@ Parameters:
-x export and index as xml instead of is02709 (biblios only).
use this if you might have records > 99,999 chars,
+ -marcxml export biblio/authority records directly from DB marcxml
+ field without sanitizing records. It speed up
+ dump process but could fail if DB contains badly
+ encoded records,
+
-w skip shadow indexing for this batch
-y do NOT clear zebraqueue after indexing; normally,
--
1.5.6.5
More information about the Koha-patches
mailing list