[Koha-patches] [PATCH 1/2] [ENH] Bug 6435 Add daemon mode to rebuild_zebra.pl
Doug Kingston
dpk at randomnotes.org
Mon Oct 28 03:30:15 CET 2013
This change adds code to check the zebraqueue table with a cheap SQL query
and a daemon loop that checks for new entries and processes them incremantally
before sleeping for a controllable number of seconds. The default is 5 seconds
which provides a near realtime search index update. This is desirable particularly
for libraries that are doing active catalogue updating. The query is adjusted
based on whether -a, -b, or -a -b are specified.
Help text updated. Tested against a live 3.12 system.
Note that this fix will benefit from the fix to lack of locking (bug 11078)
---
misc/migration_tools/rebuild_zebra.pl | 65 ++++++++++++++++++++++++++++-----
1 file changed, 56 insertions(+), 9 deletions(-)
diff --git a/misc/migration_tools/rebuild_zebra.pl b/misc/migration_tools/rebuild_zebra.pl
index a46526d..6489fc5 100755
--- a/misc/migration_tools/rebuild_zebra.pl
+++ b/misc/migration_tools/rebuild_zebra.pl
@@ -21,6 +21,8 @@ $|=1; # flushes output
# If the cron job starts us in an unreadable dir, we will break without
# this.
chdir $ENV{HOME} if (!(-r '.'));
+my $daemon_mode;
+my $daemon_sleep = 5;
my $directory;
my $nosanitize;
my $skip_export;
@@ -45,6 +47,8 @@ my $run_user = (getpwuid($<))[0];
my $verbose_logging = 0;
my $zebraidx_log_opt = " -v none,fatal,warn ";
my $result = GetOptions(
+ 'daemon' => \$daemon_mode,
+ 'sleep:i' => \$daemon_sleep,
'd:s' => \$directory,
'r|reset' => \$reset,
's' => \$skip_export,
@@ -152,16 +156,13 @@ if ($do_munge) {
my $tester = XML::LibXML->new();
-if ($authorities) {
- index_records('authority', $directory, $skip_export, $skip_index, $process_zebraqueue, $as_xml, $noxml, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $authorityserverdir);
-} else {
- print "skipping authorities\n" if ( $verbose_logging );
-}
-
-if ($biblios) {
- index_records('biblio', $directory, $skip_export, $skip_index, $process_zebraqueue, $as_xml, $noxml, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $biblioserverdir);
+if ($daemon_mode) {
+ while (1) {
+ do_one_pass() if ( zebraqueue_not_empty() );
+ sleep $daemon_sleep;
+ }
} else {
- print "skipping biblios\n" if ( $verbose_logging );
+ do_one_pass();
}
@@ -191,6 +192,40 @@ if ($keep_export) {
}
}
+sub do_one_pass {
+ if ($authorities) {
+ index_records('authority', $directory, $skip_export, $skip_index, $process_zebraqueue, $as_xml, $noxml, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $authorityserverdir);
+ } else {
+ print "skipping authorities\n" if ( $verbose_logging );
+ }
+
+ if ($biblios) {
+ index_records('biblio', $directory, $skip_export, $skip_index, $process_zebraqueue, $as_xml, $noxml, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $biblioserverdir);
+ } else {
+ print "skipping biblios\n" if ( $verbose_logging );
+ }
+}
+
+# Check the zebra update queue and return true if there are records to process
+sub zebraqueue_not_empty {
+ my $where_str;
+
+ if ($authorities && $biblios) {
+ $where_str = 'done = 0;';
+ } elsif ($biblios) {
+ $where_str = 'server = "biblioserver" AND done = 0;';
+ } else {
+ $where_str = 'server = "authorityserver" AND done = 0;';
+ }
+ my $query = $dbh->prepare('SELECT COUNT(*) FROM zebraqueue WHERE ' .
+ $where_str );
+
+ $query->execute;
+ my $count = $query->fetchrow_arrayref->[0];
+ print "queued records: $count\n" if $verbose_logging > 0;
+ return $count > 0;
+}
+
# This checks to see if the zebra directories exist under the provided path.
# If they don't, then zebra is likely to spit the dummy. This returns true
# if the directories had to be created, false otherwise.
@@ -692,6 +727,18 @@ Parameters:
-a index authority records
+ -daemon Run in daemon mode. The program will loop checking
+ for entries on the zebraqueue table, processing
+ them incrementally if present, and then sleep
+ for a few seconds before repeating the process
+ Checking the zebraqueue table is done with a cheap
+ SQL query. This allows for near realtime update of
+ the zebra search index with low system overhead.
+ Use -sleep to control the checking interval.
+
+ -sleep 10 Seconds to sleep between checks of the zebraqueue
+ table in daemon mode. The default is 5 seconds.
+
-z select only updated and deleted
records marked in the zebraqueue
table. Cannot be used with -r
--
1.7.9.5
More information about the Koha-patches
mailing list