[Koha-patches] [PATCH 1/2] [ENH] Bug 6435 Add daemon mode to rebuild_zebra.pl

Doug Kingston dpk at randomnotes.org
Mon Oct 28 03:30:15 CET 2013


This change adds a cheap SQL query that checks the zebraqueue table, and a
daemon loop that looks for new entries and processes them incrementally
before sleeping for a configurable number of seconds.  The default is 5 seconds,
which provides a near-real-time search index update.  This is particularly
desirable for libraries that are actively updating their catalogue.  The query
is adjusted based on whether -a, -b, or both -a and -b are specified.

Help text updated.  Tested against a live 3.12 system.

Note that this change will benefit from the fix for the lack of locking (bug 11078).
---
 misc/migration_tools/rebuild_zebra.pl |   65 ++++++++++++++++++++++++++++-----
 1 file changed, 56 insertions(+), 9 deletions(-)

diff --git a/misc/migration_tools/rebuild_zebra.pl b/misc/migration_tools/rebuild_zebra.pl
index a46526d..6489fc5 100755
--- a/misc/migration_tools/rebuild_zebra.pl
+++ b/misc/migration_tools/rebuild_zebra.pl
@@ -21,6 +21,8 @@ $|=1; # flushes output
 # If the cron job starts us in an unreadable dir, we will break without
 # this.
 chdir $ENV{HOME} if (!(-r '.'));
+my $daemon_mode;
+my $daemon_sleep = 5;
 my $directory;
 my $nosanitize;
 my $skip_export;
@@ -45,6 +47,8 @@ my $run_user = (getpwuid($<))[0];
 my $verbose_logging = 0;
 my $zebraidx_log_opt = " -v none,fatal,warn ";
 my $result = GetOptions(
+    'daemon'        => \$daemon_mode,
+    'sleep:i'       => \$daemon_sleep,
     'd:s'           => \$directory,
     'r|reset'       => \$reset,
     's'             => \$skip_export,
@@ -152,16 +156,13 @@ if ($do_munge) {
 
 my $tester = XML::LibXML->new();
 
-if ($authorities) {
-    index_records('authority', $directory, $skip_export, $skip_index, $process_zebraqueue, $as_xml, $noxml, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $authorityserverdir);
-} else {
-    print "skipping authorities\n" if ( $verbose_logging );
-}
-
-if ($biblios) {
-    index_records('biblio', $directory, $skip_export, $skip_index, $process_zebraqueue, $as_xml, $noxml, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $biblioserverdir);
+if ($daemon_mode) {
+    while (1) {
+        do_one_pass() if ( zebraqueue_not_empty() );
+        sleep $daemon_sleep;
+    }
 } else {
-    print "skipping biblios\n" if ( $verbose_logging );
+    do_one_pass();
 }
 
 
@@ -191,6 +192,40 @@ if ($keep_export) {
     }
 }
 
+sub do_one_pass {
+    # One indexing pass: authorities first, then biblios; a type whose flag is off is skipped (reported when verbose).
+    if ( !$authorities ) {
+        print "skipping authorities\n" if ( $verbose_logging );
+    } else {
+        index_records('authority', $directory, $skip_export, $skip_index, $process_zebraqueue, $as_xml, $noxml, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $authorityserverdir);
+    }
+    if ( !$biblios ) {
+        print "skipping biblios\n" if ( $verbose_logging );
+    } else {
+        index_records('biblio', $directory, $skip_export, $skip_index, $process_zebraqueue, $as_xml, $noxml, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $biblioserverdir);
+    }
+}
+
+# Cheap poll: returns true if zebraqueue has unprocessed (done = 0) rows for the -a/-b selection.
+sub zebraqueue_not_empty {
+    my $where_str = 'done = 0';    # no trailing ';' -- a statement terminator is not needed via DBI
+    my @bind;
+    # Restrict to one server unless both types were requested (anything but -b alone checks authorityserver).
+    if ( !( $authorities && $biblios ) ) {
+        $where_str = 'server = ? AND ' . $where_str;
+        push @bind, ( $biblios ? 'biblioserver' : 'authorityserver' );
+    }
+
+    # A bound placeholder replaces the double-quoted SQL literals, which fail under ANSI_QUOTES.
+    my ($count) = $dbh->selectrow_array( 'SELECT COUNT(*) FROM zebraqueue WHERE ' . $where_str, undef, @bind );
+    if ( !defined $count ) {    # query failed without raising: report it and treat the queue as empty
+        warn 'zebraqueue check failed: ' . ( $dbh->errstr || 'no row returned' ) . "\n";
+        return 0;
+    }
+    print "queued records: $count\n" if $verbose_logging > 0;
+    return $count > 0;
+}
+
 # This checks to see if the zebra directories exist under the provided path.
 # If they don't, then zebra is likely to spit the dummy. This returns true
 # if the directories had to be created, false otherwise.
@@ -692,6 +727,18 @@ Parameters:
 
     -a                      index authority records
 
+    -daemon                 Run in daemon mode.  The program will loop checking
+                            for entries on the zebraqueue table, processing
+                            them incrementally if present, and then sleep
+                            for a few seconds before repeating the process
+                            Checking the zebraqueue table is done with a cheap
+                            SQL query.  This allows for near realtime update of
+                            the zebra search index with low system overhead.
+                            Use -sleep to control the checking interval.
+
+    -sleep 10               Seconds to sleep between checks of the zebraqueue
+                            table in daemon mode.  The default is 5 seconds.
+
     -z                      select only updated and deleted
                             records marked in the zebraqueue
                             table.  Cannot be used with -r
-- 
1.7.9.5



More information about the Koha-patches mailing list