[Koha-patches] [PATCH] Bug 6435 Add daemon mode to rebuild_zebra.pl

Doug Kingston dpk at randomnotes.org
Thu Sep 19 06:08:04 CEST 2013


This change adds code to check the zebraqueue table with a cheap SQL query
and a daemon loop that checks for new entries and processes them
incrementally before sleeping for a controllable number of seconds.  The
default is 5 seconds, which provides a near realtime search index update.
This is desirable particularly for libraries that are doing active
catalogue updating.

Help text updated.  Tested against a live 3.12 system.
---
 misc/migration_tools/rebuild_zebra.pl |   54 +++++++++++++++++++++++++++---
---
 1 file changed, 45 insertions(+), 9 deletions(-)

diff --git a/misc/migration_tools/rebuild_zebra.pl b/misc/migration_tools/
rebuild_zebra.pl
index a46526d..2e01e9c 100755
--- a/misc/migration_tools/rebuild_zebra.pl
+++ b/misc/migration_tools/rebuild_zebra.pl
@@ -21,6 +21,8 @@ $|=1; # flushes output
 # If the cron job starts us in an unreadable dir, we will break without
 # this.
 chdir $ENV{HOME} if (!(-r '.'));
+my $daemon_mode;
+my $daemon_sleep = 5;
 my $directory;
 my $nosanitize;
 my $skip_export;
@@ -45,6 +47,8 @@ my $run_user = (getpwuid($<))[0];
 my $verbose_logging = 0;
 my $zebraidx_log_opt = " -v none,fatal,warn ";
 my $result = GetOptions(
+    'daemon'        => \$daemon_mode,
+    'sleep:i'       => \$daemon_sleep,
     'd:s'           => \$directory,
     'r|reset'       => \$reset,
     's'             => \$skip_export,
@@ -152,16 +156,13 @@ if ($do_munge) {

 my $tester = XML::LibXML->new();

-if ($authorities) {
-    index_records('authority', $directory, $skip_export, $skip_index,
$process_zebraqueue, $as_xml, $noxml, $nosanitize,
$do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt,
$authorityserverdir);
-} else {
-    print "skipping authorities\n" if ( $verbose_logging );
-}
-
-if ($biblios) {
-    index_records('biblio', $directory, $skip_export, $skip_index,
$process_zebraqueue, $as_xml, $noxml, $nosanitize,
$do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt,
$biblioserverdir);
+if ($daemon_mode) {
+    while (1) {
+        do_one_pass() if ( zebraqueue_not_empty() );
+        sleep $daemon_sleep;
+    }
 } else {
-    print "skipping biblios\n" if ( $verbose_logging );
+    do_one_pass();
 }


@@ -191,6 +192,29 @@ if ($keep_export) {
     }
 }

+sub do_one_pass {
+    if ($authorities) {
+        index_records('authority', $directory, $skip_export, $skip_index,
$process_zebraqueue, $as_xml, $noxml, $nosanitize,
$do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt,
$authorityserverdir);
+    } else {
+        print "skipping authorities\n" if ( $verbose_logging );
+    }
+
+    if ($biblios) {
+        index_records('biblio', $directory, $skip_export, $skip_index,
$process_zebraqueue, $as_xml, $noxml, $nosanitize,
$do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt,
$biblioserverdir);
+    } else {
+        print "skipping biblios\n" if ( $verbose_logging );
+    }
+}
+
+# Check the zebra update queue and return true if there are records to
process
+sub zebraqueue_not_empty {
+    my $query = $dbh->prepare('SELECT COUNT(*) FROM zebraqueue WHERE done
= 0;');
+    $query->execute;
+    my $count = $query->fetchrow_arrayref->[0];
+    print "queued records: $count\n" if $verbose_logging > 0;
+    return $count > 0;
+}
+
 # This checks to see if the zebra directories exist under the provided
path.
 # If they don't, then zebra is likely to spit the dummy. This returns true
 # if the directories had to be created, false otherwise.
@@ -692,6 +716,18 @@ Parameters:

     -a                      index authority records

+    -daemon                 Run in daemon mode.  The program will loop
checking
+                            for entries on the zebraqueue table, processing
+                            them incrementally if present, and then sleep
+                            for a few seconds before repeating the process.
+                            Checking the zebraqueue table is done with a
cheap
+                            SQL query.  This allows for near realtime
update of
+                            the zebra search index with low system
overhead.
+                            Use -sleep to control the checking interval.
+
+    -sleep 10               Seconds to sleep between checks of the
zebraqueue
+                            table in daemon mode.  The default is 5
seconds.
+
     -z                      select only updated and deleted
                             records marked in the zebraqueue
                             table.  Cannot be used with -r
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.koha-community.org/pipermail/koha-patches/attachments/20130918/693d3f01/attachment-0001.html>


More information about the Koha-patches mailing list