[Koha-patches] [PATCH] Bug 11078 Add locking to rebuild_zebra (revised patch 2014-02-18)

Doug Kingston dpk at randomnotes.org
Wed Feb 19 08:06:58 CET 2014


This patch adds locking to rebuild_zebra.pl to ensure that simultaneous
changes are prevented (as one is likely to overwrite the other).
Incremental updates in daemon mode will be skipped if the lock is busy
and they will be picked up on the next pass.  Non-daemon mode
invocations will also exit immediately if they cannot get the lock
unless the new flag -wait-for-lock is specified, in which case they
will wait until they get the lock and then proceed.

Supporting changes made to Makefile.PL and templates for the new
locking directory (paralleling the other zebra lock directories).
We stash the zebra_lockdir in koha-conf.xml so rebuild_zebra.pl
can find it.

To address earlier QA concerns we:
1. added code to check if flock is available and ignore locking if
it's missing (from M. de Rooy)

2. changed default for adhoc invocations to abort if they cannot
obtain the lock.  Added option -wait-for-lock if the user prefers
to wait until the lock is free, and then continue processing.

3. added missing entry to t/db_dependent/zebra_config.pl

4. added a fallback locking directory of /tmp
---
 Makefile.PL                               |    5 +-
 debian/templates/koha-conf-site.xml.in    |    1 +
 etc/koha-conf.xml                         |    1 +
 misc/bin/koha-zebra-ctl.sh                |    1 +
 misc/migration_tools/rebuild_zebra.pl     |   83 ++++++++++++++++++++++++++---
 skel/var/lock/koha/zebradb/rebuild/README |    1 +
 t/db_dependent/zebra_config.pl            |    1 +
 7 files changed, 85 insertions(+), 8 deletions(-)
 create mode 100644 skel/var/lock/koha/zebradb/rebuild/README

diff --git a/Makefile.PL b/Makefile.PL
index 66f9a0f..7d5eb4a 100644
--- a/Makefile.PL
+++ b/Makefile.PL
@@ -186,7 +186,9 @@ Directory for Zebra configuration files.
 
 =item ZEBRA_LOCK_DIR
 
-Directory for Zebra's lock files.
+Directory for Zebra's lock files.  This includes subdirs for authorities,
+biblios, and the zebra rebuild function.  Any activity to reindex
+zebra from koha should interlock here with rebuild_zebra.pl.
 
 =item ZEBRA_DATA_DIR
 
@@ -318,6 +320,7 @@ my $target_map = {
   './skel/var/lib/koha/zebradb/biblios/register'  => { target => 'ZEBRA_DATA_DIR', trimdir => 6 },
   './skel/var/lib/koha/zebradb/biblios/shadow'  => { target => 'ZEBRA_DATA_DIR', trimdir => 6 },
   './skel/var/lib/koha/zebradb/biblios/tmp'  => { target => 'ZEBRA_DATA_DIR', trimdir => 6 },
+  './skel/var/lock/koha/zebradb/rebuild' => { target => 'ZEBRA_LOCK_DIR', trimdir => 6 },
   './skel/var/lib/koha/plugins' => { target => 'PLUGINS_DIR', trimdir => 6 },
   './sms'                       => 'INTRANET_CGI_DIR',
   './suggestion'                => 'INTRANET_CGI_DIR',
diff --git a/debian/templates/koha-conf-site.xml.in b/debian/templates/koha-conf-site.xml.in
index 9bf090b..371b1d2 100644
--- a/debian/templates/koha-conf-site.xml.in
+++ b/debian/templates/koha-conf-site.xml.in
@@ -280,6 +280,7 @@
  <memcached_namespace>__MEMCACHED_NAMESPACE__</memcached_namespace>
  <zebra_bib_index_mode>__BIBLIOS_INDEXING_MODE__</zebra_bib_index_mode>
  <zebra_auth_index_mode>__AUTHORITIES_INDEXING_MODE__</zebra_auth_index_mode>
+ <zebra_lockdir>/var/lock/koha/__KOHASITE__</zebra_lockdir>
  <queryparser_config>/etc/koha/searchengine/queryparser.yaml</queryparser_config>
 </config>
 
diff --git a/etc/koha-conf.xml b/etc/koha-conf.xml
index 1da5378..72da2de 100644
--- a/etc/koha-conf.xml
+++ b/etc/koha-conf.xml
@@ -297,6 +297,7 @@ __PAZPAR2_TOGGLE_XML_POST__
  <useldapserver>0</useldapserver><!-- see C4::Auth_with_ldap for extra configs you must add if you want to turn this on -->
  <zebra_bib_index_mode>__BIB_INDEX_MODE__</zebra_bib_index_mode>
  <zebra_auth_index_mode>__AUTH_INDEX_MODE__</zebra_auth_index_mode>
+ <zebra_lockdir>__ZEBRA_LOCK_DIR__</zebra_lockdir>
  <queryparser_config>__KOHA_CONF_DIR__/searchengine/queryparser.yaml</queryparser_config>
 </config>
 </yazgfs>
diff --git a/misc/bin/koha-zebra-ctl.sh b/misc/bin/koha-zebra-ctl.sh
index 397eb19..82b9162 100755
--- a/misc/bin/koha-zebra-ctl.sh
+++ b/misc/bin/koha-zebra-ctl.sh
@@ -65,6 +65,7 @@ case "$1" in
         mkdir -p $LOCKDIR
         mkdir -p $LOCKDIR/biblios
         mkdir -p $LOCKDIR/authorities
+        mkdir -p $LOCKDIR/rebuild
         if [[ $EUID -eq 0 ]]; then
             chown -R $USER:$GROUP $LOCKDIR
         fi
diff --git a/misc/migration_tools/rebuild_zebra.pl b/misc/migration_tools/rebuild_zebra.pl
index 04cf454..ec5f518 100755
--- a/misc/migration_tools/rebuild_zebra.pl
+++ b/misc/migration_tools/rebuild_zebra.pl
@@ -5,6 +5,7 @@ use strict;
 
 use C4::Context;
 use Getopt::Long;
+use Fcntl qw(:flock);
 use File::Temp qw/ tempdir /;
 use File::Path;
 use C4::Biblio;
@@ -42,6 +43,8 @@ my $where;
 my $offset;
 my $run_as_root;
 my $run_user = (getpwuid($<))[0];
+my $wait_for_lock = 0;
+my $use_flock;
 
 my $verbose_logging = 0;
 my $zebraidx_log_opt = " -v none,fatal,warn ";
@@ -62,11 +65,12 @@ my $result = GetOptions(
     'x'             => \$as_xml,
     'y'             => \$do_not_clear_zebraqueue,
     'z'             => \$process_zebraqueue,
-    'where:s'        => \$where,
-    'length:i'        => \$length,
+    'where:s'       => \$where,
+    'length:i'      => \$length,
     'offset:i'      => \$offset,
-    'v+'             => \$verbose_logging,
-    'run-as-root'    => \$run_as_root,
+    'v+'            => \$verbose_logging,
+    'run-as-root'   => \$run_as_root,
+    'wait-for-lock' => \$wait_for_lock,
 );
 
 if (not $result or $want_help) {
@@ -151,12 +155,27 @@ my $dbh = C4::Context->dbh;
 my ($biblionumbertagfield,$biblionumbertagsubfield) = &GetMarcFromKohaField("biblio.biblionumber","");
 my ($biblioitemnumbertagfield,$biblioitemnumbertagsubfield) = &GetMarcFromKohaField("biblioitems.biblioitemnumber","");
 
+# Protect against simultaneous update of the zebra index by using a lock file.
+# Create our own lock directory if it's missing.  This should be created
+# by koha-zebra-ctl.sh or at system installation.  If the desired directory
+# does not exist and cannot be created, we fall back on /tmp - which will
+# always work.
+
+my $lockdir = C4::Context->config("zebra_lockdir") // "/var/lock";
+$lockdir .= "/rebuild";
+unless (-d $lockdir) {
+    eval { mkpath($lockdir, 0, oct(755)) };
+    $lockdir = "/tmp" if ($@);
+}
+my $lockfile = $lockdir . "/rebuild..LCK";
+
 if ( $verbose_logging ) {
     print "Zebra configuration information\n";
     print "================================\n";
     print "Zebra biblio directory      = $biblioserverdir\n";
     print "Zebra authorities directory = $authorityserverdir\n";
     print "Koha directory              = $kohadir\n";
+    print "Lockfile                    = $lockfile\n";
     print "BIBLIONUMBER in :     $biblionumbertagfield\$$biblionumbertagsubfield\n";
     print "BIBLIOITEMNUMBER in : $biblioitemnumbertagfield\$$biblioitemnumbertagsubfield\n";
     print "================================\n";
@@ -164,13 +183,37 @@ if ( $verbose_logging ) {
 
 my $tester = XML::LibXML->new();
 
+# The main work is done here by calling do_one_pass().  We have added locking to
+# avoid race conditions between full rebuilds and incremental updates either from
+# daemon mode or periodic invocation from cron.  The race can lead to an updated
+# record being overwritten by a rebuild if the update is applied after the export
+# by the rebuild and before the rebuild finishes (more likely to affect large
+# catalogs).
+#
+# We have chosen to exit immediately by default if we cannot obtain the lock
+# to prevent the potential for an infinite backlog from cron invocations, but an
+# option (wait-for-lock) is provided to let the program wait for the lock.
+# See http://bugs.koha-community.org/bugzilla3/show_bug.cgi?id=11078 for details.
+open my $LockFH, q{>}, $lockfile or die "$lockfile: $!";
 if ($daemon_mode) {
     while (1) {
-        do_one_pass() if ( zebraqueue_not_empty() );
+        # For incremental updates, skip the update if the updates are locked
+        if (_flock($LockFH, LOCK_EX|LOCK_NB)) {
+            do_one_pass() if ( zebraqueue_not_empty() );
+            _flock($LockFH, LOCK_UN);
+        }
         sleep $daemon_sleep;
     }
 } else {
-    do_one_pass();
+    # all one-off invocations
+    my $lock_mode = ($wait_for_lock) ? LOCK_EX : LOCK_EX|LOCK_NB;
+    if (_flock($LockFH, $lock_mode)) {
+        do_one_pass();
+        _flock($LockFH, LOCK_UN);
+    } else {
+        # Can't die() here because we have files to clean up.
+        print "Aborting rebuild.  Unable to flock $lockfile: $!\n";
+    }
 }
 
 
@@ -228,7 +271,7 @@ sub zebraqueue_not_empty {
         $where_str = 'server = "authorityserver" AND done = 0;';
     }
     my $query =
-      $dbh->prepare( 'SELECT COUNT(*) FROM zebraqueue WHERE ' . $where_str );
+        $dbh->prepare('SELECT COUNT(*) FROM zebraqueue WHERE ' . $where_str );
 
     $query->execute;
     my $count = $query->fetchrow_arrayref->[0];
@@ -724,6 +767,26 @@ sub do_indexing {
 
 }
 
+sub _flock {
+# test if flock is present; if so, use it; if not, return true
+# op refers to the official flock operations incl LOCK_EX, LOCK_UN, etc.
+# combining LOCK_EX with LOCK_NB returns immediately
+    my ($fh, $op)= @_;
+    if( !defined($use_flock) ) {
+        #check if flock is present; if not, you will have a fatal error
+        my $i=eval { flock($fh, $op) };
+        #assuming that $fh and $op are fine(..), an undef i means no flock
+        $use_flock= defined($i)? 1: 0;
+        print "Warning: flock could not be used!\n" if $verbose_logging && !$use_flock;
+        return 1 if !$use_flock;
+        return $i;
+    }
+    else {
+        return 1 if !$use_flock;
+        return flock($fh, $op);
+    }
+}
+
 sub print_usage {
     print <<_USAGE_;
 $0: reindex MARC bibs and/or authorities in Zebra.
@@ -809,6 +872,12 @@ Parameters:
 
     --run-as-root           explicitily allow script to run as 'root' user
 
+    --wait-for-lock         when not running in daemon mode, the default
+                            behavior is to abort a rebuild if the rebuild
+                            lock is busy.  This option will cause the program
+                            to wait for the lock to free and then continue
+                            processing the rebuild request,
+
     --help or -h            show this message.
 _USAGE_
 }
diff --git a/skel/var/lock/koha/zebradb/rebuild/README b/skel/var/lock/koha/zebradb/rebuild/README
new file mode 100644
index 0000000..98b4c5b
--- /dev/null
+++ b/skel/var/lock/koha/zebradb/rebuild/README
@@ -0,0 +1 @@
+Zebra rebuild lock dir
diff --git a/t/db_dependent/zebra_config.pl b/t/db_dependent/zebra_config.pl
index 47854da..2a5e99d 100755
--- a/t/db_dependent/zebra_config.pl
+++ b/t/db_dependent/zebra_config.pl
@@ -31,6 +31,7 @@ if ($indexing_mode eq 'dom') {
 make_path("$destination/var/lock/zebradb");
 make_path("$destination/var/lock/zebradb/biblios");
 make_path("$destination/var/lock/zebradb/authorities");
+make_path("$destination/var/lock/zebradb/rebuild");
 make_path("$destination/var/lib/zebradb");
 make_path("$destination/var/lib/zebradb/biblios");
 make_path("$destination/var/lib/zebradb/biblios/key");
-- 
1.7.9.5



More information about the Koha-patches mailing list