[Koha-patches] [PATCH 2/2] Bug 11078 Add locking to rebuild_zebra to prevent race conditions.

Doug Kingston dpk at randomnotes.org
Mon Oct 28 03:30:46 CET 2013


The race condition exists whether you are doing incremental updates with
a periodic cronjob or with the new daemon mode.  Suppose you start a full rebuild
at time T0 which will take until T20 to extract the records.  Suppose also at T10,
a biblio or auth is updated and processed through the zebraqueue by T15.
In this situation the updated record in zebra will be overwritten when
the full rebuild records are uploaded to zebra after T20.  We prevent this
by only allowing one rebuild_zebra per koha instance to be running at one time.

This patch adds flock based locking for rebuild_zebra.pl on a per-instance basis.
This prevents races between full rebuilds and background incremental updates from
the zebraqueue table in the database.  Add a lockdir to the config file
to allow the proper location to be determined at runtime.

When running in daemon mode (see bug 6435), incremental updates will be skipped
while a full rebuild is running, and resume afterwards.  A full rebuild or other
ad hoc request will wait for any existing lock to clear.

Tested by flocking the lock file while invoking rebuild_zebra.pl in
various modes (daemon, ad hoc zebraqueue task, and full rebuild) using
a flock program that I will attach to the bug.
---
 etc/koha-conf.xml                     |    1 +
 misc/migration_tools/rebuild_zebra.pl |   25 ++++++++++++++++++++++++-
 rewrite-config.PL                     |    1 +
 3 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/etc/koha-conf.xml b/etc/koha-conf.xml
index d42f041..3f180bf 100644
--- a/etc/koha-conf.xml
+++ b/etc/koha-conf.xml
@@ -283,6 +283,7 @@ __PAZPAR2_TOGGLE_XML_POST__
  <opachtdocs>__OPAC_TMPL_DIR__</opachtdocs>
  <intrahtdocs>__INTRANET_TMPL_DIR__</intrahtdocs>
  <includes>__INTRANET_TMPL_DIR__/prog/en/includes/</includes>
+ <lockdir>__LOCK_DIR__</lockdir>
  <logdir>__LOG_DIR__</logdir>
  <docdir>__DOC_DIR__</docdir>
  <backupdir>__BACKUP_DIR__</backupdir>
diff --git a/misc/migration_tools/rebuild_zebra.pl b/misc/migration_tools/rebuild_zebra.pl
index 6489fc5..e18725c 100755
--- a/misc/migration_tools/rebuild_zebra.pl
+++ b/misc/migration_tools/rebuild_zebra.pl
@@ -5,6 +5,7 @@ use strict;
 
 use C4::Context;
 use Getopt::Long;
+use Fcntl qw(:flock);
 use File::Temp qw/ tempdir /;
 use File::Path;
 use C4::Biblio;
@@ -139,12 +140,20 @@ my $dbh = C4::Context->dbh;
 my ($biblionumbertagfield,$biblionumbertagsubfield) = &GetMarcFromKohaField("biblio.biblionumber","");
 my ($biblioitemnumbertagfield,$biblioitemnumbertagsubfield) = &GetMarcFromKohaField("biblioitems.biblioitemnumber","");
 
+# We need to create our own lock directory which incorporates the database instance
+# we are indexing to facilitate multiple instances on the same machine.
+
+my $lockdir = C4::Context->config("lockdir") . "/koha_rebuild_zebra_" . C4::Context->config("database");
+mkpath($lockdir, 0, 0755) unless (-d $lockdir);
+my $lockfile = $lockdir . "/lock";
+
 if ( $verbose_logging ) {
     print "Zebra configuration information\n";
     print "================================\n";
     print "Zebra biblio directory      = $biblioserverdir\n";
     print "Zebra authorities directory = $authorityserverdir\n";
     print "Koha directory              = $kohadir\n";
+    print "Lockfile                    = $lockfile\n";
     print "BIBLIONUMBER in :     $biblionumbertagfield\$$biblionumbertagsubfield\n";
     print "BIBLIOITEMNUMBER in : $biblioitemnumbertagfield\$$biblioitemnumbertagsubfield\n";
     print "================================\n";
@@ -156,13 +165,27 @@ if ($do_munge) {
 
 my $tester = XML::LibXML->new();
 
+# The main work is done here by calling do_one_pass().  We have added locking
+# to avoid race conditions between full rebuilds and incremental updates either from
+# daemon mode or periodic invocation from cron.  The race can lead to an updated
+# record being overwritten by a rebuild if the update is applied after the export
+# by the rebuild and before the rebuild finishes (more likely to affect large
+# catalogs).
+open(LockFH, ">$lockfile") or die "$lockfile: $!";
 if ($daemon_mode) {
     while (1) {
-        do_one_pass() if ( zebraqueue_not_empty() );
+        # For incremental updates, skip the update if the updates are locked
+	if (flock(LockFH, LOCK_EX|LOCK_NB)) {
+	    do_one_pass() if ( zebraqueue_not_empty() );
+	    flock(LockFH, LOCK_UN);
+	}
         sleep $daemon_sleep;
     }
 } else {
+    # all one-off invocations, wait for the lock to free
+    flock(LockFH, LOCK_EX);
     do_one_pass();
+    flock(LockFH, LOCK_UN);
 }
 
 
diff --git a/rewrite-config.PL b/rewrite-config.PL
index e903d49..24dae36 100644
--- a/rewrite-config.PL
+++ b/rewrite-config.PL
@@ -102,6 +102,7 @@ $prefix = $ENV{'INSTALL_BASE'} || "/usr";
   "__KOHA_GROUP__" => "koha",
   "__ZEBRA_PASS__" => "zebrastripes",
   "__ZEBRA_USER__" => "kohauser",
+  "__LOCK_DIR__" => "$ENV{'INSTALL_BASE'}/var/lock",
   '__BACKUP_DIR__' => "$prefix/var/spool",
   '__INTRANET_CGI_DIR__' => "$prefix/intranet/cgi-bin",
   '__INTRANET_TMPL_DIR__' => "$prefix/intranet/templates",
-- 
1.7.9.5



More information about the Koha-patches mailing list