diff --git a/Makefile.PL b/Makefile.PL
index 277742ad593..8b896338f48 100644
--- a/Makefile.PL
+++ b/Makefile.PL
@@ -186,7 +186,9 @@ Directory for Zebra configuration files.
=item ZEBRA_LOCK_DIR
-Directory for Zebra's lock files.
+Directory for Zebra's lock files. This includes subdirectories for authorities,
+biblios, and the Zebra rebuild function. Any activity that reindexes
+Zebra from Koha should interlock here with rebuild_zebra.pl.
=item ZEBRA_DATA_DIR
@@ -318,6 +320,7 @@ my $target_map = {
'./skel/var/lib/koha/zebradb/biblios/register' => { target => 'ZEBRA_DATA_DIR', trimdir => 6 },
'./skel/var/lib/koha/zebradb/biblios/shadow' => { target => 'ZEBRA_DATA_DIR', trimdir => 6 },
'./skel/var/lib/koha/zebradb/biblios/tmp' => { target => 'ZEBRA_DATA_DIR', trimdir => 6 },
+ './skel/var/lock/koha/zebradb/rebuild' => { target => 'ZEBRA_LOCK_DIR', trimdir => 6 },
'./skel/var/lib/koha/plugins' => { target => 'PLUGINS_DIR', trimdir => 6 },
'./sms' => 'INTRANET_CGI_DIR',
'./suggestion' => 'INTRANET_CGI_DIR',
diff --git a/debian/templates/koha-conf-site.xml.in b/debian/templates/koha-conf-site.xml.in
index 9bf090b3216..371b1d29d9d 100644
--- a/debian/templates/koha-conf-site.xml.in
+++ b/debian/templates/koha-conf-site.xml.in
@@ -280,6 +280,7 @@
__MEMCACHED_NAMESPACE__
__BIBLIOS_INDEXING_MODE__
__AUTHORITIES_INDEXING_MODE__
+ /var/lock/koha/__KOHASITE__
/etc/koha/searchengine/queryparser.yaml
diff --git a/etc/koha-conf.xml b/etc/koha-conf.xml
index 1da53780c15..72da2de9f8d 100644
--- a/etc/koha-conf.xml
+++ b/etc/koha-conf.xml
@@ -297,6 +297,7 @@ __PAZPAR2_TOGGLE_XML_POST__
0
__BIB_INDEX_MODE__
__AUTH_INDEX_MODE__
+ __ZEBRA_LOCK_DIR__
__KOHA_CONF_DIR__/searchengine/queryparser.yaml
diff --git a/misc/bin/koha-zebra-ctl.sh b/misc/bin/koha-zebra-ctl.sh
index 397eb19c6e0..82b91628dd1 100755
--- a/misc/bin/koha-zebra-ctl.sh
+++ b/misc/bin/koha-zebra-ctl.sh
@@ -65,6 +65,7 @@ case "$1" in
mkdir -p $LOCKDIR
mkdir -p $LOCKDIR/biblios
mkdir -p $LOCKDIR/authorities
+ mkdir -p $LOCKDIR/rebuild
if [[ $EUID -eq 0 ]]; then
chown -R $USER:$GROUP $LOCKDIR
fi
diff --git a/misc/migration_tools/rebuild_zebra.pl b/misc/migration_tools/rebuild_zebra.pl
index 04cf45428df..ec5f5189f0d 100755
--- a/misc/migration_tools/rebuild_zebra.pl
+++ b/misc/migration_tools/rebuild_zebra.pl
@@ -5,6 +5,7 @@
use C4::Context;
use Getopt::Long;
+use Fcntl qw(:flock);
use File::Temp qw/ tempdir /;
use File::Path;
use C4::Biblio;
@@ -42,6 +43,8 @@
my $offset;
my $run_as_root;
my $run_user = (getpwuid($<))[0];
+my $wait_for_lock = 0;
+my $use_flock;
my $verbose_logging = 0;
my $zebraidx_log_opt = " -v none,fatal,warn ";
@@ -62,11 +65,12 @@
'x' => \$as_xml,
'y' => \$do_not_clear_zebraqueue,
'z' => \$process_zebraqueue,
- 'where:s' => \$where,
- 'length:i' => \$length,
+ 'where:s' => \$where,
+ 'length:i' => \$length,
'offset:i' => \$offset,
- 'v+' => \$verbose_logging,
- 'run-as-root' => \$run_as_root,
+ 'v+' => \$verbose_logging,
+ 'run-as-root' => \$run_as_root,
+ 'wait-for-lock' => \$wait_for_lock,
);
if (not $result or $want_help) {
@@ -151,12 +155,27 @@
my ($biblionumbertagfield,$biblionumbertagsubfield) = &GetMarcFromKohaField("biblio.biblionumber","");
my ($biblioitemnumbertagfield,$biblioitemnumbertagsubfield) = &GetMarcFromKohaField("biblioitems.biblioitemnumber","");
+# Protect against simultaneous updates of the zebra index by using a lock file.
+# Create our own lock directory if it is missing. This should be created
+# by koha-zebra-ctl.sh or at system installation. If the desired directory
+# does not exist and cannot be created, we fall back on /tmp - which will
+# always work.
+
+my $lockdir = C4::Context->config("zebra_lockdir") // "/var/lock";
+$lockdir .= "/rebuild";
+unless (-d $lockdir) {
+ eval { mkpath($lockdir, 0, oct(755)) };
+ $lockdir = "/tmp" if ($@);
+}
+my $lockfile = $lockdir . "/rebuild..LCK";
+
if ( $verbose_logging ) {
print "Zebra configuration information\n";
print "================================\n";
print "Zebra biblio directory = $biblioserverdir\n";
print "Zebra authorities directory = $authorityserverdir\n";
print "Koha directory = $kohadir\n";
+ print "Lockfile = $lockfile\n";
print "BIBLIONUMBER in : $biblionumbertagfield\$$biblionumbertagsubfield\n";
print "BIBLIOITEMNUMBER in : $biblioitemnumbertagfield\$$biblioitemnumbertagsubfield\n";
print "================================\n";
@@ -164,13 +183,37 @@
my $tester = XML::LibXML->new();
+# The main work is done here by calling do_one_pass(). We have added locking
+# to avoid race conditions between full rebuilds and incremental updates, either
+# from daemon mode or periodic invocation from cron. The race can lead to an updated
+# record being overwritten by a rebuild if the update is applied after the export
+# by the rebuild and before the rebuild finishes (more likely to affect large
+# catalogs).
+#
+# We have chosen to exit immediately by default if we cannot obtain the lock
+# to prevent the potential for an infinite backlog from cron invocations, but an
+# option (wait-for-lock) is provided to let the program wait for the lock.
+# See http://bugs.koha-community.org/bugzilla3/show_bug.cgi?id=11078 for details.
+open my $LockFH, q{>}, $lockfile or die "$lockfile: $!";
if ($daemon_mode) {
while (1) {
- do_one_pass() if ( zebraqueue_not_empty() );
+ # For incremental updates, skip the update if the updates are locked
+ if (_flock($LockFH, LOCK_EX|LOCK_NB)) {
+ do_one_pass() if ( zebraqueue_not_empty() );
+ _flock($LockFH, LOCK_UN);
+ }
sleep $daemon_sleep;
}
} else {
- do_one_pass();
+ # all one-off invocations
+ my $lock_mode = ($wait_for_lock) ? LOCK_EX : LOCK_EX|LOCK_NB;
+ if (_flock($LockFH, $lock_mode)) {
+ do_one_pass();
+ _flock($LockFH, LOCK_UN);
+ } else {
+ # Can't die() here because we have files to clean up.
+ print "Aborting rebuild. Unable to flock $lockfile: $!\n";
+ }
}
@@ -228,7 +271,7 @@ sub zebraqueue_not_empty {
$where_str = 'server = "authorityserver" AND done = 0;';
}
my $query =
- $dbh->prepare( 'SELECT COUNT(*) FROM zebraqueue WHERE ' . $where_str );
+ $dbh->prepare('SELECT COUNT(*) FROM zebraqueue WHERE ' . $where_str );
$query->execute;
my $count = $query->fetchrow_arrayref->[0];
@@ -724,6 +767,26 @@ sub do_indexing {
}
+sub _flock {
+# Wrapper around flock($fh, $op): uses flock when it is available, otherwise returns true (no locking).
+# $op is a standard flock operation from Fcntl (LOCK_EX, LOCK_UN, ...), optionally OR-ed with LOCK_NB.
+# With LOCK_EX|LOCK_NB the call returns immediately instead of blocking; the result is flock's own.
+ my ($fh, $op)= @_;
+ if( !defined($use_flock) ) {
+ # First call: probe inside eval, since flock raises a fatal error where it is not implemented.
+ my $i=eval { flock($fh, $op) };
+ # Assuming $fh and $op are valid, an undef $i means eval caught that error; cache the verdict.
+ $use_flock= defined($i)? 1: 0;
+ print "Warning: flock could not be used!\n" if $verbose_logging && !$use_flock;
+ return 1 if !$use_flock;
+ return $i;
+ }
+ else {
+ return 1 if !$use_flock;
+ return flock($fh, $op);
+ }
+}
+
sub print_usage {
print <<_USAGE_;
$0: reindex MARC bibs and/or authorities in Zebra.
@@ -809,6 +872,12 @@ sub print_usage {
--run-as-root explicitily allow script to run as 'root' user
+ --wait-for-lock when not running in daemon mode, the default
+ behavior is to abort a rebuild if the rebuild
+ lock is busy. This option will cause the program
+ to wait for the lock to free and then continue
+ processing the rebuild request,
+
--help or -h show this message.
_USAGE_
}
diff --git a/skel/var/lock/koha/zebradb/rebuild/README b/skel/var/lock/koha/zebradb/rebuild/README
new file mode 100644
index 00000000000..98b4c5b91c1
--- /dev/null
+++ b/skel/var/lock/koha/zebradb/rebuild/README
@@ -0,0 +1 @@
+Zebra rebuild lock dir
diff --git a/t/db_dependent/zebra_config.pl b/t/db_dependent/zebra_config.pl
index 47854da42b2..2a5e99dcb96 100755
--- a/t/db_dependent/zebra_config.pl
+++ b/t/db_dependent/zebra_config.pl
@@ -31,6 +31,7 @@
make_path("$destination/var/lock/zebradb");
make_path("$destination/var/lock/zebradb/biblios");
make_path("$destination/var/lock/zebradb/authorities");
+make_path("$destination/var/lock/zebradb/rebuild");
make_path("$destination/var/lib/zebradb");
make_path("$destination/var/lib/zebradb/biblios");
make_path("$destination/var/lib/zebradb/biblios/key");