[Koha-patches] [PATCH 1/6] bug 7818: update installer for biblio DOM indexing

Galen Charlton gmc at esilibrary.com
Mon Mar 26 07:44:42 CEST 2012


Adds the necessary bits to enable DOM indexing for bib
records as an option during installation from source.

Signed-off-by: Galen Charlton <gmc at esilibrary.com>
---
 Makefile.PL                                 |   22 +++++++++++
 etc/koha-conf.xml                           |   11 ++++-
 etc/zebradb/biblios/etc/dom-config-marc.xml |   54 ++++++++++++++++++++++++++
 etc/zebradb/biblios/etc/dom-config.xml      |   54 ++++++++++++++++++++++++++
 etc/zebradb/retrieval-info-bib-dom.xml      |   22 +++++++++++
 etc/zebradb/retrieval-info-bib-grs1.xml     |   40 +++++++++++++++++++
 etc/zebradb/zebra-biblios-dom.cfg           |   55 +++++++++++++++++++++++++++
 misc/koha-install-log                       |    4 +-
 rewrite-config.PL                           |    3 +
 9 files changed, 261 insertions(+), 4 deletions(-)
 create mode 100644 etc/zebradb/biblios/etc/dom-config-marc.xml
 create mode 100644 etc/zebradb/biblios/etc/dom-config.xml
 create mode 100644 etc/zebradb/retrieval-info-bib-dom.xml
 create mode 100644 etc/zebradb/retrieval-info-bib-grs1.xml
 create mode 100644 etc/zebradb/zebra-biblios-dom.cfg

diff --git a/Makefile.PL b/Makefile.PL
index 2e414d5..c3644f8 100644
--- a/Makefile.PL
+++ b/Makefile.PL
@@ -408,6 +408,7 @@ my %config_defaults = (
   'INSTALL_SRU'       => 'yes',
   'INSTALL_PAZPAR2'   => 'no',
   'AUTH_INDEX_MODE'   => 'dom',
+  'BIB_INDEX_MODE'    => 'dom',
   'ZEBRA_MARC_FORMAT' => 'marc21',
   'ZEBRA_LANGUAGE'    => 'en',
   'ZEBRA_TOKENIZER'   => 'chr',
@@ -460,6 +461,7 @@ my %valid_config_values = (
   'INSTALL_ZEBRA' => { 'yes' => 1, 'no' => 1 },
   'INSTALL_SRU' => { 'yes' => 1, 'no' => 1 },
   'AUTH_INDEX_MODE' => { 'grs1' => 1, 'dom' => 1 },
+  'BIB_INDEX_MODE'  => { 'grs1' => 1, 'dom' => 1 },
   'ZEBRA_MARC_FORMAT' => { 'marc21' => 1, 'normarc' => 1, 'unimarc' => 1 }, # FIXME should generate from contents of distributation
   'ZEBRA_LANGUAGE'    => { 'en' => 1, 'fr' => 1, 'nb' => 1 }, # FIXME should generate from contents of distribution
   'ZEBRA_TOKENIZER' => { chr => 1, icu => 1 },
@@ -501,12 +503,15 @@ if ($config{'INSTALL_ZEBRA'} eq "yes") {
     push @{ $pl_files->{'rewrite-config.PL'} }, (
         'blib/ZEBRA_CONF_DIR/etc/passwd',
         'blib/ZEBRA_CONF_DIR/zebra-biblios.cfg',
+        'blib/ZEBRA_CONF_DIR/zebra-biblios-dom.cfg',
         'blib/ZEBRA_CONF_DIR/zebra-authorities.cfg',
         'blib/ZEBRA_CONF_DIR/zebra-authorities-dom.cfg',
         'blib/ZEBRA_CONF_DIR/explain-authorities.xml',
         'blib/ZEBRA_CONF_DIR/explain-biblios.xml',
         'blib/ZEBRA_CONF_DIR/retrieval-info-auth-grs1.xml',
         'blib/ZEBRA_CONF_DIR/retrieval-info-auth-dom.xml',
+        'blib/ZEBRA_CONF_DIR/retrieval-info-bib-grs1.xml',
+        'blib/ZEBRA_CONF_DIR/retrieval-info-bib-dom.xml',
     );
     push @{ $pl_files->{'rewrite-config.PL'} }, (
         'blib/SCRIPT_DIR/koha-zebra-ctl.sh',
@@ -520,8 +525,11 @@ if ($config{'INSTALL_ZEBRA'} eq "yes") {
         );
     }
     $config{'ZEBRA_AUTH_CFG'} = $config{'AUTH_INDEX_MODE'} eq 'dom' ? 'zebra-authorities-dom.cfg' : 'zebra-authorities.cfg';
+    $config{'ZEBRA_BIB_CFG'}  = $config{'BIB_INDEX_MODE'}  eq 'dom' ? 'zebra-biblios-dom.cfg'     : 'zebra-biblios.cfg';
     $config{'AUTH_RETRIEVAL_CFG'} =
         $config{'AUTH_INDEX_MODE'} eq 'dom' ? 'retrieval-info-auth-dom.xml' : 'retrieval-info-auth-grs1.xml';
+    $config{'BIB_RETRIEVAL_CFG'}  =
+        $config{'BIB_INDEX_MODE'}  eq 'dom' ? 'retrieval-info-bib-dom.xml'  : 'retrieval-info-bib-grs1.xml';
 }
 
 if ($config{'INSTALL_MODE'} ne "dev") {
@@ -942,6 +950,20 @@ Primary language for Zebra indexing);
 
         $msg = q(
 Koha can use one of  two different indexing modes
+for the MARC bibliographic records:
+
+grs1 - uses the Zebra GRS-1 filter, available
+       for legacy support
+dom  - uses the DOM XML filter; offers improved
+       functionality.
+
+Bibliographic indexing mode);
+        $msg .= _add_valid_values_disp('BIB_INDEX_MODE', $valid_values);
+        $config{'BIB_INDEX_MODE'} = _get_value('BIB_INDEX_MODE', $msg, $defaults->{'BIB_INDEX_MODE'}, $valid_values, $install_log_values);
+
+
+        $msg = q(
+Koha can use one of  two different indexing modes
 for the MARC authorities records:
 
 grs1 - uses the Zebra GRS-1 filter, available
diff --git a/etc/koha-conf.xml b/etc/koha-conf.xml
index f5e2c0f..2cb4e52 100644
--- a/etc/koha-conf.xml
+++ b/etc/koha-conf.xml
@@ -27,7 +27,7 @@ __PAZPAR2_TOGGLE_XML_PRE__
 <listen id="mergeserver">tcp:@:__MERGE_SERVER_PORT__</listen>
 <server id="mergeserver"  listenref="mergeserver"> 
     <directory>__ZEBRA_DATA_DIR__/biblios</directory>
-    <config>__ZEBRA_CONF_DIR__/zebra-biblios.cfg</config>
+    <config>__ZEBRA_CONF_DIR__/__ZEBRA_BIB_CFG__</config>
     <cql2rpn>__ZEBRA_CONF_DIR__/pqf.properties</cql2rpn>
 </server>
 __PAZPAR2_TOGGLE_XML_POST__
@@ -35,7 +35,7 @@ __PAZPAR2_TOGGLE_XML_POST__
 <!-- BIBLIOGRAPHIC RECORDS -->
 <server id="biblioserver"  listenref="biblioserver"> 
     <directory>__ZEBRA_DATA_DIR__/biblios</directory>
-    <config>__ZEBRA_CONF_DIR__/zebra-biblios.cfg</config>
+    <config>__ZEBRA_CONF_DIR__/__ZEBRA_BIB_CFG__</config>
     <cql2rpn>__ZEBRA_CONF_DIR__/pqf.properties</cql2rpn>
      <!-- <docpath>xsl</docpath> -->
      <!-- <stylesheet>xsl/default.xsl</stylesheet> -->
@@ -182,8 +182,11 @@ __PAZPAR2_TOGGLE_XML_POST__
 <!--
 <server id="publicserver"  listenref="publicserver"> 
     <directory>__ZEBRA_DATA_DIR__/biblios</directory>
-    <config>__ZEBRA_CONF_DIR__/zebra-biblios.cfg</config>
+    <config>__ZEBRA_CONF_DIR__/__ZEBRA_BIB_CFG__</config>
     <cql2rpn>__ZEBRA_CONF_DIR__/pqf.properties</cql2rpn>
+    <xi:include href="__KOHA_CONF_DIR__/zebradb/__BIB_RETRIEVAL_CFG__"
+                xmlns:xi="http://www.w3.org/2001/XInclude">
+      <xi:fallback>
      <retrievalinfo>
        <retrieval syntax="usmarc" name="F"/>
        <retrieval syntax="usmarc" name="B"/>
@@ -232,6 +235,8 @@ __PAZPAR2_TOGGLE_XML_POST__
          </backend>
        </retrieval>
      </retrievalinfo>
+      </xi:fallback>
+    </xi:include>
     <xi:include href="__KOHA_CONF_DIR__/zebradb/explain-biblios.xml"
                 xmlns:xi="http://www.w3.org/2001/XInclude">
       <xi:fallback>
diff --git a/etc/zebradb/biblios/etc/dom-config-marc.xml b/etc/zebradb/biblios/etc/dom-config-marc.xml
new file mode 100644
index 0000000..b53c68e
--- /dev/null
+++ b/etc/zebradb/biblios/etc/dom-config-marc.xml
@@ -0,0 +1,54 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!-- 
+$Id: dom-config.xml,v 1.1 2007-12-13 17:42:28 adam Exp $
+   Copyright (C) 1995-2006
+   Index Data ApS
+
+This file is part of the Zebra server.
+
+Zebra is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with Zebra; see the file LICENSE.zebra.  If not, write to the
+Free Software Foundation, 51 Franklin Street, Fifth Floor, Boston, 
+MA 02110-1301 USA.
+-->
+
+<!--
+Bibliographic DOM config for MARC ISO2709 input
+-->
+<dom>
+  <extract name="index">
+    <xslt stylesheet="biblio-zebra-indexdefs.xsl"/>
+  </extract>
+  <retrieve name="F">
+    <xslt stylesheet="identity.xsl" />
+  </retrieve>
+  <retrieve name="usmarc">
+    <xslt stylesheet="identity.xsl" />
+  </retrieve>
+  <retrieve name="marc">
+    <xslt stylesheet="identity.xsl" />
+  </retrieve>
+  <retrieve name="marcxml">
+    <xslt stylesheet="identity.xsl" />
+  </retrieve>
+  <retrieve name="zebra">
+    <xslt stylesheet="zebra.xsl"/>
+  </retrieve>
+  <retrieve name="index">
+    <xslt stylesheet="biblio-zebra-indexdefs.xsl"/>
+  </retrieve>
+  <input>
+    <marc inputcharset="utf-8"/>
+  </input>
+</dom>
diff --git a/etc/zebradb/biblios/etc/dom-config.xml b/etc/zebradb/biblios/etc/dom-config.xml
new file mode 100644
index 0000000..8b3b858
--- /dev/null
+++ b/etc/zebradb/biblios/etc/dom-config.xml
@@ -0,0 +1,54 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!-- 
+$Id: dom-config.xml,v 1.1 2007-12-13 17:42:28 adam Exp $
+   Copyright (C) 1995-2006
+   Index Data ApS
+
+This file is part of the Zebra server.
+
+Zebra is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with Zebra; see the file LICENSE.zebra.  If not, write to the
+Free Software Foundation, 51 Franklin Street, Fifth Floor, Boston, 
+MA 02110-1301 USA.
+-->
+
+<!--
+Bibliographic DOM config for MARC XML input
+-->
+<dom>
+  <extract name="index">
+    <xslt stylesheet="biblio-zebra-indexdefs.xsl"/>
+  </extract>
+  <retrieve name="F">
+    <xslt stylesheet="identity.xsl" />
+  </retrieve>
+  <retrieve name="usmarc">
+    <xslt stylesheet="identity.xsl" />
+  </retrieve>
+  <retrieve name="marc">
+    <xslt stylesheet="identity.xsl" />
+  </retrieve>
+  <retrieve name="marcxml">
+    <xslt stylesheet="identity.xsl" />
+  </retrieve>
+  <retrieve name="zebra">
+    <xslt stylesheet="zebra.xsl"/>
+  </retrieve>
+  <retrieve name="index">
+    <xslt stylesheet="biblio-zebra-indexdefs.xsl"/>
+  </retrieve>
+  <input>
+    <xmlreader level="0"/>
+  </input>
+</dom>
diff --git a/etc/zebradb/retrieval-info-bib-dom.xml b/etc/zebradb/retrieval-info-bib-dom.xml
new file mode 100644
index 0000000..3093c72
--- /dev/null
+++ b/etc/zebradb/retrieval-info-bib-dom.xml
@@ -0,0 +1,22 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<retrievalinfo xmlns="http://indexdata.com/yaz">
+  <retrieval syntax="usmarc" name="F">
+    <backend syntax="xml" name="marc">
+      <marc inputformat="xml" outputformat="marc"
+            inputcharset="utf-8"
+            outputcharset="utf-8"/>
+    </backend>
+  </retrieval>
+  <retrieval syntax="usmarc" name="B">
+    <backend syntax="xml" name="marc">
+      <marc inputformat="xml" outputformat="marc"
+            inputcharset="utf-8"
+            outputcharset="utf-8"/>
+    </backend>
+  </retrieval>
+  <retrieval syntax="xml" name="index"/> <!-- allow viewing index entries -->
+  <retrieval syntax="xml" name="marc"
+             identifier="info:srw/schema/1/marcxml-v1.1"/>
+  <retrieval syntax="xml" name="marcxml"
+             identifier="info:srw/schema/1/marcxml-v1.1"/>
+</retrievalinfo>
diff --git a/etc/zebradb/retrieval-info-bib-grs1.xml b/etc/zebradb/retrieval-info-bib-grs1.xml
new file mode 100644
index 0000000..58f319d
--- /dev/null
+++ b/etc/zebradb/retrieval-info-bib-grs1.xml
@@ -0,0 +1,40 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<retrievalinfo xmlns="http://indexdata.com/yaz">
+  <retrieval syntax="usmarc" name="F"/>
+  <retrieval syntax="usmarc" name="B"/>
+  <retrieval syntax="xml" name="marcxml"
+             identifier="info:srw/schema/1/marcxml-v1.1">
+    <backend syntax="usmarc" name="F">
+      <marc inputformat="marc" outputformat="marcxml"
+            inputcharset="utf-8"/>
+    </backend>
+  </retrieval>
+  <retrieval syntax="xml" name="dc">
+    <backend syntax="usmarc" name="F">
+      <marc inputformat="marc" outputformat="marcxml"
+            inputcharset="utf-8"/>
+      <xslt stylesheet="__INTRANET_TMPL_DIR__/prog/en/xslt/MARC21slim2DC.xsl"/>
+    </backend>
+  </retrieval>
+  <retrieval syntax="xml" name="mods">
+    <backend syntax="usmarc" name="F">
+      <marc inputformat="marc" outputformat="marcxml"
+            inputcharset="utf-8"/>
+      <xslt stylesheet="__INTRANET_TMPL_DIR__/prog/en/xslt/MARC21slim2MODS.xsl"/>
+    </backend>
+  </retrieval>
+  <retrieval syntax="xml" name="rdfdc">
+    <backend syntax="usmarc" name="F">
+      <marc inputformat="marc" outputformat="marcxml"
+            inputcharset="utf-8"/>
+      <xslt stylesheet="__INTRANET_TMPL_DIR__/prog/en/xslt/MARC21slim2RDFDC.xsl"/>
+    </backend>
+  </retrieval>
+  <retrieval syntax="xml" name="utils">
+    <backend syntax="usmarc" name="F">
+      <marc inputformat="marc" outputformat="marcxml"
+            inputcharset="utf-8"/>
+      <xslt stylesheet="__INTRANET_TMPL_DIR__/prog/en/xslt/MARC21slimUtils.xsl"/>
+    </backend>
+  </retrieval>
+</retrievalinfo>
diff --git a/etc/zebradb/zebra-biblios-dom.cfg b/etc/zebradb/zebra-biblios-dom.cfg
new file mode 100644
index 0000000..a748ddd
--- /dev/null
+++ b/etc/zebradb/zebra-biblios-dom.cfg
@@ -0,0 +1,55 @@
+# Simple Zebra configuration file that defines
+# a database with MARCXML records.
+# $Id: zebra.cfg,v 1.1.2.2 2006/05/09 12:03:16 rangi Exp $
+#
+# Where are the config files located?
+profilePath:__ZEBRA_CONF_DIR__/biblios/etc:__ZEBRA_CONF_DIR__/etc:__ZEBRA_CONF_DIR__/marc_defs/__ZEBRA_MARC_FORMAT__/biblios:__ZEBRA_CONF_DIR__/lang_defs/__ZEBRA_LANGUAGE__:__ZEBRA_CONF_DIR__/xsl
+# modulePath - where to look for loadable zebra modules
+modulePath: /usr/lib/idzebra-2.0/modules
+
+encoding: UTF-8
+# Files that describe the attribute sets supported.
+attset: bib1.att
+attset: explain.att
+attset: gils.att
+
+# systag sysno rank
+
+# Specify record type
+# group   .recordType[  .name  ]:  type
+# type is split into fundamental type. file-read-type . argument
+# http://www.indexdata.dk/zebra/doc/zebra-cfg.tkl
+# http://www.indexdata.dk/zebra/doc/grs.tkl
+
+recordtype: dom.__ZEBRA_CONF_DIR__/biblios/etc/dom-config.xml
+marcxml.recordtype: dom.__ZEBRA_CONF_DIR__/biblios/etc/dom-config.xml
+iso2709.recordtype: dom.__ZEBRA_CONF_DIR__/biblios/etc/dom-config-marc.xml
+
+recordId: (bib1,Local-number)
+storeKeys:1
+storeData:1
+
+
+# Lock File Area
+lockDir: __ZEBRA_LOCK_DIR__/biblios
+perm.anonymous:ar
+perm.__ZEBRA_USER__:rw
+passwd: __ZEBRA_CONF_DIR__/etc/passwd
+register: __ZEBRA_DATA_DIR__/biblios/register:20G
+shadow: __ZEBRA_DATA_DIR__/biblios/shadow:20G
+
+# Temp File area for result sets
+setTmpDir: __ZEBRA_DATA_DIR__/biblios/tmp
+
+# Temp File area for index program
+keyTmpDir: __ZEBRA_DATA_DIR__/biblios/key
+
+# Approx. Memory usage during indexing
+memMax: 50M
+rank:rank-1
+truncmax: 1000000000
+
+# Specifies the maximum number of records that will be sorted in a result set.
+# If the result set contains more than that limit, the records after the limit
+# will not be sorted. If omitted, the default value is 1,000.
+sortmax: 1000
diff --git a/misc/koha-install-log b/misc/koha-install-log
index 6f6a0eb..d842484 100644
--- a/misc/koha-install-log
+++ b/misc/koha-install-log
@@ -44,7 +44,9 @@ ZEBRA_RUN_DIR=__ZEBRA_RUN_DIR__
 ZEBRA_MARC_FORMAT=__ZEBRA_MARC_FORMAT__
 ZEBRA_LANGUAGE=__ZEBRA_LANGUAGE__
 ZEBRA_AUTH_CFG=__ZEBRA_AUTH_CFG__
+ZEBRA_BIB_CFG=__ZEBRA_BIB_CFG__
 AUTH_RETRIEVAL_CFG=__AUTH_RETRIEVAL_CFG__
+BIB_RETRIEVAL_CFG=__BIB_RETRIEVAL_CFG__
 MERGE_SERVER_HOST=__MERGE_SERVER_HOST__
 MERGE_SERVER_PORT=__MERGE_SERVER_PORT__
 PAZPAR2_HOST=__PAZPAR2_HOST__
@@ -54,7 +56,8 @@ INSTALL_BASE=__INSTALL_BASE__
 INSTALL_ZEBRA=__INSTALL_ZEBRA__
 INSTALL_SRU=__INSTALL_SRU__
 INSTALL_PAZPAR2=__INSTALL_PAZPAR2__
 AUTH_INDEX_MODE=__AUTH_INDEX_MODE__
+BIB_INDEX_MODE=__BIB_INDEX_MODE__
 RUN_DATABASE_TESTS=__RUN_DATABASE_TESTS__
 PATH_TO_ZEBRA=__PATH_TO_ZEBRA__
 USE_MEMCACHED=__USE_MEMCACHED__
diff --git a/rewrite-config.PL b/rewrite-config.PL
index 967a546..083b3a8 100644
--- a/rewrite-config.PL
+++ b/rewrite-config.PL
@@ -122,7 +122,9 @@ $prefix = $ENV{'INSTALL_BASE'} || "/usr";
   '__ZEBRA_LANGUAGE__' => 'en',
   '__ZEBRA_TOKENIZER_STMT__' => 'charmap word-phrase-utf.chr',
   '__ZEBRA_AUTH_CFG__' => 'zebra-authorities.cfg',
+  '__ZEBRA_BIB_CFG__' => 'zebra-biblios.cfg',
   '__AUTH_RETRIEVAL_CFG__' => 'retrieval-info-auth-grs1.xml',
+  '__BIB_RETRIEVAL_CFG__' => 'retrieval-info-bib-grs1.xml',
   "__MERGE_SERVER_HOST__" => $myhost,
   "__MERGE_SERVER_PORT__" => '11001',
   "__PAZPAR2_HOST__" => $myhost,
@@ -135,6 +137,7 @@ $prefix = $ENV{'INSTALL_BASE'} || "/usr";
   "__PAZPAR2_TOGGLE_XML_PRE__" => '<!--',
   "__PAZPAR2_TOGGLE_XML_POST__" => '-->',
   "__AUTH_INDEX_MODE__" => 'grs1',
+  "__BIB_INDEX_MODE__" => 'grs1',
   "__RUN_DATABASE_TESTS__" => 'no',
   "__PATH_TO_ZEBRA__" => "",
   "__USE_MEMCACHED__" => 'no',
-- 
1.7.2.5



More information about the Koha-patches mailing list