[Koha-patches] [PATCH] bug 10729 Add phrases configuration for icu
Colin Campbell
colin.campbell at ptfs-europe.com
Wed Aug 14 17:36:42 CEST 2013
Add a separate phrases-icu.xml for phrase indexes.
The file is based on the one distributed with Zebra,
with a couple of additions to reflect Koha usage.
This patch adds a separate tokenizer variable
(ZEBRA_PTOKENIZER_STMT) for phrase indexes, so that
default.idx is correctly rewritten for sites using
ICU indexing.
---
Makefile.PL | 4 ++++
etc/zebradb/etc/default.idx | 2 +-
etc/zebradb/etc/phrases-icu.xml | 10 ++++++++++
rewrite-config.PL | 1 +
4 files changed, 16 insertions(+), 1 deletion(-)
create mode 100644 etc/zebradb/etc/phrases-icu.xml
diff --git a/Makefile.PL b/Makefile.PL
index a61072f..8340e8f 100644
--- a/Makefile.PL
+++ b/Makefile.PL
@@ -558,6 +558,10 @@ $config{ZEBRA_TOKENIZER_STMT} = $config{ZEBRA_TOKENIZER} eq 'icu'
? 'icuchain words-icu.xml'
: 'charmap word-phrase-utf.chr';
+$config{ZEBRA_PTOKENIZER_STMT} = $config{ZEBRA_TOKENIZER} eq 'icu'
+ ? 'icuchain phrases-icu.xml'
+ : 'charmap word-phrase-utf.chr';
+
my %test_suite_override_dirs = (
KOHA_CONF_DIR => ['etc'],
ZEBRA_CONF_DIR => ['etc', 'zebradb'],
diff --git a/etc/zebradb/etc/default.idx b/etc/zebradb/etc/default.idx
index d6314c6..3a70392 100644
--- a/etc/zebradb/etc/default.idx
+++ b/etc/zebradb/etc/default.idx
@@ -18,7 +18,7 @@ __ZEBRA_TOKENIZER_STMT__
index p
completeness 1
firstinfield 1
-__ZEBRA_TOKENIZER_STMT__
+__ZEBRA_PTOKENIZER_STMT__
# URX (URL) index
# Used if structure=urx (@attr 4=104)
diff --git a/etc/zebradb/etc/phrases-icu.xml b/etc/zebradb/etc/phrases-icu.xml
new file mode 100644
index 0000000..59d415c
--- /dev/null
+++ b/etc/zebradb/etc/phrases-icu.xml
@@ -0,0 +1,10 @@
+<icu_chain locale="">
+ <transform rule="[:Control:] Any-Remove"/>
+ <tokenize rule="s"/>
+ <transform rule="[:Punctuation:] Remove"/>
+ <transform rule="NFD"/>
+ <transform rule="[:Nonspacing Mark:] Remove"/>
+ <transform rule="NFC"/>
+ <display/>
+ <casemap rule="l"/>
+</icu_chain>
diff --git a/rewrite-config.PL b/rewrite-config.PL
index e903d49..f53d402 100644
--- a/rewrite-config.PL
+++ b/rewrite-config.PL
@@ -124,6 +124,7 @@ $prefix = $ENV{'INSTALL_BASE'} || "/usr";
'__ZEBRA_LANGUAGE__' => 'en',
'__ZEBRA_TOKENIZER__' => 'chr',
'__ZEBRA_TOKENIZER_STMT__' => 'charmap word-phrase-utf.chr',
+ '__ZEBRA_PTOKENIZER_STMT__' => 'charmap word-phrase-utf.chr',
'__ZEBRA_AUTH_CFG__' => 'zebra-authorities.cfg',
'__ZEBRA_BIB_CFG__' => 'zebra-biblios.cfg',
'__AUTH_RETRIEVAL_CFG__' => 'retrieval-info-auth-grs1.xml',
--
1.8.4.rc2.15.g96cb27a
More information about the Koha-patches
mailing list