[PATCH 7/7] [SIGNED-OFF] Bug 8211: Add exploded search options

Jared Camins-Esakov jcamins at cpbibliography.com
Fri Aug 24 01:35:22 CEST 2012


When working with hierarchical subject headings, it is sometimes helpful
to do a search for all records with a specific subject, plus
broader/narrower/related subjects. This patch adds support for these
"exploded" subject searches to Koha.

To test:
1) Make sure you have a bunch of hierarchical subjects. I created
   geographical subjects for "Arizona," "United States," and "Phoenix,"
   and linked them together using 551s, and made sure I had a half
   dozen records linking to each (but not all to all three).
2) Do a search for su-br:Arizona (or choose "Subject and broader terms"
   on the advanced search screen with "more options" displayed), and
   check that you get the records with the subject "Arizona" and the
   records with the subject "United States"
3) Do a search for su-na:Arizona (or choose "Subject and narrower terms"
   on the advanced search screen with "more options" displayed), and
   check that you get the records with the subject "Arizona" and the
   records with the subject "Phoenix"
4) Do a search for su-rl:Arizona (or choose "Subject and related terms"
   on the advanced search screen with "more options" displayed), and
   check that you get the records with the subject "Arizona," the
   records with the subject "United States," and the records with the
   subject "Phoenix"
5) Ensure that other searches still work (keyword, subject, ccl,
   whatever)
6) Sign off

Technical details:
This patch adds a shim in front of C4::Search::buildQuery in order to
preprocess the query and call the _handle_exploding_search callback.
This shim will allow us to gradually offload query parsing to a new
query parser module.

Signed-off-by: wajasu <matted-34813 at mypacks.net>
---
 C4/Search.pm                                       | 102 +++++++++++++++++++++
 .../prog/en/includes/search_indexes.inc            |   5 +
 .../opac-tmpl/prog/en/modules/opac-advsearch.tt    |   5 +-
 t/db_dependent/Search.t                            |  80 +++++++++++++++-
 4 files changed, 190 insertions(+), 2 deletions(-)

diff --git a/C4/Search.pm b/C4/Search.pm
index 3edf118..dbdb4e2 100644
--- a/C4/Search.pm
+++ b/C4/Search.pm
@@ -34,6 +34,8 @@ use C4::Charset;
 use YAML;
 use URI::Escape;
 use Business::ISBN;
+use MARC::Record;
+use MARC::Field;
 
 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $DEBUG);
 
@@ -1028,6 +1030,104 @@ sub getIndexes{
     return \@indexes;
 }
 
+=head2 _handle_exploding_index
+
+    my $query = _handle_exploding_index($index, $term)
+
+Callback routine to generate the search for "exploding" indexes (i.e.
+those indexes which are turned into multiple or-connected searches based
+on authority data).
+
+=cut
+
+sub _handle_exploding_index {
+    my ( $index, $term ) = @_;
+
+    return unless ($index =~ m/(su-br|su-na|su-rl)/ && $term);
+
+    my $marcflavour = C4::Context->preference('marcflavour');
+
+    my $codesubfield = $marcflavour eq 'UNIMARC' ? '5' : 'w';
+    my $wantedcodes = '';
+    my @subqueries = ( "(su=\"$term\")");
+    my ($error, $results, $total_hits) = SimpleSearch( "Heading,wrdl=$term", undef, undef, [ "authorityserver" ] );
+    foreach my $auth (@$results) {
+        my $record = MARC::Record->new_from_usmarc($auth);
+        my @references = $record->field('5..');
+        if (@references) {
+            if ($index eq 'su-br') {
+                $wantedcodes = 'g';
+            } elsif ($index eq 'su-na') {
+                $wantedcodes = 'h';
+            } elsif ($index eq 'su-rl') {
+                $wantedcodes = '';
+            }
+            foreach my $reference (@references) {
+                my $codes = $reference->subfield($codesubfield);
+                push @subqueries, '(su="' . $reference->as_string('abcdefghijlmnopqrstuvxyz') . '")' if (($codes && $codes eq $wantedcodes) || !$wantedcodes);
+            }
+        }
+    }
+    return join(' or ', @subqueries);
+}
+
+=head2 parseQuery
+
+    ( $operators, $operands, $indexes, $limits,
+      $sort_by, $scan, $lang ) =
+            buildQuery ( $operators, $operands, $indexes, $limits, $sort_by, $scan, $lang);
+
+Shim function to ease the transition from buildQuery to a new QueryParser.
+This function is called at the beginning of buildQuery, and modifies
+buildQuery's input. If it can handle the input, it returns a query that
+buildQuery will not try to parse.
+=cut
+
+sub parseQuery {
+    my ( $operators, $operands, $indexes, $limits, $sort_by, $scan, $lang) = @_;
+
+    my @operators = $operators ? @$operators : ();
+    my @indexes   = $indexes   ? @$indexes   : ();
+    my @operands  = $operands  ? @$operands  : ();
+    my @limits    = $limits    ? @$limits    : ();
+    my @sort_by   = $sort_by   ? @$sort_by   : ();
+
+    my $query = $operands[0];
+    my $index;
+    my $term;
+
+# TODO: once we are using QueryParser, all this special case code for
+#       exploded search indexes will be replaced by a callback to
+#       _handle_exploding_index
+    if ( $query =~ m/^(.*)\b(su-br|su-na|su-rl)[:=](\w.*)$/ ) {
+        $query = $1;
+        $index = $2;
+        $term  = $3;
+    } else {
+        $query = '';
+        for ( my $i = 0 ; $i <= @operands ; $i++ ) {
+            if ($operands[$i] && $indexes[$i] =~ m/(su-br|su-na|su-rl)/) {
+                $index = $indexes[$i];
+                $term = $operands[$i];
+            } elsif ($operands[$i]) {
+                $query .= $operators[$i] eq 'or' ? ' or ' : ' and ' if ($query);
+                $query .= "($indexes[$i]:$operands[$i])";
+            }
+        }
+    }
+
+    if ($index) {
+        my $queryPart = _handle_exploding_index($index, $term);
+        if ($queryPart) {
+            $query .= "($queryPart)";
+        }
+        $operators = ();
+        $operands[0] = "ccl=$query";
+    }
+
+    return ( $operators, \@operands, $indexes, $limits, $sort_by, $scan, $lang);
+}
+
 =head2 buildQuery
 
 ( $error, $query,
@@ -1049,6 +1149,8 @@ sub buildQuery {
 
     warn "---------\nEnter buildQuery\n---------" if $DEBUG;
 
+    ( $operators, $operands, $indexes, $limits, $sort_by, $scan, $lang) = parseQuery($operators, $operands, $indexes, $limits, $sort_by, $scan, $lang);
+
     # dereference
     my @operators = $operators ? @$operators : ();
     my @indexes   = $indexes   ? @$indexes   : ();
diff --git a/koha-tmpl/intranet-tmpl/prog/en/includes/search_indexes.inc b/koha-tmpl/intranet-tmpl/prog/en/includes/search_indexes.inc
index 2413216..09e748b 100644
--- a/koha-tmpl/intranet-tmpl/prog/en/includes/search_indexes.inc
+++ b/koha-tmpl/intranet-tmpl/prog/en/includes/search_indexes.inc
@@ -13,6 +13,11 @@
     <option value="se">&nbsp;&nbsp;&nbsp;&nbsp; Series title</option>
     <option value="su">Subject</option>
     <option value="su,phr">&nbsp;&nbsp;&nbsp;&nbsp; Subject as phrase</option>
+    [% IF ( expanded_options ) %]
+        <option value="su-br">&nbsp;&nbsp;&nbsp;&nbsp; Subject and broader terms</option>
+        <option value="su-na">&nbsp;&nbsp;&nbsp;&nbsp; Subject and narrower terms</option>
+        <option value="su-rl">&nbsp;&nbsp;&nbsp;&nbsp; Subject and related terms</option>
+    [% END %]
     <option value="bc">Barcode</option>
     <option value="location">Shelving location</option>
     <option value="sn">Standard number</option>
diff --git a/koha-tmpl/opac-tmpl/prog/en/modules/opac-advsearch.tt b/koha-tmpl/opac-tmpl/prog/en/modules/opac-advsearch.tt
index 6ed2b86..cca5d0d 100644
--- a/koha-tmpl/opac-tmpl/prog/en/modules/opac-advsearch.tt
+++ b/koha-tmpl/opac-tmpl/prog/en/modules/opac-advsearch.tt
@@ -67,8 +67,11 @@
         <select name="idx">
             <option value="kw">Keyword</option>
             <option value="su,wrdl">Subject</option>
-[% IF ( search_boxes_loo.expanded_options ) %]
+[% IF ( expanded_options ) %]
             <option value="su,phr">&nbsp;&nbsp;&nbsp;&nbsp; Subject phrase</option>
+            <option value="su-br">&nbsp;&nbsp;&nbsp;&nbsp; Subject and broader terms</option>
+            <option value="su-na">&nbsp;&nbsp;&nbsp;&nbsp; Subject and narrower terms</option>
+            <option value="su-rl">&nbsp;&nbsp;&nbsp;&nbsp; Subject and related terms</option>
 [% END %]
             <option value="ti">Title</option>
 [% IF ( expanded_options ) %]
diff --git a/t/db_dependent/Search.t b/t/db_dependent/Search.t
index 3a517a8..6d3d7a3 100644
--- a/t/db_dependent/Search.t
+++ b/t/db_dependent/Search.t
@@ -12,7 +12,7 @@ use YAML;
 use C4::Debug;
 require C4::Context;
 
-use Test::More tests => 57;
+use Test::More tests => 78;
 use Test::MockModule;
 use MARC::Record;
 use File::Spec;
@@ -515,4 +515,82 @@ END {
     }
 }
 
+# Testing exploding indexes
+my $term;
+my $searchmodule = new Test::MockModule('C4::Search');
+$searchmodule->mock('SimpleSearch', sub {
+    my $query = shift;
+
+    is($query, "Heading,wrdl=$term", "Searching for expected term '$term' for exploding") or return '', [], 0;
+
+    my $record = MARC::Record->new;
+    if ($query =~ m/Arizona/) {
+        $record->add_fields(
+            [ '001', '1234' ],
+            [ '151', ' ', ' ', a => 'Arizona' ],
+            [ '551', ' ', ' ', a => 'United States', w => 'g' ],
+            [ '551', ' ', ' ', a => 'Maricopa County', w => 'h' ],
+            [ '551', ' ', ' ', a => 'Navajo County', w => 'h' ],
+            [ '551', ' ', ' ', a => 'Pima County', w => 'h' ],
+            [ '551', ' ', ' ', a => 'New Mexico' ],
+            );
+    }
+    return '', [ $record->as_usmarc() ], 1;
+});
+
+$term = 'Arizona';
+( $error, $query, $simple_query, $query_cgi,
+$query_desc, $limit, $limit_cgi, $limit_desc,
+$stopwords_removed, $query_type ) = buildQuery([], [ $term ], [ 'su-br' ], [  ], [], 0, 'en');
+matchesExplodedTerms("Advanced search for broader subjects", $query, 'Arizona', 'United States');
+
+( $error, $query, $simple_query, $query_cgi,
+$query_desc, $limit, $limit_cgi, $limit_desc,
+$stopwords_removed, $query_type ) = buildQuery([], [ $term ], [ 'su-na' ], [  ], [], 0, 'en');
+matchesExplodedTerms("Advanced search for narrower subjects", $query, 'Arizona', 'Maricopa County', 'Navajo County', 'Pima County');
+
+( $error, $query, $simple_query, $query_cgi,
+$query_desc, $limit, $limit_cgi, $limit_desc,
+$stopwords_removed, $query_type ) = buildQuery([], [ $term ], [ 'su-rl' ], [  ], [], 0, 'en');
+matchesExplodedTerms("Advanced search for related subjects", $query, 'Arizona', 'United States', 'Maricopa County', 'Navajo County', 'Pima County');
+
+( $error, $query, $simple_query, $query_cgi,
+$query_desc, $limit, $limit_cgi, $limit_desc,
+$stopwords_removed, $query_type ) = buildQuery([], [ "$term", 'history' ], [ 'su-rl', 'kw' ], [  ], [], 0, 'en');
+matchesExplodedTerms("Advanced search for related subjects and keyword 'history' searches related subjects", $query, 'Arizona', 'United States', 'Maricopa County', 'Navajo County', 'Pima County');
+like($query, qr/history/, "Advanced search for related subjects and keyword 'history' searches for 'history'");
+
+( $error, $query, $simple_query, $query_cgi,
+$query_desc, $limit, $limit_cgi, $limit_desc,
+$stopwords_removed, $query_type ) = buildQuery([], [ 'history', "$term" ], [ 'kw', 'su-rl' ], [  ], [], 0, 'en');
+matchesExplodedTerms("Order of terms doesn't matter for advanced search", $query, 'Arizona', 'United States', 'Maricopa County', 'Navajo County', 'Pima County');
+like($query, qr/history/, "Order of terms doesn't matter for advanced search");
+
+( $error, $query, $simple_query, $query_cgi,
+$query_desc, $limit, $limit_cgi, $limit_desc,
+$stopwords_removed, $query_type ) = buildQuery([], [ "su-br:$term" ], [  ], [  ], [], 0, 'en');
+matchesExplodedTerms("Simple search for broader subjects", $query, 'Arizona', 'United States');
+
+( $error, $query, $simple_query, $query_cgi,
+$query_desc, $limit, $limit_cgi, $limit_desc,
+$stopwords_removed, $query_type ) = buildQuery([], [ "su-na:$term" ], [  ], [  ], [], 0, 'en');
+matchesExplodedTerms("Simple search for narrower subjects", $query, 'Arizona', 'Maricopa County', 'Navajo County', 'Pima County');
+
+( $error, $query, $simple_query, $query_cgi,
+$query_desc, $limit, $limit_cgi, $limit_desc,
+$stopwords_removed, $query_type ) = buildQuery([], [ "su-rl:$term" ], [  ], [  ], [], 0, 'en');
+matchesExplodedTerms("Simple search for related subjects", $query, 'Arizona', 'United States', 'Maricopa County', 'Navajo County', 'Pima County');
+
+( $error, $query, $simple_query, $query_cgi,
+$query_desc, $limit, $limit_cgi, $limit_desc,
+$stopwords_removed, $query_type ) = buildQuery([], [ "history and su-rl:$term" ], [  ], [  ], [], 0, 'en');
+matchesExplodedTerms("Simple search for related subjects and keyword 'history' searches related subjects", $query, 'Arizona', 'United States', 'Maricopa County', 'Navajo County', 'Pima County');
+like($query, qr/history/, "Simple search for related subjects and keyword 'history' searches for 'history'");
+
+sub matchesExplodedTerms {
+    my ($message, $query, @terms) = @_;
+    my $match = "(( or )?\\((" . join ('|', map { "su=\"$_\"" } @terms) . ")\\)){" . scalar(@terms) . "}";
+    like($query, qr/$match/, $message);
+}
+
 1;
-- 
1.7.11.4


------=_Part_4078_30216792.1347236422714--



More information about the Koha-patches mailing list