[Koha-patches] [PATCH] C4::Search :Enhancements
Henri-Damien LAURENT
henridamien.laurent at biblibre.com
Tue Aug 25 23:56:19 CEST 2009
Stemming is now available for more languages
Adding also getDistinctValues which gets distinct values for a zebra index or for a Mysql field
---
C4/Search.pm | 66 +++++++++++++++++++++++++++++++++++++++++++++-----
catalogue/search.pl | 3 +-
opac/opac-search.pl | 3 +-
3 files changed, 63 insertions(+), 9 deletions(-)
diff --git a/C4/Search.pm b/C4/Search.pm
index b6bec0f..83a8455 100644
--- a/C4/Search.pm
+++ b/C4/Search.pm
@@ -62,6 +62,7 @@ This module provides searching functions for Koha's bibliographic databases
&buildQuery
&NZgetRecords
&AddSearchHistory
+ &GetDistinctValues
);
# make all your functions, whether exported or not;
@@ -678,7 +679,8 @@ sub _detect_truncation {
# STEMMING
sub _build_stemmed_operand {
- my ($operand) = @_;
+ my ($operand,$lang) = @_;
+ require Lingua::Stem::Snowball;
my $stemmed_operand;
# If operand contains a digit, it is almost certainly an identifier, and should
@@ -689,7 +691,8 @@ sub _build_stemmed_operand {
return $operand if $operand =~ /\d/;
# FIXME: the locale should be set based on the user's language and/or search choice
- my $stemmer = Lingua::Stem->new( -locale => 'EN-US' );
+ my $stemmer = Lingua::Stem::Snowball->new( lang => $lang,
+ encoding => "UTF-8" );
# FIXME: these should be stored in the db so the librarian can modify the behavior
$stemmer->add_exceptions(
@@ -700,8 +703,8 @@ sub _build_stemmed_operand {
}
);
my @words = split( / /, $operand );
- my $stems = $stemmer->stem(@words);
- for my $stem (@$stems) {
+ my @stems = $stemmer->stem(\@words);
+ for my $stem (@stems) {
$stemmed_operand .= "$stem";
$stemmed_operand .= "?"
unless ( $stem =~ /(and$|or$|not$)/ ) || ( length($stem) < 3 );
@@ -778,7 +781,7 @@ sub _build_weighted_query {
$simple_query, $query_cgi,
$query_desc, $limit,
$limit_cgi, $limit_desc,
-$stopwords_removed, $query_type ) = getRecords ( $operators, $operands, $indexes, $limits, $sort_by, $scan);
+$stopwords_removed, $query_type ) = buildQuery ( $operators, $operands, $indexes, $limits, $sort_by, $scan, $lang);
Build queries and limits in CCL, CGI, Human,
handle truncation, stemming, field weighting, stopwords, fuzziness, etc.
@@ -789,7 +792,7 @@ See verbose embedded documentation.
=cut
sub buildQuery {
- my ( $operators, $operands, $indexes, $limits, $sort_by, $scan ) = @_;
+ my ( $operators, $operands, $indexes, $limits, $sort_by, $scan, $lang) = @_;
warn "---------\nEnter buildQuery\n---------" if $DEBUG;
@@ -974,7 +977,8 @@ sub buildQuery {
# Handle Stemming
my $stemmed_operand;
- $stemmed_operand = _build_stemmed_operand($operand) if $stemming;
+ $stemmed_operand = _build_stemmed_operand($operand, $lang)
+ if $stemming;
warn "STEMMED OPERAND: >$stemmed_operand<" if $DEBUG;
@@ -2129,6 +2133,54 @@ sub z3950_search_args {
return $array;
}
+=head2 GetDistinctValues($field);
+
+C<$field> is a reference to the fields array
+
+=cut
+
+sub GetDistinctValues {
+ my ($fieldname,$string)=@_;
+ # returns a reference to a hash of references to branches...
+ if ($fieldname=~/\./){
+ my ($table,$column)=split /\./, $fieldname;
+ my $dbh = C4::Context->dbh;
+ warn "select DISTINCT($column) as value, count(*) as cnt from $table group by lib order by $column ";
+ my $sth = $dbh->prepare("select DISTINCT($column) as value, count(*) as cnt from $table ".($string?" where $column like \"$string%\"":"")."group by value order by $column ");
+ $sth->execute;
+ my $elements=$sth->fetchall_arrayref({});
+ return $elements;
+ }
+ else {
+ $string||= qq("");
+ my @servers=qw<biblioserver authorityserver>;
+ my (@zconns, at results);
+ for ( my $i = 0 ; $i < @servers ; $i++ ) {
+ $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
+ $results[$i] =
+ $zconns[$i]->scan(
+ ZOOM::Query::CCL2RPN->new( qq"$fieldname $string", $zconns[$i])
+ );
+ }
+ # The big moment: asynchronously retrieve results from all servers
+ my @elements;
+ while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
+ my $ev = $zconns[ $i - 1 ]->last_event();
+ if ( $ev == ZOOM::Event::ZEND ) {
+ next unless $results[ $i - 1 ];
+ my $size = $results[ $i - 1 ]->size();
+ if ( $size > 0 ) {
+ for (my $j=0;$j<$size;$j++){
+ my %hashscan;
+ @hashscan{qw(value cnt)}=$results[ $i - 1 ]->display_term($j);
+ push @elements, \%hashscan;
+ }
+ }
+ }
+ }
+ return \@elements;
+ }
+}
END { } # module clean-up code here (global destructor)
diff --git a/catalogue/search.pl b/catalogue/search.pl
index 33da2b0..f30d068 100755
--- a/catalogue/search.pl
+++ b/catalogue/search.pl
@@ -433,7 +433,8 @@ my ( $error,$query,$simple_query,$query_cgi,$query_desc,$limit,$limit_cgi,$limit
my @results;
## I. BUILD THE QUERY
-( $error,$query,$simple_query,$query_cgi,$query_desc,$limit,$limit_cgi,$limit_desc,$stopwords_removed,$query_type) = buildQuery(\@operators,\@operands,\@indexes,\@limits,\@sort_by,$scan);
+my $lang = C4::Output::getlanguagecookie($cgi);
+( $error,$query,$simple_query,$query_cgi,$query_desc,$limit,$limit_cgi,$limit_desc,$stopwords_removed,$query_type) = buildQuery(\@operators,\@operands,\@indexes,\@limits,\@sort_by,$scan,$lang);
## parse the query_cgi string and put it into a form suitable for <input>s
my @query_inputs;
diff --git a/opac/opac-search.pl b/opac/opac-search.pl
index 08b9c53..8cbc92f 100755
--- a/opac/opac-search.pl
+++ b/opac/opac-search.pl
@@ -331,7 +331,8 @@ my ($error,$query,$simple_query,$query_cgi,$query_desc,$limit,$limit_cgi,$limit_
my @results;
## I. BUILD THE QUERY
-( $error,$query,$simple_query,$query_cgi,$query_desc,$limit,$limit_cgi,$limit_desc,$stopwords_removed,$query_type) = buildQuery(\@operators,\@operands,\@indexes,\@limits,\@sort_by);
+my $lang = C4::Output::getlanguagecookie($cgi);
+( $error,$query,$simple_query,$query_cgi,$query_desc,$limit,$limit_cgi,$limit_desc,$stopwords_removed,$query_type) = buildQuery(\@operators,\@operands,\@indexes,\@limits,\@sort_by, 0, $lang);
sub _input_cgi_parse ($) {
my @elements;
--
1.6.0.4
More information about the Koha-patches
mailing list