[Koha-cvs] koha C4/AuthoritiesMarc.pm C4/Biblio.pm C4/Sear...

paul poulain paul at koha-fr.org
Thu May 10 16:45:15 CEST 2007


CVSROOT:	/sources/koha
Module name:	koha
Changes by:	paul poulain <tipaul>	07/05/10 14:45:15

Modified files:
	C4             : AuthoritiesMarc.pm Biblio.pm Search.pm 
	authorities    : authorities.pl 
	misc/migration_tools: rebuild_nozebra.pl rebuild_zebra.pl 
	opac           : opac-rss.pl 

Log message:
	Koha NoZebra :
	- support for authorities
	- some bugfixes in ordering and "CCL" parsing
	- support for authorities <=> biblios walking
	
	Seems I can do what I want now, so I consider its done, except for bugfixes that will be needed i m sure !

CVSWeb URLs:
http://cvs.savannah.gnu.org/viewcvs/koha/C4/AuthoritiesMarc.pm?cvsroot=koha&r1=1.45&r2=1.46
http://cvs.savannah.gnu.org/viewcvs/koha/C4/Biblio.pm?cvsroot=koha&r1=1.203&r2=1.204
http://cvs.savannah.gnu.org/viewcvs/koha/C4/Search.pm?cvsroot=koha&r1=1.141&r2=1.142
http://cvs.savannah.gnu.org/viewcvs/koha/authorities/authorities.pl?cvsroot=koha&r1=1.23&r2=1.24
http://cvs.savannah.gnu.org/viewcvs/koha/misc/migration_tools/rebuild_nozebra.pl?cvsroot=koha&r1=1.4&r2=1.5
http://cvs.savannah.gnu.org/viewcvs/koha/misc/migration_tools/rebuild_zebra.pl?cvsroot=koha&r1=1.7&r2=1.8
http://cvs.savannah.gnu.org/viewcvs/koha/opac/opac-rss.pl?cvsroot=koha&r1=1.2&r2=1.3

Patches:
Index: C4/AuthoritiesMarc.pm
===================================================================
RCS file: /sources/koha/koha/C4/AuthoritiesMarc.pm,v
retrieving revision 1.45
retrieving revision 1.46
diff -u -b -r1.45 -r1.46
--- C4/AuthoritiesMarc.pm	6 Apr 2007 14:48:45 -0000	1.45
+++ C4/AuthoritiesMarc.pm	10 May 2007 14:45:15 -0000	1.46
@@ -93,7 +93,100 @@
 =cut
 sub SearchAuthorities {
   my ($tags, $and_or, $excluding, $operator, $value, $offset,$length,$authtypecode,$sortby) = @_;
+#     warn "CALL : $tags, $and_or, $excluding, $operator, $value, $offset,$length,$authtypecode,$sortby";
   my $dbh=C4::Context->dbh;
+    if (C4::Context->preference('NoZebra')) {
+    
+        #
+        # build the query
+        #
+        my $query;
+        my @auths=split / /,$authtypecode ;
+        foreach my  $auth (@auths){
+            $query .="AND auth_type= $auth ";
+        }
+        $query =~ s/^AND //;
+        my $dosearch;
+        for(my $i = 0 ; $i <= $#{$value} ; $i++)
+        {
+            if (@$value[$i]){
+                if (@$tags[$i] eq "mainmainentry") {
+                    $query .=" AND mainmainentry";
+                }elsif (@$tags[$i] eq "mainentry") {
+                    $query .=" AND mainentry";
+                } else {
+                    $query .=" AND ";
+                }
+                if (@$operator[$i] eq 'is') {
+                    $query.=(@$tags[$i]?"=":""). '"'.@$value[$i].'"';
+                }elsif (@$operator[$i] eq "="){
+                    $query.=(@$tags[$i]?"=":""). '"'.@$value[$i].'"';
+                }elsif (@$operator[$i] eq "start"){
+                    $query.=(@$tags[$i]?"=":"").'"'.@$value[$i].'%"';
+                } else {
+                    $query.=(@$tags[$i]?"=":"").'"'.@$value[$i].'%"';
+                }
+                $dosearch=1;
+            }#if value
+        }
+        #
+        # do the query (if we had some search term
+        #
+        if ($dosearch) {
+#             warn "QUERY : $query";
+            my $result = C4::Search::NZanalyse($query,'authorityserver');
+#             warn "result : $result";
+            my %result;
+            foreach (split /;/,$result) {
+                my ($authid,$title) = split /,/,$_;
+                # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
+                # and we don't want to get only 1 result for each of them !!!
+                # hint & speed improvement : we can order without reading the record
+                # so order, and read records only for the requested page !
+                $result{$title.$authid}=$authid;
+            }
+            # sort the hash and return the same structure as GetRecords (Zebra querying)
+            my @finalresult = ();
+            my $numbers=0;
+            if ($sortby eq 'HeadingDsc') { # sort by mainmainentry desc
+                foreach my $key (sort {$b cmp $a} (keys %result)) {
+                    push @finalresult, $result{$key};
+#                     warn "push..."$#finalresult;
+                    $numbers++;
+                }
+            } else { # sort by mainmainentry ASC
+                foreach my $key (sort (keys %result)) {
+                    push @finalresult, $result{$key};
+#                     warn "push..."$#finalresult;
+                    $numbers++;
+                }
+            }
+            # limit the $results_per_page to result size if it's more
+            $length = $numbers-1 if $numbers < $length;
+            # for the requested page, replace authid by the complete record
+            # speed improvement : avoid reading too much things
+            for (my $counter=$offset;$counter<=$offset+$length;$counter++) {
+#                 $finalresult[$counter] = GetAuthority($finalresult[$counter])->as_usmarc;
+                my $separator=C4::Context->preference('authoritysep');
+                my $authrecord = MARC::File::USMARC::decode(GetAuthority($finalresult[$counter])->as_usmarc);
+                my $authid=$authrecord->field('001')->data(); 
+                my $summary=BuildSummary($authrecord,$authid,$authtypecode);
+                my $query_auth_tag = "SELECT auth_tag_to_report FROM auth_types WHERE authtypecode=?";
+                my $sth = $dbh->prepare($query_auth_tag);
+                $sth->execute($authtypecode);
+                my $auth_tag_to_report = $sth->fetchrow;
+                my %newline;
+                $newline{used}=CountUsage($authid);
+                $newline{summary} = $summary;
+                $newline{authid} = $authid;
+                $newline{even} = $counter % 2;
+                $finalresult[$counter]= \%newline;
+            }
+            return (\@finalresult, $numbers);
+        } else {
+            return;
+        }
+    } else {
   my $query;
   my $attr;
     # the marclist may contain "mainentry". In this case, search the tag_to_report, that depends on
@@ -147,7 +240,6 @@
   ## Adding order
   $query=' @or  @attr 7=1 @attr 1=Heading 0 @or  @attr 7=1 @attr 1=Heading-Entity 1'.$query if ($sortby eq "HeadingAsc");
   $query=' @or  @attr 7=2 @attr 1=Heading 0 @or  @attr 7=1 @attr 1=Heading-Entity 1'.$query if ($sortby eq "HeadingDsc");
-  warn $query;
   
   $offset=0 unless $offset;
   my $counter = $offset;
@@ -209,11 +301,12 @@
    }# all $z's
 
   }## if nbresult
-NOLUCK:
-# $oAResult->destroy();
-# $oAuth[0]->destroy();
+        NOLUCK:
+        # $oAResult->destroy();
+        # $oAuth[0]->destroy();
 
     return (\@finalresult, $nbresults);
+    }
 }
 
 =head2 CountUsage 
@@ -228,11 +321,15 @@
 =cut
 sub CountUsage {
   my ($authid) = @_;
-  ### try ZOOM search here
+    if (C4::Context->preference('NoZebra')) {
+        # Read the index Koha-Auth-Number for this authid and count the lines
+        my $result = C4::Search::NZanalyse("an=$authid");
+        return scalar split /;/,$result;
+    } else {
+        ### ZOOM search here
   my $oConnection=C4::Context->Zconn("biblioserver",1);
   my $query;
   $query= "an=".$authid;
-  
   my $oResult = $oConnection->search(new ZOOM::Query::CCL2RPN( $query, $oConnection ));
   my $result;
   while ((my $i = ZOOM::event([ $oConnection ])) != 0) {
@@ -242,6 +339,7 @@
       }
   }
   return ($result);
+    }
 }
 
 =head2 CountUsageChildren 
@@ -402,7 +500,7 @@
   ##Both authid and authtypecode is expected to be in the same field. Modify if other requirements arise
     $record->add_fields('001',$authid) unless $record->field('001');
     $record->add_fields('152','','','b'=>$authtypecode) unless $record->field('152');
-    warn $record->as_formatted;
+#     warn $record->as_formatted;
     $dbh->do("lock tables auth_header WRITE");
     $sth=$dbh->prepare("insert into auth_header (authid,datecreated,authtypecode,marc) values (?,now(),?,?)");
     $sth->execute($authid,$authtypecode,$record->as_usmarc);    
@@ -417,8 +515,7 @@
       $sth->finish;
   }
   $dbh->do("unlock tables");
-  ModZebra($authid,'specialUpdate',"authorityserver");
-
+  ModZebra($authid,'specialUpdate',"authorityserver",$record);
   return ($authid);
 }
 
@@ -439,7 +536,7 @@
     my ($authid) = @_;
     my $dbh=C4::Context->dbh;
 
-    ModZebra($authid,"recordDelete","authorityserver");
+    ModZebra($authid,"recordDelete","authorityserver",GetAuthority($authid));
     $dbh->do("delete from auth_header where authid=$authid") ;
 
 }
@@ -950,6 +1047,9 @@
         push @tags_using_authtype,$tagfield."9" ;
     }
 
+    if (C4::Context->preference('NoZebra')) {
+        warn "MERGE TO DO";
+    } else {
   # now, find every biblio using this authority
   my $oConnection=C4::Context->Zconn("biblioserver");
   my $query;
@@ -994,6 +1094,7 @@
     }
       
   }#foreach $marc
+    }
   # now, find every other authority linked with this authority
 #   my $oConnection=C4::Context->Zconn("authorityserver");
 #   my $query;
@@ -1054,8 +1155,16 @@
 
 =cut
 
-# $Id: AuthoritiesMarc.pm,v 1.45 2007/04/06 14:48:45 hdl Exp $
+# $Id: AuthoritiesMarc.pm,v 1.46 2007/05/10 14:45:15 tipaul Exp $
 # $Log: AuthoritiesMarc.pm,v $
+# Revision 1.46  2007/05/10 14:45:15  tipaul
+# Koha NoZebra :
+# - support for authorities
+# - some bugfixes in ordering and "CCL" parsing
+# - support for authorities <=> biblios walking
+#
+# Seems I can do what I want now, so I consider its done, except for bugfixes that will be needed i m sure !
+#
 # Revision 1.45  2007/04/06 14:48:45  hdl
 # Code Cleaning : AuthoritiesMARC.
 #

Index: C4/Biblio.pm
===================================================================
RCS file: /sources/koha/koha/C4/Biblio.pm,v
retrieving revision 1.203
retrieving revision 1.204
diff -u -b -r1.203 -r1.204
--- C4/Biblio.pm	3 May 2007 15:16:02 -0000	1.203
+++ C4/Biblio.pm	10 May 2007 14:45:15 -0000	1.204
@@ -33,7 +33,7 @@
 use vars qw($VERSION @ISA @EXPORT);
 
 # set the version for version checking
-$VERSION = do { my @v = '$Revision: 1.203 $' =~ /\d+/g; shift(@v).".".join( "_", map { sprintf "%03d", $_ } @v ); };
+$VERSION = do { my @v = '$Revision: 1.204 $' =~ /\d+/g; shift(@v).".".join( "_", map { sprintf "%03d", $_ } @v ); };
 
 @ISA = qw( Exporter );
 
@@ -2712,6 +2712,7 @@
 sub ModZebra {
 ###Accepts a $server variable thus we can use it for biblios authorities or other zebra dbs
     my ( $biblionumber, $op, $server, $newRecord ) = @_;
+#     warn "ModZebra with : ( $biblionumber, $op, $server, $newRecord )";
     my $dbh=C4::Context->dbh;
     #warn "SERVER:".$server;
 #
@@ -2724,25 +2725,31 @@
         # lock the nozebra table : we will read index lines, update them in Perl process
         # and write everything in 1 transaction.
         # lock the table to avoid someone else overwriting what we are doing
-        $dbh->do('LOCK TABLES nozebra WRITE,biblio WRITE,biblioitems WRITE, systempreferences WRITE');
+        $dbh->do('LOCK TABLES nozebra WRITE,biblio WRITE,biblioitems WRITE, systempreferences WRITE, auth_types WRITE, auth_header WRITE');
         my %result; # the result hash that will be builded by deletion / add, and written on mySQL at the end, to improve speed
-        my $record= GetMarcBiblio($biblionumber);
+        my $record;
+        if ($server eq 'biblioserver') {
+            $record= GetMarcBiblio($biblionumber);
+        } else {
+            $record= C4::AuthoritiesMarc::GetAuthority($biblionumber);
+        }
         if ($op eq 'specialUpdate') {
             # OK, we have to add or update the record
             # 1st delete (virtually, in indexes) ...
-            %result = _DelBiblioNoZebra($biblionumber,$record);
+            %result = _DelBiblioNoZebra($biblionumber,$record,$server);
             # ... add the record
-            %result=_AddBiblioNoZebra($biblionumber,$newRecord, %result);
+            %result=_AddBiblioNoZebra($biblionumber,$newRecord, $server, %result);
         } else {
             # it's a deletion, delete the record...
-            %result=_DelBiblioNoZebra($biblionumber,$record);
+#             warn "DELETE the record $biblionumber on $server".$record->as_formatted;
+            %result=_DelBiblioNoZebra($biblionumber,$record,$server);
         }
         # ok, now update the database...
-        my $sth = $dbh->prepare("UPDATE nozebra SET biblionumbers=? WHERE indexname=? AND value=?");
+        my $sth = $dbh->prepare("UPDATE nozebra SET biblionumbers=? WHERE server=? AND indexname=? AND value=?");
         foreach my $key (keys %result) {
             foreach my $index (keys %{$result{$key}}) {
-                warn "UPDATING : $key , $index with :".$result{$key}->{$index};
-                $sth->execute($result{$key}->{$index},$key,$index);
+#                 warn "UPDATING : $server $key , $index with :".$result{$key}->{$index};
+                $sth->execute($result{$key}->{$index}, $server, $key, $index);
             }
         }
     $dbh->do('UNLOCK TABLES');
@@ -2777,7 +2784,7 @@
 
 =head1 INTERNAL FUNCTIONS
 
-=head2 _DelBiblioNoZebra($biblionumber,$record);
+=head2 _DelBiblioNoZebra($biblionumber,$record,$server);
 
     function to delete a biblio in NoZebra indexes
     This function does NOT delete anything in database : it reads all the indexes entries
@@ -2785,21 +2792,33 @@
     The SQL part is done either :
     - after the Add if we are modifying a biblio (delete + add again)
     - immediatly after this sub if we are doing a true deletion.
+    $server can be 'biblioserver' or 'authorityserver' : it indexes biblios or authorities (in the same table, $server being part of the table itself
 
 =cut
 
 
 sub _DelBiblioNoZebra {
-    my ($biblionumber,$record)=@_;
+    my ($biblionumber, $record, $server)=@_;
     
-    warn "DELETING".$record->as_formatted;
     # Get the indexes
     my $dbh = C4::Context->dbh;
     # Get the indexes
-    my %index=GetNoZebraIndexes;
+    my %index;
+    my $title;
+    if ($server eq 'biblioserver') {
+        %index=GetNoZebraIndexes;
     # get title of the record (to store the 10 first letters with the index)
     my ($titletag,$titlesubfield) = GetMarcFromKohaField('biblio.title');
-    my $title = lc($record->subfield($titletag,$titlesubfield));
+        $title = lc($record->subfield($titletag,$titlesubfield));
+    } else {
+        # for authorities, the "title" is the $a mainentry
+        my $authref = C4::AuthoritiesMarc::GetAuthType($record->subfield(152,'b'));
+        warn "ERROR : authtype undefined for ".$record->as_formatted unless $authref;
+        $title = $record->subfield($authref->{auth_tag_to_report},'a');
+        $index{'mainmainentry'}= $authref->{'auth_tag_to_report'}.'a';
+        $index{'mainentry'}    = $authref->{'auth_tag_to_report'}.'*';
+        $index{'auth_type'}    = '152b';
+    }
     
     my %result;
     # remove blancks comma (that could cause problem when decoding the string for CQL retrieval) and regexp specific values
@@ -2807,7 +2826,7 @@
     # limit to 10 char, should be enough, and limit the DB size
     $title = substr($title,0,10);
     #parse each field
-    my $sth2=$dbh->prepare('SELECT biblionumbers FROM nozebra WHERE indexname=? AND value=?');
+    my $sth2=$dbh->prepare('SELECT biblionumbers FROM nozebra WHERE server=? AND indexname=? AND value=?');
     foreach my $field ($record->fields()) {
         #parse each subfield
         next if $field->tag <10;
@@ -2823,22 +2842,20 @@
                     $indexed=1;
                     my $line= lc $subfield->[1];
                     # remove meaningless value in the field...
-                    $line =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|<|>|&|\+|\*|\/|=/ /g;
+                    $line =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|<|>|&|\+|\*|\/|=|:/ /g;
                     # ... and split in words
-                    warn "DELETING : $key / $tag / $subfieldcode / $line";
                     foreach (split / /,$line) {
                         next unless $_; # skip  empty values (multiple spaces)
                         # if the entry is already here, do nothing, the biblionumber has already be removed
                         unless ($result{$key}->{$_} =~ /$biblionumber,$title\-(\d);/) {
                             # get the index value if it exist in the nozebra table and remove the entry, otherwise, do nothing
-                            $sth2->execute($key,$_);
+                            $sth2->execute($server,$key,$_);
                             my $existing_biblionumbers = $sth2->fetchrow;
                             # it exists
                             if ($existing_biblionumbers) {
 #                                 warn " existing for $key $_: $existing_biblionumbers";
                                 $result{$key}->{$_} =$existing_biblionumbers;
                                 $result{$key}->{$_} =~ s/$biblionumber,$title\-(\d);//;
-                                warn "after cleaning : $key / $_ = ".$result{$key}->{$_};
                             }
                         }
                     }
@@ -2847,18 +2864,17 @@
             # the subfield is not indexed, store it in __RAW__ index anyway
             unless ($indexed) {
                 my $line= lc $subfield->[1];
-                $line =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|<|>|&|\+|\*|\/|=/ /g;
+                $line =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|<|>|&|\+|\*|\/|=|:/ /g;
                 # ... and split in words
                 foreach (split / /,$line) {
                     next unless $_; # skip  empty values (multiple spaces)
                     # if the entry is already here, do nothing, the biblionumber has already be removed
                     unless ($result{'__RAW__'}->{$_} =~ /$biblionumber,$title\-(\d);/) {
                         # get the index value if it exist in the nozebra table and remove the entry, otherwise, do nothing
-                        $sth2->execute('__RAW__',$_);
+                        $sth2->execute($server,'__RAW__',$_);
                         my $existing_biblionumbers = $sth2->fetchrow;
                         # it exists
                         if ($existing_biblionumbers) {
-                            warn " existing for __RAW__ $_ : $existing_biblionumbers";
                             $result{'__RAW__'}->{$_} =$existing_biblionumbers;
                             $result{'__RAW__'}->{$_} =~ s/$biblionumber,$title\-(\d);//;
                         }
@@ -2878,20 +2894,33 @@
 
 
 sub _AddBiblioNoZebra {
-    my ($biblionumber,$record,%result)=@_;
+    my ($biblionumber, $record, $server, %result)=@_;
     my $dbh = C4::Context->dbh;
     # Get the indexes
-    my %index=GetNoZebraIndexes;
+    my %index;
+    my $title;
+    if ($server eq 'biblioserver') {
+        %index=GetNoZebraIndexes;
     # get title of the record (to store the 10 first letters with the index)
     my ($titletag,$titlesubfield) = GetMarcFromKohaField('biblio.title');
-    my $title = lc($record->subfield($titletag,$titlesubfield));
+        $title = lc($record->subfield($titletag,$titlesubfield));
+    } else {
+#     warn "server : $server";
+        # for authorities, the "title" is the $a mainentry
+        my $authref = C4::AuthoritiesMarc::GetAuthType($record->subfield(152,'b'));
+        warn "ERROR : authtype undefined for ".$record->as_formatted unless $authref;
+        $title = $record->subfield($authref->{auth_tag_to_report},'a');
+        $index{'mainmainentry'}=$authref->{auth_tag_to_report}.'a';
+        $index{'mainentry'}    = $authref->{auth_tag_to_report}.'*';
+        $index{'auth_type'}    = '152b';
+    }
 
     # remove blancks comma (that could cause problem when decoding the string for CQL retrieval) and regexp specific values
     $title =~ s/ |,|;|\[|\]|\(|\)|\*|-|'|=//g;
     # limit to 10 char, should be enough, and limit the DB size
     $title = substr($title,0,10);
     #parse each field
-    my $sth2=$dbh->prepare('SELECT biblionumbers FROM nozebra WHERE indexname=? AND value=?');
+    my $sth2=$dbh->prepare('SELECT biblionumbers FROM nozebra WHERE server=? AND indexname=? AND value=?');
     foreach my $field ($record->fields()) {
         #parse each subfield
         next if $field->tag <10;
@@ -2907,7 +2936,7 @@
                     $indexed=1;
                     my $line= lc $subfield->[1];
                     # remove meaningless value in the field...
-                    $line =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|<|>|&|\+|\*|\/|=/ /g;
+                    $line =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|<|>|&|\+|\*|\/|=|:/ /g;
                     # ... and split in words
                     foreach (split / /,$line) {
                         next unless $_; # skip  empty values (multiple spaces)
@@ -2919,18 +2948,18 @@
                             $result{$key}->{$_} .= "$biblionumber,$title-$weight;";
                         } else {
                             # get the value if it exist in the nozebra table, otherwise, create it
-                            $sth2->execute($key,$_);
+                            $sth2->execute($server,$key,$_);
                             my $existing_biblionumbers = $sth2->fetchrow;
                             # it exists
                             if ($existing_biblionumbers) {
-                                warn" existing : $existing_biblionumbers";
                                 $result{$key}->{$_} =$existing_biblionumbers;
                                 my $weight=$1+1;
                                 $result{$key}->{$_} =~ s/$biblionumber,$title\-(\d);//;
                                 $result{$key}->{$_} .= "$biblionumber,$title-$weight;";
                             # create a new ligne for this entry
                             } else {
-                                $dbh->do('INSERT INTO nozebra SET indexname='.$dbh->quote($key).',value='.$dbh->quote($_));
+#                             warn "INSERT : $server / $key / $_";
+                                $dbh->do('INSERT INTO nozebra SET server='.$dbh->quote($server).', indexname='.$dbh->quote($key).',value='.$dbh->quote($_));
                                 $result{$key}->{$_}.="$biblionumber,$title-1;";
                             }
                         }
@@ -2940,7 +2969,7 @@
             # the subfield is not indexed, store it in __RAW__ index anyway
             unless ($indexed) {
                 my $line= lc $subfield->[1];
-                $line =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|<|>|&|\+|\*|\/|=/ /g;
+                $line =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|<|>|&|\+|\*|\/|=|:/ /g;
                 # ... and split in words
                 foreach (split / /,$line) {
                     next unless $_; # skip  empty values (multiple spaces)
@@ -2951,7 +2980,7 @@
                         $result{'__RAW__'}->{$_} .= "$biblionumber,$title-$weight;";
                     } else {
                         # get the value if it exist in the nozebra table, otherwise, create it
-                        $sth2->execute('__RAW__',$_);
+                        $sth2->execute($server,'__RAW__',$_);
                         my $existing_biblionumbers = $sth2->fetchrow;
                         # it exists
                         if ($existing_biblionumbers) {
@@ -2961,7 +2990,7 @@
                             $result{'__RAW__'}->{$_} .= "$biblionumber,$title-$weight;";
                         # create a new ligne for this entry
                         } else {
-                            $dbh->do('INSERT INTO nozebra SET indexname="__RAW__",value='.$dbh->quote($_));
+                            $dbh->do('INSERT INTO nozebra SET server='.$dbh->quote($server).',  indexname="__RAW__",value='.$dbh->quote($_));
                             $result{'__RAW__'}->{$_}.="$biblionumber,$title-1;";
                         }
                     }
@@ -3174,7 +3203,7 @@
 
     $dbh->do($query);
     if ( $dbh->errstr ) {
-        warn "$query";
+        warn "ERROR in _koha_modify_biblioitem $query";
     }
 }
 
@@ -3866,8 +3895,16 @@
 
 =cut
 
-# $Id: Biblio.pm,v 1.203 2007/05/03 15:16:02 tipaul Exp $
+# $Id: Biblio.pm,v 1.204 2007/05/10 14:45:15 tipaul Exp $
 # $Log: Biblio.pm,v $
+# Revision 1.204  2007/05/10 14:45:15  tipaul
+# Koha NoZebra :
+# - support for authorities
+# - some bugfixes in ordering and "CCL" parsing
+# - support for authorities <=> biblios walking
+#
+# Seems I can do what I want now, so I consider its done, except for bugfixes that will be needed i m sure !
+#
 # Revision 1.203  2007/05/03 15:16:02  tipaul
 # BUGFIX for : NoZebra
 # - NoZebra features : seems they work fine now (adding, modifying, deleting)

Index: C4/Search.pm
===================================================================
RCS file: /sources/koha/koha/C4/Search.pm,v
retrieving revision 1.141
retrieving revision 1.142
diff -u -b -r1.141 -r1.142
--- C4/Search.pm	9 May 2007 19:42:48 -0000	1.141
+++ C4/Search.pm	10 May 2007 14:45:15 -0000	1.142
@@ -25,7 +25,7 @@
 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
 
 # set the version for version checking
-$VERSION = do { my @v = '$Revision: 1.141 $' =~ /\d+/g;
+$VERSION = do { my @v = '$Revision: 1.142 $' =~ /\d+/g;
     shift(@v) . "." . join( "_", map { sprintf "%03d", $_ } @v );
 };
 
@@ -1145,7 +1145,10 @@
 =cut
 
 sub NZanalyse {
-    my ($string) = @_;
+    my ($string,$server) = @_;
+    # $server contains biblioserver or authorities, depending on what we search on.
+    warn "querying : $string on $server";
+    $server='biblioserver' unless $server;
     # if we have a ", replace the content to discard temporarily any and/or/not inside
     my $commacontent;
     if ($string =~/"/) {
@@ -1156,32 +1159,32 @@
     # split the query string in 3 parts : X AND Y means : $left="X", $operand="AND" and $right="Y"
     # then, call again NZanalyse with $left and $right
     # (recursive until we find a leaf (=> something without and/or/not)
-    $string =~ /(.*)( and | or | not )(.*)/;
+    $string =~ /(.*)( and | or | not | AND | OR | NOT )(.*)/;
     my $left = $1;
     my $right = $3;
-    my $operand = $2;
+    my $operand = lc($2);
     # it's not a leaf, we have a and/or/not
     if ($operand) {
         # reintroduce comma content if needed
         $right =~ s/__X__/"$commacontent"/ if $commacontent;
         $left =~ s/__X__/"$commacontent"/ if $commacontent;
-#         print "noeud : $left / $operand / $right\n";
-        my $leftresult = NZanalyse($left);
-        my $rightresult = NZanalyse($right);
+#         warn "node : $left / $operand / $right\n";
+        my $leftresult = NZanalyse($left,$server);
+        my $rightresult = NZanalyse($right,$server);
         # OK, we have the results for right and left part of the query
         # depending of operand, intersect, union or exclude both lists
         # to get a result list
         if ($operand eq ' and ') {
-            my @leftresult = split /,/, $leftresult;
-#             my @rightresult = split /,/,$leftresult;
+            my @leftresult = split /;/, $leftresult;
+#             my @rightresult = split /;/,$leftresult;
             my $finalresult;
             # parse the left results, and if the biblionumber exist in the right result, save it in finalresult
             # the result is stored twice, to have the same weight for AND than OR.
             # example : TWO : 61,61,64,121 (two is twice in the biblio #61) / TOWER : 61,64,130
             # result : 61,61,61,61,64,64 for two AND tower : 61 has more weight than 64
             foreach (@leftresult) {
-                if ($rightresult =~ "$_,") {
-                    $finalresult .= "$_,$_,";
+                if ($rightresult =~ "$_;") {
+                    $finalresult .= "$_;$_;";
                 }
             }
             return $finalresult;
@@ -1189,12 +1192,12 @@
             # just merge the 2 strings
             return $leftresult.$rightresult;
         } elsif ($operand eq ' not ') {
-            my @leftresult = split /,/, $leftresult;
-#             my @rightresult = split /,/,$leftresult;
+            my @leftresult = split /;/, $leftresult;
+#             my @rightresult = split /;/,$leftresult;
             my $finalresult;
             foreach (@leftresult) {
-                unless ($rightresult =~ "$_,") {
-                    $finalresult .= "$_,";
+                unless ($rightresult =~ "$_;") {
+                    $finalresult .= "$_;";
                 }
             }
             return $finalresult;
@@ -1206,28 +1209,32 @@
     } else {
         $string =~  s/__X__/"$commacontent"/ if $commacontent;
         $string =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|<|>|&|\+|\*|\// /g;
-#         print "feuille : $string\n";
+#         warn "leaf : $string\n";
         # parse the string in in operator/operand/value again
         $string =~ /(.*)(=|>|>=|<|<=)(.*)/;
         my $left = $1;
         my $operator = $2;
         my $right = $3;
         my $results;
-            # automatic replace for short operator
+        # automatic replace for short operators
             $left='title' if $left eq 'ti';
             $left='author' if $left eq 'au';
+        $left='koha-Auth-Number' if $left eq 'an';
         if ($operator) {
             #do a specific search
             my $dbh = C4::Context->dbh;
             $operator='LIKE' if $operator eq '=' and $right=~ /%/;
-            my $sth = $dbh->prepare("SELECT biblionumbers FROM nozebra WHERE indexname=? AND value $operator ?");
-#             print "$left / $operator / $right\n";
+            my $sth = $dbh->prepare("SELECT biblionumbers FROM nozebra WHERE server=? AND indexname=? AND value $operator ?");
+            warn "$left / $operator / $right\n";
             # split each word, query the DB and build the biblionumbers result
             foreach (split / /,$right) {
                 my $biblionumbers;
-                $sth->execute($left,$_);
+                next unless $_;
+#                 warn "EXECUTE : $server, $left, $_";
+                $sth->execute($server, $left, $_);
                 while (my $line = $sth->fetchrow) {
                     $biblionumbers .= $line;
+#                     warn "result : $line";
                 }
                 # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
                 if ($results) {
@@ -1246,17 +1253,19 @@
         } else {
             #do a complete search (all indexes)
             my $dbh = C4::Context->dbh;
-            my $sth = $dbh->prepare("SELECT biblionumbers FROM nozebra WHERE value LIKE ?");
+            my $sth = $dbh->prepare("SELECT biblionumbers FROM nozebra WHERE server=? AND value LIKE ?");
             # split each word, query the DB and build the biblionumbers result
             foreach (split / /,$string) {
+#                 warn "search on all indexes on $_";
                 my $biblionumbers;
-                $sth->execute($_);
+                next unless $_;
+                $sth->execute($server, $_);
                 while (my $line = $sth->fetchrow) {
                     $biblionumbers .= $line;
                 }
                 # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
                 if ($results) {
-                    my @leftresult = split /,/, $biblionumbers;
+                    my @leftresult = split /;/, $biblionumbers;
                     my $temp;
                     foreach (@leftresult) {
                         if ($results =~ "$_;") {
@@ -1269,6 +1278,7 @@
                 }
             }
         }
+#         warn "return : $results for LEAF : $string";
         return $results;
     }
 }
@@ -1303,7 +1313,7 @@
         my $result_hash;
         my $numbers=0;
         if ($ordering eq '1=9523 >i') { # sort popularity DESC
-            foreach my $key (sort {$b <=> $a} (keys %popularity)) {
+            foreach my $key (sort {$b cmp $a} (keys %popularity)) {
                 $result_hash->{'RECORDS'}[$numbers++] = $result{$popularity{$key}}->as_usmarc();
             }
         } else { # sort popularity ASC
@@ -1337,12 +1347,12 @@
         # sort the hash and return the same structure as GetRecords (Zebra querying)
         my $result_hash;
         my $numbers=0;
-        if ($ordering eq '1=1003 <i') { # sort by title desc
+        if ($ordering eq '1=1003 <i') { # sort by author desc
             foreach my $key (sort (keys %result)) {
                 $result_hash->{'RECORDS'}[$numbers++] = $result{$key}->as_usmarc();
             }
-        } else { # sort by title ASC
-            foreach my $key (sort { $a <=> $b } (keys %result)) {
+        } else { # sort by author ASC
+            foreach my $key (sort { $a cmp $b } (keys %result)) {
                 $result_hash->{'RECORDS'}[$numbers++] = $result{$key}->as_usmarc();
             }
         }
@@ -1378,7 +1388,7 @@
                 $result_hash->{'RECORDS'}[$numbers++] = $result{$key}->as_usmarc();
             }
         } else { # sort by title ASC
-            foreach my $key (sort { $a <=> $b } (keys %result)) {
+            foreach my $key (sort { $a cmp $b } (keys %result)) {
                 $result_hash->{'RECORDS'}[$numbers++] = $result{$key}->as_usmarc();
             }
         }
@@ -1400,12 +1410,12 @@
         # sort the hash and return the same structure as GetRecords (Zebra querying)
         my $result_hash;
         my $numbers=0;
-        if ($ordering eq '1=31 <i') { # sort by title desc
+        if ($ordering eq '1=31 <i') { # sort by pubyear desc
             foreach my $key (sort (keys %result)) {
                 $result_hash->{'RECORDS'}[$numbers++] = $result{$key}->as_usmarc();
             }
-        } else { # sort by title ASC
-            foreach my $key (sort { $a <=> $b } (keys %result)) {
+        } else { # sort by pub year ASC
+            foreach my $key (sort { $b cmp $a } (keys %result)) {
                 $result_hash->{'RECORDS'}[$numbers++] = $result{$key}->as_usmarc();
             }
         }
@@ -1435,7 +1445,7 @@
                 $result_hash->{'RECORDS'}[$numbers++] = $result{$key};
             }
         } else { # sort by title ASC
-            foreach my $key (sort { $a <=> $b } (keys %result)) {
+            foreach my $key (sort { $b cmp $a } (keys %result)) {
                 $result_hash->{'RECORDS'}[$numbers++] = $result{$key};
             }
         }
@@ -1476,7 +1486,7 @@
         # sort the hash and return the same structure as GetRecords (Zebra querying)
         my $result_hash;
         my $numbers=0;
-            foreach my $key (sort {$b <=> $a} (keys %result)) {
+            foreach my $key (sort {$b cmp $a} (keys %result)) {
                 $result_hash->{'RECORDS'}[$numbers++] = $result{$key};
             }
         # limit the $results_per_page to result size if it's more

Index: authorities/authorities.pl
===================================================================
RCS file: /sources/koha/koha/authorities/authorities.pl,v
retrieving revision 1.23
retrieving revision 1.24
diff -u -b -r1.23 -r1.24
--- authorities/authorities.pl	24 Apr 2007 13:54:29 -0000	1.23
+++ authorities/authorities.pl	10 May 2007 14:45:15 -0000	1.24
@@ -1,6 +1,6 @@
 #!/usr/bin/perl
 
-# $Id: authorities.pl,v 1.23 2007/04/24 13:54:29 hdl Exp $
+# $Id: authorities.pl,v 1.24 2007/05/10 14:45:15 tipaul Exp $
 
 # Copyright 2000-2002 Katipo Communications
 #
@@ -392,7 +392,6 @@
 	#warn $record->as_formatted;
 	# check for a duplicate
 	my ($duplicateauthid,$duplicateauthvalue) = FindDuplicateAuthority($record,$authtypecode) if ($op eq "add") && (!$is_a_modif);
-warn "duplicate:$duplicateauthid,$duplicateauthvalue";	
 	my $confirm_not_duplicate = $input->param('confirm_not_duplicate');
 # it is not a duplicate (determined either by Koha itself or by user checking it's not a duplicate)
 	if (!$duplicateauthid or $confirm_not_duplicate) {

Index: misc/migration_tools/rebuild_nozebra.pl
===================================================================
RCS file: /sources/koha/koha/misc/migration_tools/rebuild_nozebra.pl,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -b -r1.4 -r1.5
--- misc/migration_tools/rebuild_nozebra.pl	4 May 2007 16:24:09 -0000	1.4
+++ misc/migration_tools/rebuild_nozebra.pl	10 May 2007 14:45:15 -0000	1.5
@@ -34,18 +34,15 @@
 my $dbh=C4::Context->dbh;
 $dbh->do("update systempreferences set value=1 where variable='NoZebra'");
 $dbh->do("CREATE TABLE `nozebra` (
-                `indexname` varchar(40) character set latin1 NOT NULL,
-                `value` varchar(250) character set latin1 NOT NULL,
-                `biblionumbers` longtext character set latin1 NOT NULL,
-                KEY `indexname` (`indexname`),
-                KEY `value` (`value`))
+                `server` varchar(20)     NOT NULL,
+                `indexname` varchar(40)  NOT NULL,
+                `value` varchar(250)     NOT NULL,
+                `biblionumbers` longtext NOT NULL,
+                KEY `indexname` (`server`,`indexname`),
+                KEY `value` (`server`,`value`))
                 ENGINE=InnoDB DEFAULT CHARSET=utf8");
+
 $dbh->do("truncate nozebra");
-my $sth;
-$sth=$dbh->prepare("select biblionumber from biblioitems order by biblionumber $limit");
-$sth->execute();
-my $i=0;
-my %result;
 
 my %index = GetNoZebraIndexes();
 
@@ -57,11 +54,11 @@
         'issn' => '011a',
         'biblionumber' =>'0909',
         'itemtype' => '200b',
-        'language' => '010a',
+        'language' => '101a',
         'publisher' => '210x',
         'date' => '210d',
         'note' => '300a,301a,302a,303a,304a,305a,306az,307a,308a,309a,310a,311a,312a,313a,314a,315a,316a,317a,318a,319a,320a,321a,322a,323a,324a,325a,326a,327a,328a,330a,332a,333a,336a,337a,345a',
-        'Koha-Auth-Number' => '6009,6019,6029,6039,6049,6059,6069,6109',
+        'Koha-Auth-Number' => '6009,6019,6029,6039,6049,6059,6069,6109,7009,7019,7029,7109,7119,7129',
         'subject' => '600*,601*,606*,610*',
         'dewey' => '676a',
         'host-item' => '995a,995c',\" where variable='NoZebraIndexes'");
@@ -71,6 +68,15 @@
     }
 }
 $|=1;
+
+print "***********************************\n";
+print "***** building BIBLIO indexes *****\n";
+print "***********************************\n";
+my $sth;
+$sth=$dbh->prepare("select biblionumber from biblioitems order by biblionumber $limit");
+$sth->execute();
+my $i=0;
+my %result;
 while (my ($biblionumber) = $sth->fetchrow) {
     $i++;
     print "\r$i";
@@ -99,7 +105,7 @@
                     $indexed=1;
                     my $line= lc $subfield->[1];
                     # remove meaningless value in the field...
-                    $line =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|<|>|&|\+|\*|\/|=/ /g;
+                    $line =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|<|>|&|\+|\*|\/|=|:/ /g;
                     # ... and split in words
                     foreach (split / /,$line) {
                         next unless $_; # skip  empty values (multiple spaces)
@@ -135,7 +141,97 @@
         }
     }
 }
-my $sth = $dbh->prepare("INSERT INTO nozebra (indexname,value,biblionumbers) VALUES (?,?,?)");
+my $sth = $dbh->prepare("INSERT INTO nozebra (server,indexname,value,biblionumbers) VALUES ('biblioserver',?,?,?)");
+foreach my $key (keys %result) {
+    foreach my $index (keys %{$result{$key}}) {
+        if (length($result{$key}->{$index}) > 1000000) {
+            print "very long index (".length($result{$key}->{$index}).")for $key / $index. update mySQL config file if you have an error just after this warning (max_paquet_size parameter)\n";
+        }
+        $sth->execute($key,$index,$result{$key}->{$index});
+    }
+}
+
+print "\n***********************************\n";
+print "***** building AUTHORITIES indexes *****\n";
+print "***********************************\n";
+
+my $sth;
+$sth=$dbh->prepare("select authid from auth_header order by authid $limit");
+$sth->execute();
+my $i=0;
+my %result;
+while (my ($authid) = $sth->fetchrow) {
+    $i++;
+    print "\r$i";
+    my $record = GetAuthority($authid);
+
+    my %index;
+    # for authorities, the "title" is the $a mainentry
+    my $authref = C4::AuthoritiesMarc::GetAuthType($record->subfield(152,'b'));
+    use Data::Dumper;
+#     warn "for $authid / ".$record->as_formatted. "Dumper : ".Dumper($authref);
+    warn "ERROR : authtype undefined for ".$record->as_formatted unless $authref;
+    my $title = $record->subfield($authref->{auth_tag_to_report},'a');
+    $index{'mainmainentry'}= $authref->{'auth_tag_to_report'}.'a';
+    $index{'mainentry'}    = $authref->{'auth_tag_to_report'}.'*';
+    $index{'auth_type'}    = '152b';
+
+    # remove blancks comma (that could cause problem when decoding the string for CQL retrieval) and regexp specific values
+    $title =~ s/ |,|;|\[|\]|\(|\)|\*|-|'|=//g;
+    # limit to 10 char, should be enough, and limit the DB size
+    $title = substr($title,0,10);
+    #parse each field
+    foreach my $field ($record->fields()) {
+        #parse each subfield
+        next if $field->tag <10;
+        foreach my $subfield ($field->subfields()) {
+            my $tag = $field->tag();
+            my $subfieldcode = $subfield->[0];
+            my $indexed=0;
+            # check each index to see if the subfield is stored somewhere
+            # otherwise, store it in __RAW__ index
+            foreach my $key (keys %index) {
+                if ($index{$key} =~ /$tag\*/ or $index{$key} =~ /$tag$subfieldcode/) {
+                    $indexed=1;
+                    my $line= lc $subfield->[1];
+                    # remove meaningless value in the field...
+                    $line =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|<|>|&|\+|\*|\/|=|:/ /g;
+                    # ... and split in words
+                    foreach (split / /,$line) {
+                        next unless $_; # skip  empty values (multiple spaces)
+                        # if the entry is already here, improve weight
+                        if ($result{$key}->{$_} =~ /$authid,$title\-(\d);/) {
+                            my $weight=$1+1;
+                            $result{$key}->{$_} =~ s/$authid,$title\-(\d);//;
+                            $result{$key}->{$_} .= "$authid,$title-$weight;";
+                        # otherwise, create it, with weight=1
+                        } else {
+                            $result{$key}->{$_}.="$authid,$title-1;";
+                        }
+                    }
+                }
+            }
+            # the subfield is not indexed, store it in __RAW__ index anyway
+            unless ($indexed) {
+                my $line= lc $subfield->[1];
+                $line =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|<|>|&|\+|\*|\/|=/ /g;
+                foreach (split / /,$line) {
+                        next unless $_;
+#                     warn $record->as_formatted."$_ =>".$title;
+                        if ($result{__RAW__}->{$_} =~ /$authid,$title\-(\d);/) {
+                            my $weight=$1+1;
+#                             $weight++;
+                            $result{__RAW__}->{$_} =~ s/$authid,$title\-(\d);//;
+                            $result{__RAW__}->{$_} .= "$authid,$title-$weight;";
+                        } else {
+                            $result{__RAW__}->{$_}.="$authid,$title-1;";
+                        }
+                }
+            }
+        }
+    }
+}
+my $sth = $dbh->prepare("INSERT INTO nozebra (server,indexname,value,biblionumbers) VALUES ('authorityserver',?,?,?)");
 foreach my $key (keys %result) {
     foreach my $index (keys %{$result{$key}}) {
         if (length($result{$key}->{$index}) > 1000000) {

Index: misc/migration_tools/rebuild_zebra.pl
===================================================================
RCS file: /sources/koha/koha/misc/migration_tools/rebuild_zebra.pl,v
retrieving revision 1.7
retrieving revision 1.8
diff -u -b -r1.7 -r1.8
--- misc/migration_tools/rebuild_zebra.pl	17 Apr 2007 08:50:33 -0000	1.7
+++ misc/migration_tools/rebuild_zebra.pl	10 May 2007 14:45:15 -0000	1.8
@@ -14,7 +14,7 @@
 $|=1; # flushes output
 
 # limit for database dumping
-my $limit = "LIMIT 500";
+my $limit;# = "LIMIT 500";
 my $directory;
 my $skip_export;
 my $keep_export;

Index: opac/opac-rss.pl
===================================================================
RCS file: /sources/koha/koha/opac/opac-rss.pl,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -b -r1.2 -r1.3
--- opac/opac-rss.pl	9 May 2007 10:18:11 -0000	1.2
+++ opac/opac-rss.pl	10 May 2007 14:45:15 -0000	1.3
@@ -68,7 +68,7 @@
 $query =~ s/:/=/g;
 
 # the number of lines to retrieve
-my $size=$cgi->param('size') || 20;
+my $size=$cgi->param('size') || 50;
 
 # the filename of the cached rdf file.
 my $filename = md5_base64($query);





More information about the Koha-cvs mailing list