[Koha-cvs] CVS: koha/z3950 processz3950queue,1.10,1.11 search.pl,1.2,1.3

Paul POULAIN tipaul at users.sourceforge.net
Wed Feb 11 09:44:30 CET 2004


Update of /cvsroot/koha/koha/z3950
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv24479/z3950

Modified Files:
	processz3950queue search.pl 
Log Message:
synch'ing 2.0.0 branch (RC4 tag) and head

Index: processz3950queue
===================================================================
RCS file: /cvsroot/koha/koha/z3950/processz3950queue,v
retrieving revision 1.10
retrieving revision 1.11
diff -C2 -r1.10 -r1.11
*** processz3950queue	6 Nov 2003 15:08:40 -0000	1.10
--- processz3950queue	11 Feb 2004 08:44:28 -0000	1.11
***************
*** 11,14 ****
--- 11,91 ----
  use Net::Z3950;
  
+ =head1 NAME
+ 
+ processz3950queue. The script that does z3950 searches.
+ 
+ =head1 SYNOPSIS
+ 
+ This script can be used on a console (as normal user) or by the daemon-launch script.
+ 
+ Don't forget to EXPORT PERL5LIB=/PATH/to/KOHA before executing it if you use console mode.
+ 
+ It :
+ 1- extracts z3950 requests from z3950queue table,
+ 2- creates entries in z3950results to store the result
+ 3- launch z3950 queries in asynchronous mode, using Unix fork()
+ 4- store results in marc_breeding table.
+ 
+ The z3950 results queries are managed in z3950/search.pl script (poped-up window in MARC editor).
+ 
+ =head1 DESCRIPTION
+ 
+ =head2 table z3950servers
+ 
+ This table stores the differents z3950 servers.
+ A server is used if checked=1. The rank is NOT used as searches are now asynchronous.
+ 
+ =head2 table z3950queue
+ 
+ Table use to manage queries. A single line is created  in this table for each z3950 search request.
+ If more than 1 server is called, the C<servers> field containt all of them separated by |.
+ 
+ z3950 search requests are done by z3950/search.pl script.
+ At this stage, the fields are created with C<startdate> and C<done> empty
+ 
+ Then, the processz3950queue finds this entry and :
+ 1- store date (time()) in C<startdate>
+ 2- set C<done> = -1
+ 
+ when the requests are all sent :
+ 2- set C<done> = 1
+ 3- set C<enddate> (FIXME: always equal to startdate for me)
+ 
+ entries are deleted when :
+ - C<startdate> is more than 1 day ago.
+ 
+ FIXME:
+ - results, numrecords fields are unused
+ 
+ =head2 table z3950results
+ 
+ 1 entry is created for each request, for each server called.
+ when created :
+ * C<startdate> is filled
+ * C<enddate> is null
+ * active is set to 0. when active is 0, it means the request has not been sent. when set to 1, it means it's on the way.
+ 
+ When a search is ended, C<enddate> is set, and C<active> is set to -1
+ 
+ =head1 How it's written
+ 
+ on every loop :
+ * delete old queries
+ * for each entry in z3950queue table that is not done=1 {
+ 	for each search request	{
+ 		for each server {
+ 			try to connect
+ 			look for results
+ 			*      results can be :
+ 			- existing and already running on another process (active=1)
+ 			- existing & finished (active=-1)
+ 			- non existent => create it and run the request.
+ 		}
+ 	}
+ }
+ =over 2
+ 
+ =cut
+ 
  
  if ($< == 0) {
***************
*** 30,34 ****
  my $dbh = C4::Context->dbh;
  
! my $sth=$dbh->prepare("update z3950results set active=0");
  $sth->execute;
  $sth->finish;
--- 107,112 ----
  my $dbh = C4::Context->dbh;
  
! # we begin the script, so "unactive" every pending request : they will never give anything, the script died :-(
! my $sth=$dbh->prepare("update z3950results set active=0 where active<>-1");
  $sth->execute;
  $sth->finish;
***************
*** 52,106 ****
  		print "starting loop\n";
  		$checkqueue = 1; # FIXME during testing, this line forces the loop. REMOVE it to use SIG{HUP} when "daemonized" !
  		if ($checkqueue) { # everytime a SIG{HUP} is recieved
  			$checkqueue=0;
! 			my $sth=$dbh->prepare("select id,term,type,servers,identifier from z3950queue order by id");
  			$sth->execute;
  			while (my ($id, $term, $type, $servers,$random) = $sth->fetchrow) {
  				if ($forkcounter<12) {
  					my $now=time();
  					my $stk=$dbh->prepare("select id,server,startdate,enddate,numrecords,active from z3950results where queryid=?");
  					($stk->execute($id)) || (next);
  					my %serverdone;
  					unless ($stk->rows) {
  						my $sti=$dbh->prepare("update z3950queue set done=-1,startdate=? where id=?");
  						$sti->execute($now,$id);
  					}
  					while (my ($r_id, $r_server,$r_startdate,$r_enddate,$r_numrecords,$active) = $stk->fetchrow) {
! 						if ($r_enddate >0) {
  							$serverdone{$r_server}=1;
! 						} elsif ($active) {
  							$serverdone{$r_server}=1;
! 						} else {
  							$serverdone{$r_server}=-1;
  						}
  					}
- 
  					$stk->finish;
! 					my $attr='';
! 					if ($type eq 'isbn') {
! 						$attr='1=7';
! 					} elsif ($type eq 'title') {
! 						$attr='1=4';
! 					} elsif ($type eq 'author') {
! 						$attr='1=1003';
! 					} elsif ($type eq 'lccn') {
! 						$attr='1=9';
! 					} elsif ($type eq 'keyword') {
! 						$attr='1=1016';
! 					}
! 					$term='"'.$term.'"';
! 					my $query="\@attr $attr $term";
! 					my $totalrecords=0;
! 					my $serverinfo;
! 					my $stillprocessing=0;
! 					my $globalname;
! 					my $globalsyntax;
! 					my $globalencoding;
! 					foreach $serverinfo (split(/\|/, $servers)) {
! 						(next) if ($serverdone{$serverinfo} == 1);
! 						my $stillprocessing=1;
  						if (my $pid=fork()) {
  							$forkcounter++;
  						} else {
  							my $dbi = C4::Context->dbh;
  							my ($name, $server, $database, $user, $password,$syntax) = split(/\//, $serverinfo, 6);
--- 130,183 ----
  		print "starting loop\n";
  		$checkqueue = 1; # FIXME during testing, this line forces the loop. REMOVE it to use SIG{HUP} when "daemonized" !
+ # clean DB
+ 		my $now = time();
+ 		# delete z3950queue entries that are more than 1 day old
+ 		my $sth = $dbh->prepare("delete from z3950queue where ?-startdate > 86400");
+ 		$sth->execute($now);
+ 		# delete z3950results queries that are more than 1 hour old
+ 		$sth = $dbh->prepare("delete from z3950results where ?-startdate > 3600");
+ 		$sth->execute($now);
  		if ($checkqueue) { # everytime a SIG{HUP} is recieved
  			$checkqueue=0;
! # parse every entry in QUEUE
! 			$sth=$dbh->prepare("select id,term,type,servers,identifier from z3950queue where done<>1 or done is null order by id");
  			$sth->execute;
  			while (my ($id, $term, $type, $servers,$random) = $sth->fetchrow) {
+ # FIXME: there is no "else". So, if more than 12 requests at the same time => requests are lost !
  				if ($forkcounter<12) {
  					my $now=time();
+ # search for results entries for this request
  					my $stk=$dbh->prepare("select id,server,startdate,enddate,numrecords,active from z3950results where queryid=?");
  					($stk->execute($id)) || (next);
  					my %serverdone;
+ # if no results => set queue to done = -1, set startdate and begin creating z3950results table entries & z3950 queries
  					unless ($stk->rows) {
  						my $sti=$dbh->prepare("update z3950queue set done=-1,startdate=? where id=?");
  						$sti->execute($now,$id);
  					}
+ # check which servers calls have already been created (before a crash)
  					while (my ($r_id, $r_server,$r_startdate,$r_enddate,$r_numrecords,$active) = $stk->fetchrow) {
! 						if ($r_enddate >0) { # result entry exist & finished
  							$serverdone{$r_server}=1;
! 						} elsif ($active) { # result entry exists & on the way (active=1) or already done (active=-1)
  							$serverdone{$r_server}=1;
! 						} else { # otherwise
  							$serverdone{$r_server}=-1;
  						}
+ 						# note that is the entry doesn't exist, the $serverdone{$r_server} is 0 (important later !)
  					}
  					$stk->finish;
! 					foreach my $serverinfo (split(/\|/, $servers)) {
! 						(next) if ($serverdone{$serverinfo} == 1); #(otherwise, is 0 or -1)
! 						my $totalrecords=0;
! 						my $globalname;
! 						my $globalsyntax;
! 						my $globalencoding;
! # fork a process for this z3950 query
  						if (my $pid=fork()) {
  							$forkcounter++;
  						} else {
+ # and connect to z3950 server
+ #FIXME: why do we need $dbi ? can't we use $dbh ?
  							my $dbi = C4::Context->dbh;
  							my ($name, $server, $database, $user, $password,$syntax) = split(/\//, $serverinfo, 6);
***************
*** 110,172 ****
  							my $servername=$1;
  							my $port=$2;
! 							print "Processing $type=$term at $name $server $database $syntax (".($forkcounter+1)." forks)\n";
! 							$now=time();
! 							my $q_serverinfo=$dbi->quote($serverinfo);
! 							my $resultsid;
! 							if ($serverdone{$serverinfo}==-1) {
! 								my $stj=$dbi->prepare("select id from z3950results where server=? and queryid=?");
! 								$stj->execute($q_serverinfo,$id);
! 								($resultsid) = $stj->fetchrow;
! 								$stj->finish;
! 							} else {
! 								my $stj=$dbi->prepare("select id from z3950results where server=? and queryid=?");
! 								$stj->execute($q_serverinfo,$id);
! 								($resultsid) = $stj->fetchrow;
! 								$stj->finish;
! 								unless ($resultsid) {
! 									$stj=$dbi->prepare("insert into z3950results (server, queryid, startdate) values (?,?,?)");
! 									$stj->execute($q_serverinfo, $id, $now);
! 									$resultsid=$dbi->{'mysql_insertid'};
! 									$stj->finish;
! 								}
  							}
! 							my $stj=$dbh->prepare("update z3950results set active=1 where id=?");
! 							$stj->execute($resultsid);
  							my $conn;
  							my $noconnection=0;
  							my $error=0;
  							if ($user) {
! 								eval { $conn= new Net::Z3950::Connection($servername, $port, databaseName => $database, user => $user, password => $password); };
! 								if ($@) {
! 									$noconnection=1;
! 								} else {
! 									$error=pe();
! 								}
  							} else {
! 								eval { $conn= new Net::Z3950::Connection($servername, $port, databaseName => $database); };
! 								if ($@) {
! 									$noconnection=1;
! 								} else {
! 									$error=pe();
! 								}
  							}
  							if ($noconnection || $error) {
! 								warn "no connection at $globalname ";
  							} else {
! 								warn "$globalname ==> $globalsyntax";
   								eval {$conn->option(elementSetName => 'F')};
  								eval { $conn->option(preferredRecordSyntax => Net::Z3950::RecordSyntax::USMARC);} if ($globalsyntax eq "USMARC");
   								eval { $conn->option(preferredRecordSyntax => Net::Z3950::RecordSyntax::UNIMARC);} if ($globalsyntax eq "UNIMARC");
   								if ($@) {
!  									print "$globalname ERROR: $@\n";
   								} else {
- #  									print "Q: $query\n";
  									my $rs=$conn->search($query);
   									pe();
  									my $numresults=$rs->size();
  									if ($numresults eq 0) {
! 										warn "$globalname ==> answered : no records found";
  									} else {
! 										warn "$globalname ==> answered : $numresults found";
  									}
   									pe();
--- 187,267 ----
  							my $servername=$1;
  							my $port=$2;
! 							my $attr='';
! 							if ($type eq 'isbn') {
! 								$attr='1=7';
! 							} elsif ($type eq 'title') {
! 								$attr='1=4';
! 							} elsif ($type eq 'author') {
! 								$attr='1=1003';
! 							} elsif ($type eq 'lccn') {
! 								$attr='1=9';
! 							} elsif ($type eq 'keyword') {
! 								$attr='1=1016';
  							}
! 							my $query="\@attr $attr \"$term\"";
! 							print "$$/$id : Processing $type=$term at $name $server $database $syntax (".($forkcounter+1)." forks)\n";
! # try to connect
  							my $conn;
  							my $noconnection=0;
  							my $error=0;
+ # the z3950 query is builded. Launch it.
  							if ($user) {
! 								$conn= new Net::Z3950::Connection($servername, $port, databaseName => $database, user => $user, password => $password) || ($noconnection=1);
  							} else {
! 								$conn= new Net::Z3950::Connection($servername, $port, databaseName => $database) || ($noconnection=1);
  							}
  							if ($noconnection || $error) {
! # if connection impossible, don't go further !
! 								print "$$/$id : no connection at $globalname\n";
! 								my $result = MARC::Record->new();
! 								my ($notmarcrecord,$alreadyindb,$alreadyinfarm,$imported) = ImportBreeding($result,-1,"$globalname server is NOT responding","",$random);
  							} else {
! # else, build z3950 query and do it !
! 								$now=time();
! 								my $resultsid="";
! 	# create z3950results entries.
! 								if ($serverdone{$serverinfo}==-1) { # if entry exist, just retrieve it
! 									my $stj=$dbi->prepare("select id from z3950results where server=? and queryid=?");
! 									$stj->execute($serverinfo,$id);
! 									($resultsid) = $stj->fetchrow;
! 									$stj->finish;
! 									print "$$/$id : 1 >> $resultsid\n";
! 								} else { # else create it : (may be serverdone=1 or 0)
! 									my $stj=$dbi->prepare("select id from z3950results where server=? and queryid=?");
! 									$stj->execute($serverinfo,$id);
! 									($resultsid) = $stj->fetchrow;
! 									$stj->finish;
! 									print "$$/$id : 2 >> $resultsid\n";
! 									unless ($resultsid) {
! 										$stj=$dbi->prepare("insert into z3950results (server, queryid, startdate) values (?,?,?)");
! 										$stj->execute($serverinfo, $id, $now);
! 										$resultsid=$dbi->{'mysql_insertid'};
! 										$stj->finish;
! 										print "$$/$id : creating and ";
! 									}
! 								}
! 								print "$$/$id : working on results entry $resultsid\n";
! 	# set active to 1 => this request is on the way.
! 								my $stj=$dbi->prepare("update z3950results set active=1 where id=?");
! 								$stj->execute($resultsid);
! #######
! 								print "$$/$id : connected to $globalname\n";
   								eval {$conn->option(elementSetName => 'F')};
  								eval { $conn->option(preferredRecordSyntax => Net::Z3950::RecordSyntax::USMARC);} if ($globalsyntax eq "USMARC");
   								eval { $conn->option(preferredRecordSyntax => Net::Z3950::RecordSyntax::UNIMARC);} if ($globalsyntax eq "UNIMARC");
   								if ($@) {
!  									print "$$/$id : $globalname ERROR: $@ for $resultsid\n";
! 	# in case pb during connexion, set result to "empty" to avoid everlasting loops
! 									my $stj=$dbi->prepare("update z3950results set numrecords=?,numdownloaded=?,highestseen=0,results='',enddate=? where id=?");
! 									$stj->execute(0,0,$now,$resultsid);
   								} else {
  									my $rs=$conn->search($query);
   									pe();
+ 	# we have an answer for a query => get results & store them in marc_breeding table
  									my $numresults=$rs->size();
  									if ($numresults eq 0) {
! 										print "$$/$id : $globalname : no records found\n";
  									} else {
! 										print "$$/$id : $globalname : $numresults records found, retrieving them (max 80)\n";
  									}
   									pe();
***************
*** 198,202 ****
  									if ($elapsed) {
  										my $speed=int($numresults/$elapsed*100)/100;
! 										print "$globalname   SPEED: $speed  $server done $numrecords\n";
  									}
  									my $q_result=$dbi->quote($result);
--- 293,297 ----
  									if ($elapsed) {
  										my $speed=int($numresults/$elapsed*100)/100;
! 										print "$$/$id : $globalname : $server records retrieved $numrecords SPEED: $speed\n";
  									}
  									my $q_result=$dbi->quote($result);
***************
*** 210,274 ****
  										$stj->execute($numresults,$numrecords,$now,$resultsid);
  									}
- 									my $counter=0;
- 									while ($counter<60 && $numrecords<$numresults) {
- 										$counter++;
- 										my $stj=$dbi->prepare("select highestseen from z3950results where id=?");
- 										$stj->execute($resultsid);
- 										my ($highestseen) = $stj->fetchrow;
- 										if ($highestseen>($numrecords-30)) {
- 											$counter=0;
- 											print "   $server rescanning\n";
- 											my $scantimerstart=time();
- 											for ($i=$numrecords+1; $i<=(($numresults<($numrecords+40)) ? ($numresults) : ($numrecords+40)); $i++) {
- 												my $rec=$rs->record($i);
- 												my $marcdata=$rec->rawdata();
- 												$result.=$marcdata;
- 											}
- 											my $scantimerend=time();
- 											($numresults<$numrecords+40) ? ($numrecords=$numresults) : ($numrecords += 40);
- 											my $elapsed=$scantimerend-$scantimerstart;
- 											if ($elapsed) {
- 												my $speed=int($numresults/$elapsed*100)/100;
- 												print "  SPEED: $speed  $server done $numrecords\n";
- 											}
- 
- 											my $q_result=$dbi->quote($result);
- 											($q_result) || ($q_result='""');
- 											$now=time();
- 											my $stj=$dbi->prepare("update z3950results set numdownloaded=?,results=? where id=?");
- 											$stj->execute($numrecords,$q_result,$resultsid);
- 										}
- 										sleep 5;
- 									}
   								}
  							}
! 							# FIXME - There's already a $stj in this scope
! 							my $stj=$dbi->prepare("update z3950results set active=0 where id=?");
! 							$stj->execute($resultsid);
! 							eval {$stj->finish};
! 							print "    $server done.\n";
  							exit;
- sub pe {
-    	return 0;
- 	my $code=$conn->errcode();
- 	my $msg=$conn->errmsg();
- 	my $ai=$conn->addinfo();
- 	print << "EOF";
- CODE:  $code
- MSG:   $msg
- ADDTL: $ai
- EOF
- 	return 0;
- }
  						}
  					}
- 					unless ($stillprocessing) {
- 						#my $sti=$dbh->prepare("select enddate from z3950queue where id=?");
- 						#$sti->execute($id);
- 						#my ($enddate) = $sti->fetchrow;
- 						#unless ($enddate) {
- 					}
  				} else {
  				}
  			}
  			$lastrun=time();
--- 305,325 ----
  										$stj->execute($numresults,$numrecords,$now,$resultsid);
  									}
   								}
+ 								$stj=$dbi->prepare("update z3950results set active=-1 where id=?");
+ 								$stj->execute($resultsid);
+ 								eval {my $stj->finish};
  							}
! #OK, the search is done inactivate it..
! 							print "$$/$id : $server search done.\n";
  							exit;
  						}
  					}
  				} else {
+ # $forkcounter >=12
  				}
+ 				# delete z3950queue entry, as everything is done
+ 				my $sti=$dbh->prepare("update z3950queue set done=1,enddate=? where id=?");
+ 				$now=time();
+ 				$sti->execute($now,$id);
  			}
  			$lastrun=time();
***************
*** 289,290 ****
--- 340,344 ----
  
  
+ sub pe {
+ 	return 0;
+ }

Index: search.pl
===================================================================
RCS file: /cvsroot/koha/koha/z3950/search.pl,v
retrieving revision 1.2
retrieving revision 1.3
diff -C2 -r1.2 -r1.3
*** search.pl	23 Jun 2003 11:28:53 -0000	1.2
--- search.pl	11 Feb 2004 08:44:28 -0000	1.3
***************
*** 50,54 ****
  my $record;
  my $oldbiblio;
- warn "search.pl : $bibid / $title / $author / $isbn";
  if ($bibid > 0) {
  	$record = MARCgetbiblio($dbh,$bibid);
--- 50,53 ----
***************
*** 57,71 ****
  my $errmsg;
  unless ($random) { # if random is a parameter => we're just waiting for the search to end, it's a refresh.
- warn "NO RANDOM";
  	if ($isbn) {
- 	warn "ADD ISBN $isbn";
  		$random =rand(1000000000);
  		$errmsg = addz3950queue($isbn, "isbn", $random, 'CHECKED');
  	} elsif ($author) {
- 	warn "ADD AUTHOR $author";
  		$random =rand(1000000000);
  		$errmsg = addz3950queue($author, "author", $random, 'CHECKED');
  	} elsif ($title) {
- 	warn "ADD TITLE : $title";
  		$random =rand(1000000000);
  		$errmsg = addz3950queue($title, "title", $random, 'CHECKED');
--- 56,66 ----





More information about the Koha-cvs mailing list