[Koha-patches] [PATCH] URLs checker enhancement (bug #2959)

Frederic Demians f.demians at tamil.fr
Tue Feb 17 10:23:13 CET 2009


Improve URLs checker script in the way (half way) pointed out by Galen:

- A C4::URL::Checker class handles URL checking. This class is not yet
  in a separate file in the C4 directory. It could easily be
  extended to accommodate checking of authority URLs.
- Script output can now be formatted as CSV or HTML. The HTML version
  links directly to the MARC biblio record editor.
---
 misc/cronjobs/check-url.pl |  209 ++++++++++++++++++++++++++++++++++++++-----
 1 files changed, 184 insertions(+), 25 deletions(-)

diff --git a/misc/cronjobs/check-url.pl b/misc/cronjobs/check-url.pl
index cd6dd22..64c16c9 100755
--- a/misc/cronjobs/check-url.pl
+++ b/misc/cronjobs/check-url.pl
@@ -7,54 +7,203 @@
 # (http://www.gnu.org/licenses/gpl.html)
 #
 
+
+
+package C4::URL::Checker;
+
+=head1 NAME 
+
+C4::URL::Checker - base object for checking URL stored in Koha DB
+
+=head1 SYNOPSIS
+
+ use C4::URL::Checker;
+
+ my $checker = C4::URL::Checker->new( );
+ $checker->{ host_default } = 'http://mylib.kohalibrary.com';
+ my $checked_urls = $checker->check_biblio( 123 );
+ foreach my $url ( @$checked_urls ) {
+     print "url:        ", $url->{ url        }, "\n",
+           "is_success: ", $url->{ is_success }, "\n",
+           "status:     ", $url->{ status     }, "\n";
+ }
+ 
+=head1 FUNCTIONS
+
+=head2 new
+
+Create a URL Checker. The returned object can be used to set
+default host variable :
+
+ my $checker = C4::URL::Checker->new( );
+ $checker->{ host_default } = 'http://mylib.kohalibrary.com';
+
+=head2 check_biblio
+
+Checks all URLs from a biblio record. Returns a reference to an array
+containing every URL together with its check result.
+
+ my $checked_urls = $checker->check_biblio( 123 );
+
+With 2 URLs, the returned array will look like that:
+
+  [
+    {
+      'url' => 'http://mylib.tamil.fr/img/62265_0055B.JPG',
+      'is_success' => 1,
+      'status' => 'ok'
+    },
+    {
+      'url' => 'http://mylib.tamil.fr//img/62265_0055C.JPG',
+      'is_success' => 0,
+      'status' => '404 - Page not found'
+    }
+  ],
+  
+
+=cut
+
+use LWP::UserAgent;
+use HTTP::Request;
+use C4::Biblio;
+
+
+
+sub new {
+    # Constructor: builds a checker holding its own LWP::UserAgent.
+    # Callers may afterwards set $checker->{ host_default }.
+    my $class = shift;
+
+    # Direct method call: indirect-object "new LWP::UserAgent" can be
+    # confused with this package's own new().
+    my $self = { user_agent => LWP::UserAgent->new };
+    return bless $self, $class;
+}
+
+
+sub check_biblio {
+    # Check every 856$u URL of one biblio record. Returns a reference to an
+    # array of { url, is_success, status } hashes, or undef when the record
+    # cannot be loaded or has no 856 field.
+    my ( $self, $biblionumber ) = @_;
+    my $uagent = $self->{ user_agent   };
+    my $host   = $self->{ host_default };
+    # Guard against a missing record: ->field on undef would be fatal.
+    my $record = GetMarcBiblio( $biblionumber );
+    return undef unless $record && $record->field('856');
+
+    my @urls = ();
+    foreach my $field ( $record->field('856') ) {
+        my $url = $field->subfield('u');
+        next unless $url;
+        $url = "$host/$url" unless $url =~ /^http/;
+        my $req = HTTP::Request->new( GET => $url );
+        # The dying content callback presumably aborts the download early
+        # (LWP traps it); only the response status is used below.
+        my $res = $uagent->request( $req, sub { die }, 1 );
+        my $ok  = $res->is_success ? 1 : 0;
+        push @urls, {
+            url        => $url,
+            is_success => $ok,
+            status     => $ok ? 'ok' : $res->status_line,
+        };
+    }
+    return \@urls;
+}
+
+
+
+package Main;
+
 use strict;
 use warnings;
 use diagnostics;
 use Carp;
-use LWP::Simple;
+
+use YAML::XS;
+
 use Pod::Usage;
 use Getopt::Long;
 use C4::Context;
-use C4::Biblio;
+
 
 
 my $verbose     = 0;
 my $help        = 0;
 my $host        = '';
+my $host_pro    = '';
+my $html        = 0;
+my $uriedit     = "/cgi-bin/koha/cataloguing/addbiblio.pl?biblionumber=";
 GetOptions( 
-    'verbose'   => \$verbose,
-    'help'      => \$help,
-    'host=s'    => \$host,
+    'verbose'       => \$verbose,
+    'html'          => \$html,
+    'help'          => \$help,
+    'host=s'        => \$host,
+    'host-pro=s'    => \$host_pro,
 );
 
+
 sub usage {
     pod2usage( -verbose => 2 );
     exit;
 } 
 
-usage() if $help;          
 
-my $context = new C4::Context(  );  
-my $dbh = $context->dbh;
-my $sth = $dbh->prepare( 
-    "SELECT biblionumber FROM biblioitems WHERE url <> ''" );
-$sth->execute;
-while ( my ($biblionumber) = $sth->fetchrow ) { 
-    my $record = GetMarcBiblio( $biblionumber );    
-    next unless $record->field('856');
-    foreach my $field ( $record->field('856') ) {
-        my $url = $field->subfield('u');
-        next unless $url;
-        $url = "$host/$url" unless $url =~ /^http/;
-        if ( head( $url ) ) {
-            print "$biblionumber\t$url\tsucceed\n" if $verbose;
-        }
-        else {
-            print "$biblionumber\t$url\tfailed\n";
+sub bibediturl {
+    # Return an HTML anchor linking a biblionumber to its MARC editor page.
+    my ( $bibnum ) = @_;
+    return '<a href="' . $host_pro . $uriedit . $bibnum . '">' . $bibnum . '</a>';
+}
+
+
+# Check all URLs from all current Koha biblio records. Failed checks (all
+# checks with --verbose) are printed as tab-separated lines or HTML rows.
+sub check_all_url {
+    my $checker = C4::URL::Checker->new();
+    $checker->{ host_default } = $host;
+    # URLs and status lines are external data: escape them in HTML mode.
+    my %entity = ( '&' => '&amp;', '<' => '&lt;', '>' => '&gt;', '"' => '&quot;' );
+    my $escape = sub { my $s = shift; $s =~ s/([&<>"])/$entity{$1}/g; return $s };
+    my $context = C4::Context->new();
+    my $dbh = $context->dbh;
+    my $sth = $dbh->prepare(
+        "SELECT biblionumber FROM biblioitems WHERE url <> ''" );
+    $sth->execute;
+    print "<html>\n<body>\n<table>\n" if $html;
+    while ( my ($biblionumber) = $sth->fetchrow ) {
+        my $result = $checker->check_biblio( $biblionumber );
+        next unless $result;  # No URL
+        foreach my $url ( @$result ) {
+            next if $url->{ is_success } && !$verbose;
+            print( $html
+                ? "<tr>\n<td>" . bibediturl( $biblionumber ) .
+                  "</td>\n<td>" . $escape->( $url->{ url } ) . "</td>\n<td>" .
+                  $escape->( $url->{ status } ) . "</td>\n</tr>\n\n"
+                : "$biblionumber\t" . $url->{ url } . "\t" .
+                  $url->{ status } . "\n" );
         }
     }
+    print "</table>\n</body>\n</html>\n" if $html;
 }
-exit;      
+
+
+# BEGIN
+
+usage() if $help;
+
+# HTML rows link to the biblio editor on --host-pro: fall back on --host
+# when it is not given, and fail loudly when neither is available.
+if ( $html && !$host_pro ) {
+    if ( !$host ) {
+        print STDERR "Error: --host-pro or --host must be provided in html mode\n";
+        exit 1;    # non-zero status so a cron run reports the failure
+    }
+    $host_pro = $host;
+}
+
+check_all_url();
+
+
 
 =head1 NAME
 
@@ -82,7 +231,17 @@ For example, if --host=http://www.mylib.com, then when 856$u contains
 
 =item B<--verbose|-v>
 
-Output succeed URL checks with failed ones. 
+Outputs successful URL checks along with failed ones.
+
+=item B<--html>
+
+Formats output as HTML. The result can be redirected to a file
+accessible over HTTP, which makes it possible to link directly to each
+biblio record in edit mode. Requires B<--host-pro> (or B<--host>).
+
+=item B<--host-pro=http://koha-pro.tld>
+
+Server host used to link to biblio record editing page.
 
 =item B<--help|-h>
 
-- 
1.5.5.GIT




More information about the Koha-patches mailing list