[Koha-patches] [PATCH] Bug 2959 - Cache already failed URLs to speedup check-url.pl

Frédéric Demians f.demians at tamil.fr
Fri Feb 12 23:06:02 CET 2010


Based on David Schuster's improvement patch.

For David:

- To send the output into an HTML file, there is no need to add a
  parameter to this script; just redirect the output to a file:

  check-url --html --host-pro=http://koha-pro.mylib.org \
    > /usr/local/koha/koha-tmpl/badurls.html

- If you want the resulting table to have alternating row colours, use
  CSS and JavaScript. For example, with jQuery (found with Google):

  <style type="text/css">
  table {width:400px; border:1px solid blue;}
  .oddrow {background-color:#E5E5E5;}
  </style>
  <script type="text/javascript"
   src="http://code.jquery.com/jquery-latest.min.js"></script>
  <script type="text/javascript">
  $(function(){
    $("table.tiger-stripe tr:even").addClass("oddrow");
  });
  </script>
---
 misc/cronjobs/check-url.pl |   31 +++++++++++++++++++++++--------
 1 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/misc/cronjobs/check-url.pl b/misc/cronjobs/check-url.pl
index 381c122..9700c98 100755
--- a/misc/cronjobs/check-url.pl
+++ b/misc/cronjobs/check-url.pl
@@ -77,6 +77,7 @@ sub new {
     my $class = shift;
     
     $self->{ user_agent } = new LWP::UserAgent;
+    $self->{ bad_url    } = { };
     
     bless $self, $class;
     return $self;
@@ -88,6 +89,7 @@ sub check_biblio {
     my $biblionumber    = shift;
     my $uagent          = $self->{ user_agent   };
     my $host            = $self->{ host_default };
+    my $bad_url         = $self->{ bad_url      };
 
     my $record = GetMarcBiblio( $biblionumber ); 
     return unless $record->field('856');
@@ -98,17 +100,24 @@ sub check_biblio {
         next unless $url; 
         $url = "$host/$url" unless $url =~ /^http/;
         my $check = { url => $url };
-        my $req = HTTP::Request->new( GET => $url );
-        my $res = $uagent->request( $req, sub { die }, 1 );
-        if ( $res->is_success ) {
+        if ( $bad_url->{ $url } ) {
             $check->{ is_success } = 1;
-            $check->{ status     } = 'ok';
+            $check->{ status     } = '500 Site already checked';
         }
         else {
-            $check->{ is_success } = 0;
-            $check->{ status     } = $res->status_line;
+            my $req = HTTP::Request->new( GET => $url );
+            my $res = $uagent->request( $req, sub { die }, 1 );
+            if ( $res->is_success ) {
+                $check->{ is_success } = 1;
+                $check->{ status     } = 'ok';
+            }
+            else {
+                $check->{ is_success } = 0;
+                $check->{ status     } = $res->status_line;
+                $bad_url->{ $url     } = 1;
+            }
         }
-        push( @urls, $check );       
+        push @urls, $check;
     }
     return \@urls;
 }
@@ -168,7 +177,13 @@ sub check_all_url {
     my $sth = $dbh->prepare( 
         "SELECT biblionumber FROM biblioitems WHERE url <> ''" );
     $sth->execute;
-    print "<html>\n<body>\n<table>\n" if $html;
+    if ( $html ) {
+        print <<EOS;
+<html>
+<body>
+<table>
+EOS
+    }
     while ( my ($biblionumber) = $sth->fetchrow ) {
         my $result = $checker->check_biblio( $biblionumber );  
         next unless $result;  # No URL
-- 
1.6.4.3




More information about the Koha-patches mailing list