[Koha-patches] [PATCH] OPAC new subjects cloud using Zebra scan on Subject field

Frederic Demians f.demians at tamil.fr
Wed Oct 1 17:44:47 CEST 2008


This patch implements a new subject cloud. The subject cloud is created in
the pro interface:

  [1] /tools/create_cloud.pl creates a static file:
      /koha-tmpl/opac-tmpl/cloud.html
      A Zebra scan is used to find the top subjects, i.e. the subjects
      which occur most often in the whole catalogue.
      Write permission on /koha-tmpl/opac-tmpl required.
  [2] opac-main.pl displays the cloud if the cloud file is
      present.

Compared to the previous cloud, it has these advantages:
  - It doesn't require a batch job.
  - Speed--It uses Zebra and so does not have to scan the whole
    MySQL DB, read and decode MARC records, etc.
  - Scalable--It doesn't load all subjects in memory but
    only a maximum of 1000 subjects.
  - Display uses HTML::TagCloud and so it looks like a
    'regular' tag cloud.
---
 .../intranet-tmpl/prog/en/includes/tools-menu.inc  |    1 +
 .../prog/en/modules/tools/tools-home.tmpl          |    4 +
 koha-tmpl/opac-tmpl/prog/en/modules/opac-main.tmpl |   10 +-
 tools/create_cloud.pl                              |  157 ++++++++++++++++++++
 4 files changed, 169 insertions(+), 3 deletions(-)
 create mode 100755 tools/create_cloud.pl

diff --git a/koha-tmpl/intranet-tmpl/prog/en/includes/tools-menu.inc b/koha-tmpl/intranet-tmpl/prog/en/includes/tools-menu.inc
index 2c94cb3..486eacb 100644
--- a/koha-tmpl/intranet-tmpl/prog/en/includes/tools-menu.inc
+++ b/koha-tmpl/intranet-tmpl/prog/en/includes/tools-menu.inc
@@ -10,6 +10,7 @@
     <!-- TMPL_IF NAME="CAN_user_tools_edit_news" -->
 	<li><a href="/cgi-bin/koha/tools/koha-news.pl">News</a></li>
     <!-- /TMPL_IF -->
+	<li><a href="/cgi-bin/koha/tools/create_cloud.pl">Create Subjects Cloud</a></li>
     <!-- TMPL_IF NAME="CAN_user_tools_label_creator" -->
 	<li><a href="/cgi-bin/koha/labels/label-home.pl">Labels (spine and barcode)</a></li>
     <!-- /TMPL_IF -->
diff --git a/koha-tmpl/intranet-tmpl/prog/en/modules/tools/tools-home.tmpl b/koha-tmpl/intranet-tmpl/prog/en/modules/tools/tools-home.tmpl
index 75d8104..218bf16 100644
--- a/koha-tmpl/intranet-tmpl/prog/en/modules/tools/tools-home.tmpl
+++ b/koha-tmpl/intranet-tmpl/prog/en/modules/tools/tools-home.tmpl
@@ -18,6 +18,10 @@
     <dt><a href="/cgi-bin/koha/tools/koha-news.pl">News</a></dt>
     <dd>Write news for the OPAC and staff interfaces</dd>
     <!-- /TMPL_IF -->
+
+    <dt><a href="/cgi-bin/koha/tools/create_cloud.pl">Subjects Cloud</a></dt>
+    <dd>Create a cloud of top subjects extracted from Catalogue</dd>
+
 	
     <!-- TMPL_IF NAME="CAN_user_tools_label_creator" -->
     <dt><a href="/cgi-bin/koha/labels/label-home.pl">Label and Patron Card Creator</a></dt>
diff --git a/koha-tmpl/opac-tmpl/prog/en/modules/opac-main.tmpl b/koha-tmpl/opac-tmpl/prog/en/modules/opac-main.tmpl
index a64408d..c8c4404 100644
--- a/koha-tmpl/opac-tmpl/prog/en/modules/opac-main.tmpl
+++ b/koha-tmpl/opac-tmpl/prog/en/modules/opac-main.tmpl
@@ -37,6 +37,10 @@
 
 	<!-- TMPL_IF NAME="OpacMainUserBlock" --><div id="opacmainuserblock" class="container"><!-- TMPL_VAR NAME="OpacMainUserBlock" --></div><!-- /TMPL_IF -->
 
+<div id="OpacMainCloud" class="container">
+    <!-- TMPL_INCLUDE NAME="../../../cloud.html" -->
+</div>
+
 <!-- TMPL_IF NAME="recentacquiloop"-->
     <div class="searchresults">
         <table>
@@ -53,7 +57,7 @@
                     <!-- TMPL_IF name="BiblioDefaultViewmarc" -->
                     <a class="title" href="/cgi-bin/koha/opac-MARCdetail.pl?biblionumber=<!-- TMPL_VAR NAME="biblionumber" ESCAPE="URL" -->">
                     <!-- TMPL_IF NAME="title"-->
-                        <!-- TMPL_VAR NAME="title" escape="html" -->
+                        <!-- TMPL_VAR NAME="title" -->
                     <!-- TMPL_ELSE -->
                         <span class="problem">(no title)</span>
                     <!-- /TMPL_IF -->
@@ -61,7 +65,7 @@
                     <!-- TMPL_ELSE -->
                     <!-- TMPL_IF name="BiblioDefaultViewisbd" -->
                     <a class="title" href="/cgi-bin/koha/opac-ISBDdetail.pl?biblionumber=<!-- TMPL_VAR NAME="biblionumber" ESCAPE="URL" -->">
-                    <!-- TMPL_IF NAME="title" escape="html"-->
+                    <!-- TMPL_IF NAME="title"-->
                         <!-- TMPL_VAR NAME="title" -->
                     <!-- TMPL_ELSE -->
                         <span class="problem">(no title)</span>
@@ -69,7 +73,7 @@
                     </a>
                     <!-- TMPL_ELSE -->
                     <a class="title" href="/cgi-bin/koha/opac-detail.pl?biblionumber=<!-- TMPL_VAR NAME="biblionumber" ESCAPE="URL" -->">
-                    <!-- TMPL_IF NAME="title" escape="html" -->
+                    <!-- TMPL_IF NAME="title"-->
                         <!-- TMPL_VAR NAME="title" -->
                     <!-- TMPL_ELSE -->
                         <span class="problem">(no title)</span>
diff --git a/tools/create_cloud.pl b/tools/create_cloud.pl
new file mode 100755
index 0000000..759e680
--- /dev/null
+++ b/tools/create_cloud.pl
@@ -0,0 +1,157 @@
+#!/usr/bin/perl
+
+# Copyright 2008 Tamil s.a.r.l. / www.tamil.fr
+#
+# This file is part of Koha.
+#
+# Koha is free software; you can redistribute it and/or modify it under the
+# terms of the GNU General Public License as published by the Free Software
+# Foundation; either version 2 of the License, or (at your option) any later
+# version.
+#
+# Koha is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+# A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
+# Suite 330, Boston, MA  02111-1307 USA
+
+
+use strict;
+use warnings;
+use diagnostics;
+use C4::Auth;
+use C4::Output;
+use C4::Context;
+use CGI;
+use HTML::TagCloud;
+
+
+my @errors;
+my $input = CGI->new();
+
+my ( $template, $loggedinuser, $cookie ) = get_template_and_user({
+        template_name   => "tools/create_cloud.tmpl",
+        query           => $input,
+        type            => "intranet",
+        authnotrequired => 0,
+        debug           => 1,
+});
+
+# Form value: 0 empties the cloud, 1..1000 rebuilds it, absent does nothing.
+my $number_of_subjects = $input->param('number_of_subjects');
+if ( defined $number_of_subjects && $number_of_subjects =~ /^\d+$/ ) {
+    my $cloud_file_name = C4::Context->config( 'opachtdocs' ) . "/cloud.html";
+    my $cloud_html;
+    if ( $number_of_subjects == 0 ) {
+        # An empty file makes the OPAC include render nothing.
+        $cloud_html = '';
+        $template->param( cloud_deleted => 1 );
+    }
+    elsif ( $number_of_subjects <= 1000 ) {
+        my $cloud = HTML::TagCloud->new;
+        # Every returned term is added, including the last (most frequent).
+        for my $term ( zebra_top_terms( 'Subject', $number_of_subjects ) ) {
+            my ( $subject, $occurrences ) = @{$term};
+            # The term is escaped because it may contain spaces, '&', etc.
+            $cloud->add( $subject,
+                "/cgi-bin/koha/opac-search.pl?q=su%3A" . CGI::escape($subject),
+                $occurrences );
+        }
+        $cloud_html = $cloud->html_and_css();
+        $template->param( cloud => $cloud_html );
+    }
+    if ( defined $cloud_html ) {
+        open my $fh, ">", $cloud_file_name
+            or die "Unable to create file $cloud_file_name: $!";
+        print {$fh} $cloud_html;
+        close $fh or die "Unable to write file $cloud_file_name: $!";
+    }
+}
+output_html_with_http_headers $input, $cookie, $template->output;
+
+
+
+#
+# zebra_top_terms:
+#   Returns the most frequent terms of a specific zebra index. The index
+#   is scanned page after page until a scan returns no term.
+#
+# PARAMETERS:
+#   $index_name   Name of Zebra index, for example 'Subject'
+#   $max_terms    Max number of returned terms
+#
+# RETURN:
+#   A list of array references sorted by ascending number of occurrences,
+#   empty if $max_terms < 1 or no term is found. Each one holds:
+#   [0] term
+#   [1] term number of occurrences
+#   [2] term relative weight within the returned set: 0 for the least
+#       frequent term up to 1 for the most frequent one (0 for every
+#       term when all counts are equal)
+#
+sub zebra_top_terms {
+    my ( $index_name, $max_terms ) = @_;
+    return if $max_terms < 1;
+
+    my $zbiblio = C4::Context->Zconn( "biblioserver" );
+    my @terms;        # kept terms, each one [ term, occurrences ]
+    my $min_index;    # position in @terms of the least frequent kept term
+    my $from = '0';
+    while (1) {
+        my $query = '@attr 1=' . $index_name . ' @attr 4=1 @attr 6=3 "'
+                    . $from . '"';
+        my $ss = $zbiblio->scan_pqf( $query );
+        last if $ss->size() == 0;
+        my $last_term = '';
+        for my $index ( 0 .. $ss->size() - 1 ) {
+            my ( $term, $occ ) = $ss->term($index);
+            $last_term = $term;
+            if ( @terms < $max_terms ) {
+                push @terms, [ $term, $occ ];
+                # The minimum is only needed once the kept set is full.
+                next if @terms < $max_terms;
+            }
+            elsif ( $occ > $terms[$min_index][1] ) {
+                # Evict the least frequent kept term.
+                $terms[$min_index] = [ $term, $occ ];
+            }
+            else {
+                next;
+            }
+            # The kept set is full and has just changed: locate its new
+            # least frequent term (the last one on ties).
+            $min_index = 0;
+            for my $i ( 1 .. $#terms ) {
+                if ( $terms[$i][1] <= $terms[$min_index][1] ) {
+                    $min_index = $i;
+                }
+            }
+        }
+
+        # The next scan starts from the last term seen with 'a' appended.
+        $from = $last_term . 'a';
+    }
+
+    # Nothing to sort or weight when the index returned no term.
+    return if !@terms;
+
+    # Sort array of array by term occurence
+    @terms = sort { $a->[1] <=> $b->[1] } @terms;
+
+    # A relative weight to other set terms is added to each term,
+    # including the last (most frequent) one.
+    my $min   = $terms[0][1];
+    my $max   = $terms[-1][1];
+    my $delta = $max - $min;
+    for my $term (@terms) {
+        # A zero $delta means all counts are equal: avoid dividing by 0.
+        my $weight = $delta ? ( $term->[1] - $min ) / $delta : 0;
+        push @{$term}, $weight;
+    }
+
+    return @terms;
+}
+
+
-- 
1.5.5.GIT




More information about the Koha-patches mailing list