[Koha-patches] [PATCH] OPAC new subjects cloud using Zebra scan on Subject field (4th)

Frederic Demians f.demians at tamil.fr
Tue Oct 14 18:32:41 CEST 2008


This patch implements a new subject cloud. The subject cloud is created in
the staff (pro) interface:

    [1] /tools/create_cloud.pl creates a static file:
        /koha-tmpl/opac-tmpl/cloud.html
        Zebra scan is used to find the top subjects, i.e. the subjects
        which occur most often in the whole catalogue.
        Write permission on /koha-tmpl/opac-tmpl required.
    [2] opac-main.pl displays the cloud if cloud file is
        present.

Compared to the previous cloud, it has these advantages:

    - It doesn't require a batch job.
    - Speed--It uses Zebra and so does not have to scan the whole
      MySQL DB, read and decode MARC records, etc. Reading a
      subjects cloud table is no longer required.
    - Scalable--It doesn't load all subjects into memory, but
      only a maximum of 1000 subjects. It will continue to
      work with a large DB.
    - Display subjects with size proportional to their
      weight in the set using a logarithmic algorithm.
    - Cloud is generated in the professional interface
      with a nice Ajax feedback during the process.
---
 .../intranet-tmpl/prog/en/includes/tools-menu.inc  |    1 +
 .../prog/en/modules/tools/create_cloud.tmpl        |  106 ++++++++++
 .../prog/en/modules/tools/tools-home.tmpl          |    4 +
 koha-tmpl/opac-tmpl/prog/en/css/opac.css           |   40 ++++-
 koha-tmpl/opac-tmpl/prog/en/modules/opac-main.tmpl |   10 +-
 tools/create_cloud.pl                              |   54 +++++
 tools/create_cloud_callback.pl                     |  219 ++++++++++++++++++++
 7 files changed, 430 insertions(+), 4 deletions(-)
 create mode 100644 koha-tmpl/intranet-tmpl/prog/en/modules/tools/create_cloud.tmpl
 create mode 100755 tools/create_cloud.pl
 create mode 100755 tools/create_cloud_callback.pl

diff --git a/koha-tmpl/intranet-tmpl/prog/en/includes/tools-menu.inc b/koha-tmpl/intranet-tmpl/prog/en/includes/tools-menu.inc
index 2c94cb3..486eacb 100644
--- a/koha-tmpl/intranet-tmpl/prog/en/includes/tools-menu.inc
+++ b/koha-tmpl/intranet-tmpl/prog/en/includes/tools-menu.inc
@@ -10,6 +10,7 @@
     <!-- TMPL_IF NAME="CAN_user_tools_edit_news" -->
 	<li><a href="/cgi-bin/koha/tools/koha-news.pl">News</a></li>
     <!-- /TMPL_IF -->
+	<li><a href="/cgi-bin/koha/tools/create_cloud.pl">Create Subjects Cloud</a></li>
     <!-- TMPL_IF NAME="CAN_user_tools_label_creator" -->
 	<li><a href="/cgi-bin/koha/labels/label-home.pl">Labels (spine and barcode)</a></li>
     <!-- /TMPL_IF -->
diff --git a/koha-tmpl/intranet-tmpl/prog/en/modules/tools/create_cloud.tmpl b/koha-tmpl/intranet-tmpl/prog/en/modules/tools/create_cloud.tmpl
new file mode 100644
index 0000000..de7aad0
--- /dev/null
+++ b/koha-tmpl/intranet-tmpl/prog/en/modules/tools/create_cloud.tmpl
@@ -0,0 +1,106 @@
+<!-- TMPL_INCLUDE NAME="doc-head-open.inc" -->
+<title>Koha &rsaquo; Create Subjects Cloud </title>
+<!-- TMPL_INCLUDE NAME="doc-head-close.inc" -->
+</head>
+<body>
+<!-- TMPL_INCLUDE NAME="header.inc" -->
+<!-- TMPL_INCLUDE NAME="cat-search.inc" -->
+
+<div id="breadcrumbs"><a href="/cgi-bin/koha/mainpage.pl">Home</a> &rsaquo; <a href="/cgi-bin/koha/tools/tools-home.pl">Tools</a> &rsaquo; <a href="/cgi-bin/koha/tools/create_cloud.pl">Create Subjects Cloud</a></div>
+
+<div id="doc3" class="yui-t2">
+   
+   <div id="bd">
+	<div id="yui-main">
+	<div class="yui-b">
+<div class="yui-g">
+<div class="yui-u first">
+<h1>Create Subjects Cloud</h1>
+
+<!-- TMPL_IF name="cloud_deleted" -->
+<p style="color:red">Subjects Cloud deleted!</p>
+<!-- /TMPL_IF -->
+
+<!-- TMPL_IF name="number_of_subjects" -->
+<style>
+#cloud_progress th {
+    text-align: left;
+    width: 200px;
+}
+#cloud_progress td {
+    width: 250px;
+}
+</style>
+<div id="cloud_progress">
+<table>
+<tr>
+    <th>Scanned terms:</th>
+    <td id="scanned_terms">0</td>
+</tr>
+<tr>
+    <th>Progress:</th>
+    <td id="progress"><img src="http://us.i1.yimg.com/us.yimg.com/i/us/per/gr/gp/rel_interstitial_loading.gif"/></td>
+</tr>
+</table>
+</div>
+<div id="cloud_done" style="display:none;">
+<p>Cloud has been generated. Take a look at your OPAC.</p>
+</div>
+
+<script type="text/javascript">
+$('#cloud_progress').ready(function() {
+    var NumberOfSubjects = <!-- TMPL_VAR name="number_of_subjects" -->;
+    var xhr = $.ajax({
+        type: 'GET',
+        url: 'create_cloud_callback.pl?number_of_subjects=' + NumberOfSubjects,
+        success: function(msg) {
+            $('#cloud_progress').hide();
+            $('#cloud_done').show();
+        }
+    });
+    var count = 0;
+    xhr.onreadystatechange = function() {
+        if (xhr.readyState == 3) {
+            count = count + 1000;
+            $('#scanned_terms').html(count);
+        }
+    };
+});
+
+</script>
+
+<!-- TMPL_ELSE -->
+
+<form method="post" action="<!-- TMPL_VAR name="SCRIPT_NAME" -->" enctype="multipart/form-data">
+<fieldset class="rows">
+<legend>Parameters</legend>
+<ol>
+<li>
+<label for="number_of_subjects">Number of Subjects: </label>
+<input type="text" id="number_of_subjects" name="number_of_subjects" value="100"/>
+</li>
+</ol>
+</fieldset>
+<fieldset class="action">
+<input type="submit" value="Create Cloud" />
+</fieldset>
+</form>
+
+<p><b>Explanation:</b>
+A static Subjects Cloud will be created containing top subjects extracted from the
+Catalogue. It works only in Zebra mode. Please be patient. This can be a long
+processing. If you enter 0 above, previous 
+Subjects Cloud will be removed.
+</p>
+<!-- /TMPL_IF -->
+
+</div>
+</div>
+
+</div>
+</div>
+<div class="yui-b noprint">
+<!-- TMPL_INCLUDE NAME="tools-menu.inc" -->
+</div>
+</div>
+<!-- TMPL_INCLUDE NAME="intranet-bottom.inc" -->
diff --git a/koha-tmpl/intranet-tmpl/prog/en/modules/tools/tools-home.tmpl b/koha-tmpl/intranet-tmpl/prog/en/modules/tools/tools-home.tmpl
index 75d8104..218bf16 100644
--- a/koha-tmpl/intranet-tmpl/prog/en/modules/tools/tools-home.tmpl
+++ b/koha-tmpl/intranet-tmpl/prog/en/modules/tools/tools-home.tmpl
@@ -18,6 +18,10 @@
     <dt><a href="/cgi-bin/koha/tools/koha-news.pl">News</a></dt>
     <dd>Write news for the OPAC and staff interfaces</dd>
     <!-- /TMPL_IF -->
+
+    <dt><a href="/cgi-bin/koha/tools/create_cloud.pl">Subjects Cloud</a></dt>
+    <dd>Create a cloud of top subjects extracted from Catalogue</dd>
+
 	
     <!-- TMPL_IF NAME="CAN_user_tools_label_creator" -->
     <dt><a href="/cgi-bin/koha/labels/label-home.pl">Label and Patron Card Creator</a></dt>
diff --git a/koha-tmpl/opac-tmpl/prog/en/css/opac.css b/koha-tmpl/opac-tmpl/prog/en/css/opac.css
index b6cec6d..b6612dd 100644
--- a/koha-tmpl/opac-tmpl/prog/en/css/opac.css
+++ b/koha-tmpl/opac-tmpl/prog/en/css/opac.css
@@ -1727,4 +1727,42 @@ table#items th {
 }
 #action {
 	margin-top: 0;
-}
\ No newline at end of file
+}
+
+.subjectcloud {
+    text-align:  center; 
+    line-height: 16px; 
+    margin: 20px;
+    background: #f0f0f0;
+    padding: 3%;
+}
+.subjectcloud a {
+    font-weight: lighter;
+    text-decoration: none;
+}
+span.tagcloud0 { font-size: 12px;}
+span.tagcloud1 { font-size: 13px;}
+span.tagcloud2 { font-size: 14px;}
+span.tagcloud3 { font-size: 15px;}
+span.tagcloud4 { font-size: 16px;}
+span.tagcloud5 { font-size: 17px;}
+span.tagcloud6 { font-size: 18px;}
+span.tagcloud7 { font-size: 19px;}
+span.tagcloud8 { font-size: 20px;}
+span.tagcloud9 { font-size: 21px;}
+span.tagcloud10 { font-size: 22px;}
+span.tagcloud11 { font-size: 23px;}
+span.tagcloud12 { font-size: 24px;}
+span.tagcloud13 { font-size: 25px;}
+span.tagcloud14 { font-size: 26px;}
+span.tagcloud15 { font-size: 27px;}
+span.tagcloud16 { font-size: 28px;}
+span.tagcloud17 { font-size: 29px;}
+span.tagcloud18 { font-size: 30px;}
+span.tagcloud19 { font-size: 31px;}
+span.tagcloud20 { font-size: 32px;}
+span.tagcloud21 { font-size: 33px;}
+span.tagcloud22 { font-size: 34px;}
+span.tagcloud23 { font-size: 35px;}
+span.tagcloud24 { font-size: 36px;}
+
diff --git a/koha-tmpl/opac-tmpl/prog/en/modules/opac-main.tmpl b/koha-tmpl/opac-tmpl/prog/en/modules/opac-main.tmpl
index a64408d..c8c4404 100644
--- a/koha-tmpl/opac-tmpl/prog/en/modules/opac-main.tmpl
+++ b/koha-tmpl/opac-tmpl/prog/en/modules/opac-main.tmpl
@@ -37,6 +37,10 @@
 
 	<!-- TMPL_IF NAME="OpacMainUserBlock" --><div id="opacmainuserblock" class="container"><!-- TMPL_VAR NAME="OpacMainUserBlock" --></div><!-- /TMPL_IF -->
 
+<div id="OpacMainCloud" class="container">
+    <!-- TMPL_INCLUDE NAME="../../../cloud.html" -->
+</div>
+
 <!-- TMPL_IF NAME="recentacquiloop"-->
     <div class="searchresults">
         <table>
@@ -53,7 +57,7 @@
                     <!-- TMPL_IF name="BiblioDefaultViewmarc" -->
                     <a class="title" href="/cgi-bin/koha/opac-MARCdetail.pl?biblionumber=<!-- TMPL_VAR NAME="biblionumber" ESCAPE="URL" -->">
                     <!-- TMPL_IF NAME="title"-->
-                        <!-- TMPL_VAR NAME="title" escape="html" -->
+                        <!-- TMPL_VAR NAME="title" -->
                     <!-- TMPL_ELSE -->
                         <span class="problem">(no title)</span>
                     <!-- /TMPL_IF -->
@@ -61,7 +65,7 @@
                     <!-- TMPL_ELSE -->
                     <!-- TMPL_IF name="BiblioDefaultViewisbd" -->
                     <a class="title" href="/cgi-bin/koha/opac-ISBDdetail.pl?biblionumber=<!-- TMPL_VAR NAME="biblionumber" ESCAPE="URL" -->">
-                    <!-- TMPL_IF NAME="title" escape="html"-->
+                    <!-- TMPL_IF NAME="title"-->
                         <!-- TMPL_VAR NAME="title" -->
                     <!-- TMPL_ELSE -->
                         <span class="problem">(no title)</span>
@@ -69,7 +73,7 @@
                     </a>
                     <!-- TMPL_ELSE -->
                     <a class="title" href="/cgi-bin/koha/opac-detail.pl?biblionumber=<!-- TMPL_VAR NAME="biblionumber" ESCAPE="URL" -->">
-                    <!-- TMPL_IF NAME="title" escape="html" -->
+                    <!-- TMPL_IF NAME="title"-->
                         <!-- TMPL_VAR NAME="title" -->
                     <!-- TMPL_ELSE -->
                         <span class="problem">(no title)</span>
diff --git a/tools/create_cloud.pl b/tools/create_cloud.pl
new file mode 100755
index 0000000..a94265f
--- /dev/null
+++ b/tools/create_cloud.pl
@@ -0,0 +1,54 @@
+#!/usr/bin/perl
+
+# Copyright 2008 Tamil s.a.r.l. / www.tamil.fr
+#
+# This file is part of Koha.
+#
+# Koha is free software; you can redistribute it and/or modify it under the
+# terms of the GNU General Public License as published by the Free Software
+# Foundation; either version 2 of the License, or (at your option) any later
+# version.
+#
+# Koha is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+# A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
+# Suite 330, Boston, MA  02111-1307 USA
+
+
+use strict;
+use warnings;
+use diagnostics;
+use C4::Auth;
+use C4::Output;
+use C4::Context;
+use CGI;
+
+
+# Staff page: displays the cloud form, empties the cloud or starts a build.
+my $input = CGI->new();
+my ( $template, $loggedinuser, $cookie ) = get_template_and_user({
+    template_name   => "tools/create_cloud.tmpl",
+    query           => $input,
+    type            => "intranet",
+    authnotrequired => 0,
+    debug           => 1,
+});
+
+# Digits only: 0 empties the cloud file, 1..1000 starts the Ajax build.
+my $number_of_subjects = $input->param('number_of_subjects');
+if ( defined $number_of_subjects && $number_of_subjects =~ /\A\d+\z/ ) {
+    if ( $number_of_subjects == 0 ) {
+        my $cloud_file_name = C4::Context->config( 'opachtdocs' ) . "/cloud.html";
+        open my $fh, ">", $cloud_file_name
+            or die "Unable to create file $cloud_file_name: $!";
+        close $fh or die "Unable to close file $cloud_file_name: $!";
+        $template->param( cloud_deleted => 1 );
+    }
+    elsif ( $number_of_subjects >= 1 && $number_of_subjects <= 1000 ) {
+        $template->param( number_of_subjects => $number_of_subjects );
+    }
+}
+output_html_with_http_headers $input, $cookie, $template->output;
diff --git a/tools/create_cloud_callback.pl b/tools/create_cloud_callback.pl
new file mode 100755
index 0000000..6edfefc
--- /dev/null
+++ b/tools/create_cloud_callback.pl
@@ -0,0 +1,219 @@
+#!/usr/bin/perl
+
+# Copyright 2008 Tamil s.a.r.l. / www.tamil.fr
+#
+# This file is part of Koha.
+#
+# Koha is free software; you can redistribute it and/or modify it under the
+# terms of the GNU General Public License as published by the Free Software
+# Foundation; either version 2 of the License, or (at your option) any later
+# version.
+#
+# Koha is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+# A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
+# Suite 330, Boston, MA  02111-1307 USA
+
+
+use strict;
+use warnings;
+use diagnostics;
+use Carp;
+use C4::Auth qw/check_cookie_auth/;
+use C4::Output;
+use C4::Context;
+use CGI;
+
+my $input = CGI->new();
+
+# Ajax callback: requires a session passing check_cookie_auth for 'tools'.
+my ( $auth_status, $session_id )
+    = check_cookie_auth( $input->cookie('CGISESSID'), { tools => '*' } );
+exit 0 if $auth_status ne 'ok';
+
+#binmode( STDOUT, ':utf8' );
+print $input->header( -type => 'text/plain',
+                      -charset => 'UTF-8',
+                      '-Cache-Control' => 'no-cache' );
+
+# Digits only, within 1..1000; quoted header name above avoids "a" - "b".
+my $number_of_subjects = $input->param('number_of_subjects');
+if ( !defined $number_of_subjects || $number_of_subjects !~ /\A\d+\z/
+     || $number_of_subjects < 1 || $number_of_subjects > 1000 ) {
+    print "error";
+    exit;
+}
+
+my $index = ZebraIndex->new( 'Subject' );
+$index->scan( $number_of_subjects );
+my $html = $index->html_cloud();
+my $cloud_file_name = C4::Context->config( 'opachtdocs' ) . "/cloud.html";
+open my $fh, ">", $cloud_file_name
+    or croak "Unable to create file $cloud_file_name: $!";
+print {$fh} $html;
+close $fh or croak "Unable to write file $cloud_file_name: $!";
+exit;
+
+
+
+package ZebraIndex;
+
+
+sub new {    # constructor: ZebraIndex->new( $zebra_index_name )
+    my ( $class, $zebra_index ) = @_;
+    my %fields = (
+        zebra_index  => $zebra_index,    # name of the Zebra index to scan
+        top_terms    => undef,           # no terms until a scan stores them
+        levels_cloud => 24,              # upper bound of logarithmic weights
+    );
+    return bless \%fields, $class;
+}
+
+
+#
+# scan
+#   Scan a Zebra index, asking for 1000 terms per request, and keep the
+#   $max_terms terms having the most occurences. The running number of
+#   scanned terms is printed on STDOUT after each request.
+#
+# PARAMETERS:
+#   $max_terms    Max number of top terms
+#
+# RETURN:
+#   The top terms, also stored as an array reference in $self->{top_terms}.
+#   Each term is an array (the list is empty when nothing was scanned):
+#   [0] term
+#   [1] term number of occurences
+#   [2] term proportional relative weight in terms set E[0-1]
+#   [3] term logarithmic relative weight E [0-levels_cloud]
+#
+#   This array is sorted alphabetically by terms ([0])
+#   It can be easily sorted by occurences:
+#     @t = sort { $a->[1] <=> $b->[1] } @{$self->{top_terms}};
+#
+sub scan {
+    my $self       = shift;
+    my $index_name = $self->{ zebra_index };
+    my $max_terms  = shift;
+
+    my $zbiblio = C4::Context->Zconn( "biblioserver" );
+    $zbiblio->option( number => 1000 );
+    my @terms;                  # array of [ term, occurences ]
+    my $min_occurence_index;    # weakest kept term, known once @terms is full
+    my $from  = '0';
+    my $count = 0;
+    while (1) {
+        my $ss;
+        eval {
+            my $query = '@attr 1=' . $index_name . ' @attr 4=1 @attr 6=3 "'
+                        . $from . '"';
+            $ss = $zbiblio->scan_pqf( $query );
+        };
+        if ($@) {
+            # Retry from a shorter start term, but give up once there is
+            # nothing left to shorten instead of looping forever.
+            last if $from eq '';
+            chop $from;
+            next;
+        }
+        $ss->option( rpnCharset => 'UTF-8' );
+        last if $ss->size() == 0;
+        my $term = '';
+        my $occ  = 0;
+        for my $index ( 0..$ss->size()-1 ) {
+            ($term, $occ) = $ss->display_term($index);
+            if ( @terms < $max_terms ) {
+                push( @terms, [ $term, $occ ] );
+                $min_occurence_index = _min_occurence_index( \@terms )
+                    if @terms == $max_terms;
+            }
+            elsif ( $occ > $terms[$min_occurence_index][1] ) {
+                # Evict the weakest kept term, then look for the new one
+                $terms[$min_occurence_index] = [ $term, $occ ];
+                $min_occurence_index = _min_occurence_index( \@terms );
+            }
+        }
+        $from = $term . 'a';
+        $count += $ss->size();
+        print "$count\n";
+        STDOUT->flush();
+    }
+
+    # Nothing scanned: store an empty list rather than taking log(undef)
+    if ( !@terms ) {
+        $self->{ top_terms } = \@terms;
+        return @terms;
+    }
+
+    # Sort array of array by terms weight
+    @terms = sort { $a->[1] <=> $b->[1] } @terms;
+
+    # A relatif weight to other set terms is added to each term
+    my $min     = $terms[0][1];
+    my $log_min = log( $min );
+    my $max     = $terms[-1][1];    # last element holds the highest count
+    my $log_max = log( $max );
+    my $delta   = $max - $min;
+    my $factor;
+    if ($log_max - $log_min == 0) {
+        $log_min = $log_min - $self->{levels_cloud};
+        $factor = 1;
+    }
+    else {
+        $factor = $self->{levels_cloud} / ($log_max - $log_min);
+    }
+
+    foreach my $top_term (@terms) {    # every term, most frequent included
+        my $occurences = $top_term->[1];
+        # All counts equal: delta is 0, so skip the division
+        my $weight = $delta ? ( $occurences - $min ) / $delta : 1;
+        my $log_weight = int( (log($occurences) - $log_min) * $factor );
+        push( @{$top_term}, $weight, $log_weight );
+    }
+
+    # Sort array of array by terms alphabetical order
+    @terms = sort { $a->[0] cmp $b->[0] } @terms;
+    $self->{ top_terms } = \@terms;
+    return @terms;
+}
+
+# Index of the entry with the lowest occurence count in a reference to an
+# array of [ term, occurences ]; the last such index when several are tied.
+sub _min_occurence_index {
+    my $terms = shift;
+    my $min_index = 0;
+    for my $i ( 1..$#{$terms} ) {
+        $min_index = $i if $terms->[$i][1] <= $terms->[$min_index][1];
+    }
+    return $min_index;
+}
+
+
+#
+# Returns a HTML version of index top terms formated as a 'tag cloud':
+# one <span class="tagcloudN"> per term, N being the term logarithmic
+# weight ([3]), wrapping a link to an OPAC subject search on that term.
+#
+sub html_cloud {
+    my $self = shift;
+    my %entity_for = ( '&' => '&amp;', '<' => '&lt;', '>' => '&gt;',
+                       '"' => '&quot;' );
+    my $html = "<div class=\"subjectcloud\">\n";
+    for my $top_term ( @{ $self->{top_terms} || [] } ) {   # last term included
+        my ( $term, $level ) = @{$top_term}[ 0, 3 ];
+        # Query value: drop '(' as before, then percent-encode UTF-8 bytes
+        ( my $uri = $term ) =~ s/\(//g;
+        utf8::encode($uri) if utf8::is_utf8($uri);
+        $uri =~ s/([^A-Za-z0-9_.~-])/sprintf '%%%02X', ord $1/ge;
+        # Link text: escape the characters that are special in HTML
+        ( my $label = $term ) =~ s/([&<>"])/$entity_for{$1}/g;
+        $html .= '<span class="tagcloud' . $level . '">'
+            . '<a href="/cgi-bin/koha/opac-search.pl?q=su%3A' . $uri . '">'
+            . $label . "</a></span>\n";
+    }
+    $html .= "</div>\n";
+    return $html;
+}
-- 
1.5.5.GIT




More information about the Koha-patches mailing list