[Koha-patches] [PATCH] OPAC new subjects cloud using Zebra scan on Subject field (2nd patch)
Frederic Demians
f.demians at tamil.fr
Fri Oct 3 08:49:51 CEST 2008
This patch implements a new subject cloud. The subject cloud is created in
the pro interface:
[1] /tools/create_cloud.pl creates a static file:
/koha-tmpl/opac-tmpl/cloud.html
Zebra scan is used to find the top subjects, i.e. the subjects
which occur most often in the whole catalogue.
Write permission on /koha-tmpl/opac-tmpl is required.
[2] opac-main.pl displays the cloud if the cloud file is
present.
Compared to the previous cloud, it has these advantages:
- It doesn't require a batch job.
- Speed--It uses Zebra and so does not have to scan the whole
MySQL DB, read and decode MARC records, etc. Reading a
subjects cloud table is no longer required.
- Scalable--It doesn't load all subjects into memory but
only a maximum of 1000 subjects. It will continue to
work with a large DB.
- Displays subjects with a size proportional to their
weight in the set.
---
.../intranet-tmpl/prog/en/includes/tools-menu.inc | 1 +
.../prog/en/modules/tools/tools-home.tmpl | 4 +
koha-tmpl/opac-tmpl/prog/en/css/opac.css | 59 ++++++-
koha-tmpl/opac-tmpl/prog/en/modules/opac-main.tmpl | 10 +-
tools/create_cloud.pl | 200 ++++++++++++++++++++
5 files changed, 270 insertions(+), 4 deletions(-)
create mode 100755 tools/create_cloud.pl
diff --git a/koha-tmpl/intranet-tmpl/prog/en/includes/tools-menu.inc b/koha-tmpl/intranet-tmpl/prog/en/includes/tools-menu.inc
index 2c94cb3..486eacb 100644
--- a/koha-tmpl/intranet-tmpl/prog/en/includes/tools-menu.inc
+++ b/koha-tmpl/intranet-tmpl/prog/en/includes/tools-menu.inc
@@ -10,6 +10,7 @@
<!-- TMPL_IF NAME="CAN_user_tools_edit_news" -->
<li><a href="/cgi-bin/koha/tools/koha-news.pl">News</a></li>
<!-- /TMPL_IF -->
+ <li><a href="/cgi-bin/koha/tools/create_cloud.pl">Create Subjects Cloud</a></li>
<!-- TMPL_IF NAME="CAN_user_tools_label_creator" -->
<li><a href="/cgi-bin/koha/labels/label-home.pl">Labels (spine and barcode)</a></li>
<!-- /TMPL_IF -->
diff --git a/koha-tmpl/intranet-tmpl/prog/en/modules/tools/tools-home.tmpl b/koha-tmpl/intranet-tmpl/prog/en/modules/tools/tools-home.tmpl
index 75d8104..218bf16 100644
--- a/koha-tmpl/intranet-tmpl/prog/en/modules/tools/tools-home.tmpl
+++ b/koha-tmpl/intranet-tmpl/prog/en/modules/tools/tools-home.tmpl
@@ -18,6 +18,10 @@
<dt><a href="/cgi-bin/koha/tools/koha-news.pl">News</a></dt>
<dd>Write news for the OPAC and staff interfaces</dd>
<!-- /TMPL_IF -->
+
+ <dt><a href="/cgi-bin/koha/tools/create_cloud.pl">Subjects Cloud</a></dt>
+ <dd>Create a cloud of top subjects extracted from Catalogue</dd>
+
<!-- TMPL_IF NAME="CAN_user_tools_label_creator" -->
<dt><a href="/cgi-bin/koha/labels/label-home.pl">Label and Patron Card Creator</a></dt>
diff --git a/koha-tmpl/opac-tmpl/prog/en/css/opac.css b/koha-tmpl/opac-tmpl/prog/en/css/opac.css
index b6cec6d..cc7ee5f 100644
--- a/koha-tmpl/opac-tmpl/prog/en/css/opac.css
+++ b/koha-tmpl/opac-tmpl/prog/en/css/opac.css
@@ -1727,4 +1727,61 @@ table#items th {
}
#action {
margin-top: 0;
-}
\ No newline at end of file
+}
+
+/* Container of the subject cloud markup written by tools/create_cloud.pl. */
+.subjectcloud {
+ text-align: center;
+ line-height: 16px;
+ margin: 20px;
+}
+/*
+ * Cloud entries. Levels tagcloud0 to tagcloud24 grow the font size by 1px
+ * per level, from 12px up to 36px. Links inside an entry are not underlined.
+ */
+span.tagcloud0 { font-size: 12px;}
+span.tagcloud0 a {text-decoration: none;}
+span.tagcloud1 { font-size: 13px;}
+span.tagcloud1 a {text-decoration: none;}
+span.tagcloud2 { font-size: 14px;}
+span.tagcloud2 a {text-decoration: none;}
+span.tagcloud3 { font-size: 15px;}
+span.tagcloud3 a {text-decoration: none;}
+span.tagcloud4 { font-size: 16px;}
+span.tagcloud4 a {text-decoration: none;}
+span.tagcloud5 { font-size: 17px;}
+span.tagcloud5 a {text-decoration: none;}
+span.tagcloud6 { font-size: 18px;}
+span.tagcloud6 a {text-decoration: none;}
+span.tagcloud7 { font-size: 19px;}
+span.tagcloud7 a {text-decoration: none;}
+span.tagcloud8 { font-size: 20px;}
+span.tagcloud8 a {text-decoration: none;}
+span.tagcloud9 { font-size: 21px;}
+span.tagcloud9 a {text-decoration: none;}
+span.tagcloud10 { font-size: 22px;}
+span.tagcloud10 a {text-decoration: none;}
+span.tagcloud11 { font-size: 23px;}
+span.tagcloud11 a {text-decoration: none;}
+span.tagcloud12 { font-size: 24px;}
+span.tagcloud12 a {text-decoration: none;}
+span.tagcloud13 { font-size: 25px;}
+span.tagcloud13 a {text-decoration: none;}
+span.tagcloud14 { font-size: 26px;}
+span.tagcloud14 a {text-decoration: none;}
+span.tagcloud15 { font-size: 27px;}
+span.tagcloud15 a {text-decoration: none;}
+span.tagcloud16 { font-size: 28px;}
+span.tagcloud16 a {text-decoration: none;}
+span.tagcloud17 { font-size: 29px;}
+span.tagcloud17 a {text-decoration: none;}
+span.tagcloud18 { font-size: 30px;}
+span.tagcloud18 a {text-decoration: none;}
+span.tagcloud19 { font-size: 31px;}
+span.tagcloud19 a {text-decoration: none;}
+span.tagcloud20 { font-size: 32px;}
+span.tagcloud20 a {text-decoration: none;}
+span.tagcloud21 { font-size: 33px;}
+span.tagcloud21 a {text-decoration: none;}
+span.tagcloud22 { font-size: 34px;}
+span.tagcloud22 a {text-decoration: none;}
+span.tagcloud23 { font-size: 35px;}
+span.tagcloud23 a {text-decoration: none;}
+span.tagcloud24 { font-size: 36px;}
+span.tagcloud24 a {text-decoration: none;}
+
diff --git a/koha-tmpl/opac-tmpl/prog/en/modules/opac-main.tmpl b/koha-tmpl/opac-tmpl/prog/en/modules/opac-main.tmpl
index a64408d..c8c4404 100644
--- a/koha-tmpl/opac-tmpl/prog/en/modules/opac-main.tmpl
+++ b/koha-tmpl/opac-tmpl/prog/en/modules/opac-main.tmpl
@@ -37,6 +37,10 @@
<!-- TMPL_IF NAME="OpacMainUserBlock" --><div id="opacmainuserblock" class="container"><!-- TMPL_VAR NAME="OpacMainUserBlock" --></div><!-- /TMPL_IF -->
+<div id="OpacMainCloud" class="container">
+ <!-- TMPL_INCLUDE NAME="../../../cloud.html" -->
+</div>
+
<!-- TMPL_IF NAME="recentacquiloop"-->
<div class="searchresults">
<table>
@@ -53,7 +57,7 @@
<!-- TMPL_IF name="BiblioDefaultViewmarc" -->
<a class="title" href="/cgi-bin/koha/opac-MARCdetail.pl?biblionumber=<!-- TMPL_VAR NAME="biblionumber" ESCAPE="URL" -->">
<!-- TMPL_IF NAME="title"-->
- <!-- TMPL_VAR NAME="title" escape="html" -->
+ <!-- TMPL_VAR NAME="title" -->
<!-- TMPL_ELSE -->
<span class="problem">(no title)</span>
<!-- /TMPL_IF -->
@@ -61,7 +65,7 @@
<!-- TMPL_ELSE -->
<!-- TMPL_IF name="BiblioDefaultViewisbd" -->
<a class="title" href="/cgi-bin/koha/opac-ISBDdetail.pl?biblionumber=<!-- TMPL_VAR NAME="biblionumber" ESCAPE="URL" -->">
- <!-- TMPL_IF NAME="title" escape="html"-->
+ <!-- TMPL_IF NAME="title"-->
<!-- TMPL_VAR NAME="title" -->
<!-- TMPL_ELSE -->
<span class="problem">(no title)</span>
@@ -69,7 +73,7 @@
</a>
<!-- TMPL_ELSE -->
<a class="title" href="/cgi-bin/koha/opac-detail.pl?biblionumber=<!-- TMPL_VAR NAME="biblionumber" ESCAPE="URL" -->">
- <!-- TMPL_IF NAME="title" escape="html" -->
+ <!-- TMPL_IF NAME="title"-->
<!-- TMPL_VAR NAME="title" -->
<!-- TMPL_ELSE -->
<span class="problem">(no title)</span>
diff --git a/tools/create_cloud.pl b/tools/create_cloud.pl
new file mode 100755
index 0000000..e741e0b
--- /dev/null
+++ b/tools/create_cloud.pl
@@ -0,0 +1,200 @@
+#!/usr/bin/perl
+
+# Copyright 2008 Tamil s.a.r.l. / www.tamil.fr
+#
+# This file is part of Koha.
+#
+# Koha is free software; you can redistribute it and/or modify it under the
+# terms of the GNU General Public License as published by the Free Software
+# Foundation; either version 2 of the License, or (at your option) any later
+# version.
+#
+# Koha is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
+# Suite 330, Boston, MA 02111-1307 USA
+
+
+use strict;
+use warnings;
+use diagnostics;
+use C4::Auth;
+use C4::Output;
+use C4::Context;
+use CGI;
+use URI::Escape;
+
+
+my $input = CGI->new();
+
+# NOTE(review): no 'flagsrequired' is passed here, so presumably any
+# authenticated staff user can (re)generate or empty the OPAC cloud --
+# confirm whether a tools permission should be required.
+my ( $template, $loggedinuser, $cookie ) = get_template_and_user({
+    template_name   => "tools/create_cloud.tmpl",
+    query           => $input,
+    type            => "intranet",
+    authnotrequired => 0,
+    debug           => 1,
+});
+
+# Form handling:
+#   - parameter absent or not a whole number: just display the form;
+#   - 0: empty the cloud file (it is truncated, not removed);
+#   - 2..1000: scan the Subject index and write a new cloud file.
+#
+# The parameter must be tested with 'defined': a numeric comparison
+# against undef is a comparison against 0, which made the "0" branch
+# unreachable.
+my $number_of_subjects = $input->param('number_of_subjects');
+if ( defined $number_of_subjects && $number_of_subjects =~ /\A[0-9]+\z/ ) {
+    my $cloud_file_name = C4::Context->config( 'opachtdocs' ) . "/cloud.html";
+    if ( $number_of_subjects == 0 ) {
+        _write_cloud_file( $cloud_file_name, '' );
+        $template->param( cloud_deleted => 1 );
+    }
+    elsif ( $number_of_subjects > 1 && $number_of_subjects <= 1000 ) {
+        my $index = ZebraIndex->new( 'Subject' );
+        $index->scan( $number_of_subjects ); #FIXME: add some UI feedback...
+        my $html = $index->html_cloud();
+        $template->param( cloud => $html );
+        _write_cloud_file( $cloud_file_name, $html );
+    }
+}
+output_html_with_http_headers $input, $cookie, $template->output;
+
+
+# Replace the content of the cloud file with $content.
+# Dies with the system error when the file cannot be opened or written.
+sub _write_cloud_file {
+    my ( $file_name, $content ) = @_;
+    open my $fh, ">", $file_name
+        or die "Unable to create file $file_name: $!";
+    print {$fh} $content;
+    # Buffered write errors only surface when the handle is closed.
+    close $fh
+        or die "Unable to write file $file_name: $!";
+    return;
+}
+
+
+
+package ZebraIndex;
+
+
+# Constructor.
+#
+# PARAMETERS:
+#   $index_name  Name of the Zebra index to work on, stored in
+#                $self->{zebra_index}
+#
+# $self->{top_terms} starts undefined.
+sub new {
+    my ( $class, $index_name ) = @_;
+    my %fields = (
+        zebra_index => $index_name,
+        top_terms   => undef,
+    );
+    return bless \%fields, $class;
+}
+
+
+#
+# scan
+#   Scan zebra index and populate an array of top terms
+#
+# PARAMETERS:
+#   $max_terms  Max number of top terms
+#
+# RETURN:
+#   A reference to an array of arrays, also stored in $self->{top_terms}.
+#   Each entry is:
+#     [0] term
+#     [1] term number of occurrences
+#     [2] term relative weight within the terms set, in [0, 1]
+#
+#   This array is sorted alphabetically by terms ([0])
+#   It can be easily sorted by occurrences:
+#     @t = sort { $a->[1] <=> $b->[1] } @returned_terms;
+#
+sub scan {
+    my $self       = shift;
+    my $index_name = $self->{ zebra_index };
+    my $max_terms  = shift;
+
+    # Nothing to keep: do not even query Zebra.
+    if ( !$max_terms || $max_terms < 1 ) {
+        $self->{ top_terms } = [];
+        return $self->{ top_terms };
+    }
+
+    my $zbiblio = C4::Context->Zconn( "biblioserver" );
+    my @terms;                      # array of [ term, occurrences ]
+    my $min_occurence_index = -1;   # position of the least frequent kept term
+    my $min_occurence;              # its number of occurrences
+    my $from = '0';                 # term the next scan starts from
+    while (1) {
+        my $ss;
+        eval {
+            my $query = '@attr 1=' . $index_name . ' @attr 4=1 @attr 6=3 "'
+                        . $from . '"';
+            $ss = $zbiblio->scan_pqf( $query );
+        };
+        if ($@) {
+            # Retry from a shorter start term. Once nothing is left to
+            # trim, retrying would loop forever: give up and keep what has
+            # been collected so far.
+            if ( $from eq '' ) {
+                warn "Zebra scan on index $index_name failed: $@";
+                last;
+            }
+            chop $from;
+            next;
+        }
+        $ss->option( rpnCharset => 'UTF-8' );
+        my $size = $ss->size();
+        last if $size == 0;
+        my $term = '';
+        my $occ  = 0;
+        for my $index ( 0 .. $size - 1 ) {
+            ( $term, $occ ) = $ss->display_term($index);
+            if ( @terms < $max_terms ) {
+                # Still filling the set. As soon as it is full, locate its
+                # least frequent term: it is the next one to be evicted.
+                push @terms, [ $term, $occ ];
+                if ( @terms == $max_terms ) {
+                    ( $min_occurence_index, $min_occurence )
+                        = _least_frequent( \@terms );
+                }
+            }
+            elsif ( $occ > $min_occurence ) {
+                # The set is full: a more frequent term takes the place of
+                # the least frequent one.
+                $terms[$min_occurence_index] = [ $term, $occ ];
+                ( $min_occurence_index, $min_occurence )
+                    = _least_frequent( \@terms );
+            }
+        }
+        # Next scan starts right after the last term seen.
+        $from = $term . 'a';
+    }
+
+    # Sort array of array by terms alphabetical order
+    @terms = sort { $a->[0] cmp $b->[0] } @terms;
+
+    # A weight relative to the other terms of the set is added to each term.
+    # The bounds are the real min/max of the occurrences: after the
+    # alphabetical sort, the first and last entries are not the extremes.
+    if (@terms) {
+        my $min = $terms[0][1];
+        my $max = $terms[0][1];
+        for my $entry (@terms) {
+            $min = $entry->[1] if $entry->[1] < $min;
+            $max = $entry->[1] if $entry->[1] > $max;
+        }
+        my $delta = $max - $min;
+        for my $entry (@terms) {
+            # All terms equally frequent: no division by zero, weight is 0.
+            push @{$entry}, $delta ? ( $entry->[1] - $min ) / $delta : 0;
+        }
+    }
+    $self->{ top_terms } = \@terms;
+    return $self->{ top_terms };
+}
+
+
+#
+# Returns ( index, occurrences ) of the least frequent entry of a non-empty
+# array of [ term, occurrences ]. On ties, the last entry wins.
+#
+sub _least_frequent {
+    my ($terms) = @_;
+    my $min_index = 0;
+    my $min_occ   = $terms->[0][1];
+    for my $i ( 1 .. $#{$terms} ) {
+        if ( $terms->[$i][1] <= $min_occ ) {
+            $min_occ   = $terms->[$i][1];
+            $min_index = $i;
+        }
+    }
+    return ( $min_index, $min_occ );
+}
+
+
+#
+# Returns a HTML version of index top terms formatted
+# as a 'tag cloud'.
+#
+# Reads $self->{top_terms}: a reference to an array of
+# [ term, occurrences, weight ] entries. Each term becomes a
+# <span class="tagcloudN"> wrapping a link to an OPAC subject search,
+# N being derived from the weight and kept between 0 and 24.
+# An undefined or empty term list gives an empty cloud.
+#
+sub html_cloud {
+    my $self  = shift;
+    my @terms = @{ $self->{top_terms} || [] };
+
+    # Highest level emitted: a weight of 1 would otherwise give level 25.
+    my $max_level = 24;
+
+    my $html = "<div class=\"subjectcloud\">\n";
+    for my $entry (@terms) {    # every term, the last one included
+        my ( $term, undef, $weight ) = @{$entry};
+        next unless defined $term;
+
+        # A missing weight is displayed at the smallest size.
+        $weight = 0 unless defined $weight;
+        my $level = int( $weight * 25 );
+        $level = 0          if $level < 0;
+        $level = $max_level if $level > $max_level;
+
+        # NOTE(review): the original dropped '(' only; ')' is dropped as
+        # well so the query never carries an unbalanced parenthesis --
+        # confirm against opac-search.pl.
+        ( my $query = $term ) =~ tr/()//d;
+
+        $html .= '<span class="tagcloud' . $level . '">'
+               . '<a href="/cgi-bin/koha/opac-search.pl?q=su%3A'
+               . _uri_escape($query)
+               . '">'
+               . _html_escape($term)
+               . "</a></span>\n";
+    }
+    $html .= "</div>\n";
+    return $html;
+}
+
+
+#
+# Percent-encodes every byte of $text except the characters
+# A-Z a-z 0-9 - _ . ~ ; character strings are encoded as UTF-8 first.
+#
+sub _uri_escape {
+    my ($text) = @_;
+    utf8::encode($text) if utf8::is_utf8($text);
+    $text =~ s/([^A-Za-z0-9\-_.~])/sprintf( '%%%02X', ord $1 )/ge;
+    return $text;
+}
+
+
+#
+# Escapes & < > and " so that $text can be used as HTML text or inside a
+# double-quoted attribute value.
+#
+sub _html_escape {
+    my ($text) = @_;
+    $text =~ s/&/&amp;/g;
+    $text =~ s/</&lt;/g;
+    $text =~ s/>/&gt;/g;
+    $text =~ s/"/&quot;/g;
+    return $text;
+}
--
1.5.5.GIT
More information about the Koha-patches
mailing list