[Koha-patches] [PATCH] New subs for moderation, fixed bug to allow multiple sort fields, added POD.

Joe Atzberger joe.atzberger at liblime.com
Mon May 19 23:23:36 CEST 2008


---
 C4/Tags.pm |  240 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 files changed, 222 insertions(+), 18 deletions(-)

diff --git a/C4/Tags.pm b/C4/Tags.pm
index 0eebb0c..4d3c1c1 100644
--- a/C4/Tags.pm
+++ b/C4/Tags.pm
@@ -26,7 +26,7 @@ use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
 use vars qw($ext_dict $select_all @fields);
 
 BEGIN {
-	$VERSION = 0.01;
+	$VERSION = 0.02;
 	@ISA = qw(Exporter);
 	@EXPORT_OK = qw(
 		&get_tag &get_tags &get_tag_rows
@@ -35,6 +35,10 @@ BEGIN {
 		&remove_tag
 		&delete_tag_rows_by_ids
 		&rectify_weights
+		&get_approval_rows
+		&blacklist
+		&whitelist
+		&is_approved
 	);
 	# %EXPORT_TAGS = ();
 	$ext_dict = C4::Context->preference('TagsExternalDictionary');
@@ -45,7 +49,7 @@ BEGIN {
 	}
 	if ($ext_dict) {
 		require Lingua::Ispell;
-		import Lingua::Ispell qw(spellcheck);
+		import Lingua::Ispell qw(spellcheck add_word_lc save_dictionary);
 	}
 }
 
@@ -128,7 +132,7 @@ sub get_tag_rows ($) {
 		}
 		if ($key =~ /^limit$/i) {
 			my $val = $hash->{$key};
-			unless ($val =~ /^\d+$/) {
+			unless ($val =~ /^(\d+,)?\d+$/) {
 				carp "Non-nuerical limit value '$val' ignored!";
 				next;
 			}
@@ -151,7 +155,6 @@ sub get_tag_rows ($) {
 }
 
 sub get_tags (;$) {		# i.e., from tags_index
-	# my $self = shift;
 	my $hash = shift || {};
 	my @ok_fields = qw(term biblionumber weight limit sort);
 	my $wheres;
@@ -170,7 +173,7 @@ sub get_tags (;$) {		# i.e., from tags_index
 		}
 		if ($key =~ /^limit$/i) {
 			my $val = $hash->{$key};
-			unless ($val =~ /^\d+$/) {
+			unless ($val =~ /^(\d+,)?\d+$/) {
 				carp "Non-nuerical limit value '$val' ignored!";
 				next;
 			}
@@ -185,7 +188,12 @@ sub get_tags (;$) {		# i.e., from tags_index
 					carp "get_tags received illegal sort order '$by'";
 					next;
 				}
-				$order .= " ORDER BY $2 " . ($1 eq '-' ? 'DESC' : $1 eq '+' ? 'ASC' : '') . "\n";
+				if ($order) {
+					$order .= ", ";
+				} else {
+					$order = " ORDER BY ";
+				}
+				$order .= $2 . " " . ((!$1) ? '' : $1 eq '-' ? 'DESC' : $1 eq '+' ? 'ASC' : '') . "\n";
 			}
 			
 		} else {
@@ -212,14 +220,87 @@ sub get_tags (;$) {		# i.e., from tags_index
 	return $sth->fetchall_arrayref({});
 }
 
+sub get_approval_rows (;$) {		# i.e., from tags_approval
+	my $hash = shift || {};
+	my @ok_fields = qw(term approved date_approved approved_by weight_total limit sort);
+	my $wheres;
+	my $limit  = "";
+	my $order  = "";
+	my @exe_args = ();
+	foreach my $key (keys %$hash) {
+		$debug and print STDERR "get_approval_rows arg. '$key' = ", $hash->{$key}, "\n";
+		unless (length $key) {
+			carp "Empty argument key to get_approval_rows: ignoring!";
+			next;
+		}
+		unless (1 == scalar grep {/^ $key $/xi} @ok_fields) {
+			carp "get_approval_rows received unreconized argument key '$key'.";
+			next;
+		}
+		if ($key =~ /^limit$/i) {
+			my $val = $hash->{$key};
+			unless ($val =~ /^(\d+,)?\d+$/) {
+				carp "Non-nuerical limit value '$val' ignored!";
+				next;
+			}
+			$limit = " LIMIT $val\n";
+		} elsif ($key =~ /^sort$/i) {
+			foreach my $by (split /\,/, $hash->{$key}) {
+				unless (
+					$by =~ /^([-+])?(term)/            or
+					$by =~ /^([-+])?(biblionumber)/    or
+					$by =~ /^([-+])?(weight_total)/    or
+					$by =~ /^([-+])?(approved(_by)?)/  or
+					$by =~ /^([-+])?(date_approved)/
+				) {
+					carp "get_approval_rows received illegal sort order '$by'";
+					next;
+				}
+				if ($order) {
+					$order .= ", ";
+				} else {
+					$order = " ORDER BY " unless $order;
+				}
+				$order .= $2 . " " . ((!$1) ? '' : $1 eq '-' ? 'DESC' : $1 eq '+' ? 'ASC' : '') . "\n";
+			}
+			
+		} else {
+			my $whereval = $key;
+			# ($key =~ /^term$/i) and $whereval = 'tags_index.term';
+			$wheres .= ($wheres) ? " AND    $whereval = ?\n" : " WHERE  $whereval = ?\n";
+			push @exe_args, $hash->{$key};
+		}
+	}
+	my $query = "
+	SELECT 	tags_approval.term          AS term,
+			tags_approval.approved      AS approved,
+			tags_approval.date_approved AS date_approved,
+			tags_approval.approved_by   AS approved_by,
+			tags_approval.weight_total  AS weight_total,
+			CONCAT(borrowers.surname, ', ', borrowers.firstname) AS approved_by_name
+	FROM 	tags_approval
+	LEFT JOIN borrowers
+	ON      tags_approval.approved_by = borrowers.borrowernumber ";
+	$query .= ($wheres||'') . $order . $limit;
+	$debug and print STDERR "get_approval_rows query:\n $query\n",
+							"get_approval_rows query args: ", join(',', @exe_args), "\n";
+	my $sth = C4::Context->dbh->prepare($query);
+	if (@exe_args) {
+		$sth->execute(@exe_args);
+	} else {
+		$sth->execute;
+	}
+	return $sth->fetchall_arrayref({});
+}
+
 sub is_approved ($) {
 	my $term = shift or return undef;
-	if ($ext_dict) {
-		return (spellcheck($term) ? 0 : 1);
-	}
 	my $sth = C4::Context->dbh->prepare("SELECT approved FROM tags_approval WHERE term = ?");
 	$sth->execute($term);
-	$sth->rows or return undef;
+	unless ($sth->rows) {
+		$ext_dict and return (spellcheck($term) ? 0 : 1);
+		return undef;
+	}
 	return $sth->fetch;
 }
 
@@ -236,18 +317,70 @@ sub get_tag_index ($;$) {
 	return $sth->fetchrow_hashref;
 }
 
-sub add_tag_approval ($;$) {
+sub whitelist {
+	my $operator = shift;
+	defined $operator or return undef; # have to test defined to allow =0 (kohaadmin)
+	if ($ext_dict) {
+		foreach (@_) {
+			spellcheck($_) or next;
+			add_word_lc($_);
+		}
+	}
+	foreach (@_) {
+		my $aref = get_approval_rows({term=>$_});
+		if ($aref and scalar @$aref) {
+			mod_tag_approval($operator,$_,1);
+		} else {
+			add_tag_approval($_,$operator);
+		}
+	}
+	return scalar @_;
+}
+# note: there is no "unwhitelist" operation because there is no remove for Ispell.
+# The blacklist regexps should operate "in front of" the whitelist, so if you approve
+# a term mistakenly, you can still reverse it. But there is no going back to "neutral".
+sub blacklist {
+	my $operator = shift;
+	defined $operator or return undef; # have to test defined to allow =0 (kohaadmin)
+	foreach (@_) {
+		my $aref = get_approval_rows({term=>$_});
+		if ($aref and scalar @$aref) {
+			mod_tag_approval($operator,$_,-1);
+		} else {
+			add_tag_approval($_,$operator,-1);
+		}
+	}
+	return scalar @_;
+}
+sub add_filter {
+	my $operator = shift;
+	defined $operator or return undef; # have to test defined to allow =0 (kohaadmin)
+	my $query = "INSERT INTO tags_blacklist (regexp,y,z) VALUES (?,?,?)";
+	# my $sth = C4::Context->dbh->prepare($query);
+	return scalar @_;
+}
+sub remove_filter {
+	my $operator = shift;
+	defined $operator or return undef; # have to test defined to allow =0 (kohaadmin)
+	my $query = "REMOVE FROM tags_blacklist WHERE blacklist_id = ?";
+	# my $sth = C4::Context->dbh->prepare($query);
+	# $sth->execute($term);
+	return scalar @_;
+}
+
+sub add_tag_approval ($;$$) {	# or disapproval
 	my $term = shift or return undef;
 	my $query = "SELECT * FROM tags_approval WHERE term = ?";
 	my $sth = C4::Context->dbh->prepare($query);
 	$sth->execute($term);
 	($sth->rows) and return increment_weight_total($term);
-	my $ok = (@_ ? shift : 0);
-	if ($ok) {
-		$query = "INSERT INTO tags_approval (term,approved_by,approved,date_approved) VALUES (?,?,1,NOW())";
-		$debug and print STDERR "add_tag_approval query:\n$query\nadd_tag_approval args: ($term,$ok)\n";
+	my $operator = (@_ ? shift : 0);
+	if ($operator) {
+		my $approval = (@_ ? shift : 1); # default is to approve
+		$query = "INSERT INTO tags_approval (term,approved_by,approved,date_approved) VALUES (?,?,?,NOW())";
+		$debug and print STDERR "add_tag_approval query:\n$query\nadd_tag_approval args: ($term,$operator,$approval)\n";
 		$sth = C4::Context->dbh->prepare($query);
-		$sth->execute($term,$ok);
+		$sth->execute($term,$operator,$approval);
 	} else {
 		$query = "INSERT INTO tags_approval (term,date_approved) VALUES (?,NOW())";
 		$debug and print STDERR "add_tag_approval query:\n$query\nadd_tag_approval args: ($term)\n";
@@ -257,6 +390,16 @@ sub add_tag_approval ($;$) {
 	return $sth->rows;
 }
 
+sub mod_tag_approval ($$$) {
+	my $operator = shift or return undef;
+	my $term     = shift or return undef;
+	my $approval = (@_ ? shift : 1);	# default is to approve
+	my $query = "UPDATE tags_approval SET approved_by=?, approved=?, date_approved=NOW() WHERE term = ?";
+	$debug and print STDERR "mod_tag_approval query:\n$query\nmod_tag_approval args: ($operator,$approval,$term)\n";
+	my $sth = C4::Context->dbh->prepare($query);
+	$sth->execute($operator,$approval,$term);
+}
+
 sub add_tag_index ($$;$) {
 	my $term         = shift or return undef;
 	my $biblionumber = shift or return undef;
@@ -311,7 +454,7 @@ sub increment_weights ($$) {
 }
 sub decrement_weights ($$) {
 	decrement_weight(@_);
-	derement_weight_total(shift);
+	decrement_weight_total(shift);
 }
 sub increment_weight_total ($) {
 	_set_weight_total('weight_total+1',shift);
@@ -330,7 +473,7 @@ sub _set_weight_total ($$) {
 	UPDATE tags_approval
 	SET    weight_total=" . (shift) . "
 	WHERE  term=?
-	");
+	");						# note: CANNOT use "?" for weight_total (see the args above).
 	$sth->execute(shift);	# just the term
 }
 sub _set_weight ($$$) {
@@ -383,6 +526,67 @@ More verose debugging messages are sent in the presence of non-zero $ENV{"DEBUG"
 
 =head3 TO DO: Add real perldoc
 
+=head2 External Dictionary (Ispell) [Recommended]
+
+An external dictionary can be used as a means of "pre-populating" and tracking
+allowed terms based on the widely available Ispell dictionary.  This can be the system
+dictionary or a personal version, but in order to support whitelisting, it must be
+editable to the process running Koha.  
+
+To enable, enter the absolute path to the ispell dictionary in the system
+preference "TagsExternalDictionary".
+
+Using external Ispell is recommended for both ease of use and performance.  Note that any
+language version of Ispell can be installed.  It is also possible to modify the dictionary 
+at the command line to affect the desired content.
+
+=head2 Table Structure
+
+The tables used by tags are:
+	tags_all
+	tags_index
+	tags_approval
+	tags_blacklist
+
+Your first thought may be that this looks a little complicated.  It is, but only because
+it has to be.  I'll try to explain.
+
+tags_all - This table would be all we really need if we didn't care about moderation or
+performance or tags disappearing when borrowers are removed.  Too bad, we do.  Otherwise
+though, it contains all the relevant info about a given tag:
+	tag_id         - unique id number for it
+	borrowernumber - user that entered it
+	biblionumber   - book record it is attached to
+	term           - tag "term" itself
+	language       - perhaps used later to influence weighting
+	date_created   - date and time it was created
+
+tags_approval - Since we need to provide moderation, this table is used to track it.  If no
+external dictionary is used, this table is the sole reference for approval and rejection.
+With an external dictionary, it tracks pending terms and past whitelist/blacklist actions.
+This could be called an "approved terms" table.  See above regarding the External Dictionary.
+	term           - tag "term" itself 
+	approved       - Negative, 0 or positive if tag is rejected, pending or approved.
+	date_approved  - date of last action
+	approved_by    - staffer performing the last action
+	weight_total   - total occurance of term in any biblio by any users
+
+tags_index - This table is for performance, because by far the most common operation will 
+be fetching tags for a list of search results.  We will have a set of biblios, and we will
+want ONLY their approved tags and overall weighting.  While we could implement a query that
+would traverse tags_all filtered against tags_approval, the performance implications of
+trying to calculate that and the "weight" (number of times a tag appears) on the fly are drastic.
+	term           - approved term as it appears in tags_approval
+	biblionumber   - book record it is attached to
+	weight         - number of times tag applied by any user
+
+tags_blacklist - TODO
+
+So the best way to think about the different tabes is that they are each tailored to a certain
+use.  Note that tags_approval and tags_index do not rely on the user's borrower mapping, so
+the tag population can continue to grow even if a user is removed, along with the corresponding
+rows in tags_all.  
+
 =head2 Tricks
 
 If you want to auto-populate some tags for debugging, do something like this:
-- 
1.5.5.GIT




More information about the Koha-patches mailing list