[Koha-patches] [PATCH 1/1] Bug 8218 : A script for maintenance to clean any &
Christophe Croullebois
christophe.croullebois at biblibre.com
Fri Sep 28 16:13:58 CEST 2012
Two subs written by Alex Arnaud in C4/Charset.pm, SanitizeEntity and entity_clean
I had created two scripts for Biblibre, one to be used in cron, it reindexes all transformed records.
Sophie Meynieux brought together the two scripts, she has created a batch mode, usefull but without the reindexing
So I have rewrited it and I have integrated "Fixing copyright headers and license and description"
written by Chris Cormack and "Followup: Perltidy and QA FIX" written by Jonathan Druart
Thx to all
---
C4/Charset.pm | 48 +++++++++
misc/maintenance/batchSanitizeEntity.pl | 165 +++++++++++++++++++++++++++++++
2 files changed, 213 insertions(+), 0 deletions(-)
create mode 100644 misc/maintenance/batchSanitizeEntity.pl
diff --git a/C4/Charset.pm b/C4/Charset.pm
index 91d43b4..f225ada 100644
--- a/C4/Charset.pm
+++ b/C4/Charset.pm
@@ -42,6 +42,7 @@ BEGIN {
StripNonXmlChars
nsb_clean
fix_unimarc_titles
+ SanitizeEntity
);
}
@@ -1232,6 +1233,53 @@ sub fix_unimarc_titles {
}
}
+=head2 SanitizeEntity
+
+=over 4
+
+SanitizeEntity($marcrecord);
+
+=back
+
+Sanitize Entity
+
+=cut
+
+sub SanitizeEntity {
+ my $record = shift;
+
+ foreach my $field ($record->fields()) {
+ if ($field->is_control_field()) {
+ $field->update(entity_clean($field->data()));
+ } else {
+ my @subfields = $field->subfields();
+ my @new_subfields;
+ foreach my $subfield (@subfields) {
+ push @new_subfields, $subfield->[0] => entity_clean($subfield->[1]);
+ }
+ if (scalar(@new_subfields) > 0) {
+ my $new_field;
+ eval {
+ $new_field = MARC::Field->new($field->tag(), $field->indicator(1), $field->indicator(2), @new_subfields);
+ };
+ if ($@) {
+ warn "error : $@";
+ } else {
+ $field->replace_with($new_field);
+ }
+
+ }
+ }
+ }
+ return $record;
+}
+
+sub entity_clean {
+ my $string=shift;
+ $string=~s/(&)(amp;)+/$1/g;
+ return $string;
+}
+
1;
=head1 AUTHOR
diff --git a/misc/maintenance/batchSanitizeEntity.pl b/misc/maintenance/batchSanitizeEntity.pl
new file mode 100644
index 0000000..7bd5e5f
--- /dev/null
+++ b/misc/maintenance/batchSanitizeEntity.pl
@@ -0,0 +1,165 @@
+#!/usr/bin/perl
+# small script that replaces '&etc.' by '&' in a record
+
+# Copyright 2012 Biblibre SARL
+#
+# This file is part of Koha.
+#
+# Koha is free software; you can redistribute it and/or modify it under the
+# terms of the GNU General Public License as published by the Free Software
+# Foundation; either version 2 of the License, or (at your option) any later
+# version.
+#
+# Koha is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with Koha; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+use Modern::Perl;
+use C4::Charset;
+use C4::Context;
+use DBI;
+use C4::Biblio;
+use Getopt::Long;
+use Pod::Usage;
+
+my ( $biblios,$run,$want_help,$batch,$cron );
+my $result = GetOptions(
+ 'where=s' => \$biblios,
+ '--run' => \$run,
+ 'help|h' => \$want_help,
+ 'batch|b' => \$batch,
+ 'cron|c' => \$cron,
+);
+
+# We check if required entries are given :
+if ( not $result or $want_help or not $biblios or not $run ) {
+ print_usage();
+ exit 0;
+}
+
+# We initialise some tools :
+my $count;
+my $bibliocount;
+my @bibliofile;
+my @biblionum;
+my @biblios;
+my $record;
+my $cleanrecord;
+
+# We first detect if we have a file or biblos directly entered by command line
+#or if we want to use findAmp() sub
+if ( $biblios eq "search" ) {
+ @biblios = findAmp();
+}
+else {
+ if ( $biblios =~ m|/| ) {
+ open( FILE, "$biblios" ) || die("Can't open $biblios");
+ @bibliofile = <FILE>;
+ close(FILE);
+ }
+ else {
+ @biblionum = split ',', $biblios;
+ }
+
+ # We take the biblios
+ @biblios = @bibliofile ? @bibliofile : @biblionum;
+}
+
+# We remove spaces
+s/(^\s*|\s*$)//g for @biblios;
+# Remove empty lines
+ at biblios = grep {!/^$/} @biblios;
+
+# We valid the input
+foreach my $biblio (@biblios) {
+ if ( $biblio !~ /^\d+$/ ) {
+ print
+"=============== \"$biblio\" is not a biblionumber !!! ===============\n";
+ print "=============== please verify \"$biblio\" !!! ===============\n";
+ $count++;
+ }
+ exit(1) if $count;
+}
+
+$bibliocount = scalar @biblios;
+
+print
+"=============== $bibliocount Biblio(s) ready to be cleaned ===============\n";
+if ( !$batch or !$cron ) {
+ my $confirm = 0;
+ print "=============== You are ok ? Types yes/no :\n";
+ while ( $confirm == 0 ) {
+ my $prout = <STDIN>;
+ if ($prout eq "yes\n") {
+ $confirm = 1;
+ }
+ elsif ($prout eq "no\n") {
+ print "=============== Ok, bye ===============\n";
+ exit(0);
+ }
+ else {
+ print "======= Bad answer please types 'yes' or 'no' !!!!!!!!!!! ===============\n";
+ }
+ }
+}
+
+foreach my $biblio ( @biblios ) {
+ print "=============== N° $biblio selected ===============\n";
+ $record = GetMarcBiblio($biblio);
+ $cleanrecord = SanitizeEntity($record);
+ my $frameworkcode = GetFrameworkCode($biblio);
+ ModBiblio( $cleanrecord, $biblio, $frameworkcode );
+ print "=============== Biblio n° $biblio cleaning done ===============\n";
+ $count++;
+}
+
+print "==============================================================\n";
+print "=============== $count Biblios cleaned ===============\n";
+
+if ( $cron ) {
+ print "==============================================================\n";
+ print
+"========= Now we are reindexing -b -v -where \"biblionumber=xxx\" =========\n";
+ print "==============================================================\n";
+ $count = 0;
+ my $kohapath = C4::Context->config('intranetdir');
+ foreach my $biblio ( @biblios ) {
+ system("$kohapath/misc/migration_tools/rebuild_zebra.pl
+ -b -v -where \"biblionumber=$biblio\"");
+ print "========= Biblio n° $biblio re-indexing done =========\n";
+ $count ++;
+}
+ print "=============== $count Biblios re-indexed ===============\n";
+}
+
+sub print_usage {
+ print <<_USAGE_;
+$0: replaces '&' by '&' in a record, you can either give some biblionumbers or a file with biblionumbers or
+
+Parameters:
+ -where use this to give biblionumbers in a string with "" (separated by coma)
+ or an absolute path to a file containing biblionumbers (1 by row)
+ or the command 'search' that creates an array with biblionumbers with "&amp;..."
+ --run run the command
+ --batch or -b run in batch mode
+ --cron or -c run in cron mode, it reindexes the changed records (you can redirect the output to a file)
+ --help or -h show this message.
+_USAGE_
+}
+
+sub findAmp {
+ my @bibliosearch;
+ my $dbh = C4::Context->dbh;
+ my $strsth =
+ qq{SELECT biblionumber FROM biblioitems WHERE marcxml LIKE "%amp;amp;%"};
+ my $sth = $dbh->prepare($strsth);
+ $sth->execute();
+ while ( my $bib = $sth-> fetchrow_array() ) {
+ push @bibliosearch, $bib;
+ }
+ return @bibliosearch;
+}
--
1.7.0.4
More information about the Koha-patches
mailing list