[Koha-patches] [PATCH] (bug #4020) XSLT unimarc display
Henri-Damien LAURENT
henridamien.laurent at biblibre.com
Thu Dec 31 10:19:58 CET 2009
From: Paul Poulain <paul.poulain at biblibre.com>
When using XSLT display with UNIMARC, as_xml fails on some subfields that
contain true UTF-8 data, because the marcflavour is not taken into account
when the data is encoded.
Moreover, because transformMARCXML4XSLT edits some values in the MARC record
and the Perl UTF8 flag is not handled by MARC::File::USMARC, it ends up
double-encoding the data.
Sending a patch to fix both issues.
This patch adds
- two functions in C4/Charset.pm
NormalizeString (uses Unicode::Normalize)
SetUTF8Flag (This function in my opinion belongs to MARC::Record, or at least MARC::File::USMARC)
- edits C4::XSLT in order to cope with the correct marcflavour
- edits C4::Search searchResults to use SetUTF8Flag
---
C4/Charset.pm | 82 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
C4/Search.pm | 5 +++
C4/XSLT.pm | 5 ++-
3 files changed, 90 insertions(+), 2 deletions(-)
diff --git a/C4/Charset.pm b/C4/Charset.pm
index e1b6c96..e39637a 100644
--- a/C4/Charset.pm
+++ b/C4/Charset.pm
@@ -23,6 +23,7 @@ use warnings;
use MARC::Charset qw/marc8_to_utf8/;
use Text::Iconv;
use C4::Debug;
+use Unicode::Normalize;
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
@@ -34,6 +35,7 @@ BEGIN {
@EXPORT = qw(
IsStringUTF8ish
MarcToUTF8Record
+ SetUTF8Flag
SetMarcUnicodeFlag
StripNonXmlChars
);
@@ -111,6 +113,86 @@ sub IsStringUTF8ish {
return utf8::decode($str);
}
+=head2 SetUTF8Flag
+
+=over 4
+
+my $marc_record = SetUTF8Flag($marc_record);
+
+=back
+
+This function sets the PERL UTF8 flag for data.
+It is required when using new_from_usmarc
+since MARC::File::USMARC does not handle PERL UTF8 setting.
+When editing unicode marc records fields and subfields, you
+would end up in double encoding without using this function.
+
+Every data field of the record is replaced in place by a new field
+holding the same tag, indicators and subfield codes, with each subfield
+value passed through NormalizeString. Control fields are left untouched.
+
+Returns the record that was passed in (modified in place), or nothing
+when no record is given.
+
+FIXME
+In my opinion, this function belongs to MARC::Record and not
+to this package.
+But since it handles charset, and MARC::Record, it finds its way in that package
+
+=cut
+
+sub SetUTF8Flag {
+    my ($record) = @_;
+    return unless $record;
+
+    foreach my $field ( $record->fields() ) {
+
+        # is_control_field() is used rather than a numeric test on the tag,
+        # so that non-numeric tags do not trigger "isn't numeric" warnings
+        # and are treated as ordinary data fields.
+        next if $field->is_control_field();
+
+        my @subfields;
+        foreach my $subfield ( $field->subfields() ) {
+            my ( $code, $value ) = @{$subfield};
+            push @subfields, $code, NormalizeString($value);
+        }
+
+        # NOTE(review): MARC::Field->new is expected to croak when given no
+        # subfield for a data field, so such a field is left as it is.
+        next unless @subfields;
+
+        my $newfield = MARC::Field->new(
+            $field->tag(),
+            $field->indicator(1),
+            $field->indicator(2),
+            @subfields
+        );
+        $field->replace_with($newfield);
+    }
+
+    # The synopsis assigns the result, so hand the record back to the caller.
+    return $record;
+}
+
+=head2 NormalizeString
+
+=over 4
+
+    my $normalized_string = NormalizeString($string, $nfd, $transform);
+
+=back
+
+Given
+
+=over 4
+
+=item a string
+
+=item nfd : true if you want NFD; NFC is applied otherwise (the default)
+
+=item transform : true if you expect punctuation signs and one letter
+words to be removed
+
+=back
+
+Sets the PERL UTF8 Flag on your initial data if need be
+and applies cleaning if required.
+
+Returns the normalized string (NFC unless nfd is set).
+An undefined string is returned as is.
+
+Sample code :
+
+    my $string = NormalizeString("l'ornithoptère", 1, 1);
+    # results into " ornithoptère" in NFD form with the UTF8 Flag set :
+    # the quote becomes a space, the one letter word "l" is dropped,
+    # and only trailing whitespace is trimmed
+
+=cut
+
+sub NormalizeString {
+    my ( $string, $nfd, $transform ) = @_;
+
+    # Nothing to decode or normalize; avoids "uninitialized" warnings.
+    return $string unless defined $string;
+
+    # Turn UTF-8 encoded bytes into characters (sets the UTF8 flag).
+    utf8::decode($string) unless utf8::is_utf8($string);
+
+    # The second argument selects NFD; NFC is the default form.
+    $string = $nfd ? NFD($string) : NFC($string);
+
+    if ($transform) {
+
+        # Punctuation signs are replaced by a space...
+        $string =~ s/[<>^;.?,\-()\[\]{}\$%!*:\\\/&"']/ /g;
+
+        # ...so that "d'" "l'" became "d " "l " : drop those one letter words
+        $string =~ s/\b\S\b//g;
+        $string =~ s/\s+$//g;
+    }
+    return $string;
+}
+
=head2 MarcToUTF8Record
=over 4
diff --git a/C4/Search.pm b/C4/Search.pm
index 5605ede..cf1309e 100644
--- a/C4/Search.pm
+++ b/C4/Search.pm
@@ -448,6 +448,7 @@ sub getRecords {
# not an index scan
else {
$record = $results[ $i - 1 ]->record($j)->raw();
+ warn $results[$i-1]->record($j)->render() ;
# warn "RECORD $j:".$record;
$results_hash->{'RECORDS'}[$j] = $record;
@@ -1648,6 +1649,10 @@ sub searchResults {
}
# XSLT processing of some stuff
+ my $debug=1;
+ use C4::Charset;
+ SetUTF8Flag($marcrecord);
+ $debug && warn $marcrecord->as_formatted;
if (C4::Context->preference("XSLTResultsDisplay") && !$scan) {
$oldbiblio->{XSLTResultsRecord} = XSLTParse4Display(
$oldbiblio->{biblionumber}, $marcrecord, 'Results' );
diff --git a/C4/XSLT.pm b/C4/XSLT.pm
index 8a67e04..1c7184a 100644
--- a/C4/XSLT.pm
+++ b/C4/XSLT.pm
@@ -124,8 +124,9 @@ sub XSLTParse4Display {
my $record = transformMARCXML4XSLT($biblionumber, $orig_record);
#return $record->as_formatted();
my $itemsxml = buildKohaItemsNamespace($biblionumber);
- my $xmlrecord = $record->as_xml();
+ my $xmlrecord = $record->as_xml(C4::Context->preference('marcflavour'));
my $sysxml = "<sysprefs>\n";
+ warn $xmlrecord;
foreach my $syspref ( qw/OPACURLOpenInNewWindow DisplayOPACiconsXSLT URLLinkText/ ) {
$sysxml .= "<syspref name=\"$syspref\">" .
C4::Context->preference( $syspref ) .
@@ -137,7 +138,7 @@ sub XSLTParse4Display {
my $parser = XML::LibXML->new();
# don't die when you find &, >, etc
- $parser->recover_silently(1);
+ $parser->recover_silently(0);
my $source = $parser->parse_string($xmlrecord);
unless ( $stylesheet ) {
my $xslt = XML::LibXSLT->new();
--
1.6.3.3
More information about the Koha-patches
mailing list