[Koha-patches] [PATCH] A script comparing MARC::Record object creation XML/YAML
Frédéric Demians
f.demians at tamil.fr
Sun Jun 14 07:17:43 CEST 2009
MARC::Record objects are created everywhere in Koha,
from Zebra and from the MySQL biblioitem.marcxml field.
XML parsing appears to be a very CPU-intensive
process. My tests (NYTProf) on the OPAC result page show
that about one third of the time is spent on it!
This script times MARC::Record creation from an
XML representation vs. a YAML one.
---
misc/test-marc-deserial | 211 +++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 211 insertions(+), 0 deletions(-)
create mode 100755 misc/test-marc-deserial
diff --git a/misc/test-marc-deserial b/misc/test-marc-deserial
new file mode 100755
index 0000000..390a4da
--- /dev/null
+++ b/misc/test-marc-deserial
@@ -0,0 +1,211 @@
+#!/usr/bin/perl
+
+#
+# Compare MARC::Record object creation from an XML serialization vs
+# a YAML one.
+#
+
+
+use strict;
+use warnings;
+use MARC::Record;
+use MARC::File::XML;
+use YAML::XS;
+use Time::HiRes qw(gettimeofday);
+
+
+test_xml( 1000 );    # time 1000 deserializations of the MARCXML sample
+test_yaml( 1000 );    # time 1000 deserializations of the YAML sample
+
+
+sub test_xml {    # Build $max MARC::Record objects from one MARCXML string and print the elapsed seconds.
+ my $max = shift;    # number of deserializations to perform
+ my $xml = <<EOS;    # sample record serialized as MARCXML (parsed as UNIMARC below)
+<?xml version="1.0" encoding="UTF-8"?>
+<record
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd"
+ xmlns="http://www.loc.gov/MARC21/slim">
+
+ <leader>00571nam0a2200133 4500</leader>
+ <controlfield tag="005">20080924184404.000</controlfield>
+ <datafield tag="090" ind1=" " ind2=" ">
+ <subfield code="a">1000</subfield>
+ </datafield>
+ <datafield tag="100" ind1=" " ind2=" ">
+ <subfield code="a">20081018 frey50 </subfield>
+ </datafield>
+ <datafield tag="200" ind1="1" ind2=" ">
+ <subfield code="a">Propriété barrière de la peau</subfield>
+ <subfield code="b">LIVR</subfield>
+ <subfield code="e">caractérisation de l'organisation des lipides par spectroscopie vibrationnelle</subfield>
+ <subfield code="f">Emmanuelle Corbé Guillard</subfield>
+ <subfield code="g">[sous la direction de] Arlette Baillet-Guffroy</subfield>
+ </datafield>
+ <datafield tag="210" ind1=" " ind2=" ">
+ <subfield code="a">[S.l.]</subfield>
+ <subfield code="c">[s.n.]</subfield>
+ <subfield code="d">2008</subfield>
+ </datafield>
+ <datafield tag="215" ind1=" " ind2=" ">
+ <subfield code="a">1 vol. (66 f.)</subfield>
+ <subfield code="c">ill.</subfield>
+ <subfield code="d">30 cm</subfield>
+ </datafield>
+ <datafield tag="700" ind1=" " ind2="1">
+ <subfield code="9">41714</subfield>
+ <subfield code="a">Corbé Guillard</subfield>
+ <subfield code="b">Emmanuelle</subfield>
+ <subfield code="f">1979-....</subfield>
+ <subfield code="4">070</subfield>
+ </datafield>
+ <datafield tag="995" ind1=" " ind2=" ">
+ <subfield code="o">0</subfield>
+ <subfield code="y">LIVR</subfield>
+ <subfield code="f">100999</subfield>
+ <subfield code="9">1000</subfield>
+ <subfield code="c">TAMIL</subfield>
+ <subfield code="k">TPHB 10352</subfield>
+ <subfield code="b">TAMIL</subfield>
+ </datafield>
+ <controlfield tag="001">1000</controlfield>
+</record>
+EOS
+ my $start = gettimeofday;    # scalar context: floating-point seconds; the format setup below is included in the timing
+ my $count = 0;
+ MARC::File::XML->default_record_format( 'UNIMARC' );
+ while ( $count < $max ) {
+ my $record = MARC::Record::new_from_xml( $xml, "utf8", 'UNIMARC' );    # the record is discarded; only the parsing cost matters
+ $count++;
+ }
+ print "Deserialize $count records from XML: ", gettimeofday - $start, "\n";    # elapsed seconds
+}
+
+
+sub test_yaml {    # Build $max MARC::Record objects from one YAML dump and print the elapsed seconds.
+ my $max = shift;    # number of deserializations to perform
+ my $yaml = <<EOS;    # YAML serialization of a blessed MARC::Record (note the !!perl/hash tags)
+--- !!perl/hash:MARC::Record
+_fields:
+- !!perl/hash:MARC::Field
+ _data: '20080924184404.000'
+ _is_control_field: 1
+ _tag: 005
+ _warnings: []
+- !!perl/hash:MARC::Field
+ _ind1: ' '
+ _ind2: ' '
+ _is_control_field: ''
+ _subfields:
+ - a
+ - '1000'
+ _tag: 090
+ _warnings: []
+- !!perl/hash:MARC::Field
+ _ind1: ' '
+ _ind2: ' '
+ _is_control_field: ''
+ _subfields:
+ - a
+ - '20081018 frey50 '
+ _tag: 100
+ _warnings: []
+- !!perl/hash:MARC::Field
+ _ind1: '1'
+ _ind2: ' '
+ _is_control_field: ''
+ _subfields:
+ - a
+ - Propriété barrière de la peau
+ - b
+ - LIVR
+ - e
+ - caractérisation de l'organisation des lipides par spectroscopie vibrationnelle
+ - f
+ - Emmanuelle Corbé Guillard
+ - g
+ - '[sous la direction de] Arlette Baillet-Guffroy'
+ _tag: 200
+ _warnings: []
+- !!perl/hash:MARC::Field
+ _ind1: ' '
+ _ind2: ' '
+ _is_control_field: ''
+ _subfields:
+ - a
+ - '[S.l.]'
+ - c
+ - '[s.n.]'
+ - d
+ - '2008'
+ _tag: 210
+ _warnings: []
+- !!perl/hash:MARC::Field
+ _ind1: ' '
+ _ind2: ' '
+ _is_control_field: ''
+ _subfields:
+ - a
+ - 1 vol. (66 f.)
+ - c
+ - ill.
+ - d
+ - 30 cm
+ _tag: 215
+ _warnings: []
+- !!perl/hash:MARC::Field
+ _ind1: ' '
+ _ind2: '1'
+ _is_control_field: ''
+ _subfields:
+ - '9'
+ - '41714'
+ - a
+ - Corbé Guillard
+ - b
+ - Emmanuelle
+ - f
+ - 1979-....
+ - '4'
+ - '070'
+ _tag: 700
+ _warnings: []
+- !!perl/hash:MARC::Field
+ _ind1: ' '
+ _ind2: ' '
+ _is_control_field: ''
+ _subfields:
+ - o
+ - '0'
+ - y
+ - LIVR
+ - f
+ - '100999'
+ - '9'
+ - '1000'
+ - c
+ - TAMIL
+ - k
+ - TPHB 10352
+ - b
+ - TAMIL
+ _tag: 995
+ _warnings: []
+- !!perl/hash:MARC::Field
+ _data: '1000'
+ _is_control_field: 1
+ _tag: 001
+ _warnings: []
+_leader: 00571nam0a2200133 4500
+_warnings: []
+EOS
+ local $YAML::XS::LoadBlessed = 1;    # YAML::XS >= 0.81 ignores !!perl/hash tags unless this is set; safe here because the YAML is a literal above
+ my $count = 0;
+ my $start = gettimeofday;    # scalar context: floating-point seconds
+ for ( ; $count < $max; $count++ ) {    # $count is declared outside the loop because the report below prints it
+ my $marc = Load( $yaml );    # the object is discarded; only the deserialization cost matters
+ }
+ print "Deserialize $count records from YAML: ", gettimeofday - $start, "\n";    # elapsed seconds
+}
+
+
--
1.5.6.5
More information about the Koha-patches
mailing list