[Koha-patches] [PATCH] A script comparing MARC::Record object creation XML/YAML

Frédéric Demians f.demians at tamil.fr
Sun Jun 14 07:17:43 CEST 2009


MARC::Record objects are created everywhere in Koha,
from Zebra and from the MySQL biblioitem.marcxml field.
XML parsing appears to be a very CPU-intensive
process. My tests (NYTProf) on the OPAC result page show
that about one third of the time is spent on it!

This script times MARC::Record creation from an
XML representation vs a YAML one.
---
 misc/test-marc-deserial |  211 +++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 211 insertions(+), 0 deletions(-)
 create mode 100755 misc/test-marc-deserial

diff --git a/misc/test-marc-deserial b/misc/test-marc-deserial
new file mode 100755
index 0000000..390a4da
--- /dev/null
+++ b/misc/test-marc-deserial
@@ -0,0 +1,211 @@
+#!/usr/bin/perl 
+
+#
+# Compare MARC::Record object creation from an XML serialization vs
+# a YAML one.
+#
+
+
+use strict;
+use warnings;
+use MARC::Record;
+use MARC::File::XML;
+use YAML::XS;
+use Time::HiRes qw(gettimeofday);
+
+
+my $iterations = 1000;    # same number of deserializations for both formats
+foreach my $benchmark ( \&test_xml, \&test_yaml ) { $benchmark->( $iterations ) }
+
+
+sub test_xml {    # Build a MARC::Record from a fixed MARCXML string $max times; print the elapsed seconds.
+    my $max = shift;    # number of deserializations to run
+    my $xml = <<EOS;
+<?xml version="1.0" encoding="UTF-8"?>
+<record
+    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+    xsi:schemaLocation="http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd"
+    xmlns="http://www.loc.gov/MARC21/slim">
+
+  <leader>00571nam0a2200133   4500</leader>
+  <controlfield tag="005">20080924184404.000</controlfield>
+  <datafield tag="090" ind1=" " ind2=" ">
+    <subfield code="a">1000</subfield>
+  </datafield>
+  <datafield tag="100" ind1=" " ind2=" ">
+    <subfield code="a">20081018              frey50       </subfield>
+  </datafield>
+  <datafield tag="200" ind1="1" ind2=" ">
+    <subfield code="a">Propriété barrière de la peau</subfield>
+    <subfield code="b">LIVR</subfield>
+    <subfield code="e">caractérisation de l'organisation des lipides par spectroscopie vibrationnelle</subfield>
+    <subfield code="f">Emmanuelle Corbé Guillard</subfield>
+    <subfield code="g">[sous la direction de] Arlette Baillet-Guffroy</subfield>
+  </datafield>
+  <datafield tag="210" ind1=" " ind2=" ">
+    <subfield code="a">[S.l.]</subfield>
+    <subfield code="c">[s.n.]</subfield>
+    <subfield code="d">2008</subfield>
+  </datafield>
+  <datafield tag="215" ind1=" " ind2=" ">
+    <subfield code="a">1 vol. (66 f.)</subfield>
+    <subfield code="c">ill.</subfield>
+    <subfield code="d">30 cm</subfield>
+  </datafield>
+  <datafield tag="700" ind1=" " ind2="1">
+    <subfield code="9">41714</subfield>
+    <subfield code="a">Corbé Guillard</subfield>
+    <subfield code="b">Emmanuelle</subfield>
+    <subfield code="f">1979-....</subfield>
+    <subfield code="4">070</subfield>
+  </datafield>
+  <datafield tag="995" ind1=" " ind2=" ">
+    <subfield code="o">0</subfield>
+    <subfield code="y">LIVR</subfield>
+    <subfield code="f">100999</subfield>
+    <subfield code="9">1000</subfield>
+    <subfield code="c">TAMIL</subfield>
+    <subfield code="k">TPHB 10352</subfield>
+    <subfield code="b">TAMIL</subfield>
+  </datafield>
+  <controlfield tag="001">1000</controlfield>
+</record>
+EOS
+    MARC::File::XML->default_record_format( 'UNIMARC' );    # one-off setup, done before the clock starts
+    my $count = 0;
+    my $start = gettimeofday;    # scalar context: seconds as a floating-point number
+    while ( $count < $max ) {
+        my $record = MARC::Record::new_from_xml( $xml, "utf8", 'UNIMARC' );
+        $count++;
+    }
+    print "Deserialize $count records from XML: ", gettimeofday - $start, "\n";
+}
+
+
+sub test_yaml {    # Load a YAML-serialized MARC::Record $max times with YAML::XS; print the elapsed seconds.
+    my ($max) = @_;    # number of deserializations to run
+    my $yaml = <<EOS;
+--- !!perl/hash:MARC::Record
+_fields:
+- !!perl/hash:MARC::Field
+  _data: '20080924184404.000'
+  _is_control_field: 1
+  _tag: 005
+  _warnings: []
+- !!perl/hash:MARC::Field
+  _ind1: ' '
+  _ind2: ' '
+  _is_control_field: ''
+  _subfields:
+  - a
+  - '1000'
+  _tag: 090
+  _warnings: []
+- !!perl/hash:MARC::Field
+  _ind1: ' '
+  _ind2: ' '
+  _is_control_field: ''
+  _subfields:
+  - a
+  - '20081018              frey50       '
+  _tag: 100
+  _warnings: []
+- !!perl/hash:MARC::Field
+  _ind1: '1'
+  _ind2: ' '
+  _is_control_field: ''
+  _subfields:
+  - a
+  - Propriété barrière de la peau
+  - b
+  - LIVR
+  - e
+  - caractérisation de l'organisation des lipides par spectroscopie vibrationnelle
+  - f
+  - Emmanuelle Corbé Guillard
+  - g
+  - '[sous la direction de] Arlette Baillet-Guffroy'
+  _tag: 200
+  _warnings: []
+- !!perl/hash:MARC::Field
+  _ind1: ' '
+  _ind2: ' '
+  _is_control_field: ''
+  _subfields:
+  - a
+  - '[S.l.]'
+  - c
+  - '[s.n.]'
+  - d
+  - '2008'
+  _tag: 210
+  _warnings: []
+- !!perl/hash:MARC::Field
+  _ind1: ' '
+  _ind2: ' '
+  _is_control_field: ''
+  _subfields:
+  - a
+  - 1 vol. (66 f.)
+  - c
+  - ill.
+  - d
+  - 30 cm
+  _tag: 215
+  _warnings: []
+- !!perl/hash:MARC::Field
+  _ind1: ' '
+  _ind2: '1'
+  _is_control_field: ''
+  _subfields:
+  - '9'
+  - '41714'
+  - a
+  - Corbé Guillard
+  - b
+  - Emmanuelle
+  - f
+  - 1979-....
+  - '4'
+  - '070'
+  _tag: 700
+  _warnings: []
+- !!perl/hash:MARC::Field
+  _ind1: ' '
+  _ind2: ' '
+  _is_control_field: ''
+  _subfields:
+  - o
+  - '0'
+  - y
+  - LIVR
+  - f
+  - '100999'
+  - '9'
+  - '1000'
+  - c
+  - TAMIL
+  - k
+  - TPHB 10352
+  - b
+  - TAMIL
+  _tag: 995
+  _warnings: []
+- !!perl/hash:MARC::Field
+  _data: '1000'
+  _is_control_field: 1
+  _tag: 001
+  _warnings: []
+_leader: 00571nam0a2200133   4500
+_warnings: []
+EOS
+    # The counter is set up first so the clock starts right before the loop.
+    my $count      = 0;
+    my $started_at = gettimeofday;
+    for ( ; $count < $max ; $count++ ) {
+        my $marc = Load( $yaml );
+    }
+    print "Deserialize $count records from YAML: ", gettimeofday - $started_at, "\n";
+}
+        
+
-- 
1.5.6.5




More information about the Koha-patches mailing list