[Koha-cvs] CVS: koha/marc/benchmarks benchmarkresults,NONE,1.1 benchmarkschema,NONE,1.1 generaterandomdata,NONE,1.1 getdata-paul,NONE,1.1 getdata-paul-regex,NONE,1.1 getdata-steve,NONE,1.1 runbenchmark,NONE,1.1

Sat May 18 00:03:17 CEST 2002

Update of /cvsroot/koha/koha/marc/benchmarks
In directory usw-pr-cvs1:/tmp/cvs-serv27560

Added Files:
	benchmarkresults benchmarkschema generaterandomdata 
	getdata-paul getdata-paul-regex getdata-steve runbenchmark 
Log Message:
Benchmarking files for comparing performance of two different proposed database
schemas for MARC storage.  See IRC log at

    http://www.haz.cmsd.bc.ca/cgi-bin/kohalog.pl

for information on the two proposals.

--- NEW FILE ---
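Benchmark results from Steve Tonnesen (elapsed time per run as printed by
runbenchmark, presumably in seconds; five runs of 500 random lookups each)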

getdata-steve:		15.73	13.26	14.47	12.73	11.90
getdata-paul:  		21.56	14.11	12.58	12.49	13.18
getdata-paul-regex:     14.32	11.53	13.51	10.15	10.99

--- NEW FILE ---
-- One row per tag of a record.  All subfields of the tag are packed into
-- tagvalue; generaterandomdata writes them as "$<code> <value>" entries
-- separated by NUL bytes.  Read by getdata-paul and getdata-paul-regex.
-- ENGINE= replaces the old TYPE= table option, which current MySQL servers
-- no longer accept.
CREATE TABLE marc_0XX_tag_table (
		bibcode bigint(20) NOT NULL default '0',
		tagnumber char(3) NOT NULL default '',
		tagorder tinyint(4) NOT NULL default '0',
		tagvalue varchar(255) default NULL,
		valuebloblink bigint(20) default NULL,
		PRIMARY KEY  (bibcode,tagnumber,tagorder)
		) ENGINE=MyISAM;

-- One row per subfield of a record.  Rows belonging to the same tag occurrence
-- share a tagid and are ordered by subfieldorder.  Read by getdata-steve.
CREATE TABLE marc_2XX_subfield_table (
		subfieldid bigint(20) unsigned NOT NULL auto_increment,
		tagid bigint(20) NOT NULL default '0',
		tag char(3) NOT NULL default '',
		bibid bigint(20) NOT NULL default '0',
		subfieldorder tinyint(4) NOT NULL default '0',
		subfieldcode char(1) NOT NULL default '',
		subfieldvalue varchar(255) default NULL,
		valuebloblink bigint(20) default NULL,
		PRIMARY KEY (subfieldid),
		KEY (bibid,tagid,tag,subfieldcode)
		) ENGINE=MyISAM;

--- NEW FILE ---
#!/usr/bin/perl
#
# This script generates 80,000 random records in the kohabenchmark database for
# the purposes of comparing two different marc storage schemas.  It requires
# the presence of a word list for populating the data.  Mine is in
# /usr/share/dict/words.  Change that if necessary.  You'll also need to change
# your userid and password for the dbi->connect line.
#
# Every record is written to both schemas so they can be compared on identical
# data:
#   marc_2XX_subfield_table - one row per subfield
#   marc_0XX_tag_table      - one row per tag, with the subfields packed into
#                             tagvalue as "$<code> <word>" entries separated
#                             by NUL bytes

use strict;
use warnings;
use DBI;
use List::Util qw(shuffle);

my $wordlist   = '/usr/share/dict/words';
my $numrecords = 80000;
my $maxretries = 10;            # give up on an insert after this many failures
my @subfields  = ('a' .. 'n');

my $dbh = DBI->connect("dbi:mysql:kohabenchmark", 'youruserid', 'yourpassword')
    or die "Cannot connect to kohabenchmark: $DBI::errstr\n";

# Load the word list; without it every generated value would be empty.
open(my $wordfh, '<', $wordlist) or die "Cannot open $wordlist: $!\n";
my @words;
while (my $word = <$wordfh>) {
    chomp $word;
    push @words, $word;
}
close $wordfh;
@words or die "$wordlist contains no words\n";

# Prepare both inserts once instead of once per row.
my $subfield_sth = $dbh->prepare("insert into marc_2XX_subfield_table (subfieldid, tagid, tag, bibid, subfieldorder, subfieldcode, subfieldvalue) values (?,?,?,?,?,?,?)")
    or die "Cannot prepare subfield insert: " . $dbh->errstr . "\n";
my $tag_sth = $dbh->prepare("insert into marc_0XX_tag_table (bibcode, tagnumber, tagorder, tagvalue) values (?, ?, ?, ?)")
    or die "Cannot prepare tag insert: " . $dbh->errstr . "\n";

# Execute a prepared insert with the given bind values.  A failed insert is
# reported and retried after a one second pause; after $maxretries failures
# the script dies instead of looping forever on a permanent error (for
# example a duplicate key when the tables are already populated).
sub insert_row {
    my ($sth, @values) = @_;
    for my $attempt (1 .. $maxretries) {
        return if $sth->execute(@values);
        print "ERROR: ", $sth->err, "\n";
        sleep 1;
    }
    die "Insert still failing after $maxretries attempts: " . $sth->errstr . "\n";
}

my $tagcounter      = 0;        # running tagid across all records
my $subfieldcounter = 0;        # running subfieldid across all records
srand($$ | time);
for my $bibid (1 .. $numrecords) {
    my $numtags = int(rand(10) + 5);
    for my $tagorder (1 .. $numtags - 1) {
        $tagcounter++;
        my $tag          = $tagorder * 40 + 100;
        my $numsubfields = int(rand(10) + 1);

        # Pick $numsubfields distinct subfield codes in random order; every
        # code, including the last one in @subfields, can be chosen.
        my @codes = (shuffle @subfields)[0 .. $numsubfields - 1];

        my @packed;
        my $subfieldorder = 0;
        for my $code (@codes) {
            $subfieldcounter++;
            $subfieldorder++;
            my $word = $words[ int(rand(@words)) ];
            push @packed, "\$$code $word";
            insert_row($subfield_sth, $subfieldcounter, $tagcounter, $tag,
                       $bibid, $subfieldorder, $code, $word);
        }

        # The same subfields again, packed into a single NUL-separated value.
        insert_row($tag_sth, $bibid, $tag, $tagorder, join("\0", @packed));
    }
}

$dbh->disconnect;

--- NEW FILE ---
#!/usr/bin/perl
#
#
# Benchmark script for Paul's marc db schema using split() to separate subfield
# code from subfield value
#
# Arguments: COUNT [PRINT [BIBID]]
#   COUNT  number of records to look up
#   PRINT  when true, print every tag and subfield that is read
#   BIBID  record to use for the first lookup; all other lookups use a
#          randomly chosen record

use strict;
use warnings;
use DBI;

my $dbh = DBI->connect("dbi:mysql:kohabenchmark", 'root', 'testpass')
    or die "Cannot connect to kohabenchmark: $DBI::errstr\n";

my ($count, $print, $bibid) = @ARGV;
$count = 0 unless defined $count;

# Prepared once with a placeholder so the command-line bibid is never
# interpolated into the SQL text.
my $sth = $dbh->prepare("select tagnumber,tagvalue from marc_0XX_tag_table where bibcode=? order by tagorder")
    or die "Cannot prepare query: " . $dbh->errstr . "\n";

for (my $i = 0; $i < $count; $i++) {
    $bibid = int(rand(79998)) + 1 unless $bibid;

    print "BIBID: $bibid\n" if $print;
    $sth->execute($bibid)
        or die "Query failed for bibid $bibid: " . $sth->errstr . "\n";
    while (my ($tagnumber, $tagvalue) = $sth->fetchrow_array) {
        print "  Tag: $tagnumber\n" if $print;
        next unless defined $tagvalue;      # tagvalue may be NULL
        # Subfields are NUL-separated; the first whitespace splits the code
        # from the value.  The split is done even when nothing is printed,
        # since it is part of what is being benchmarked.
        foreach my $subfield (split(/\0/, $tagvalue)) {
            my ($code, $value) = split(/\s/, $subfield, 2);
            $code  = '' unless defined $code;
            $value = '' unless defined $value;
            print "    $code $value\n" if $print;
        }
    }
    # A bibid given on the command line applies to the first lookup only.
    $bibid = 0;
}

$dbh->disconnect;

--- NEW FILE ---
#!/usr/bin/perl
#
#
# Benchmark script for Paul's marc db schema using regex to separate subfield
# code from subfield value
#
# Arguments: COUNT [PRINT [BIBID]]
#   COUNT  number of records to look up
#   PRINT  when true, print every tag and subfield that is read
#   BIBID  record to use for the first lookup; all other lookups use a
#          randomly chosen record

use strict;
use warnings;
use DBI;

my $dbh = DBI->connect("dbi:mysql:kohabenchmark", 'root', 'testpass')
    or die "Cannot connect to kohabenchmark: $DBI::errstr\n";

my ($count, $print, $bibid) = @ARGV;
$count = 0 unless defined $count;

# Prepared once with a placeholder so the command-line bibid is never
# interpolated into the SQL text.
my $sth = $dbh->prepare("select tagnumber,tagvalue from marc_0XX_tag_table where bibcode=? order by tagorder")
    or die "Cannot prepare query: " . $dbh->errstr . "\n";

for (my $i = 0; $i < $count; $i++) {
    $bibid = int(rand(79998)) + 1 unless $bibid;

    print "BIBID: $bibid\n" if $print;
    $sth->execute($bibid)
        or die "Query failed for bibid $bibid: " . $sth->errstr . "\n";
    while (my ($tagnumber, $tagvalue) = $sth->fetchrow_array) {
        print "  Tag: $tagnumber\n" if $print;
        next unless defined $tagvalue;      # tagvalue may be NULL
        foreach my $subfield (split(/\0/, $tagvalue)) {
            # Each entry looks like "$a some value".  The dollar sign must be
            # escaped: a bare "$" is an end-of-string anchor, which made the
            # original pattern unmatchable.  Captures are taken from the match
            # itself so a failed match cannot reuse stale $1/$2.
            my ($code, $value) = $subfield =~ m#^\$(.) (.*)#s;
            $code  = '' unless defined $code;
            $value = '' unless defined $value;
            print "    $code $value\n" if $print;
        }
    }
    # A bibid given on the command line applies to the first lookup only.
    $bibid = 0;
}

$dbh->disconnect;

--- NEW FILE ---
#!/usr/bin/perl
#
#
# Benchmark script for Steve's marc db schema
#
# Arguments: COUNT [PRINT [BIBID]]
#   COUNT  number of records to look up
#   PRINT  when true, print every tag and subfield that is read
#   BIBID  record to use for the first lookup; all other lookups use a
#          randomly chosen record

use strict;
use warnings;
use DBI;

my $dbh = DBI->connect("dbi:mysql:kohabenchmark", 'root', 'testpass')
    or die "Cannot connect to kohabenchmark: $DBI::errstr\n";

my ($count, $print, $bibid) = @ARGV;
$count = 0 unless defined $count;

# Prepared once with a placeholder so the command-line bibid is never
# interpolated into the SQL text.
my $sth = $dbh->prepare("select tagid,tag,subfieldcode,subfieldvalue from marc_2XX_subfield_table where bibid=? order by tagid,subfieldorder")
    or die "Cannot prepare query: " . $dbh->errstr . "\n";

for (my $i = 0; $i < $count; $i++) {
    $bibid = int(rand(79998)) + 1 unless $bibid;

    print "BIBID: $bibid\n" if $print;
    $sth->execute($bibid)
        or die "Query failed for bibid $bibid: " . $sth->errstr . "\n";

    # Rows arrive ordered by tagid, so a change of tagid marks the start of
    # the next tag.  Comparing tagid rather than the tag number keeps two
    # consecutive occurrences of the same tag from being merged.
    my $lasttagid;
    while (my ($tagid, $tag, $subfieldcode, $subfieldvalue) = $sth->fetchrow_array) {
        if (!defined $lasttagid || $tagid != $lasttagid) {
            print "  Tag: $tag\n" if $print;
            $lasttagid = $tagid;
        }
        $subfieldvalue = '' unless defined $subfieldvalue;  # column may be NULL
        print "    $subfieldcode $subfieldvalue\n" if $print;
    }
    # A bibid given on the command line applies to the first lookup only.
    $bibid = 0;
}

$dbh->disconnect;

--- NEW FILE ---
#!/usr/bin/perl
#
#
#  Runs every benchmark script five times, each run looking up 500 random
#  records, and writes one line per script to STDOUT: the script name followed
#  by the tab-separated elapsed times reported by /usr/bin/time.

my $runs = 5;

for my $script ('getdata-steve', 'getdata-paul', 'getdata-paul-regex') {
    print "$script:\t";
    foreach my $run (1 .. $runs) {
        # time writes its report to stderr, hence the 2>&1
        chomp(my $elapsed = `/usr/bin/time -f "%E" perl $script 500 2>&1`);
        print "$elapsed\t";
    }
    print "\n";
}