[Koha-cvs] CVS: koha/misc/translator TmplToken.pm,NONE,1.1 TmplTokenType.pm,NONE,1.1 TmplTokenizer.pm,1.3,1.4 VerboseWarnings.pm,1.1,1.2 text-extract2.pl,1.36,1.37

Tue Feb 17 03:45:30 CET 2004

Update of /cvsroot/koha/koha/misc/translator
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv6928

Modified Files:
	TmplTokenizer.pm VerboseWarnings.pm text-extract2.pl 
Added Files:
	TmplToken.pm TmplTokenType.pm 
Log Message:
Further breaking up of the TmplTokenizer module.
A couple of minor fixes.

--- NEW FILE ---
package TmplToken;

use strict;
use TmplTokenType;
require Exporter;

use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);

###############################################################################

=head1 NAME

TmplToken.pm - Object representing a scanner token for .tmpl files

=head1 DESCRIPTION

This is a class representing a token scanned from an HTML::Template .tmpl file.

=cut

###############################################################################

$VERSION = 0.01;

@ISA = qw(Exporter);
@EXPORT_OK = qw();

###############################################################################

sub new {
    my $this = shift;
    my $class = ref($this) || $this;
    my $self = {};
    bless $self, $class;
    ($self->{'_string'}, $self->{'_type'}, $self->{'_lc'}) = @_;
    return $self;
}

sub string {
    my $this = shift;
    return $this->{'_string'}
}

sub type {
    my $this = shift;
    return $this->{'_type'}
}

sub line_number {
    my $this = shift;
    return $this->{'_lc'}
}

sub attributes {
    my $this = shift;
    return $this->{'_attr'};
}

sub set_attributes {
    my $this = shift;
    $this->{'_attr'} = ref $_[0] eq 'HASH'? $_[0]: \@_;
    return $this;
}

###############################################################################

1;

--- NEW FILE ---
package TmplTokenType;

use strict;
require Exporter;

use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);

###############################################################################

=head1 NAME

TmplTokenType.pm - Types of TmplToken objects

=head1 DESCRIPTION

This is a Java-style "safe enum" singleton class for types of TmplToken objects.

=cut

###############################################################################

$VERSION = 0.01;

@ISA = qw(Exporter);
@EXPORT_OK = qw(
    &TEXT
    &CDATA
    &TAG
    &DECL
    &PI
    &DIRECTIVE
    &COMMENT
    &UNKNOWN
);

###############################################################################

use vars qw( $_text $_cdata $_tag $_decl $_pi $_directive $_comment $_unknown );

BEGIN {
    my $new = sub {
	my $this = shift;
	my $class = ref($this) || $this;
	my $self = {};
	bless $self, $class;
	($self->{'id'}, $self->{'name'}, $self->{'desc'}) = @_;
	return $self;
    };
    $_text	= &$new(0, 'TEXT');
    $_cdata	= &$new(1, 'CDATA');
    $_tag	= &$new(2, 'TAG');
    $_decl	= &$new(3, 'DECL');
    $_pi	= &$new(4, 'PI');
    $_directive	= &$new(5, 'DIRECTIVE');
    $_comment	= &$new(6, 'COMMENT');
    $_unknown	= &$new(7, 'UNKNOWN');
}

sub to_string {
    my $this = shift;
    return $this->{'name'}
}

sub TEXT	() { $_text }
sub CDATA	() { $_cdata }
sub TAG		() { $_tag }
sub DECL	() { $_decl }
sub PI		() { $_pi }
sub DIRECTIVE	() { $_directive }
sub COMMENT	() { $_comment }
sub UNKNOWN	() { $_unknown }

###############################################################################

1;

Index: TmplTokenizer.pm
===================================================================
RCS file: /cvsroot/koha/koha/misc/translator/TmplTokenizer.pm,v
retrieving revision 1.3
retrieving revision 1.4
diff -C2 -r1.3 -r1.4
*** TmplTokenizer.pm	16 Feb 2004 23:50:56 -0000	1.3
--- TmplTokenizer.pm	17 Feb 2004 02:45:27 -0000	1.4
***************
*** 2,5 ****
--- 2,7 ----

  use strict;
+ use TmplTokenType;
+ use TmplToken;
  use VerboseWarnings qw( pedantic_p error_normal warn_normal warn_pedantic );
  require Exporter;
***************
*** 33,46 ****

  @ISA = qw(Exporter);
! @EXPORT_OK = qw(
!     &KIND_TEXT
!     &KIND_CDATA
!     &KIND_TAG
!     &KIND_DECL
!     &KIND_PI
!     &KIND_DIRECTIVE
!     &KIND_COMMENT
!     &KIND_UNKNOWN
! );

  use vars qw( $input );
--- 35,39 ----

  @ISA = qw(Exporter);
! @EXPORT_OK = qw();

  use vars qw( $input );
***************
*** 92,104 ****
  # End of the hideous stuff

- sub KIND_TEXT      () { 'TEXT' }
- sub KIND_CDATA     () { 'CDATA' }
- sub KIND_TAG       () { 'TAG' }
- sub KIND_DECL      () { 'DECL' }
- sub KIND_PI        () { 'PI' }
- sub KIND_DIRECTIVE () { 'HTML::Template' }
- sub KIND_COMMENT   () { 'COMMENT' }   # empty DECL with exactly one SGML comment
- sub KIND_UNKNOWN   () { 'ERROR' }
- 
  use vars qw( $readahead $lc_0 $lc $syntaxerror_p );
  use vars qw( $cdata_mode_p $cdata_close );
--- 85,88 ----
***************
*** 180,188 ****
  	;
      } elsif ($readahead =~ /^\s+/s) {	# whitespace
! 	($kind, $it, $readahead) = (KIND_TEXT, $&, $');
      # FIXME the following (the [<\s] part) is an unreliable HACK :-(
      } elsif ($readahead =~ /^(?:[^<]|<[<\s])+/s) {	# non-space normal text
! 	($kind, $it, $readahead) = (KIND_TEXT, $&, $');
! 	warn_normal "Warning: Unescaped < $it\n", $lc_0
  		if !$cdata_mode_p && $it =~ /</s;
      } else {				# tag/declaration/processing instruction
--- 164,172 ----
  	;
      } elsif ($readahead =~ /^\s+/s) {	# whitespace
! 	($kind, $it, $readahead) = (TmplTokenType::TEXT, $&, $');
      # FIXME the following (the [<\s] part) is an unreliable HACK :-(
      } elsif ($readahead =~ /^(?:[^<]|<[<\s])+/s) {	# non-space normal text
! 	($kind, $it, $readahead) = (TmplTokenType::TEXT, $&, $');
! 	warn_normal "Unescaped < in $it\n", $lc_0
  		if !$cdata_mode_p && $it =~ /</s;
      } else {				# tag/declaration/processing instruction
***************
*** 191,206 ****
  	    if ($cdata_mode_p) {
  		if ($readahead =~ /^$cdata_close/) {
! 		    ($kind, $it, $readahead) = (KIND_TAG, $&, $');
  		    $ok_p = 1;
  		} else {
! 		    ($kind, $it, $readahead) = (KIND_TEXT, $readahead, undef);
  		    $ok_p = 1;
  		}
  	    } elsif ($readahead =~ /^$re_tag_compat/os) {
! 		($kind, $it, $readahead) = (KIND_TAG, "$1>", $3);
  		$ok_p = 1;
  		warn_normal "SGML \"closed start tag\" notation: $1<\n", $lc_0 if $2 eq '';
  	    } elsif ($readahead =~ /^<!--(?:(?!-->).)*-->/s) {
! 		($kind, $it, $readahead) = (KIND_COMMENT, $&, $');
  		$ok_p = 1;
  		warn_normal "Syntax error in comment: $&\n", $lc_0;
--- 175,190 ----
  	    if ($cdata_mode_p) {
  		if ($readahead =~ /^$cdata_close/) {
! 		    ($kind, $it, $readahead) = (TmplTokenType::TAG, $&, $');
  		    $ok_p = 1;
  		} else {
! 		    ($kind, $it, $readahead) = (TmplTokenType::TEXT, $readahead, undef);
  		    $ok_p = 1;
  		}
  	    } elsif ($readahead =~ /^$re_tag_compat/os) {
! 		($kind, $it, $readahead) = (TmplTokenType::TAG, "$1>", $3);
  		$ok_p = 1;
  		warn_normal "SGML \"closed start tag\" notation: $1<\n", $lc_0 if $2 eq '';
  	    } elsif ($readahead =~ /^<!--(?:(?!-->).)*-->/s) {
! 		($kind, $it, $readahead) = (TmplTokenType::COMMENT, $&, $');
  		$ok_p = 1;
  		warn_normal "Syntax error in comment: $&\n", $lc_0;
***************
*** 214,237 ****
  	    $readahead .= $next;
  	}
! 	if ($kind ne KIND_TAG) {
  	    ;
  	} elsif ($it =~ /^<!/) {
! 	    $kind = KIND_DECL;
! 	    $kind = KIND_COMMENT if $it =~ /^<!--(?:(?!-->).)*-->/;
  	} elsif ($it =~ /^<\?/) {
! 	    $kind = KIND_PI;
  	}
  	if ($it =~ /^$re_directive/ios && !$cdata_mode_p) {
! 	    $kind = KIND_DIRECTIVE;
  	}
  	if (!$ok_p && $eof_p) {
! 	    ($kind, $it, $readahead) = (KIND_UNKNOWN, $readahead, undef);
  	    $syntaxerror_p = 1;
  	}
      }
      warn_normal "Unrecognizable token found: $it\n", $lc_0
! 	    if $kind eq KIND_UNKNOWN;
!     return defined $it? (wantarray? ($kind, $it):
! 				    [$kind, $it]): undef;
  }

--- 198,220 ----
  	    $readahead .= $next;
  	}
! 	if ($kind ne TmplTokenType::TAG) {
  	    ;
  	} elsif ($it =~ /^<!/) {
! 	    $kind = TmplTokenType::DECL;
! 	    $kind = TmplTokenType::COMMENT if $it =~ /^<!--(?:(?!-->).)*-->/;
  	} elsif ($it =~ /^<\?/) {
! 	    $kind = TmplTokenType::PI;
  	}
  	if ($it =~ /^$re_directive/ios && !$cdata_mode_p) {
! 	    $kind = TmplTokenType::DIRECTIVE;
  	}
  	if (!$ok_p && $eof_p) {
! 	    ($kind, $it, $readahead) = (TmplTokenType::UNKNOWN, $readahead, undef);
  	    $syntaxerror_p = 1;
  	}
      }
      warn_normal "Unrecognizable token found: $it\n", $lc_0
! 	    if $kind eq TmplTokenType::UNKNOWN;
!     return defined $it? TmplToken->new($it, $kind, $lc): undef;
  }

***************
*** 241,248 ****
      if (!$cdata_mode_p) {
  	$it = next_token_internal($h);
! 	if (defined $it && $it->[0] eq KIND_TAG) { # FIXME
  	    ($cdata_mode_p, $cdata_close) = (1, "</$1\\s*>")
! 		    if $it->[1] =~ /^<(script|style|textarea)\b/i; #FIXME
! 	    push @$it, extract_attributes($it->[1], $lc_0); #FIXME
  	}
      } else {
--- 224,231 ----
      if (!$cdata_mode_p) {
  	$it = next_token_internal($h);
! 	if (defined $it && $it->type eq TmplTokenType::TAG) {
  	    ($cdata_mode_p, $cdata_close) = (1, "</$1\\s*>")
! 		    if $it->string =~ /^<(script|style|textarea)\b/i;
! 	    $it->set_attributes( extract_attributes($it->string, $lc_0) );
  	}
      } else {
***************
*** 251,265 ****
  	    my $next = next_token_internal($h);
  	last if !defined $next;
! 	    if (defined $next && $next->[1] =~ /$cdata_close/i) { #FIXME
! 		($lc, $readahead) = ($lc_prev, $next->[1] . $readahead); #FIXME
  		$cdata_mode_p = 0;
  	    }
  	last unless $cdata_mode_p;
! 	    $it .= $next->[1]; #FIXME
  	}
! 	$it = [KIND_CDATA, $it]; #FIXME
  	$cdata_close = undef;
      }
!     return defined $it? (wantarray? @$it: $it): undef;
  }

--- 234,248 ----
  	    my $next = next_token_internal($h);
  	last if !defined $next;
! 	    if (defined $next && $next->string =~ /$cdata_close/i) {
! 		($lc, $readahead) = ($lc_prev, $next->string . $readahead);
  		$cdata_mode_p = 0;
  	    }
  	last unless $cdata_mode_p;
! 	    $it .= $next->string;
  	}
! 	$it = TmplToken->new( $it, TmplTokenType::CDATA, $lc );
  	$cdata_close = undef;
      }
!     return $it;
  }

***************
*** 291,293 ****
--- 274,288 ----
  rework in tmpl_process.pl

+ Gettext-style line number references would also be very helpful in
+ disambiguating the strings. Ultimately, we should generate and work
+ with gettext-style po files, so that translators are able to use
+ tools designed for gettext.
+ 
+ An example of a string untranslatable to Chinese is "Accounts for";
+ "Accounts for %s", however, would be translatable. Short words like
+ "in" would also be untranslatable, not only to Chinese, but also to
+ languages requiring declension of nouns.
+ 
  =cut
+ 
+ 1;

Index: VerboseWarnings.pm
===================================================================
RCS file: /cvsroot/koha/koha/misc/translator/VerboseWarnings.pm,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -r1.1 -r1.2
*** VerboseWarnings.pm	16 Feb 2004 23:42:57 -0000	1.1
--- VerboseWarnings.pm	17 Feb 2004 02:45:27 -0000	1.2
***************
*** 25,31 ****
  @ISA = qw(Exporter);
  @EXPORT_OK = qw(
-     &set_application_name
-     &set_input_file_name
-     &set_pedantic_mode
      &pedantic_p
      &warn_normal
--- 25,28 ----
***************
*** 46,50 ****
      my($s) = @_;
      $input = $s;
!     $input_abbr = $& if !defined $input && defined $s && $s =~ /[^\/]+$/;
  }

--- 43,47 ----
      my($s) = @_;
      $input = $s;
!     $input_abbr = $& if defined $s && $s =~ /[^\/]+$/;
  }

Index: text-extract2.pl
===================================================================
RCS file: /cvsroot/koha/koha/misc/translator/text-extract2.pl,v
retrieving revision 1.36
retrieving revision 1.37
diff -C2 -r1.36 -r1.37
*** text-extract2.pl	16 Feb 2004 23:50:56 -0000	1.36
--- text-extract2.pl	17 Feb 2004 02:45:27 -0000	1.37
***************
*** 33,41 ****
      last unless defined $s;
  	printf "%s\n", ('-' x 79);
! 	my($kind, $t, $attr) = @$s; # FIXME
  	printf "%s:\n", $kind;
  	printf "%4dH%s\n", length($t),
  		join('', map {/[\0-\37]/? $_: "$_\b$_"} split(//, $t));
! 	if ($kind eq TmplTokenizer::KIND_TAG && %$attr) {
  	    printf "Attributes:\n";
  	    for my $a (keys %$attr) {
--- 33,41 ----
      last unless defined $s;
  	printf "%s\n", ('-' x 79);
! 	my($kind, $t, $attr) = ($s->type, $s->string, $s->attributes);
  	printf "%s:\n", $kind;
  	printf "%4dH%s\n", length($t),
  		join('', map {/[\0-\37]/? $_: "$_\b$_"} split(//, $t));
! 	if ($kind eq TmplTokenType::TAG && %$attr) {
  	    printf "Attributes:\n";
  	    for my $a (keys %$attr) {
***************
*** 57,65 ****
  	my $s = TmplTokenizer::next_token $h;
      last unless defined $s;
! 	my($kind, $t, $attr) = @$s; # FIXME
! 	if ($kind eq TmplTokenizer::KIND_TEXT) {
  	    $t = TmplTokenizer::trim $t;
  	    $text{$t} = 1 if $t =~ /\S/s;
! 	} elsif ($kind eq TmplTokenizer::KIND_TAG && %$attr) {
  	    # value [tag=input], meta
  	    my $tag = lc($1) if $t =~ /^<(\S+)/s;
--- 57,65 ----
  	my $s = TmplTokenizer::next_token $h;
      last unless defined $s;
! 	my($kind, $t, $attr) = ($s->type, $s->string, $s->attributes);
! 	if ($kind eq TmplTokenType::TEXT) {
  	    $t = TmplTokenizer::trim $t;
  	    $text{$t} = 1 if $t =~ /\S/s;
! 	} elsif ($kind eq TmplTokenType::TAG && %$attr) {
  	    # value [tag=input], meta
  	    my $tag = lc($1) if $t =~ /^<(\S+)/s;