[Koha-cvs] CVS: koha/misc/translator TmplToken.pm,NONE,1.1 TmplTokenType.pm,NONE,1.1 TmplTokenizer.pm,1.3,1.4 VerboseWarnings.pm,1.1,1.2 text-extract2.pl,1.36,1.37
Ambrose Li
acli at users.sourceforge.net
Tue Feb 17 03:45:30 CET 2004
Update of /cvsroot/koha/koha/misc/translator
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv6928
Modified Files:
TmplTokenizer.pm VerboseWarnings.pm text-extract2.pl
Added Files:
TmplToken.pm TmplTokenType.pm
Log Message:
Further breaking up of the TmplTokenizer module.
A couple of minor fixes.
--- NEW FILE ---
package TmplToken;
use strict;
use TmplTokenType;
require Exporter;
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
###############################################################################
=head1 NAME
TmplToken.pm - Object representing a scanner token for .tmpl files
=head1 DESCRIPTION
This is a class representing a token scanned from an HTML::Template .tmpl file.
=cut
###############################################################################
$VERSION = 0.01;
@ISA = qw(Exporter);
@EXPORT_OK = qw();
###############################################################################
sub new {
my $this = shift;
my $class = ref($this) || $this;
my $self = {};
bless $self, $class;
($self->{'_string'}, $self->{'_type'}, $self->{'_lc'}) = @_;
return $self;
}
sub string {
my $this = shift;
return $this->{'_string'}
}
sub type {
my $this = shift;
return $this->{'_type'}
}
sub line_number {
my $this = shift;
return $this->{'_lc'}
}
sub attributes {
my $this = shift;
return $this->{'_attr'};
}
sub set_attributes {
my $this = shift;
$this->{'_attr'} = ref $_[0] eq 'HASH'? $_[0]: \@_;
return $this;
}
###############################################################################
1;
--- NEW FILE ---
package TmplTokenType;
use strict;
require Exporter;
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
###############################################################################
=head1 NAME
TmplTokenType.pm - Types of TmplToken objects
=head1 DESCRIPTION
This is a Java-style "safe enum" singleton class for types of TmplToken objects.
=cut
###############################################################################
$VERSION = 0.01;
@ISA = qw(Exporter);
@EXPORT_OK = qw(
&TEXT
&CDATA
&TAG
&DECL
&PI
&DIRECTIVE
&COMMENT
&UNKNOWN
);
###############################################################################
use vars qw( $_text $_cdata $_tag $_decl $_pi $_directive $_comment $_unknown );
BEGIN {
my $new = sub {
my $this = shift;
my $class = ref($this) || $this;
my $self = {};
bless $self, $class;
($self->{'id'}, $self->{'name'}, $self->{'desc'}) = @_;
return $self;
};
$_text = &$new(0, 'TEXT');
$_cdata = &$new(1, 'CDATA');
$_tag = &$new(2, 'TAG');
$_decl = &$new(3, 'DECL');
$_pi = &$new(4, 'PI');
$_directive = &$new(5, 'DIRECTIVE');
$_comment = &$new(6, 'COMMENT');
$_unknown = &$new(7, 'UNKNOWN');
}
sub to_string {
my $this = shift;
return $this->{'name'}
}
sub TEXT () { $_text }
sub CDATA () { $_cdata }
sub TAG () { $_tag }
sub DECL () { $_decl }
sub PI () { $_pi }
sub DIRECTIVE () { $_directive }
sub COMMENT () { $_comment }
sub UNKNOWN () { $_unknown }
###############################################################################
1;
Index: TmplTokenizer.pm
===================================================================
RCS file: /cvsroot/koha/koha/misc/translator/TmplTokenizer.pm,v
retrieving revision 1.3
retrieving revision 1.4
diff -C2 -r1.3 -r1.4
*** TmplTokenizer.pm 16 Feb 2004 23:50:56 -0000 1.3
--- TmplTokenizer.pm 17 Feb 2004 02:45:27 -0000 1.4
***************
*** 2,5 ****
--- 2,7 ----
use strict;
+ use TmplTokenType;
+ use TmplToken;
use VerboseWarnings qw( pedantic_p error_normal warn_normal warn_pedantic );
require Exporter;
***************
*** 33,46 ****
@ISA = qw(Exporter);
! @EXPORT_OK = qw(
! &KIND_TEXT
! &KIND_CDATA
! &KIND_TAG
! &KIND_DECL
! &KIND_PI
! &KIND_DIRECTIVE
! &KIND_COMMENT
! &KIND_UNKNOWN
! );
use vars qw( $input );
--- 35,39 ----
@ISA = qw(Exporter);
! @EXPORT_OK = qw();
use vars qw( $input );
***************
*** 92,104 ****
# End of the hideous stuff
- sub KIND_TEXT () { 'TEXT' }
- sub KIND_CDATA () { 'CDATA' }
- sub KIND_TAG () { 'TAG' }
- sub KIND_DECL () { 'DECL' }
- sub KIND_PI () { 'PI' }
- sub KIND_DIRECTIVE () { 'HTML::Template' }
- sub KIND_COMMENT () { 'COMMENT' } # empty DECL with exactly one SGML comment
- sub KIND_UNKNOWN () { 'ERROR' }
-
use vars qw( $readahead $lc_0 $lc $syntaxerror_p );
use vars qw( $cdata_mode_p $cdata_close );
--- 85,88 ----
***************
*** 180,188 ****
;
} elsif ($readahead =~ /^\s+/s) { # whitespace
! ($kind, $it, $readahead) = (KIND_TEXT, $&, $');
# FIXME the following (the [<\s] part) is an unreliable HACK :-(
} elsif ($readahead =~ /^(?:[^<]|<[<\s])+/s) { # non-space normal text
! ($kind, $it, $readahead) = (KIND_TEXT, $&, $');
! warn_normal "Warning: Unescaped < $it\n", $lc_0
if !$cdata_mode_p && $it =~ /</s;
} else { # tag/declaration/processing instruction
--- 164,172 ----
;
} elsif ($readahead =~ /^\s+/s) { # whitespace
! ($kind, $it, $readahead) = (TmplTokenType::TEXT, $&, $');
# FIXME the following (the [<\s] part) is an unreliable HACK :-(
} elsif ($readahead =~ /^(?:[^<]|<[<\s])+/s) { # non-space normal text
! ($kind, $it, $readahead) = (TmplTokenType::TEXT, $&, $');
! warn_normal "Unescaped < in $it\n", $lc_0
if !$cdata_mode_p && $it =~ /</s;
} else { # tag/declaration/processing instruction
***************
*** 191,206 ****
if ($cdata_mode_p) {
if ($readahead =~ /^$cdata_close/) {
! ($kind, $it, $readahead) = (KIND_TAG, $&, $');
$ok_p = 1;
} else {
! ($kind, $it, $readahead) = (KIND_TEXT, $readahead, undef);
$ok_p = 1;
}
} elsif ($readahead =~ /^$re_tag_compat/os) {
! ($kind, $it, $readahead) = (KIND_TAG, "$1>", $3);
$ok_p = 1;
warn_normal "SGML \"closed start tag\" notation: $1<\n", $lc_0 if $2 eq '';
} elsif ($readahead =~ /^<!--(?:(?!-->).)*-->/s) {
! ($kind, $it, $readahead) = (KIND_COMMENT, $&, $');
$ok_p = 1;
warn_normal "Syntax error in comment: $&\n", $lc_0;
--- 175,190 ----
if ($cdata_mode_p) {
if ($readahead =~ /^$cdata_close/) {
! ($kind, $it, $readahead) = (TmplTokenType::TAG, $&, $');
$ok_p = 1;
} else {
! ($kind, $it, $readahead) = (TmplTokenType::TEXT, $readahead, undef);
$ok_p = 1;
}
} elsif ($readahead =~ /^$re_tag_compat/os) {
! ($kind, $it, $readahead) = (TmplTokenType::TAG, "$1>", $3);
$ok_p = 1;
warn_normal "SGML \"closed start tag\" notation: $1<\n", $lc_0 if $2 eq '';
} elsif ($readahead =~ /^<!--(?:(?!-->).)*-->/s) {
! ($kind, $it, $readahead) = (TmplTokenType::COMMENT, $&, $');
$ok_p = 1;
warn_normal "Syntax error in comment: $&\n", $lc_0;
***************
*** 214,237 ****
$readahead .= $next;
}
! if ($kind ne KIND_TAG) {
;
} elsif ($it =~ /^<!/) {
! $kind = KIND_DECL;
! $kind = KIND_COMMENT if $it =~ /^<!--(?:(?!-->).)*-->/;
} elsif ($it =~ /^<\?/) {
! $kind = KIND_PI;
}
if ($it =~ /^$re_directive/ios && !$cdata_mode_p) {
! $kind = KIND_DIRECTIVE;
}
if (!$ok_p && $eof_p) {
! ($kind, $it, $readahead) = (KIND_UNKNOWN, $readahead, undef);
$syntaxerror_p = 1;
}
}
warn_normal "Unrecognizable token found: $it\n", $lc_0
! if $kind eq KIND_UNKNOWN;
! return defined $it? (wantarray? ($kind, $it):
! [$kind, $it]): undef;
}
--- 198,220 ----
$readahead .= $next;
}
! if ($kind ne TmplTokenType::TAG) {
;
} elsif ($it =~ /^<!/) {
! $kind = TmplTokenType::DECL;
! $kind = TmplTokenType::COMMENT if $it =~ /^<!--(?:(?!-->).)*-->/;
} elsif ($it =~ /^<\?/) {
! $kind = TmplTokenType::PI;
}
if ($it =~ /^$re_directive/ios && !$cdata_mode_p) {
! $kind = TmplTokenType::DIRECTIVE;
}
if (!$ok_p && $eof_p) {
! ($kind, $it, $readahead) = (TmplTokenType::UNKNOWN, $readahead, undef);
$syntaxerror_p = 1;
}
}
warn_normal "Unrecognizable token found: $it\n", $lc_0
! if $kind eq TmplTokenType::UNKNOWN;
! return defined $it? TmplToken->new($it, $kind, $lc): undef;
}
***************
*** 241,248 ****
if (!$cdata_mode_p) {
$it = next_token_internal($h);
! if (defined $it && $it->[0] eq KIND_TAG) { # FIXME
($cdata_mode_p, $cdata_close) = (1, "</$1\\s*>")
! if $it->[1] =~ /^<(script|style|textarea)\b/i; #FIXME
! push @$it, extract_attributes($it->[1], $lc_0); #FIXME
}
} else {
--- 224,231 ----
if (!$cdata_mode_p) {
$it = next_token_internal($h);
! if (defined $it && $it->type eq TmplTokenType::TAG) {
($cdata_mode_p, $cdata_close) = (1, "</$1\\s*>")
! if $it->string =~ /^<(script|style|textarea)\b/i;
! $it->set_attributes( extract_attributes($it->string, $lc_0) );
}
} else {
***************
*** 251,265 ****
my $next = next_token_internal($h);
last if !defined $next;
! if (defined $next && $next->[1] =~ /$cdata_close/i) { #FIXME
! ($lc, $readahead) = ($lc_prev, $next->[1] . $readahead); #FIXME
$cdata_mode_p = 0;
}
last unless $cdata_mode_p;
! $it .= $next->[1]; #FIXME
}
! $it = [KIND_CDATA, $it]; #FIXME
$cdata_close = undef;
}
! return defined $it? (wantarray? @$it: $it): undef;
}
--- 234,248 ----
my $next = next_token_internal($h);
last if !defined $next;
! if (defined $next && $next->string =~ /$cdata_close/i) {
! ($lc, $readahead) = ($lc_prev, $next->string . $readahead);
$cdata_mode_p = 0;
}
last unless $cdata_mode_p;
! $it .= $next->string;
}
! $it = TmplToken->new( $it, TmplTokenType::CDATA, $lc );
$cdata_close = undef;
}
! return $it;
}
***************
*** 291,293 ****
--- 274,288 ----
rework in tmpl_process.pl
+ Gettext-style line number references would also be very helpful in
+ disambiguating the strings. Ultimately, we should generate and work
+ with gettext-style po files, so that translators are able to use
+ tools designed for gettext.
+
+ An example of a string untranslatable to Chinese is "Accounts for";
+ "Accounts for %s", however, would be translatable. Short words like
+ "in" would also be untranslatable, not only to Chinese, but also to
+ languages requiring declension of nouns.
+
=cut
+
+ 1;
Index: VerboseWarnings.pm
===================================================================
RCS file: /cvsroot/koha/koha/misc/translator/VerboseWarnings.pm,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -r1.1 -r1.2
*** VerboseWarnings.pm 16 Feb 2004 23:42:57 -0000 1.1
--- VerboseWarnings.pm 17 Feb 2004 02:45:27 -0000 1.2
***************
*** 25,31 ****
@ISA = qw(Exporter);
@EXPORT_OK = qw(
- &set_application_name
- &set_input_file_name
- &set_pedantic_mode
&pedantic_p
&warn_normal
--- 25,28 ----
***************
*** 46,50 ****
my($s) = @_;
$input = $s;
! $input_abbr = $& if !defined $input && defined $s && $s =~ /[^\/]+$/;
}
--- 43,47 ----
my($s) = @_;
$input = $s;
! $input_abbr = $& if defined $s && $s =~ /[^\/]+$/;
}
Index: text-extract2.pl
===================================================================
RCS file: /cvsroot/koha/koha/misc/translator/text-extract2.pl,v
retrieving revision 1.36
retrieving revision 1.37
diff -C2 -r1.36 -r1.37
*** text-extract2.pl 16 Feb 2004 23:50:56 -0000 1.36
--- text-extract2.pl 17 Feb 2004 02:45:27 -0000 1.37
***************
*** 33,41 ****
last unless defined $s;
printf "%s\n", ('-' x 79);
! my($kind, $t, $attr) = @$s; # FIXME
printf "%s:\n", $kind;
printf "%4dH%s\n", length($t),
join('', map {/[\0-\37]/? $_: "$_\b$_"} split(//, $t));
! if ($kind eq TmplTokenizer::KIND_TAG && %$attr) {
printf "Attributes:\n";
for my $a (keys %$attr) {
--- 33,41 ----
last unless defined $s;
printf "%s\n", ('-' x 79);
! my($kind, $t, $attr) = ($s->type, $s->string, $s->attributes);
printf "%s:\n", $kind;
printf "%4dH%s\n", length($t),
join('', map {/[\0-\37]/? $_: "$_\b$_"} split(//, $t));
! if ($kind eq TmplTokenType::TAG && %$attr) {
printf "Attributes:\n";
for my $a (keys %$attr) {
***************
*** 57,65 ****
my $s = TmplTokenizer::next_token $h;
last unless defined $s;
! my($kind, $t, $attr) = @$s; # FIXME
! if ($kind eq TmplTokenizer::KIND_TEXT) {
$t = TmplTokenizer::trim $t;
$text{$t} = 1 if $t =~ /\S/s;
! } elsif ($kind eq TmplTokenizer::KIND_TAG && %$attr) {
# value [tag=input], meta
my $tag = lc($1) if $t =~ /^<(\S+)/s;
--- 57,65 ----
my $s = TmplTokenizer::next_token $h;
last unless defined $s;
! my($kind, $t, $attr) = ($s->type, $s->string, $s->attributes);
! if ($kind eq TmplTokenType::TEXT) {
$t = TmplTokenizer::trim $t;
$text{$t} = 1 if $t =~ /\S/s;
! } elsif ($kind eq TmplTokenType::TAG && %$attr) {
# value [tag=input], meta
my $tag = lc($1) if $t =~ /^<(\S+)/s;
More information about the Koha-cvs
mailing list