[Koha-cvs] CVS: koha/misc/translator TmplToken.pm,1.5,1.6 TmplTokenizer.pm,1.33,1.34 text-extract2.pl,1.41,1.42 tmpl_process3.pl,1.19,1.20 xgettext.pl,1.11,1.12

Ambrose C. LI acli at users.sourceforge.net
Wed Mar 10 08:00:30 CET 2004


Update of /cvsroot/koha/koha/misc/translator
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv18221

Modified Files:
	TmplToken.pm TmplTokenizer.pm text-extract2.pl 
	tmpl_process3.pl xgettext.pl 
Log Message:
Added hack to extract and translate strings inside JavaScript CDATA blocks,
using C-like _("some translatable string") notation. English templates will
need to be modified.


Index: TmplToken.pm
===================================================================
RCS file: /cvsroot/koha/koha/misc/translator/TmplToken.pm,v
retrieving revision 1.5
retrieving revision 1.6
diff -C2 -r1.5 -r1.6
*** TmplToken.pm	27 Feb 2004 13:26:07 -0000	1.5
--- TmplToken.pm	10 Mar 2004 07:00:27 -0000	1.6
***************
*** 110,113 ****
--- 110,156 ----
  }
  
+ # Predicate: true iff the '_js_data' slot holds a defined ARRAY reference.
+ sub has_js_data {
+     return defined $_[0]->{'_js_data'} && ref($_[0]->{'_js_data'}) eq 'ARRAY';
+ }
+ 
+ # Accessor: returns whatever is stored in the '_js_data' slot (undef if unset).
+ sub js_data {
+     return $_[0]->{'_js_data'};
+ }
+ 
+ sub set_js_data {	# stores its argument in '_js_data'; returns the object
+     my($this, $data) = @_;
+     $this->{'_js_data'} = $data;
+     return $this;
+ }
+ 
+ # predefined tests
+ 
+ # Predicate: true when this token's type is == TmplTokenType::TAG.
+ sub tag_p {
+     return $_[0]->type == TmplTokenType::TAG;
+ }
+ 
+ # Predicate: true when this token's type is == TmplTokenType::CDATA.
+ sub cdata_p {
+     return $_[0]->type == TmplTokenType::CDATA;
+ }
+ 
+ # Predicate: true when this token's type is == TmplTokenType::TEXT.
+ sub text_p {
+     return $_[0]->type == TmplTokenType::TEXT;
+ }
+ 
+ # Predicate: true when this token's type is == TmplTokenType::TEXT_PARAMETRIZED.
+ sub text_parametrized_p {
+     return $_[0]->type == TmplTokenType::TEXT_PARAMETRIZED;
+ }
+ 
+ # Predicate: true when this token's type is == TmplTokenType::DIRECTIVE.
+ sub directive_p {
+     return $_[0]->type == TmplTokenType::DIRECTIVE;
+ }
+ 
  ###############################################################################
  

Index: TmplTokenizer.pm
===================================================================
RCS file: /cvsroot/koha/koha/misc/translator/TmplTokenizer.pm,v
retrieving revision 1.33
retrieving revision 1.34
diff -C2 -r1.33 -r1.34
*** TmplTokenizer.pm	8 Mar 2004 05:00:42 -0000	1.33
--- TmplTokenizer.pm	10 Mar 2004 07:00:27 -0000	1.34
***************
*** 94,97 ****
--- 94,98 ----
  sub CDATA_CLOSE		() {'cdata-close'}
  sub PCDATA_MODE_P	() {'pcdata-mode-p'}	# additional submode for CDATA
+ sub JS_MODE_P		() {'js-mode-p'}	# cdata-mode-p must also be true
  
  sub ALLOW_CFORMAT_P	() {'allow-cformat-p'}
***************
*** 170,173 ****
--- 171,179 ----
  }
  
+ # Accessor: returns the value stored under the JS_MODE_P key of this object.
+ sub js_mode_p {
+     return $_[0]->{+JS_MODE_P};
+ }
+ 
  sub cdata_close {
      my $this = shift;
***************
*** 241,244 ****
--- 247,256 ----
  }
  
+ sub _set_js_mode {	# stores the flag under JS_MODE_P; returns the object
+     my($this, $flag) = @_;
+     $this->{+JS_MODE_P} = $flag;
+     return $this;
+ }
+ 
  sub _set_cdata_close {
      my $this = shift;
***************
*** 255,258 ****
--- 267,364 ----
  ###############################################################################
  
+ use vars qw( $js_EscapeSequence );	# package global, interpolated into the StringLiteral regex in split_js
+ BEGIN {
+     # Regex source for one JavaScript escape sequence; factored out because it is too long to repeat (in q{} each \\ pair yields one backslash)
+     $js_EscapeSequence = q{\\\\(?:['"\\\\bfnrt]|[^0-7xu]|[0-3]?[0-7]{1,2}|x[\da-fA-F]{2}|u[\da-fA-F]{4})};
+ }
+ sub parenleft  () { '(' }	# token text compared with eq in identify_js_translatables
+ sub parenright () { ')' }	# likewise, for the closing parenthesis of _("...")
+ 
+ # split_js($js): split JavaScript source text into a list of lexical tokens
+ # (whitespace runs, comments, identifiers, punctuators, number and string
+ # literals).  A character that starts no known token is returned as a token
+ # of its own, so join('', split_js($js)) always reproduces $js exactly.
+ # Returns the tokens as a list (not a reference).
+ sub split_js ($) {
+     my ($s0) = @_;
+     my @it = ();
+     # Scan in place with \G and /gc instead of $& and $', which would copy the
+     # whole unscanned remainder once per token.  With /c a failed alternative
+     # leaves pos($s0) where it was, so the next elsif retries at the same spot.
+     while ((pos($s0) || 0) < length $s0) {
+ 	if ($s0 =~ /\G(\s+)/gcs) {				# whitespace
+ 	    push @it, $1;
+ 	} elsif ($s0 =~ /\G(\/\/[^\r\n]*(?:[\r\n]|$))/gcs) {	# C++-style comment
+ 	    push @it, $1;
+ 	} elsif ($s0 =~ /\G(\/\*(?:(?!\*\/).)*\*\/)/gcs) {	# C-style comment
+ 	    push @it, $1;
+ 	# Keyword or identifier, ECMA-262 p.13 (section 7.5)
+ 	} elsif ($s0 =~ /\G([A-Z_\$][A-Z\d_\$]*)/gcis) {	# IdentifierName
+ 	    push @it, $1;
+ 	# HexIntegerLiteral, ECMA-262 p.15 (section 7.7.3); must be tried before
+ 	# DecimalLiteral, whose "0" alternative would swallow the leading zero
+ 	} elsif ($s0 =~ /\G(0[xX][\da-fA-F]+)/gcs) {
+ 	    push @it, $1;
+ 	# OctalIntegerLiteral, ECMA-262 p.15 (section 7.7.3); digits 0-7 only
+ 	} elsif ($s0 =~ /\G(0[0-7]+)/gcs) {
+ 	    push @it, $1;
+ 	# DecimalLiteral, ECMA-262 p.14 (section 7.7.3); tried before Punctuator
+ 	# so that a leading "." (as in ".5") stays part of the number
+ 	} elsif ($s0 =~ /\G((?:(?:0|[1-9]\d*)(?:\.\d*)?|\.\d+)(?:[eE][-\+]?\d+)?)/gcs) {
+ 	    push @it, $1;
+ 	# Punctuator, ECMA-262 p.13 (section 7.6)
+ 	} elsif ($s0 =~ /\G([\(\){}\[\];]|>>>=|<<=|>>=|[-\+\*\/\&\|\^\%]=|>>>|<<|>>|--|\+\+|\|\||\&\&|==|<=|>=|!=|[=><,!~\?:\.\-\+\*\/\&\|\^\%])/gcs) {
+ 	    push @it, $1;
+ 	# StringLiteral, ECMA-262 p.17 (section 7.7.4)
+ 	# XXX SourceCharacter doesn't seem to be defined (?)
+ 	} elsif ($s0 =~ /\G("(?:(?!["\\\r\n]).|$js_EscapeSequence)*"|'(?:(?!['\\\r\n]).|$js_EscapeSequence)*')/gcos) {
+ 	    push @it, $1;
+ 	} elsif ($s0 =~ /\G(.)/gcs) {				# UNKNOWN TOKEN !!!
+ 	    push @it, $1;
+ 	}
+     }
+     return @it;
+ }
+ 
+ sub STATE_UNDERSCORE     () { 1 }	# seen "_"
+ sub STATE_PARENLEFT      () { 2 }	# seen "_" "("
+ sub STATE_STRING_LITERAL () { 3 }	# seen "_" "(" StringLiteral
+ 
+ # XXX This is a crazy hack. I don't want to write an ECMAScript parser.
+ # XXX A scanner is one thing; a parser another thing.
+ sub identify_js_translatables (@) {
+     # Input: the token list produced by split_js.  Returns a reference to a
+     # list with one entry per token: [0, text] for ordinary tokens, and
+     # [1, text, quote, inner] for the StringLiteral of a _("literal") marker,
+     # where quote is ' or " and inner is the raw text between the quotes.
+     # Escape sequences inside inner are left undecoded.
+     my @input = @_;
+     my @output = ();
+     # We mark a JavaScript translatable string as in C, i.e., _("literal")
+     # For simplicity, we ONLY look for "_" "(" StringLiteral ")"
+     for (my $i = 0, my $state = 0, my($j, $q, $s); $i <= $#input; $i += 1) {
+ 	my $token = $input[$i];
+ 	push @output, [0, $token];
+ 	next if $token !~ /\S/s;	# whitespace neither advances nor resets
+ 	if ($state == STATE_UNDERSCORE && $token eq parenleft) {
+ 	    $state = STATE_PARENLEFT;
+ 	} elsif ($state == STATE_PARENLEFT && $token =~ /^(['"])(.*)\1$/s) {
+ 	    # remember where the literal sits; it is only marked once ")" follows
+ 	    ($state, $j, $q, $s) = (STATE_STRING_LITERAL, $#output, $1, $2);
+ 	} elsif ($state == STATE_STRING_LITERAL && $token eq parenright) {
+ 	    $output[$j] = [1, $output[$j]->[1], $q, $s];
+ 	    $state = 0;
+ 	} else {
+ 	    # Not a continuation.  Restart on THIS token instead of discarding
+ 	    # it, so that a "_" which ends a failed match can begin the next
+ 	    # one, as in _(_("x")).
+ 	    $state = $token eq '_' ? STATE_UNDERSCORE : 0;
+ 	}
+     }
+     return \@output;
+ }
+ 
+ ###############################################################################
+ 
  sub _extract_attributes ($;$) {
      my $this = shift;
***************
*** 431,434 ****
--- 537,541 ----
  		$this->_set_cdata_close( "</$1\\s*>" );
  		$this->_set_pcdata_mode( 0 );
+ 		$this->_set_js_mode( lc($1) eq 'script' );
  #	    } elsif ($it->string =~ /^<(title)\b/is) {
  #		$this->_set_cdata_mode( 1 );
***************
*** 471,476 ****
  			($this->pcdata_mode_p?
  			    TmplTokenType::TEXT: TmplTokenType::CDATA),
! 			$this->line_number )
  		if defined $it;
  	$this->_set_pcdata_mode, 0;
  	$this->_set_cdata_close, undef unless !defined $it;
--- 578,595 ----
  			($this->pcdata_mode_p?
  			    TmplTokenType::TEXT: TmplTokenType::CDATA),
! 			$this->line_number, $this->filename )
  		if defined $it;
+ 	if ($this->js_mode_p) {
+ 	    my $s0 = $it->string;
+ 	    my @head = ();
+ 	    my @tail = ();
+ 	    if ($s0 =~ /^(\s*<!--\s*)(.*)(\s*--\s*>\s*)$/s) {
+ 		push @head, $1;
+ 		push @tail, $3;
+ 		$s0 = $2;
+ 	    }
+ 	    push @head, split_js $s0;
+ 	    $it->set_js_data( identify_js_translatables(@head, @tail) );
+ 	}
  	$this->_set_pcdata_mode, 0;
  	$this->_set_cdata_close, undef unless !defined $it;

Index: text-extract2.pl
===================================================================
RCS file: /cvsroot/koha/koha/misc/translator/text-extract2.pl,v
retrieving revision 1.41
retrieving revision 1.42
diff -C2 -r1.41 -r1.42
*** text-extract2.pl	19 Feb 2004 21:24:30 -0000	1.41
--- text-extract2.pl	10 Mar 2004 07:00:28 -0000	1.42
***************
*** 61,64 ****
--- 61,70 ----
  	    }
  	}
+ 	if ($s->has_js_data) {
+ 	    printf "JavaScript translatable strings:\n";
+ 	    for my $t (@{$s->js_data}) {
+ 		printf "%dH%s\n", length $t->[3], underline $t->[3] if $t->[0]; # FIXME
+ 	    }
+ 	}
      }
  }
***************
*** 89,92 ****
--- 95,102 ----
  		}
  	    }
+ 	} elsif ($s->has_js_data) {
+ 	    for my $t (@{$s->js_data}) {
+ 		remember( $s, $t->[3] ) if $t->[0]; # FIXME
+ 	    }
  	}
      }

Index: tmpl_process3.pl
===================================================================
RCS file: /cvsroot/koha/koha/misc/translator/tmpl_process3.pl,v
retrieving revision 1.19
retrieving revision 1.20
diff -C2 -r1.19 -r1.20
*** tmpl_process3.pl	8 Mar 2004 04:59:38 -0000	1.19
--- tmpl_process3.pl	10 Mar 2004 07:00:28 -0000	1.20
***************
*** 98,101 ****
--- 98,111 ----
  	} elsif ($kind eq TmplTokenType::TAG && %$attr) {
  	    print $output text_replace_tag($t, $attr);
+ 	} elsif ($s->has_js_data) {
+ 	    for my $t (@{$s->js_data}) {
+ 		# FIXME for this whole block
+ 		if ($t->[0]) {
+ 		    printf $output "%s%s%s", $t->[2], find_translation $t->[3],
+ 			    $t->[2];
+ 		} else {
+ 		    print $output $t->[1];
+ 		}
+ 	    }
  	} elsif (defined $t) {
  	    print $output $t;

Index: xgettext.pl
===================================================================
RCS file: /cvsroot/koha/koha/misc/translator/xgettext.pl,v
retrieving revision 1.11
retrieving revision 1.12
diff -C2 -r1.11 -r1.12
*** xgettext.pl	27 Feb 2004 13:26:08 -0000	1.11
--- xgettext.pl	10 Mar 2004 07:00:28 -0000	1.12
***************
*** 108,111 ****
--- 108,115 ----
  		}
  	    }
+ 	} elsif ($s->has_js_data) {
+ 	    for my $t (@{$s->js_data}) {
+ 		remember( $s, $t->[3] ) if $t->[0]; # FIXME
+ 	    }
  	}
      }
***************
*** 199,202 ****
--- 203,209 ----
  		    . (defined $name? " name=$name->[1]": '');
  	    }
+ 	} elsif ($text{$t}->[0]->has_js_data) {
+ 	    printf OUTPUT "#. For the first occurrence,\n" if @{$text{$t}} > 1;
+ 	    printf OUTPUT "#. SCRIPT\n";
  	}
  	my $cformat_p;
***************
*** 377,381 ****
  =item -
  
- (Future goal)
  Translation to non-English-like languages with different word
  order:  gettext's c-format strings can theoretically be
--- 384,387 ----
***************
*** 418,421 ****
--- 424,441 ----
  "update" actions have already been implemented in tmpl_process3.pl.
  
+ =head2 Strings inside JavaScript
+ 
+ In the SCRIPT elements, the script will attempt to scan for
+ _("I<string literal>") patterns, and extract the I<string literal>
+ as a translatable string.
+ 
+ Note that the C-like _(...) notation is required.
+ 
+ The JavaScript must actually define a _ function
+ so that the code remains correct JavaScript.
+ A suitable definition of such a function can be
+ 
+ 	function _(s) { return s } // dummy function for gettext
+ 
  =head1 SEE ALSO
  





More information about the Koha-cvs mailing list