[Koha-cvs] CVS: koha/misc/translator TmplTokenizer.pm,1.22,1.23 tmpl_process3.pl,1.5,1.6 xgettext.pl,1.4,1.5

Mon Feb 23 02:21:06 CET 2004

Update of /cvsroot/koha/koha/misc/translator
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv13082

Modified Files:
	TmplTokenizer.pm tmpl_process3.pl xgettext.pl 
Log Message:
Fold every run of consecutive whitespace into a single blank. This avoids
problems when minor whitespace changes occur in the original templates; it
also makes the strings much easier to read (e.g., instead of
"foo\n\n\t\t  bar", xgettext.pl will now always generate "foo bar", and
tmpl_process3.pl will treat it as the same string as the original).

Index: TmplTokenizer.pm
===================================================================
RCS file: /cvsroot/koha/koha/misc/translator/TmplTokenizer.pm,v
retrieving revision 1.22
retrieving revision 1.23
diff -C2 -r1.22 -r1.23
*** TmplTokenizer.pm	22 Feb 2004 21:34:40 -0000	1.22
--- TmplTokenizer.pm	23 Feb 2004 01:21:03 -0000	1.23
***************
*** 449,455 ****
--- 449,471 ----
  }

+ sub string_canon ($) {
+     my($s) = @_;
+     if (1) { # FIXME
+ 	# Fold all whitespace into single blanks
+ 	$s =~ s/\s+/ /gs;
+     }
+     return $s;
+ }
+ 
+ sub _formalize_string_cformat ($) {
+     my($s) = @_;
+     return _quote_cformat string_canon $s;
+ }
+ 
  sub _formalize ($) {
      my($t) = @_;
      return $t->type == TmplTokenType::DIRECTIVE? '%s':
+ 	   $t->type == TmplTokenType::TEXT?
+ 		   _formalize_string_cformat($t->string):
  	   $t->type == TmplTokenType::TAG?
  		   ($t->string =~ /^<a\b/is? '<a>': _quote_cformat($t->string)):
***************
*** 599,602 ****
--- 615,622 ----
  	}
      }
+     if (defined $it && $it->type == TmplTokenType::TEXT) {
+ 	my $form = string_canon $it->string;
+ 	$it->set_form( $form );
+     }
      return $it;
  }

Index: tmpl_process3.pl
===================================================================
RCS file: /cvsroot/koha/koha/misc/translator/tmpl_process3.pl,v
retrieving revision 1.5
retrieving revision 1.6
diff -C2 -r1.5 -r1.6
*** tmpl_process3.pl	22 Feb 2004 21:34:40 -0000	1.5
--- tmpl_process3.pl	23 Feb 2004 01:21:03 -0000	1.6
***************
*** 33,38 ****
  sub find_translation ($) {
      my($s) = @_;
!     my $key = TmplTokenizer::quote_po($s) if $s =~ /\S/;
!     $key = TmplTokenizer::charset_convert($key, $charset_in, $charset_out);
      return defined $href->{$key}
  		&& !$href->{$key}->fuzzy
--- 33,44 ----
  sub find_translation ($) {
      my($s) = @_;
!     my $key = $s;
!     if ($s =~ /\S/s) {
!     print STDERR "DEBUG: before: ($key)\n";
! 	$key = TmplTokenizer::string_canon($key);
! 	$key = TmplTokenizer::charset_convert($key, $charset_in, $charset_out);
! 	$key = TmplTokenizer::quote_po($key);
!     print STDERR "DEBUG: after: ($key)\n";
!     }
      return defined $href->{$key}
  		&& !$href->{$key}->fuzzy
***************
*** 313,329 ****
  file as c-format strings with %s.

  =head1 BUGS

  The --help option has not been implemented yet.

- If an extracted string contain actual text (versus tags or
- TMPL_VAR directives), the strings are extracted verbatim,
- resulting in unwieldy things like multiple spaces, tabs,
- and/or newlines which are semantically indistinguishable
- from single blanks. If the template writer changes the
- spacing just a little bit, the new formatting would be
- considered new strings. This is arguably wrong, and in any
- case counter-productive.
- 
  xgettext.pl must be present in the current directory; the
  msgmerge(1) command must also be present in the search path.
--- 319,331 ----
  file as c-format strings with %s.

+ Runs of whitespace in extracted strings are folded into single
+ blanks, in order to prevent new strings from appearing when minor
+ changes in the original templates occur, and to keep the strings
+ in the PO file easy to read.
+ 
  =head1 BUGS

  The --help option has not been implemented yet.

  xgettext.pl must be present in the current directory; the
  msgmerge(1) command must also be present in the search path.

Index: xgettext.pl
===================================================================
RCS file: /cvsroot/koha/koha/misc/translator/xgettext.pl,v
retrieving revision 1.4
retrieving revision 1.5
diff -C2 -r1.4 -r1.5
*** xgettext.pl	22 Feb 2004 21:34:40 -0000	1.4
--- xgettext.pl	23 Feb 2004 01:21:03 -0000	1.5
***************
*** 56,61 ****
      # If we determine that the string is negligible, don't bother to remember
      unless (string_negligible_p( $string ) || token_negligible_p( $token )) {
! 	$text{$string} = [] unless defined $text{$string};
! 	push @{$text{$string}}, $token;
      }
  }
--- 56,62 ----
      # If we determine that the string is negligible, don't bother to remember
      unless (string_negligible_p( $string ) || token_negligible_p( $token )) {
! 	my $key = TmplTokenizer::string_canon( $string );
! 	$text{$key} = [] unless defined $text{$key};
! 	push @{$text{$key}}, $token;
      }
  }
***************
*** 154,157 ****
--- 155,159 ----
  	printf OUTPUT "#, c-format\n" if $cformat_p;
  	printf OUTPUT "msgid %s\n", TmplTokenizer::quote_po
+ 		TmplTokenizer::string_canon
  		TmplTokenizer::charset_convert $t, $charset_in, $charset_out;
  	printf OUTPUT "msgstr %s\n\n", (defined $translation{$t}?