[Koha-cvs] CVS: koha/misc/translator TmplTokenizer.pm,1.16,1.17 tmpl_process3.pl,1.2,1.3 xgettext.pl,1.1,1.2
Ambrose C. LI
acli at users.sourceforge.net
Sun Feb 22 06:18:55 CET 2004
Update of /cvsroot/koha/koha/misc/translator
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv31961
Modified Files:
TmplTokenizer.pm tmpl_process3.pl xgettext.pl
Log Message:
Handle the iso8859-1 charset somewhat, so that when the po file is in
either iso8859-1 or utf8, msgmerge(1) won't crap out. The code is ugly;
the conversion table is hard-coded, and in some places not very appropriate.
However, this does fix the case where a few strings containing French
characters can't be translated. As a side effect, tmpl_process3 can now
also be used for French or other languages using iso8859-1.
Index: TmplTokenizer.pm
===================================================================
RCS file: /cvsroot/koha/koha/misc/translator/TmplTokenizer.pm,v
retrieving revision 1.16
retrieving revision 1.17
diff -C2 -r1.16 -r1.17
*** TmplTokenizer.pm 20 Feb 2004 07:52:32 -0000 1.16
--- TmplTokenizer.pm 22 Feb 2004 05:18:52 -0000 1.17
***************
*** 540,544 ****
}
! # Complication function that shouldn't be here
sub parametrize ($@) {
my($fmt, @params) = @_;
--- 540,544 ----
}
! # Some functions that shouldn't be here... should be moved out some time
sub parametrize ($@) {
my($fmt, @params) = @_;
***************
*** 573,576 ****
--- 573,584 ----
}
+ sub charset_canon ($) {
+ my($charset) = @_;
+ $charset = uc($charset);
+ $charset = "$1-$2" if $charset =~ /^(ISO|UTF)(\d.*)/i;
+ $charset = 'Big5' if $charset eq 'BIG5'; # "Big5" must be in mixed case
+ return $charset;
+ }
+
###############################################################################
Index: tmpl_process3.pl
===================================================================
RCS file: /cvsroot/koha/koha/misc/translator/tmpl_process3.pl,v
retrieving revision 1.2
retrieving revision 1.3
diff -C2 -r1.2 -r1.3
*** tmpl_process3.pl 19 Feb 2004 21:28:14 -0000 1.2
--- tmpl_process3.pl 22 Feb 2004 05:18:52 -0000 1.3
***************
*** 27,30 ****
--- 27,31 ----
use vars qw( $href );
use vars qw( $type ); # file extension (DOS form without the dot) to match
+ use vars qw( $charset_in $charset_out );
###############################################################################
***************
*** 99,103 ****
}
- # FIXME: Should we use the GNOME convention of using POTFILES.in instead?
sub listfiles ($$) {
my($dir, $type) = @_;
--- 100,103 ----
***************
*** 147,150 ****
--- 147,156 ----
VerboseWarnings::set_pedantic_mode $pedantic_p;
+ # keep the buggy Locale::PO quiet if it says stupid things
+ $SIG{__WARN__} = sub {
+ my($s) = @_;
+ print STDERR $s unless $s =~ /^Strange line in [^:]+: #~/s
+ };
+
my $action = shift or usage_error('You must specify an ACTION.');
usage_error('You must at least specify input and string list filenames.')
***************
*** 180,192 ****
}
if ($action eq 'create') {
# updates the list. As the list is empty, every entry will be added
die "$str_file: Output file already exists" if -f $str_file;
my($tmph, $tmpfile) = tmpnam();
for my $input (@in_files) {
print $tmph "$input\n";
}
close $tmph;
! system {'./xgettext.pl'} ('xgettext.pl', '-s', '-f', $tmpfile, '-o', $str_file);
unlink $tmpfile || warn_normal "$tmpfile: unlink failed: $!\n", undef;
--- 186,221 ----
}
+ # restores the string list from file
+ $href = Locale::PO->load_file_ashash($str_file);
+
+ # guess the charsets. HTML::Templates defaults to iso-8859-1
+ if (defined $href) {
+ $charset_out = TmplTokenizer::charset_canon $2
+ if $href->{'""'}->msgstr =~ /\bcharset=(["']?)([^;\s"'\\]+)\1/;
+ for my $msgid (keys %$href) {
+ if ($msgid =~ /\bcharset=(["']?)([^;\s"'\\]+)\1/) {
+ my $candidate = TmplTokenizer::charset_canon $2;
+ die "Conflicting charsets in msgid: $charset_in vs $candidate\n"
+ if defined $charset_in && $charset_in ne $candidate;
+ $charset_in = $2;
+ }
+ }
+ }
+ if (!defined $charset_in) {
+ $charset_in = TmplTokenizer::charset_canon 'iso8859-1';
+ warn "Warning: Can't determine original templates' charset, defaulting to $charset_in\n";
+ }
+
if ($action eq 'create') {
# updates the list. As the list is empty, every entry will be added
die "$str_file: Output file already exists" if -f $str_file;
my($tmph, $tmpfile) = tmpnam();
+ # Generate the temporary file that acts as <MODULE>/POTFILES.in
for my $input (@in_files) {
print $tmph "$input\n";
}
close $tmph;
! # Generate the specified po file ($str_file)
! system ('xgettext.pl', '-s', '-f', $tmpfile, '-o', $str_file);
unlink $tmpfile || warn_normal "$tmpfile: unlink failed: $!\n", undef;
***************
*** 195,203 ****
my($tmph2, $tmpfile2) = tmpnam();
close $tmph2; # We just want a name
for my $input (@in_files) {
print $tmph1 "$input\n";
}
close $tmph1;
! system('./xgettext.pl', '-s', '-f', $tmpfile1, '-o', $tmpfile2);
system('msgmerge', '-U', '-s', $str_file, $tmpfile2);
unlink $tmpfile1 || warn_normal "$tmpfile1: unlink failed: $!\n", undef;
--- 224,239 ----
my($tmph2, $tmpfile2) = tmpnam();
close $tmph2; # We just want a name
+ # Generate the temporary file that acts as <MODULE>/POTFILES.in
for my $input (@in_files) {
print $tmph1 "$input\n";
}
close $tmph1;
! # Generate the temporary file that acts as <MODULE>/<LANG>.pot
! system('./xgettext.pl', '-s', '-f', $tmpfile1, '-o', $tmpfile2,
! (defined $charset_in? ('-I', $charset_in): ()),
! (defined $charset_out? ('-O', $charset_out): ()));
! # Merge the temporary "pot file" with the specified po file ($str_file)
! # FIXME: msgmerge(1) is a Unix dependency
! # FIXME: need to check the return value
system('msgmerge', '-U', '-s', $str_file, $tmpfile2);
unlink $tmpfile1 || warn_normal "$tmpfile1: unlink failed: $!\n", undef;
***************
*** 222,228 ****
close INPUT;
- # restores the string list from file
- $href = Locale::PO->load_file_ashash($str_file);
-
# creates the new tmpl file using the new translation
for my $input (@in_files) {
--- 258,261 ----
Index: xgettext.pl
===================================================================
RCS file: /cvsroot/koha/koha/misc/translator/xgettext.pl,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -r1.1 -r1.2
*** xgettext.pl 19 Feb 2004 21:24:30 -0000 1.1
--- xgettext.pl 22 Feb 2004 05:18:52 -0000 1.2
***************
*** 13,19 ****
use VerboseWarnings;
use vars qw( $files_from $directory $output $sort );
use vars qw( $pedantic_p );
! use vars qw( %text );
###############################################################################
--- 13,60 ----
use VerboseWarnings;
+ use vars qw( $convert_from );
use vars qw( $files_from $directory $output $sort );
use vars qw( $pedantic_p );
! use vars qw( %text %translation );
! use vars qw( $charset_in $charset_out );
!
! ###############################################################################
!
! use vars qw( @latin1_utf8 );
! @latin1_utf8 = (
! "\302\200", "\302\201", "\302\202", "\302\203", "\302\204", "\302\205",
! "\302\206", "\302\207", "\302\210", "\302\211", "\302\212", "\302\213",
! "\302\214", "\302\215", undef, undef, "\302\220", "\302\221",
! "\302\222", "\302\223", "\302\224", "\302\225", "\302\226", "\302\227",
! "\302\230", "\302\231", "\302\232", "\302\233", "\302\234", "\302\235",
! "\302\236", "\302\237", "\302\240", "\302\241", "\302\242", "\302\243",
! "\302\244", "\302\245", "\302\246", "\302\247", "\302\250", "\302\251",
! "\302\252", "\302\253", "\302\254", "\302\255", "\302\256", "\302\257",
! "\302\260", "\302\261", "\302\262", "\302\263", "\302\264", "\302\265",
! "\302\266", "\302\267", "\302\270", "\302\271", "\302\272", "\302\273",
! "\302\274", "\302\275", "\302\276", "\302\277", "\303\200", "\303\201",
! "\303\202", "\303\203", "\303\204", "\303\205", "\303\206", "\303\207",
! "\303\210", "\303\211", "\303\212", "\303\213", "\303\214", "\303\215",
! "\303\216", "\303\217", "\303\220", "\303\221", "\303\222", "\303\223",
! "\303\224", "\303\225", "\303\226", "\303\227", "\303\230", "\303\231",
! "\303\232", "\303\233", "\303\234", "\303\235", "\303\236", "\303\237",
! "\303\240", "\303\241", "\303\242", "\303\243", "\303\244", "\303\245",
! "\303\246", "\303\247", "\303\250", "\303\251", "\303\252", "\303\253",
! "\303\254", "\303\255", "\303\256", "\303\257", "\303\260", "\303\261",
! "\303\262", "\303\263", "\303\264", "\303\265", "\303\266", "\303\267",
! "\303\270", "\303\271", "\303\272", "\303\273", "\303\274", "\303\275",
! "\303\276", "\303\277" );
!
! sub charset_convert ($) {
! my($s) = @_;
! if ($s !~ /[\200-\377]/s) { # FIXME: don't worry about iso2022 for now
! ;
! } elsif ($charset_in eq 'ISO-8859-1' && $charset_out eq 'UTF-8') {
! $s =~ s/[\200-\377]/ $latin1_utf8[ord($&) - 128] /egs;
! } elsif ($charset_in ne $charset_out) {
! VerboseWarnings::warn_normal "conversion from $charset_in to $charset_out is not supported\n", undef;
! }
! return $s;
! }
###############################################################################
***************
*** 88,91 ****
--- 129,134 ----
sub generate_po_file () {
# We don't emit the Plural-Forms header; it's meaningless for us
+ my $pot_charset = (defined $charset_out? $charset_out: 'CHARSET');
+ $pot_charset = TmplTokenizer::charset_canon $pot_charset;
print OUTPUT <<EOF;
# SOME DESCRIPTIVE TITLE.
***************
*** 103,107 ****
"Language-Team: LANGUAGE <LL\@li.org>\\n"
"MIME-Version: 1.0\\n"
! "Content-Type: text/plain; charset=CHARSET\\n"
"Content-Transfer-Encoding: 8bit\\n"
--- 146,150 ----
"Language-Team: LANGUAGE <LL\@li.org>\\n"
"MIME-Version: 1.0\\n"
! "Content-Type: text/plain; charset=$pot_charset\\n"
"Content-Transfer-Encoding: 8bit\\n"
***************
*** 114,123 ****
my $pathname = $token->pathname;
$pathname =~ s/^$directory_re//os;
! printf OUTPUT "#: %s:%d\n", $pathname, $token->line_number;
$cformat_p = 1 if $token->type == TmplTokenType::TEXT_PARAMETRIZED;
}
printf OUTPUT "#, c-format\n" if $cformat_p;
! printf OUTPUT "msgid %s\n", TmplTokenizer::quote_po( $t );
! printf OUTPUT "msgstr \"\"\n\n";
}
}
--- 157,207 ----
my $pathname = $token->pathname;
$pathname =~ s/^$directory_re//os;
! printf OUTPUT "#: %s:%d\n", $pathname, $token->line_number
! if defined $pathname && defined $token->line_number;
$cformat_p = 1 if $token->type == TmplTokenType::TEXT_PARAMETRIZED;
}
printf OUTPUT "#, c-format\n" if $cformat_p;
! printf OUTPUT "msgid %s\n", TmplTokenizer::quote_po( charset_convert $t );
! printf OUTPUT "msgstr %s\n\n", (defined $translation{$t}?
! TmplTokenizer::quote_po( $translation{$t} ): "\"\"");
! }
! }
!
! ###############################################################################
!
! sub convert_translation_file () {
! open(INPUT, "<$convert_from") || die "$convert_from: $!\n";
! VerboseWarnings::set_input_file_name $convert_from;
! while (<INPUT>) {
! chomp;
! my($msgid, $msgstr) = split(/\t/);
! die "$convert_from: $.: Malformed tmpl_process input (no tab)\n"
! unless defined $msgstr;
!
! # Fixup some of the bad strings
! $msgid =~ s/^SELECTED>//;
!
! # Create dummy token
! my $token = TmplToken->new( $msgid, TmplTokenType::UNKNOWN, undef, undef );
! remember( $token, $msgid );
! $msgstr =~ s/^(?:LIMIT;|LIMITED;)//g; # unneeded for tmpl_process3
! $translation{$msgid} = $msgstr unless $msgstr eq '*****';
!
! if ($msgid =~ /\bcharset=(["']?)([^;\s"']+)\1/s) {
! my $candidate = TmplTokenizer::charset_canon $2;
! die "Conflicting charsets in msgid: $candidate vs $charset_in\n"
! if defined $charset_in && $charset_in ne $candidate;
! $charset_in = $candidate;
! }
! if ($msgstr =~ /\bcharset=(["']?)([^;\s"']+)\1/s) {
! my $candidate = TmplTokenizer::charset_canon $2;
! die "Conflicting charsets in msgid: $candidate vs $charset_out\n"
! if defined $charset_out && $charset_out ne $candidate;
! $charset_out = $candidate;
! }
! }
! if (!defined $charset_in) {
! $charset_in = $charset_out = TmplTokenizer::charset_canon 'iso8859-1';
! warn "Warning: Can't determine original templates' charset, defaulting to $charset_in\n";
}
}
***************
*** 165,171 ****
--- 249,259 ----
Getopt::Long::config qw( bundling no_auto_abbrev );
GetOptions(
+ 'charset=s' => sub { $charset_in = $charset_out = $_[1] }, # INTERNAL
+ 'convert-from=s' => \$convert_from,
'D|directory=s' => \$directory,
'f|files-from=s' => \$files_from,
+ 'I|input-charset=s' => \$charset_in, # INTERNAL
'pedantic-warnings|pedantic' => sub { $pedantic_p = 1 },
+ 'O|output-charset=s' => \$charset_out, # INTERNAL
'output|o=s' => \$output,
's|sort-output' => sub { $sort = 's' },
***************
*** 177,183 ****
VerboseWarnings::set_pedantic_mode $pedantic_p;
! usage_error('Missing mandatory option -f') unless defined $files_from;
$directory = '.' unless defined $directory;
if (defined $output && $output ne '-') {
open(OUTPUT, ">$output") || die "$output: $!\n";
--- 265,275 ----
VerboseWarnings::set_pedantic_mode $pedantic_p;
! usage_error('Missing mandatory option -f')
! unless defined $files_from || defined $convert_from;
$directory = '.' unless defined $directory;
+ usage_error('You cannot specify both --convert-from and --files-from')
+ if defined $convert_from && defined $files_from;
+
if (defined $output && $output ne '-') {
open(OUTPUT, ">$output") || die "$output: $!\n";
***************
*** 186,198 ****
}
! open(INPUT, "<$files_from") || die "$files_from: $!\n";
! while (<INPUT>) {
! chomp;
! my $h = TmplTokenizer->new( "$directory/$_" );
! $h->set_allow_cformat( 1 );
! VerboseWarnings::set_input_file_name "$directory/$_";
! text_extract( $h );
}
- close INPUT;
generate_po_file;
--- 278,294 ----
}
! if (defined $files_from) {
! open(INPUT, "<$files_from") || die "$files_from: $!\n";
! while (<INPUT>) {
! chomp;
! my $h = TmplTokenizer->new( "$directory/$_" );
! $h->set_allow_cformat( 1 );
! VerboseWarnings::set_input_file_name "$directory/$_";
! text_extract( $h );
! }
! close INPUT;
! } else {
! convert_translation_file;
}
generate_po_file;
More information about the Koha-cvs
mailing list