#!/usr/bin/perl -w
#
# <@LICENSE>
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to you under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at:
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# </@LICENSE>
use strict;
use warnings;
use FindBin;
use Getopt::Std;
# Lingua::Translate has a bunch of requirements (see bottom of its perldoc)
use Lingua::Translate;
# Command-line option values; Getopt::Std's getopts() stores each -X switch
# in the package variable $opt_X.
our ($opt_c, $opt_e, $opt_h, $opt_n, $opt_r);
# %rules and %scores come from tmp/rules.pl
our (%rules, %scores);
# Abort the program with the command-line help text.
#
# Takes no arguments and does not return normally: the help text is thrown
# with die().  Because the text ends in a newline, Perl does not append the
# usual "at FILE line N." suffix to it.
sub usage {
    my $help_text = <<'END_OF_USAGE';
generate-translation language output_file
-h print this help
-e STR use STR as destination character set (using Lingua::Translate)
-r STR use STR as destination character set (using recode)
-n N translate first N rules (used for testing)
-c DIR use DIR as rules directory
language should be a two letter language code from this list:
zh: Chinese-simplified
zt: Chinese-traditional
nl: Dutch
fr: French
de: German
el: Greek
it: Italian
ja: Japanese
ko: Korean
pt: Portuguese
ru: Russian
es: Spanish
progress is displayed on standard error
END_OF_USAGE

    die $help_text;
}
# Parse the command line.  getopts() returns false when it meets an invalid
# option; show the help text then instead of carrying on with a command
# line that was only partly understood.
getopts("hc:e:n:r:") or usage();
usage() if ($opt_h || @ARGV < 2);

# options
my $dest   = shift @ARGV;                          # destination language code
my $output = shift @ARGV;                          # file to write the result to
my $cffile = $opt_c || "$FindBin::Bin/../rules";   # rules directory (-c)
my $enc    = $opt_e || "utf8";                     # dest_enc for Lingua::Translate (-e)
my $recode = $opt_r || "UTF-8";                    # target charset for recode (-r)

# rule => configuration hashes
my %english;        # rule name => English "describe" line
my %old;            # rule name => line found in a previous translation file
my %translation;    # rule name => freshly translated "lang ... describe" line

# translation cache: English description => translated description
my %lang_cache;

# do the work
read_rules($cffile);
generate_translation();
print_translation();
# Parse the rules directory and load the resulting rule tables.
#
#   $cffile - rules directory handed to build/parse-rules-for-masses via -d
#
# After the parser has run, tmp/rules.pl is loaded with require (presumably
# the file the parser just wrote -- confirm against parse-rules-for-masses).
# Dies when the parser exits with a non-zero status or when tmp/rules.pl
# cannot be loaded.
sub read_rules {
    my ($cffile) = @_;

    # List-form system() does not go through the shell, so a directory name
    # containing quotes, spaces or other shell metacharacters is passed to
    # the parser unchanged.
    my @parser_cmd = ("$FindBin::Bin/../build/parse-rules-for-masses",
                      "-d", $cffile);
    system(@parser_cmd) == 0
        or die "unable to parse rules\n";

    # require throws on failure rather than returning false, so a plain
    # "require ... or die" can never reach its die.  Trap the exception to
    # report the intended message together with the underlying reason.
    my $rules_pl = "$FindBin::Bin/tmp/rules.pl";
    eval { require $rules_pl; 1 }
        or die "unable to read tmp/rules.pl: $@";
}
# Translate the description of every rule into the destination language.
#
# Works on file-level state rather than arguments.  It reads %rules, $dest,
# $enc and $opt_n, and fills:
#   %old          rule name => "lang DEST describe NAME TEXT\n" lines found
#                 in an existing rules/30_text_DEST.cf, if there is one
#   %english      rule name => "describe NAME\tTEXT\n"
#   %translation  rule name => "lang DEST describe NAME\tTEXT\n"
#   %lang_cache   English description => translated description
#
# Progress is written to standard error: "." for each translated
# description, "x" for each one that failed, and a final newline.
# With -n N the loop stops after N described rules have been attempted.
# Dies when Lingua::Translate->new returns a false value.
sub generate_translation {
    my $fish = Lingua::Translate->new(src      => "en",
                                      dest     => $dest,
                                      dest_enc => $enc)
        or die "no translation server available for en -> $dest\n";

    # see if we had an old translation
    # NOTE(review): this path is fixed to ../rules and does not follow the
    # -c rules directory -- confirm that is intended.
    my $old_file = "$FindBin::Bin/../rules/30_text_$dest.cf";
    if (-f $old_file) {
        # The old translation is optional, so an unreadable file is reported
        # and skipped instead of aborting the run.
        if (open(my $old_fh, '<', $old_file)) {
            while (my $line = <$old_fh>) {
                # \Q...\E makes the language code match literally.
                if ($line =~ /^lang\s+\Q$dest\E\s+describe\s+(\S+)\s+(.*?)\s*$/) {
                    $old{$1} = "lang $dest describe $1 $2\n";
                }
            }
            close($old_fh);
        }
        else {
            warn "unable to read $old_file: $!\n";
        }
    }

    # try to generate new translation
    my $count = 0;
    for my $name (sort keys %rules) {
        next if ($name eq '_scoreset');
        next if ($rules{$name}->{lang});    # skip entries flagged with "lang"
        next unless defined $rules{$name}->{describe};

        my $describe = $rules{$name}->{describe};

        # Pattern used below to put the original rule name back into the
        # translated description.  Literal text is always quoted, so regex
        # metacharacters in translator output cannot break the substitution.
        my $name_pattern = quotemeta($name);

        # translate name if it appears in the description
        if ($describe =~ /\Q$name\E/) {
            # translate() dies or croaks on error
            my $lang_name = eval { $fish->translate($name) };
            if ($@ || !defined $lang_name || $lang_name eq '') {
                # No usable translation of the name: fall back to matching
                # anything shaped like a rule name.
                $name_pattern = '[A-Z]+[A-Z0-9_]+[A-Z0-9]';
            }
            else {
                $name_pattern = quotemeta($lang_name);
            }
        }

        # English version
        $english{$name} = "describe $name\t$describe\n";

        # translate description
        my $lang_describe = '';
        eval {
            if (defined $lang_cache{$describe}) {
                $lang_describe = $lang_cache{$describe};
            }
            else {
                # dies or croaks on error
                $lang_describe = $fish->translate($describe);
                # Treat a missing result as a failure too, so it is counted
                # as "x" instead of being written out as an empty line.
                die "no translation returned\n" unless defined $lang_describe;
                $lang_describe =~ s/\s+/ /sg;
                $lang_describe =~ s/ $//g;
                $lang_cache{$describe} = $lang_describe;
            }
        };

        if ($@) {
            # didn't work
            print STDERR "x";
        }
        else {
            $lang_describe =~ s/$name_pattern/$name/;
            $translation{$name} = "lang $dest describe $name\t$lang_describe\n";
            print STDERR ".";
        }

        $count++;
        last if ($opt_n && $count == $opt_n);
    }
    print STDERR "\n" if $count > 0;
}
# Write the translation file to $output.
#
# Works on file-level state rather than arguments.  The file consists of:
#   1. a fixed header and licence comment,
#   2. a "lang DEST report_charset CHARSET" line,
#   3. the report template lines of rules/10_misc.cf, each prefixed with
#      "lang DEST ",
#   4. for every rule in %english, commented-out copies of the English
#      line, the new translation and the old translation, where present.
# With -r the finished file is then converted from $enc to $recode by
# /usr/bin/recode.
#
# Dies when the output file cannot be opened or completely written.  A
# missing 10_misc.cf or a failing recode is reported with a warning only.
sub print_translation {
    # The advertised charset is the recode target when -r was given,
    # otherwise the encoding requested from the translator.
    my $charset = $opt_r ? $recode : $enc;
    $charset =~ s/utf-?8/utf-8/i;

    open(my $out_fh, '>', $output)
        or die "unable to write $output: $!\n";

    print {$out_fh} <<'EOF';
#
# Please don't modify this file as your changes will be overwritten with
# the next update. Use @@LOCAL_RULES_DIR@@/local.cf instead.
# See 'perldoc Mail::SpamAssassin::Conf' for details.
#
# <@LICENSE>
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to you under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at:
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# </@LICENSE>
#
###########################################################################
# character set used in the following texts
EOF
    print {$out_fh} <<EOF;
lang $dest report_charset $charset
# make me pretty
EOF

    # Copy the report template lines, scoped to the destination language.
    # NOTE(review): this path is fixed to ../rules and does not follow the
    # -c rules directory -- confirm that is intended.
    my $misc_file = "$FindBin::Bin/../rules/10_misc.cf";
    if (open(my $misc_fh, '<', $misc_file)) {
        while (my $line = <$misc_fh>) {
            if ($line =~ /^(?:clear_report_template|report|clear_unsafe_report_template|unsafe_report)\b/) {
                print {$out_fh} "lang $dest $line";
            }
        }
        close($misc_fh);
    }
    else {
        warn "unable to read $misc_file: $!\n";
    }
    print {$out_fh} "\n\n";

    for my $name (sort keys %english) {
        print {$out_fh} "# $english{$name}";
        print {$out_fh} "# $translation{$name}" if $translation{$name};
        print {$out_fh} "# $old{$name}" if $old{$name};
        print {$out_fh} "\n";
    }

    # Close (and check) the file before recode touches it: buffered write
    # errors only surface at close, and recode must see the finished file.
    close($out_fh)
        or die "unable to finish writing $output: $!\n";

    if ($opt_r) {
        # List form: the charsets and the file name come from the command
        # line and must not be interpreted by a shell.
        system("/usr/bin/recode", "$enc..$recode", $output) == 0
            or warn "recode of $output from $enc to $recode failed\n";
    }
}