blob: f25f5ef74cbf2df126dbf3f11fdd4d810c4a81a4 [file] [log] [blame]
#!/usr/bin/perl -w
#
# build/mkrules -- compile the SpamAssassin rules into installable form
#
# <@LICENSE>
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to you under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at:
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# </@LICENSE>
# This is an implementation of
# http://wiki.apache.org/spamassassin/RulesProjPromotion
sub usage {
die "build/mkrules [--src srcdir] [--exit_on_no_src] [--out outputdir]\n";
}
my $RULE_DEFINE_KEYWORDS_RE = qr{
header|rawbody|body|full|uri
|meta|mimeheader|urirhssub|uridnsbl
}x;
my $RULE_KEYWORDS_RE = qr{
${RULE_DEFINE_KEYWORDS_RE}|
describe|tflags|reuse|score
}x;
my $fail_message = "";
use strict;
use File::Find;
use File::Copy;
use File::Basename;
use Getopt::Long;
# use SpamAssassin classes directly, so we can lint rules
# as we go
use lib 'lib';
use Mail::SpamAssassin;
our ( @opt_srcs, $opt_out, $opt_sandboxout, $opt_manifest,
$opt_manifestskip, $opt_listpromotable, $opt_active,
$opt_activeout, $default_file_header,
$opt_rulemetadata, $opt_exit_on_no_src);
GetOptions("src=s" => \@opt_srcs,
"out=s",
"sandboxout=s",
"activeout=s",
"active=s",
"manifest=s",
"manifestskip=s",
"rulemetadata=s",
"exit_on_no_src",
);
if (!@opt_srcs) {
foreach ( 'rulescode', 'rulesrc' ) {
if (-d $_) {
# print "using default src $_\n";
push(@opt_srcs, $_);
}
}
}
if (!$opt_manifest && -f "MANIFEST") {
$opt_manifest = "MANIFEST";
}
if (!$opt_manifestskip && -f "MANIFEST.SKIP") {
$opt_manifestskip = "MANIFEST.SKIP";
}
if (!$opt_active && -f "rules/active.list") {
$opt_active = "rules/active.list";
}
if ($opt_exit_on_no_src) {
my $foundone = 0;
foreach my $src (@opt_srcs) {
if (-d $src) { $foundone++; last; }
}
if (!$foundone) {
print "no source directory found: exiting\n";
exit 0;
}
}
# else
die "no src" unless (@opt_srcs >= 1);
my $promolist;
die "no out" unless ($opt_out);
die "unreadable out" unless (-d $opt_out);
die "unreadable active" unless (-f $opt_active);
$opt_sandboxout ||= "70_sandbox.cf";
$opt_activeout ||= "72_active.cf";
# source files that need compilation, and their targets
my $needs_compile = { };
my $found_output = { };
my $current_src;
my $newest_src_mtime = 0;
my $newest_out_mtime = 0;
$default_file_header = join('', <DATA>);
compile_utf8ify_function();
foreach my $src (@opt_srcs) {
if (!-d $src) {
warn "WARNING: unreadable src '$src'\n";
next;
}
$current_src = $src;
File::Find::find ({
wanted => \&src_wanted,
no_chdir => 1
}, $src);
}
# get mtimes of output files; we can be sure that all
# output is under the "opt_out" dir, so recurse there
File::Find::find ({
wanted => \&out_wanted,
no_chdir => 1
}, $opt_out);
# we must rebuild if a compiled .pm is missing, too
my $found_all_pm_files = 1;
foreach my $f (keys %{$needs_compile}) {
next unless ($f =~ /\.pm$/i);
if (!exists $found_output->{basename $f}) {
$found_all_pm_files = 0;
}
}
# check mtime on the active.list file, too
{
my @st = stat $opt_active;
if ($st[9] && $st[9] > $newest_src_mtime) {
$newest_src_mtime = $st[9];
}
}
# check mtimes, and also require that the two required output files
# really do exist
if ($newest_src_mtime && $newest_out_mtime
&& $newest_src_mtime < $newest_out_mtime
&& -f $opt_out.'/'.$opt_sandboxout
&& -f $opt_out.'/'.$opt_activeout
&& $found_all_pm_files)
{
print "mkrules: no rules updated\n";
exit 0;
}
my $rules = { };
my $file_manifest = { };
my $file_manifest_skip = [ ];
if ($opt_manifest) {
read_manifest($opt_manifest);
}
if ($opt_manifestskip) {
read_manifest_skip($opt_manifestskip);
}
my $active_rules = { };
read_active($opt_active);
# context for the rules compiler
my $seen_rules = { };
my $renamed_rules = { };
my $output_files = { };
my $output_file_text = { };
my $files_to_lint = { };
my $entries_for_rule_name = { };
# $COMMENTS is a "catch-all" "name", for lines that appear after the last line
# that refers to a rule by name. Those lines are not published by themselves;
# they'll be published to all pubfiles found in the file.
#
# It's assumed they are comments, because they generally are, but could be all
# sorts of unparseable lines.
my $COMMENTS = '!comments!';
# another "fake name" for lines that should always be published. They'll
# be published to the non-sandbox file.
my $ALWAYS_PUBLISH = '!always_publish!';
read_all_rules($needs_compile);
read_rules_from_output_dir();
compile_output_files();
lint_output_files();
write_output_files();
# mkrules.t relies on the script exiting cleanly Bug #7302 and Bug #7692
exit if ($ENV{'TEST_ACTIVE'}) ;
die "$fail_message" if ( $fail_message =~ m/./) ;
exit;
# ---------------------------------------------------------------------------
sub lint_output_files {
foreach my $file (keys %{$files_to_lint}) {
my $text = join("\n", "file start $file", $output_file_text->{$file}, "file end $file");
if (lint_rule_text($text) != 0) {
warn "\nERROR: LINT FAILED, suppressing output: $file\n\n";
$fail_message = $fail_message . "ERROR: LINT FAILED, suppressing output: $file\n";
# don't suppress entirely, otherwise 'make distcheck'/'disttest'
# will fail since the MANIFEST-listed output files will be
# empty.
# delete $output_file_text->{$file};
$output_file_text->{$file} = '';
}
}
}
sub lint_rule_text {
my ($text) = @_;
# ensure we turn off slow/optional stuff for linting, but keep the essentials
my $pretext = q{
loadplugin Mail::SpamAssassin::Plugin::Check
loadplugin Mail::SpamAssassin::Plugin::URIDNSBL
util_rb_tld com # skip "need to run sa-update" warn
use_bayes 0
};
my $mailsa = Mail::SpamAssassin->new({
rules_filename => "./rules",
# debug => 1,
local_tests_only => 1,
dont_copy_prefs => 1,
config_text => $pretext.$text
});
my $errors = 0;
$mailsa->{lint_callback} = sub {
my %opts = @_;
return if ($opts{msg} =~ /
(?:score\sset\sfor\snon-existent|description\sexists)
/x);
warn "lint: $opts{msg}";
if ($opts{iserror}) {
$errors++;
}
};
$mailsa->lint_rules();
$mailsa->finish();
return $errors; # 0 means good
}
sub src_wanted {
my $path = $File::Find::name;
# record stat times of directories, too, to catch file additions/removals
# in the source tree
my @st = stat $path;
if ($st[9] && $st[9] > $newest_src_mtime) {
$newest_src_mtime = $st[9];
}
# only files from now on, though
return if (!-f $path);
# limit what will be copied from sandboxes
return if ($path =~ /\bsandbox\b/ && !/(?:\d.*\.cf|\.pm)$/i);
# don't use generated scores; they can be out of sync with what is currently
# in the sandboxes or the most current active.list file at any given time
return if ($path =~ /\bscores\b/);
# a bit of sanity please - no svn metadata ;)
return if ($path =~ /\.svn/);
my $dir = $path;
$dir =~ s/^${current_src}[\/\\\:]//s;
$dir =~ s/([^\/\\\:]+)$//;
my $filename = $1;
my $f = "$current_src/$dir$filename";
my $t;
$t = "$opt_out/$filename";
$needs_compile->{$f} = {
f => $f,
t => $t,
dir => $dir,
filename => $filename
};
}
sub out_wanted {
my $path = $File::Find::name;
return unless (-f $path);
return if ($path =~ /\.svn/);
return unless ($path =~ /\.(?:cf|pm)$/i);
my @st = stat $path;
if ($st[9] && $st[9] > $newest_out_mtime) {
$newest_out_mtime = $st[9];
}
my $dir = $path;
$dir =~ s/^${current_src}[\/\\\:]//s;
$dir =~ s/([^\/\\\:]+)$//;
my $filename = $1;
if ($path =~ /\.pm$/i) {
$found_output->{$filename} = 1;
}
}
# compile all the source files found by the src_wanted() sub, in sorted
# order so that the order of precedence makes sense.
sub read_all_rules {
my ($sources) = @_;
# deal with the perl modules first, so that later linting w/ loadplugin will
# work appropriately.
foreach my $f (sort {
my ($ae) = $a =~ /\.(cf|pm)$/;
my ($be) = $b =~ /\.(cf|pm)$/;
return $be cmp $ae || $a cmp $b;
} keys %$sources)
{
my $entry = $needs_compile->{$f};
my $t = $entry->{t};
# TODO: dependency checking optimization?
## my $needs_rebuild = 0;
## if (!-f $t || -M $t > -M $f) {
## # the source file is newer, or dest is not there
## $needs_rebuild = 1;
## }
my $needs_rebuild = 1;
if ($entry->{filename} =~ /\.pm$/) {
plugin_file_compile($entry);
}
elsif ($entry->{dir} =~ /sandbox/) {
rule_file_compile($f, $t, $entry->{filename},
{ issandbox => 1 });
}
elsif ($entry->{dir} =~ /scores/) {
rule_file_compile($f, $t, $entry->{filename},
{ issandbox => 1, isscores => 1 });
}
elsif ($entry->{dir} =~ /extra/) {
# 'extra' rulesets; not built by default (TODO)
next;
}
else {
# rules in "core" and "lang" are always copied
if ($needs_rebuild) {
rule_file_compile($f, $t, $entry->{filename}, { });
}
}
}
}
###########################################################################
# Rules are compiled from source dir to output dir.
#
# Rules in "rules/active.list" are promoted to "72_active.cf"; rules not
# listed there are relegated to "70_sandbox.cf". There is code to allow
# other filenames to be selected from the rulesrc .cf file, but I'm not
# sure if it works anymore ;)
#
# Rules will be autorenamed, if there's a collision between a new rule name and
# one that's already been output by the compiler in another source file. The
# autorenaming is very simple -- portions of the current source path are
# appended to the rule name, sanitised.
sub rule_file_compile {
my ($f, $t, $filename, $flags) = @_;
my $issandbox = $flags->{issandbox};
my $isscores = $flags->{isscores};
open (IN, "<$f") or die "cannot read $f";
# a fast parser for the config file format; don't need the
# full deal here, and it must be fast, since it's run on every
# "make" invocation
my $rule_order = [ ];
my $lastrule = $COMMENTS;
if (!defined $rules->{$ALWAYS_PUBLISH}) {
$rules->{$ALWAYS_PUBLISH} = rule_entry_create();
}
# zero or more "ifplugin" or "if" scopes
my @current_conditionals = ();
my $current_comments = '';
while (<IN>) {
my $orig = $_;
s/#.*$//g; s/^\s+//; s/\s+$//;
# drop comments/blank lines from output
next if (/^$/);
# save "lang" declarations
my $lang = '';
if (s/^lang\s+(\S+)\s+//) {
$lang = $1;
}
if (/^(${RULE_KEYWORDS_RE})\s+(\S+)\s+(.*)$/)
{
# rule definitions
my $type = $1;
my $name = $2;
my $val = $3;
my $origname = $name;
if ($issandbox) {
$name = sandbox_rule_name_avoid_collisions($name, $f);
}
my $origname_w_T_prefix = $name;
# non-sandbox rules always use the same name
if (scalar @current_conditionals) {
# ensure the current conditionals are used in the block name;
# this ensures that we scope alternative (#ifdef-style) dupe
# rule definitions in their own ifplugin scopes
$name .= " ".join("", @current_conditionals);
$name =~ s/\s+/ /gs; $name =~ s/ $//;
}
# track this as a rule-entry block for that rule name
# (and it's T_ prefixed variant, if relevant)
push @{$entries_for_rule_name->{$origname}}, $name;
push @{$entries_for_rule_name->{$origname_w_T_prefix}}, $name;
# comment "score" lines for sandbox rules (bug 5558)
# use generated scores, though, if the rule is active
if ($type eq 'score' && $issandbox &&
!($isscores && $active_rules->{$name}))
{
$orig =~ s/^/#/g;
}
if (!$rules->{$name}) { $rules->{$name} = rule_entry_create(); }
$rules->{$name}->{issandbox} = $issandbox;
$rules->{$name}->{isscores} = $isscores;
$rules->{$name}->{origname} = $origname;
$rules->{$name}->{origname_w_T_prefix} = $origname_w_T_prefix;
$rules->{$name}->{cond} = [@current_conditionals];
$rules->{$name}->{text} .= $current_comments . $orig;
$rules->{$name}->{plugin_dependencies} = {};
# note if the conditional is a plugin reference, as we need to
# ensure that "loadplugin" lines stay in the same place
foreach my $c (@current_conditionals) {
if ($c =~ /^ifplugin\s+(\S+)/) {
$rules->{$name}->{plugin_dependencies}->{$1} = 1;
} elsif ($c =~ /^if.*plugin/) {
while ($c =~ /plugin\s*\(\s*(\S+)\s*\)/g) {
$rules->{$name}->{plugin_dependencies}->{$1} = 1;
}
}
}
# note if we found the rule defn or not. if we did not,
# that means the rule was a code-tied rule, which should always
# have its descriptions/scores/etc. published in "active".
if ($type =~ /^${RULE_DEFINE_KEYWORDS_RE}$/x) {
$rules->{$name}->{found_definition} = 1;
$rules->{$name}->{srcfile} = $f;
$rules->{$name}->{code} = $orig;
}
elsif ($type eq 'tflags') {
# userconf rules are always published in "active"
if ($val =~ /\buserconf\b/) {
$rules->{$name}->{forceactive} = 1;
}
# record for rulemetadata code
$val =~ s/\s+/ /gs;
if ($rules->{$name}->{tflags}) {
$rules->{$name}->{tflags} .= ' '.$val;
} else {
$rules->{$name}->{tflags} = $val;
}
}
$current_comments = '';
$lastrule = $name;
push (@$rule_order, $name);
}
elsif (/^
(pubfile|publish)
\s+(\S+)\s*(.*?)$
/x)
{
# preprocessor directives
my $command = $1;
my $name = $2;
my $val = $3;
my $origname = $name;
# note: if we call sandbox_rule_name_avoid_collisions(), it'll
# rename to 'T_RULENAME' -- which is exactly what we're trying
# to avoid in 'publish RULENAME' lines! so don't call it here.
# if ($issandbox) {
# $name = sandbox_rule_name_avoid_collisions($name, $f);
# }
if (!$rules->{$name}) { $rules->{$name} = rule_entry_create(); }
$rules->{$name}->{origname} = $origname;
$rules->{$name}->{origname_w_T_prefix} = $origname;
if ($command eq 'publish') {
# the 'publish' command defaults to "1", unless it explicitly
# is set to "0". iow: publish RULE_NAME [(0 | 1)] [default: 1]
if (!defined $val || $val eq '') { $val = '1'; }
}
elsif ($command eq 'pubfile') {
if (!filename_in_manifest($opt_out.'/'.$val)) {
warn "$val: WARNING: not listed in manifest file, using default\n";
next; # don't set 'pubfile' below
}
}
$rules->{$name}->{$command} = $val;
# if we see "publish NAMEOFRULE", that means the rule is
# considered active
if ($rules->{$name}->{publish}) {
$rules->{$name}->{forceactive} = 1;
}
}
elsif (/^
(if|ifplugin)
\s+(.*?)$
/x)
{
push @current_conditionals, $orig;
}
elsif (/^else\b/x)
{
if (!scalar @current_conditionals) {
warn "WARNING: 'else' without 'if'/'ifplugin' conditional\n";
} else {
my $cond = invert_conditional(pop @current_conditionals);
push @current_conditionals, $cond;
}
}
elsif (/^endif\b/x)
{
if (!scalar @current_conditionals) {
warn "WARNING: 'endif' without 'if'/'ifplugin' conditional\n";
} else {
pop @current_conditionals;
}
}
elsif (/^require_version\s*(\S+)\b/) {
# silently ignored. TODO? (meh)
}
elsif (/^loadplugin\s*(\S+)\b/) {
my $name = 'loadplugin_'.$1;
unless ($rules->{$name}) {
$rules->{$name} = rule_entry_create();
$rules->{$name}->{origname} = $name;
$rules->{$name}->{origname_w_T_prefix} = $name;
$rules->{$name}->{issandbox} = $issandbox;
$rules->{$name}->{iscommand} = 1;
}
if (/^loadplugin\s*\S+\s+(\S+)/) {
my $fname = $1;
my $fpath = dirname($f)."/".$fname;
if (!-f $fpath) {
warn "$f: WARNING: plugin code file '$fpath' not found, line ignored: $orig";
next;
}
if ($fpath =~ /sandbox/i) {
# Since this is a sandbox plugin, force its output to the sandbox area.
$rules->{$name}->{sandbox_plugin} = 1;
}
# If a 'loadplugin' line is found, and the plugin .pm is not listed in
# the MANIFEST file, this will mean that the .pm will not be copied
# during "make dist". This causes failures during "make disttest",
# since the file does not exist.
#
# However, we do want to preserve these lines in the 'rules' dir, for
# use during development -- without requiring that the .pm's be put
# into MANIFEST -- ie. before the plugin is considered release-ready,
# ie. sandbox plugins.
#
# fix: make it a "tryplugin" line instead; these are ignored if the
# target file is nonexistent.
if (!filename_in_manifest($opt_out.'/'.$fname)) {
warn "$f: WARNING: '$opt_out/$fname' not listed in manifest file, making 'tryplugin': $orig";
$orig =~ s/^\s*loadplugin\b/tryplugin/;
}
}
$rules->{$name}->{text} .= $orig;
unshift (@$rule_order, $name);
}
else {
# an unhandled configuration line; "redirector_pattern",
# "report", something like that. This should be sent to
# the active.cf output (or sandbox if it appeared in a sandbox
# input file).
# use the line itself as a key
my $name = $_;
/^\s*(\S+)/ and $name = $1;
$name =~ s/\s+/ /gs;
my $forceactive = 1;
# always send 'test' lines to the sandbox files
if (/^test\s*/) {
$forceactive = 0;
$name = $_; # ensure we don't drag rules with us though!
$name =~ s/\s+/ /gs;
}
if (scalar @current_conditionals) {
$name = join("", @current_conditionals);
$name =~ s/\s+/ /gs; $name =~ s/ $//;
}
if ($issandbox) {
$name .= "_sandbox";
}
unless ($rules->{$name}) {
$rules->{$name} = rule_entry_create();
$rules->{$name}->{origname} = $name;
$rules->{$name}->{origname_w_T_prefix} = $name;
}
$rules->{$name}->{cond} = [@current_conditionals];
$rules->{$name}->{issandbox} = $issandbox;
$rules->{$name}->{forceactive} = $forceactive;
# $rules->{$name}->{forceactive} = 1;
$rules->{$name}->{iscommand} = 1;
# TODO: bug 6241: 'replace_rules' should be handled ok, but isn't
# warn "unknown line in rules file '$f', saving to default: $orig";
$rules->{$name}->{text} .= $orig;
unshift (@$rule_order, $name);
}
}
close IN;
if ($current_comments) {
$rules->{$COMMENTS}->{text} .= $current_comments;
}
# now append all the found text to the output file buffers
copy_to_output_buffers($rule_order, $issandbox, $f, $filename);
# ok; file complete. now mark all those rules as "seen"; future
# refs to those rule names will trigger an autorename.
foreach my $name (@$rule_order) {
$seen_rules->{$name} = 1;
}
}
# this is only run if we're generating rulemetadata!
sub read_rules_from_output_dir {
return unless ($opt_rulemetadata);
foreach my $file (<$opt_out/*.cf>) {
next unless ($file =~ /\d\d_\S+\.cf$/);
next if (pubfile_is_activeout($file));
next if (pubfile_is_sandboxout($file));
read_output_file($file);
}
}
sub read_output_file {
my ($file) = @_;
open (IN, "<$file") or warn "cannot read $file";
while (<IN>) {
my $orig = $_;
s/#.*$//g; s/^\s+//; s/\s+$//;
# drop comments/blank lines from output
next if (/^$/);
# save "lang" declarations
my $lang = '';
if (s/^lang\s+(\S+)\s+//) {
$lang = $1;
}
if (/^(${RULE_KEYWORDS_RE})\s+(\S+)\s+(.*)$/) {
# rule definitions
my $type = $1;
my $name = $2;
my $val = $3;
# note: we only want to do this if --rulemetadata is in use!
if (!$rules->{$name}) { $rules->{$name} = rule_entry_create(); }
if ($type eq 'tflags') {
$val =~ s/\s+/ /gs;
if ($rules->{$name}->{tflags}) {
$rules->{$name}->{tflags} .= ' '.$val;
} else {
$rules->{$name}->{tflags} = $val;
}
}
if ($type =~ /^${RULE_DEFINE_KEYWORDS_RE}$/x) {
$rules->{$name}->{srcfile} = $file;
$rules->{$name}->{code} = $orig;
}
}
}
close IN;
}
sub copy_to_output_buffers {
my ($rule_order, $issandbox, $f, $filename) = @_;
# always output these two files, even if they're empty!
foreach my $pubfile ($opt_out.'/'.$opt_sandboxout,
$opt_out.'/'.$opt_activeout)
{
$output_files->{$pubfile} = {
header => $default_file_header
};
}
my %already_done = ();
my $copied_active = 0;
my $copied_other = 0;
foreach my $name (@$rule_order)
{
# only do each rule once, please ;)
next if exists $already_done{$name};
$already_done{$name} = undef;
my $text = $rules->{$name}->{text};
if (!$text) {
next; # nothing to write!
}
my $srcfile = $rules->{$name}->{srcfile};
my $pubfile = pubfile_for_rule($rules, $rules->{$name}->{origname_w_T_prefix});
my $is_active = 0;
if (pubfile_is_activeout($pubfile)) {
$is_active++;
}
my $cond = $rules->{$name}->{cond};
if ($cond) {
foreach my $pluginclass (keys %{$rules->{$name}->{plugin_dependencies}}) {
my $ifplugin_text_name = "loadplugin_".($pluginclass || "");
if ($rules->{$ifplugin_text_name}) {
# if the plugin is a sandbox plugin, ensure it's not
# sent to the active file
if ($rules->{$ifplugin_text_name}->{sandbox_plugin}) {
$pubfile = $opt_out.'/'.$opt_sandboxout;
$is_active = 0;
}
# either way, ensure the "loadplugin" line, if there is one,
# goes to the same file
$rules->{$ifplugin_text_name}->{output_file} = $pubfile;
}
}
# ensure we produce enough "endif"s to match however many
# nested conditions there are
my $endifs = "endif\n" x (scalar @{$cond});
$rules->{$name}->{output_text} = "\n"
.join("", @{$cond})
.$text
.$endifs;
} else {
$rules->{$name}->{output_text} = $text;
}
# note the target file
$rules->{$name}->{output_file} = $pubfile;
$output_files->{$pubfile} = {
header => $default_file_header
};
if ($is_active) {
$copied_active++;
} else {
$copied_other++;
}
}
print "$f: $copied_active active rules, ".
"$copied_other other\n";
}
sub pubfile_for_rule {
my ($rules, $name) = @_;
my $pubfile;
if ($rules->{$name}->{publish}) {
# "publish NAMEOFRULE" => send it to active
$pubfile = $opt_out.'/'.$opt_activeout;
}
# default: "70_sandbox.cf" or "72_active.cf"
if (!$pubfile) {
if ($active_rules->{$name} # is active
|| $rules->{$name}->{forceactive} # or is forced to be
|| (!$rules->{$name}->{found_definition} && !$rules->{$name}->{iscommand}
&& !$rules->{$name}->{isscores}))
# or is a rule-related setting in reference to an unknown rule
# but isn't a generated score
{
$pubfile = $opt_out.'/'.$opt_activeout;
}
elsif ($rules->{$name}->{issandbox}) {
$pubfile = $opt_out.'/'.$opt_sandboxout;
}
else {
warn "oops? inactive rule, non-sandbox, shouldn't be possible anymore";
$pubfile = $opt_out.'/'.$opt_sandboxout;
}
}
return $pubfile;
}
sub plugin_file_compile {
my ($entry) = @_;
return if $opt_listpromotable;
# just copy the raw perl module over to the new area
# we can't really rename to avoid conflicts since the loadplugin lines
# are going to be all screwed up in that case.
# jm: we always want to update the output file in case the input
# has been changed!
if (0 && -e $entry->{t}) {
warn "The perl module ".$entry->{t}." already exists, can't copy from ".$entry->{f}."\n";
}
else {
copy($entry->{f}, $entry->{t}) || warn "Couldn't copy ".$entry->{f}.": $!";
}
}
###########################################################################
sub compile_output_files {
my $always = $rules->{$ALWAYS_PUBLISH}->{output_text};
# create all known output files
foreach my $file (keys %$output_files) {
$output_file_text->{$file} = $output_files->{$file}->{header};
if ($always && pubfile_is_activeout($file)) {
$output_file_text->{$file} .= $always;
}
}
# this is a horrible kluge.
# at this point in the game, we've lost the ordered list of rules, so the
# loadplugin lines have no guarantee that they'll be loaded before the rules
# that require them. so we kluge the sort to always have loadplugin lines
# appear at the very top of the array so we know they'll be listed before
# anything else.
my @rulenames = sort {
if ($a =~ /^loadplugin_/) {
return -1;
}
elsif ($b =~ /^loadplugin_/) {
return 1;
}
return $a cmp $b;
} keys %$rules;
my %seen = ();
# go through the rules looking for meta subrules we
# may have forgotten; this happens if a non-subrule is
# listed in active.list, the subrules will not be! fix them
# to appear in the same output file as the master rule.
foreach my $rule (@rulenames) {
fix_up_rule_dependencies($rule);
}
# now repeat, just for rules in the active set; their dependencies should
# always be likewise promoted into the active set, overriding the prev step.
foreach my $rule (@rulenames) {
my $pubfile = $rules->{$rule}->{output_file};
next unless ($pubfile && pubfile_is_activeout($pubfile));
fix_up_rule_dependencies($rule);
}
my $rulemd = '';
# output the known rules that are not meta subrules.
foreach my $rule (@rulenames) {
$rulemd .= get_rulemetadata_string($rule); # all metadata strings
next if ($rule =~ /^__/);
my $pubfile = $rules->{$rule}->{output_file};
my $text = $rules->{$rule}->{output_text};
next unless defined ($text);
# DOS - bug 6297 - HACK HACK HACK HACK
# this will probably screw up meta rules that do something like '&& !$rule'
# avoid publishing 'tflags nopublish' rules
if (pubfile_is_activeout($pubfile) && exists $rules->{$rule}->{tflags} &&
$rules->{$rule}->{tflags} =~ /\bnopublish\b/)
{
print "omitting rule $rule due to tflags nopublish (tflags $rules->{$rule}->{tflags})\n";
next;
}
# DOS - END HACK
$output_file_text->{$pubfile} .= "##{ $rule\n".
$text.
"##} ".$rule."\n\n";
}
# now output all subrules (in a slightly more compact form)
foreach my $rule (@rulenames) {
next unless ($rule =~ /^__/);
my $pubfile = $rules->{$rule}->{output_file};
my $text = $rules->{$rule}->{output_text};
next unless defined ($text);
# DOS - bug 6297 - HACK HACK HACK HACK
# this will probably screw up meta rules that do something like '&& !$rule'
# avoid publishing 'tflags nopublish' rules
if (pubfile_is_activeout($pubfile) && exists $rules->{$rule}->{tflags} &&
$rules->{$rule}->{tflags} =~ /\bnopublish\b/)
{
print "omitting rule $rule due to tflags nopublish (tflags $rules->{$rule}->{tflags})\n";
next;
}
# DOS - END HACK
$output_file_text->{$pubfile} .= $text;
}
# finally, finish off all output files
foreach my $file (keys %$output_files) {
# and get them lint-checked!
$files_to_lint->{$file} = 1;
}
if ($opt_rulemetadata) {
open (RULEMD, ">".$opt_rulemetadata)
or die "cannot write rulemd to $opt_rulemetadata";
print RULEMD "<?xml version='1.0' encoding='UTF-8'?>\n",
"<rulemds>", $rulemd, "</rulemds>\n";
close RULEMD or die "cannot close rulemd to $opt_rulemetadata";
}
}
# conditionally build a method to UTF-8-encode a string. this is only required
# for the rulemetadata XML output, so don't make it mandatory!
sub compile_utf8ify_function {
if (!eval '
sub utf8ify { use Encode; return Encode::encode("UTF-8", $_[0]); } 1;
')
{
eval '
sub utf8ify { die "unimplemented -- Encode module required!" } 1;
'
}
}
sub get_rulemetadata_string {
my ($rule) = @_;
return '' unless ($opt_rulemetadata);
my $mod = 0;
my $srcfile = '';
my $code = '';
my $name = $rule;
# if we found a rule definition with a T_ prefix, use that data
if (!$rules->{$name}->{srcfile} && $rules->{"T_".$name}->{srcfile}) {
$name = "T_".$name;
}
if ($rules->{$name}->{srcfile}) {
$srcfile = $rules->{$name}->{srcfile};
if ($srcfile) {
my @s = stat $srcfile;
if (@s) { $mod = $s[9]; }
}
}
if ($rules->{$name}->{code}) {
$code = $rules->{$name}->{code};
$code =~ s/\]\]>/\](defanged by mkrules)\]>/gs; # ensure it's CDATA-safe
$code = utf8ify($code);
}
my $tf = $rules->{$name}->{tflags} || '';
return "<rulemetadata>".
"<name>$rule</name>".
"<src>$srcfile</src>".
"<srcmtime>$mod</srcmtime>".
# don't include <code> blocks; they bloat up the XML badly (to 800KB)
# and make it very slow to parse later
# "<code><![CDATA[$code]]></code>".
"<tf>$tf</tf>".
"</rulemetadata>\n";
}
sub fix_up_rule_dependencies {
my $rule = shift;
my $pubfile = $rules->{$rule}->{output_file};
my $text = $rules->{$rule}->{output_text};
return unless $text;
while ($text =~ /^\s*meta\s+(.*)$/mg) {
my $line = $1;
while ($line =~ /\b([_A-Za-z0-9]+)\b/g) {
# force that subrule (if it exists) to output in the
# same pubfile
my $rule2 = $1;
# deal with rules that changed name from "FOO" to "T_FOO"
sed_renamed_rule_names(\$rule2);
if (!$entries_for_rule_name->{$rule2}) {
# we may not always have a rule entry, if the rule was from a non-sandbox
# source
# warn "cannot find entries_for_rule_name '$rule2'";
}
foreach my $entryname2 (@{$entries_for_rule_name->{$rule2}}) {
next unless ($rules->{$entryname2} && $rules->{$entryname2}->{output_file});
# don't do this if the subrule would be moved *out* of the
# active file!
my $pubfile2 = $rules->{$entryname2}->{output_file};
next if (pubfile_is_activeout($pubfile2));
$rules->{$entryname2}->{output_file} = $pubfile;
}
}
}
}
sub pubfile_is_activeout {
return 1 if ($_[0] && $_[0] =~ /\b\Q$opt_activeout\E$/);
return 0;
}
sub pubfile_is_sandboxout {
return 1 if ($_[0] && $_[0] =~ /\b\Q$opt_sandboxout\E$/);
return 0;
}
sub write_output_files {
foreach my $pubfile (sort keys %$output_files) {
if (-f $pubfile) {
unlink $pubfile or die "cannot remove output file '$pubfile'";
}
if (!filename_in_manifest($pubfile)) {
warn "$pubfile: WARNING: not listed in manifest file\n";
}
my $text = $output_file_text->{$pubfile};
if ($text) {
open (OUT, ">".$pubfile) or die "cannot write to output file '$pubfile'";
sed_renamed_rule_names(\$text);
print OUT $text;
close OUT or die "cannot close output file '$pubfile'";
# print "$pubfile: written\n"; # too noisy
}
else {
print "$pubfile: no rules promoted\n";
# create an empty file anyway to satisfy MANIFEST
open (OUT, ">".$pubfile) or die "cannot write to output file '$pubfile'";
close OUT or die "cannot close output file '$pubfile'";
}
}
}
###########################################################################
sub rule_entry_create {
return {
text => '',
publish => 0
};
}
###########################################################################
sub sandbox_rule_name_avoid_collisions {
my ($rule, $path) = @_;
my $new;
my $newreason;
my $dowarn = 0;
return $rule if $opt_listpromotable;
return $rule if $active_rules->{$rule};
return $rule if $rules->{$rule}->{forceactive};
if ($rule !~ /^(?:T_|__)/) {
$new = "T_".$rule;
$newreason = "missing T_ prefix";
}
elsif (!exists $seen_rules->{$rule}) {
return $rule;
}
else {
$new = $path;
$new =~ s/[^A-Za-z0-9]+/_/gs;
$new =~ s/_+/_/gs;
$new =~ s/^_//;
$new =~ s/_$//;
$new = $rule.'_'.$new;
$newreason = "collision with existing rule";
$dowarn = 1;
}
if (!$renamed_rules->{$new}) {
$renamed_rules->{$new} = $rule;
if ($dowarn) {
warn "WARNING: $rule: renamed as $new due to $newreason\n";
}
}
return $new;
}
sub sed_renamed_rule_names {
my ($textref) = @_;
foreach my $new (keys %{$renamed_rules}) {
my $rule = $renamed_rules->{$new};
$$textref =~ s/\b${rule}\b/${new}/gs;
}
}
###########################################################################
sub invert_conditional {
my $cond = shift;
if ($cond =~ /^ \s* ifplugin \s+(.*?)$ /x) {
return "if !plugin($1)\n";
} elsif ($cond =~ /^ \s* if \s+(.*?)$ /x) {
return "if !($1)\n";
} else {
warn "WARNING: cannot parse '$cond' for 'else'\n";
return 'if 0';
}
}
###########################################################################
sub read_manifest {
my ($fname) = @_;
parse_line_delimited_config_file($fname, sub {
/^\s*(.*?)\s*$/ and $file_manifest->{$1} = 1;
});
}
sub read_manifest_skip {
my ($fname) = @_;
parse_line_delimited_config_file($fname, sub {
/^\s*(.*?)\s*$/ and push (@{$file_manifest_skip}, qr/$1/);
});
}
sub read_active {
my ($fname) = @_;
parse_line_delimited_config_file($fname, sub {
/^(\S+)/ and $active_rules->{$1} = 1;
});
}
sub filename_in_manifest {
my ($fname) = @_;
return 1 if ($file_manifest->{$fname});
foreach my $skipre (@{$file_manifest_skip}) {
return 1 if ($fname =~ $skipre);
}
return 0;
}
sub parse_line_delimited_config_file {
my ($fname, $callback) = @_;
if (!open (IN, "<$fname")) {
warn "cannot read $fname\n";
} else {
while (<IN>) {
next if /^#/;
$callback->();
}
close IN;
}
}
__DATA__
# SpamAssassin rules file
#
# Please don't modify this file as your changes will be overwritten with
# the next update. Use @@LOCAL_RULES_DIR@@/local.cf instead.
# See 'perldoc Mail::SpamAssassin::Conf' for details.
#
# <@LICENSE>
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to you under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at:
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# </@LICENSE>
#
###########################################################################
require_version @@VERSION@@