| #!/usr/bin/perl -w |
| # |
| # build/mkrules -- compile the SpamAssassin rules into installable form |
| # |
| # <@LICENSE> |
| # Licensed to the Apache Software Foundation (ASF) under one or more |
| # contributor license agreements. See the NOTICE file distributed with |
| # this work for additional information regarding copyright ownership. |
| # The ASF licenses this file to you under the Apache License, Version 2.0 |
| # (the "License"); you may not use this file except in compliance with |
| # the License. You may obtain a copy of the License at: |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| # </@LICENSE> |
| |
| # This is an implementation of |
| # http://wiki.apache.org/spamassassin/RulesProjPromotion |
| |
| sub usage { |
| die "build/mkrules [--src srcdir] [--exit_on_no_src] [--out outputdir]\n"; |
| } |
| |
| my $RULE_DEFINE_KEYWORDS_RE = qr{ |
| header|rawbody|body|full|uri |
| |meta|mimeheader|urirhssub|uridnsbl |
| }x; |
| |
| my $RULE_KEYWORDS_RE = qr{ |
| ${RULE_DEFINE_KEYWORDS_RE}| |
| describe|tflags|reuse|score |
| }x; |
| |
| my $fail_message = ""; |
| |
| use strict; |
| use File::Find; |
| use File::Copy; |
| use File::Basename; |
| use Getopt::Long; |
| |
| # use SpamAssassin classes directly, so we can lint rules |
| # as we go |
| use lib 'lib'; |
| use Mail::SpamAssassin; |
| |
| our ( @opt_srcs, $opt_out, $opt_sandboxout, $opt_manifest, |
| $opt_manifestskip, $opt_listpromotable, $opt_active, |
| $opt_activeout, $default_file_header, |
| $opt_rulemetadata, $opt_exit_on_no_src); |
| |
| GetOptions("src=s" => \@opt_srcs, |
| "out=s", |
| "sandboxout=s", |
| "activeout=s", |
| "active=s", |
| "manifest=s", |
| "manifestskip=s", |
| "rulemetadata=s", |
| "exit_on_no_src", |
| ); |
| |
| if (!@opt_srcs) { |
| foreach ( 'rulescode', 'rulesrc' ) { |
| if (-d $_) { |
| # print "using default src $_\n"; |
| push(@opt_srcs, $_); |
| } |
| } |
| } |
| |
| if (!$opt_manifest && -f "MANIFEST") { |
| $opt_manifest = "MANIFEST"; |
| } |
| |
| if (!$opt_manifestskip && -f "MANIFEST.SKIP") { |
| $opt_manifestskip = "MANIFEST.SKIP"; |
| } |
| |
| if (!$opt_active && -f "rules/active.list") { |
| $opt_active = "rules/active.list"; |
| } |
| |
| if ($opt_exit_on_no_src) { |
| my $foundone = 0; |
| foreach my $src (@opt_srcs) { |
| if (-d $src) { $foundone++; last; } |
| } |
| |
| if (!$foundone) { |
| print "no source directory found: exiting\n"; |
| exit 0; |
| } |
| } |
| # else |
| die "no src" unless (@opt_srcs >= 1); |
| |
| my $promolist; |
| |
| die "no out" unless ($opt_out); |
| die "unreadable out" unless (-d $opt_out); |
| die "unreadable active" unless (-f $opt_active); |
| |
| $opt_sandboxout ||= "70_sandbox.cf"; |
| $opt_activeout ||= "72_active.cf"; |
| |
| # source files that need compilation, and their targets |
| my $needs_compile = { }; |
| my $found_output = { }; |
| my $current_src; |
| my $newest_src_mtime = 0; |
| my $newest_out_mtime = 0; |
| |
| $default_file_header = join('', <DATA>); |
| compile_utf8ify_function(); |
| |
| foreach my $src (@opt_srcs) { |
| if (!-d $src) { |
| warn "WARNING: unreadable src '$src'\n"; |
| next; |
| } |
| $current_src = $src; |
| File::Find::find ({ |
| wanted => \&src_wanted, |
| no_chdir => 1 |
| }, $src); |
| } |
| |
| # get mtimes of output files; we can be sure that all |
| # output is under the "opt_out" dir, so recurse there |
| File::Find::find ({ |
| wanted => \&out_wanted, |
| no_chdir => 1 |
| }, $opt_out); |
| |
| # we must rebuild if a compiled .pm is missing, too |
| my $found_all_pm_files = 1; |
| foreach my $f (keys %{$needs_compile}) { |
| next unless ($f =~ /\.pm$/i); |
| if (!exists $found_output->{basename $f}) { |
| $found_all_pm_files = 0; |
| } |
| } |
| |
| # check mtime on the active.list file, too |
| { |
| my @st = stat $opt_active; |
| if ($st[9] && $st[9] > $newest_src_mtime) { |
| $newest_src_mtime = $st[9]; |
| } |
| } |
| |
| # check mtimes, and also require that the two required output files |
| # really do exist |
| if ($newest_src_mtime && $newest_out_mtime |
| && $newest_src_mtime < $newest_out_mtime |
| && -f $opt_out.'/'.$opt_sandboxout |
| && -f $opt_out.'/'.$opt_activeout |
| && $found_all_pm_files) |
| { |
| print "mkrules: no rules updated\n"; |
| exit 0; |
| } |
| |
| my $rules = { }; |
| |
| my $file_manifest = { }; |
| my $file_manifest_skip = [ ]; |
| if ($opt_manifest) { |
| read_manifest($opt_manifest); |
| } |
| if ($opt_manifestskip) { |
| read_manifest_skip($opt_manifestskip); |
| } |
| |
| my $active_rules = { }; |
| read_active($opt_active); |
| |
| # context for the rules compiler |
| my $seen_rules = { }; |
| my $renamed_rules = { }; |
| my $output_files = { }; |
| my $output_file_text = { }; |
| my $files_to_lint = { }; |
| my $entries_for_rule_name = { }; |
| |
| # $COMMENTS is a "catch-all" "name", for lines that appear after the last line |
| # that refers to a rule by name. Those lines are not published by themselves; |
| # they'll be published to all pubfiles found in the file. |
| # |
| # It's assumed they are comments, because they generally are, but could be all |
| # sorts of unparseable lines. |
| my $COMMENTS = '!comments!'; |
| |
| # another "fake name" for lines that should always be published. They'll |
| # be published to the non-sandbox file. |
| my $ALWAYS_PUBLISH = '!always_publish!'; |
| |
| read_all_rules($needs_compile); |
| read_rules_from_output_dir(); |
| compile_output_files(); |
| lint_output_files(); |
| write_output_files(); |
| |
| # mkrules.t relies on the script exiting cleanly Bug #7302 and Bug #7692 |
| exit if ($ENV{'TEST_ACTIVE'}) ; |
| |
| die "$fail_message" if ( $fail_message =~ m/./) ; |
| exit; |
| |
| # --------------------------------------------------------------------------- |
| |
| sub lint_output_files { |
| foreach my $file (keys %{$files_to_lint}) { |
| my $text = join("\n", "file start $file", $output_file_text->{$file}, "file end $file"); |
| if (lint_rule_text($text) != 0) { |
| warn "\nERROR: LINT FAILED, suppressing output: $file\n\n"; |
| $fail_message = $fail_message . "ERROR: LINT FAILED, suppressing output: $file\n"; |
| |
| # don't suppress entirely, otherwise 'make distcheck'/'disttest' |
| # will fail since the MANIFEST-listed output files will be |
| # empty. |
| |
| # delete $output_file_text->{$file}; |
| $output_file_text->{$file} = ''; |
| } |
| } |
| } |
| |
| sub lint_rule_text { |
| my ($text) = @_; |
| |
| # ensure we turn off slow/optional stuff for linting, but keep the essentials |
| my $pretext = q{ |
| loadplugin Mail::SpamAssassin::Plugin::Check |
| loadplugin Mail::SpamAssassin::Plugin::URIDNSBL |
| util_rb_tld com # skip "need to run sa-update" warn |
| use_bayes 0 |
| }; |
| |
| my $mailsa = Mail::SpamAssassin->new({ |
| rules_filename => "./rules", |
| # debug => 1, |
| local_tests_only => 1, |
| dont_copy_prefs => 1, |
| config_text => $pretext.$text |
| }); |
| |
| my $errors = 0; |
| $mailsa->{lint_callback} = sub { |
| my %opts = @_; |
| |
| return if ($opts{msg} =~ / |
| (?:score\sset\sfor\snon-existent|description\sexists) |
| /x); |
| |
| warn "lint: $opts{msg}"; |
| if ($opts{iserror}) { |
| $errors++; |
| } |
| }; |
| |
| $mailsa->lint_rules(); |
| $mailsa->finish(); |
| return $errors; # 0 means good |
| } |
| |
| sub src_wanted { |
| my $path = $File::Find::name; |
| |
| # record stat times of directories, too, to catch file additions/removals |
| # in the source tree |
| my @st = stat $path; |
| if ($st[9] && $st[9] > $newest_src_mtime) { |
| $newest_src_mtime = $st[9]; |
| } |
| |
| # only files from now on, though |
| return if (!-f $path); |
| |
| # limit what will be copied from sandboxes |
| return if ($path =~ /\bsandbox\b/ && !/(?:\d.*\.cf|\.pm)$/i); |
| |
| # don't use generated scores; they can be out of sync with what is currently |
| # in the sandboxes or the most current active.list file at any given time |
| return if ($path =~ /\bscores\b/); |
| |
| # a bit of sanity please - no svn metadata ;) |
| return if ($path =~ /\.svn/); |
| |
| my $dir = $path; |
| $dir =~ s/^${current_src}[\/\\\:]//s; |
| $dir =~ s/([^\/\\\:]+)$//; |
| my $filename = $1; |
| |
| |
| my $f = "$current_src/$dir$filename"; |
| my $t; |
| $t = "$opt_out/$filename"; |
| |
| $needs_compile->{$f} = { |
| f => $f, |
| t => $t, |
| dir => $dir, |
| filename => $filename |
| }; |
| } |
| |
| sub out_wanted { |
| my $path = $File::Find::name; |
| return unless (-f $path); |
| return if ($path =~ /\.svn/); |
| return unless ($path =~ /\.(?:cf|pm)$/i); |
| |
| my @st = stat $path; |
| if ($st[9] && $st[9] > $newest_out_mtime) { |
| $newest_out_mtime = $st[9]; |
| } |
| |
| my $dir = $path; |
| $dir =~ s/^${current_src}[\/\\\:]//s; |
| $dir =~ s/([^\/\\\:]+)$//; |
| my $filename = $1; |
| |
| if ($path =~ /\.pm$/i) { |
| $found_output->{$filename} = 1; |
| } |
| } |
| |
| # compile all the source files found by the src_wanted() sub, in sorted |
| # order so that the order of precedence makes sense. |
| sub read_all_rules { |
| my ($sources) = @_; |
| |
| # deal with the perl modules first, so that later linting w/ loadplugin will |
| # work appropriately. |
| foreach my $f (sort { |
| my ($ae) = $a =~ /\.(cf|pm)$/; |
| my ($be) = $b =~ /\.(cf|pm)$/; |
| return $be cmp $ae || $a cmp $b; |
| } keys %$sources) |
| { |
| my $entry = $needs_compile->{$f}; |
| my $t = $entry->{t}; |
| |
| # TODO: dependency checking optimization? |
| ## my $needs_rebuild = 0; |
| ## if (!-f $t || -M $t > -M $f) { |
| ## # the source file is newer, or dest is not there |
| ## $needs_rebuild = 1; |
| ## } |
| |
| my $needs_rebuild = 1; |
| |
| if ($entry->{filename} =~ /\.pm$/) { |
| plugin_file_compile($entry); |
| } |
| elsif ($entry->{dir} =~ /sandbox/) { |
| rule_file_compile($f, $t, $entry->{filename}, |
| { issandbox => 1 }); |
| } |
| elsif ($entry->{dir} =~ /scores/) { |
| rule_file_compile($f, $t, $entry->{filename}, |
| { issandbox => 1, isscores => 1 }); |
| } |
| elsif ($entry->{dir} =~ /extra/) { |
| # 'extra' rulesets; not built by default (TODO) |
| next; |
| } |
| else { |
| # rules in "core" and "lang" are always copied |
| if ($needs_rebuild) { |
| rule_file_compile($f, $t, $entry->{filename}, { }); |
| } |
| } |
| } |
| } |
| |
| ########################################################################### |
| |
| # Rules are compiled from source dir to output dir. |
| # |
| # Rules in "rules/active.list" are promoted to "72_active.cf"; rules not |
| # listed there are relegated to "70_sandbox.cf". There is code to allow |
| # other filenames to be selected from the rulesrc .cf file, but I'm not |
| # sure if it works anymore ;) |
| # |
| # Rules will be autorenamed, if there's a collision between a new rule name and |
| # one that's already been output by the compiler in another source file. The |
| # autorenaming is very simple -- portions of the current source path are |
| # appended to the rule name, sanitised. |
| |
| sub rule_file_compile { |
| my ($f, $t, $filename, $flags) = @_; |
| my $issandbox = $flags->{issandbox}; |
| my $isscores = $flags->{isscores}; |
| |
| open (IN, "<$f") or die "cannot read $f"; |
| |
| # a fast parser for the config file format; don't need the |
| # full deal here, and it must be fast, since it's run on every |
| # "make" invocation |
| |
| my $rule_order = [ ]; |
| |
| my $lastrule = $COMMENTS; |
| |
| if (!defined $rules->{$ALWAYS_PUBLISH}) { |
| $rules->{$ALWAYS_PUBLISH} = rule_entry_create(); |
| } |
| |
| # zero or more "ifplugin" or "if" scopes |
| my @current_conditionals = (); |
| my $current_comments = ''; |
| |
| while (<IN>) { |
| my $orig = $_; |
| |
| s/#.*$//g; s/^\s+//; s/\s+$//; |
| |
| # drop comments/blank lines from output |
| next if (/^$/); |
| |
| # save "lang" declarations |
| my $lang = ''; |
| if (s/^lang\s+(\S+)\s+//) { |
| $lang = $1; |
| } |
| |
| if (/^(${RULE_KEYWORDS_RE})\s+(\S+)\s+(.*)$/) |
| { |
| # rule definitions |
| my $type = $1; |
| my $name = $2; |
| my $val = $3; |
| |
| my $origname = $name; |
| if ($issandbox) { |
| $name = sandbox_rule_name_avoid_collisions($name, $f); |
| } |
| my $origname_w_T_prefix = $name; |
| # non-sandbox rules always use the same name |
| |
| if (scalar @current_conditionals) { |
| # ensure the current conditionals are used in the block name; |
| # this ensures that we scope alternative (#ifdef-style) dupe |
| # rule definitions in their own ifplugin scopes |
| $name .= " ".join("", @current_conditionals); |
| $name =~ s/\s+/ /gs; $name =~ s/ $//; |
| } |
| |
| # track this as a rule-entry block for that rule name |
| # (and it's T_ prefixed variant, if relevant) |
| push @{$entries_for_rule_name->{$origname}}, $name; |
| push @{$entries_for_rule_name->{$origname_w_T_prefix}}, $name; |
| |
| # comment "score" lines for sandbox rules (bug 5558) |
| # use generated scores, though, if the rule is active |
| if ($type eq 'score' && $issandbox && |
| !($isscores && $active_rules->{$name})) |
| { |
| $orig =~ s/^/#/g; |
| } |
| |
| if (!$rules->{$name}) { $rules->{$name} = rule_entry_create(); } |
| |
| $rules->{$name}->{issandbox} = $issandbox; |
| $rules->{$name}->{isscores} = $isscores; |
| $rules->{$name}->{origname} = $origname; |
| $rules->{$name}->{origname_w_T_prefix} = $origname_w_T_prefix; |
| $rules->{$name}->{cond} = [@current_conditionals]; |
| $rules->{$name}->{text} .= $current_comments . $orig; |
| $rules->{$name}->{plugin_dependencies} = {}; |
| |
| # note if the conditional is a plugin reference, as we need to |
| # ensure that "loadplugin" lines stay in the same place |
| foreach my $c (@current_conditionals) { |
| if ($c =~ /^ifplugin\s+(\S+)/) { |
| $rules->{$name}->{plugin_dependencies}->{$1} = 1; |
| } elsif ($c =~ /^if.*plugin/) { |
| while ($c =~ /plugin\s*\(\s*(\S+)\s*\)/g) { |
| $rules->{$name}->{plugin_dependencies}->{$1} = 1; |
| } |
| } |
| } |
| |
| # note if we found the rule defn or not. if we did not, |
| # that means the rule was a code-tied rule, which should always |
| # have its descriptions/scores/etc. published in "active". |
| if ($type =~ /^${RULE_DEFINE_KEYWORDS_RE}$/x) { |
| $rules->{$name}->{found_definition} = 1; |
| $rules->{$name}->{srcfile} = $f; |
| $rules->{$name}->{code} = $orig; |
| } |
| elsif ($type eq 'tflags') { |
| # userconf rules are always published in "active" |
| if ($val =~ /\buserconf\b/) { |
| $rules->{$name}->{forceactive} = 1; |
| } |
| |
| # record for rulemetadata code |
| $val =~ s/\s+/ /gs; |
| if ($rules->{$name}->{tflags}) { |
| $rules->{$name}->{tflags} .= ' '.$val; |
| } else { |
| $rules->{$name}->{tflags} = $val; |
| } |
| } |
| |
| $current_comments = ''; |
| |
| $lastrule = $name; |
| push (@$rule_order, $name); |
| } |
| elsif (/^ |
| (pubfile|publish) |
| \s+(\S+)\s*(.*?)$ |
| /x) |
| { |
| # preprocessor directives |
| my $command = $1; |
| my $name = $2; |
| my $val = $3; |
| |
| my $origname = $name; |
| |
| # note: if we call sandbox_rule_name_avoid_collisions(), it'll |
| # rename to 'T_RULENAME' -- which is exactly what we're trying |
| # to avoid in 'publish RULENAME' lines! so don't call it here. |
| # if ($issandbox) { |
| # $name = sandbox_rule_name_avoid_collisions($name, $f); |
| # } |
| |
| if (!$rules->{$name}) { $rules->{$name} = rule_entry_create(); } |
| $rules->{$name}->{origname} = $origname; |
| $rules->{$name}->{origname_w_T_prefix} = $origname; |
| |
| if ($command eq 'publish') { |
| # the 'publish' command defaults to "1", unless it explicitly |
| # is set to "0". iow: publish RULE_NAME [(0 | 1)] [default: 1] |
| if (!defined $val || $val eq '') { $val = '1'; } |
| } |
| elsif ($command eq 'pubfile') { |
| if (!filename_in_manifest($opt_out.'/'.$val)) { |
| warn "$val: WARNING: not listed in manifest file, using default\n"; |
| next; # don't set 'pubfile' below |
| } |
| } |
| |
| $rules->{$name}->{$command} = $val; |
| |
| # if we see "publish NAMEOFRULE", that means the rule is |
| # considered active |
| if ($rules->{$name}->{publish}) { |
| $rules->{$name}->{forceactive} = 1; |
| } |
| } |
| elsif (/^ |
| (if|ifplugin) |
| \s+(.*?)$ |
| /x) |
| { |
| push @current_conditionals, $orig; |
| } |
| elsif (/^else\b/x) |
| { |
| if (!scalar @current_conditionals) { |
| warn "WARNING: 'else' without 'if'/'ifplugin' conditional\n"; |
| } else { |
| my $cond = invert_conditional(pop @current_conditionals); |
| push @current_conditionals, $cond; |
| } |
| } |
| elsif (/^endif\b/x) |
| { |
| if (!scalar @current_conditionals) { |
| warn "WARNING: 'endif' without 'if'/'ifplugin' conditional\n"; |
| } else { |
| pop @current_conditionals; |
| } |
| } |
| elsif (/^require_version\s*(\S+)\b/) { |
| # silently ignored. TODO? (meh) |
| } |
| elsif (/^loadplugin\s*(\S+)\b/) { |
| my $name = 'loadplugin_'.$1; |
| |
| unless ($rules->{$name}) { |
| $rules->{$name} = rule_entry_create(); |
| $rules->{$name}->{origname} = $name; |
| $rules->{$name}->{origname_w_T_prefix} = $name; |
| $rules->{$name}->{issandbox} = $issandbox; |
| $rules->{$name}->{iscommand} = 1; |
| } |
| |
| if (/^loadplugin\s*\S+\s+(\S+)/) { |
| my $fname = $1; |
| my $fpath = dirname($f)."/".$fname; |
| |
| if (!-f $fpath) { |
| warn "$f: WARNING: plugin code file '$fpath' not found, line ignored: $orig"; |
| next; |
| } |
| |
| if ($fpath =~ /sandbox/i) { |
| # Since this is a sandbox plugin, force its output to the sandbox area. |
| $rules->{$name}->{sandbox_plugin} = 1; |
| } |
| |
| # If a 'loadplugin' line is found, and the plugin .pm is not listed in |
| # the MANIFEST file, this will mean that the .pm will not be copied |
| # during "make dist". This causes failures during "make disttest", |
| # since the file does not exist. |
| # |
| # However, we do want to preserve these lines in the 'rules' dir, for |
| # use during development -- without requiring that the .pm's be put |
| # into MANIFEST -- ie. before the plugin is considered release-ready, |
| # ie. sandbox plugins. |
| # |
| # fix: make it a "tryplugin" line instead; these are ignored if the |
| # target file is nonexistent. |
| |
| if (!filename_in_manifest($opt_out.'/'.$fname)) { |
| warn "$f: WARNING: '$opt_out/$fname' not listed in manifest file, making 'tryplugin': $orig"; |
| $orig =~ s/^\s*loadplugin\b/tryplugin/; |
| } |
| } |
| |
| $rules->{$name}->{text} .= $orig; |
| unshift (@$rule_order, $name); |
| } |
| else { |
| # an unhandled configuration line; "redirector_pattern", |
| # "report", something like that. This should be sent to |
| # the active.cf output (or sandbox if it appeared in a sandbox |
| # input file). |
| |
| # use the line itself as a key |
| my $name = $_; |
| /^\s*(\S+)/ and $name = $1; |
| $name =~ s/\s+/ /gs; |
| |
| my $forceactive = 1; |
| |
| # always send 'test' lines to the sandbox files |
| if (/^test\s*/) { |
| $forceactive = 0; |
| |
| $name = $_; # ensure we don't drag rules with us though! |
| $name =~ s/\s+/ /gs; |
| } |
| |
| if (scalar @current_conditionals) { |
| $name = join("", @current_conditionals); |
| $name =~ s/\s+/ /gs; $name =~ s/ $//; |
| } |
| |
| if ($issandbox) { |
| $name .= "_sandbox"; |
| } |
| |
| unless ($rules->{$name}) { |
| $rules->{$name} = rule_entry_create(); |
| $rules->{$name}->{origname} = $name; |
| $rules->{$name}->{origname_w_T_prefix} = $name; |
| } |
| $rules->{$name}->{cond} = [@current_conditionals]; |
| $rules->{$name}->{issandbox} = $issandbox; |
| $rules->{$name}->{forceactive} = $forceactive; |
| # $rules->{$name}->{forceactive} = 1; |
| $rules->{$name}->{iscommand} = 1; |
| |
| # TODO: bug 6241: 'replace_rules' should be handled ok, but isn't |
| |
| # warn "unknown line in rules file '$f', saving to default: $orig"; |
| |
| $rules->{$name}->{text} .= $orig; |
| unshift (@$rule_order, $name); |
| } |
| } |
| close IN; |
| |
| if ($current_comments) { |
| $rules->{$COMMENTS}->{text} .= $current_comments; |
| } |
| |
| # now append all the found text to the output file buffers |
| copy_to_output_buffers($rule_order, $issandbox, $f, $filename); |
| |
| # ok; file complete. now mark all those rules as "seen"; future |
| # refs to those rule names will trigger an autorename. |
| foreach my $name (@$rule_order) { |
| $seen_rules->{$name} = 1; |
| } |
| } |
| |
| # this is only run if we're generating rulemetadata! |
| sub read_rules_from_output_dir { |
| return unless ($opt_rulemetadata); |
| |
| foreach my $file (<$opt_out/*.cf>) { |
| next unless ($file =~ /\d\d_\S+\.cf$/); |
| next if (pubfile_is_activeout($file)); |
| next if (pubfile_is_sandboxout($file)); |
| read_output_file($file); |
| } |
| } |
| |
| sub read_output_file { |
| my ($file) = @_; |
| open (IN, "<$file") or warn "cannot read $file"; |
| while (<IN>) { |
| my $orig = $_; |
| |
| s/#.*$//g; s/^\s+//; s/\s+$//; |
| |
| # drop comments/blank lines from output |
| next if (/^$/); |
| |
| # save "lang" declarations |
| my $lang = ''; |
| if (s/^lang\s+(\S+)\s+//) { |
| $lang = $1; |
| } |
| |
| if (/^(${RULE_KEYWORDS_RE})\s+(\S+)\s+(.*)$/) { |
| # rule definitions |
| my $type = $1; |
| my $name = $2; |
| my $val = $3; |
| |
| # note: we only want to do this if --rulemetadata is in use! |
| if (!$rules->{$name}) { $rules->{$name} = rule_entry_create(); } |
| |
| if ($type eq 'tflags') { |
| $val =~ s/\s+/ /gs; |
| if ($rules->{$name}->{tflags}) { |
| $rules->{$name}->{tflags} .= ' '.$val; |
| } else { |
| $rules->{$name}->{tflags} = $val; |
| } |
| } |
| |
| if ($type =~ /^${RULE_DEFINE_KEYWORDS_RE}$/x) { |
| $rules->{$name}->{srcfile} = $file; |
| $rules->{$name}->{code} = $orig; |
| } |
| } |
| } |
| close IN; |
| } |
| |
| sub copy_to_output_buffers { |
| my ($rule_order, $issandbox, $f, $filename) = @_; |
| |
| # always output these two files, even if they're empty! |
| foreach my $pubfile ($opt_out.'/'.$opt_sandboxout, |
| $opt_out.'/'.$opt_activeout) |
| { |
| $output_files->{$pubfile} = { |
| header => $default_file_header |
| }; |
| } |
| |
| my %already_done = (); |
| my $copied_active = 0; |
| my $copied_other = 0; |
| foreach my $name (@$rule_order) |
| { |
| # only do each rule once, please ;) |
| next if exists $already_done{$name}; |
| $already_done{$name} = undef; |
| |
| my $text = $rules->{$name}->{text}; |
| if (!$text) { |
| next; # nothing to write! |
| } |
| |
| my $srcfile = $rules->{$name}->{srcfile}; |
| my $pubfile = pubfile_for_rule($rules, $rules->{$name}->{origname_w_T_prefix}); |
| my $is_active = 0; |
| if (pubfile_is_activeout($pubfile)) { |
| $is_active++; |
| } |
| |
| my $cond = $rules->{$name}->{cond}; |
| if ($cond) { |
| foreach my $pluginclass (keys %{$rules->{$name}->{plugin_dependencies}}) { |
| my $ifplugin_text_name = "loadplugin_".($pluginclass || ""); |
| |
| if ($rules->{$ifplugin_text_name}) { |
| # if the plugin is a sandbox plugin, ensure it's not |
| # sent to the active file |
| if ($rules->{$ifplugin_text_name}->{sandbox_plugin}) { |
| $pubfile = $opt_out.'/'.$opt_sandboxout; |
| $is_active = 0; |
| } |
| |
| # either way, ensure the "loadplugin" line, if there is one, |
| # goes to the same file |
| $rules->{$ifplugin_text_name}->{output_file} = $pubfile; |
| } |
| } |
| |
| # ensure we produce enough "endif"s to match however many |
| # nested conditions there are |
| my $endifs = "endif\n" x (scalar @{$cond}); |
| |
| $rules->{$name}->{output_text} = "\n" |
| .join("", @{$cond}) |
| .$text |
| .$endifs; |
| |
| } else { |
| $rules->{$name}->{output_text} = $text; |
| } |
| |
| # note the target file |
| $rules->{$name}->{output_file} = $pubfile; |
| |
| $output_files->{$pubfile} = { |
| header => $default_file_header |
| }; |
| |
| if ($is_active) { |
| $copied_active++; |
| } else { |
| $copied_other++; |
| } |
| } |
| |
| print "$f: $copied_active active rules, ". |
| "$copied_other other\n"; |
| } |
| |
| sub pubfile_for_rule { |
| my ($rules, $name) = @_; |
| |
| my $pubfile; |
| if ($rules->{$name}->{publish}) { |
| # "publish NAMEOFRULE" => send it to active |
| $pubfile = $opt_out.'/'.$opt_activeout; |
| } |
| |
| # default: "70_sandbox.cf" or "72_active.cf" |
| if (!$pubfile) { |
| if ($active_rules->{$name} # is active |
| || $rules->{$name}->{forceactive} # or is forced to be |
| || (!$rules->{$name}->{found_definition} && !$rules->{$name}->{iscommand} |
| && !$rules->{$name}->{isscores})) |
| # or is a rule-related setting in reference to an unknown rule |
| # but isn't a generated score |
| { |
| $pubfile = $opt_out.'/'.$opt_activeout; |
| } |
| elsif ($rules->{$name}->{issandbox}) { |
| $pubfile = $opt_out.'/'.$opt_sandboxout; |
| } |
| else { |
| warn "oops? inactive rule, non-sandbox, shouldn't be possible anymore"; |
| $pubfile = $opt_out.'/'.$opt_sandboxout; |
| } |
| } |
| return $pubfile; |
| } |
| |
| sub plugin_file_compile { |
| my ($entry) = @_; |
| |
| return if $opt_listpromotable; |
| # just copy the raw perl module over to the new area |
| # we can't really rename to avoid conflicts since the loadplugin lines |
| # are going to be all screwed up in that case. |
| # jm: we always want to update the output file in case the input |
| # has been changed! |
| if (0 && -e $entry->{t}) { |
| warn "The perl module ".$entry->{t}." already exists, can't copy from ".$entry->{f}."\n"; |
| } |
| else { |
| copy($entry->{f}, $entry->{t}) || warn "Couldn't copy ".$entry->{f}.": $!"; |
| } |
| } |
| |
| ########################################################################### |
| |
| sub compile_output_files { |
| my $always = $rules->{$ALWAYS_PUBLISH}->{output_text}; |
| |
| # create all known output files |
| foreach my $file (keys %$output_files) { |
| $output_file_text->{$file} = $output_files->{$file}->{header}; |
| |
| if ($always && pubfile_is_activeout($file)) { |
| $output_file_text->{$file} .= $always; |
| } |
| } |
| |
| # this is a horrible kluge. |
| # at this point in the game, we've lost the ordered list of rules, so the |
| # loadplugin lines have no guarantee that they'll be loaded before the rules |
| # that require them. so we kluge the sort to always have loadplugin lines |
| # appear at the very top of the array so we know they'll be listed before |
| # anything else. |
| my @rulenames = sort { |
| if ($a =~ /^loadplugin_/) { |
| return -1; |
| } |
| elsif ($b =~ /^loadplugin_/) { |
| return 1; |
| } |
| return $a cmp $b; |
| } keys %$rules; |
| my %seen = (); |
| |
| # go through the rules looking for meta subrules we |
| # may have forgotten; this happens if a non-subrule is |
| # listed in active.list, the subrules will not be! fix them |
| # to appear in the same output file as the master rule. |
| foreach my $rule (@rulenames) { |
| fix_up_rule_dependencies($rule); |
| } |
| |
| # now repeat, just for rules in the active set; their dependencies should |
| # always be likewise promoted into the active set, overriding the prev step. |
| foreach my $rule (@rulenames) { |
| my $pubfile = $rules->{$rule}->{output_file}; |
| next unless ($pubfile && pubfile_is_activeout($pubfile)); |
| fix_up_rule_dependencies($rule); |
| } |
| |
| my $rulemd = ''; |
| |
| # output the known rules that are not meta subrules. |
| foreach my $rule (@rulenames) { |
| $rulemd .= get_rulemetadata_string($rule); # all metadata strings |
| |
| next if ($rule =~ /^__/); |
| my $pubfile = $rules->{$rule}->{output_file}; |
| my $text = $rules->{$rule}->{output_text}; |
| next unless defined ($text); |
| |
| # DOS - bug 6297 - HACK HACK HACK HACK |
| # this will probably screw up meta rules that do something like '&& !$rule' |
| |
| # avoid publishing 'tflags nopublish' rules |
| if (pubfile_is_activeout($pubfile) && exists $rules->{$rule}->{tflags} && |
| $rules->{$rule}->{tflags} =~ /\bnopublish\b/) |
| { |
| print "omitting rule $rule due to tflags nopublish (tflags $rules->{$rule}->{tflags})\n"; |
| next; |
| } |
| |
| # DOS - END HACK |
| |
| $output_file_text->{$pubfile} .= "##{ $rule\n". |
| $text. |
| "##} ".$rule."\n\n"; |
| } |
| |
| # now output all subrules (in a slightly more compact form) |
| foreach my $rule (@rulenames) { |
| next unless ($rule =~ /^__/); |
| my $pubfile = $rules->{$rule}->{output_file}; |
| my $text = $rules->{$rule}->{output_text}; |
| next unless defined ($text); |
| |
| # DOS - bug 6297 - HACK HACK HACK HACK |
| # this will probably screw up meta rules that do something like '&& !$rule' |
| |
| # avoid publishing 'tflags nopublish' rules |
| if (pubfile_is_activeout($pubfile) && exists $rules->{$rule}->{tflags} && |
| $rules->{$rule}->{tflags} =~ /\bnopublish\b/) |
| { |
| print "omitting rule $rule due to tflags nopublish (tflags $rules->{$rule}->{tflags})\n"; |
| next; |
| } |
| |
| # DOS - END HACK |
| |
| $output_file_text->{$pubfile} .= $text; |
| } |
| |
| # finally, finish off all output files |
| foreach my $file (keys %$output_files) { |
| # and get them lint-checked! |
| $files_to_lint->{$file} = 1; |
| } |
| |
| if ($opt_rulemetadata) { |
| open (RULEMD, ">".$opt_rulemetadata) |
| or die "cannot write rulemd to $opt_rulemetadata"; |
| print RULEMD "<?xml version='1.0' encoding='UTF-8'?>\n", |
| "<rulemds>", $rulemd, "</rulemds>\n"; |
| close RULEMD or die "cannot close rulemd to $opt_rulemetadata"; |
| } |
| } |
| |
| # conditionally build a method to UTF-8-encode a string. this is only required |
| # for the rulemetadata XML output, so don't make it mandatory! |
| sub compile_utf8ify_function { |
| if (!eval ' |
| sub utf8ify { use Encode; return Encode::encode("UTF-8", $_[0]); } 1; |
| ') |
| { |
| eval ' |
| sub utf8ify { die "unimplemented -- Encode module required!" } 1; |
| ' |
| } |
| } |
| |
| sub get_rulemetadata_string { |
| my ($rule) = @_; |
| |
| return '' unless ($opt_rulemetadata); |
| |
| my $mod = 0; |
| my $srcfile = ''; |
| my $code = ''; |
| my $name = $rule; |
| |
| # if we found a rule definition with a T_ prefix, use that data |
| if (!$rules->{$name}->{srcfile} && $rules->{"T_".$name}->{srcfile}) { |
| $name = "T_".$name; |
| } |
| |
| if ($rules->{$name}->{srcfile}) { |
| $srcfile = $rules->{$name}->{srcfile}; |
| if ($srcfile) { |
| my @s = stat $srcfile; |
| if (@s) { $mod = $s[9]; } |
| } |
| } |
| |
| if ($rules->{$name}->{code}) { |
| $code = $rules->{$name}->{code}; |
| $code =~ s/\]\]>/\](defanged by mkrules)\]>/gs; # ensure it's CDATA-safe |
| $code = utf8ify($code); |
| } |
| |
| my $tf = $rules->{$name}->{tflags} || ''; |
| |
| return "<rulemetadata>". |
| "<name>$rule</name>". |
| "<src>$srcfile</src>". |
| "<srcmtime>$mod</srcmtime>". |
| # don't include <code> blocks; they bloat up the XML badly (to 800KB) |
| # and make it very slow to parse later |
| # "<code><![CDATA[$code]]></code>". |
| "<tf>$tf</tf>". |
| "</rulemetadata>\n"; |
| } |
| |
| sub fix_up_rule_dependencies { |
| my $rule = shift; |
| |
| my $pubfile = $rules->{$rule}->{output_file}; |
| my $text = $rules->{$rule}->{output_text}; |
| return unless $text; |
| |
| while ($text =~ /^\s*meta\s+(.*)$/mg) { |
| my $line = $1; |
| while ($line =~ /\b([_A-Za-z0-9]+)\b/g) { |
| # force that subrule (if it exists) to output in the |
| # same pubfile |
| my $rule2 = $1; |
| |
| # deal with rules that changed name from "FOO" to "T_FOO" |
| sed_renamed_rule_names(\$rule2); |
| |
| if (!$entries_for_rule_name->{$rule2}) { |
| # we may not always have a rule entry, if the rule was from a non-sandbox |
| # source |
| # warn "cannot find entries_for_rule_name '$rule2'"; |
| } |
| |
| foreach my $entryname2 (@{$entries_for_rule_name->{$rule2}}) { |
| next unless ($rules->{$entryname2} && $rules->{$entryname2}->{output_file}); |
| |
| # don't do this if the subrule would be moved *out* of the |
| # active file! |
| my $pubfile2 = $rules->{$entryname2}->{output_file}; |
| next if (pubfile_is_activeout($pubfile2)); |
| |
| $rules->{$entryname2}->{output_file} = $pubfile; |
| } |
| } |
| } |
| } |
| |
| sub pubfile_is_activeout { |
| return 1 if ($_[0] && $_[0] =~ /\b\Q$opt_activeout\E$/); |
| return 0; |
| } |
| |
| sub pubfile_is_sandboxout { |
| return 1 if ($_[0] && $_[0] =~ /\b\Q$opt_sandboxout\E$/); |
| return 0; |
| } |
| |
| sub write_output_files { |
| foreach my $pubfile (sort keys %$output_files) { |
| if (-f $pubfile) { |
| unlink $pubfile or die "cannot remove output file '$pubfile'"; |
| } |
| |
| if (!filename_in_manifest($pubfile)) { |
| warn "$pubfile: WARNING: not listed in manifest file\n"; |
| } |
| |
| my $text = $output_file_text->{$pubfile}; |
| if ($text) { |
| open (OUT, ">".$pubfile) or die "cannot write to output file '$pubfile'"; |
| sed_renamed_rule_names(\$text); |
| print OUT $text; |
| close OUT or die "cannot close output file '$pubfile'"; |
| # print "$pubfile: written\n"; # too noisy |
| } |
| else { |
| print "$pubfile: no rules promoted\n"; |
| |
| # create an empty file anyway to satisfy MANIFEST |
| open (OUT, ">".$pubfile) or die "cannot write to output file '$pubfile'"; |
| close OUT or die "cannot close output file '$pubfile'"; |
| } |
| } |
| } |
| |
| ########################################################################### |
| |
| sub rule_entry_create { |
| return { |
| text => '', |
| publish => 0 |
| }; |
| } |
| |
| ########################################################################### |
| |
| sub sandbox_rule_name_avoid_collisions { |
| my ($rule, $path) = @_; |
| my $new; |
| my $newreason; |
| my $dowarn = 0; |
| |
| return $rule if $opt_listpromotable; |
| return $rule if $active_rules->{$rule}; |
| return $rule if $rules->{$rule}->{forceactive}; |
| |
| if ($rule !~ /^(?:T_|__)/) { |
| $new = "T_".$rule; |
| $newreason = "missing T_ prefix"; |
| } |
| elsif (!exists $seen_rules->{$rule}) { |
| return $rule; |
| } |
| else { |
| $new = $path; |
| $new =~ s/[^A-Za-z0-9]+/_/gs; |
| $new =~ s/_+/_/gs; |
| $new =~ s/^_//; |
| $new =~ s/_$//; |
| $new = $rule.'_'.$new; |
| $newreason = "collision with existing rule"; |
| $dowarn = 1; |
| } |
| |
| if (!$renamed_rules->{$new}) { |
| $renamed_rules->{$new} = $rule; |
| if ($dowarn) { |
| warn "WARNING: $rule: renamed as $new due to $newreason\n"; |
| } |
| } |
| |
| return $new; |
| } |
| |
| sub sed_renamed_rule_names { |
| my ($textref) = @_; |
| foreach my $new (keys %{$renamed_rules}) { |
| my $rule = $renamed_rules->{$new}; |
| $$textref =~ s/\b${rule}\b/${new}/gs; |
| } |
| } |
| |
| ########################################################################### |
| |
| sub invert_conditional { |
| my $cond = shift; |
| if ($cond =~ /^ \s* ifplugin \s+(.*?)$ /x) { |
| return "if !plugin($1)\n"; |
| } elsif ($cond =~ /^ \s* if \s+(.*?)$ /x) { |
| return "if !($1)\n"; |
| } else { |
| warn "WARNING: cannot parse '$cond' for 'else'\n"; |
| return 'if 0'; |
| } |
| } |
| |
| ########################################################################### |
| |
| sub read_manifest { |
| my ($fname) = @_; |
| parse_line_delimited_config_file($fname, sub { |
| /^\s*(.*?)\s*$/ and $file_manifest->{$1} = 1; |
| }); |
| } |
| |
| sub read_manifest_skip { |
| my ($fname) = @_; |
| parse_line_delimited_config_file($fname, sub { |
| /^\s*(.*?)\s*$/ and push (@{$file_manifest_skip}, qr/$1/); |
| }); |
| } |
| |
| sub read_active { |
| my ($fname) = @_; |
| parse_line_delimited_config_file($fname, sub { |
| /^(\S+)/ and $active_rules->{$1} = 1; |
| }); |
| } |
| |
| sub filename_in_manifest { |
| my ($fname) = @_; |
| return 1 if ($file_manifest->{$fname}); |
| foreach my $skipre (@{$file_manifest_skip}) { |
| return 1 if ($fname =~ $skipre); |
| } |
| return 0; |
| } |
| |
| sub parse_line_delimited_config_file { |
| my ($fname, $callback) = @_; |
| if (!open (IN, "<$fname")) { |
| warn "cannot read $fname\n"; |
| } else { |
| while (<IN>) { |
| next if /^#/; |
| $callback->(); |
| } |
| close IN; |
| } |
| } |
| |
| |
| __DATA__ |
| # SpamAssassin rules file |
| # |
| # Please don't modify this file as your changes will be overwritten with |
| # the next update. Use @@LOCAL_RULES_DIR@@/local.cf instead. |
| # See 'perldoc Mail::SpamAssassin::Conf' for details. |
| # |
| # <@LICENSE> |
| # Licensed to the Apache Software Foundation (ASF) under one or more |
| # contributor license agreements. See the NOTICE file distributed with |
| # this work for additional information regarding copyright ownership. |
| # The ASF licenses this file to you under the Apache License, Version 2.0 |
| # (the "License"); you may not use this file except in compliance with |
| # the License. You may obtain a copy of the License at: |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| # </@LICENSE> |
| # |
| ########################################################################### |
| |
| require_version @@VERSION@@ |
| |