blob: 4c017c4f803c8e0dad2f943d254c78aac416b62e [file] [log] [blame]
#!/usr/bin/perl -w -T
BEGIN {
if (-e 't/test_dir') { # if we are running "t/rule_names.t", kluge around ...
chdir 't';
}
if (-e 'test_dir') { # running from test directory, not ..
unshift(@INC, '../blib/lib');
}
}
my $prefix = '.';
if (-e 'test_dir') { # running from test directory, not ..
$prefix = '..';
}
use lib '.'; use lib 't';
use SATest; sa_t_init("rule_names");
use strict;
use Mail::SpamAssassin;
BEGIN {
eval { require Digest::SHA; Digest::SHA->import(qw(sha1)); 1 }
or do { require Digest::SHA1; Digest::SHA1->import(qw(sha1)) }
}
our $RUN_THIS_TEST;
use Test::More;
plan skip_all => "This test requires 'run_rule_name_tests' set to 'y'." unless conf_bool('run_rule_name_tests');
use vars qw(%patterns %anti_patterns);
# initialize SpamAssassin
my $sa = create_saobj({'dont_copy_prefs' => 1});
# allow_user_rules, otherwise $sa->{conf}->{test_types} will be
# deleted by SA::Conf::Parser::finish_parsing()
$sa->{conf}->{allow_user_rules} = 1;
$sa->init(0); # parse rules
# get rule names
my @tests;
while (my ($test, $type) = each %{ $sa->{conf}->{test_types} }) {
push @tests, $test;
}
# run tests
my $mail = 'log/rule_names.eml';
write_mail();
%patterns = ();
my $i = 1;
for my $test (@tests) {
# look for test with spaces on either side, should match report
# lines in spam report, only exempt rules that are really unavoidable
# and are clearly not hitting due to rules being named poorly
next if $test =~ /^UPPERCASE_\d/;
next if $test eq "UNIQUE_WORDS";
# exempt the auto-generated nightly mass-check rules
next if $test =~ /^T_MC_/;
$anti_patterns{"$test,"} = "P_" . $i++;
}
{ # couldn't call Test::plan in a BEGIN phase, the %patterns and %anti_patterns
# must be assembled first in order to get the planned test count
plan tests => scalar(keys %anti_patterns) + scalar(keys %patterns);
diag "Note: rule_name failures may be only cosmetic but must be fixed before release";
};
# ---------------------------------------------------------------------------
tstprefs ("
# set super low threshold, so always marked as spam
required_score -10000.0
# add two fake lexically high tests so every other hit will always be
# followed by a comma in the X-Spam-Status header
body ZZZZZZZZ /./
body zzzzzzzz /./
");
sarun ("-L < $mail", \&patterns_run_cb);
ok_all_patterns();
# function to write test email with varied (not random) ordering tests in body
sub write_mail {
if (open(MAIL, ">$mail")) {
print MAIL <<'EOF';
Received: from internal.example.com [127.0.0.1] by localhost
for recipient@example.com; Fri, 07 Oct 2002 09:02:00 +0000
Received: from external.example.org [150.51.53.1] by internal.example.com
for recipient@example.com; Fri, 07 Oct 2002 09:01:00 +0000
Message-ID: <clean.1010101@example.com>
Date: Mon, 07 Oct 2002 09:00:00 +0000
From: Sender <sender@example.com>
MIME-Version: 1.0
To: Recipient <recipient@example.com>
Subject: this trivial message should have no hits
Content-Type: text/plain; charset=us-ascii; format=flowed
Content-Transfer-Encoding: 7bit
EOF
# we are looking for random failures, but we do a deterministic
# test to prevent too much frustration with "make test".
# start off sorted
@tests = sort @tests;
print MAIL join("\n", @tests) . "\n\n";
# 25 iterations gets most hits most of the time, but 10 is large enough
for (1..10) {
print MAIL join("\n", sha1_shuffle($_, @tests)) . "\n\n";
}
close(MAIL);
}
else {
die "can't open output file: $!";
}
}
# Fisher-Yates shuffle
sub fy_shuffle {
for (my $i = $#_; $i > 0; $i--) {
@_[$_,$i] = @_[$i,$_] for int rand($i+1);
}
return @_;
}
# SHA1 shuffle
sub sha1_shuffle {
my $i = shift;
return map { $_->[0] }
sort { $a->[1] cmp $b->[1] }
map { [$_, sha1($_ . $i)] }
@_;
}