blob: 0c033c61fea954f4ea91c918f4b12a5963c19a47 [file] [log] [blame]
#!/usr/bin/perl -w
# Re-scores the messages listed in spam.log and nonspam.log using the rule
# scores in newscores, then reports which rules take part most often in
# misclassified messages.  A message counts as spam when the summed scores
# of its rules reach 5.  All three input files are read from the current
# directory.

my %falsepos;   # rule => occurrences in non-spam messages that scored >= 5
my %falseneg;   # rule => occurrences in spam messages that scored < 5
my $nfp=0;      # number of non-spam messages that scored >= 5
my $nfn=0;      # number of spam messages that scored < 5
my %scores;     # rule => score read from newscores
my %rulehit;    # rule => 1 + occurrences seen in either log (seeded with 1)

# Fail loudly when an input is missing: carrying on with an unopened handle
# would only yield an empty or misleading report.
open(SPAM, '<', 'spam.log')       or die "cannot open spam.log: $!\n";
open(NONSPAM, '<', 'nonspam.log') or die "cannot open nonspam.log: $!\n";
open(SCORES, '<', 'newscores')    or die "cannot open newscores: $!\n";

# Only lines of the form "score RULE VALUE" are used; everything else in
# the score file is ignored.
while(<SCORES>)
{
    next unless /^score\s+([^\s]*)\s+([-0-9.]*)/;
    $scores{$1} = $2;
    $falsepos{$1} = 0;
    $falseneg{$1} = 0;
    # Seeded with 1 rather than 0; the report subtracts it again when it
    # prints the per-rule ratio.
    $rulehit{$1} = 1;
}
close(SCORES);
# Parse one spam.log line into the rules it lists.
#
# The line must contain: any character, whitespace, an optional run of
# digits/minus signs, whitespace, a non-blank token, whitespace, and the
# comma-separated rule list.  A single trailing "bayes=..." or "time=..."
# field is accepted and ignored.  (Presumably this is the mass-check log
# layout -- confirm against the tool that writes these logs.)
#
# Returns a reference to the list of rule names, with a compacted
# "RULE(n)" entry expanded to n copies of RULE, or undef when the line
# does not have the expected shape.
sub _spam_line_rules {
    my ($line) = @_;
    return unless $line =~ /.\s+[-0-9]*\s+[^\s]+\s+([^\s]*)(?:\s+(?:bayes|time)=\S+)?\s*$/;

    my @rules;
    foreach my $r (split(/,/, $1)) {
        # Support compacted RULE(hitcount) format
        my $hits = ($r =~ s/\((\d+)\)$//) ? $1 : 1;
        push @rules, ($r) x $hits;
    }
    return \@rules;
}

# Spam pass: recompute every spam message's score from %scores and record
# the rules of messages that fall below the threshold of 5.
while(<SPAM>)
{
    next if /^#/;
    # Skip lines that cannot be parsed; otherwise they would score 0 and
    # be counted as false negatives.
    my $rules = _spam_line_rules($_) or next;

    # Rules without an entry in %scores play no part in the statistics.
    my @scored = grep { defined($scores{$_}) } @$rules;

    my $score = 0.0;
    foreach my $rule (@scored)
    {
        $score += $scores{$rule};
        $rulehit{$rule}++;
    }

    if($score < 5)
    {
        $falseneg{$_}++ foreach @scored;
        $nfn++;
    }
}
close(SPAM);
# Non-spam pass: recompute every non-spam message's score from %scores and
# record the rules of messages that reach the threshold of 5.
while (defined(my $entry = <NONSPAM>))
{
    next if $entry =~ /^#/;
    # The rule list is the last non-blank token on the line.
    $entry =~ /.\s+[-0-9]*\s+[^\s]+\s+([^\s]*)\s*$/;
    next unless defined($1);
    my $rule_list = $1;

    # Expand the list; a compacted "RULE(n)" entry becomes n copies of RULE.
    my @hit_rules;
    foreach my $name (split(/,/, $rule_list)) {
        my $times = ($name =~ s/\((\d+)\)$//) ? $1 : 1;
        push @hit_rules, ($name) x $times;
    }

    # Only rules that have an entry in %scores take part in the statistics.
    my @scored = grep { defined($scores{$_}) } @hit_rules;

    my $total = 0.0;
    foreach my $name (@scored) {
        $total += $scores{$name};
        $rulehit{$name}++;
    }

    # Below the threshold the message was classified correctly.
    next unless $total >= 5;

    $falsepos{$_}++ foreach @scored;
    $nfp++;
}
# Build the printable rows of one report table.
#
#   $counts        - hashref: rule => number of misclassified occurrences
#   $hits_plus_one - hashref: rule => 1 + total occurrences (the form in
#                    which %rulehit is stored)
#   $score_of      - hashref: rule => score
#
# Only rules with a non-zero count are listed.  Rows are ordered by the
# ratio that is printed in the first column (count / total occurrences),
# highest first; ties fall back to the higher count and then to the rule
# name so the output is reproducible.
#
# Returns the formatted rows as a list of newline-terminated strings.
sub _error_report_rows {
    my ($counts, $hits_plus_one, $score_of) = @_;

    my %ratio;
    foreach my $rule (keys %$counts) {
        next unless $counts->{$rule} > 0;
        # Undo the seed of 1 to get the real number of occurrences.
        my $hits = ($hits_plus_one->{$rule} || 1) - 1;
        $ratio{$rule} = $hits > 0 ? $counts->{$rule} / $hits : 0;
    }

    my @order = sort {
               $ratio{$b}     <=> $ratio{$a}
            || $counts->{$b}  <=> $counts->{$a}
            || $a             cmp $b
        } keys %ratio;

    return map {
        sprintf("%0.3f %5d % 0.4f %s\n", $ratio{$_}, $counts->{$_}, $score_of->{$_}, $_)
    } @order;
}

print "COMMON FALSE POSITIVES: ($nfp total)\n-----------------------\n\n";
print _error_report_rows(\%falsepos, \%rulehit, \%scores);

print "\n\n\nCOMMON FALSE NEGATIVES: ($nfn total)\n-----------------------\n\n";
print _error_report_rows(\%falseneg, \%rulehit, \%scores);