blob: 929fa7ee539496ca9e82f23c97e2a2f36e41a935 [file] [log] [blame]
#!/usr/bin/perl -w
#
# <@LICENSE>
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to you under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at:
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# </@LICENSE>
# Command line: [rules-dir [scoreset]].  Both arguments are optional; any
# arguments left in @ARGV afterwards are read as input by the main loop.
my $argcffile = shift @ARGV;
my $scoreset = shift @ARGV;
$argcffile = "../rules" if ( !defined $argcffile );
$scoreset = 0 if ( !defined $scoreset );

# Per-test tables, filled while reading the frequency report and consumed
# when the ranges file is written.
my %freq = ();
my %freq_spam = ();
my %freq_nonspam = ();
my $num_spam;
my $num_nonspam;
my $num_total;
my %mutable_tests = ();
my %ranking = ();
my %soratio = ();
my %is_nice = ();

# Have parse-rules-for-masses dump the rule definitions as Perl code into a
# per-process temporary file.  The list form of system() bypasses the shell,
# so a rules path containing spaces or shell metacharacters is passed through
# verbatim.
my $tmpf = "./tmp/rules$$.pl";
my @parse_cmd = ("../build/parse-rules-for-masses",
                 "-d", $argcffile, "-s", $scoreset, "-o", $tmpf);
system(@parse_cmd) == 0
    or die "parse-rules-for-masses failed (wait status $?)\n";

# Load the generated code.  The rest of this script reads %rules, which is
# not defined anywhere in this file, so it is expected to come from here.
# The temporary file is removed even when loading fails; the original error
# is then re-raised.
my $loaded = eval { require $tmpf; 1 };
my $load_error = $@;
unlink $tmpf;
die $load_error unless $loaded;
# Read the frequency report from the remaining command-line files (or STDIN)
# and record, for every test that still exists in %rules, its hit rates,
# S/O ratio, ranking, "nice" flag and whether its score may be changed.
while (<>) {
    # Five numeric columns, one further column that is ignored, then the
    # test name.  The name may contain spaces ("(all messages)"), so it is
    # matched lazily up to the end of line; that way trailing blanks or a
    # CR from CRLF input do not end up inside the name.
    /^\s*([\d\.]+)\s+([\d\.]+)\s+([\d\.]+)\s+([\d\.]+)\s+([\d\.]+)\s+\S+\s+(.+?)\s*$/ or next;
    my ($overall, $spam, $nonspam, $soratio, $ranking) =
        ($1 + 0, $2 + 0, $3 + 0, $4 + 0, $5 + 0);
    my $test = $6;

    # Summary rows: remember the totals, skip the percentage row.
    if ($test eq '(all messages)') {
        $num_spam = $spam;
        $num_nonspam = $nonspam;
        $num_total = $spam + $nonspam;
        next;
    }
    next if ($test eq '(all messages as %)');

    if (!defined ($rules{$test})) {
        warn "$test: rule no longer exists; ignoring\n";
        next;
    }

    $freq{$test} = $overall;
    $freq_spam{$test} = $spam;
    $freq_nonspam{$test} = $nonspam;

    my $tflags = $rules{$test}->{tflags};
    $tflags ||= '';

    # A test is mutable unless one of the following applies (first match
    # wins): it is a "userconf" rule, a "net" rule while bit 0 of the
    # scoreset is clear (sets 0/2), a "learn" rule while bit 1 of the
    # scoreset is clear (sets 0/1), or parse-rules did not mark it mutable.
    my $immutable_reason;
    if ($tflags =~ /\buserconf\b/) {
        $immutable_reason = "due to 'userconf'";
    }
    elsif ( ($scoreset & 1) == 0 && $tflags =~ /\bnet\b/ ) {
        $immutable_reason = "due to 'net'";
    }
    elsif ( ($scoreset & 2) == 0 && $tflags =~ /\blearn\b/ ) {
        $immutable_reason = "due to 'learn'";
    }
    elsif (!$rules{$test}->{mutable}) {
        # rules read from the non-mutable section
        $immutable_reason = "according to parse-rules";
    }
    # Note: a current score of 0 deliberately does NOT make a rule immutable.
    # rewrite-with-new-scores has a tendency to "simplify" scores down to 0,
    # and rules that really were scored zero when the mass-check ran have no
    # hits, so the low-hitrate case below takes care of them.

    if (defined $immutable_reason) {
        print "$test: immutable $immutable_reason\n";
        $mutable_tests{$test} = 0;
    }
    else {
        $mutable_tests{$test} = 1;
    }

    $is_nice{$test} = ($tflags =~ m/\bnice\b/i) ? 1 : 0;

    # less than 0.01% of messages were hit: force these rules to 0.0
    if ($overall < 0.01) {
        print "$test: zeroing rule and marking immutable, due to low hitrate\n";
        $mutable_tests{$test} = 0;
        $soratio{$test} = 0.5;
        $ranking{$test} = 0.0;
        $rules{$test}->{score} = 0; # tvd - disable these rules automagically
    }
    else {
        $soratio{$test} = $soratio;
        $ranking{$test} = $ranking;
    }
}
# Write tmp/ranges.data: one "<lo> <hi> <mutable> <test>" line per test,
# highest-ranked tests first.

# Only complain about the directory when it is really missing and cannot be
# created; an already existing tmp/ is the normal case.
if ( !-d "tmp" && !mkdir("tmp", 0755) ) {
    warn "Couldn't create tmp directory!: $!\n";
}

my $ranges_file = "tmp/ranges.data";
open(my $out, '>', $ranges_file)
    or die "Couldn't open $ranges_file for writing: $!\n";

# Sort by descending ranking; ties are broken by test name so the output is
# reproducible from run to run.
my @ordered_tests =
    sort { $ranking{$b} <=> $ranking{$a} or $a cmp $b } keys %freq;

foreach my $test (@ordered_tests) {
    if (!defined ($rules{$test})) {
        warn "$test: rule not found! forcing score to 0";
        print {$out} "0 0 0 $test\n";
        next;
    }

    # non-mutable -- lock the range down to the current score.
    if (!$mutable_tests{$test}) {
        my $score = $rules{$test}->{score};
        printf {$out} "%3.3f %3.3f 0 %s\n", $score, $score, $test;
        next;
    }

    # Mutable tests get a range of 4.5 * ranking on one side of zero:
    # below zero for "nice" tests, above zero for all others.
    my ($lo, $hi);
    if ($is_nice{$test}) {
        $hi = 0;
        $lo = $ranking{$test} * -4.5;
    }
    else {
        $lo = 0;
        $hi = $ranking{$test} * 4.5;
    }

    # The test name is passed as an argument rather than interpolated into
    # the format, so it can never be interpreted as a printf directive.
    printf {$out} "%3.1f %3.1f %s %s\n", $lo, $hi, $mutable_tests{$test}, $test;
}

# Buffered write errors only surface at close time.
close($out) or die "Error closing $ranges_file: $!\n";
exit;