#!/usr/bin/perl -w
#
# <@LICENSE>
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to you under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at:
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# </@LICENSE>
| |
# Positional command-line arguments, both optional:
#   1. the path handed to parse-rules-for-masses via -d (default "../rules")
#   2. the scoreset number handed to it via -s            (default 0)
# Anything left in @ARGV afterwards is consumed later by the <> operator.
my $argcffile = shift @ARGV;
my $scoreset  = shift @ARGV;
$argcffile = "../rules" if !defined $argcffile;
$scoreset  = 0          if !defined $scoreset;

# Per-rule tables, keyed by rule name.  %freq used to be an undeclared
# package global; it is declared here so it is treated like its siblings.
my %freq         = ();
my %freq_spam    = ();
my %freq_nonspam = ();

# Values taken from the "(all messages)" summary line of the input.
my $num_spam;
my $num_nonspam;
my $num_total;

my %mutable_tests = ();
my %ranking       = ();
my %soratio       = ();
my %is_nice       = ();

# The helper is asked to write its output under tmp/, so make sure that
# directory exists before it runs.  The result of mkdir is deliberately not
# checked here: if the directory cannot be created, the helper invocation
# below fails and reports the problem.
mkdir("tmp", 0755) unless -d "tmp";

# The leading "./" matters: require() resolves a bare relative path through
# @INC, and "." is no longer part of @INC on perl 5.26 and later, so
# "tmp/rules$$.pl" would not be found there.
my $tmpf = "./tmp/rules$$.pl";

# List-form system() bypasses the shell, so a rules path containing quotes,
# '$' or backticks is passed through verbatim instead of being re-parsed.
my @parse_cmd = ("../build/parse-rules-for-masses",
                 "-d", $argcffile, "-s", $scoreset, "-o", $tmpf);
if (system(@parse_cmd) != 0) {
  die $? == -1
      ? "cannot run $parse_cmd[0]: $!\n"
      : "$parse_cmd[0] failed (wait status $?)\n";
}

# Load the generated file (the rest of the script reads %rules, which is
# presumably defined by it -- verify against parse-rules-for-masses), then
# discard the temporary file.
require $tmpf;
unlink $tmpf;
| |
# Read the frequency table from the files named in @ARGV (or STDIN).  A
# usable line consists of five numeric columns -- overall, spam, nonspam,
# S/O ratio, ranking -- followed by one column that is skipped and then the
# rule name.  Lines that do not have this shape are ignored.
while (<>) {
  # The name capture is non-greedy so that trailing blanks or a stray "\r"
  # are not glued onto the rule name (which would defeat the comparisons
  # and the %rules lookup below).
  /^\s*([\d\.]+)\s+([\d\.]+)\s+([\d\.]+)\s+([\d\.]+)\s+([\d\.]+)\s+\S+\s+(.+?)\s*$/
      or next;

  my ($overall, $spam, $nonspam, $soratio, $ranking, $test) =
      ($1 + 0, $2 + 0, $3 + 0, $4 + 0, $5 + 0, $6);

  # Summary rows: remember the totals, never treat them as rules.
  if ($test eq '(all messages)') {
    $num_spam    = $spam;
    $num_nonspam = $nonspam;
    $num_total   = $spam + $nonspam;
    next;
  }
  next if ($test eq '(all messages as %)');

  if (!defined ($rules{$test})) {
    warn "$test: rule no longer exists; ignoring\n";
    next;
  }

  $freq{$test}         = $overall;
  $freq_spam{$test}    = $spam;
  $freq_nonspam{$test} = $nonspam;

  my $tflags = $rules{$test}->{tflags} || '';

  # A rule is mutable unless one of the following applies:
  #   - it carries the "userconf" tflag;
  #   - it is a "net" rule and bit 0 of the scoreset is clear (sets 0/2);
  #   - it is a "learn" rule and bit 1 of the scoreset is clear (sets 0/1);
  #   - parse-rules did not mark it as mutable.
  my $immutable_reason;
  if ($tflags =~ /\buserconf\b/) {
    $immutable_reason = "due to 'userconf'";
  }
  elsif ( ($scoreset & 1) == 0 && $tflags =~ /\bnet\b/ ) {
    $immutable_reason = "due to 'net'";
  }
  elsif ( ($scoreset & 2) == 0 && $tflags =~ /\blearn\b/ ) {
    $immutable_reason = "due to 'learn'";
  }
  elsif (!$rules{$test}->{mutable}) {
    # rules read from the non-mutable section
    $immutable_reason = "according to parse-rules";
  }
  # A current score of 0 is deliberately NOT a reason for immutability:
  # rewrite-with-new-scores has a tendency to "simplify" scores down to 0,
  # and rules that really were scored zero when the mass-check ran will
  # have no hits, so the 'less than 0.01%' case below takes care of them.

  if (defined $immutable_reason) {
    print "$test: immutable $immutable_reason\n";
    $mutable_tests{$test} = 0;
  }
  else {
    $mutable_tests{$test} = 1;
  }

  $is_nice{$test} = ($tflags =~ m/\bnice\b/i) ? 1 : 0;

  # less than 0.01% of messages were hit: force these rules to 0.0
  if ($overall < 0.01) {
    print "$test: zeroing rule and marking immutable, due to low hitrate\n";
    $mutable_tests{$test}  = 0;
    $soratio{$test}        = 0.5;
    $ranking{$test}        = 0.0;
    $rules{$test}->{score} = 0;   # tvd - disable these rules automagically
  }
  else {
    $soratio{$test} = $soratio;
    $ranking{$test} = $ranking;
  }
}
| |
# Make sure the output directory exists.  mkdir is only attempted when the
# directory is missing, so an already existing tmp/ no longer produces a
# spurious "File exists" warning on every run.
if (!-d "tmp" && !mkdir("tmp", 0755)) {
  warn "Couldn't create tmp directory!: $!\n";
}

# Write one line per rule to tmp/ranges.data: "<lo> <hi> <mutable> <name>".
# Rules are emitted by descending ranking; equal rankings are ordered by
# name so that the file is reproducible from run to run.
my $ranges_file = "tmp/ranges.data";
open(my $out, '>', $ranges_file)
    or die "cannot write $ranges_file: $!\n";

foreach my $test (sort { $ranking{$b} <=> $ranking{$a} || $a cmp $b }
                  keys %freq)
{
  if (!defined ($rules{$test})) {
    warn "$test: rule not found! forcing score to 0";
    print {$out} "0 0 0 $test\n";
    next;
  }

  my $mutable = $mutable_tests{$test};

  # non-mutable -- lock the range down to the current score.
  if (!$mutable) {
    my $fixed = $rules{$test}->{score};
    # The rule name is passed as an argument, never interpolated into the
    # format string, so a '%' in a name cannot be read as a conversion.
    printf {$out} "%3.3f %3.3f 0 %s\n", $fixed, $fixed, $test;
    next;
  }

  # Mutable rules get a one-sided range scaled by their ranking:
  # "nice" rules may only score at or below zero, all others at or above.
  my ($lo, $hi);
  if ($is_nice{$test}) {
    $hi = 0;
    $lo = $ranking{$test} * -4.5;
  }
  else {
    $lo = 0;
    $hi = $ranking{$test} * 4.5;
  }

  printf {$out} "%3.1f %3.1f %s %s\n", $lo, $hi, $mutable, $test;
}

# Buffered write errors only surface at close time, so check it.
close($out) or die "error closing $ranges_file: $!\n";
exit;
| |
| |