blob: 1b3a311e35430c4475b0bb7bd87223d49779467e [file] [log] [blame]
#!/usr/bin/perl -w
use strict;
use warnings;

# Random-search driver.  Each iteration draws a random value for every
# entry in $searchspaces, writes the values to constants.pl, runs the
# external analysis script, and prints one summary line parsed from that
# run's log file.

# Value emitted into constants.pl as USE_CHI_COMBINING.
# Declared with "our" so it stays a package global visible to the subs below.
our $USE_CHI_COMBINING = 0;

# use 1 bucket's set for speed, just to get going:
#our $SPAMLOG = "/raid/bayestest/results/bucket1/spam.log";
#our $HAMLOG = "/raid/bayestest/results/bucket1/nonspam.log";

# or use the lot, slow but more accurate:
our $SPAMLOG = "/raid/bayestest/results/spam_all.log";
our $HAMLOG  = "/raid/bayestest/results/nonspam_all.log";

# Constant name => [ low, high ] bounds used when drawing a random value.
my $searchspaces = {
    ROBINSON_S_CONSTANT => [ 0.0, 0.75 ],
    ROBINSON_X          => [ 0.0, 0.75 ],
};

$| = 1;       # unbuffered STDOUT, so each result line appears immediately
srand(23);    # fixed seed for the random draws

# 1000 trials, numbered 0 .. 999; the number also names the log file.
for my $count (0 .. 999) {
    my $rands = rand_constants();
    write_constants($rands);
    run_test($count);
    parse_log($rands, $count);
}
exit;
# rand_constants()
#
# Draws one random value for every constant named in the file-level
# $searchspaces hash ref, whose values are [ low, high ] pairs.
#
# Returns a hash ref mapping each constant name to a number in
# [low, high), or exactly low when low == high.
sub rand_constants {
    my %sample;

    # Visit the names in sorted order: plain hash order is not stable
    # between perl processes, so without the sort a fixed srand() seed
    # would not hand the same draw to the same constant on every run.
    for my $name (sort keys %{$searchspaces}) {
        my ($lo, $hi) = @{ $searchspaces->{$name} };
        my $width = $hi - $lo;

        # rand(0) behaves like rand(1), so a zero-width range has to be
        # special-cased or the result would land outside the range.
        $sample{$name} = $width == 0 ? $lo : $lo + rand($width);
    }

    return \%sample;
}
# write_constants($rands)
#
# Overwrites constants.pl in the current directory with a block of
# "use constant" lines.  ROBINSON_S_CONSTANT and ROBINSON_X come from the
# $rands hash ref, USE_CHI_COMBINING from the global $USE_CHI_COMBINING,
# and the remaining values are fixed literals.
#
# Dies if the file cannot be opened, written or closed: continuing would
# leave a stale constants.pl in place for the next test run.
# Returns 1 on success.
sub write_constants {
    my $rands = shift;

    my $file = "constants.pl";
    open(my $out, '>', $file)
        or die "write_constants: cannot open $file for writing: $!\n";

    print {$out} "
use constant ROBINSON_S_CONSTANT => $rands->{ROBINSON_S_CONSTANT};
use constant ROBINSON_X => $rands->{ROBINSON_X};
use constant ROBINSON_MIN_PROB_STRENGTH => 0.27;
use constant PROB_BOUND_LOWER => 0.001;
use constant PROB_BOUND_UPPER => 0.999;
use constant N_SIGNIFICANT_TOKENS => 150;
use constant USE_CHI_COMBINING => $USE_CHI_COMBINING;
1;
" or die "write_constants: cannot write to $file: $!\n";

    # Buffered write errors only surface here, so close is checked too.
    close($out)
        or die "write_constants: cannot close $file: $!\n";

    return 1;
}
# run_test($count)
#
# Runs ./bayes-analyse-from-raw-counts on the files named by the globals
# $SPAMLOG and $HAMLOG, sending its stdout and stderr to logs/log.$count.
# The "logs" directory is created first if it is missing.
#
# Problems are reported with warn() but never abort the caller's loop.
# Returns the value returned by system().
sub run_test {
    my $count = shift;

    my $logdir = "logs";
    if (!mkdir($logdir)) {
        my $err = $!;
        # Failing because the directory already exists is the normal case.
        warn "run_test: cannot create $logdir: $err\n" unless -d $logdir;
    }

    # Single-string form on purpose: the shell performs the redirection.
    my $status = system("./bayes-analyse-from-raw-counts $SPAMLOG $HAMLOG ".
        "> logs/log.$count 2>&1");

    if ($status == -1) {
        warn "run_test: could not start analysis for run $count: $!\n";
    }
    elsif ($status != 0) {
        warn "run_test: analysis for run $count ended with wait status $status\n";
    }

    return $status;
}
# parse_log($rands, $count)
#
# Reads logs/log.$count, locates the first line of the form
#   "optimization for hamcutoff=..., spamcutoff=...: cost=$ ..."
# and records hamcutoff, spamcutoff and cost from it.  The line after
# that is skipped; the following two lines are expected to carry the
# "FP: ... FN: ..." and "Unsure: ... ham: ... spam: ..." figures.  For
# each label the first token after it is recorded and the token after
# that is ignored.
#
# Prints one line to STDOUT: the run number, then "name value" pairs for
# the constants in $rands (sorted by name), then "name value" pairs for
# the parsed results (sorted by name).  A field whose line is missing or
# does not match is left out rather than filled with a stale capture.
# Finally sleeps for one second.
sub parse_log {
    my ($rands, $count) = @_;

    my %result;
    my $logfile = "logs/log.$count";

    if (open(my $in, '<', $logfile)) {
        LINE: while (defined(my $line = <$in>)) {
            next LINE
                unless $line =~ /optimization for hamcutoff=(\S+), spamcutoff=(\S+): cost=\$\s*(\S+)/;
            @result{qw(hamcutoff spamcutoff cost)} = ($1, $2, $3);

            # The line directly after the header carries nothing we record.
            scalar readline($in);

            # Whitespace runs are collapsed to single spaces before
            # matching, because the patterns use literal single spaces.
            my $errors = readline($in);
            if (defined $errors) {
                $errors =~ s/\s+/ /gs;
                if ($errors =~ /FP: (\S+) \S+ FN: (\S+) /) {
                    @result{qw(fp fn)} = ($1, $2);
                }
            }

            my $unsure = readline($in);
            if (defined $unsure) {
                $unsure =~ s/\s+/ /gs;
                if ($unsure =~ /Unsure: (\S+) \S+ .ham: (\S+) \S+ spam: (\S+) /) {
                    @result{qw(unsure unham unspam)} = ($1, $2, $3);
                }
            }

            # Only the first optimization block in the log is reported.
            last LINE;
        }
        close($in);
    }
    else {
        # Still report the constants below, so the run stays visible.
        warn "parse_log: cannot open $logfile: $!\n";
    }

    my $line = "$count";
    for my $key (sort keys %{$rands}) {
        $line .= " $key $rands->{$key}";
    }
    for my $key (sort keys %result) {
        $line .= " $key $result{$key}";
    }
    print $line, "\n";

    # One-second pause after every report, kept from the original
    # (the reason is not visible in this file).
    sleep 1;
}