| #!/usr/bin/perl -w |
| |
use strict;
use warnings;

# Settings shared with the subs further down.  They are package variables
# (declared with "our") so that code elsewhere in this file can keep
# referring to them by their bare names under "use strict".
our ($USE_CHI_COMBINING, $SPAMLOG, $HAMLOG);

# Written verbatim into constants.pl as the USE_CHI_COMBINING constant.
$USE_CHI_COMBINING = 0;

# use 1 bucket's set for speed, just to get going:
#$SPAMLOG = "/raid/bayestest/results/bucket1/spam.log";
#$HAMLOG = "/raid/bayestest/results/bucket1/nonspam.log";
# or use the lot, slow but more accurate:
$SPAMLOG = "/raid/bayestest/results/spam_all.log";
$HAMLOG = "/raid/bayestest/results/nonspam_all.log";

# Search space for the random search: constant name => [ low, high ].
# A value for each name is drawn from this range on every iteration.
my $searchspaces = {
    ROBINSON_S_CONSTANT => [ 0.0, 0.75 ],
    ROBINSON_X          => [ 0.0, 0.75 ],
};
| |
# Driver: 1000 rounds of random search.  Each round draws a fresh set of
# constants, writes them out, runs the analyser and prints one result line.
$| = 1;        # autoflush the selected output handle after every print
srand(23);     # fixed seed for the random draws

foreach my $count (0 .. 999) {
    my $rands = rand_constants();
    write_constants($rands);
    run_test($count);
    parse_log($rands, $count);
}

exit;
| |
# Draw one random value for every constant listed in $searchspaces.
#
# Takes no arguments.
#
# Returns a hashref mapping each constant name to a value drawn from
# [lo, hi), where [lo, hi] is that constant's entry in $searchspaces.
# A zero-width range (lo == hi) always yields lo.
sub rand_constants {
    my $rands = { };

    # Visit the names in sorted order: hash ordering differs from one perl
    # process to the next, so without the sort a fixed srand() seed would
    # not hand the same draw to the same constant on every run.
    foreach my $name (sort keys %{$searchspaces}) {
        my ($lo, $hi) = @{ $searchspaces->{$name} };

        # Scale a unit draw instead of calling rand($hi - $lo): rand(0)
        # is treated as rand(1), which would push a zero-width range
        # outside its bounds.
        $rands->{$name} = $lo + rand() * ($hi - $lo);
    }

    return $rands;
}
| |
# Write constants.pl (in the current directory) holding the "use constant"
# declarations for one test run.
#
# Arguments:
#   $rands - hashref supplying ROBINSON_S_CONSTANT and ROBINSON_X
#
# USE_CHI_COMBINING is taken from the file-level $USE_CHI_COMBINING; the
# remaining constants are fixed values.  The file is presumably loaded by
# ./bayes-analyse-from-raw-counts -- confirm against that script.
#
# Returns 1 on success.  Dies if the file cannot be opened, written or
# closed: carrying on would leave a stale constants.pl behind, and the run
# would then be reported against constants it never used.
sub write_constants {
    my $rands = shift;

    my $file = "constants.pl";
    my $text = join("\n",
        "",
        "use constant ROBINSON_S_CONSTANT => $rands->{ROBINSON_S_CONSTANT};",
        "use constant ROBINSON_X => $rands->{ROBINSON_X};",
        "use constant ROBINSON_MIN_PROB_STRENGTH => 0.27;",
        "use constant PROB_BOUND_LOWER => 0.001;",
        "use constant PROB_BOUND_UPPER => 0.999;",
        "use constant N_SIGNIFICANT_TOKENS => 150;",
        "use constant USE_CHI_COMBINING => $USE_CHI_COMBINING;",
        "1;",
        "");

    open(my $out, '>', $file)
        or die "write_constants: cannot open $file for writing: $!\n";
    print {$out} $text
        or die "write_constants: cannot write to $file: $!\n";
    # Buffered write errors only surface at close time.
    close($out)
        or die "write_constants: cannot close $file: $!\n";

    return 1;
}
| |
| |
# Run ./bayes-analyse-from-raw-counts on $SPAMLOG and $HAMLOG, sending its
# stdout and stderr to logs/log.$count.
#
# Arguments:
#   $count - iteration number, used to name the log file
#
# Returns the wait status from system() (0 on success).  A non-zero status
# is reported with warn() rather than die(), so one failed run does not
# abort the remaining iterations.
sub run_test {
    my $count = shift;

    # The directory normally exists after the first call; only complain
    # when it is missing and cannot be created.
    -d "logs" or mkdir("logs")
        or warn "run_test: cannot create logs directory: $!\n";

    my $logfile = "logs/log.$count";

    # Single-string form on purpose: the shell performs the redirection.
    my $status = system("./bayes-analyse-from-raw-counts $SPAMLOG $HAMLOG ".
                        "> $logfile 2>&1");
    if ($status != 0) {
        warn "run_test: ./bayes-analyse-from-raw-counts failed for run $count ".
             "(wait status $status); see $logfile\n";
    }

    return $status;
}
| |
# Pull the optimisation summary out of logs/log.$count and print one result
# line for this iteration on the currently selected output handle.
#
# Arguments:
#   $rands - hashref of the constant values used for this run
#   $count - iteration number; selects logs/log.$count
#
# Output line: "COUNT NAME VALUE ... KEY VALUE ...\n" -- the entries of
# $rands in sorted name order, then whichever of cost, fn, fp, hamcutoff,
# spamcutoff, unham, unspam and unsure were found, in sorted key order.
#
# Only the first "optimization for hamcutoff=..." line is used.  The line
# after it is skipped; the next two are expected to carry the FP/FN and
# Unsure figures.  A field is reported only when its own pattern matched:
# assigning $1/$2/$3 after a failed match would silently reuse the captures
# of the previous successful match.  Problems (unreadable log, incomplete
# summary) are reported with warn() and the line is still printed with
# whatever was found.
sub parse_log {
    my $rands = shift;
    my $count = shift;
    my $r = { };

    my $logfile = "logs/log.$count";

    if (open(my $in, '<', $logfile)) {
        while (defined(my $text = <$in>)) {
            $text =~ /optimization for hamcutoff=(\S+), spamcutoff=(\S+): cost=\$\s*(\S+)/
                or next;
            @{$r}{qw(hamcutoff spamcutoff cost)} = ($1, $2, $3);

            my $skipped = <$in>;    # line between the header and the totals
            my $totals  = <$in>;
            my $unsure  = <$in>;

            if (defined $totals) {
                # Collapse whitespace runs so the pattern can rely on
                # single spaces (this also turns the newline into a space).
                $totals =~ s/\s+/ /gs;
                if ($totals =~ /FP: (\S+) \S+ FN: (\S+) /) {
                    @{$r}{qw(fp fn)} = ($1, $2);
                }
            }

            if (defined $unsure) {
                $unsure =~ s/\s+/ /gs;
                if ($unsure =~ /Unsure: (\S+) \S+ .ham: (\S+) \S+ spam: (\S+) /) {
                    @{$r}{qw(unsure unham unspam)} = ($1, $2, $3);
                }
            }

            warn "parse_log: incomplete summary in $logfile\n"
                unless exists $r->{fp} && exists $r->{unsure};

            last;
        }
        close($in);
    }
    else {
        warn "parse_log: cannot open $logfile: $!\n";
    }

    my @fields = ($count);
    push @fields, map { ($_, $rands->{$_}) } sort keys %{$rands};
    push @fields, map { ($_, $r->{$_}) } sort keys %{$r};
    print join(" ", @fields), "\n";

    # Pause kept from the original; its purpose is not evident from this file.
    sleep 1;
}