#!/bin/sh
#
# Prints a statistics report for a SpamAssassin ruleset on stdout.
#
# Usage: <this script> [SCORESET]
#
# SCORESET is expected to be set by ./config (sourced below when it is
# readable); a non-empty first argument overrides whatever config set.
# Note: this script does not need to rely on the runGA setup.

# The argument to '.' must contain a '/' so that the shell reads the file
# from the current directory instead of searching PATH for it.
if [ -r config ]; then
  . ./config
fi

# A non-empty first argument takes precedence over the configured value.
if [ -n "${1:-}" ]; then
  SCORESET=$1
fi

# Never leave SCORESET empty: later commands pass it as an option value, and
# an empty unquoted expansion would make the option consume the next argument.
# NOTE(review): 0 is assumed to match the helper tools' own default scoreset
# -- confirm.
: "${SCORESET:=0}"

# gen_fp_fn_report THRESHOLD
#
# Runs ./fp-fn-statistics against spam-test.log and ham-test.log in the
# current directory and writes its output to stdout with the noise removed.
#
# Globals:   SCORESET (read) - passed through as --scoreset=<value>
# Arguments: $1 - threshold, passed through as the value of --threshold
# Outputs:   the tool's stdout, minus lines starting with "Reading" and
#            minus empty lines
# Returns:   exit status of the sed filter (last command of the pipeline)
gen_fp_fn_report () {
  # Both expansions are quoted so each is always passed as exactly one word.
  ./fp-fn-statistics \
      --spam=spam-test.log \
      --ham=ham-test.log \
      --threshold "$1" "--scoreset=$SCORESET" \
    | sed -e '/^Reading/d' -e '/^$/d'
}

# Report title, then the classification results at the default threshold (5).
printf '%s\n\n' "STATISTICS REPORT FOR SPAMASSASSIN RULESET"
printf '%s\n\n' "Classification success on test corpora, at default threshold:"
gen_fp_fn_report 5

printf '\n%s\n\n' "Results on test corpora at various alternative thresholds:"

# List a wide range of thresholds, so that we can make graphs later ;)
# (5 is not in the list; it was already reported above.)
for thresh in -4 -3 -2 -1 0 1 2 3 4 4.5 5.5 6 6.5 7 8 9 10 12 15 17 20; do
  gen_fp_fn_report "$thresh"
  echo
done

# Section header for the per-test hit frequencies; the text is emitted as-is.
echo
cat <<'EOF'
Test hit frequencies, for spam and ham corpora:
(note: S/O indicates ratio of spam hits to overall hits for
each test, where 0.0 = hits only non-spam and 1.0 = hits only spam,
and the 'score' field should be ignored.)

EOF

# Don't just use "freqs", it's often out of date w.r.t. scores.
# SCORESET is quoted so that an empty value cannot make -s consume spam.log.
# The grep removes T_ test rules from the output; the pattern is a plain
# string, so ordinary grep is enough (egrep is obsolescent).
perl hit-frequencies -x -p -s "$SCORESET" spam.log ham.log \
  | grep -v ' T_'

# Capture the version string printed by ../build/get_version, run from the
# parent directory. '&&' keeps the helper from running in the wrong directory
# if the cd fails.
version=$( cd .. && ./build/get_version )

echo
echo "Rule file versions for which these results apply:"
echo "(SpamAssassin Version string: $version)"
echo

# Long listing of the rule files, done in a subshell so the caller's working
# directory is untouched. '&&' prevents listing *.cf from the current
# directory when ../rules is missing; '--' protects against file names that
# start with a dash.
( cd ../rules && ls -l -- *.cf )
| |