#!/bin/sh
#
# Print a statistics report for a SpamAssassin ruleset on stdout.
#
# Usage: sh <this-script> [SCORESET]
#
# Must be run from the directory holding the helper scripts and log files
# it references by relative path (./config, ./fp-fn-statistics, ...).

# Set SCORESET: ./config (if readable) is sourced first and is expected to
# provide it -- TODO confirm against the config file in use.
# note: this script does not need to rely on the runGA setup.
# The argument to '.' must contain a '/' so the shell does not search PATH.
[ -r config ] && . ./config

# A non-empty first argument overrides whatever config supplied.
if [ -n "$1" ]; then
  SCORESET=$1
fi

#######################################
# Run ./fp-fn-statistics over the test logs at one score threshold and
# print its output with the "Reading..." progress lines and blank lines
# stripped.
# Globals:   SCORESET (read) - passed through as --scoreset=
# Arguments: $1 - threshold handed to --threshold
# Outputs:   filtered fp-fn-statistics output on stdout
# Returns:   exit status of the final sed stage
#######################################
gen_fp_fn_report () {
  # Both expansions are quoted so each reaches the helper as exactly one
  # argument, with no word splitting or globbing.
  ./fp-fn-statistics \
    --spam=spam-test.log \
    --ham=ham-test.log \
    --threshold "$1" --scoreset="$SCORESET" \
    | sed -e 's/^Reading.*//' -e '/^$/d'
}

# Report banner, then classification results at the default threshold (5).
echo "STATISTICS REPORT FOR SPAMASSASSIN RULESET"
echo
echo "Classification success on test corpora, at default threshold:"
echo
gen_fp_fn_report 5

echo
echo "Results on test corpora at various alternative thresholds:"
echo

# list a wide range of thresholds, so that we can make graphs later ;)
# (5 is left out of the list: it was already reported above.)
for thresh in -4 -3 -2 -1 0 1 2 3 4 4.5 5.5 6 6.5 7 8 9 10 12 15 17 20 ; do
  gen_fp_fn_report "$thresh"
  echo
done

echo
echo "Test hit frequencies, for spam and ham corpora:"
echo "(note: S/O indicates ratio of spam hits to overall hits for"
echo "each test, where 0.0 = hits only non-spam and 1.0 = hits only spam,"
echo "and the 'score' field should be ignored.)"
echo
# don't just use "freqs", it's often out of date w.r.t. scores
# remove T_ test rules from the logs
#
# -s is only passed when SCORESET is non-empty: an empty unquoted expansion
# would make -s swallow "spam.log" as its argument instead.
# grep replaces the obsolescent egrep; the pattern has no ERE operators.
perl hit-frequencies -x -p ${SCORESET:+-s "$SCORESET"} spam.log ham.log \
  | grep -v '  T_'

# Ask the build helper in the parent directory for the version string.
# '&&' ensures the helper is not run from the wrong directory if cd fails.
version=$( cd .. && ./build/get_version )
echo
echo "Rule file versions for which these results apply:"
echo "(SpamAssassin Version string: $version)"
echo

# Long listing of the rule files; the subshell keeps the cd local, and '--'
# stops a file name starting with '-' from being read as an ls option.
( cd ../rules && ls -l -- *.cf )