#!/bin/sh
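# SCORESET is taken from ./config when that file is readable; a non-empty
# first command-line argument overrides it.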
# note: this script does not need to rely on the runGA setup
# must use a / in the arg to a 'source' command to avoid searching the PATH
# Pull in local settings when a readable "config" file sits in the current
# directory.
if [ -r config ]; then
  . ./config
fi
# A non-empty first argument takes precedence over any SCORESET value that
# was already set.
case "$1" in
  '') ;;
  *) SCORESET=$1 ;;
esac
# gen_fp_fn_report THRESHOLD
#
# Runs ./fp-fn-statistics against spam-test.log and ham-test.log at the
# given threshold, using the scoreset held in $SCORESET, and writes the
# result to stdout with lines beginning with "Reading" and empty lines
# removed.
#
# Arguments: $1 - value handed to --threshold
# Globals:   SCORESET (read)
# Returns:   the exit status of the sed filter (last stage of the pipeline);
#            a failure of fp-fn-statistics itself is not reflected.
gen_fp_fn_report () {
  # Both expansions are quoted so that each value reaches the tool as
  # exactly one argument, with no word splitting or pathname expansion.
  ./fp-fn-statistics \
    --spam=spam-test.log \
    --ham=ham-test.log \
    --threshold "$1" --scoreset="$SCORESET" |
    sed -e 's/^Reading.*//' -e '/^$/d'
}
# Report banner, then the results at the default threshold of 5.
# Each heading is printed together with the blank line that follows it.
printf '%s\n\n' "STATISTICS REPORT FOR SPAMASSASSIN RULESET"
printf '%s\n\n' "Classification success on test corpora, at default threshold:"
gen_fp_fn_report 5
printf '\n%s\n\n' "Results on test corpora at various alternative thresholds:"
# Sweep a broad range of thresholds so the figures can be graphed later;
# every report is followed by one blank line.
for thresh in -4 -3 -2 -1 0 1 2 3 4 4.5 5.5 6 6.5 7 8 9 10 12 15 17 20; do
  gen_fp_fn_report "$thresh"
  echo
done
echo
echo "Test hit frequencies, for spam and ham corpora:"
echo "(note: S/O indicates ratio of spam hits to overall hits for"
echo "each test, where 0.0 = hits only non-spam and 1.0 = hits only spam,"
echo "and the 'score' field should be ignored.)"
echo
# Recompute the hit frequencies from spam.log and ham.log instead of reusing
# "freqs", which is often out of date with respect to the scores.
# $SCORESET is quoted so that -s always receives exactly one argument; left
# unquoted, an empty value would make -s take spam.log as its argument.
# Lines mentioning T_ test rules are dropped. Plain grep is used because the
# pattern contains no extended-regex metacharacters and egrep is obsolescent.
perl hit-frequencies -x -p -s "$SCORESET" spam.log ham.log |
  grep -v ' T_'
# Capture the output of ./build/get_version, run from the parent directory.
# The subshell keeps the directory change local, and '&&' makes sure the
# helper only runs when the cd actually succeeded.
version=$( cd .. && ./build/get_version )
echo
echo "Rule file versions for which these results apply:"
echo "(SpamAssassin Version string: $version)"
echo
# Long listing of the *.cf files in ../rules. The listing is skipped when the
# cd fails, so *.cf files in the current directory are never shown by
# mistake.
( cd ../rules && ls -l *.cf )