#!/usr/bin/perl
#
# <@LICENSE>
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to you under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at:
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# </@LICENSE>

# fn-fp-to-tcr - turn raw classification counts into a TCR figure plus a
# set of accuracy statistics, printed as "# name: value" lines on stdout.
#
# usage: fn-fp-to-tcr [-lambda l] -fn n -fp n -spam n -ham n
#
#   -spam n    number of spam messages correctly classified as spam
#   -fn n      number of spam messages classified as non-spam
#   -ham n     number of non-spam messages correctly classified as non-spam
#   -fp n      number of non-spam messages classified as spam
#   -lambda l  cost of an FP relative to a FN (default 50)
#
# Counts that are not given are treated as 0.

use strict;
use warnings;

use Getopt::Long;

# Kept as package globals (the names Getopt::Long would pick by default) so
# that anything inspecting $main::opt_* after option parsing still works.
our ($opt_lambda, $opt_fn, $opt_fp, $opt_spam, $opt_ham);

# Returns the usage message passed to die().
sub _usage {
    return "usage: fn-fp-to-tcr [-lambda l] -fn n -fp n -spam n -ham n\n\n";
}

# Divides $numerator by $denominator, yielding 0 instead of dying when the
# denominator is 0 (e.g. a run that contained no spam at all).
sub _ratio {
    my ($numerator, $denominator) = @_;
    return $denominator ? $numerator / $denominator : 0;
}

# Computes every statistic the report prints.
#
# Arguments:
#   $lambda - cost of a false positive relative to a false negative
#   $n_yy   - spam classified as spam         (true positives)
#   $n_yn   - spam classified as non-spam     (false negatives)
#   $n_ny   - non-spam classified as spam     (false positives)
#   $n_nn   - non-spam classified as non-spam (true negatives)
#
# Returns a hash reference holding the four counts, the class totals
# (n_spam, n_ham), tcr, and the remaining statistics as fractions (not
# percentages).
sub compute_metrics {
    my ($lambda, $n_yy, $n_yn, $n_ny, $n_nn) = @_;

    # convert to the TCR metrics used in the published lit
    my $n_ham  = $n_nn + $n_ny;
    my $n_spam = $n_yy + $n_yn;

    my $weighted_total = $lambda * $n_ham + $n_spam;
    my $werr           = _ratio($lambda * $n_ny + $n_yn, $weighted_total);
    my $werr_base      = _ratio($n_spam, $weighted_total);

    $werr ||= 0.000001;    # avoid / by 0 when there are no errors at all

    return {
        n_yy   => $n_yy,
        n_yn   => $n_yn,
        n_ny   => $n_ny,
        n_nn   => $n_nn,
        n_ham  => $n_ham,
        n_spam => $n_spam,

        tcr => $werr_base / $werr,

        ham_ok_rate  => _ratio($n_nn, $n_ham),
        spam_ok_rate => _ratio($n_yy, $n_spam),
        fp_rate      => _ratio($n_ny, $n_ham),
        fn_rate      => _ratio($n_yn, $n_spam),

        spam_recall    => _ratio($n_yy, $n_spam),
        spam_precision => _ratio($n_yy, $n_yy + $n_ny),

        # Standard confusion-matrix definitions: these are built from the
        # correctly classified counts (n_yy, n_nn), not the class totals.
        ppv         => _ratio($n_yy, $n_yy + $n_ny),
        npv         => _ratio($n_nn, $n_nn + $n_yn),
        sensitivity => _ratio($n_yy, $n_yy + $n_yn),
        specificity => _ratio($n_nn, $n_nn + $n_ny),
        efficiency  => _ratio($n_yy + $n_nn, $n_spam + $n_ham),
    };
}

# Prints the report for the metrics hash produced by compute_metrics().
sub print_report {
    my ($lambda, $m) = @_;

    printf "# TCR(l=%d): %3.6f\n\n",
        $lambda, $m->{tcr};

    printf "# Correctly non-spam: %6d %6.3f%%\n",
        $m->{n_nn}, $m->{ham_ok_rate} * 100.0;
    printf "# Correctly spam: %6d %6.3f%%\n",
        $m->{n_yy}, $m->{spam_ok_rate} * 100.0;
    printf "# False positives: %6d %6.3f%%\n",
        $m->{n_ny}, $m->{fp_rate} * 100.0;
    printf "# False negatives: %6d %6.3f%%\n\n",
        $m->{n_yn}, $m->{fn_rate} * 100.0;

    printf "# SpamRecall: %6.3f%%\n",
        $m->{spam_recall} * 100.0;
    printf "# SpamPrecision: %6.3f%%\n\n",
        $m->{spam_precision} * 100.0;

    printf "# PPV: %6.3f%%\n", $m->{ppv} * 100;
    printf "# NPV: %6.3f%%\n", $m->{npv} * 100;
    printf "# Sensitivity: %6.3f%%\n", $m->{sensitivity} * 100;
    printf "# Specificity: %6.3f%%\n", $m->{specificity} * 100;
    printf "# Efficiency: %6.3f%%\n\n", $m->{efficiency} * 100;

    printf "# Batting Average: %.5f/%.5f\n",
        (1 - $m->{fn_rate}),
        $m->{fp_rate};

    return;
}

# Parses the command line, validates the counts and prints the report.
# Dies with the usage message on unparsable options or when neither a spam
# nor a ham count was supplied.
sub main {
    GetOptions(
        "lambda=f" => \$opt_lambda,
        "fn=f"     => \$opt_fn,
        "fp=f"     => \$opt_fp,
        "spam=f"   => \$opt_spam,
        "ham=f"    => \$opt_ham,
    ) or die _usage();

    # lambda value for TCR equation, representing the cost of an FP vs. the
    # cost of a FN. Some example values are: 1 = tagged only, 9 = mailed back
    # to sender asking for token, 999 = blocking or deleting a message.
    #
    # We roughly aim for a value representing "moved to infrequently-read
    # folder".
    my $lambda = $opt_lambda ? $opt_lambda : 50;

    if (!$opt_spam && !$opt_ham) {
        die _usage();
    }

    # Options that were not given count as 0 rather than undef.
    my ($n_yy, $n_yn, $n_ny, $n_nn) =
        map { defined $_ ? $_ : 0 } ($opt_spam, $opt_fn, $opt_fp, $opt_ham);

    if (grep { $_ < 0 } ($n_yy, $n_yn, $n_ny, $n_nn)) {
        die "fn-fp-to-tcr: counts must not be negative\n" . _usage();
    }

    print_report($lambda, compute_metrics($lambda, $n_yy, $n_yn, $n_ny, $n_nn));
    return;
}

# Run only when executed as a script, so the subs can be loaded on their own.
main() unless caller;