Update of /cvsroot/bogofilter/bogofilter/tuning
In directory sc8-pr-cvs1:/tmp/cvs-serv7986
Modified Files:
README.bogotune bogotune
Log Message:
Update to bogotune-0.3
Index: README.bogotune
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/tuning/README.bogotune,v
retrieving revision 1.7
retrieving revision 1.8
diff -u -d -r1.7 -r1.8
--- README.bogotune 26 Jun 2003 12:14:24 -0000 1.7
+++ README.bogotune 26 Jun 2003 19:31:34 -0000 1.8
@@ -1,4 +1,4 @@
-README for bogotune version 0.2.7
+README for bogotune version 0.3
(How to tune bogofilter with minimum effort)
This document describes a script called bogotune that will completely
@@ -30,7 +30,7 @@
but results will be much more reliable (though the run will
take longer) if you can use several thousand (or even one or
two myriad) of each.
- 3. You must be using bogofilter version 0.13.6.3 or later, with
+ 3. You must be using bogofilter version 0.13.7.1 or later, with
the Robinson-Fisher algorithm. Programs bogofilter, bogoutil
and bogolexer must all be in your execution path. If it's
version 0.13.6.3 you're using, you need to apply the patch
@@ -83,7 +83,7 @@
with maildir format as well, but this has not been tested; feedback
would be much appreciated.)
-If bogotune aborts, there may be leftover files named Rxxx in the
+If bogotune aborts, there may be leftover files named btxxx in the
directory from which bogotune ran; the xxx stands for some number of
more or less random digits (actually the pid of the bogotune process).
@@ -91,20 +91,23 @@
output that ends like this:
Recommendations:
+
+---cut---
db_cachesize=10
robx=0.503238
min_dev=0.040
robs=0.0178
-spam_cutoff=0.81 for 0.01% false positives; expect 6.79% false neg.
-spam_cutoff=0.69 for 0.05% false positives; expect 3.07% false neg.
-spam_cutoff=0.611 for 0.1% false positives; expect 2.81% false neg.
-spam_cutoff=0.563 for 0.2% false positives; expect 1.84% false neg.
-non_spam=0.312
+spam_cutoff=0.81 # for 0.01% false positives; expect 6.79% false neg.
+#spam_cutoff=0.69 # for 0.05% false positives; expect 3.07% false neg.
+#spam_cutoff=0.611 # for 0.1% false positives; expect 2.81% false neg.
+#spam_cutoff=0.563 # for 0.2% false positives; expect 1.84% false neg.
+ham_cutoff=0.312
+---cut---
+
Tuning complete.
These can be pasted into your bogofilter.cf file (but choose only one
-spam_cutoff value, and only copy up to but not including the first
-space on the line). Normally, up to four possible spam cutoffs are
+spam_cutoff value). Normally, up to four possible spam cutoffs are
suggested, as shown above, to give you an opportunity to judge of the
tradeoff between false positives and false negatives. If there are too
few messages in your test files, some of the suggestions may be
Index: bogotune
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/tuning/bogotune,v
retrieving revision 1.7
retrieving revision 1.8
diff -u -d -r1.7 -r1.8
--- bogotune 26 Jun 2003 12:14:24 -0000 1.7
+++ bogotune 26 Jun 2003 19:31:34 -0000 1.8
@@ -1,20 +1,21 @@
#! /usr/bin/perl
# bogotune - a bogofilter tuning tool
-# version 0.2.7, started 20030625
+# version 0.3, started 20030626
# Copyright (c) 2003 Gregory Louis; distributed wiithout warranty of
# any kind under the GNU General Public License (GPL).
+use strict;
### 0. Initialize and process arguments
use POSIX;
# choose apclass (0) or bogofilter (1) for scanning; apclass is twice
# as fast, if you have it, but it's not yet freely available.
-$usebogofilter = ($ENV{"IRCNICK"} ne "bigglebai");
+my $usebogofilter = ($ENV{"IRCNICK"} ne "bigglebai");
# $bogop may be an absolute path to bogofilter and bogoutil, eg
# $bogop = "/newversion/"; # for testing
-$bogop = "";
-$bogof = $bogop . "bogofilter"; $bogou = $bogop . "bogoutil";
+my $bogop = "";
+my $bogof = $bogop . "bogofilter"; my $bogou = $bogop . "bogoutil";
# besides being human-readable, this help text is formatted for automatic
# man-page generation by the txt2man utility (http://mvertes.free.fr);
@@ -59,8 +60,9 @@
exit(0);
}
-$state = $verbose = 0; $cf = "";
-foreach $arg (@ARGV) {
+my $state = my $verbose = 0; my $cf = "";
+my ($bogodir, @spfiles, @nsfiles);
+foreach my $arg (@ARGV) {
if($arg eq "-s") { $state = 2; }
elsif($arg eq "-n") { $state = 3; }
elsif($arg eq "-C") { $cf = "-C"; }
@@ -73,7 +75,7 @@
}
print "Using apclass.\n" unless $usebogofilter or ! $verbose;
-$workfn = "bt" . $$;
+my $workfn = "bt" . $$;
### 1. Validate bogofilter database
# Check that $bogodir contains a valid training database,
@@ -92,29 +94,30 @@
$bogodir = $ENV{"HOME"} . "/.bogofilter";
}
}
-$dbname = $bogodir . "/wordlist.db";
-$twolist = 0;
+my $dbname = $bogodir . "/wordlist.db";
+my $twolist = 0;
if(! -r $dbname) {
$dbname = $bogodir . "/goodlist.db";
$twolist = 1;
}
print("Verifying training db ", $dbname, "...\n");
+my $counts;
open(COUNTS,"$bogou -w" . $bogodir . " .MSG_COUNT |") or
yuk(255, "Couldn't run bogoutil");
while(<COUNTS>) { $counts = $_; }
chop $counts; close COUNTS;
-$err = 0;
+my $err = 0; my ($scount, $ncount);
if(!$counts) { $err = 1; }
else {
- ($junk, $scount, $ncount) = split /\s+/, $counts;
+ ($scount, $ncount) = (split /\s+/, $counts)[1,2];
if($scount < 2000 || $ncount < 2000) { $err = 2; }
else {
- $scalefactor = $scount / $ncount;
+ my $scalefactor = $scount / $ncount;
if($scalefactor < 0.2 || $scalefactor > 5) { $err = 3; }
}
}
if($err > 0) {
- @errstr = ("No wordlist found",
+ my @errstr = ("No wordlist found",
"At least 2000 spam and 2000 nonspam required in training db",
"Wordlist is out of balance");
yuk($err, $errstr[$err-1]);
@@ -131,20 +134,18 @@
# testing, in MH, mbox or msg-count format.
print("Verifying test files...\n");
-$msgformat = "mbox";
+my $msgformat = "mbox";
if(-d $spfiles[0]) {
$msgformat = "MH"; $scount = 0;
- foreach $dir (@spfiles) {
- $n = `ls $dir/[0-9]* 2>/dev/null | wc -l`;
- $scount += $n;
+ foreach my $dir (@spfiles) {
+ $scount += `ls $dir/[0-9]* 2>/dev/null | wc -l`;
}
$ncount = 0;
- foreach $dir (@nsfiles) {
- $n = `ls $dir/[0-9]* 2>/dev/null | wc -l`;
- $ncount += $n;
+ foreach my $dir (@nsfiles) {
+ $ncount += `ls $dir/[0-9]* 2>/dev/null | wc -l`;
}
} else {
- $cmd = join(" ", "cat", @spfiles, "| grep -c '^From ' |");
+ my $cmd = join(" ", "cat", @spfiles, "| grep -c '^From ' |");
open(COUNTS, $cmd) or yuk(5, "$cmd pipe failed");
$scount = <COUNTS>; chop $scount; close COUNTS;
$cmd = join(" ", "cat", @nsfiles, "| grep -c '^From ' |");
@@ -166,36 +167,36 @@
print("Verification completed successfully.\n");
if($msgformat ne "msgcount") {
print("Creating message-count files...\n");
- $spwork = $workfn . ".sp"; $nswork = $workfn . ".ns";
+ my $spwork = $workfn . ".sp"; my $nswork = $workfn . ".ns";
if($msgformat eq "mbox") {
- $cmd = join(" ", "cat", @spfiles, "| formail -s bogol", $bogodir,
- $cf, ">", $spwork);
+ my $cmd = join(" ", "cat", @spfiles, "| formail -es bogol",
+ $bogodir, $cf, ">", $spwork);
system($cmd) == 0 or yuk(7, "Problem processing spam files");
- $cmd = join(" ", "cat", @nsfiles, "| formail -s bogol", $bogodir,
+ $cmd = join(" ", "cat", @nsfiles, "| formail -es bogol", $bogodir,
$cf, ">", $nswork);
system($cmd) == 0 or yuk(7, "Problem processing nonspam files");
} else {
unlink($spwork);
- foreach $dir (@spfiles) {
+ foreach my $dir (@spfiles) {
opendir(DH, $dir) or yuk(7, "Problem processing spam files");
- @msgs = readdir(DH); closedir(DH);
- foreach $msg(@msgs) {
+ my @msgs = readdir(DH); closedir(DH);
+ foreach my $msg(@msgs) {
if($msg =~ /^[0-9]/) {
- $cmd = join(" ", "cat $dir/$msg | bogol", $bogodir, $cf,
- ">>$spwork");
+ my $cmd = join(" ", "cat $dir/$msg | bogol", $bogodir,
+ $cf, ">>$spwork");
system($cmd) == 0 or
yuk(7, "Problem writing spam msg-count file");
}
}
}
unlink($nswork);
- foreach $dir (@nsfiles) {
+ foreach my $dir (@nsfiles) {
opendir(DH, $dir) or yuk(7, "Problem processing nonspam files");
- @msgs = readdir(DH); closedir(DH);
- foreach $msg(@msgs) {
+ my @msgs = readdir(DH); closedir(DH);
+ foreach my $msg (@msgs) {
if($msg =~ /^[0-9]/) {
- $cmd = join(" ", "cat $dir/$msg | bogol", $bogodir, $cf,
- ">>$nswork");
+ my $cmd = join(" ", "cat $dir/$msg | bogol", $bogodir,
+ $cf, ">>$nswork");
system($cmd) == 0 or
yuk(7, "Problem writing nonspam msg-count file");
}
@@ -209,18 +210,18 @@
### 3. Calculate cache size
# Calculate the cache size that should be used.
-$cmd = "ls -l " . $dbname . " |";
+my $cmd = "ls -l " . $dbname . " |";
open(COUNTS, $cmd) or yuk(5, "$cmd pipe failed");
$counts = <COUNTS>; chop $counts; close COUNTS;
-($junk1, $junk2, $junk3, $junk4, $dbsize, $junk6) = split(/\s+/, $counts);
+my $dbsize = (split(/\s+/, $counts))[4];
if($twolist) {
- $cmd = "ls -l " . $bogodir . "/spamlist.db |";
+ my $cmd = "ls -l " . $bogodir . "/spamlist.db |";
open(COUNTS, $cmd) or yuk(5, "$cmd pipe failed");
$counts = <COUNTS>; chop $counts; close COUNTS;
- ($junk1, $junk2, $junk3, $junk4, $dbs2, $junk6) = split(/\s+/, $counts);
+ my $dbs2 = (split(/\s+/, $counts))[4];
if($dbs2 > $dbsize) { $dbsize = $dbs2; }
}
-$cachesize = POSIX::ceil($dbsize / (1024*1024*3));
+my $cachesize = POSIX::ceil($dbsize / (1024*1024*3));
print("Recommended cache size is ", $cachesize, " Mbytes.\n");
### 4. Calculate fp target
@@ -239,9 +240,9 @@
$err = system($cmd) >> 8;
if($err > 2) { print("Warning re processing nonspam messages: ", $err, "\n"); }
open(SCORES, $workfn) or yuk(8, "Couldn't open nonspam scores");
-$target = 0; $thresh = 0.95;
+my $target = 0; my $thresh = 0.95; my @nsscores;
while(<SCORES>) {
- chop; ($V1, $V2) = split(/\s+/);
+ chop; my ($V1, $V2) = split(/\s+/);
if($V2 == "") { $V2 = $V1; } elsif($V1 == 1) { $V2 = 1 - $V2; }
push @nsscores, $V2;
if($V2 > $thresh) { ++$target; }
@@ -252,7 +253,7 @@
if ($thresh < 0.5) {
print("Very few high-scoring nonspams in this data set.\n",
"Use these settings (only min_dev may have changed):\n");
- open(SCORES, "$bogof $cf -d $bogodir -Q |"); $n = 0;
+ open(SCORES, "$bogof $cf -d $bogodir -Q |"); my $n = 0;
while(<SCORES>) {
++$n;
if($n == 6) { print "min_dev = 0.020000 (2.00e-02)\n"; }
@@ -263,7 +264,7 @@
yuk(0, "Tuning aborted.")
}
$target = 0;
- foreach $n (@nsscores) {
+ foreach my $n (@nsscores) {
if($n > $thresh) { ++$target; }
}
}
@@ -277,9 +278,9 @@
print("Calculating initial x value...\n");
open(SCORES, "$bogou -r " . $bogodir . " |") or
yuk(9, "Couldn't run bogoutil");
-$robx = <SCORES>; chop $robx; close SCORES;
+my $robx = <SCORES>; chop $robx; close SCORES;
$robx = 0.6 if $robx > 0.6; $robx = 0.4 if $robx < 0.4;
-$x0 = $robx - 0.1; $x1 = $robx + 0.1;
+my $x0 = $robx - 0.1; my $x1 = $robx + 0.1;
printf("Initial x value is %0.6f\n", $robx);
### 6. Coarsely scan s, md and x
@@ -291,7 +292,7 @@
sub progress {
my ($cur, $top) = @_;
- my $i, $ndots; $ndots = POSIX::ceil(70 * $cur / $top);
+ my $i; my $ndots = POSIX::ceil(70 * $cur / $top);
if($ndots < 1) { $ndots = 1; }
print("\r["); for $i (0 .. $ndots-1) { print("."); }
for($i = $ndots; $i < 70; ++$i) { print(" "); }
@@ -300,26 +301,31 @@
sub col5 { $$a[5] <=> $$b[5]; }
+my(@mdval,@rxval,@parms);
+
sub gfn {
my ($si, $mi, $xi) = @_; my $pi;
$pi = $si * ($#mdval+1) * ($#rxval+1) + $mi * ($#rxval+1) + $xi;
return($parms[$pi][5]);
}
-sub log10 { my $n = shift; return log($n) / log(10); }
+sub my_log10 { my $n = shift; return log($n) / log(10); }
-@rsval = (10 ** 0, 10 ** -0.5, 10 ** -1, 10 ** -1.5, 10 ** -2);
+my @rsval = (10 ** 0, 10 ** -0.5, 10 ** -1, 10 ** -1.5, 10 ** -2);
@mdval = (0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45);
-$n = ($x1 - $x0);
+my $n = ($x1 - $x0);
@rxval = ($x0 + $n/2, $x0 + $n/4, $x0 + $n*0.75, $x0, $x1);
-foreach $scantype ("coarse", "fine") {
+my ($robs, $md);
+foreach my $scantype ("coarse", "fine") {
+ my ($rsi, $mdi, $rxi, $cutoff, $fn);
print("Performing ", $scantype, " scan:\n");
- $ord = 0; $np = scalar @rsval * scalar @mdval * scalar @rxval;
+ my $ord = 0; my $np = scalar @rsval * scalar @mdval * scalar @rxval;
@parms = ();
foreach $rsi (0 .. $#rsval) {
foreach $mdi (0 .. $#mdval) {
foreach $rxi (0 .. $#rxval) {
- $mpar = $mdval[$mdi] . "," . $rsval[$rsi] . "," . $rxval[$rxi];
+ my $mpar = $mdval[$mdi] . "," . $rsval[$rsi] . "," .
+ $rxval[$rxi];
### first get cutoff
if($usebogofilter) {
$cmd = join(" ", "cat", @nsfiles, "|", $bogof, $cf, "-d",
@@ -335,15 +341,15 @@
}
open(SCORES, $workfn) or
yuk(8, "Couldn't open nonspam scores");
- @scores = ();
+ my @scores = ();
while(<SCORES>) {
- chop; ($V1, $V2) = split(/\s+/);
+ chop; my ($V1, $V2) = split(/\s+/);
if($V2 eq "") { push @scores, 1-$V1; }
elsif($V1 == 1) { push @scores, $V2; }
}
close SCORES;
@nsscores = sort { $a <=> $b } @scores;
- $fp = $target;
+ my $fp = $target;
if(scalar @nsscores >= $fp) {
while($nsscores[$fp-1] < 0.000001) { ++$fp; }
$cutoff = 1 - $nsscores[$fp-1];
@@ -352,30 +358,30 @@
}
### now get fn count
if($usebogofilter) {
- $cmd = join(" ", "cat", @spfiles, "|", $bogof, $cf, "-d",
- $bogodir, "-Mttv -m", $mpar, "-o", $cutoff, ">",
- $workfn);
+ my $cmd = join(" ", "cat", @spfiles, "|", $bogof, $cf,
+ "-d", $bogodir, "-Mttv -m", $mpar, "-o", $cutoff,
+ ">", $workfn);
$err = system($cmd) >> 8;
if($err > 2) {
print("Warning re processing spam messages: ",
- $errr, "\n");
+ $err, "\n");
}
open(SCORES, $workfn) or
yuk(8, "Couldn't open spam scores");
$fn = 0;
while(<SCORES>) {
- chop; ($V1, $V2) = split(/\s+/);
+ chop; my ($V1, $V2) = split(/\s+/);
if($V2 eq "") { ++$fn if $V1 < $cutoff; }
else { ++$fn if $V1 != 1; }
}
close SCORES;
} else {
- $cmd = join(" ", "cat", @spfiles, "| apclass -fq -m",
+ my $cmd = join(" ", "cat", @spfiles, "| apclass -fq -m",
$mpar, "-o", $cutoff, "| grep -c -v '^1' |");
open(COUNTS, $cmd) or yuk(5, "$cmd pipe failed");
$fn = <COUNTS>; chop $fn; close COUNTS;
}
- @parm = ($rsi, $mdi, $rxi, $cutoff, $fp, $fn);
+ my @parm = ($rsi, $mdi, $rxi, $cutoff, $fp, $fn);
push @parms, [ @parm ];
if($verbose) {
printf("%0.4f %0.3f %0.3f %0.6f %0.0f %0.0f\n",
@@ -390,29 +396,29 @@
}
if(! $verbose) { print "\r"; }
# Scan complete, now find minima
- @unsorted = ();
- foreach $row (@parms) {
+ my @unsorted = ();
+ foreach my $row (@parms) {
push @unsorted, [ @$row ] if $$row[4] == $target;
}
if(scalar @unsorted == 0) {
@unsorted = @parms;
print("Warning: fp target was not met, using original results\n");
}
- @sorted = sort col5 @unsorted;
+ my @sorted = sort col5 @unsorted;
if($verbose) {
print("Top ten parameter sets from this scan:\n",
" rs md rx co %fp %fn\n");
- foreach $i (0 .. 9) {
- ($rsi, $mdi, $rxi, $cutoff, $fp, $fn) = @{$sorted[$i]};
+ foreach my $i (0 .. 9) {
+ ($rsi, $mdi, $rxi, $cutoff, my $fp, $fn) = @{$sorted[$i]};
printf("%6.4f %5.3f %5.3f %5.3f %5.3f %6.3f\n",
$rsval[$rsi], $mdval[$mdi], $rxval[$rxi], $cutoff,
100 * $fp / $ncount, 100 * $fn / $scount);
}
}
- $med = $sorted[int(scalar @sorted / 3)][5];
- if($verbose) { print("33% fn count was ", $med, "\n"); }
- $n = $o = 0;
- foreach $i (0 .. $#sorted) {
+ my $med = $sorted[int(scalar @sorted / 4)][5];
+ if($verbose) { print("25% fn count was ", $med, "\n"); }
+ my $n = my $o = 0;
+ foreach my $i (0 .. $#sorted) {
$rsi = $sorted[$i][0]; $mdi = $sorted[$i][1]; $rxi = $sorted[$i][2];
if( ($rsi == 0 || gfn($rsi-1, $mdi, $rxi) < $med)
&& ($rsi == $#rsval || gfn($rsi+1, $mdi, $rxi) < $med)
@@ -448,12 +454,12 @@
if($scantype == "coarse") {
@rsval = @mdval = ();
- $s0 = log10($robs) - 0.5; $s0 = -2 if $s0 < -2;
- $s1 = log10($robs) + 0.5; $s1 = 0 if $s1 > 0;
- for($i=$s1; $i>=$s0; $i-=0.25) { push @rsval, 10 ** $i; }
+ my $s0 = my_log10($robs) - 0.5; $s0 = -2 if $s0 < -2;
+ my $s1 = my_log10($robs) + 0.5; $s1 = 0 if $s1 > 0;
+ for(my $i=$s1; $i>=$s0; $i-=0.25) { push @rsval, 10 ** $i; }
$s0 = $md - 0.075; $s0 = 0.02 if $s0 < 0.02;
$s1 = $md + 0.075; $s1 = 0.465 if $s1 > 0.465;
- for($i=$s0; $i<=$s1; $i+=0.015) { push @mdval, $i; }
+ for(my $i=$s0; $i<=$s1; $i+=0.015) { push @mdval, $i; }
@rxval = ($robx, $robx-0.02, $robx+0.02, $robx-0.04, $robx+0.04);
}
}
@@ -465,7 +471,7 @@
# that give 0.05%, 0.1% and 0.2% fp.
print("Performing final scoring:\nNonspam...\n");
-$mpar = $md . "," . $robs . "," . $robx;
+my $mpar = $md . "," . $robs . "," . $robx;
if($usebogofilter) {
$cmd = join(" ", "cat", @nsfiles, "|", $bogof, $cf, "-d",
$bogodir, "-Mttv -m", $mpar, ">", $workfn);
@@ -478,9 +484,9 @@
print("Warning re processing nonspam messages: ", $err, "\n");
}
open(SCORES, $workfn) or yuk(8, "Couldn't open nonspam scores");
-@scores = ();
+my @scores = ();
while(<SCORES>) {
- chop; ($V1, $V2) = split(/\s+/);
+ chop; my ($V1, $V2) = split(/\s+/);
if($V2 == "") { $V2 = $V1; }
elsif($V1 == 1) { $V2 = 1 - $V2; }
push @scores, $V2;
@@ -502,43 +508,46 @@
open(SCORES, $workfn) or yuk(8, "Couldn't open spam scores");
@scores = ();
while(<SCORES>) {
- chop; ($V1, $V2) = split(/\s+/);
+ chop; my ($V1, $V2) = split(/\s+/);
if($V2 == "") { $V2 = $V1; }
elsif($V1 == 1) { $V2 = 1 - $V2; }
push @scores, $V2;
}
close SCORES;
-@spscores = sort { $a <=> $b } @scores;
-print("Recommendations:\n\n",
+my @spscores = sort { $a <=> $b } @scores;
+print("Recommendations:\n\n---cut---\n",
"db_cachesize=", $cachesize, "\n");
printf("robx=%0.6f\nmin_dev=%0.3f\nrobs=%0.4f\n", $robx, $md, $robs);
-$printed = 0;
-foreach $minn (10000, 2000, 1000, 500) {
+my $printed = 0;
+foreach my $minn (10000, 2000, 1000, 500) {
if($ncount > $minn) {
- $cutoff = sprintf("%0.3f", $nsscores[POSIX::ceil($ncount / $minn)-1]);
+ my $cutoff = sprintf("%0.3f", $nsscores[POSIX::ceil($ncount /
+ $minn)-1]);
if($cutoff < 0.999) {
- $n = $#spscores;
- for $i (0 .. $#spscores) {
+ my $n = $#spscores;
+ for my $i (0 .. $#spscores) {
if($spscores[$i] > $cutoff) { $n=$i; last; }
}
- $fneg = sprintf("%0.2f", 100 * $n / $scount);
- print("spam_cutoff=", $cutoff, " for ", 100 / $minn,
+ my $fneg = sprintf("%0.2f", 100 * $n / $scount);
+ print("#") if ($printed == 1);
+ print("spam_cutoff=", $cutoff, "\t# for ", 100 / $minn,
"% false positives; expect ", $fneg, "% false neg.\n");
$printed=1;
}
}
}
if($printed == 0) {
- for $i (0 .. $#nsscores)
+ for my $i (0 .. $#nsscores)
{ if($nsscores[$i] <= 0.999) { $target=$i; last; }}
- $cutoff = sprintf("%0.3f", $nsscores[$target]); $n = $#spscores;
- for $i (0 .. $#spscores) { if($spscores[$i] > $cutoff) { $n=$i; last; }}
- $fneg = sprintf("%0.2f", 100 * $n / $scount);
- $fp = sprintf("%0.2f", 100 * $target / $ncount);
- print("spam_cutoff=", $cutoff, " for ", $fp, "% false positives; expect ",
+ my $cutoff = sprintf("%0.3f", $nsscores[$target]); my $n = $#spscores;
+ for my $i (0 .. $#spscores)
+ { if($spscores[$i] > $cutoff) { $n=$i; last; }}
+ my $fneg = sprintf("%0.2f", 100 * $n / $scount);
+ my $fp = sprintf("%0.2f", 100 * $target / $ncount);
+ print("spam_cutoff=", $cutoff, "\t# for ", $fp, "% false positives; expect ",
$fneg, "% false neg.\n");
}
-$cutoff = sprintf("%0.3f", $spscores[POSIX::ceil($scount * 0.002)-1]);
+my $cutoff = sprintf("%0.3f", $spscores[POSIX::ceil($scount * 0.002)-1]);
$cutoff = 0.1 if $cutoff < 0.1; $cutoff = 0.4 if $cutoff > 0.4;
-print("ham_cutoff=", $cutoff, "\n");
+print("ham_cutoff=", $cutoff, "\n---cut---\n");
yuk(0, "\nTuning completed.");
|