[cvs] bogofilter/src bogoconfig.c,1.132,1.133 bogofilter.c,1.35,1.36 globals.c,1.48,1.49 globals.h,1.49,1.50
Fast Bayesian spam filter along lines suggested by Paul Graham
Brought to you by:
m-a
From: <re...@us...> - 2004-01-14 20:47:52
|
Update of /cvsroot/bogofilter/bogofilter/src
In directory sc8-pr-cvs1:/tmp/cvs-serv3140/src

Modified Files:
	bogoconfig.c bogofilter.c globals.c globals.h
Log Message:
Add thresh_update option.

Index: bogoconfig.c
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/src/bogoconfig.c,v
retrieving revision 1.132
retrieving revision 1.133
diff -u -d -r1.132 -r1.133
--- bogoconfig.c	9 Jan 2004 12:43:30 -0000	1.132
+++ bogoconfig.c	14 Jan 2004 20:47:47 -0000	1.133
@@ -122,6 +122,7 @@
     { "min_dev", CP_DOUBLE, { (void *) &min_dev } },
     { "spam_cutoff", CP_DOUBLE, { (void *) &spam_cutoff } },
     { "thresh_stats", CP_DOUBLE, { (void *) &thresh_stats } },
+    { "thresh_update", CP_DOUBLE, { (void *) &thresh_update } },
 #ifdef ENABLE_DEPRECATED_CODE
     { "thresh_index", CP_INTEGER, { (void *) NULL } }, /* Graham */
 #endif
@@ -303,6 +304,7 @@
     "\t -p - passthrough.\n"
     "\t -e - in -p mode, exit with code 0 when the mail is not spam.\n"
     "\t -u - classify message as spam or non-spam and register accordingly.\n"
+    "\t -u v1 - set threshold for auto-update.\n"
 #ifdef ENABLE_DEPRECATED_CODE
     "\t -2 - set binary classification mode (yes/no).\n"
     "\t -3 - set ternary classification mode (yes/no/unsure).\n"
@@ -427,9 +429,9 @@
 #endif
 
 #ifndef ENABLE_DEPRECATED_CODE
-#define OPTIONS ":bBc:Cd:DefFghHI:k:lL:m:MnNo:O:pqQRrsStTuUvVx:X:y:"
+#define OPTIONS ":bBc:Cd:DefFghHI:k:lL:m:MnNo:O:pqQRrsStTu::UvVx:X:y:"
 #else
-#define OPTIONS ":23bBc:Cd:DefFghHI:k:lL:m:MnNo:O:pP:qQRrsStTuUvWVx:X:y:" G R F
+#define OPTIONS ":23bBc:Cd:DefFghHI:k:lL:m:MnNo:O:pP:qQRrsStTu::UvWVx:X:y:" G R F
 #endif
 
 /** These functions process command line arguments.
@@ -570,8 +572,18 @@
         break;
 
     case 'u':
+    {
+        char *tmp = optarg;
         run_type |= RUN_UPDATE;
+        if (optarg == NULL)
+            thresh_update = 0.0;
+        else {
+            bool ok = xatof(&thresh_update, optarg);
+            if (!ok)
+                fprintf(stderr, "Cannot parse -%c option argument '%s'.\n", option, optarg);
+        }
         break;
+    }
 
     case 'U':
         unsure_stats = true;
@@ -658,7 +670,6 @@
         break;
     }
 
-
     case 'H':
 #ifdef ENABLE_DEPRECATED_CODE
         header_degen = true;
@@ -750,7 +761,7 @@
 {
     fprintf(stdout, "%s version %s\n", progname, version);
     fprintf(stdout, "\n");
-    fprintf(stdout, "%-11s = %s\n", "algorithm", method->name);
+    fprintf(stdout, "%-11s = %s\n", "algorithm", method->name);
     fprintf(stdout, "%-11s = %0.6f (%8.2e)\n", "robx", robx, robx);
     fprintf(stdout, "%-11s = %0.6f (%8.2e)\n", "robs", robs, robs);
     fprintf(stdout, "%-11s = %0.6f (%8.2e)\n", "min_dev", min_dev, min_dev);
@@ -766,9 +777,9 @@
 #endif
     fprintf(stdout, "%-17s = %s\n", "replace_nonascii_characters", YN(replace_nonascii_characters));
     fprintf(stdout, "\n");
-    fprintf(stdout, "%-17s = '%s'\n", "spam_header_name", spam_header_name);
-    fprintf(stdout, "%-17s = '%s'\n", "header_format", header_format);
-    fprintf(stdout, "%-17s = '%s'\n", "terse_format", terse_format);
+    fprintf(stdout, "%-17s = '%s'\n", "spam_header_name", spam_header_name);
+    fprintf(stdout, "%-17s = '%s'\n", "header_format", header_format);
+    fprintf(stdout, "%-17s = '%s'\n", "terse_format", terse_format);
     fprintf(stdout, "%-17s = '%s'\n", "log_header_format", log_header_format);
     fprintf(stdout, "%-17s = '%s'\n", "log_update_format", log_update_format);
     display_tag_array("spamicity_tags ", spamicity_tags);

Index: bogofilter.c
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/src/bogofilter.c,v
retrieving revision 1.35
retrieving revision 1.36
diff -u -d -r1.35 -r1.36
--- bogofilter.c	28 Dec 2003 18:25:34 -0000	1.35
+++ bogofilter.c	14 Jan 2004 20:47:47 -0000	1.36
@@ -113,9 +113,9 @@
     status = (*method->status)();
 
     if (run_type & RUN_UPDATE) /* Note: don't register if RC_UNSURE */
     {
-        if (status == RC_SPAM)
+        if (status == RC_SPAM && spamicity <= 1.0 - thresh_update)
             register_words(REG_SPAM, w, msgcount);
-        if (status == RC_HAM)
+        if (status == RC_HAM && spamicity >= thresh_update)
             register_words(REG_GOOD, w, msgcount);
     }

Index: globals.c
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/src/globals.c,v
retrieving revision 1.48
retrieving revision 1.49
diff -u -d -r1.48 -r1.49
--- globals.c	1 Jan 2004 13:34:50 -0000	1.48
+++ globals.c	14 Jan 2004 20:47:47 -0000	1.49
@@ -43,6 +43,7 @@
 double min_dev;
 double spam_cutoff;
 double thresh_stats;
+double thresh_update;
 
 const char *update_dir;
 /*@observer@*/

Index: globals.h
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/src/globals.h,v
retrieving revision 1.49
retrieving revision 1.50
diff -u -d -r1.49 -r1.50
--- globals.h	6 Jan 2004 12:45:46 -0000	1.49
+++ globals.h	14 Jan 2004 20:47:47 -0000	1.50
@@ -46,6 +46,7 @@
 extern double ham_cutoff;
 extern double spam_cutoff;
 extern double thresh_stats;
+extern double thresh_update;
 
 extern int abort_on_error;
 extern bool stats_in_header;
|