[cvs] bogofilter bogofilter.h,1.32,1.33 bogofilter.c,1.89,1.90 graham.c,1.13,1.14 robinson.c,1.10,1.11 fisher.c,1.7,1.8
Fast Bayesian spam filter along lines suggested by Paul Graham
Brought to you by:
m-a
From: <re...@us...> - 2002-11-30 22:38:41
|
Update of /cvsroot/bogofilter/bogofilter
In directory sc8-pr-cvs1:/tmp/cvs-serv31646

Modified Files:
    bogofilter.h bogofilter.c graham.c robinson.c fisher.c
Log Message:
Add RC_UNSURE to allow Robinson-Fisher to return 3 result states.
Rename RC_NONSPAM to RC_HAM.

Index: bogofilter.h
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/bogofilter.h,v
retrieving revision 1.32
retrieving revision 1.33
diff -u -d -r1.32 -r1.33
--- bogofilter.h 21 Nov 2002 15:42:53 -0000 1.32
+++ bogofilter.h 30 Nov 2002 22:38:38 -0000 1.33
@@ -8,7 +8,7 @@
 #define UNKNOWN_WORD 0.4f /* odds that unknown word is spammish */
 #define DEVIATION(n) fabs((n) - EVEN_ODDS) /* deviation from average */
 
-typedef enum rc_e {RC_SPAM=0, RC_NONSPAM=1} rc_t;
+typedef enum rc_e {RC_SPAM=0, RC_HAM=1, RC_UNSURE=2} rc_t;
 
 extern void initialize_constants(void);
 extern rc_t bogofilter(/*@out@*/ double *xss);

Index: bogofilter.c
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/bogofilter.c,v
retrieving revision 1.89
retrieving revision 1.90
diff -u -d -r1.89 -r1.90
--- bogofilter.c 25 Nov 2002 20:54:25 -0000 1.89
+++ bogofilter.c 30 Nov 2002 22:38:38 -0000 1.90
@@ -83,7 +83,7 @@
 
     db_lock_release_list(word_lists);
 
-    status = (spamicity > spam_cutoff) ? RC_SPAM : RC_NONSPAM;
+    status = (spamicity > spam_cutoff) ? RC_SPAM : RC_HAM;
 
     if (xss != NULL)
         *xss = spamicity;

Index: graham.c
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/graham.c,v
retrieving revision 1.13
retrieving revision 1.14
diff -u -d -r1.13 -r1.14
--- graham.c 30 Nov 2002 22:19:36 -0000 1.13
+++ graham.c 30 Nov 2002 22:38:38 -0000 1.14
@@ -339,7 +339,7 @@
 
 rc_t gra_status(void)
 {
-    rc_t status = ( stats.spamicity >= spam_cutoff ) ? RC_SPAM : RC_NONSPAM;
+    rc_t status = ( stats.spamicity >= spam_cutoff ) ? RC_SPAM : RC_HAM;
     return status;
 }
 

Index: robinson.c
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/robinson.c,v
retrieving revision 1.10
retrieving revision 1.11
diff -u -d -r1.10 -r1.11
--- robinson.c 30 Nov 2002 22:19:37 -0000 1.10
+++ robinson.c 30 Nov 2002 22:38:38 -0000 1.11
@@ -289,7 +289,7 @@
 
 rc_t rob_status(void)
 {
-    rc_t status = ( stats.spamicity >= spam_cutoff ) ? RC_SPAM : RC_NONSPAM;
+    rc_t status = ( stats.spamicity >= spam_cutoff ) ? RC_SPAM : RC_HAM;
     return status;
 }
 

Index: fisher.c
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/fisher.c,v
retrieving revision 1.7
retrieving revision 1.8
diff -u -d -r1.7 -r1.8
--- fisher.c 30 Nov 2002 22:19:37 -0000 1.7
+++ fisher.c 30 Nov 2002 22:38:38 -0000 1.8
@@ -20,6 +20,7 @@
 #define RF_DEBUG
 #undef RF_DEBUG
 
+#define FISHER_HAM_CUTOFF 0.05f
 #define FISHER_SPAM_CUTOFF 0.952f
 #define FISHER_MIN_DEV 0.1f
 
@@ -34,6 +35,8 @@
 
 /* Static Variables */
 
+double ham_cutoff = 0.0f;
+
 rf_method_t rf_fisher_method = { /* used by config.c */
     {
         "fisher", /* const char *name; */
@@ -89,6 +92,8 @@
 
 void fis_initialize_constants(void)
 {
+    if ( ham_cutoff < EPS )
+        ham_cutoff = FISHER_HAM_CUTOFF;
     rob_initialize_with_parameters(FISHER_MIN_DEV, FISHER_SPAM_CUTOFF);
 }
 
@@ -99,7 +104,13 @@
 
 rc_t fis_status(void)
 {
-    return ( stats.spamicity >= spam_cutoff ) ? RC_SPAM : RC_NONSPAM ;
+    if ( stats.spamicity >= spam_cutoff )
+        return RC_SPAM;
+
+    if (ham_cutoff > EPS && stats.spamicity > ham_cutoff)
+        return RC_UNSURE;
+
+    return RC_HAM;
 }
 
 /* Done */
|