[cvs] bogofilter/src lexer.h,1.38,1.39 lexer_v3.l,1.147,1.148 token.c,1.83,1.84
Fast Bayesian spam filter along lines suggested by Paul Graham
Brought to you by:
m-a
From: <re...@us...> - 2004-06-29 15:33:36
|
Update of /cvsroot/bogofilter/bogofilter/src
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv12884

Modified Files:
    lexer.h lexer_v3.l token.c
Log Message:
Replace state machine for recognizing message address with lexer pattern.

Index: lexer.h
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/src/lexer.h,v
retrieving revision 1.38
retrieving revision 1.39
diff -u -d -r1.38 -r1.39
--- lexer.h 26 Jun 2004 04:03:50 -0000 1.38
+++ lexer.h 29 Jun 2004 15:33:25 -0000 1.39
@@ -27,6 +27,7 @@
     EOH, /* end-of-header (empty line) */
     BOUNDARY, /* MIME multipart boundary line */
     IPADDR, /* IP address */
+    MSGADDR, /* Message's IP address */
     VERP, /* Variable Envelope Return Path */
     MSG_COUNT_LINE,
     BOGO_LEX_LINE

Index: lexer_v3.l
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/src/lexer_v3.l,v
retrieving revision 1.147
retrieving revision 1.148
diff -u -d -r1.147 -r1.148
--- lexer_v3.l 14 Jun 2004 23:23:11 -0000 1.147
+++ lexer_v3.l 29 Jun 2004 15:33:26 -0000 1.148
@@ -288,6 +288,9 @@
 "<"\!DOCTYPE\ HTML\ PUBLIC\ .*">" { BEGIN HTML; }
 
 {IPADDR} { return IPADDR;}
+"\["({IPADDR})"\]" { return MSGADDR;}
+
+
 {TOKEN} { return TOKEN;}
 
 <HTML>{TOKEN_12}?{HTML_ENCODING} { html_char(); } /* process escaped chars, eg 'e' is 'a' */

Index: token.c
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/src/token.c,v
retrieving revision 1.83
retrieving revision 1.84
diff -u -d -r1.83 -r1.84
--- token.c 27 Jun 2004 15:33:29 -0000 1.83
+++ token.c 29 Jun 2004 15:33:26 -0000 1.84
@@ -26,8 +26,6 @@
 #include "token.h"
 #include "xmemrchr.h"
 
-typedef enum { R_INIT, R_SAVE, R_DONE } R_STATE;
-
 /* Local Variables */
 
 word_t *yylval = NULL;
@@ -35,7 +33,6 @@
 static token_t save_class = NONE;
 static word_t *ipsave = NULL;
 
-static R_STATE r_state;
 
 static word_t *w_to = NULL; /* To: */
 static word_t *w_from = NULL; /* From: */
@@ -52,6 +49,8 @@
 static word_t *token_prefix = NULL;
 static word_t *nonblank_line = NULL;
 
+#define WFREE(n) word_free(n); n = NULL/* Global Variables */
+
 /* Function Prototypes */
 
 /* Function Definitions */
@@ -173,15 +172,21 @@
         }
         break;
 
-    case IPADDR:
-        if ((token_prefix == w_recv) &&
-            (r_state == R_INIT || r_state == R_SAVE) &&
-            (strcmp(yylval->text, "127.0.0.1") != 0)) {
+    case MSGADDR:
+        /* trim brackets */
+        yylval->leng -= 2;
+        memcpy(yylval->text, yylval->text+1, yylval->leng);
+        Z(yylval->text[yylval->leng]); /* for easier debugging - removable */
+        /* if top level, no address, not localhost, .... */
+        if (token_prefix == w_recv &&
+            msg_state == msg_state->parent &&
+            ipaddr == NULL &&
+            strcmp(yylval->text, "127.0.0.1") != 0) {
             /* Not guaranteed to be the originating address of the message. */
-            r_state = R_SAVE;
             word_free(ipaddr);
             ipaddr = word_dup(yylval);
         }
+    case IPADDR:
         if (block_on_subnets)
         {
             const byte *prefix = (wordlist_version >= IP_PREFIX) ? (const byte *)"ip:" : (const byte *)"url:";
@@ -272,8 +277,7 @@
 void token_init(void)
 {
     yyinit();
-
-    r_state = R_INIT;
+    WFREE(ipaddr);
 
     if (!msg_count_file)
         mime_reset();
@@ -329,11 +333,8 @@
     case 'r':
         if (tolower(text[2]) == 't')
             token_prefix = w_rtrn; /* Return-Path: */
-        else {
+        else
             token_prefix = w_recv; /* Received: */
-            if (r_state == R_SAVE)
-                r_state = R_DONE;
-        }
         break;
     case 's':
         token_prefix = w_subj; /* Subject: */
@@ -347,8 +348,6 @@
     return;
 }
 
-#define WFREE(n) word_free(n); n = NULL
-
 /* Cleanup storage allocation */
 void token_cleanup()
 {
|