[cvs] bogofilter/src collect.c,1.43,1.44 token.c,1.98,1.99
Fast Bayesian spam filter along lines suggested by Paul Graham
Brought to you by:
m-a
From: David R. <re...@us...> - 2005-03-13 01:54:45
|
Update of /cvsroot/bogofilter/bogofilter/src In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv19373 Modified Files: collect.c token.c Log Message: Rewrite token return code so that malloc/free aren't needed. Index: token.c =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/src/token.c,v retrieving revision 1.98 retrieving revision 1.99 diff -u -d -r1.98 -r1.99 --- token.c 12 Mar 2005 23:15:49 -0000 1.98 +++ token.c 13 Mar 2005 01:54:32 -0000 1.99 @@ -14,6 +14,7 @@ #include "common.h" +#include <assert.h> #include <ctype.h> #include <stdlib.h> @@ -26,6 +27,8 @@ #include "token.h" #include "xmemrchr.h" +#define MAX_PREFIX_LEN 5 + /* Local Variables */ word_t *msg_addr = NULL; /* First IP Address in Received: statement */ @@ -34,7 +37,9 @@ static token_t save_class = NONE; static word_t *ipsave = NULL; -static word_t *yylval = NULL; + +byte yylval_text[MAXTOKENLEN + MAX_PREFIX_LEN + D]; +static word_t yylval = { 0, yylval_text }; static word_t *w_to = NULL; /* To: */ static word_t *w_from = NULL; /* From: */ @@ -49,6 +54,8 @@ bool block_on_subnets = false; static word_t *token_prefix = NULL; +static uint32_t token_prefix_len; + static word_t *nonblank_line = NULL; #define WFREE(n) word_free(n); n = NULL/* Global Variables */ @@ -73,21 +80,24 @@ { *t = (byte) '\0'; ipsave->leng = (uint) (t - ipsave->text); - yylval = ipsave; - return save_class; + yylval.leng = ipsave->leng; + memcpy(yylval.text, ipsave->text, ipsave->leng + D ); + Z(yylval.text[yylval.leng]); /* for easier debugging - removable */ + cls = save_class; + done = true; } } - if (yylval == NULL) - yylval = word_new(NULL, 0); - while (!done) { + uint leng; + byte *text; cls = (*lexer->yylex)(); - yylval->leng = (uint) *lexer->yyleng; - yylval->text = (unsigned char *)(*lexer->yytext); + + leng = (uint) *lexer->yyleng; + text = (byte *) *lexer->yytext; if (DEBUG_TEXT(2)) { - word_puts(yylval, 0, dbgout); + word_puts(&yylval, 0, dbgout); fputc('\n', dbgout); } @@ -97,12 +107,19 @@ switch (cls) { case EOH: /* end of header - bogus if not empty */ + if (leng > MAXTOKENLEN) + continue; + if (msg_state->mime_type == MIME_MESSAGE) mime_add_child(msg_state); - if (yylval->leng == 2) + if (leng == 2) continue; - else /* "spc:invalid_end_of_header" */ - yylval = word_dup(nonblank_line); + else { /* "spc:invalid_end_of_header" */ + yylval.leng = nonblank_line->leng; + memcpy(yylval.text, nonblank_line->text, nonblank_line->leng + D ); + Z(yylval.text[yylval.leng]); /* for easier debugging - removable */ + done = true; + } break; case BOUNDARY: /* don't return boundary tokens to the user */ @@ -110,7 +127,7 @@ case VERP: /* Variable Envelope Return Path */ { - byte *st = (byte *)yylval->text; + byte *st = (byte *)text; byte *in; byte *fst = NULL; byte *lst = NULL; @@ -128,14 +145,20 @@ *ot++ = '#'; for (in = lst; *in != '\0'; in += 1, ot += 1) *ot = *in; - yylval->leng = (uint) (ot - st); - Z(yylval->text[yylval->leng]); /* for easier debugging - removable */ + leng = (uint) (ot - st); } if (token_prefix != NULL) { - word_t *o = yylval; - yylval = word_concat(token_prefix, yylval); - word_free(o); + yylval.leng = leng + token_prefix_len; + memcpy(yylval.text, token_prefix->text, token_prefix_len + D); + memcpy(yylval.text+token_prefix_len, text, leng + D); + Z(yylval.text[yylval.leng]); /* for easier debugging - removable */ + } + else { + yylval.leng = leng; + memcpy(yylval.text, text, leng + D); + Z(yylval.text[yylval.leng]); /* for easier debugging - removable */ } + } break; @@ -144,21 +167,32 @@ if (!header_line_markup) continue; else { - const char *delim = strchr((const char *)yylval->text, ':'); - yylval->leng = (uint) (delim - (const char *)yylval->text); - Z(yylval->text[yylval->leng]); /* for easier debugging - removable */ + const char *delim = strchr((const char *)text, ':'); + leng = (uint) (delim - (const char *)text); + if (leng > MAXTOKENLEN) + continue; + yylval.leng = leng; + memcpy(yylval.text, text, yylval.leng + D); + Z(yylval.text[yylval.leng]); /* for easier debugging - removable */ } } /*@fallthrough@*/ case TOKEN: /* ignore anything when not reading text MIME types */ + if (leng > MAXTOKENLEN) + continue; if (token_prefix != NULL) { - word_t *o = yylval; - yylval = word_concat(token_prefix, yylval); - word_free(o); + yylval.leng = leng + token_prefix_len; + memcpy(yylval.text, token_prefix->text, token_prefix_len + D); + memcpy(yylval.text+token_prefix_len, text, leng + D); + Z(yylval.text[yylval.leng]); /* for easier debugging - removable */ } else { + yylval.leng = leng; + memcpy(yylval.text, text, leng + D); + Z(yylval.text[yylval.leng]); /* for easier debugging - removable */ + switch (msg_state->mime_type) { case MIME_TEXT: case MIME_TEXT_HTML: @@ -179,15 +213,15 @@ /** \bug: the parser MUST be aligned with lexer_v3.l! */ { size_t skip = 0; - while (!isspace(yylval->text[skip])) + while (!isspace(yylval.text[skip])) skip += 1; - while (isspace(yylval->text[skip])) + while (isspace(yylval.text[skip])) skip += 1; - yylval->leng -= skip; - memmove(yylval->text, yylval->text+skip, yylval->leng); - Z(yylval->text[yylval->leng]); /* for easier debugging - removable */ + yylval.leng -= skip; + memmove(yylval.text, yylval.text+skip, yylval.leng); + Z(yylval.text[yylval.leng]); /* for easier debugging - removable */ word_free(msg_id); - msg_id = word_dup(yylval); + msg_id = word_dup(&yylval); } continue; @@ -196,34 +230,37 @@ /** \bug: the parser MUST be aligned with lexer_v3.l! */ if (queue_id == NULL) { size_t skip = 0; - while (isspace(yylval->text[skip])) + while (isspace(text[skip])) skip += 1; - if (memcmp(yylval->text+skip, "id", 2) == 0) + if (memcmp(text+skip, "id", 2) == 0) skip += 2; - while (isspace(yylval->text[skip])) + while (isspace(text[skip])) skip += 1; - yylval->leng -= skip; - memmove(yylval->text, yylval->text+skip, yylval->leng); - Z(yylval->text[yylval->leng]); /* for easier debugging - removable */ + leng -= skip; + yylval.leng = leng; + memmove(yylval.text, text+skip, yylval.leng); + Z(yylval.text[yylval.leng]); /* for easier debugging - removable */ word_free(queue_id); - queue_id = word_dup(yylval); + queue_id = word_dup(&yylval); } continue; case MESSAGE_ADDR: { /* trim brackets */ - yylval->leng -= 2; - memmove(yylval->text, yylval->text+1, yylval->leng); - Z(yylval->text[yylval->leng]); /* for easier debugging - removable */ + leng -= 2; + text += 1; + yylval.leng = leng; + memmove(yylval.text, text, yylval.leng); + Z(yylval.text[yylval.leng]); /* for easier debugging - removable */ /* if top level, no address, not localhost, .... */ if (token_prefix == w_recv && msg_state == msg_state->parent && msg_addr == NULL && - strcmp((char *)yylval->text, "127.0.0.1") != 0) { + strcmp((char *)yylval.text, "127.0.0.1") != 0) { /* Not guaranteed to be the originating address of the message. */ word_free(msg_addr); - msg_addr = word_dup(yylval); + msg_addr = word_dup(&yylval); } } @@ -246,24 +283,31 @@ * mask their origin. Nuke the high bits to unmask the * address. */ - if (sscanf((const char *)yylval->text, "%d.%d.%d.%d", &q1, &q2, &q3, &q4) == 4) + if (sscanf((const char *)yylval.text, "%d.%d.%d.%d", &q1, &q2, &q3, &q4) == 4) /* safe because result string guaranteed to be shorter */ - sprintf((char *)yylval->text, "%d.%d.%d.%d", + sprintf((char *)yylval.text, "%d.%d.%d.%d", q1 & 0xff, q2 & 0xff, q3 & 0xff, q4 & 0xff); - yylval->leng = strlen((const char *)yylval->text); - ipsave = word_new(NULL, plen + yylval->leng); + yylval.leng = strlen((const char *)yylval.text); + ipsave = word_new(NULL, plen + yylval.leng); memcpy(ipsave->text, prefix, plen); - memcpy(ipsave->text+plen, yylval->text, yylval->leng+1); - word_free(yylval); - yylval = ipsave; + memcpy(ipsave->text+plen, yylval.text, yylval.leng+1); + yylval.leng = ipsave->leng; + memcpy(yylval.text, ipsave->text, ipsave->leng + D); + save_class = IPADDR; - *token = yylval; + *token = &yylval; return (cls); } if (token_prefix != NULL) { - word_t *o = yylval; - yylval = word_concat(token_prefix, yylval); - word_free(o); + yylval.leng = leng + token_prefix_len; + memcpy(yylval.text, token_prefix->text, token_prefix_len + D); + memcpy(yylval.text+token_prefix_len, text, leng + D); + Z(yylval.text[yylval.leng]); /* for easier debugging - removable */ + } + else { + yylval.leng = leng; + memcpy(yylval.text, text, leng + D); + Z(yylval.text[yylval.leng]); /* for easier debugging - removable */ } break; @@ -283,42 +327,45 @@ continue; case BOGO_LEX_LINE: + yylval.leng = leng; + memcpy(yylval.text, text, leng + D); + Z(yylval.text[yylval.leng]); /* for easier debugging - removable */ done = true; break; } if (DEBUG_TEXT(1)) { - word_puts(yylval, 0, dbgout); + word_puts(&yylval, 0, dbgout); fputc('\n', dbgout); } /* eat all long words */ - if (yylval->leng <= MAXTOKENLEN) + if (yylval.leng <= MAXTOKENLEN) done = true; } if (!msg_count_file) { /* Remove trailing blanks */ /* From "From ", for example */ - while (yylval->leng > 1 && yylval->text[yylval->leng-1] == ' ') { - yylval->leng -= 1; - yylval->text[yylval->leng] = (byte) '\0'; + while (yylval.leng > 1 && yylval.text[yylval.leng-1] == ' ') { + yylval.leng -= 1; + yylval.text[yylval.leng] = (byte) '\0'; } /* Remove trailing colon */ - if (yylval->leng > 1 && yylval->text[yylval->leng-1] == ':') { - yylval->leng -= 1; - yylval->text[yylval->leng] = (byte) '\0'; + if (yylval.leng > 1 && yylval.text[yylval.leng-1] == ':') { + yylval.leng -= 1; + yylval.text[yylval.leng] = (byte) '\0'; } if (replace_nonascii_characters) { /* replace nonascii characters by '?'s */ - for (cp = yylval->text; cp < yylval->text+yylval->leng; cp += 1) + for (cp = yylval.text; cp < yylval.text+yylval.leng; cp += 1) *cp = casefold_table[*cp]; } } - *token = yylval; + *token = &yylval; return(cls); } @@ -399,6 +446,10 @@ text); exit(EX_ERROR); } + + token_prefix_len = token_prefix->leng; + assert(token_prefix_len <= MAX_PREFIX_LEN); + if (DEBUG_LEXER(2)) { fprintf(dbgout,"--- set_tag(%s) -> prefix=", text); if (token_prefix) @@ -411,7 +462,6 @@ /* Cleanup storage allocation */ void token_cleanup() { - WFREE(yylval); WFREE(nonblank_line); WFREE(w_to); WFREE(w_from); Index: collect.c =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/src/collect.c,v retrieving revision 1.43 retrieving revision 1.44 diff -u -d -r1.43 -r1.44 --- collect.c 12 Mar 2005 23:15:49 -0000 1.43 +++ collect.c 13 Mar 2005 01:54:32 -0000 1.44 @@ -4,6 +4,7 @@ #include "common.h" +#include <assert.h> #include <stdlib.h> #include "charset.h" @@ -53,10 +54,11 @@ if (cls == BOGO_LEX_LINE) { - char *s = (char *)(token->text+1); /* skip leading quote mark */ - char *f = memchr(s, '"', token->leng - 1); - token->text = (unsigned char *) s; - token->leng = f - s; + char *beg = (char *)token->text+1; /* skip leading quote mark */ + char *end = strchr(beg, '"'); + assert(end); + token->leng = end - beg; + memmove(token->text, token->text + 1, token->leng + D); Z(token->text[token->leng]); /* replace terminal quote by NUL */ } |