Screenshot instructions:
Windows
Mac
Red Hat Linux
Ubuntu
Click URL instructions:
Right-click on the ad, choose "Copy Link", then paste here →
(This may not be possible with some types of ads)
You can subscribe to this list here.
2002 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
(92) |
Oct
(277) |
Nov
(500) |
Dec
(346) |
---|---|---|---|---|---|---|---|---|---|---|---|---|
2003 |
Jan
(437) |
Feb
(277) |
Mar
(215) |
Apr
(220) |
May
(225) |
Jun
(119) |
Jul
(184) |
Aug
(306) |
Sep
(340) |
Oct
(231) |
Nov
(176) |
Dec
(232) |
2004 |
Jan
(202) |
Feb
(189) |
Mar
(111) |
Apr
(43) |
May
(99) |
Jun
(164) |
Jul
(71) |
Aug
(82) |
Sep
(61) |
Oct
(135) |
Nov
(208) |
Dec
(174) |
2005 |
Jan
(241) |
Feb
(99) |
Mar
(236) |
Apr
(112) |
May
(175) |
Jun
(178) |
Jul
(18) |
Aug
(31) |
Sep
(37) |
Oct
(60) |
Nov
(41) |
Dec
(69) |
2006 |
Jan
(34) |
Feb
(14) |
Mar
(16) |
Apr
(6) |
May
(20) |
Jun
(15) |
Jul
(64) |
Aug
(25) |
Sep
|
Oct
(2) |
Nov
(25) |
Dec
(13) |
2007 |
Jan
(36) |
Feb
(15) |
Mar
(3) |
Apr
(2) |
May
|
Jun
|
Jul
(25) |
Aug
(2) |
Sep
|
Oct
|
Nov
(12) |
Dec
(4) |
2008 |
Jan
(2) |
Feb
(12) |
Mar
(4) |
Apr
(14) |
May
(8) |
Jun
(4) |
Jul
(4) |
Aug
(5) |
Sep
|
Oct
(11) |
Nov
(1) |
Dec
|
2009 |
Jan
(15) |
Feb
(50) |
Mar
|
Apr
(2) |
May
(16) |
Jun
|
Jul
(13) |
Aug
(16) |
Sep
(1) |
Oct
(1) |
Nov
|
Dec
|
2010 |
Jan
(1) |
Feb
(7) |
Mar
(20) |
Apr
(5) |
May
|
Jun
|
Jul
(31) |
Aug
|
Sep
(1) |
Oct
(2) |
Nov
(1) |
Dec
(4) |
2011 |
Jan
(1) |
Feb
(3) |
Mar
(1) |
Apr
(1) |
May
(8) |
Jun
(2) |
Jul
|
Aug
|
Sep
|
Oct
|
Nov
|
Dec
|
2012 |
Jan
(6) |
Feb
|
Mar
|
Apr
(9) |
May
|
Jun
|
Jul
|
Aug
|
Sep
|
Oct
(2) |
Nov
(1) |
Dec
(12) |
2013 |
Jan
(2) |
Feb
|
Mar
|
Apr
|
May
|
Jun
(5) |
Jul
(4) |
Aug
|
Sep
|
Oct
|
Nov
(14) |
Dec
(1) |
2014 |
Jan
|
Feb
(3) |
Mar
|
Apr
|
May
|
Jun
|
Jul
(3) |
Aug
|
Sep
|
Oct
|
Nov
(2) |
Dec
|
2015 |
Jan
|
Feb
(10) |
Mar
(1) |
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
|
Oct
(11) |
Nov
|
Dec
|
2016 |
Jan
(4) |
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
(2) |
Aug
|
Sep
(2) |
Oct
(3) |
Nov
|
Dec
|
2017 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
(4) |
Oct
|
Nov
|
Dec
|
2018 |
Jan
|
Feb
|
Mar
|
Apr
(1) |
May
|
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
|
Dec
|
S | M | T | W | T | F | S |
---|---|---|---|---|---|---|
1
|
2
|
3
|
4
|
5
|
6
|
7
|
8
|
9
|
10
|
11
|
12
|
13
|
14
|
15
(10) |
16
(6) |
17
(17) |
18
(11) |
19
(2) |
20
(1) |
21
(1) |
22
(2) |
23
(6) |
24
(12) |
25
(8) |
26
(9) |
27
(3) |
28
|
29
(4) |
30
|
|
|
|
|
|
From: <m-a@us...> - 2002-09-29 21:43:53
|
Update of /cvsroot/bogofilter/bogofilter In directory usw-pr-cvs1:/tmp/cvs-serv1779 Modified Files: bogofilter.xml Log Message: Add manpage patch from Clint Adams, removing -l documentation. Index: bogofilter.xml =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/bogofilter.xml,v retrieving revision 1.8 retrieving revision 1.9 diff -C2 -d -r1.8 -r1.9 *** bogofilter.xml 26 Sep 2002 23:04:41 -0000 1.8 --- bogofilter.xml 29 Sep 2002 21:43:47 -0000 1.9 *************** *** 23,27 **** <arg choice='opt'>-d</arg> <arg choice='opt'>-v</arg> - <arg choice='opt'>-l</arg> <arg choice='opt'>-V</arg> </cmdsynopsis> --- 23,26 ---- |
From: <gyepi@us...> - 2002-09-29 03:40:57
|
Update of /cvsroot/bogofilter/bogofilter In directory usw-pr-cvs1:/tmp/cvs-serv5105 Modified Files: bogofilter.c bogofilter.h main.c Log Message: Modified: bogofilter.c bogofilter.h main.c 1. replace Judy with hash table (wordhash) 2. ensure that databases are always locked in the same order. Apologies for simultaneously submitting loosely related changes. Index: bogofilter.c =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/bogofilter.c,v retrieving revision 1.20 retrieving revision 1.21 diff -C2 -d -r1.20 -r1.21 *** bogofilter.c 27 Sep 2002 01:18:38 -0000 1.20 --- bogofilter.c 29 Sep 2002 03:40:54 -0000 1.21 *************** *** 2,5 **** --- 2,13 ---- /* * $Log$ + * Revision 1.21 2002/09/29 03:40:54 gyepi + * + * Modified: bogofilter.c bogofilter.h main.c + * 1. replace Judy with hash table (wordhash) + * 2. ensure that databases are always locked in the same order. + * + * Apologies for simultaneously submitting loosely related changes. + * * Revision 1.20 2002/09/27 01:18:38 gyepi * removed unused #defines and logprint function *************** *** 126,130 **** #include <stdlib.h> #include <db.h> ! #include <Judy.h> #include "bogofilter.h" #include "datastore.h" --- 134,138 ---- #include <stdlib.h> #include <db.h> ! #include <wordhash.h> #include "bogofilter.h" #include "datastore.h" *************** *** 148,240 **** ! void register_words(int fdin, wordlist_t *list, wordlist_t *other) // tokenize text on stdin and register it to a specified list // and possibly out of another list { ! int tok, wordcount, msgcount; ! void **PPValue; // associated with Index. ! void *PArray = (Pvoid_t) NULL; // JudySL array. ! JError_t JError; // Judy error structure ! void **loc; ! char tokenbuffer[BUFSIZ]; ! //FIXME -- The database locking time can be minized by using a hash table. ! db_lock_writer(list->dbh); ! if (other) ! db_lock_writer(other->dbh); ! 
// Grab tokens from the lexical analyzer into our own private Judy array ! yyin = fdopen(fdin, "r"); ! msgcount = wordcount = 0; ! list->msgcount = db_getcount(list->dbh); ! if (other) other->msgcount = db_getcount(other->dbh); ! for (;;) { ! tok = get_token(); ! ! if (tok != FROM && tok != 0) ! { ! // Ordinary word, stash in private per-message array. ! if ((PPValue = JudySLIns(&PArray, yytext, &JError)) == PPJERR) ! return; ! (*((PWord_t) PPValue))++; ! wordcount++; ! } ! else ! { ! // End of message. Update message counts. ! if (tok == FROM || (tok == 0 && msgcount == 0)) ! { ! list->msgcount++; ! msgcount++; ! if (other && other->msgcount > 0) ! other->msgcount--; ! } ! // We copy the incoming words into their own per-message array ! // in order to be able to cap frequencies. ! tokenbuffer[0]='\0'; ! for (loc = JudySLFirst(PArray, tokenbuffer, 0); ! loc != (void *) NULL; ! loc = JudySLNext(PArray, tokenbuffer, 0)) ! { ! int freq = (*((PWord_t) loc)); ! if (freq > MAX_REPEATS) ! freq = MAX_REPEATS; ! db_increment(list->dbh, tokenbuffer, freq); ! if (other) ! db_increment(other->dbh, tokenbuffer, -freq); ! } ! JudySLFreeArray(&PArray, &JError); ! PArray = (Pvoid_t)NULL; ! if (verbose) ! printf("# %d words\n", wordcount); ! // Want to process EOF, *then* drop out ! if (tok == 0) ! break; ! } ! } ! db_setcount(list->dbh, list->msgcount); ! db_flush(list->dbh); if (verbose) ! fprintf(stderr, "bogofilter: %lu messages on the %s list\n", list->msgcount, list->name); ! ! if (other){ ! db_setcount(other->dbh, other->msgcount); ! if (verbose) ! fprintf(stderr, "bogofilter: %lu messages on the %s list\n", other->msgcount, other->name); ! db_flush(other->dbh); ! db_lock_release(other->dbh); ! } ! ! db_lock_release(list->dbh); } --- 156,288 ---- ! void *collect_words(int fd, int *msg_count, int *word_count) ! // tokenize input text and save words in wordhash_t hash table ! // returns: the wordhash_t hash table. Sets msg_count and word_count to the appropriate values ! { ! 
int tok = 0; ! int w_count = 0; ! int m_count = 0; ! ! wordprop_t *w; ! hashnode_t *n; ! wordhash_t *h = wordhash_init(); ! ! for (;;){ ! tok = get_token(); ! ! if (tok != FROM && tok != 0){ ! w = wordhash_insert(h, yytext, sizeof(wordprop_t)); ! w->msg_freq++; ! w_count++; ! } ! else { ! // End of message. Update message counts. ! if (tok == FROM || (tok == 0 && m_count == 0)) ! m_count++; ! ! // Incremenent word frequencies, capping each message's contribution at MAX_REPEATS ! // in order to be able to cap frequencies. ! for(n = wordhash_first(h); n != NULL; n = wordhash_next(h)){ ! w = n->buf; ! if (w->msg_freq > MAX_REPEATS) ! w->msg_freq = MAX_REPEATS; ! ! w->freq += w->msg_freq; ! w->msg_freq = 0; ! } ! ! // Want to process EOF, *then* drop out ! if (tok == 0) ! break; ! } ! } ! ! if (word_count) ! *word_count = w_count; ! ! if (msg_count) ! *msg_count = m_count; ! ! return(h); ! } ! ! ! void register_words(int fdin, reg_t register_type) // tokenize text on stdin and register it to a specified list // and possibly out of another list { ! int wordcount, msgcount; ! hashnode_t *node; ! wordprop_t *wordprop; ! wordhash_t *h; ! wordlist_t *lists[2]; ! wordlist_t *incr_list = NULL; ! wordlist_t *decr_list = NULL; ! int i; ! int nlists = 0; ! h = collect_words(fdin, &msgcount, &wordcount); ! if (verbose) ! fprintf(stderr, "# %d words\n", wordcount); ! ! /* If the operation requires both databases, they must be locked in order */ ! switch(register_type) { ! case REG_GOOD: ! incr_list = lists[nlists++] = &good_list; ! break; ! case REG_SPAM: ! incr_list = lists[nlists++] = &spam_list; ! break; ! case REG_GOOD_TO_SPAM: ! decr_list = lists[nlists++] = &good_list; ! incr_list = lists[nlists++] = &spam_list; ! break; ! case REG_SPAM_TO_GOOD: ! incr_list = lists[nlists++] = &good_list; ! decr_list = lists[nlists++] = &spam_list; ! break; ! ! default: ! fprintf(stderr, "Error: Invalid register_type\n"); ! exit(2); ! } ! ! //Note: minimize database locking time. ! 
for (i = 0; i < nlists; i++){ ! db_lock_writer(lists[i]->dbh); ! lists[i]->msgcount = db_getcount(lists[i]->dbh); ! } ! ! incr_list->msgcount += msgcount; ! if (decr_list){ ! if (decr_list->msgcount > msgcount) ! decr_list->msgcount -= msgcount; ! else ! decr_list->msgcount = 0; ! } ! for (node = wordhash_first(h); node != NULL; node = wordhash_next(h)){ ! wordprop = node->buf; ! db_increment(incr_list->dbh, node->key, wordprop->freq); ! if (decr_list) db_increment(decr_list->dbh, node->key, -wordprop->freq); ! } ! for (i = 0; i < nlists; i++){ ! db_setcount(lists[i]->dbh, lists[i]->msgcount); ! db_flush(lists[i]->dbh); if (verbose) ! fprintf(stderr, "bogofilter: %lu messages on the %s list\n", lists[i]->msgcount, lists[i]->name); ! db_lock_release(lists[i]->dbh); ! } ! wordhash_free(h); } *************** *** 260,283 **** } - void *collect_words(int fd) - // tokenize input text and save words in a Judy array. - // returns: the Judy array - { - int tok; - - void **PPValue; // associated with Index. - void *PArray = (Pvoid_t) NULL; // JudySL array. - JError_t JError; // Judy error structure - - yyin = fdopen(fd, "r"); - while ((tok = get_token()) != 0) - { - // Ordinary word, stash in private per-message array. - if ((PPValue = JudySLIns(&PArray, yytext, &JError)) == PPJERR) - break; - (*((PWord_t) PPValue))++; - } - return PArray; - } double compute_probability( char *token ) --- 308,311 ---- *************** *** 329,339 **** } ! bogostat_t *select_indicators(void *PArray) // selects the best spam/nonspam indicators and // populates the stats structure. { ! void **loc; ! char tokenbuffer[BUFSIZ]; ! discrim_t *pp; static bogostat_t stats; --- 357,365 ---- } ! bogostat_t *select_indicators(wordhash_t *wordhash) // selects the best spam/nonspam indicators and // populates the stats structure. { ! hashnode_t *node; discrim_t *pp; static bogostat_t stats; *************** *** 345,354 **** } ! for (loc = JudySLFirst(PArray, tokenbuffer, 0); ! loc != (void *) NULL; ! 
loc = JudySLNext(PArray, tokenbuffer, 0)) ! { ! char *token = tokenbuffer; ! double prob = compute_probability( token ); double dev = DEVIATION(prob); discrim_t *hit = NULL; --- 371,377 ---- } ! for(node = wordhash_first(wordhash); node != NULL; node = wordhash_next(wordhash)) ! { ! double prob = compute_probability( node->key ); double dev = DEVIATION(prob); discrim_t *hit = NULL; *************** *** 369,373 **** { hit->prob = prob; ! strncpy(hit->key, token, MAXWORDLEN); } } --- 392,396 ---- { hit->prob = prob; ! strncpy(hit->key, node->key, MAXWORDLEN); } } *************** *** 414,422 **** rc_t status; double spamicity; ! void *PArray = (Pvoid_t) NULL; // JudySL array. bogostat_t *stats; ! // tokenize input text and save words in a Judy array. ! PArray = collect_words(fd); db_lock_reader(good_list.dbh); --- 437,444 ---- rc_t status; double spamicity; ! wordhash_t *wordhash; bogostat_t *stats; ! wordhash = collect_words(fd, NULL, NULL); db_lock_reader(good_list.dbh); *************** *** 427,437 **** // select the best spam/nonspam indicators. ! stats = select_indicators(PArray); ! ! // computes the spamicity of the spam/nonspam indicators. ! spamicity = compute_spamicity(stats); - db_lock_release(spam_list.dbh); db_lock_release(good_list.dbh); status = (spamicity > SPAM_CUTOFF) ? RC_SPAM : RC_NONSPAM; --- 449,459 ---- // select the best spam/nonspam indicators. ! stats = select_indicators(wordhash); db_lock_release(good_list.dbh); + db_lock_release(spam_list.dbh); + + // computes the spamicity of the spam/nonspam indicators. + spamicity = compute_spamicity(stats); status = (spamicity > SPAM_CUTOFF) ? 
RC_SPAM : RC_NONSPAM; *************** *** 439,442 **** --- 461,466 ---- if (xss != NULL) *xss = spamicity; + + wordhash_free(wordhash); return status; Index: bogofilter.h =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/bogofilter.h,v retrieving revision 1.9 retrieving revision 1.10 diff -C2 -d -r1.9 -r1.10 *** bogofilter.h 27 Sep 2002 01:17:38 -0000 1.9 --- bogofilter.h 29 Sep 2002 03:40:54 -0000 1.10 *************** *** 2,5 **** --- 2,13 ---- /* * $Log$ + * Revision 1.10 2002/09/29 03:40:54 gyepi + * + * Modified: bogofilter.c bogofilter.h main.c + * 1. replace Judy with hash table (wordhash) + * 2. ensure that databases are always locked in the same order. + * + * Apologies for simultaneously submitting loosely related changes. + * * Revision 1.9 2002/09/27 01:17:38 gyepi * removed unused bogodump declaration *************** *** 59,62 **** --- 67,71 ---- typedef enum rc_e {RC_SPAM=0, RC_NONSPAM=1} rc_t; + typedef enum reg_e { REG_NONE = 0, REG_SPAM, REG_GOOD, REG_SPAM_TO_GOOD, REG_GOOD_TO_SPAM } reg_t; typedef struct *************** *** 69,73 **** wordlist_t; ! extern void register_words(int fd, wordlist_t *list, wordlist_t *other); extern rc_t bogofilter(int fd, double *xss); --- 78,82 ---- wordlist_t; ! extern void register_words(int fd, reg_t register_type); extern rc_t bogofilter(int fd, double *xss); *************** *** 75,77 **** extern int verbose; ! // end --- 84,91 ---- extern int verbose; ! //Represents the secondary data for a word key ! typedef struct { ! int freq; //total word count ! int msg_freq; //word count for current message ! } wordprop_t; ! 
Index: main.c =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/main.c,v retrieving revision 1.14 retrieving revision 1.15 diff -C2 -d -r1.14 -r1.15 *** main.c 26 Sep 2002 23:04:40 -0000 1.14 --- main.c 29 Sep 2002 03:40:54 -0000 1.15 *************** *** 2,5 **** --- 2,13 ---- /* * $Log$ + * Revision 1.15 2002/09/29 03:40:54 gyepi + * + * Modified: bogofilter.c bogofilter.h main.c + * 1. replace Judy with hash table (wordhash) + * 2. ensure that databases are always locked in the same order. + * + * Apologies for simultaneously submitting loosely related changes. + * * Revision 1.14 2002/09/26 23:04:40 relson * documentation: *************** *** 106,111 **** { int ch; ! int register_spam = 0, register_good = 0; ! int spam_to_good = 0, good_to_spam = 0; char goodfile[PATH_MAX], spamfile[PATH_MAX], directory[PATH_MAX]; char *tmp; --- 114,118 ---- { int ch; ! reg_t register_type = REG_NONE; char goodfile[PATH_MAX], spamfile[PATH_MAX], directory[PATH_MAX]; char *tmp; *************** *** 128,144 **** case 's': ! register_spam = 1; break; case 'n': ! register_good = 1; break; case 'S': ! good_to_spam = 1; break; case 'N': ! spam_to_good = 1; break; --- 135,151 ---- case 's': ! register_type = REG_SPAM; break; case 'n': ! register_type = REG_GOOD; break; case 'S': ! register_type = REG_GOOD_TO_SPAM; break; case 'N': ! register_type = REG_SPAM_TO_GOOD; break; *************** *** 199,203 **** spam_list.file = spamfile; - if ( (good_list.dbh = db_open(good_list.file, good_list.name)) == NULL){ fprintf(stderr, "bogofilter: Cannot initialize database %s.\n", good_list.name); --- 206,209 ---- *************** *** 211,232 **** } ! ! if (register_spam) ! { ! register_words(STDIN_FILENO, &spam_list, NULL); ! } ! else if (register_good) ! { ! register_words(STDIN_FILENO, &good_list, NULL); ! } ! else if (spam_to_good) ! { ! register_words(STDIN_FILENO, &good_list, &spam_list); ! } ! else if (good_to_spam) ! { ! 
register_words(STDIN_FILENO, &spam_list, &good_list); ! } ! else { double spamicity; --- 217,221 ---- } ! if (register_type == REG_NONE) { double spamicity; *************** *** 267,270 **** --- 256,263 ---- exitcode = status; + } + else + { + register_words(STDIN_FILENO, register_type); } |
From: <gyepi@us...> - 2002-09-29 03:37:58
|
Update of /cvsroot/bogofilter/bogofilter In directory usw-pr-cvs1:/tmp/cvs-serv4955 Modified Files: Makefile.am README configure.in Added Files: wordhash.c wordhash.h Log Message: replace Judy with hash table (wordhash) --- NEW FILE: wordhash.c --- /* $Id: wordhash.c,v 1.1 2002/09/29 03:37:56 gyepi Exp $ $Log: wordhash.c,v $ Revision 1.1 2002/09/29 03:37:56 gyepi replace Judy with hash table (wordhash) */ /* NAME: wordhash.c -- Implements a hash data structure. AUTHOR: Gyepi Sam <gyepi@...> THEORY: See 'Programming Pearls' by Jon Bentley for a good treatment of word hashing. The algorithm, magic number selections, and hash function are based on his implementation. This module has been tuned to perform fast inserts and searches, using the following techniques: 1. Multiple, sequential memory allocation operations are combined into a single call. The allocated memory is then divided as necessary. 2. Insert operation allocates and returns a pointer to a memory buffer, in which the caller can store associated data, eliminating the need for further retrieval or storage operations. 3. Maintains a linked list of hash nodes in insert order for fast traversal of hash table. 
*/ #ifdef MAIN #include <stdio.h> #endif #include <stdlib.h> #include <string.h> #include "xmalloc.h" #include "wordhash.h" #define NHASH 29989 #define MULT 31 #define CHUNK_SIZE 30000 wordhash_t *wordhash_init(void) { int i; wordhash_t *h = xmalloc(sizeof(wordhash_t)); h->bin = xmalloc(NHASH * sizeof(hashnode_t **)); for (i = 0; i < NHASH; i++) h->bin[i] = NULL; h->halloc_buf = NULL; h->iter_ptr = NULL; h->iter_head = NULL; h->iter_tail = NULL; return h; } void wordhash_free(wordhash_t * h) { halloc_t *p, *q; if (h == NULL) return; for (p = h->halloc_buf; p != NULL; p = q) { q = p->next; xfree(p); } xfree(h->bin); xfree(h); } static void *hmalloc(wordhash_t * h, size_t n) { halloc_t *x = h->halloc_buf; char *t; if (x == NULL || x->avail < n) { /* Eliminate extra call to xmalloc by allocating enough data for both the node and its buffer */ t = xmalloc(sizeof(halloc_t) + CHUNK_SIZE + n); x = (halloc_t *) t; x->next = h->halloc_buf; h->halloc_buf = x; x->buf = t + sizeof(halloc_t); x->avail = CHUNK_SIZE + n; x->used = 0; } x->avail -= n; t = x->buf + x->used; x->used += n; return (void *) t; } static unsigned int hash(char *p) { unsigned int h = 0; for (; *p; p++) h = MULT * h + *p; return h % NHASH; } void *wordhash_insert(wordhash_t * h, char *s, size_t n) { hashnode_t *p; size_t m; int index = hash(s); char *t; for (p = h->bin[index]; p != NULL; p = p->next) if (strcmp(s, p->key) == 0) { return p->buf; } m = strlen(s) + 1; t = hmalloc(h, sizeof(hashnode_t) + n + m);; p = (hashnode_t *) t; p->buf = t + sizeof(hashnode_t); p->key = t + sizeof(hashnode_t) + n; memcpy(p->key, s, m); p->next = h->bin[index]; h->bin[index] = p; if (h->iter_head == NULL) { h->iter_head = p; } else { h->iter_tail->iter_next = p; } p->iter_next = NULL; h->iter_tail = p; return p->buf; } hashnode_t *wordhash_first(wordhash_t * h) { return (h->iter_ptr = h->iter_head); } hashnode_t *wordhash_next(wordhash_t * h) { if (h->iter_ptr != NULL) h->iter_ptr = h->iter_ptr->iter_next; return 
h->iter_ptr; } #ifdef MAIN typedef struct { int count; } word_t; void dump_hash(wordhash_t * h) { hashnode_t *p; for (p = wordhash_first(h); p != NULL; p = wordhash_next(h)) { printf("%s %d\n", p->key, ((word_t *) p->buf)->count); } } int main() { wordhash_t *h = wordhash_init(); char buf[100]; word_t *w; while (scanf("%s", buf) != EOF) { w = wordhash_insert(h, buf, sizeof(word_t)); w->count++; } dump_hash(h); wordhash_free(h); return 0; } #endif --- NEW FILE: wordhash.h --- #ifndef WORDHASH_H_GUARD #define WORDHASH_H_GUARD /* Hash entry. */ typedef struct hashnode_t { char *key; /* word key */ void *buf; /* Associated buffer. To be used by caller. */ struct hashnode_t *next; /* Next item in linked list of items with same hash */ struct hashnode_t *iter_next; /* Next item added to hash. For fast traversal */ } hashnode_t; /* Managed heap for memory allocation */ typedef struct halloc_t { char *buf; int avail; int used; struct halloc_t *next; } halloc_t; /* hash table, with bookkeeping */ typedef struct { hashnode_t **bin; /* hash table */ halloc_t *halloc_buf; /*list of node buffers */ hashnode_t *iter_ptr; /* For traversal */ hashnode_t *iter_head; hashnode_t *iter_tail; } wordhash_t; /* initialize a wordhash */ wordhash_t *wordhash_init(void); /* deallocate resources */ void wordhash_free(wordhash_t *); /* Given hash table h, key s, and int n, search for key s. 
* If found, return pointer to associated buffer, else, insert key and return pointer to allocated buffer of size n */ void *wordhash_insert(wordhash_t *, char *, size_t); /* Starts an iteration over the hash entries */ hashnode_t *wordhash_first(wordhash_t *); /* returns next entry or NULL if at end */ hashnode_t *wordhash_next(wordhash_t *); #endif Index: Makefile.am =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/Makefile.am,v retrieving revision 1.13 retrieving revision 1.14 diff -C2 -d -r1.13 -r1.14 *** Makefile.am 26 Sep 2002 23:13:12 -0000 1.13 --- Makefile.am 29 Sep 2002 03:37:56 -0000 1.14 *************** *** 1,4 **** --- 1,7 ---- # $Id$ # $Log$ + # Revision 1.14 2002/09/29 03:37:56 gyepi + # replace Judy with hash table (wordhash) + # # Revision 1.13 2002/09/26 23:13:12 relson # Changed name of lexer_l.l to lexer.l for consistency with other filenames. *************** *** 75,79 **** # what to build that from bogofilter_SOURCES = bogofilter.c bogofilter.h main.c lexer.l lexer.h \ ! datastore.h datastore_db.h datastore_db.c xmalloc.h xmalloc.c lexertest_SOURCES = lexer.l --- 78,83 ---- # what to build that from bogofilter_SOURCES = bogofilter.c bogofilter.h main.c lexer.l lexer.h \ ! datastore.h datastore_db.h datastore_db.c xmalloc.h xmalloc.c \ ! wordhash.h wordhash.c lexertest_SOURCES = lexer.l Index: README =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/README,v retrieving revision 1.1.1.1 retrieving revision 1.2 diff -C2 -d -r1.1.1.1 -r1.2 *** README 14 Sep 2002 22:15:20 -0000 1.1.1.1 --- README 29 Sep 2002 03:37:56 -0000 1.2 *************** *** 4,11 **** This package implements a fast Bayesian spam filter along the lines suggested ! by Paul Graham in his article "A Plan For Spam". It requires the Judy ! libraries, available from SourceForge at: ! ! 
http://sourceforge.net/projects/judy/ This version substantially improves on Paul's proposal by doing smarter --- 4,9 ---- This package implements a fast Bayesian spam filter along the lines suggested ! by Paul Graham in his article "A Plan For Spam". The Judy library requirement has been ! dropped in favor of a built-in hash table. This version substantially improves on Paul's proposal by doing smarter Index: configure.in =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/configure.in,v retrieving revision 1.6 retrieving revision 1.7 diff -C2 -d -r1.6 -r1.7 *** configure.in 17 Sep 2002 11:35:08 -0000 1.6 --- configure.in 29 Sep 2002 03:37:56 -0000 1.7 *************** *** 1,4 **** --- 1,7 ---- # $Id$ # $Log$ + # Revision 1.7 2002/09/29 03:37:56 gyepi + # replace Judy with hash table (wordhash) + # # Revision 1.6 2002/09/17 11:35:08 m-a # Remove check for [v]fork(), bogofilter does not fork() at the moment. *************** *** 36,80 **** AH_TEMPLATE([HAVE_DB_H], [Have suitable db.h header]) - - AC_ARG_WITH(judy, - [ AC_HELP_STRING([--with-judy=PATH], - [Specify path to Judy install directory. (default=/opt/Judy/usr)]) - ], - [ - if test "x$withval" = "xno" ; then - withval = "/opt/Judy/usr" - fi - if test "x$withval" != "xno" ; then - if test -d "$withval/lib"; then - if test -n "${need_dash_r}"; then - LDFLAGS="-L${withval}/lib -R${withval}/lib ${LDFLAGS}" - else - LDFLAGS="-L${withval}/lib ${LDFLAGS}" - fi - else - if test -n "${need_dash_r}"; then - LDFLAGS="-L${withval} -R${withval} ${LDFLAGS}" - else - LDFLAGS="-L${withval} ${LDFLAGS}" - fi - fi - if test -d "$withval/include"; then - CPPFLAGS="-I${withval}/include ${CPPFLAGS}" - else - CPPFLAGS="-I${withval} ${CPPFLAGS}" - fi - fi - LIBS="$LIBS -lJudy" - ], - [ - AC_CHECK_LIB([Judy], [JudySLIns], LIBS="$LIBS -lJudy", - AC_MSG_ERROR([Can not locate the Judy library in ${withval}. 
Use --with-judy=PATH to specify a valid Judy install location]) - ) - ] - ) - - AC_MSG_CHECKING(Judy) - AC_TRY_LINK_FUNC(JudySLIns,, AC_MSG_ERROR([Judy package is not working properly. Specify a different one using --with-judy=PATH])) - AC_MSG_RESULT(Judy) AC_ARG_WITH(db, --- 39,42 ---- |
From: <gyepi@us...> - 2002-09-29 03:32:17
|
Update of /cvsroot/bogofilter/bogofilter In directory usw-pr-cvs1:/tmp/cvs-serv3882 Modified Files: UPGRADE Log Message: Added wordlist regeneration option, changed hamlist.db to goodlist.db Index: UPGRADE =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/UPGRADE,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** UPGRADE 24 Sep 2002 04:36:54 -0000 1.1 --- UPGRADE 29 Sep 2002 03:32:14 -0000 1.2 *************** *** 1,6 **** What: ! There are now multiple file formats for various versions of bogofilter: ! This document explains how to upgrade any earlier type to current versions. Assumptions: --- 1,16 ---- What: ! This document explains how to upgrade bogofilter's wordlist files ! from any earlier version to the current version. ! ! There are two possible upgrades paths: ! ! 1. Delete current wordlist files and regenerate them from your email corpus. ! This is the recommended upgrade for the upcoming versions (0.75-beta, and 0.75) ! since the definition of a 'word' has now changed. Regeneration will pick up more ! words and also create the wordlist files with the correct format. ! ! 2. If you choose not to regenerate your wordlists, you MUST upgrade the formats ! of the existing wordlist files. Assumptions: *************** *** 13,17 **** 1. Stop all instances of bogofilter. The upgrade tools do not lock files or guard against multiple processes accessing the same files. If you have cron jobs or daemons ! that fetch and process mail and could fire off bogofilter. Stop them. 2. Backup your data. Let's assume that you said: --- 23,27 ---- 1. Stop all instances of bogofilter. The upgrade tools do not lock files or guard against multiple processes accessing the same files. If you have cron jobs or daemons ! that fetch and process mail and could fire off bogofilter, stop them. 2. Backup your data. Let's assume that you said: *************** *** 23,32 **** 3. 
If your bogofilter version is less than 0.7, say ! $ /usr/bin/bogoupgrade.pl -b /usr/bin/bogoutil -i ~/.bogofilter.safe/goodlist -o ~/.bogofilter/hamlist.db $ /usr/bin/bogoupgrade.pl -b /usr/bin/bogoutil -i ~/.bogofilter.safe/badlist -o ~/.bogofilter/spamlist.db If your bogofilter version is 0.7 or greater, say ! $ /usr/bin/bogoupgrade.pl -b /usr/bin/bogoutil -i ~/.bogofilter.safe/hamlist.count -o ~/.bogofilter/hamlist.db $ /usr/bin/bogoupgrade.pl -b /usr/bin/bogoutil -i ~/.bogofilter.safe/spamlist.count -o ~/.bogofilter/spamlist.db --- 33,42 ---- 3. If your bogofilter version is less than 0.7, say ! $ /usr/bin/bogoupgrade.pl -b /usr/bin/bogoutil -i ~/.bogofilter.safe/goodlist -o ~/.bogofilter/goodlist.db $ /usr/bin/bogoupgrade.pl -b /usr/bin/bogoutil -i ~/.bogofilter.safe/badlist -o ~/.bogofilter/spamlist.db If your bogofilter version is 0.7 or greater, say ! $ /usr/bin/bogoupgrade.pl -b /usr/bin/bogoutil -i ~/.bogofilter.safe/hamlist.count -o ~/.bogofilter/goodlist.db $ /usr/bin/bogoupgrade.pl -b /usr/bin/bogoutil -i ~/.bogofilter.safe/spamlist.count -o ~/.bogofilter/spamlist.db |
From: <relson@us...> - 2002-09-27 12:22:27
|
Update of /cvsroot/bogofilter/bogofilter In directory usw-pr-cvs1:/tmp/cvs-serv10932 Modified Files: .cvsignore Log Message: Add bogoutil to the list of files to be ignored. Index: .cvsignore =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/.cvsignore,v retrieving revision 1.6 retrieving revision 1.7 diff -C2 -d -r1.6 -r1.7 *** .cvsignore 26 Sep 2002 23:34:42 -0000 1.6 --- .cvsignore 27 Sep 2002 12:22:24 -0000 1.7 *************** *** 6,9 **** --- 6,10 ---- bogofilter-*.tar.gz bogofilter.spec + bogoutil config.h config.in |
From: <gyepi@us...> - 2002-09-27 01:18:41
|
Update of /cvsroot/bogofilter/bogofilter In directory usw-pr-cvs1:/tmp/cvs-serv28532 Modified Files: bogofilter.c Log Message: removed unused #defines and logprint function Index: bogofilter.c =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/bogofilter.c,v retrieving revision 1.19 retrieving revision 1.20 diff -C2 -d -r1.19 -r1.20 *** bogofilter.c 26 Sep 2002 23:04:40 -0000 1.19 --- bogofilter.c 27 Sep 2002 01:18:38 -0000 1.20 *************** *** 2,5 **** --- 2,8 ---- /* * $Log$ + * Revision 1.20 2002/09/27 01:18:38 gyepi + * removed unused #defines and logprint function + * * Revision 1.19 2002/09/26 23:04:40 relson * documentation: *************** *** 127,132 **** #include "datastore.h" - // implementation details - #define HEADER "# bogofilter email-count (format version B): %lu\n" // constants for the Graham formula --- 130,133 ---- *************** *** 146,150 **** wordlist_t spam_list = {"spam", NULL, 0, NULL}; - #define PLURAL(count) ((count == 1) ? "" : "s") void register_words(int fdin, wordlist_t *list, wordlist_t *other) --- 147,150 ---- *************** *** 238,259 **** db_lock_release(list->dbh); } - - #ifdef __UNUSED__ - void logprintf(const char *fmt, ... ) - // log data from server - { - char buf[BUFSIZ]; - va_list ap; - int fd; - - va_start(ap, fmt); - vsnprintf(buf, sizeof(buf), fmt, ap); - va_end(ap); - - fd=open("/tmp/bogolog", O_RDWR|O_CREAT|O_APPEND,0700); - write(fd,buf,strlen(buf)); - close(fd); - } - #endif // __UNUSED__ typedef struct --- 238,241 ---- |
From: <gyepi@us...> - 2002-09-27 01:17:41
|
Update of /cvsroot/bogofilter/bogofilter In directory usw-pr-cvs1:/tmp/cvs-serv28298 Modified Files: bogofilter.h Log Message: removed unused bogodump declaration Index: bogofilter.h =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/bogofilter.h,v retrieving revision 1.8 retrieving revision 1.9 diff -C2 -d -r1.8 -r1.9 *** bogofilter.h 26 Sep 2002 23:04:41 -0000 1.8 --- bogofilter.h 27 Sep 2002 01:17:38 -0000 1.9 *************** *** 2,5 **** --- 2,8 ---- /* * $Log$ + * Revision 1.9 2002/09/27 01:17:38 gyepi + * removed unused bogodump declaration + * * Revision 1.8 2002/09/26 23:04:41 relson * documentation: *************** *** 68,72 **** extern void register_words(int fd, wordlist_t *list, wordlist_t *other); extern rc_t bogofilter(int fd, double *xss); - extern int bogodump(char *file); extern wordlist_t good_list, spam_list; --- 71,74 ---- |
From: <relson@us...> - 2002-09-26 23:40:23
|
Update of /cvsroot/bogofilter/bogofilter In directory usw-pr-cvs1:/tmp/cvs-serv6052 Modified Files: lexer.l Log Message: Added directive so that loginfo is included in file. Index: lexer.l =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/lexer.l,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** lexer.l 26 Sep 2002 23:28:03 -0000 1.1 --- lexer.l 26 Sep 2002 23:40:20 -0000 1.2 *************** *** 1,3 **** --- 1,9 ---- /* $Id$ */ + /* + * $Log$ + * Revision 1.2 2002/09/26 23:40:20 relson + * Added directive so that loginfo is included in file. + * + */ /**** |
From: <relson@us...> - 2002-09-26 23:34:45
|
Update of /cvsroot/bogofilter/bogofilter In directory usw-pr-cvs1:/tmp/cvs-serv5033 Modified Files: .cvsignore Log Message: Use correct spelling of file lexer.c Index: .cvsignore =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/.cvsignore,v retrieving revision 1.5 retrieving revision 1.6 diff -C2 -d -r1.5 -r1.6 *** .cvsignore 26 Sep 2002 23:32:55 -0000 1.5 --- .cvsignore 26 Sep 2002 23:34:42 -0000 1.6 *************** *** 13,17 **** cscope.out lexertest ! lexerl.c Makefile Makefile.in --- 13,17 ---- cscope.out lexertest ! lexer.c Makefile Makefile.in |
From: <relson@us...> - 2002-09-26 23:32:58
|
Update of /cvsroot/bogofilter/bogofilter In directory usw-pr-cvs1:/tmp/cvs-serv4580 Modified Files: .cvsignore Log Message: Since file lexer_l.l was renamed to lexer.c, changed lexer_l.c to lexer.c Index: .cvsignore =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/.cvsignore,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** .cvsignore 26 Sep 2002 23:28:03 -0000 1.4 --- .cvsignore 26 Sep 2002 23:32:55 -0000 1.5 *************** *** 1,8 **** - profile* - *.out - *.msg - *.orig - *.save - snap.*.tgz .deps aclocal.m4 --- 1,2 ---- *************** *** 19,23 **** cscope.out lexertest ! lexer_l.c Makefile Makefile.in --- 13,17 ---- cscope.out lexertest ! lexerl.c Makefile Makefile.in |
From: <relson@us...> - 2002-09-26 23:28:06
|
Update of /cvsroot/bogofilter/bogofilter In directory usw-pr-cvs1:/tmp/cvs-serv3726 Modified Files: .cvsignore Added Files: lexer.l Log Message: Renamed from lexer_l.l for consistency with other filenames. --- NEW FILE: lexer.l --- /* $Id: lexer.l,v 1.1 2002/09/26 23:28:03 relson Exp $ */ /**** ** ** Revision 1.9 2002/09/25 22:29:21 relson ** Correct handling of multi-line "X-Spam-Whatever" headers by correctly ordering ** the if() statements and by checking for an empty line. ** ** Add parameter prototypes so that fewer casts are needed. ** ** Revision 1.8 2002/09/25 00:02:44 relson ** Ignore older X-Spam-[...Whatever...] lines. ** ** Revision 1.7 2002/09/23 11:31:53 m-a ** Unnest comments, and move $ line down by one to prevent CVS from adding nested comments again. ** ** Revision 1.6 2002/09/22 21:24:36 relson ** Modify the lexer to allow the full range of alphabetic characters. ** Thanks to Clint Adams for the new token matching expression. ** ** Revision 1.5 2002/09/18 22:30:22 relson ** Created lexer.h with the definitions needed by lexer_l.l from bogofilter.h. ** This removes the compile-time dependency between the two files. ** ** Revision 1.4 2002/09/18 20:56:43 m-a ** Let automake deal with the lexer. ** ** Revision 1.3 2002/09/16 18:58:14 m-a ** Fix 'last line occasionally emitted twice' bug, cleaning up our yyinput(). ** ** Revision 1.2 2002/09/15 15:52:24 relson ** ** ** Makefile.in: ** - fix .l.c rule so that lexer_l.c is correctly generated from lexer_l.l ** - added lexer_l.c to target mostlyclean-compile ** - removed lexer_l.c from DIST_COMMON. As it can can be generated, it no longer needs to be distributed. ** - added target lexertest (from original bogofilter release) ** ** lexer_l.l: ** - defined global variable passthrough so that linking lexertest succeeds. 
** ** Revision 1.1.1.1 2002/09/14 22:15:20 adrian_otto ** 0.7.3 Base Source ** */ %{ /* * NAME * lexer.l -- bogofilter's lexical analyzer * * ORIG: [A-Za-z$][A-Za-z0-9$'.-]+[A-Za-z0-9$] {return(TOKEN);} * ADAMS: [^[:blank:]\n[:digit:][:punct:]][^][:blank:]<>;=():&%$#@!+|/\\{}^\"?\*,\n[]+[^[:blank:][:punct:]\n] {return(TOKEN);} */ #include <stdlib.h> #include <ctype.h> #include <string.h> #include "lexer.h" // Our lexical analysis is different from Paul Graham's rules: // // We throw away headers that are readily identifiable as dates. // We throw away text lines that look like BASE64 encoding. // We throw away all digit strings that don't look like IP address parts. // We thow away lines beginning with <tab>id<space> -- mailer UDs. // We throw away various bits of MIME cruft (things that look like // declarations and boundaries for multipart declarations). // We throw away *all* tokens of length 1 or 2. // // These are optimizations to keep the token lists from bloating. // The big win is regognizing machine-generated unique IDs that // we'll never see again and shouldn't // // We don't treat dot between two alphanumerics as a separator, // because we want to keep domain names and IP addresses together as // recognizable units. // // Having done the above, there isn't much need to recognize URLs. // If a URL is a spam indicator, very likely any other URL from the // same site is as well, so the hostname part should be an adequate // statistical trigger. // // The list of HTML keywords and attributes to be ignored is from the 4.0 DTD. 
extern int passthrough; struct textblock textblocks, *textend = &textblocks; #define YY_INPUT(buf,result,max_size) result = yyinput(buf, max_size) int yyinput(char *buf, int max_size) // input getter for the scanner { char *returned; returned = fgets(buf, max_size, yyin); while (returned != NULL && memcmp(buf,"X-Spam-",6) == 0) { do { returned = fgets(buf, max_size, yyin); if (returned != NULL && *buf == '\n') break; } while (returned != NULL && isspace(*buf)); } if (returned == NULL) { if (ferror(yyin)) { fprintf(stderr, "input in flex scanner failed\n"); exit(2); } else { return 0; } } if (passthrough) { // Also, save the text on a linked list of lines. // Note that we store fixed-length blocks here, not lines. // One very long physical line could break up into more // than one of these. textend->block = strdup(buf); textend->next = (struct textblock *)malloc(sizeof(struct textblock)); textend->next->block = (char *)NULL; textend = textend->next; } return(strlen(returned)); } %} %option full align nounput noyywrap noreject 8bit BASE64 [A-Za-z0-9/+] IPADDR [0-9]+\.[0-9]+\.[0-9]+\.[0-9]+ MIME_BOUNDARY ^--[^[:blank:]\n]*$ %% abbr ; above ; acronym ; accesskey ; align ; all ; alt ; alink ; applet ; archive ; axis ; basefont ; baseline ; below ; bgcolor ; big ; body ; border ; bottom ; box ; button ; cellpadding ; cellspacing ; center ; char ; charoff ; charset ; circle ; cite ; colspan ; coords ; class ; classid ; clear ; codebase ; codetype ; color ; cols ; compact ; content ; datetime ; declare ; defer ; data ; default ; dfn ; dir ; disabled ; face ; font ; frameborder ; groups ; head ; headers ; height ; href ; hreflang ; hsides ; http-equiv ; hspace ; iframe ; input ; img ; ismap ; justify ; kbd ; label ; lang ; language ; left ; lhs ; link ; longdesc ; map ; marginheight ; marginwidth ; media ; meta ; middle ; multiple ; name ; nohref ; none ; noresize ; noshade ; nowrap ; object ; onblur ; onchange ; onclick ; ondblclick ; onfocus ; onmousedown ; onmouseup ; 
onmouseover     ;
onmousemove     ;
onmouseout      ;
onkeypress      ;
onkeydown       ;
onkeyup         ;
onload          ;
onselect        ;
onunload        ;
param           ;
poly            ;
profile         ;
prompt          ;
readonly        ;
rect            ;
rel             ;
rev             ;
rhs             ;
right           ;
rows            ;
rowspan         ;
rules           ;
samp            ;
scheme          ;
scope           ;
script          ;
scrolling       ;
select          ;
selected        ;
shape           ;
size            ;
small           ;
span            ;
src             ;
standby         ;
strike          ;
strong          ;
style           ;
sub             ;
summary         ;
sup             ;
tabindex        ;
table           ;
target          ;
textarea        ;
title           ;
top             ;
type            ;
usemap          ;
valign          ;
value           ;
valuetype       ;
var             ;
vlink           ;
vsides          ;
void            ;
vspace          ;
width           ;
\<\!--          ;
-->             ;
^From\          {return(FROM);}
^Date:.*|Delivery-Date:.*       ;
^Message-ID:.*  ;
^{BASE64}+$     ;
^\tid\ .*       ;
SMTP\ id\ .*    ;
boundary=.*     ;
name=\"         ;
filename=\"     ;
{MIME_BOUNDARY} ;
{IPADDR}        {return(TOKEN);}
[^[:blank:]\n[:digit:][:punct:]][^][:blank:]<>;=():&%$#@!+|/\\{}^\"?\*,\n[]+[^[:blank:][:punct:]\n]     {return(TOKEN);}
.               ;
\n              ;
%%

// get_token -- fetch the next usable token from the scanner.
//
// Calls yylex() until it yields a token whose text is at most MAXWORDLEN
// characters long, or until yylex() returns a value <= 0.  The text in
// yytext is then folded to lower case in place.
//
// Returns the value yylex() returned last: a positive token class for a
// token, otherwise the non-positive value that ended the loop.
int get_token(void)
{
    int class;
    char *cp;

    // Discard over-long tokens.
    while ((class = yylex()) > 0) {
	if (yyleng <= MAXWORDLEN)
	    break;
    }

    // The scanner is 8-bit clean, so yytext may hold bytes >= 0x80.
    // <ctype.h> functions require an unsigned char value.
    for (cp = yytext; *cp; cp++)
	*cp = (char)tolower((unsigned char)*cp);

    return(class);
}

#ifdef MAIN
int passthrough;

// Test driver: print every token read from the scanner's input.
int main(void)
{
    int t;

    while ((t = get_token()) > 0)
    {
	(void) printf("get_token: %d '%s'\n", t, yytext);
    }
    return 0;
}
#endif /* MAIN */

// The following sets edit modes for GNU EMACS
// Local Variables:
// mode:fundamental
// End:
Index: .cvsignore =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/.cvsignore,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** .cvsignore 26 Sep 2002 23:13:12 -0000 1.3 --- .cvsignore 26 Sep 2002 23:28:03 -0000 1.4 *************** *** 1,2 **** --- 1,8 ---- + profile* + *.out + *.msg + *.orig + *.save + snap.*.tgz .deps aclocal.m4 *************** *** 13,17 **** cscope.out lexertest ! lexer.c Makefile Makefile.in --- 19,23 ---- cscope.out lexertest ! lexer_l.c Makefile Makefile.in |
From: <relson@us...> - 2002-09-26 23:16:03
|
Update of /cvsroot/bogofilter/bogofilter In directory usw-pr-cvs1:/tmp/cvs-serv417 Removed Files: lexer_l.l Log Message: File has been renamed to lexer.l for consistency with other filenames. --- lexer_l.l DELETED --- |
From: <relson@us...> - 2002-09-26 23:13:14
|
Update of /cvsroot/bogofilter/bogofilter In directory usw-pr-cvs1:/tmp/cvs-serv31814a Modified Files: .cvsignore Makefile.am Log Message: Changed name of lexer_l.l to lexer.l for consistency with other filenames. Index: .cvsignore =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/.cvsignore,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** .cvsignore 19 Sep 2002 23:10:44 -0000 1.2 --- .cvsignore 26 Sep 2002 23:13:12 -0000 1.3 *************** *** 13,17 **** cscope.out lexertest ! lexer_l.c Makefile Makefile.in --- 13,17 ---- cscope.out lexertest ! lexer.c Makefile Makefile.in Index: Makefile.am =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/Makefile.am,v retrieving revision 1.12 retrieving revision 1.13 diff -C2 -d -r1.12 -r1.13 *** Makefile.am 25 Sep 2002 00:51:07 -0000 1.12 --- Makefile.am 26 Sep 2002 23:13:12 -0000 1.13 *************** *** 1,4 **** --- 1,7 ---- # $Id$ # $Log$ + # Revision 1.13 2002/09/26 23:13:12 relson + # Changed name of lexer_l.l to lexer.l for consistency with other filenames. + # # Revision 1.12 2002/09/25 00:51:07 adrian_otto # Removed referenced to lock.c and lock.h, because they have been obviated. *************** *** 62,65 **** --- 65,70 ---- AUTOMAKE_OPTIONS = foreign 1.6 + CFLAGS=-g -Wall + # what to build bin_PROGRAMS = bogofilter bogoutil *************** *** 69,76 **** # what to build that from ! bogofilter_SOURCES = bogofilter.c bogofilter.h main.c lexer_l.l lexer.h \ datastore.h datastore_db.h datastore_db.c xmalloc.h xmalloc.c ! lexertest_SOURCES = lexer_l.l lexertest_CFLAGS = -DMAIN --- 74,81 ---- # what to build that from ! bogofilter_SOURCES = bogofilter.c bogofilter.h main.c lexer.l lexer.h \ datastore.h datastore_db.h datastore_db.c xmalloc.h xmalloc.c ! lexertest_SOURCES = lexer.l lexertest_CFLAGS = -DMAIN |
From: <relson@us...> - 2002-09-26 23:04:44
|
Update of /cvsroot/bogofilter/bogofilter In directory usw-pr-cvs1:/tmp/cvs-serv29055 Modified Files: main.c bogofilter.c bogofilter.h bogofilter.xml Log Message: documentation: changed to refer to "good" and "spam" tokens and lists. removed '-l' option as this function is now in bogoutil. filenames: changed database from "hamlist.db" to "goodlist.db". variables: renamed "ham_list" and "hamness" to "good_list" and "goodness". Index: main.c =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/main.c,v retrieving revision 1.13 retrieving revision 1.14 diff -C2 -d -r1.13 -r1.14 *** main.c 25 Sep 2002 18:17:09 -0000 1.13 --- main.c 26 Sep 2002 23:04:40 -0000 1.14 *************** *** 2,5 **** --- 2,16 ---- /* * $Log$ + * Revision 1.14 2002/09/26 23:04:40 relson + * documentation: + * changed to refer to "good" and "spam" tokens and lists. + * removed '-l' option as this function is now in bogoutil. + * + * filenames: + * changed database from "hamlist.db" to "goodlist.db". + * + * variables: + * renamed "ham_list" and "hamness" to "good_list" and "goodness". + * * Revision 1.13 2002/09/25 18:17:09 relson * Added '-h' option to print help message and exit. *************** *** 87,91 **** #define BOGODIR "/.bogofilter/" ! #define HAMFILE "hamlist.db" #define SPAMFILE "spamlist.db" --- 98,102 ---- #define BOGODIR "/.bogofilter/" ! #define GOODFILE "goodlist.db" #define SPAMFILE "spamlist.db" *************** *** 95,101 **** { int ch; ! int register_spam = 0, register_ham = 0; ! int spam_to_ham = 0, ham_to_spam = 0; ! char hamfile[PATH_MAX], spamfile[PATH_MAX], directory[PATH_MAX]; char *tmp; struct stat sb; --- 106,112 ---- { int ch; ! int register_spam = 0, register_good = 0; ! int spam_to_good = 0, good_to_spam = 0; ! char goodfile[PATH_MAX], spamfile[PATH_MAX], directory[PATH_MAX]; char *tmp; struct stat sb; *************** *** 121,133 **** case 'n': ! register_ham = 1; break; case 'S': ! 
ham_to_spam = 1; break; case 'N': ! spam_to_ham = 1; break; --- 132,144 ---- case 'n': ! register_good = 1; break; case 'S': ! good_to_spam = 1; break; case 'N': ! spam_to_good = 1; break; *************** *** 180,186 **** } ! strcpy(hamfile, directory); ! strcat(hamfile, HAMFILE); ! ham_list.file = hamfile; strcpy(spamfile, directory); --- 191,197 ---- } ! strcpy(goodfile, directory); ! strcat(goodfile, GOODFILE); ! good_list.file = goodfile; strcpy(spamfile, directory); *************** *** 189,194 **** ! if ( (ham_list.dbh = db_open(ham_list.file, ham_list.name)) == NULL){ ! fprintf(stderr, "bogofilter: Cannot initialize database %s.\n", ham_list.name); exit(2); } --- 200,205 ---- ! if ( (good_list.dbh = db_open(good_list.file, good_list.name)) == NULL){ ! fprintf(stderr, "bogofilter: Cannot initialize database %s.\n", good_list.name); exit(2); } *************** *** 196,200 **** if ( (spam_list.dbh = db_open(spam_list.file, spam_list.name)) == NULL){ fprintf(stderr, "bogofilter: Cannot initialize database %s.\n", spam_list.name); ! db_close(ham_list.dbh); exit(2); } --- 207,211 ---- if ( (spam_list.dbh = db_open(spam_list.file, spam_list.name)) == NULL){ fprintf(stderr, "bogofilter: Cannot initialize database %s.\n", spam_list.name); ! db_close(good_list.dbh); exit(2); } *************** *** 205,219 **** register_words(STDIN_FILENO, &spam_list, NULL); } ! else if (register_ham) { ! register_words(STDIN_FILENO, &ham_list, NULL); } ! else if (spam_to_ham) { ! register_words(STDIN_FILENO, &ham_list, &spam_list); } ! else if (ham_to_spam) { ! register_words(STDIN_FILENO, &spam_list, &ham_list); } else --- 216,230 ---- register_words(STDIN_FILENO, &spam_list, NULL); } ! else if (register_good) { ! register_words(STDIN_FILENO, &good_list, NULL); } ! else if (spam_to_good) { ! register_words(STDIN_FILENO, &good_list, &spam_list); } ! else if (good_to_spam) { ! 
register_words(STDIN_FILENO, &spam_list, &good_list); } else *************** *** 259,263 **** db_close(spam_list.dbh); ! db_close(ham_list.dbh); exit(exitcode); --- 270,274 ---- db_close(spam_list.dbh); ! db_close(good_list.dbh); exit(exitcode); Index: bogofilter.c =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/bogofilter.c,v retrieving revision 1.18 retrieving revision 1.19 diff -C2 -d -r1.18 -r1.19 *** bogofilter.c 25 Sep 2002 00:51:07 -0000 1.18 --- bogofilter.c 26 Sep 2002 23:04:40 -0000 1.19 *************** *** 2,5 **** --- 2,16 ---- /* * $Log$ + * Revision 1.19 2002/09/26 23:04:40 relson + * documentation: + * changed to refer to "good" and "spam" tokens and lists. + * removed '-l' option as this function is now in bogoutil. + * + * filenames: + * changed database from "hamlist.db" to "goodlist.db". + * + * variables: + * renamed "ham_list" and "hamness" to "good_list" and "goodness". + * * Revision 1.18 2002/09/25 00:51:07 adrian_otto * Removed referenced to lock.c and lock.h, because they have been obviated. *************** *** 120,124 **** // constants for the Graham formula ! #define HAM_BIAS 2 // give ham words more weight #define KEEPERS 15 // how many extrema to keep #define MINIMUM_FREQ 5 // minimum freq --- 131,135 ---- // constants for the Graham formula ! #define GOOD_BIAS 2 // give good words more weight #define KEEPERS 15 // how many extrema to keep #define MINIMUM_FREQ 5 // minimum freq *************** *** 132,136 **** #define min(x, y) (((x) < (y)) ? (x) : (y)) ! wordlist_t ham_list = {"ham", NULL, 0, NULL}; wordlist_t spam_list = {"spam", NULL, 0, NULL}; --- 143,147 ---- #define min(x, y) (((x) < (y)) ? (x) : (y)) ! wordlist_t good_list = {"good", NULL, 0, NULL}; wordlist_t spam_list = {"spam", NULL, 0, NULL}; *************** *** 290,315 **** double compute_probability( char *token ) { ! double prob, hamness, spamness; ! 
hamness = db_getvalue(ham_list.dbh, token); spamness = db_getvalue(spam_list.dbh, token); #ifdef NON_EQUIPROBABLE // There is an argument that we should by by number of *words* here. ! double msg_prob = (spam_list.msgcount / ham_list.msgcount); #endif // NON_EQUIPROBABLE // Paul Graham's original formula: // ! // (let ((g (* 2 (or (gethash word ham) 0))) // (b (or (gethash word spam) 0))) ! // (unless (< (+ g b) 5) // (max .01 (min .99 // (double (/ // (min 1 (/ b nspam)) ! // (+ (min 1 (/ g nham)) (min 1 (/ b nspam))))))))) // This assumes that spam and non-spam are equiprobable. ! hamness *= HAM_BIAS; ! if (hamness + spamness < MINIMUM_FREQ) #ifdef NON_EQUIPROBABLE // In the absence of evidence, the probability that a new word --- 301,326 ---- double compute_probability( char *token ) { ! double prob, goodness, spamness; ! goodness = db_getvalue(good_list.dbh, token); spamness = db_getvalue(spam_list.dbh, token); #ifdef NON_EQUIPROBABLE // There is an argument that we should by by number of *words* here. ! double msg_prob = (spam_list.msgcount / good_list.msgcount); #endif // NON_EQUIPROBABLE // Paul Graham's original formula: // ! // (let ((g (* 2 (or (gethash word good) 0))) // (b (or (gethash word spam) 0))) ! // (unless (< (+ g b) 5) // (max .01 (min .99 // (double (/ // (min 1 (/ b nspam)) ! // (+ (min 1 (/ g ngood)) (min 1 (/ b nspam))))))))) // This assumes that spam and non-spam are equiprobable. ! goodness *= GOOD_BIAS; ! if (goodness + spamness < MINIMUM_FREQ) #ifdef NON_EQUIPROBABLE // In the absence of evidence, the probability that a new word *************** *** 323,327 **** { register double pb = min(1, (spamness / spam_list.msgcount)); ! register double pg = min(1, (hamness / ham_list.msgcount)); #ifdef NON_EQUIPROBABLE --- 334,338 ---- { register double pb = min(1, (spamness / spam_list.msgcount)); ! register double pg = min(1, (goodness / good_list.msgcount)); #ifdef NON_EQUIPROBABLE *************** *** 427,434 **** PArray = collect_words(fd); ! 
db_lock_reader(ham_list.dbh); db_lock_reader(spam_list.dbh); ! ham_list.msgcount = db_getcount(ham_list.dbh); spam_list.msgcount = db_getcount(spam_list.dbh); --- 438,445 ---- PArray = collect_words(fd); ! db_lock_reader(good_list.dbh); db_lock_reader(spam_list.dbh); ! good_list.msgcount = db_getcount(good_list.dbh); spam_list.msgcount = db_getcount(spam_list.dbh); *************** *** 440,444 **** db_lock_release(spam_list.dbh); ! db_lock_release(ham_list.dbh); status = (spamicity > SPAM_CUTOFF) ? RC_SPAM : RC_NONSPAM; --- 451,455 ---- db_lock_release(spam_list.dbh); ! db_lock_release(good_list.dbh); status = (spamicity > SPAM_CUTOFF) ? RC_SPAM : RC_NONSPAM; Index: bogofilter.h =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/bogofilter.h,v retrieving revision 1.7 retrieving revision 1.8 diff -C2 -d -r1.7 -r1.8 *** bogofilter.h 26 Sep 2002 17:22:01 -0000 1.7 --- bogofilter.h 26 Sep 2002 23:04:41 -0000 1.8 *************** *** 2,5 **** --- 2,16 ---- /* * $Log$ + * Revision 1.8 2002/09/26 23:04:41 relson + * documentation: + * changed to refer to "good" and "spam" tokens and lists. + * removed '-l' option as this function is now in bogoutil. + * + * filenames: + * changed database from "hamlist.db" to "goodlist.db". + * + * variables: + * renamed "ham_list" and "hamness" to "good_list" and "goodness". + * * Revision 1.7 2002/09/26 17:22:01 relson * Remove unused function prototypes. *************** *** 59,63 **** extern int bogodump(char *file); ! extern wordlist_t ham_list, spam_list; extern int verbose; --- 70,74 ---- extern int bogodump(char *file); ! 
extern wordlist_t good_list, spam_list; extern int verbose; Index: bogofilter.xml =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/bogofilter.xml,v retrieving revision 1.7 retrieving revision 1.8 diff -C2 -d -r1.7 -r1.8 *** bogofilter.xml 25 Sep 2002 18:18:20 -0000 1.7 --- bogofilter.xml 26 Sep 2002 23:04:41 -0000 1.8 *************** *** 33,37 **** In its normal mode of operation, it takes an email message or other text on standard input, does a statistical check against lists of ! "ham" and "spam" words, and returns a status code indicating whether or not the message is spam. Bogofilter is designed with fast algorithms, uses the Berkeley DB for fast startup and lookups, --- 33,37 ---- In its normal mode of operation, it takes an email message or other text on standard input, does a statistical check against lists of ! "good" and "bad" words, and returns a status code indicating whether or not the message is spam. Bogofilter is designed with fast algorithms, uses the Berkeley DB for fast startup and lookups, *************** *** 42,46 **** <refsect1 id='theory'><title>THEORY OF OPERATION</title> <para><application>Bogofilter</application> treats its input as a bag ! of tokens. Each token is checked against "ham" and "spam" wordlists, which maintain counts of the numbers of times it has occurred in non-spam and spam mails. These numbers are used to compute the --- 42,46 ---- <refsect1 id='theory'><title>THEORY OF OPERATION</title> <para><application>Bogofilter</application> treats its input as a bag ! of tokens. Each token is checked against "good" and "bad" wordlists, which maintain counts of the numbers of times it has occurred in non-spam and spam mails. These numbers are used to compute the *************** *** 102,109 **** <filename>$HOME/.bogofilter</filename>).</para> - <para>The <option>-l</option> lists wordlists. 
Used with - <option>-n</option>, it lists the ham list; used with - <option>-s</option>, it lists the spam list.</para> - <para>The <option>-p</option> (passthrough) option writes a copy of the input mail to the output with an X-Spam-Status header (in the --- 102,105 ---- *************** *** 180,185 **** <variablelist> <varlistentry> ! <term><filename>~/.bogofilter/hamlist</filename></term> ! <listitem><para>List of ham tokens.</para></listitem> </varlistentry> <varlistentry> --- 176,181 ---- <variablelist> <varlistentry> ! <term><filename>~/.bogofilter/goodlist</filename></term> ! <listitem><para>List of good tokens.</para></listitem> </varlistentry> <varlistentry> |
From: <relson@us...> - 2002-09-26 17:22:03
|
Update of /cvsroot/bogofilter/bogofilter In directory usw-pr-cvs1:/tmp/cvs-serv28318 Modified Files: bogofilter.h Log Message: Remove unused function prototypes. Index: bogofilter.h =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/bogofilter.h,v retrieving revision 1.6 retrieving revision 1.7 diff -C2 -d -r1.6 -r1.7 *** bogofilter.h 24 Sep 2002 04:34:19 -0000 1.6 --- bogofilter.h 26 Sep 2002 17:22:01 -0000 1.7 *************** *** 2,5 **** --- 2,8 ---- /* * $Log$ + * Revision 1.7 2002/09/26 17:22:01 relson + * Remove unused function prototypes. + * * Revision 1.6 2002/09/24 04:34:19 gyepi * *************** *** 52,60 **** wordlist_t; - extern int read_list(wordlist_t *list); - extern void write_list(wordlist_t *ham_list); extern void register_words(int fd, wordlist_t *list, wordlist_t *other); - extern int get_token(void); - extern void lexer_stream_mode(void); extern rc_t bogofilter(int fd, double *xss); extern int bogodump(char *file); --- 55,59 ---- |
From: <relson@us...> - 2002-09-26 17:21:44
|
Update of /cvsroot/bogofilter/bogofilter In directory usw-pr-cvs1:/tmp/cvs-serv28179 Modified Files: lexer.h Log Message: Move prototype for get_token() into lexer.h, where it belongs. Index: lexer.h =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/lexer.h,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** lexer.h 18 Sep 2002 22:30:22 -0000 1.1 --- lexer.h 26 Sep 2002 17:21:41 -0000 1.2 *************** *** 6,9 **** --- 6,10 ---- #define TOKEN 1 // Ordinary token #define FROM 2 // Mail message delimiter + extern FILE *yyin; extern char *yytext; *************** *** 16,18 **** --- 17,21 ---- extern struct textblock textblocks, *textend; + + extern int get_token(void); |
From: <relson@us...> - 2002-09-25 22:29:24
|
Update of /cvsroot/bogofilter/bogofilter In directory usw-pr-cvs1:/tmp/cvs-serv3727 Modified Files: lexer_l.l Log Message: Correct handling of multi-line "X-Spam-Whatever" headers by correctly ordering the if() statements and by checking for an empty line. Add parameter prototypes so that fewer casts are needed. Index: lexer_l.l =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/lexer_l.l,v retrieving revision 1.8 retrieving revision 1.9 diff -C2 -d -r1.8 -r1.9 *** lexer_l.l 25 Sep 2002 00:02:44 -0000 1.8 --- lexer_l.l 25 Sep 2002 22:29:21 -0000 1.9 *************** *** 2,5 **** --- 2,11 ---- /* * $Log$ + * Revision 1.9 2002/09/25 22:29:21 relson + * Correct handling of multi-line "X-Spam-Whatever" headers by correctly ordering + * the if() statements and by checking for an empty line. + * + * Add parameter prototypes so that fewer casts are needed. + * * Revision 1.8 2002/09/25 00:02:44 relson * Ignore older X-Spam-[...Whatever...] lines. *************** *** 42,45 **** --- 48,53 ---- * lexer_l.l -- bogofilter's lexical analyzer * + * ORIG: [A-Za-z$][A-Za-z0-9$'.-]+[A-Za-z0-9$] {return(TOKEN);} + * ADAMS: [^[:blank:]\n[:digit:][:punct:]][^][:blank:]<>;=():&%$#@!+|/\\{}^\"?\*,\n[]+[^[:blank:][:punct:]\n] {return(TOKEN);} */ #include <stdlib.h> *************** *** 79,88 **** #define YY_INPUT(buf,result,max_size) result = yyinput(buf, max_size) ! int yyinput(buf, max_size) // input getter for the scanner { char *returned; ! returned = fgets((char *)buf, max_size, yyin); if (returned == NULL) { if (ferror(yyin)) { --- 87,106 ---- #define YY_INPUT(buf,result,max_size) result = yyinput(buf, max_size) ! int yyinput(char *buf, int max_size) // input getter for the scanner { char *returned; ! returned = fgets(buf, max_size, yyin); ! ! while (returned != NULL && memcmp(buf,"X-Spam-",6) == 0) ! { ! do { ! returned = fgets(buf, max_size, yyin); ! if (returned != NULL && *buf == '\n') ! break; ! 
} while (returned != NULL && isspace(*buf)); ! } ! if (returned == NULL) { if (ferror(yyin)) { *************** *** 100,104 **** // One very long physical line could break up into more // than one of these. ! textend->block = strdup((char *)buf); textend->next = (struct textblock *)malloc(sizeof(struct textblock)); textend->next->block = (char *)NULL; --- 118,122 ---- // One very long physical line could break up into more // than one of these. ! textend->block = strdup(buf); textend->next = (struct textblock *)malloc(sizeof(struct textblock)); textend->next->block = (char *)NULL; *************** *** 277,281 **** ^Date:.*|Delivery-Date:.* ; ^Message-ID:.* ; - ^X-Spam-[A-Za-z]+:.* ; ^{BASE64}+$ ; ^\tid\ .* ; --- 295,298 ---- *************** *** 289,292 **** --- 306,310 ---- {IPADDR} {return(TOKEN);} [^[:blank:]\n[:digit:][:punct:]][^][:blank:]<>;=():&%$#@!+|/\\{}^\"?\*,\n[]+[^[:blank:][:punct:]\n] {return(TOKEN);} + . ; \n ; |
From: <relson@us...> - 2002-09-25 18:18:23
|
Update of /cvsroot/bogofilter/bogofilter In directory usw-pr-cvs1:/tmp/cvs-serv1863 Modified Files: bogofilter.xml Log Message: Documented '-h' option, which prints help message and exits. Index: bogofilter.xml =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/bogofilter.xml,v retrieving revision 1.6 retrieving revision 1.7 diff -C2 -d -r1.6 -r1.7 *** bogofilter.xml 25 Sep 2002 18:15:20 -0000 1.6 --- bogofilter.xml 25 Sep 2002 18:18:20 -0000 1.7 *************** *** 15,18 **** --- 15,19 ---- <cmdsynopsis> <command>bogofilter</command> + <arg choice='opt'>-h</arg> <arg choice='opt'>-s</arg> <arg choice='opt'>-n</arg> *************** *** 76,79 **** --- 77,82 ---- returns 1 if the message is non-spam, 0 if it is spam. The non-spam wordfile is created if absent.</para> + + <para>The <option>-h</option> prints the help message and exits.</para> <para>The <option>-s</option> tells <application>bogofilter</application> |
From: <relson@us...> - 2002-09-25 18:17:11
|
Update of /cvsroot/bogofilter/bogofilter In directory usw-pr-cvs1:/tmp/cvs-serv1450 Modified Files: main.c Log Message: Added '-h' option to print help message and exit. Index: main.c =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/main.c,v retrieving revision 1.12 retrieving revision 1.13 diff -C2 -d -r1.12 -r1.13 *** main.c 25 Sep 2002 18:15:19 -0000 1.12 --- main.c 25 Sep 2002 18:17:09 -0000 1.13 *************** *** 2,5 **** --- 2,8 ---- /* * $Log$ + * Revision 1.13 2002/09/25 18:17:09 relson + * Added '-h' option to print help message and exit. + * * Revision 1.12 2002/09/25 18:15:19 relson * Renamed '-h' and '-H' options to '-n' and '-N' in main.c. *************** *** 104,108 **** strcat(directory, BOGODIR); ! while ((ch = getopt(argc, argv, "d:snSNvVp")) != EOF) switch(ch) { --- 107,111 ---- strcat(directory, BOGODIR); ! while ((ch = getopt(argc, argv, "d:hsnSNvVp")) != EOF) switch(ch) { *************** *** 132,135 **** --- 135,158 ---- verbose++; break; + + case 'h': + printf( "\n" ); + printf( "Usage: bogofilter [options] < message\n" ); + printf( "\t-h\t- print this help message.\n" ); + printf( "\t-d path\t- specify directory for wordlists.\n" ); + printf( "\t-p\t- passthrough.\n" ); + printf( "\t-s\t- register message as spam.\n" ); + printf( "\t-n\t- register message as non-spam.\n" ); + printf( "\t-S\t- move message's words from non-spam list to spam list.\n" ); + printf( "\t-N\t- move message's words from spam list to spam non-list.\n" ); + printf( "\t-v\t- set debug verbosity level.\n" ); + printf( "\t-V\t- print version info.\n" ); + printf( "\n" ); + printf( "bogofilter is a tool for classifying email as spam or non-spam.\n" ); + printf( "\n" ); + printf( "For updates and additional information, see\n" ); + printf( "URL: http://bogofilter.sourceforge.net\n"; ); + printf( "\n" ); + exit(0); case 'V': |
From: <relson@us...> - 2002-09-25 18:15:23
|
Update of /cvsroot/bogofilter/bogofilter In directory usw-pr-cvs1:/tmp/cvs-serv847 Modified Files: main.c bogofilter.xml Log Message: Renamed '-h' and '-H' options to '-n' and '-N' in main.c. Modified documentation to show the new names. Index: main.c =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/main.c,v retrieving revision 1.11 retrieving revision 1.12 diff -C2 -d -r1.11 -r1.12 *** main.c 24 Sep 2002 04:34:19 -0000 1.11 --- main.c 25 Sep 2002 18:15:19 -0000 1.12 *************** *** 2,5 **** --- 2,9 ---- /* * $Log$ + * Revision 1.12 2002/09/25 18:15:19 relson + * Renamed '-h' and '-H' options to '-n' and '-N' in main.c. + * Modified documentation to show the new names. + * * Revision 1.11 2002/09/24 04:34:19 gyepi * *************** *** 100,110 **** strcat(directory, BOGODIR); ! while ((ch = getopt(argc, argv, "d:shSHvVp")) != EOF) switch(ch) { case 'd': strcpy(directory, optarg); ! if (directory[strlen(directory)-1] != '/') ! strcat(directory, "/" ); break; --- 104,114 ---- strcat(directory, BOGODIR); ! while ((ch = getopt(argc, argv, "d:snSNvVp")) != EOF) switch(ch) { case 'd': strcpy(directory, optarg); ! if (directory[strlen(directory)-1] != '/') ! strcat(directory, "/" ); break; *************** *** 113,117 **** break; ! case 'h': register_ham = 1; break; --- 117,121 ---- break; ! case 'n': register_ham = 1; break; *************** *** 121,125 **** break; ! case 'H': spam_to_ham = 1; break; --- 125,129 ---- break; ! case 'N': spam_to_ham = 1; break; Index: bogofilter.xml =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/bogofilter.xml,v retrieving revision 1.5 retrieving revision 1.6 diff -C2 -d -r1.5 -r1.6 *** bogofilter.xml 21 Sep 2002 09:22:21 -0000 1.5 --- bogofilter.xml 25 Sep 2002 18:15:20 -0000 1.6 *************** *** 16,22 **** <command>bogofilter</command> <arg choice='opt'>-s</arg> ! 
<arg choice='opt'>-h</arg> <arg choice='opt'>-S</arg> ! <arg choice='opt'>-H</arg> <arg choice='opt'>-p</arg> <arg choice='opt'>-d</arg> --- 16,22 ---- <command>bogofilter</command> <arg choice='opt'>-s</arg> ! <arg choice='opt'>-n</arg> <arg choice='opt'>-S</arg> ! <arg choice='opt'>-N</arg> <arg choice='opt'>-p</arg> <arg choice='opt'>-d</arg> *************** *** 81,85 **** wordfile is created if absent.</para> ! <para>The <option>-h</option> tells <application>bogofilter</application> to register the text presented on standard input as non-spam.</para> --- 81,85 ---- wordfile is created if absent.</para> ! <para>The <option>-n</option> tells <application>bogofilter</application> to register the text presented on standard input as non-spam.</para> *************** *** 90,94 **** same message as non-spam.</para> ! <para>The <option>-H</option> tells <application>bogofilter</application> to register the text presented on standard input as non-spam and to undo a prior registration of the --- 90,94 ---- same message as non-spam.</para> ! <para>The <option>-N</option> tells <application>bogofilter</application> to register the text presented on standard input as non-spam and to undo a prior registration of the *************** *** 100,104 **** <para>The <option>-l</option> lists wordlists. Used with ! <option>-h</option>, it lists the ham list; used with <option>-s</option>, it lists the spam list.</para> --- 100,104 ---- <para>The <option>-l</option> lists wordlists. Used with ! <option>-n</option>, it lists the ham list; used with <option>-s</option>, it lists the spam list.</para> *************** *** 136,140 **** words in messages rated as non-spam to the non-spam wordlist. With this in place, it will normally only be necessary for the user to ! 
intervene (with <option>-H</option> or <option>-S</option>) when <application>bogofilter</application> miscategorizes something.</para> <programlisting class="procmail"> --- 136,140 ---- words in messages rated as non-spam to the non-spam wordlist. With this in place, it will normally only be necessary for the user to ! intervene (with <option>-N</option> or <option>-S</option>) when <application>bogofilter</application> miscategorizes something.</para> <programlisting class="procmail"> *************** *** 150,154 **** :0EHBc ! | bogofilter -h </programlisting> <para> There have been numerous requests for a --- 150,154 ---- :0EHBc ! | bogofilter -n </programlisting> <para> There have been numerous requests for a *************** *** 161,165 **** <application>bogofilter</application>.</para> <programlisting> ! macro index d "<enter-command>unset wait_key\n<pipe-entry>bogofilter -h\n<enter-command>set wait_key\n<delete-message>" "delete message as non-spam" macro index \ed "<enter-command>unset wait_key\n<pipe-entry>bogofilter -s\n<enter-command>set wait_key\n<delete-message>" "delete message as spam" </programlisting> --- 161,165 ---- <application>bogofilter</application>.</para> <programlisting> ! macro index d "<enter-command>unset wait_key\n<pipe-entry>bogofilter -n\n<enter-command>set wait_key\n<delete-message>" "delete message as non-spam" macro index \ed "<enter-command>unset wait_key\n<pipe-entry>bogofilter -s\n<enter-command>set wait_key\n<delete-message>" "delete message as spam" </programlisting> |
From: <gyepi@us...> - 2002-09-25 02:03:30
|
Update of /cvsroot/bogofilter/bogofilter In directory usw-pr-cvs1:/tmp/cvs-serv19861 Modified Files: bogoupgrade.pl Log Message: better error message for missing files. consistent use of warn instead of print STDERR Index: bogoupgrade.pl =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/bogoupgrade.pl,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** bogoupgrade.pl 24 Sep 2002 04:36:54 -0000 1.1 --- bogoupgrade.pl 25 Sep 2002 02:03:28 -0000 1.2 *************** *** 9,13 **** Gyepi Sam <gyepi@...> - =cut --- 9,12 ---- *************** *** 41,44 **** --- 40,47 ---- } + + die "Missing input filename\n" unless $in; + die "Missing output filename\n" unless $out; + my $msg_count_token = '.MSG_COUNT'; *************** *** 95,99 **** } else { ! print STDERR "Cannot recognize signature [$sig].\n"; exit(2); } --- 98,102 ---- } else { ! warn "Cannot recognize signature [$sig].\n"; exit(2); } *************** *** 102,106 **** sub usage { ! print STDERR "usage: $0 [ -i <input text file> -o <output db file> [ -b <path to bogoutil>] ] [ -h ]\n"; } --- 105,109 ---- sub usage { ! warn "usage: $0 [ -i <input text file> -o <output db file> [ -b <path to bogoutil>] ] [ -h ]\n"; } |
From: <adrian_otto@us...> - 2002-09-25 00:52:04
|
Update of /cvsroot/bogofilter/bogofilter In directory usw-pr-cvs1:/tmp/cvs-serv25105 Removed Files: lock.c lock.h Log Message: These files have been obviated by new locking code in datastore_db.c --- lock.c DELETED --- --- lock.h DELETED --- |
From: <adrian_otto@us...> - 2002-09-25 00:51:10
|
Update of /cvsroot/bogofilter/bogofilter In directory usw-pr-cvs1:/tmp/cvs-serv24674 Modified Files: Makefile.am bogofilter.c Log Message: Removed referenced to lock.c and lock.h, because they have been obviated. Index: Makefile.am =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/Makefile.am,v retrieving revision 1.11 retrieving revision 1.12 diff -C2 -d -r1.11 -r1.12 *** Makefile.am 24 Sep 2002 12:46:47 -0000 1.11 --- Makefile.am 25 Sep 2002 00:51:07 -0000 1.12 *************** *** 1,4 **** --- 1,7 ---- # $Id$ # $Log$ + # Revision 1.12 2002/09/25 00:51:07 adrian_otto + # Removed referenced to lock.c and lock.h, because they have been obviated. + # # Revision 1.11 2002/09/24 12:46:47 gyepi # added bogoupgrade.pl *************** *** 66,70 **** # what to build that from ! bogofilter_SOURCES = bogofilter.c bogofilter.h lock.c lock.h main.c lexer_l.l lexer.h \ datastore.h datastore_db.h datastore_db.c xmalloc.h xmalloc.c --- 69,73 ---- # what to build that from ! bogofilter_SOURCES = bogofilter.c bogofilter.h main.c lexer_l.l lexer.h \ datastore.h datastore_db.h datastore_db.c xmalloc.h xmalloc.c Index: bogofilter.c =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/bogofilter.c,v retrieving revision 1.17 retrieving revision 1.18 diff -C2 -d -r1.17 -r1.18 *** bogofilter.c 24 Sep 2002 19:47:49 -0000 1.17 --- bogofilter.c 25 Sep 2002 00:51:07 -0000 1.18 *************** *** 2,5 **** --- 2,8 ---- /* * $Log$ + * Revision 1.18 2002/09/25 00:51:07 adrian_otto + * Removed referenced to lock.c and lock.h, because they have been obviated. + * * Revision 1.17 2002/09/24 19:47:49 m-a * Add missing #include "datastore.h". Drop unused strlwr. *************** *** 111,115 **** #include <Judy.h> #include "bogofilter.h" - #include "lock.h" #include "datastore.h" --- 114,117 ---- |
From: <relson@us...> - 2002-09-25 00:02:48
|
Update of /cvsroot/bogofilter/bogofilter In directory usw-pr-cvs1:/tmp/cvs-serv3741 Modified Files: lexer_l.l Log Message: Ignore older X-Spam-[...Whatever...] lines. Index: lexer_l.l =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/lexer_l.l,v retrieving revision 1.7 retrieving revision 1.8 diff -C2 -d -r1.7 -r1.8 *** lexer_l.l 23 Sep 2002 11:31:53 -0000 1.7 --- lexer_l.l 25 Sep 2002 00:02:44 -0000 1.8 *************** *** 2,5 **** --- 2,8 ---- /* * $Log$ + * Revision 1.8 2002/09/25 00:02:44 relson + * Ignore older X-Spam-[...Whatever...] lines. + * * Revision 1.7 2002/09/23 11:31:53 m-a * Unnest comments, and move $ line down by one to prevent CVS from adding nested comments again. *************** *** 273,277 **** ^From\ {return(FROM);} ^Date:.*|Delivery-Date:.* ; ! ^Message-ID:.* ; ^{BASE64}+$ ; ^\tid\ .* ; --- 276,281 ---- ^From\ {return(FROM);} ^Date:.*|Delivery-Date:.* ; ! ^Message-ID:.* ; ! ^X-Spam-[A-Za-z]+:.* ; ^{BASE64}+$ ; ^\tid\ .* ; |
From: <m-a@us...> - 2002-09-24 19:49:05
|
Update of /cvsroot/bogofilter/bogofilter In directory usw-pr-cvs1:/tmp/cvs-serv9981 Modified Files: lock.c Log Message: Work around compiler warning. Index: lock.c =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/lock.c,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** lock.c 23 Sep 2002 11:38:11 -0000 1.4 --- lock.c 24 Sep 2002 19:49:01 -0000 1.5 *************** *** 2,5 **** --- 2,8 ---- /* * $Log$ + * Revision 1.5 2002/09/24 19:49:01 m-a + * Work around compiler warning. + * * Revision 1.4 2002/09/23 11:38:11 m-a * Fix missing right paranthesis. *************** *** 209,213 **** /****************************************************************************/ void alarm_signal_handler(int signum) { ! #ifdef HAVE_SYSLOG_H syslog(LOG_WARNING, "Waited %i seconds for a file lock, continuing without a lock", LOCK_TIMEOUT); --- 212,216 ---- /****************************************************************************/ void alarm_signal_handler(int signum) { ! (void)signum; /* work around compiler warning */ #ifdef HAVE_SYSLOG_H syslog(LOG_WARNING, "Waited %i seconds for a file lock, continuing without a lock", LOCK_TIMEOUT); |