Screenshot instructions:
Windows
Mac
Red Hat Linux
Ubuntu
Click URL instructions:
Right-click on ad, choose "Copy Link", then paste here →
(This may not be possible with some types of ads)
You can subscribe to this list here.
2002 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
(92) |
Oct
(277) |
Nov
(500) |
Dec
(346) |
---|---|---|---|---|---|---|---|---|---|---|---|---|
2003 |
Jan
(437) |
Feb
(277) |
Mar
(215) |
Apr
(220) |
May
(225) |
Jun
(119) |
Jul
(184) |
Aug
(306) |
Sep
(340) |
Oct
(231) |
Nov
(176) |
Dec
(232) |
2004 |
Jan
(202) |
Feb
(189) |
Mar
(111) |
Apr
(43) |
May
(99) |
Jun
(164) |
Jul
(71) |
Aug
(82) |
Sep
(61) |
Oct
(135) |
Nov
(208) |
Dec
(174) |
2005 |
Jan
(241) |
Feb
(99) |
Mar
(236) |
Apr
(112) |
May
(175) |
Jun
(178) |
Jul
(18) |
Aug
(31) |
Sep
(37) |
Oct
(60) |
Nov
(41) |
Dec
(69) |
2006 |
Jan
(34) |
Feb
(14) |
Mar
(16) |
Apr
(6) |
May
(20) |
Jun
(15) |
Jul
(64) |
Aug
(25) |
Sep
|
Oct
(2) |
Nov
(25) |
Dec
(13) |
2007 |
Jan
(36) |
Feb
(15) |
Mar
(3) |
Apr
(2) |
May
|
Jun
|
Jul
(25) |
Aug
(2) |
Sep
|
Oct
|
Nov
(12) |
Dec
(4) |
2008 |
Jan
(2) |
Feb
(12) |
Mar
(4) |
Apr
(14) |
May
(8) |
Jun
(4) |
Jul
(4) |
Aug
(5) |
Sep
|
Oct
(11) |
Nov
(1) |
Dec
|
2009 |
Jan
(15) |
Feb
(50) |
Mar
|
Apr
(2) |
May
(16) |
Jun
|
Jul
(13) |
Aug
(16) |
Sep
(1) |
Oct
(1) |
Nov
|
Dec
|
2010 |
Jan
(1) |
Feb
(7) |
Mar
(20) |
Apr
(5) |
May
|
Jun
|
Jul
(31) |
Aug
|
Sep
(1) |
Oct
(2) |
Nov
(1) |
Dec
(4) |
2011 |
Jan
(1) |
Feb
(3) |
Mar
(1) |
Apr
(1) |
May
(8) |
Jun
(2) |
Jul
|
Aug
|
Sep
|
Oct
|
Nov
|
Dec
|
2012 |
Jan
(6) |
Feb
|
Mar
|
Apr
(9) |
May
|
Jun
|
Jul
|
Aug
|
Sep
|
Oct
(2) |
Nov
(1) |
Dec
(12) |
2013 |
Jan
(2) |
Feb
|
Mar
|
Apr
|
May
|
Jun
(5) |
Jul
(4) |
Aug
|
Sep
|
Oct
|
Nov
(14) |
Dec
(1) |
2014 |
Jan
|
Feb
(3) |
Mar
|
Apr
|
May
|
Jun
|
Jul
(3) |
Aug
|
Sep
|
Oct
|
Nov
(2) |
Dec
|
2015 |
Jan
|
Feb
(10) |
Mar
(1) |
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
|
Oct
(11) |
Nov
|
Dec
|
2016 |
Jan
(4) |
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
(2) |
Aug
|
Sep
(2) |
Oct
(3) |
Nov
|
Dec
|
2017 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
(4) |
Oct
|
Nov
|
Dec
|
2018 |
Jan
|
Feb
|
Mar
|
Apr
(1) |
May
|
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
|
Dec
|
S | M | T | W | T | F | S |
---|---|---|---|---|---|---|
1
(11) |
2
|
3
|
4
|
5
|
6
|
7
(3) |
8
(2) |
9
(1) |
10
|
11
(1) |
12
(3) |
13
|
14
(8) |
15
|
16
(3) |
17
|
18
(2) |
19
(4) |
20
(1) |
21
(9) |
22
(2) |
23
|
24
|
25
|
26
|
27
|
28
|
From: <relson@us...> - 2009-02-14 23:22:42
|
Revision: 6798 http://bogofilter.svn.sourceforge.net/bogofilter/?rev=6798&view=rev Author: relson Date: 2009-02-14 21:58:29 +0000 (Sat, 14 Feb 2009) Log Message: ----------- Replace conditionals with switch. Modified Paths: -------------- trunk/bogofilter/src/wordhash.c Modified: trunk/bogofilter/src/wordhash.c =================================================================== --- trunk/bogofilter/src/wordhash.c 2009-02-14 21:13:00 UTC (rev 6797) +++ trunk/bogofilter/src/wordhash.c 2009-02-14 21:58:29 UTC (rev 6798) @@ -76,23 +76,26 @@ */ wordhash_t * -wordhash_init (wh_t t, uint c) +wordhash_init (wh_t type, uint count) { wordhash_t *wh = xcalloc (1, sizeof (wordhash_t)); - wh->type = t; + wh->type = type; wh->count = 0; - wh->size = (t == WH_NORMAL) ? 0 : ((c == 0) ? WH_INIT : c); + wh->size = (type == WH_NORMAL) ? 0 : ((count == 0) ? WH_INIT : count); - if (t == WH_NORMAL) + switch (type) + { + case WH_NORMAL: wh->bin = xcalloc (NHASH, sizeof (hashnode_t **)); - - if (t == WH_CNTS) + break; + case WH_CNTS: // used for bogotune with msg_count files wh->cnts = (wordcnts_t *) xcalloc(wh->size, sizeof(wordcnts_t)); - - if (t == WH_PROPS) { + break; + case WH_PROPS: wh->freeable = true; wh->props = (hashnode_t *) xcalloc(wh->size, sizeof(hashnode_t)); + break; } return wh; @@ -101,8 +104,8 @@ wordhash_t * wordhash_new (void) { - wh_t t = (!fBogotune || !msg_count_file) ? WH_NORMAL : WH_CNTS; - wordhash_t *wh = wordhash_init(t, 0); + wh_t type = (!fBogotune || !msg_count_file) ? 
WH_NORMAL : WH_CNTS; + wordhash_t *wh = wordhash_init(type, 0); return wh; } @@ -164,16 +167,24 @@ wordhash_free_strings(wh); xfree (wh->order); - if (wh->type == WH_CNTS) + + switch (wh->type) + { + case WH_NORMAL: + break; + case WH_CNTS: xfree (wh->cnts); - if (wh->type == WH_PROPS) { + break; + case WH_PROPS: if (wh->freeable) { uint i; for (i=0; i<wh->size; i++) xfree(wh->props[i].data); } xfree (wh->props); + break; } + xfree (wh->bin); xfree (wh); } @@ -397,14 +408,14 @@ case WH_NORMAL: val = wh->iter_ptr = wh->iter_head; break; + case WH_CNTS: + wh->index = 0; + val = &wh->cnts[wh->index]; + break; case WH_PROPS: wh->index = 0; val = &wh->props[wh->index]; break; - case WH_CNTS: - wh->index = 0; - val = &wh->cnts[wh->index]; - break; } return val; @@ -420,14 +431,14 @@ if (wh->iter_ptr != NULL) val = wh->iter_ptr = wh->iter_ptr->iter_next; break; + case WH_CNTS: + if (++wh->index < wh->count) + val = &wh->cnts[wh->index]; + break; case WH_PROPS: if (++wh->index < wh->count) val = &wh->props[wh->index]; break; - case WH_CNTS: - if (++wh->index < wh->count) - val = &wh->cnts[wh->index]; - break; } return val; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <relson@us...> - 2009-02-14 23:22:22
|
Revision: 6799 http://bogofilter.svn.sourceforge.net/bogofilter/?rev=6799&view=rev Author: relson Date: 2009-02-14 22:10:54 +0000 (Sat, 14 Feb 2009) Log Message: ----------- Remove some cruft. Add some comments. Modified Paths: -------------- trunk/bogofilter/src/score.c trunk/bogofilter/src/wordhash.c trunk/bogofilter/src/wordhash.h Modified: trunk/bogofilter/src/score.c =================================================================== --- trunk/bogofilter/src/score.c 2009-02-14 21:58:29 UTC (rev 6798) +++ trunk/bogofilter/src/score.c 2009-02-14 22:10:54 UTC (rev 6799) @@ -46,7 +46,6 @@ typedef struct probnode_t { hashnode_t * node; double prob; - double dev; } probnode_t; /* struct for saving stats for printing. */ @@ -60,13 +59,6 @@ double q_pr; /* Robinson Q */ } score_t; -/* struct for printing doubles as hex. */ -typedef union -{ - double d; - long long q; -} t_DOUBLE_QUAD; - /* Function Prototypes */ static double get_spamicity(size_t robn, FLOAT P, FLOAT Q); @@ -235,16 +227,18 @@ if (DEBUG_ALGORITHM(2)) fprintf(dbgout, "min_dev: %f, robs: %f, robx: %f\n", min_dev, robs, robx); + /* compute scores for the wordhash's tokens */ compute_scores(wh); + /* recalculate min_dev if necessary to satisfy token_count settings */ score.min_dev = !need_scoring_boundary(wh) ? 
min_dev : find_scoring_boundary(wh); + /* compute message spamicity from the wordhash's scores */ compute_spamicity(wh, &P, &Q, &robn, need_stats); /* Robinson's P, Q and S ** S = (P - Q) / (P + Q) [combined indicator] */ - spamicity = get_spamicity(robn, P, Q); if (need_stats && robn != 0) Modified: trunk/bogofilter/src/wordhash.c =================================================================== --- trunk/bogofilter/src/wordhash.c 2009-02-14 21:58:29 UTC (rev 6798) +++ trunk/bogofilter/src/wordhash.c 2009-02-14 22:10:54 UTC (rev 6799) @@ -166,8 +166,6 @@ wordhash_free_alloc_nodes(wh); wordhash_free_strings(wh); - xfree (wh->order); - switch (wh->type) { case WH_NORMAL: Modified: trunk/bogofilter/src/wordhash.h =================================================================== --- trunk/bogofilter/src/wordhash.h 2009-02-14 21:58:29 UTC (rev 6798) +++ trunk/bogofilter/src/wordhash.h 2009-02-14 22:10:54 UTC (rev 6799) @@ -41,20 +41,19 @@ /*@null@*/ /*@dependent@*/ uint size; /* size of array */ hashnode_pt *bin; - /*@null@*/ /*@owned@*/ wh_alloc_node *nodes; /*list of node buffers */ + /*@null@*/ /*@owned@*/ wh_alloc_node *nodes; /* list of node buffers */ /*@null@*/ wh_alloc_str *strings; /* list of string buffers */ /*@null@*/ /*@dependent@*/ hashnode_t *iter_ptr; /*@null@*/ /*@dependent@*/ hashnode_t *iter_head; /*@null@*/ /*@dependent@*/ hashnode_t *iter_tail; - /*@null@*/ /*@dependent@*/ hashnode_t **order; /* array of nodes */ - /*@null@*/ /*@dependent@*/ hashnode_t *props; /* array of nodes */ - /*@null@*/ /*@dependent@*/ wordcnts_t *cnts; /* array of counts */ + /*@null@*/ /*@dependent@*/ hashnode_t *props; /* array of nodes */ + /*@null@*/ /*@dependent@*/ wordcnts_t *cnts; /* array of counts */ } wordhash_t; /*@only@*/ wordhash_t *wordhash_new(void); -/*@only@*/ wordhash_t *wordhash_init(wh_t t, uint c); +/*@only@*/ wordhash_t *wordhash_init(wh_t type, uint count); void wordhash_free(/*@only@*/ wordhash_t *); size_t wordhash_count(wordhash_t * h); This 
was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <relson@us...> - 2009-02-14 21:13:03
|
Revision: 6797 http://bogofilter.svn.sourceforge.net/bogofilter/?rev=6797&view=rev Author: relson Date: 2009-02-14 21:13:00 +0000 (Sat, 14 Feb 2009) Log Message: ----------- Rename hashnode_t's 'buf' field. Modified Paths: -------------- trunk/bogofilter/src/register.c trunk/bogofilter/src/score.c trunk/bogofilter/src/wordhash.c trunk/bogofilter/src/wordhash.h trunk/bogofilter/src/wordhash.main.c Modified: trunk/bogofilter/src/register.c =================================================================== --- trunk/bogofilter/src/register.c 2009-02-14 21:12:52 UTC (rev 6796) +++ trunk/bogofilter/src/register.c 2009-02-14 21:13:00 UTC (rev 6797) @@ -87,7 +87,7 @@ for (node = wordhash_first(h); node != NULL; node = wordhash_next(h)) { - wordprop = node->buf; + wordprop = node->data; switch (ds_read(list->dsh, node->key, &val)) { case DS_ABORT_RETRY: rand_sleep(4*1000,1000*1000); Modified: trunk/bogofilter/src/score.c =================================================================== --- trunk/bogofilter/src/score.c 2009-02-14 21:12:52 UTC (rev 6796) +++ trunk/bogofilter/src/score.c 2009-02-14 21:13:00 UTC (rev 6797) @@ -206,7 +206,7 @@ for (node = wordhash_first(wh); node != NULL; node = wordhash_next(wh)) { word_t *token = node->key; - wordprop_t *props = (wordprop_t *) node->buf; + wordprop_t *props = (wordprop_t *) node->data; wordcnts_t *cnts = &props->cnts; ret = lookup(token, cnts); if (ret == DS_ABORT_RETRY) @@ -269,7 +269,7 @@ wordprop_t *props; if (!fBogotune) { - props = (wordprop_t *) node->buf; + props = (wordprop_t *) node->data; cnts = &props->cnts; props->prob = calc_prob(cnts->good, cnts->bad, cnts->msgs_good, cnts->msgs_bad); @@ -309,7 +309,7 @@ if (!fBogotune) { token = node->key; - props = (wordprop_t *) node->buf; + props = (wordprop_t *) node->data; cnts = &props->cnts; prob = props->prob; useflag = props->used; @@ -374,7 +374,7 @@ for (node = wordhash_first(wh); node != NULL; node = wordhash_next(wh)) { if (!fBogotune) { - wordprop_t *props = 
(wordprop_t *) node->buf; + wordprop_t *props = (wordprop_t *) node->data; if (props->used) count += 1; } else { @@ -422,7 +422,7 @@ double dev; if (!fBogotune) { - props = (wordprop_t *) node->buf; + props = (wordprop_t *) node->data; cnts = &props->cnts; } else { cnts = (wordcnts_t *) node; @@ -457,8 +457,8 @@ if (!fBogotune) { const hashnode_t *hn1 = (const hashnode_t const *)pv1; const hashnode_t *hn2 = (const hashnode_t const *)pv2; - d1 = fabs(((wordprop_t *) hn1->buf)->prob - EVEN_ODDS); - d2 = fabs(((wordprop_t *) hn2->buf)->prob - EVEN_ODDS); + d1 = fabs(((wordprop_t *) hn1->data)->prob - EVEN_ODDS); + d2 = fabs(((wordprop_t *) hn2->data)->prob - EVEN_ODDS); } else { const wordcnts_t *cnts; double prob; Modified: trunk/bogofilter/src/wordhash.c =================================================================== --- trunk/bogofilter/src/wordhash.c 2009-02-14 21:12:52 UTC (rev 6796) +++ trunk/bogofilter/src/wordhash.c 2009-02-14 21:13:00 UTC (rev 6797) @@ -170,7 +170,7 @@ if (wh->freeable) { uint i; for (i=0; i<wh->size; i++) - xfree(wh->props[i].buf); + xfree(wh->props[i].data); } xfree (wh->props); } @@ -242,7 +242,7 @@ static void display_node(hashnode_t *n, const char *str) { - wordprop_t *p = (wordprop_t *)n->buf; + wordprop_t *p = (wordprop_t *)n->data; if (verbose > 2) printf( "%20.20s %5u %5u%s", n->key->u.text, p->cnts.bad, p->cnts.good, str); } @@ -264,7 +264,7 @@ } for (s = wordhash_first(src); s != NULL; s = wordhash_next(src)) { - wordprop_t *p = (wordprop_t *)s->buf; + wordprop_t *p = (wordprop_t *)s->data; word_t *key = s->key; wordprop_t *d; if (key == NULL) @@ -287,7 +287,7 @@ hashnode_t *hn; for (hn = wordhash_first(wh); hn != NULL; hn = wordhash_next(wh)) { - (*hook)(hn->key, hn->buf, userdata); + (*hook)(hn->key, hn->data, userdata); } return; @@ -313,7 +313,7 @@ for (hn = wh->bin[idx]; hn != NULL; hn = hn->next) { word_t *key = hn->key; if (key->leng == t->leng && memcmp (t->u.text, key->u.text, t->leng) == 0) { - wordprop_t *p = 
(wordprop_t *)hn->buf; + wordprop_t *p = (wordprop_t *)hn->data; return p; } } @@ -331,11 +331,11 @@ return buf; hn = nmalloc (wh); - hn->buf = smalloc (wh, n); + hn->data = smalloc (wh, n); if (initializer) - initializer(hn->buf); + initializer(hn->data); else - memset(hn->buf, '\0', n); + memset(hn->data, '\0', n); hn->key = word_dup(t); @@ -355,7 +355,7 @@ wh->count += 1; wh->size += 1; - return hn->buf; + return hn->data; } static void * @@ -445,7 +445,7 @@ static wordcnts_t *wordhash_get_counts(wordhash_t *wh, hashnode_t *n) { if (wh->cnts == NULL) { - wordprop_t *p = (wordprop_t *)n->buf; + wordprop_t *p = (wordprop_t *)n->data; wordcnts_t *c = &p->cnts; return c; } @@ -537,11 +537,11 @@ } else { wp = xcalloc(1, sizeof(wordprop_t)); - memcpy(wp, node->buf, sizeof(wordprop_t)); + memcpy(wp, node->data, sizeof(wordprop_t)); if (!who->freeable) wh_trap(); } - who->props[who->count].buf = wp; + who->props[who->count].data = wp; xfree(node->key); node->key = NULL; who->count += 1; Modified: trunk/bogofilter/src/wordhash.h =================================================================== --- trunk/bogofilter/src/wordhash.h 2009-02-14 21:12:52 UTC (rev 6796) +++ trunk/bogofilter/src/wordhash.h 2009-02-14 21:13:00 UTC (rev 6797) @@ -10,7 +10,7 @@ /*@dependent@*/ struct hashnode_t *iter_next; /* Next item added to hash. For fast traversal */ struct hashnode_t *next; /* Next item in linked list of items with same hash */ word_t *key; /* word key */ - void *buf; /* Associated buffer. To be used by caller. */ + void *data; /* Associated data. To be used by caller. 
*/ } hashnode_t; typedef struct wh_alloc_node { Modified: trunk/bogofilter/src/wordhash.main.c =================================================================== --- trunk/bogofilter/src/wordhash.main.c 2009-02-14 21:12:52 UTC (rev 6796) +++ trunk/bogofilter/src/wordhash.main.c 2009-02-14 21:13:00 UTC (rev 6797) @@ -25,7 +25,7 @@ { word_t *key = p->key; (void)word_puts(key, 0, stdout); - (void)printf (" %d\n", ((wh_elt_t *) p->buf)->count); + (void)printf (" %d\n", ((wh_elt_t *) p->data)->count); } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <relson@us...> - 2009-02-14 21:12:56
|
Revision: 6796 http://bogofilter.svn.sourceforge.net/bogofilter/?rev=6796&view=rev Author: relson Date: 2009-02-14 21:12:52 +0000 (Sat, 14 Feb 2009) Log Message: ----------- Allow 10:1 difference between ham and spam. Modified Paths: -------------- trunk/bogofilter/src/bogotune.c Modified: trunk/bogofilter/src/bogotune.c =================================================================== --- trunk/bogofilter/src/bogotune.c 2009-02-14 18:29:46 UTC (rev 6795) +++ trunk/bogofilter/src/bogotune.c 2009-02-14 21:12:52 UTC (rev 6796) @@ -639,7 +639,7 @@ if ((message_count % 1000) != 0) putchar('.'); else - printf("\r \r%u ", message_count/1000 ); + printf("\r \r%u ", message_count ); fflush(stdout); } return message_count; @@ -732,7 +732,7 @@ for (hn = wordhash_first(wh); hn != NULL; hn = wordhash_next(wh)) { word_t *token = hn->key; - wordprop_t *wp = (wordprop_t *) hn->buf; + wordprop_t *wp = (wordprop_t *) hn->data; wordcnts_t *cnts = &wp->cnts; if (cnts->good == 0 && cnts->bad == 0) { @@ -1462,6 +1462,7 @@ static bool check_msg_counts(void) { bool ok = true; + double ratio; if (msgs_good < LIST_COUNT || msgs_bad < LIST_COUNT) { if (!quiet) @@ -1472,13 +1473,13 @@ ok = false; } - if (msgs_bad * 5.0 < msgs_good || - msgs_bad > msgs_good * 5.0) { - if (!quiet) + ratio = (double)msgs_good / (double)msgs_bad; + fprintf(stderr, "wordlist's ham to spam ratio is %0.1f to 1.0\n", ratio ); + if ( ratio < 0.1 || ratio > 10.0) { + if (!quiet) { fprintf(stderr, - "The wordlist has a ratio of spam to non-spam of %0.1f to 1.0.\n" - "Bogotune requires the ratio be in the range of 0.2 to 5.\n", - (double)msgs_bad / msgs_good); + "Bogotune requires the ratio be in the range of 0.1 to 10.\n"); + } ok = false; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <relson@us...> - 2009-02-14 18:29:49
|
Revision: 6795 http://bogofilter.svn.sourceforge.net/bogofilter/?rev=6795&view=rev Author: relson Date: 2009-02-14 18:29:46 +0000 (Sat, 14 Feb 2009) Log Message: ----------- Replace qsort with listsort. Modified Paths: -------------- trunk/bogofilter/src/common.h trunk/bogofilter/src/rstats.c trunk/bogofilter/src/score.c trunk/bogofilter/src/tests/outputs/token.count.ref Modified: trunk/bogofilter/src/common.h =================================================================== --- trunk/bogofilter/src/common.h 2009-02-14 18:28:58 UTC (rev 6794) +++ trunk/bogofilter/src/common.h 2009-02-14 18:29:46 UTC (rev 6795) @@ -160,7 +160,8 @@ typedef struct { wordcnts_t cnts; double prob; - int freq; + int freq; + bool used; } wordprop_t; extern void bf_exit(void); Modified: trunk/bogofilter/src/rstats.c =================================================================== --- trunk/bogofilter/src/rstats.c 2009-02-14 18:28:58 UTC (rev 6794) +++ trunk/bogofilter/src/rstats.c 2009-02-14 18:29:46 UTC (rev 6795) @@ -25,19 +25,6 @@ #include "score.h" #include "xmalloc.h" -#if 1 -#define TRACE(n,p) -#else -#define TRACE(n,p) \ - { \ - const rstats_t *r = (const rstats_t *)p; \ - printf("%s:%d %s: %p %-16s %8.6f\n", __FILE__, __LINE__, \ - n, r, \ - r ? (char *)r->token->u.text : "nil", \ - r ? 
r->prob : 0.0); \ - } -#endif - typedef struct rstats_s rstats_t; struct rstats_s { rstats_t *next; @@ -82,8 +69,8 @@ { if (stats_head == NULL) { stats_head = xcalloc(1, sizeof(header_t)); - stats_tail = NULL; - stats_head->list = NULL; + stats_tail = (rstats_t *)xcalloc(1, sizeof(rstats_t)); + stats_head->list = stats_tail; } } @@ -103,24 +90,11 @@ void rstats_add(const word_t *token, double prob, bool used, wordcnts_t *cnts) { - rstats_t *stats_new; - if (token == NULL) return; -// fprintf(dbgout, "%s:%d %-16s %8.6f\n", __FILE__, __LINE__, -// token->u.text, prob); - - stats_new = (rstats_t *)xcalloc(1, sizeof(rstats_t)); - - if (stats_head->count == 0) { - stats_head->list = stats_new; - } - if (stats_tail != NULL) - stats_tail->next = stats_new; - stats_tail = stats_new; - stats_head->count += 1; + stats_tail->next = NULL; /* Using externally controlled data; token must not be freed before calling rstats_cleanup() @@ -132,15 +106,17 @@ stats_tail->bad = cnts->bad; stats_tail->msgs_good = cnts->msgs_good; stats_tail->msgs_bad = cnts->msgs_bad; - stats_tail = stats_new; -// fprintf(dbgout, "%s:%d %2d %p %8.6f %-16s\n", __FILE__, __LINE__, stats_head->count, stats_new, stats_new->prob, stats_new->token->u.text); + stats_tail->next = (rstats_t *)xcalloc(1, sizeof(rstats_t)); + stats_tail = stats_tail->next; } -static int compare_rstats_t(const void *const ir1, const void *const ir2) +/* compare_rstats_t - sort by ascending spamicity */ + +static int compare_rstats_t(const void *const pv1, const void *const pv2) { - const rstats_t *r1 = (const rstats_t *)ir1; - const rstats_t *r2 = (const rstats_t *)ir2; + const rstats_t *r1 = (const rstats_t *)pv1; + const rstats_t *r2 = (const rstats_t *)pv2; if (r1->prob > r2->prob) return 1; if (r2->prob > r1->prob) return -1; @@ -161,9 +137,9 @@ void rstats_print(bool unsure) { size_t robn = stats_head->robn; - size_t count = stats_head->count; - stats_head->list = (rstats_t *)listsort((element *)stats_head->list, (fcn_compare 
*)&compare_rstats_t, false, false); + /* sort by ascending spamicity */ + stats_head->list = listsort(stats_head->list, &compare_rstats_t, false, false); if (Rtable || verbose>=3) rstats_print_rtable(stats_head->list); @@ -215,13 +191,13 @@ if (robn == 0) h->spamicity = robx; - else + else { double invproduct, product; invproduct = 1.0 - exp(invlogsum / invn); product = 1.0 - exp(logsum / invn); - h->spamicity = (invproduct + product < EPS) - ? 0.0 + h->spamicity = (invproduct + product < EPS) + ? 0.0 : (1.0 + (invproduct - product) / (invproduct + product)) / 2.0; } h->count=cnt; @@ -266,7 +242,7 @@ pfx, max_token_len+2, "", "n", "pgood", "pbad", "fw", "invfwlog", "fwlog", "U"); /* Print 1 line per token */ - for (cur=rstats_head; cur != NULL; cur=cur->next) + for (cur=rstats_head->next; cur != NULL; cur=cur->next) { int len = (cur->token->leng >= max_token_len) ? 0 : (max_token_len - cur->token->leng); double fw = calc_prob(cur->good, cur->bad, cur->msgs_good, cur->msgs_bad); Modified: trunk/bogofilter/src/score.c =================================================================== --- trunk/bogofilter/src/score.c 2009-02-14 18:28:58 UTC (rev 6794) +++ trunk/bogofilter/src/score.c 2009-02-14 18:29:46 UTC (rev 6795) @@ -17,6 +17,7 @@ #include "bogofilter.h" #include "collect.h" #include "datastore.h" +#include "listsort.h" #include "msgcounts.h" #include "prob.h" #include "rand_sleep.h" @@ -71,8 +72,9 @@ static double get_spamicity(size_t robn, FLOAT P, FLOAT Q); static bool need_scoring_boundary(wordhash_t *wh); static double find_scoring_boundary(wordhash_t *wh); +static void compute_scores(wordhash_t *wh); static void compute_spamicity(wordhash_t *wh, FLOAT *P, FLOAT *Q, size_t *robn, bool need_stats); -static int compare_probnode_t(const void *const ipn1, const void *const ipn2); +static int compare_hashnode_t(const void *const pv1, const void *const pv2); /* Static Variables */ @@ -233,6 +235,8 @@ if (DEBUG_ALGORITHM(2)) fprintf(dbgout, "min_dev: %f, robs: %f, 
robx: %f\n", min_dev, robs, robx); + compute_scores(wh); + score.min_dev = !need_scoring_boundary(wh) ? min_dev : find_scoring_boundary(wh); compute_spamicity(wh, &P, &Q, &robn, need_stats); @@ -252,6 +256,37 @@ } /* +** compute_scores() +** compute the token probabilities from the linked list of tokens +*/ +void compute_scores(wordhash_t *wh) +{ + hashnode_t *node; + + for (node = wordhash_first(wh); node != NULL; node = wordhash_next(wh)) + { + wordcnts_t *cnts; + wordprop_t *props; + + if (!fBogotune) { + props = (wordprop_t *) node->buf; + cnts = &props->cnts; + props->prob = calc_prob(cnts->good, cnts->bad, + cnts->msgs_good, cnts->msgs_bad); + props->used = fabs(props->prob - EVEN_ODDS) > min_dev; + } else { + /* unneeded - remove */ + double prob; + bool useflag; + cnts = (wordcnts_t *) node; + prob = calc_prob(cnts->good, cnts->bad, + cnts->msgs_good, cnts->msgs_bad); + useflag = fabs(prob - EVEN_ODDS) > score.min_dev; + } + } +} + +/* ** compute_spamicity() ** compute the spamicity from the linked list of tokens using ** min_dev to select tokens @@ -263,6 +298,7 @@ hashnode_t *node; size_t count = 0; + for (node = wordhash_first(wh); node != NULL; node = wordhash_next(wh)) { bool useflag; @@ -272,19 +308,19 @@ wordprop_t *props; if (!fBogotune) { + token = node->key; props = (wordprop_t *) node->buf; cnts = &props->cnts; - token = node->key; + prob = props->prob; + useflag = props->used; } else { + token = NULL; cnts = (wordcnts_t *) node; - token = NULL; + prob = calc_prob(cnts->good, cnts->bad, + cnts->msgs_good, cnts->msgs_bad); + useflag = fabs(prob - EVEN_ODDS) > score.min_dev; } - prob = calc_prob(cnts->good, cnts->bad, - cnts->msgs_good, cnts->msgs_bad); - - useflag = fabs(prob - EVEN_ODDS) > score.min_dev; - if (need_stats) rstats_add(token, prob, useflag, cnts); @@ -337,24 +373,17 @@ // Count scorable tokens for (node = wordhash_first(wh); node != NULL; node = wordhash_next(wh)) { - double prob; - wordcnts_t *cnts; - wordprop_t *props; - if 
(!fBogotune) { - props = (wordprop_t *) node->buf; - cnts = &props->cnts; + wordprop_t *props = (wordprop_t *) node->buf; + if (props->used) + count += 1; } else { - cnts = (wordcnts_t *) node; + wordcnts_t *cnts = (wordcnts_t *) node; + double prob = calc_prob(cnts->good, cnts->bad, + cnts->msgs_good, cnts->msgs_bad); + if (fabs(prob - EVEN_ODDS) >= min_dev) + count += 1; } - - prob = calc_prob(cnts->good, cnts->bad, - cnts->msgs_good, cnts->msgs_bad); - - if (fabs(prob - EVEN_ODDS) >= min_dev) - { - count += 1; - } } // Compare count to limits @@ -375,60 +404,80 @@ */ double find_scoring_boundary(wordhash_t *wh) { - size_t node_index = 0; - size_t node_count = wh->count; + size_t count = 0; double min_prob = (token_count_max == 0.0) ? min_dev : 1.0; hashnode_t *node; - probnode_t *node_array = calloc(node_count, sizeof(probnode_t)); - /* create array from linked list to allow sorting */ - for (node = wordhash_first(wh); node != NULL; node = wordhash_next(wh)) - { - double prob, dev; - word_t *token; + /* sort by ascending score difference (from 0.5) */ + wh->iter_head = listsort(wh->iter_head, &compare_hashnode_t, false, false); + + count = max(token_count_fix, max(token_count_min, token_count_max)); + + for (node = wordhash_first(wh); node != NULL; node = wordhash_next(wh)) { wordcnts_t *cnts; wordprop_t *props; + double prob; + double dev; if (!fBogotune) { props = (wordprop_t *) node->buf; cnts = &props->cnts; - token = node->key; } else { cnts = (wordcnts_t *) node; - token = NULL; } - prob = calc_prob(cnts->good, cnts->bad, cnts->msgs_good, cnts->msgs_bad); dev = fabs(prob - EVEN_ODDS); - node_array[node_index].node = node; - node_array[node_index].prob = prob; - node_array[node_index].dev = dev; - node_index += 1; + if (count > 0) { + count -= 1; + props->used = true; + min_prob = dev; + } + else if (dev >= min_prob) { + props->used = true; + } + else { + props->used = false; + } } - qsort(node_array, node_count, sizeof(probnode_t), compare_probnode_t); - - 
node_index = max(token_count_fix, max(token_count_min, token_count_max)); - min_prob = node_array[ node_index ].dev; - - free(node_array); - return min_prob; } -static int compare_probnode_t(const void *const ipn1, const void *const ipn2) +/* compare_hashnode_t - sort by ascending score difference (from 0.5) */ + +static int compare_hashnode_t(const void *const pv1, const void *const pv2) { - const probnode_t *pn1 = (const probnode_t const *)ipn1; - const probnode_t *pn2 = (const probnode_t const *)ipn2; + double d1; + double d2; - if (pn1->dev < pn2->dev) + if (!fBogotune) { + const hashnode_t *hn1 = (const hashnode_t const *)pv1; + const hashnode_t *hn2 = (const hashnode_t const *)pv2; + d1 = fabs(((wordprop_t *) hn1->buf)->prob - EVEN_ODDS); + d2 = fabs(((wordprop_t *) hn2->buf)->prob - EVEN_ODDS); + } else { + const wordcnts_t *cnts; + double prob; + cnts = (const wordcnts_t *) pv1; + prob = calc_prob(cnts->good, cnts->bad, + cnts->msgs_good, cnts->msgs_bad); + d1 = fabs(prob - EVEN_ODDS); + + cnts = (const wordcnts_t *) pv2; + prob = calc_prob(cnts->good, cnts->bad, + cnts->msgs_good, cnts->msgs_bad); + d2 = fabs(prob - EVEN_ODDS); + } + + if (d1 < d2) return +1; - if (pn1->dev > pn2->dev) + if (d1 > d2) return -1; + return 0; } Modified: trunk/bogofilter/src/tests/outputs/token.count.ref =================================================================== --- trunk/bogofilter/src/tests/outputs/token.count.ref 2009-02-14 18:28:58 UTC (rev 6794) +++ trunk/bogofilter/src/tests/outputs/token.count.ref 2009-02-14 18:29:46 UTC (rev 6795) @@ -57,7 +57,7 @@ "dealer" 2 0.000000 0.095238 0.995766 + "agree" 3 0.000000 0.142857 0.997169 + N_P_Q_S_s_x_md 4 0.000085 0.065746 0.532831 - 0.017800 0.520000 0.490906 + 0.017800 0.520000 0.491605 #### --min-dev=0.496 --token-count-max=8 #### X-Bogosity: Unsure, tests=bogofilter, spamicity=0.493025 n pgood pbad fw U @@ -177,7 +177,7 @@ "dealer" 2 0.000000 0.095238 0.995766 + "agree" 3 0.000000 0.142857 0.997169 + N_P_Q_S_s_x_md 8 
0.006086 0.006252 0.500083 - 0.017800 0.520000 0.253238 + 0.017800 0.520000 0.277190 #### --min-dev=0.100 --token-count=20 #### X-Bogosity: Unsure, tests=bogofilter, spamicity=0.495886 n pgood pbad fw U @@ -207,7 +207,7 @@ "dealer" 2 0.000000 0.095238 0.995766 + "agree" 3 0.000000 0.142857 0.997169 + N_P_Q_S_s_x_md 20 0.107193 0.098964 0.495886 - 0.017800 0.520000 0.059390 + 0.017800 0.520000 0.067490 #### U 0.493025 --min-dev=0.496 #### U 0.532831 --min-dev=0.496 --token-count-min=4 #### U 0.493025 --min-dev=0.496 --token-count-max=8 This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <relson@us...> - 2009-02-14 18:29:00
|
Revision: 6794 http://bogofilter.svn.sourceforge.net/bogofilter/?rev=6794&view=rev Author: relson Date: 2009-02-14 18:28:58 +0000 (Sat, 14 Feb 2009) Log Message: ----------- Remove WH_ORDERED as it's no longer needed. Modified Paths: -------------- trunk/bogofilter/src/wordhash.c trunk/bogofilter/src/wordhash.h Modified: trunk/bogofilter/src/wordhash.c =================================================================== --- trunk/bogofilter/src/wordhash.c 2009-02-14 18:27:45 UTC (rev 6793) +++ trunk/bogofilter/src/wordhash.c 2009-02-14 18:28:58 UTC (rev 6794) @@ -397,10 +397,6 @@ case WH_NORMAL: val = wh->iter_ptr = wh->iter_head; break; - case WH_ORDERED: - wh->index = 0; - val = wh->order[wh->index]; - break; case WH_PROPS: wh->index = 0; val = &wh->props[wh->index]; @@ -424,10 +420,6 @@ if (wh->iter_ptr != NULL) val = wh->iter_ptr = wh->iter_ptr->iter_next; break; - case WH_ORDERED: - if (++wh->index < wh->count) - val = wh->order[wh->index]; - break; case WH_PROPS: if (++wh->index < wh->count) val = &wh->props[wh->index]; Modified: trunk/bogofilter/src/wordhash.h =================================================================== --- trunk/bogofilter/src/wordhash.h 2009-02-14 18:27:45 UTC (rev 6793) +++ trunk/bogofilter/src/wordhash.h 2009-02-14 18:28:58 UTC (rev 6794) @@ -29,7 +29,9 @@ typedef /*@null@*/ hashnode_t *hashnode_pt; -typedef enum wh_e { WH_NORMAL, WH_ORDERED, WH_PROPS, WH_CNTS } wh_t; +typedef enum wh_e { WH_NORMAL, + WH_PROPS, + WH_CNTS } wh_t; typedef struct wordhash_s { /*@null@*/ /*@dependent@*/ wh_t type; /* normal, ordered, props, or cnts */ This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <relson@us...> - 2009-02-14 18:27:48
|
Revision: 6793
          http://bogofilter.svn.sourceforge.net/bogofilter/?rev=6793&view=rev
Author:   relson
Date:     2009-02-14 18:27:45 +0000 (Sat, 14 Feb 2009)

Log Message:
-----------
Replace qsort with listsort.

Modified Paths:
--------------
    trunk/bogofilter/src/wordhash.c
    trunk/bogofilter/src/wordhash.h

Modified: trunk/bogofilter/src/wordhash.c
===================================================================
--- trunk/bogofilter/src/wordhash.c 2009-02-14 18:21:48 UTC (rev 6792)
+++ trunk/bogofilter/src/wordhash.c 2009-02-14 18:27:45 UTC (rev 6793)
@@ -29,6 +29,7 @@
 #include <string.h>
 #include <stddef.h> /* for offsetof */

+#include "listsort.h"
 #include "wordhash.h"
 #include "xmalloc.h"

@@ -440,12 +441,13 @@
     return val;
 }

-static int compare_hashnode_t(const void *const ihn1, const void *const ihn2)
+/* compare_hashnode_t - sort by ascending token text */
+
+static int compare_hashnode_t(const void *const pv1, const void *const pv2)
 {
-    const hashnode_t *hn1 = *(const hashnode_t *const *)ihn1;
-    const hashnode_t *hn2 = *(const hashnode_t *const *)ihn2;
-    int cmp = word_cmp(hn1->key, hn2->key);
-    return cmp;
+    const hashnode_t *hn1 = (const hashnode_t *)pv1;
+    const hashnode_t *hn2 = (const hashnode_t *)pv2;
+    return word_cmp(hn1->key, hn2->key);
 }

 static wordcnts_t *wordhash_get_counts(wordhash_t *wh, hashnode_t *n)
@@ -473,26 +475,13 @@
     }
 }

+/* wordhash_sort - sort by ascending token text */
+
 void
 wordhash_sort (wordhash_t *wh)
 {
-    hashnode_t *node;
+    wh->iter_head = listsort(wh->iter_head, &compare_hashnode_t, false, false);

-    if (wh->size == 0
-        || wh->type != WH_NORMAL
-        || msg_count_file)
-        return;
-
-    wh->order = (hashnode_t **) xcalloc(wh->size, sizeof(hashnode_t *));
-
-    wh->count = 0;
-    for (node = wordhash_first(wh); node != NULL; node = wordhash_next(wh))
-        wh->order[wh->count++] = node;
-
-    qsort(wh->order, wh->count, sizeof(hashnode_t *), compare_hashnode_t);
-
-    wh->type = WH_ORDERED;
-
     return;
 }

Modified: trunk/bogofilter/src/wordhash.h
===================================================================
--- trunk/bogofilter/src/wordhash.h 2009-02-14 18:21:48 UTC (rev 6792)
+++ trunk/bogofilter/src/wordhash.h 2009-02-14 18:27:45 UTC (rev 6793)
@@ -7,10 +7,10 @@

 /* Hash entry. */
 typedef struct hashnode_t {
+  /*@dependent@*/ struct hashnode_t *iter_next; /* Next item added to hash. For fast traversal */
+  struct hashnode_t *next; /* Next item in linked list of items with same hash */
   word_t *key; /* word key */
   void *buf; /* Associated buffer. To be used by caller. */
-  struct hashnode_t *next; /* Next item in linked list of items with same hash */
-  /*@dependent@*/ struct hashnode_t *iter_next; /* Next item added to hash. For fast traversal */
 } hashnode_t;

 typedef struct wh_alloc_node {

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <relson@us...> - 2009-02-14 18:21:51
|
Revision: 6792
          http://bogofilter.svn.sourceforge.net/bogofilter/?rev=6792&view=rev
Author:   relson
Date:     2009-02-14 18:21:48 +0000 (Sat, 14 Feb 2009)

Log Message:
-----------
Use void * where possible.

Modified Paths:
--------------
    trunk/bogofilter/src/listsort.c
    trunk/bogofilter/src/listsort.h

Modified: trunk/bogofilter/src/listsort.c
===================================================================
--- trunk/bogofilter/src/listsort.c 2009-02-12 17:05:08 UTC (rev 6791)
+++ trunk/bogofilter/src/listsort.c 2009-02-14 18:21:48 UTC (rev 6792)
@@ -43,16 +43,10 @@
 typedef unsigned char byte;
 #include "word.h"

-typedef struct rstats_s rstats_t;
-struct rstats_s {
-    rstats_t *next;
-    const word_t *token;
-    u_int32_t good;
-    u_int32_t bad;
-    u_int32_t msgs_good;
-    u_int32_t msgs_bad;
-    bool used;
-    double prob;
+typedef struct element element;
+struct element {
+    element *next, *prev;
+    int i;
 };

 #ifdef TEST
@@ -76,18 +70,8 @@
  *
  * list = listsort(mylist);
  */
-#if 1
-#define TRACE(n,p)
-#else
-#define TRACE(n,p) \
-    { \
-        const rstats_t *r = (const rstats_t *)p; \
-        printf("%s:%d %s: %p %-16s %8.6f\n", __FILE__, __LINE__, \
-               n, r, r->token->u.text, r->prob); \
-    }
-#endif

-element *listsort(element *list, fcn_compare *compare, bool is_circular, bool is_double) {
+void *listsort(void *list, fcn_compare *compare, bool is_circular, bool is_double) {
     element *p, *q, *e, *tail, *oldhead;
     int insize, nmerges, psize, qsize, i;

@@ -101,7 +85,6 @@
     insize = 1;

     while (1) {
-// printf("%s:%d %d\n", __FILE__, __LINE__, insize);
         p = list;
         oldhead = list; /* only used for circular linkage */
         list = NULL;
@@ -168,7 +151,7 @@
         if (is_circular) {
             tail->next = list;
             if (is_double)
-                list->prev = tail;
+                ((element *)list)->prev = tail;
         } else
             tail->next = NULL;

Modified: trunk/bogofilter/src/listsort.h
===================================================================
--- trunk/bogofilter/src/listsort.h 2009-02-12 17:05:08 UTC (rev 6791)
+++ trunk/bogofilter/src/listsort.h 2009-02-14 18:21:48 UTC (rev 6792)
@@ -5,14 +5,8 @@

 #include "bftypes.h"

-typedef struct element element;
-struct element {
-    element *next, *prev;
-    int i;
-};
+typedef int fcn_compare(const void *a, const void *b);

-typedef int fcn_compare(const element *a, const element *b);
+extern void *listsort(void *list, fcn_compare *compare, bool is_circular, bool is_double);

-extern element *listsort(element *list, fcn_compare *compare, bool is_circular, bool is_double);
-
 #endif

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |