Thread: [cvs] SF.net SVN: bogofilter:[6750] trunk/bogofilter/src
Fast Bayesian spam filter along lines suggested by Paul Graham
Brought to you by:
m-a
From: <cl...@us...> - 2008-10-15 23:18:54
|
Revision: 6750 http://bogofilter.svn.sourceforge.net/bogofilter/?rev=6750&view=rev Author: clint Date: 2008-10-15 23:18:45 +0000 (Wed, 15 Oct 2008) Log Message: ----------- Add transform and environment variable support to bf_tar and bf_copy. Modified Paths: -------------- trunk/bogofilter/src/bf_copy.in trunk/bogofilter/src/bf_tar.in Modified: trunk/bogofilter/src/bf_copy.in =================================================================== --- trunk/bogofilter/src/bf_copy.in 2008-10-15 23:08:06 UTC (rev 6749) +++ trunk/bogofilter/src/bf_copy.in 2008-10-15 23:18:45 UTC (rev 6750) @@ -9,6 +9,8 @@ set -e # die on errors +: ${BOGOUTIL:=@transformed_bogoutil@} + COMPACT=0 while test "$1" ; do case "$1" in @@ -30,7 +32,7 @@ DST="$2" # flush mempools -bogoutil --db-checkpoint="$SRC" || : +$BOGOUTIL --db-checkpoint="$SRC" || : mkdir "$DST" @@ -39,7 +41,7 @@ trap "rm -rf $TMP \"$DST\"" 0 if test $COMPACT -eq 1 ; then # don't copy unneeded logs - bogoutil --db-list-logfiles="$SRC" >$TMP + $BOGOUTIL --db-list-logfiles="$SRC" >$TMP else : >$TMP fi @@ -50,10 +52,10 @@ if test -f "$SRC"/DB_CONFIG ; then cp -p "$SRC"/DB_CONFIG "$DST" ; fi for FILE in "$SRC"/*.db ; do - SIZE=`bogoutil --db-print-pagesize="$FILE"` + SIZE=`$BOGOUTIL --db-print-pagesize="$FILE"` dd bs=$SIZE if=$FILE of="$DST/"`basename "$FILE"` done -if test "$LOGS" ; then bogoutil --db-recover="$DST" ; fi +if test "$LOGS" ; then $BOGOUTIL --db-recover="$DST" ; fi rm -f $TMP trap - 0 Modified: trunk/bogofilter/src/bf_tar.in =================================================================== --- trunk/bogofilter/src/bf_tar.in 2008-10-15 23:08:06 UTC (rev 6749) +++ trunk/bogofilter/src/bf_tar.in 2008-10-15 23:18:45 UTC (rev 6750) @@ -11,6 +11,9 @@ set -e +: ${BOGOFILTER:=@transformed_bogofilter@} +: ${BOGOUTIL:=@transformed_bogoutil@} + REMOVEBEF=0 REMOVEAFT=0 while [ "$1" ] ; do @@ -43,14 +46,14 @@ fi nukelogs() { - bogoutil --db-prune="$BOGOHOME" + $BOGOUTIL --db-prune="$BOGOHOME" } # remove if requested if [ $REMOVEBEF -eq 1 ] ; then nukelogs else - bogoutil --db-checkpoint="$BOGOHOME" + $BOGOUTIL --db-checkpoint="$BOGOHOME" fi # database first, then the logs. @@ -61,8 +64,8 @@ ( c="${BOGOHOME}/DB_CONFIG" if [ -f "$c" ] ; then echo "$c" ; fi - bogofilter -QQ -d "$BOGOHOME" | grep '^wordlist ' | cut -f3 -d, - bogoutil --db-list-logfiles="$BOGOHOME" all + $BOGOFILTER -QQ -d "$BOGOHOME" | grep '^wordlist ' | cut -f3 -d, + $BOGOUTIL --db-list-logfiles="$BOGOHOME" all ) | pax -w -v -x ustar # remove if requested This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <re...@us...> - 2009-01-12 04:12:46
|
Revision: 6765 http://bogofilter.svn.sourceforge.net/bogofilter/?rev=6765&view=rev Author: relson Date: 2009-01-12 04:12:41 +0000 (Mon, 12 Jan 2009) Log Message: ----------- Update copyright messages. Modified Paths: -------------- trunk/bogofilter/src/bogoconfig.c trunk/bogofilter/src/bogolexer.c trunk/bogofilter/src/bogoutil.c Modified: trunk/bogofilter/src/bogoconfig.c =================================================================== --- trunk/bogofilter/src/bogoconfig.c 2009-01-11 13:42:59 UTC (rev 6764) +++ trunk/bogofilter/src/bogoconfig.c 2009-01-12 04:12:41 UTC (rev 6765) @@ -390,7 +390,7 @@ (void)fprintf(stdout, "%s version %s\n" " Database: %s\n" - "Copyright (C) 2002-2007 David Relson, Matthias Andree\n" + "Copyright (C) 2002-2009 David Relson, Matthias Andree\n" "Copyright (C) 2002-2004 Greg Louis\n" "Copyright (C) 2002-2003 Eric S. Raymond, Adrian Otto, Gyepi Sam\n\n" "%s comes with ABSOLUTELY NO WARRANTY. " Modified: trunk/bogofilter/src/bogolexer.c =================================================================== --- trunk/bogofilter/src/bogolexer.c 2009-01-11 13:42:59 UTC (rev 6764) +++ trunk/bogofilter/src/bogolexer.c 2009-01-12 04:12:41 UTC (rev 6765) @@ -71,7 +71,7 @@ { (void)fprintf(stdout, "%s version %s\n" - "Copyright (C) 2002-2007 David Relson\n\n" + "Copyright (C) 2002-2009 David Relson\n\n" "%s comes with ABSOLUTELY NO WARRANTY. " "This is free software, and\nyou are welcome to " "redistribute it under the General Public License. " Modified: trunk/bogofilter/src/bogoutil.c =================================================================== --- trunk/bogofilter/src/bogoutil.c 2009-01-11 13:42:59 UTC (rev 6764) +++ trunk/bogofilter/src/bogoutil.c 2009-01-12 04:12:41 UTC (rev 6765) @@ -462,7 +462,7 @@ (void)fprintf(stdout, "%s version %s\n" " Database: %s\n" - "Copyright (C) 2002-2007 David Relson, Matthias Andree\n" + "Copyright (C) 2002-2009 David Relson, Matthias Andree\n" "Copyright (C) 2002-2003 Gyepi Sam.\n\n" "%s comes with ABSOLUTELY NO WARRANTY. " "This is free software, and\nyou are welcome to " This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <re...@us...> - 2009-02-01 01:29:50
|
Revision: 6771 http://bogofilter.svn.sourceforge.net/bogofilter/?rev=6771&view=rev Author: relson Date: 2009-02-01 01:29:47 +0000 (Sun, 01 Feb 2009) Log Message: ----------- Move score_t definition. Modified Paths: -------------- trunk/bogofilter/src/score.c trunk/bogofilter/src/score.h Modified: trunk/bogofilter/src/score.c =================================================================== --- trunk/bogofilter/src/score.c 2009-01-30 00:26:42 UTC (rev 6770) +++ trunk/bogofilter/src/score.c 2009-02-01 01:29:47 UTC (rev 6771) @@ -40,6 +40,19 @@ #include "gsl/gsl_cdf.h" #endif +/* Structure Definitions */ + +/* struct for saving stats for printing. */ +typedef struct score_s { + double min_dev; + double spamicity; + u_int32_t robn; + double p_ln; /* Robinson P, as a log*/ + double q_ln; /* Robinson Q, as a log*/ + double p_pr; /* Robinson P */ + double q_pr; /* Robinson Q */ +} score_t; + /* Function Prototypes */ static double get_spamicity(size_t robn, FLOAT P, FLOAT Q); Modified: trunk/bogofilter/src/score.h =================================================================== --- trunk/bogofilter/src/score.h 2009-01-30 00:26:42 UTC (rev 6770) +++ trunk/bogofilter/src/score.h 2009-02-01 01:29:47 UTC (rev 6771) @@ -9,19 +9,6 @@ #define MAX_REPEATS 1 /* cap on word frequency per message */ #define GOOD_BIAS 1.0 /* don't give good words more weight */ -/* -** Define a struct so stats can be saved for printing. -*/ - -typedef struct score_s { - double spamicity; - u_int32_t robn; - double p_ln; /* Robinson P, as a log*/ - double q_ln; /* Robinson Q, as a log*/ - double p_pr; /* Robinson P */ - double q_pr; /* Robinson Q */ -} score_t; - extern void lookup_words(wordhash_t *wh); extern void score_initialize(void); extern void score_cleanup(void); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <re...@us...> - 2009-02-01 02:29:31
|
Revision: 6774 http://bogofilter.svn.sourceforge.net/bogofilter/?rev=6774&view=rev Author: relson Date: 2009-02-01 02:29:27 +0000 (Sun, 01 Feb 2009) Log Message: ----------- Cleanup compiler warning. Modified Paths: -------------- trunk/bogofilter/src/memstr.c trunk/bogofilter/src/memstr.h Modified: trunk/bogofilter/src/memstr.c =================================================================== --- trunk/bogofilter/src/memstr.c 2009-02-01 01:41:00 UTC (rev 6773) +++ trunk/bogofilter/src/memstr.c 2009-02-01 02:29:27 UTC (rev 6774) @@ -12,9 +12,9 @@ /** find the C string \a needle in the \a n bytes starting with \a hay, * \return 0 if no match found, the pointer to the first byte otherwise. */ -void *memstr(const void *hay, size_t n, const char *needle) +void *memstr(void *hay, size_t n, const char *needle) { - unsigned const char *haystack = hay; + unsigned char *haystack = hay; size_t l = strlen(needle); while (n >= l) { Modified: trunk/bogofilter/src/memstr.h =================================================================== --- trunk/bogofilter/src/memstr.h 2009-02-01 01:41:00 UTC (rev 6773) +++ trunk/bogofilter/src/memstr.h 2009-02-01 02:29:27 UTC (rev 6774) @@ -11,6 +11,6 @@ #include <string.h> /** find needle in haystack (which is treated as unsigned char *). */ -void *memstr(const void *haystack, size_t n, const char *needle); +void *memstr(void *haystack, size_t n, const char *needle); #endif This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <re...@us...> - 2009-02-01 02:55:30
|
Revision: 6772 http://bogofilter.svn.sourceforge.net/bogofilter/?rev=6772&view=rev Author: relson Date: 2009-02-01 01:39:47 +0000 (Sun, 01 Feb 2009) Log Message: ----------- Remove unused FILE * parameter. Modified Paths: -------------- trunk/bogofilter/src/bogofilter.c trunk/bogofilter/src/bogotune.c trunk/bogofilter/src/score.c trunk/bogofilter/src/score.h Modified: trunk/bogofilter/src/bogofilter.c =================================================================== --- trunk/bogofilter/src/bogofilter.c 2009-02-01 01:29:47 UTC (rev 6771) +++ trunk/bogofilter/src/bogofilter.c 2009-02-01 01:39:47 UTC (rev 6772) @@ -110,7 +110,7 @@ if (classify_msg || write_msg) { lookup_words(w); /* This reads the database */ - spamicity = msg_compute_spamicity(w, NULL); + spamicity = msg_compute_spamicity(w); status = msg_status(); if (run_type & RUN_UPDATE) /* Note: don't register if RC_UNSURE */ { Modified: trunk/bogofilter/src/bogotune.c =================================================================== --- trunk/bogofilter/src/bogotune.c 2009-02-01 01:29:47 UTC (rev 6771) +++ trunk/bogofilter/src/bogotune.c 2009-02-01 01:39:47 UTC (rev 6772) @@ -385,7 +385,7 @@ mlitem_t *item; for (item = list->head; item != NULL; item = item->next) { wordhash_t *wh = item->wh; - double score = msg_compute_spamicity(wh, NULL); + double score = msg_compute_spamicity(wh); results[count++] = score; if ( -verbose == SCORE_DETAIL || (-verbose >= SCORE_DETAIL && EPS < score && score < 1 - EPS)) @@ -435,7 +435,7 @@ mlitem_t *item; for (item = list->head; item != NULL; item = item->next) { wordhash_t *wh = item->wh; - double score = msg_compute_spamicity(wh, NULL); + double score = msg_compute_spamicity(wh); results[count++] = score; if ( -verbose == SCORE_DETAIL || (-verbose >= SCORE_DETAIL && EPS < score && score < 1 - EPS)) Modified: trunk/bogofilter/src/score.c =================================================================== --- trunk/bogofilter/src/score.c 2009-02-01 01:29:47 UTC (rev 6771) +++ trunk/bogofilter/src/score.c 2009-02-01 01:39:47 UTC (rev 6772) @@ -202,7 +202,7 @@ /** selects the best spam/non-spam indicators and calculates Robinson's S, * \return -1.0 for error, S otherwise */ -double msg_compute_spamicity(wordhash_t *wh, FILE *fp) /*@globals errno@*/ +double msg_compute_spamicity(wordhash_t *wh) /*@globals errno@*/ { hashnode_t *node; @@ -214,8 +214,6 @@ size_t count = 0; bool need_stats = (Rtable || passthrough || (verbose > 0)) && !fBogotune; - (void) fp; /* suppress compiler warning */ - if (DEBUG_ALGORITHM(2)) fprintf(dbgout, "### msg_compute_spamicity() begins\n"); if (DEBUG_ALGORITHM(2)) fprintf(dbgout, "min_dev: %f, robs: %f, robx: %f\n", Modified: trunk/bogofilter/src/score.h =================================================================== --- trunk/bogofilter/src/score.h 2009-02-01 01:29:47 UTC (rev 6771) +++ trunk/bogofilter/src/score.h 2009-02-01 01:39:47 UTC (rev 6772) @@ -13,7 +13,7 @@ extern void score_initialize(void); extern void score_cleanup(void); -extern double msg_compute_spamicity(wordhash_t *wordhash, FILE *fp) /*@globals errno@*/; +extern double msg_compute_spamicity(wordhash_t *wordhash) /*@globals errno@*/; extern double msg_spamicity(void); extern rc_t msg_status(void); extern void msg_print_stats(FILE *fp); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <re...@us...> - 2009-02-07 02:24:00
|
Revision: 6782 http://bogofilter.svn.sourceforge.net/bogofilter/?rev=6782&view=rev Author: relson Date: 2009-02-07 01:37:38 +0000 (Sat, 07 Feb 2009) Log Message: ----------- Improved find_scoring_boundary function. Modified Paths: -------------- trunk/bogofilter/src/score.c trunk/bogofilter/src/tests/outputs/token.count.ref trunk/bogofilter/src/tests/t.token.count Modified: trunk/bogofilter/src/score.c =================================================================== --- trunk/bogofilter/src/score.c 2009-02-01 14:10:32 UTC (rev 6781) +++ trunk/bogofilter/src/score.c 2009-02-07 01:37:38 UTC (rev 6782) @@ -44,7 +44,7 @@ typedef struct probnode_t { hashnode_t * node; - double prob; + double dev; } probnode_t; /* struct for saving stats for printing. */ @@ -58,12 +58,20 @@ double q_pr; /* Robinson Q */ } score_t; +/* struct for printing doubles as hex. */ +typedef union +{ + double d; + long long q; +} t_DOUBLE_QUAD; + /* Function Prototypes */ static double get_spamicity(size_t robn, FLOAT P, FLOAT Q); static bool need_scoring_boundary(wordhash_t *wh); static double find_scoring_boundary(wordhash_t *wh); static void compute_spamicity(wordhash_t *wh, FLOAT *P, FLOAT *Q, size_t *robn, bool need_stats); +static int compare_probnode_t(const void *const ipn1, const void *const ipn2); /* Static Variables */ @@ -275,7 +283,7 @@ prob = calc_prob(cnts->good, cnts->bad, cnts->msgs_good, cnts->msgs_bad); - useflag = fabs(EVEN_ODDS - prob) >= score.min_dev; + useflag = fabs(prob - EVEN_ODDS) > score.min_dev; if (need_stats) rstats_add(token, prob, useflag, cnts); @@ -367,8 +375,7 @@ double find_scoring_boundary(wordhash_t *wh) { size_t node_index = 0; - size_t prob_index; - size_t node_count = max(token_count_fix, max(token_count_min, token_count_max)); + size_t node_count = wh->count; double min_prob = (token_count_max == 0.0) ? min_dev : 1.0; @@ -377,7 +384,7 @@ for (node = wordhash_first(wh); node != NULL; node = wordhash_next(wh)) { - double prob; + double prob, dev; word_t *token; wordcnts_t *cnts; wordprop_t *props; @@ -393,49 +400,39 @@ prob = calc_prob(cnts->good, cnts->bad, cnts->msgs_good, cnts->msgs_bad); - prob = fabs(prob - EVEN_ODDS); + dev = fabs(prob - EVEN_ODDS); - if (node_index < node_count) - { - // first "n" tokens go into array - node_array[node_index].node = node; - node_array[node_index].prob = prob; - if (prob < min_prob) - min_prob = prob; - node_index += 1; - } - else if (prob > min_prob) - { - /* after the first "n" tokens, a token goes into array if - ** it has a higher score than a token already in the - ** array */ - for (prob_index = 0; prob_index < node_count; prob_index += 1) - { - /* replace element with minimum score */ - if (node_array[prob_index].prob == min_prob) - { - node_array[prob_index].node = node; - node_array[prob_index].prob = prob; - break; - } - } - /* find current minimum score */ - min_prob = 1.0; - for (prob_index = 0; prob_index < node_count; prob_index += 1) - { - if (node_array[prob_index].prob < min_prob) - { - min_prob = node_array[ prob_index ].prob; - } - } - } + node_array[node_index].node = node; + node_array[node_index].dev = dev; + node_index += 1; } + qsort(node_array, node_count, sizeof(probnode_t), compare_probnode_t); + + node_index = max(token_count_fix, max(token_count_min, token_count_max)); + min_prob = node_array[ node_index ].dev; + + if (DEBUG_SPAMICITY(1)) { + printf( "%d %8.6f\n", (int)node_index, min_prob ); + } + free(node_array); return min_prob; } +static int compare_probnode_t(const void *const ipn1, const void *const ipn2) +{ + const probnode_t *pn1 = (const probnode_t const *)ipn1; + const probnode_t *pn2 = (const probnode_t const *)ipn2; + + if (pn1->dev < pn2->dev) + return +1; + if (pn1->dev > pn2->dev) + return -1; + return 0; +} + void score_initialize(void) { word_t *word_robx = word_news(ROBX_W); @@ -580,6 +577,12 @@ pfx, "N_P_Q_S_s_x_md", (unsigned long)score.robn, score.p_pr, score.q_pr, score.spamicity, robs, robx, score.min_dev); } + if (DEBUG_SPAMICITY(1)) { + t_DOUBLE_QUAD dq; + dq.d = score.min_dev; + fprintf(dbgout, "md: %10.8f 0x%016qX\n", + score.min_dev, dq.q); + } } /* Done */ Modified: trunk/bogofilter/src/tests/outputs/token.count.ref =================================================================== --- trunk/bogofilter/src/tests/outputs/token.count.ref 2009-02-01 14:10:32 UTC (rev 6781) +++ trunk/bogofilter/src/tests/outputs/token.count.ref 2009-02-07 01:37:38 UTC (rev 6782) @@ -57,7 +57,7 @@ "dealer" 2 0.000000 0.095238 0.995766 + "agree" 3 0.000000 0.142857 0.997169 + N_P_Q_S_s_x_md 4 0.000085 0.065746 0.532831 - 0.017800 0.520000 0.491605 + 0.017800 0.520000 0.327442 #### --min-dev=0.496 --token-count-max=8 #### X-Bogosity: Unsure, tests=bogofilter, spamicity=0.493025 n pgood pbad fw U @@ -177,7 +177,7 @@ "dealer" 2 0.000000 0.095238 0.995766 + "agree" 3 0.000000 0.142857 0.997169 + N_P_Q_S_s_x_md 8 0.005444 0.034712 0.514634 - 0.017800 0.520000 0.277190 + 0.017800 0.520000 0.239529 #### --min-dev=0.100 --token-count=20 #### X-Bogosity: Unsure, tests=bogofilter, spamicity=0.570641 n pgood pbad fw U @@ -207,7 +207,7 @@ "dealer" 2 0.000000 0.095238 0.995766 + "agree" 3 0.000000 0.142857 0.997169 + N_P_Q_S_s_x_md 20 0.100430 0.241712 0.570641 - 0.017800 0.520000 0.067490 + 0.017800 0.520000 0.059390 #### U 0.493025 --min-dev=0.496 #### U 0.532831 --min-dev=0.496 --token-count-min=4 #### U 0.493025 --min-dev=0.496 --token-count-max=8 Modified: trunk/bogofilter/src/tests/t.token.count =================================================================== --- trunk/bogofilter/src/tests/t.token.count 2009-02-01 14:10:32 UTC (rev 6781) +++ trunk/bogofilter/src/tests/t.token.count 2009-02-07 01:37:38 UTC (rev 6782) @@ -74,9 +74,16 @@ cat /dev/null > $OUT +#TOK="-x s -D" + for OPT in "$TST1" "$MIN1" "$MAX1" "$TST2" "$MIN2" "$MAX2" "$FIX2" ; do echo "#### $OPT ####" >> $OUT - map_rc $BOGOFILTER $OPT -c $CFG -vvv -H -I $INP >> $OUT + map_rc $BOGOFILTER $OPT -c $CFG -vvv -H $TOK -I $INP >> $OUT + if [ -n "$TOK" ] ; then + echo >> $OUT + echo >> $OUT + echo >> $OUT + fi done for OPT in "$TST1" "$MIN1" "$MAX1" "$TST2" "$MIN2" "$MAX2" "$FIX2" ; do This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <re...@us...> - 2009-02-08 16:05:39
|
Revision: 6785 http://bogofilter.svn.sourceforge.net/bogofilter/?rev=6785&view=rev Author: relson Date: 2009-02-08 16:05:35 +0000 (Sun, 08 Feb 2009) Log Message: ----------- Break up min_dev check to avoid floating point issues on 32-bit. Modified Paths: -------------- trunk/bogofilter/src/score.c trunk/bogofilter/src/tests/t.token.count Modified: trunk/bogofilter/src/score.c =================================================================== --- trunk/bogofilter/src/score.c 2009-02-07 10:37:02 UTC (rev 6784) +++ trunk/bogofilter/src/score.c 2009-02-08 16:05:35 UTC (rev 6785) @@ -267,6 +267,7 @@ { bool useflag; double prob; + double diff; word_t *token; wordcnts_t *cnts; wordprop_t *props; @@ -280,12 +281,57 @@ token = NULL; } - count += 1; - prob = calc_prob(cnts->good, cnts->bad, cnts->msgs_good, cnts->msgs_bad); + + /* Regression test 'tests/t.token.count' tests bogofilter's + ** '--token-count...' options + ** + ** A 'min_dev' value is found that provides an exact cutoff + ** for selecting scorable tokens. For each token, a + ** comparison of 'prob', 'EVEN_ODDS', and 'min_dev' then + ** determines whether to score the token. How the comparison + ** expression is written is important since some ways of + ** writing the comparison can produce different answers on + ** 32-bit and 64-bit architectures. + ** + ** tested on: + ** 64-bits: AMD64x2 + ** 32-bits: AMD Geode & Intel PIII + */ + +#if 0 + // unsafe on 32-bits useflag = fabs(prob - EVEN_ODDS) > score.min_dev; +#else + // safe on 32-bits and 64-bits + diff = fabs(prob - EVEN_ODDS); + useflag = diff > score.min_dev; +#endif + if (DEBUG_SPAMICITY(1)) { + // display difference of 'useflag' computations + double d1 = fabs(prob - EVEN_ODDS); + double d2 = fabs(d1 - score.min_dev); + double d3 = fabs(fabs(prob - EVEN_ODDS) - score.min_dev); + + if ( d2 <= 0.001 ) { + t_DOUBLE_QUAD dq; + + dq.d = prob; + fprintf(dbgout, "md2-pf: %15.10f %16llX %s %-16s\n", prob, dq.q, useflag ? "+" : "-", token->u.text); + + dq.d = d1; + fprintf(dbgout, "md2-d1: %15.10f %16llX\n", d1, dq.q ); + + dq.d = d2; + fprintf(dbgout, "md2-d2: %15.10f %16llX %16.6g\n", d2, dq.q, d2 ); + + dq.d = d3; + fprintf(dbgout, "md2-d3: %15.10f %16llX %16.6g\n", d3, dq.q, d3 ); + } + } + if (need_stats) rstats_add(token, prob, useflag, cnts); @@ -312,6 +358,7 @@ } if (DEBUG_ALGORITHM(3)) { + count += 1; (void)fprintf(dbgout, "%3lu %3lu %f ", (unsigned long)*robn, (unsigned long)count, prob); (void)word_puts(token, 0, dbgout); @@ -383,6 +430,7 @@ hashnode_t *node; probnode_t *node_array = calloc(node_count, sizeof(probnode_t)); + /* create array from linked list to allow sorting */ for (node = wordhash_first(wh); node != NULL; node = wordhash_next(wh)) { double prob, dev; @@ -414,24 +462,6 @@ node_index = max(token_count_fix, max(token_count_min, token_count_max)); min_prob = node_array[ node_index ].dev; - if (DEBUG_SPAMICITY(1)) { - printf( "%d %8.6f\n", (int)node_index, min_prob ); - } - - if (DEBUG_SPAMICITY(2)) { - unsigned int ni; - for (ni = 0; ni <= node_index; ni += 1) - { - t_DOUBLE_QUAD dq; - probnode_t *pn = &node_array[ni]; - hashnode_t *hn = pn->node ; - - dq.d = pn->dev; - printf( "%2d %-16s %8.6f %8.6f %16llX\n", - ni, hn->key->u.text, pn->prob, pn->dev, dq.q); - } - } - free(node_array); return min_prob; @@ -592,12 +622,6 @@ (void)fprintf(fpo, "%s%-24s %6lu %9.2e %9.2e %9.2e %9.2e %9.2e %5.3f\n", pfx, "N_P_Q_S_s_x_md", (unsigned long)score.robn, score.p_pr, score.q_pr, score.spamicity, robs, robx, score.min_dev); - } - if (DEBUG_SPAMICITY(1)) { - t_DOUBLE_QUAD dq; - dq.d = score.min_dev; - fprintf(dbgout, "md: %10.8f 0x%016llX\n", - score.min_dev, dq.q); } } Modified: trunk/bogofilter/src/tests/t.token.count =================================================================== --- trunk/bogofilter/src/tests/t.token.count 2009-02-07 10:37:02 UTC (rev 6784) +++ trunk/bogofilter/src/tests/t.token.count 2009-02-08 16:05:35 UTC (rev 6785) @@ -56,29 +56,29 @@ header_format = %h: %c, tests=bogofilter, spamicity=%p EOF -# 2 tokens scored -TST1="--min-dev=0.496" +# large min_dev - few tokens scored +TST_L="--min-dev=0.496" # min 4 -- score more -MIN1="--min-dev=0.496 --token-count-min=4" +MIN_L4="--min-dev=0.496 --token-count-min=4" # max 8 -- OK - 2 less than 8 -MAX1="--min-dev=0.496 --token-count-max=8" +MAX_L8="--min-dev=0.496 --token-count-max=8" -# 18 tokens scored -TST2="--min-dev=0.100" +# small min_dev - many tokens scored +TST_S="--min-dev=0.100" # min 4 -- OK - 4 less than 18 -MIN2="--min-dev=0.100 --token-count-min=4" +MIN_S4="--min-dev=0.100 --token-count-min=4" # max 8 -- score fewer -MAX2="--min-dev=0.100 --token-count-max=8" +MAX_S8="--min-dev=0.100 --token-count-max=8" # fix 20 -- score more -FIX2="--min-dev=0.100 --token-count=20" +FIX_S20="--min-dev=0.100 --token-count=20" cat /dev/null > $OUT #TOK="-x s -D" -for OPT in "$TST1" "$MIN1" "$MAX1" "$TST2" "$MIN2" "$MAX2" "$FIX2" ; do +for OPT in "$TST_L" "$MIN_L4" "$MAX_L8" "$TST_S" "$MIN_S4" "$MAX_S8" "$FIX_S20" ; do echo "#### $OPT ####" >> $OUT - map_rc $BOGOFILTER $OPT -c $CFG -vvv -H $TOK -I $INP >> $OUT + map_rc $BOGOFILTER $OPT $TOK -c $CFG -vvv -H -I $INP >> $OUT if [ -n "$TOK" ] ; then echo >> $OUT echo >> $OUT @@ -86,8 +86,8 @@ fi done -for OPT in "$TST1" "$MIN1" "$MAX1" "$TST2" "$MIN2" "$MAX2" "$FIX2" ; do - SCORE=$( map_rc $BOGOFILTER $OPT -c $CFG -v -tt -H -I $INP ) +for OPT in "$TST_L" "$MIN_L4" "$MAX_L8" "$TST_S" "$MIN_S4" "$MAX_S8" "$FIX_S20" ; do + SCORE=$( map_rc $BOGOFILTER $OPT $TOK -c $CFG -v -tt -H -I $INP ) echo "#### $SCORE $OPT " >> $OUT done @@ -97,3 +97,4 @@ else diff $DIFF_BRIEF "$REF" "$OUT" fi + This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <re...@us...> - 2009-02-08 20:43:00
|
Revision: 6786 http://bogofilter.svn.sourceforge.net/bogofilter/?rev=6786&view=rev Author: relson Date: 2009-02-08 20:42:57 +0000 (Sun, 08 Feb 2009) Log Message: ----------- Revise regression test to avoid 32/64 bit issues. Modified Paths: -------------- trunk/bogofilter/src/score.c trunk/bogofilter/src/tests/outputs/token.count.ref trunk/bogofilter/src/tests/t.token.count Modified: trunk/bogofilter/src/score.c =================================================================== --- trunk/bogofilter/src/score.c 2009-02-08 16:05:35 UTC (rev 6785) +++ trunk/bogofilter/src/score.c 2009-02-08 20:42:57 UTC (rev 6786) @@ -267,7 +267,6 @@ { bool useflag; double prob; - double diff; word_t *token; wordcnts_t *cnts; wordprop_t *props; @@ -284,54 +283,8 @@ prob = calc_prob(cnts->good, cnts->bad, cnts->msgs_good, cnts->msgs_bad); - /* Regression test 'tests/t.token.count' tests bogofilter's - ** '--token-count...' options - ** - ** A 'min_dev' value is found that provides an exact cutoff - ** for selecting scorable tokens. For each token, a - ** comparison of 'prob', 'EVEN_ODDS', and 'min_dev' then - ** determines whether to score the token. How the comparison - ** expression is written is important since some ways of - ** writing the comparison can produce different answers on - ** 32-bit and 64-bit architectures. - ** - ** tested on: - ** 64-bits: AMD64x2 - ** 32-bits: AMD Geode & Intel PIII - */ - -#if 0 - // unsafe on 32-bits useflag = fabs(prob - EVEN_ODDS) > score.min_dev; -#else - // safe on 32-bits and 64-bits - diff = fabs(prob - EVEN_ODDS); - useflag = diff > score.min_dev; -#endif - if (DEBUG_SPAMICITY(1)) { - // display difference of 'useflag' computations - double d1 = fabs(prob - EVEN_ODDS); - double d2 = fabs(d1 - score.min_dev); - double d3 = fabs(fabs(prob - EVEN_ODDS) - score.min_dev); - - if ( d2 <= 0.001 ) { - t_DOUBLE_QUAD dq; - - dq.d = prob; - fprintf(dbgout, "md2-pf: %15.10f %16llX %s %-16s\n", prob, dq.q, useflag ? "+" : "-", token->u.text); - - dq.d = d1; - fprintf(dbgout, "md2-d1: %15.10f %16llX\n", d1, dq.q ); - - dq.d = d2; - fprintf(dbgout, "md2-d2: %15.10f %16llX %16.6g\n", d2, dq.q, d2 ); - - dq.d = d3; - fprintf(dbgout, "md2-d3: %15.10f %16llX %16.6g\n", d3, dq.q, d3 ); - } - } - if (need_stats) rstats_add(token, prob, useflag, cnts); Modified: trunk/bogofilter/src/tests/outputs/token.count.ref =================================================================== --- trunk/bogofilter/src/tests/outputs/token.count.ref 2009-02-08 16:05:35 UTC (rev 6785) +++ trunk/bogofilter/src/tests/outputs/token.count.ref 2009-02-08 20:42:57 UTC (rev 6786) @@ -2,11 +2,11 @@ X-Bogosity: Unsure, tests=bogofilter, spamicity=0.493025 n pgood pbad fw U "there" 14 0.291667 0.000000 0.000660 + - "its" 12 0.229167 0.047619 0.172558 - + "zip" 1 0.020833 0.000000 0.009094 - "all" 21 0.395833 0.095238 0.194216 - "web" 10 0.187500 0.047619 0.203096 - "too" 9 0.166667 0.047619 0.222810 - - "more" 15 0.270833 0.095238 0.260471 - + "been" 8 0.145833 0.047619 0.246762 - "file" 6 0.104167 0.047619 0.314336 - "also" 11 0.187500 0.095238 0.337138 - "about" 16 0.270833 0.142857 0.345518 - @@ -32,11 +32,11 @@ X-Bogosity: Unsure, tests=bogofilter, spamicity=0.532831 n pgood pbad fw U "there" 14 0.291667 0.000000 0.000660 + - "its" 12 0.229167 0.047619 0.172558 - + "zip" 1 0.020833 0.000000 0.009094 - "all" 21 0.395833 0.095238 0.194216 - "web" 10 0.187500 0.047619 0.203096 - "too" 9 0.166667 0.047619 0.222810 - - "more" 15 0.270833 0.095238 0.260471 - + "been" 8 0.145833 0.047619 0.246762 - "file" 6 0.104167 0.047619 0.314336 - "also" 11 0.187500 0.095238 0.337138 - "about" 16 0.270833 0.142857 0.345518 - @@ -57,16 +57,16 @@ "dealer" 2 0.000000 0.095238 0.995766 + "agree" 3 0.000000 0.142857 0.997169 + N_P_Q_S_s_x_md 4 0.000085 0.065746 0.532831 - 0.017800 0.520000 0.327442 + 0.017800 0.520000 0.490906 #### --min-dev=0.496 --token-count-max=8 #### X-Bogosity: Unsure, tests=bogofilter, spamicity=0.493025 n pgood pbad fw U "there" 14 0.291667 0.000000 0.000660 + - "its" 12 0.229167 0.047619 0.172558 - + "zip" 1 0.020833 0.000000 0.009094 - "all" 21 0.395833 0.095238 0.194216 - "web" 10 0.187500 0.047619 0.203096 - "too" 9 0.166667 0.047619 0.222810 - - "more" 15 0.270833 0.095238 0.260471 - + "been" 8 0.145833 0.047619 0.246762 - "file" 6 0.104167 0.047619 0.314336 - "also" 11 0.187500 0.095238 0.337138 - "about" 16 0.270833 0.142857 0.345518 - @@ -89,14 +89,14 @@ N_P_Q_S_s_x_md 2 0.019431 0.005482 0.493025 0.017800 0.520000 0.496000 #### --min-dev=0.100 #### -X-Bogosity: Unsure, tests=bogofilter, spamicity=0.559839 +X-Bogosity: Unsure, tests=bogofilter, spamicity=0.496075 n pgood pbad fw U "there" 14 0.291667 0.000000 0.000660 + - "its" 12 0.229167 0.047619 0.172558 + + "zip" 1 0.020833 0.000000 0.009094 + "all" 21 0.395833 0.095238 0.194216 + "web" 10 0.187500 0.047619 0.203096 + "too" 9 0.166667 0.047619 0.222810 + - "more" 15 0.270833 0.095238 0.260471 + + "been" 8 0.145833 0.047619 0.246762 + "file" 6 0.104167 0.047619 0.314336 + "also" 11 0.187500 0.095238 0.337138 + "about" 16 0.270833 0.142857 0.345518 + @@ -116,17 +116,17 @@ "visiting" 1 0.000000 0.047619 0.991605 + "dealer" 2 0.000000 0.095238 0.995766 + "agree" 3 0.000000 0.142857 0.997169 + - N_P_Q_S_s_x_md 18 0.073858 0.193537 0.559839 + N_P_Q_S_s_x_md 18 0.079368 0.071517 0.496075 0.017800 0.520000 0.100000 #### --min-dev=0.100 --token-count-min=4 #### -X-Bogosity: Unsure, tests=bogofilter, spamicity=0.559839 +X-Bogosity: Unsure, tests=bogofilter, spamicity=0.496075 n pgood pbad fw U "there" 14 0.291667 0.000000 0.000660 + - "its" 12 0.229167 0.047619 0.172558 + + "zip" 1 0.020833 0.000000 0.009094 + "all" 21 0.395833 0.095238 0.194216 + "web" 10 0.187500 0.047619 0.203096 + "too" 9 0.166667 0.047619 0.222810 + - "more" 15 0.270833 0.095238 0.260471 + + "been" 8 0.145833 0.047619 0.246762 + "file" 6 0.104167 0.047619 0.314336 + "also" 11 0.187500 0.095238 0.337138 + "about" 16 0.270833 0.142857 0.345518 + @@ -146,17 +146,17 @@ "visiting" 1 0.000000 0.047619 0.991605 + "dealer" 2 0.000000 0.095238 0.995766 + "agree" 3 0.000000 0.142857 0.997169 + - N_P_Q_S_s_x_md 18 0.073858 0.193537 0.559839 + N_P_Q_S_s_x_md 18 0.079368 0.071517 0.496075 0.017800 0.520000 0.100000 #### --min-dev=0.100 --token-count-max=8 #### -X-Bogosity: Unsure, tests=bogofilter, spamicity=0.514634 +X-Bogosity: Unsure, tests=bogofilter, spamicity=0.500083 n pgood pbad fw U "there" 14 0.291667 0.000000 0.000660 + - "its" 12 0.229167 0.047619 0.172558 + + "zip" 1 0.020833 0.000000 0.009094 + "all" 21 0.395833 0.095238 0.194216 + "web" 10 0.187500 0.047619 0.203096 + "too" 9 0.166667 0.047619 0.222810 + - "more" 15 0.270833 0.095238 0.260471 - + "been" 8 0.145833 0.047619 0.246762 - "file" 6 0.104167 0.047619 0.314336 - "also" 11 0.187500 0.095238 0.337138 - "about" 16 0.270833 0.142857 0.345518 - @@ -176,17 +176,17 @@ "visiting" 1 0.000000 0.047619 0.991605 + "dealer" 2 0.000000 0.095238 0.995766 + "agree" 3 0.000000 0.142857 0.997169 + - N_P_Q_S_s_x_md 8 0.005444 0.034712 0.514634 - 0.017800 0.520000 0.239529 + N_P_Q_S_s_x_md 8 0.006086 0.006252 0.500083 + 0.017800 0.520000 0.253238 #### --min-dev=0.100 --token-count=20 #### -X-Bogosity: Unsure, tests=bogofilter, spamicity=0.570641 +X-Bogosity: Unsure, tests=bogofilter, spamicity=0.495886 n pgood pbad fw U "there" 14 0.291667 0.000000 0.000660 + - "its" 12 0.229167 0.047619 0.172558 + + "zip" 1 0.020833 0.000000 0.009094 + "all" 21 0.395833 0.095238 0.194216 + "web" 10 0.187500 0.047619 0.203096 + "too" 9 0.166667 0.047619 0.222810 + - "more" 15 0.270833 0.095238 0.260471 + + "been" 8 0.145833 0.047619 0.246762 + "file" 6 0.104167 0.047619 0.314336 + "also" 11 0.187500 0.095238 0.337138 + "about" 16 0.270833 0.142857 0.345518 + @@ -206,12 +206,12 @@ "visiting" 1 0.000000 0.047619 0.991605 + "dealer" 2 0.000000 0.095238 0.995766 + "agree" 3 0.000000 0.142857 0.997169 + - N_P_Q_S_s_x_md 20 0.100430 0.241712 0.570641 + N_P_Q_S_s_x_md 20 0.107193 0.098964 0.495886 0.017800 0.520000 0.059390 #### U 0.493025 --min-dev=0.496 #### U 0.532831 --min-dev=0.496 --token-count-min=4 #### U 0.493025 --min-dev=0.496 --token-count-max=8 -#### U 0.559839 --min-dev=0.100 -#### U 0.559839 --min-dev=0.100 --token-count-min=4 -#### U 0.514634 --min-dev=0.100 --token-count-max=8 -#### U 0.570641 --min-dev=0.100 --token-count=20 +#### U 0.496075 --min-dev=0.100 +#### U 0.496075 --min-dev=0.100 --token-count-min=4 +#### U 0.500083 --min-dev=0.100 --token-count-max=8 +#### U 0.495886 --min-dev=0.100 --token-count=20 Modified: trunk/bogofilter/src/tests/t.token.count =================================================================== --- trunk/bogofilter/src/tests/t.token.count 2009-02-08 16:05:35 UTC (rev 6785) +++ trunk/bogofilter/src/tests/t.token.count 2009-02-08 20:42:57 UTC (rev 6786) @@ -24,11 +24,11 @@ cat > $INP <<EOF there 0.000660 4 8 20 -its 0.172558 8 20 +zip 0.009094 8 20 all 0.194216 8 20 web 0.203096 8 20 -too 0.222810 9 -more 0.260471 9 20 +too 0.222810 8 20 +been 0.246762 20 file 0.314336 20 also 0.337138 20 about 0.345518 20 @@ -81,8 +81,6 @@ map_rc $BOGOFILTER $OPT $TOK -c $CFG -vvv -H -I $INP >> $OUT if [ -n "$TOK" ] ; then echo >> $OUT - echo >> $OUT - echo >> $OUT fi done @@ -97,4 +95,3 @@ else diff $DIFF_BRIEF "$REF" "$OUT" fi - This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <re...@us...> - 2009-02-12 01:12:58
|
Revision: 6789 http://bogofilter.svn.sourceforge.net/bogofilter/?rev=6789&view=rev Author: relson Date: 2009-02-12 01:12:53 +0000 (Thu, 12 Feb 2009) Log Message: ----------- Enhance formatting to allow including token count in X-Bogosity message. Modified Paths: -------------- trunk/bogofilter/src/bogofilter.c trunk/bogofilter/src/format.c trunk/bogofilter/src/format.h trunk/bogofilter/src/register.c Modified: trunk/bogofilter/src/bogofilter.c =================================================================== --- trunk/bogofilter/src/bogofilter.c 2009-02-11 23:36:35 UTC (rev 6788) +++ trunk/bogofilter/src/bogofilter.c 2009-02-12 01:12:53 UTC (rev 6789) @@ -34,6 +34,7 @@ #include "bogoconfig.h" #include "bogoreader.h" #include "collect.h" +#include "format.h" #include "passthrough.h" #include "register.h" #include "rstats.h" @@ -98,6 +99,8 @@ wordhash_sort(w); msgcount += 1; + format_set_counts(w->count, msgcount); + if (!passthrough_keepopen()) bogoreader_close_ifeof(); Modified: trunk/bogofilter/src/format.c =================================================================== --- trunk/bogofilter/src/format.c 2009-02-11 23:36:35 UTC (rev 6788) +++ trunk/bogofilter/src/format.c 2009-02-12 01:12:53 UTC (rev 6789) @@ -434,12 +434,16 @@ return convert_format_to_string( buff, size, terse_format ); } -char *format_log_update(char *buff, size_t size, const char *_reg, const char *_unreg, uint _wrd, uint _msg) +void format_set_counts(uint _wrd, uint _msg) { + wrdcount = _wrd; + msgcount = _msg; +} + +char *format_log_update(char *buff, size_t size, const char *_reg, const char *_unreg) +{ reg = _reg; unreg = _unreg; - wrdcount = _wrd; - msgcount = _msg; return convert_format_to_string( buff, size, log_update_format ); } Modified: trunk/bogofilter/src/format.h =================================================================== --- trunk/bogofilter/src/format.h 2009-02-11 23:36:35 UTC (rev 6788) +++ trunk/bogofilter/src/format.h 2009-02-12 01:12:53 UTC (rev 6789) @@ -36,5 +36,6 @@ extern char *format_header(char *buff, size_t size); extern char *format_terse(char *buff, size_t size); extern char *format_log_header(char *buff, size_t size); -extern char *format_log_update(char *buff, size_t size, const char *reg, const char *unreg, uint wordcount, uint msgcount); +extern void format_set_counts(uint _wrd, uint _msg); +extern char *format_log_update(char *buff, size_t size, const char *reg, const char *unreg); #endif Modified: trunk/bogofilter/src/register.c =================================================================== --- trunk/bogofilter/src/register.c 2009-02-11 23:36:35 UTC (rev 6788) +++ trunk/bogofilter/src/register.c 2009-02-12 01:12:53 UTC (rev 6789) @@ -57,7 +57,8 @@ if (wordcount == 0) msgcount = 0; - format_log_update(msg_register, msg_register_size, u, r, wordcount, msgcount); + format_set_counts(wordcount, msgcount); + format_log_update(msg_register, msg_register_size, u, r); if (verbose) (void)fprintf(dbgout, "# %u word%s, %u message%s\n", This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <re...@us...> - 2009-02-12 17:05:14
|
Revision: 6791 http://bogofilter.svn.sourceforge.net/bogofilter/?rev=6791&view=rev Author: relson Date: 2009-02-12 17:05:08 +0000 (Thu, 12 Feb 2009) Log Message: ----------- Use listsort for statistics (-vv and -vvv) output. Modified Paths: -------------- trunk/bogofilter/src/Makefile.am trunk/bogofilter/src/listsort.c trunk/bogofilter/src/rstats.c Added Paths: ----------- trunk/bogofilter/src/listsort.h Modified: trunk/bogofilter/src/Makefile.am =================================================================== --- trunk/bogofilter/src/Makefile.am 2009-02-12 17:01:00 UTC (rev 6790) +++ trunk/bogofilter/src/Makefile.am 2009-02-12 17:05:08 UTC (rev 6791) @@ -119,6 +119,7 @@ find_home.h find_home.c find_home_user.c find_home_tildeexpand.c \ format.h format.c \ lexer.h lexer.c lexer_v3.l \ + listsort.h listsort.c \ longoptions.h \ maint.h maint.c \ memstr.h memstr.c \ Modified: trunk/bogofilter/src/listsort.c =================================================================== --- trunk/bogofilter/src/listsort.c 2009-02-12 17:01:00 UTC (rev 6790) +++ trunk/bogofilter/src/listsort.c 2009-02-12 17:05:08 UTC (rev 6791) @@ -39,18 +39,30 @@ * SOFTWARE. */ -#define FALSE 0 -#define TRUE 1 +#include "listsort.h" +typedef unsigned char byte; +#include "word.h" -typedef struct element element; -struct element { - element *next, *prev; - int i; +typedef struct rstats_s rstats_t; +struct rstats_s { + rstats_t *next; + const word_t *token; + u_int32_t good; + u_int32_t bad; + u_int32_t msgs_good; + u_int32_t msgs_bad; + bool used; + double prob; }; -int cmp(element *a, element *b) { +#ifdef TEST + +static int cmp(const element *a, const element *b); + +static int cmp(const element *a, const element *b) { return a->i - b->i; } +#endif /* * This is the actual sort function. Notice that it returns the new @@ -64,7 +76,18 @@ * * list = listsort(mylist); */ -element *listsort(element *list, int is_circular, int is_double) { +#if 1 +#define TRACE(n,p) +#else +#define TRACE(n,p) \ + { \ + const rstats_t *r = (const rstats_t *)p; \ + printf("%s:%d %s: %p %-16s %8.6f\n", __FILE__, __LINE__, \ + n, r, r->token->u.text, r->prob); \ + } +#endif + +element *listsort(element *list, fcn_compare *compare, bool is_circular, bool is_double) { element *p, *q, *e, *tail, *oldhead; int insize, nmerges, psize, qsize, i; @@ -78,6 +101,7 @@ insize = 1; while (1) { +// printf("%s:%d %d\n", __FILE__, __LINE__, insize); p = list; oldhead = list; /* only used for circular linkage */ list = NULL; @@ -114,7 +138,7 @@ /* q is empty; e must come from p. */ e = p; p = p->next; psize--; if (is_circular && p == oldhead) p = NULL; - } else if (cmp(p,q) <= 0) { + } else if (compare(p,q) <= 0) { /* First element of p is lower (or same); * e must come from p. */ e = p; p = p->next; psize--; @@ -163,25 +187,26 @@ * the end and some will not. */ +#ifdef TEST int main(void) { #define n 13 element k[n], *head, *p; - int is_circular, is_double; + bool is_circular, is_double; int order[][n] = { { 0,1,2,3,4,5,6,7,8,9,10,11,12 }, { 6,2,8,4,11,1,12,7,3,9,5,0,10 }, { 12,11,10,9,8,7,6,5,4,3,2,1,0 }, }; - int i, j; + unsigned int i, j; for (j = 0; j < n; j++) k[j].i = j; - listsort(NULL, 0, 0); + listsort(NULL, cmp, 0, 0); - for (is_circular = 0; is_circular < 2; is_circular++) { - for (is_double = 0; is_double < 2; is_double++) { + for (is_circular = false; is_circular <= true; is_circular++) { + for (is_double = false; is_double < true; is_double++) { for (i = 0; i < sizeof(order)/sizeof(*order); i++) { int *ord = order[i]; head = &k[ord[0]]; @@ -200,6 +225,8 @@ } } + printf("%s %s ", is_circular ? "cir" : " ", is_double ? "dbl" : " "); + printf("before:"); p = head; do { @@ -210,8 +237,8 @@ } p = p->next; } while (is_circular ? (p != head) : (p != NULL)); - printf("\n"); - head = listsort(head, is_circular, is_double); + printf("\t"); + head = listsort(head, cmp, is_circular, is_double); printf(" after:"); p = head; do { @@ -224,7 +251,9 @@ } while (is_circular ? (p != head) : (p != NULL)); printf("\n"); } + printf("\n"); } } return 0; } +#endif Added: trunk/bogofilter/src/listsort.h =================================================================== --- trunk/bogofilter/src/listsort.h (rev 0) +++ trunk/bogofilter/src/listsort.h 2009-02-12 17:05:08 UTC (rev 6791) @@ -0,0 +1,18 @@ +#ifndef _LISTSORT_H +#define _LISTSORT_H + +#define LISTSORT + +#include "bftypes.h" + +typedef struct element element; +struct element { + element *next, *prev; + int i; +}; + +typedef int fcn_compare(const element *a, const element *b); + +extern element *listsort(element *list, fcn_compare *compare, bool is_circular, bool is_double); + +#endif Modified: trunk/bogofilter/src/rstats.c =================================================================== --- trunk/bogofilter/src/rstats.c 2009-02-12 17:01:00 UTC (rev 6790) +++ trunk/bogofilter/src/rstats.c 2009-02-12 17:05:08 UTC (rev 6791) @@ -18,12 +18,26 @@ #include <stdlib.h> #include "bogofilter.h" +#include "listsort.h" #include "msgcounts.h" #include "prob.h" #include "rstats.h" #include "score.h" #include "xmalloc.h" +#if 1 +#define TRACE(n,p) +#else +#define TRACE(n,p) \ + { \ + const rstats_t *r = (const rstats_t *)p; \ + printf("%s:%d %s: %p %-16s %8.6f\n", __FILE__, __LINE__, \ + n, r, \ + r ? (char *)r->token->u.text : "nil", \ + r ? r->prob : 0.0); \ + } +#endif + typedef struct rstats_s rstats_t; struct rstats_s { rstats_t *next; @@ -59,8 +73,8 @@ /* Function Prototypes */ -static void rstats_print_histogram(size_t robn, rstats_t **rstats_array, size_t count); -static void rstats_print_rtable(rstats_t **rstats_array, size_t count); +static void rstats_print_histogram(size_t robn, rstats_t *rstats_head); +static void rstats_print_rtable(rstats_t *rstats_head); /* Function Definitions */ @@ -68,8 +82,8 @@ { if (stats_head == NULL) { stats_head = xcalloc(1, sizeof(header_t)); - stats_tail = (rstats_t *) xcalloc( 1, sizeof(rstats_t)); - stats_head->list = stats_tail; + stats_tail = NULL; + stats_head->list = NULL; } } @@ -89,11 +103,25 @@ void rstats_add(const word_t *token, double prob, bool used, wordcnts_t *cnts) { + rstats_t *stats_new; + if (token == NULL) return; +// fprintf(dbgout, "%s:%d %-16s %8.6f\n", __FILE__, __LINE__, +// token->u.text, prob); + + stats_new = (rstats_t *)xcalloc(1, sizeof(rstats_t)); + + if (stats_head->count == 0) { + stats_head->list = stats_new; + } + if (stats_tail != NULL) + stats_tail->next = stats_new; + stats_tail = stats_new; + stats_head->count += 1; - stats_tail->next = NULL; + /* Using externally controlled data; token must not be freed before calling rstats_cleanup() */ @@ -104,17 +132,18 @@ stats_tail->bad = cnts->bad; stats_tail->msgs_good = cnts->msgs_good; stats_tail->msgs_bad = cnts->msgs_bad; - stats_tail->next = (rstats_t *)xcalloc(1, sizeof(rstats_t)); - stats_tail = stats_tail->next; + stats_tail = stats_new; + +// fprintf(dbgout, "%s:%d %2d %p %8.6f %-16s\n", __FILE__, __LINE__, stats_head->count, stats_new, stats_new->prob, stats_new->token->u.text); } static int compare_rstats_t(const void *const ir1, const void *const ir2) { - const rstats_t *r1 = *(const rstats_t *const *)ir1; - const rstats_t *r2 = *(const rstats_t *const *)ir2; + const rstats_t *r1 = (const rstats_t *)ir1; + const rstats_t *r2 = (const rstats_t *)ir2; - if (r1->prob - r2->prob > EPS) return 1; - if (r2->prob - r1->prob > EPS) return -1; + if (r1->prob > r2->prob) return 1; + if (r2->prob > r1->prob) return -1; return word_cmp(r1->token, r2->token); } @@ -131,30 +160,22 @@ void rstats_print(bool unsure) { - size_t r; size_t robn = stats_head->robn; size_t count = stats_head->count; - rstats_t *cur; - rstats_t **rstats_array = (rstats_t **) xcalloc(count, sizeof(rstats_t *)); - for (r=0, cur=stats_head->list; r<count; r+=1, cur=cur->next) - rstats_array[r] = cur; + stats_head->list = (rstats_t *)listsort((element *)stats_head->list, (fcn_compare *)&compare_rstats_t, false, false); - /* sort by ascending probability, then name */ - qsort(rstats_array, count, sizeof(rstats_t *), compare_rstats_t); - if (Rtable || verbose>=3) - rstats_print_rtable(rstats_array, count); + rstats_print_rtable(stats_head->list); else if (verbose==2 || (unsure && verbose)) - rstats_print_histogram(robn, rstats_array, count); - - xfree(rstats_array); + rstats_print_histogram(robn, stats_head->list); } -static void rstats_print_histogram(size_t robn, rstats_t **rstats_array, size_t count) +static void rstats_print_histogram(size_t robn, rstats_t *rstats_head) { - size_t i, r; + size_t i; size_t maxcnt=0; + rstats_t *cur=rstats_head; rhistogram_t hist[INTERVALS]; double invn = (double) robn; @@ -166,16 +187,17 @@ (void)fprintf(fpo, "\n" ); /* Compute histogram */ - for (i=r=0; i<INTERVALS; i+=1) + for (i=0; i<INTERVALS; i+=1) { rhistogram_t *h = &hist[i]; double fin = 1.0*(i+1)/INTERVALS; size_t cnt = 0; + h->prob = 0.0; h->spamicity=0.0; - while (r < count) + + while (cur != NULL) { - rstats_t *cur = rstats_array[r]; double prob = cur->prob; if (prob >= fin) break; @@ -188,7 +210,7 @@ logsum += log(prob); } - r += 1; + cur = cur->next; } if (robn == 0) @@ -223,17 +245,18 @@ if (maxcnt>48) cnt = (cnt * 48 + maxcnt - 1) / maxcnt; /* display histogram */ - for (r=0; r<cnt; r+=1) + while (cnt-- > 0) (void)fputc( '#', fpo); (void)fputc( '\n', fpo); } } -static void rstats_print_rtable(rstats_t **rstats_array, size_t count) +static void rstats_print_rtable(rstats_t *rstats_head) { - size_t r; const char *pfx = !stats_in_header ? "" : " "; + rstats_t *cur; + /* print header */ if (!Rtable) (void)fprintf(fpo, "%s%*s %6s %-6s %-6s %-6s %s\n", @@ -243,9 +266,8 @@ pfx, max_token_len+2, "", "n", "pgood", "pbad", "fw", "invfwlog", "fwlog", "U"); /* Print 1 line per token */ - for (r= 0; r<count; r+=1) + for (cur=rstats_head; cur != NULL; cur=cur->next) { - rstats_t *cur = rstats_array[r]; int len = (cur->token->leng >= max_token_len) ? 0 : (max_token_len - cur->token->leng); double fw = calc_prob(cur->good, cur->bad, cur->msgs_good, cur->msgs_bad); char flag = cur->used ? '+' : '-'; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <re...@us...> - 2009-02-14 18:21:51
|
Revision: 6792 http://bogofilter.svn.sourceforge.net/bogofilter/?rev=6792&view=rev Author: relson Date: 2009-02-14 18:21:48 +0000 (Sat, 14 Feb 2009) Log Message: ----------- Use void * where possible. Modified Paths: -------------- trunk/bogofilter/src/listsort.c trunk/bogofilter/src/listsort.h Modified: trunk/bogofilter/src/listsort.c =================================================================== --- trunk/bogofilter/src/listsort.c 2009-02-12 17:05:08 UTC (rev 6791) +++ trunk/bogofilter/src/listsort.c 2009-02-14 18:21:48 UTC (rev 6792) @@ -43,16 +43,10 @@ typedef unsigned char byte; #include "word.h" -typedef struct rstats_s rstats_t; -struct rstats_s { - rstats_t *next; - const word_t *token; - u_int32_t good; - u_int32_t bad; - u_int32_t msgs_good; - u_int32_t msgs_bad; - bool used; - double prob; +typedef struct element element; +struct element { + element *next, *prev; + int i; }; #ifdef TEST @@ -76,18 +70,8 @@ * * list = listsort(mylist); */ -#if 1 -#define TRACE(n,p) -#else -#define TRACE(n,p) \ - { \ - const rstats_t *r = (const rstats_t *)p; \ - printf("%s:%d %s: %p %-16s %8.6f\n", __FILE__, __LINE__, \ - n, r, r->token->u.text, r->prob); \ - } -#endif -element *listsort(element *list, fcn_compare *compare, bool is_circular, bool is_double) { +void *listsort(void *list, fcn_compare *compare, bool is_circular, bool is_double) { element *p, *q, *e, *tail, *oldhead; int insize, nmerges, psize, qsize, i; @@ -101,7 +85,6 @@ insize = 1; while (1) { -// printf("%s:%d %d\n", __FILE__, __LINE__, insize); p = list; oldhead = list; /* only used for circular linkage */ list = NULL; @@ -168,7 +151,7 @@ if (is_circular) { tail->next = list; if (is_double) - list->prev = tail; + ((element *)list)->prev = tail; } else tail->next = NULL; Modified: trunk/bogofilter/src/listsort.h =================================================================== --- trunk/bogofilter/src/listsort.h 2009-02-12 17:05:08 UTC (rev 6791) +++ trunk/bogofilter/src/listsort.h 2009-02-14 18:21:48 UTC (rev 6792) @@ -5,14 +5,8 @@ #include "bftypes.h" -typedef struct element element; -struct element { - element *next, *prev; - int i; -}; +typedef int fcn_compare(const void *a, const void *b); -typedef int fcn_compare(const element *a, const element *b); +extern void *listsort(void *list, fcn_compare *compare, bool is_circular, bool is_double); -extern element *listsort(element *list, fcn_compare *compare, bool is_circular, bool is_double); - #endif This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <re...@us...> - 2009-02-14 18:27:48
|
Revision: 6793 http://bogofilter.svn.sourceforge.net/bogofilter/?rev=6793&view=rev Author: relson Date: 2009-02-14 18:27:45 +0000 (Sat, 14 Feb 2009) Log Message: ----------- Replace qsort with listsort. Modified Paths: -------------- trunk/bogofilter/src/wordhash.c trunk/bogofilter/src/wordhash.h Modified: trunk/bogofilter/src/wordhash.c =================================================================== --- trunk/bogofilter/src/wordhash.c 2009-02-14 18:21:48 UTC (rev 6792) +++ trunk/bogofilter/src/wordhash.c 2009-02-14 18:27:45 UTC (rev 6793) @@ -29,6 +29,7 @@ #include <string.h> #include <stddef.h> /* for offsetof */ +#include "listsort.h" #include "wordhash.h" #include "xmalloc.h" @@ -440,12 +441,13 @@ return val; } -static int compare_hashnode_t(const void *const ihn1, const void *const ihn2) +/* compare_hashnode_t - sort by ascending token text */ + +static int compare_hashnode_t(const void *const pv1, const void *const pv2) { - const hashnode_t *hn1 = *(const hashnode_t *const *)ihn1; - const hashnode_t *hn2 = *(const hashnode_t *const *)ihn2; - int cmp = word_cmp(hn1->key, hn2->key); - return cmp; + const hashnode_t *hn1 = (const hashnode_t *)pv1; + const hashnode_t *hn2 = (const hashnode_t *)pv2; + return word_cmp(hn1->key, hn2->key); } static wordcnts_t *wordhash_get_counts(wordhash_t *wh, hashnode_t *n) @@ -473,26 +475,13 @@ } } +/* wordhash_sort - sort by ascending token text */ + void wordhash_sort (wordhash_t *wh) { - hashnode_t *node; + wh->iter_head = listsort(wh->iter_head, &compare_hashnode_t, false, false); - if (wh->size == 0 - || wh->type != WH_NORMAL - || msg_count_file) - return; - - wh->order = (hashnode_t **) xcalloc(wh->size, sizeof(hashnode_t *)); - - wh->count = 0; - for (node = wordhash_first(wh); node != NULL; node = wordhash_next(wh)) - wh->order[wh->count++] = node; - - qsort(wh->order, wh->count, sizeof(hashnode_t *), compare_hashnode_t); - - wh->type = WH_ORDERED; - return; } Modified: trunk/bogofilter/src/wordhash.h =================================================================== --- trunk/bogofilter/src/wordhash.h 2009-02-14 18:21:48 UTC (rev 6792) +++ trunk/bogofilter/src/wordhash.h 2009-02-14 18:27:45 UTC (rev 6793) @@ -7,10 +7,10 @@ /* Hash entry. */ typedef struct hashnode_t { + /*@dependent@*/ struct hashnode_t *iter_next; /* Next item added to hash. For fast traversal */ + struct hashnode_t *next; /* Next item in linked list of items with same hash */ word_t *key; /* word key */ void *buf; /* Associated buffer. To be used by caller. */ - struct hashnode_t *next; /* Next item in linked list of items with same hash */ - /*@dependent@*/ struct hashnode_t *iter_next; /* Next item added to hash. For fast traversal */ } hashnode_t; typedef struct wh_alloc_node { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <re...@us...> - 2009-02-14 18:29:00
|
Revision: 6794 http://bogofilter.svn.sourceforge.net/bogofilter/?rev=6794&view=rev Author: relson Date: 2009-02-14 18:28:58 +0000 (Sat, 14 Feb 2009) Log Message: ----------- Remove WH_ORDERED as it's no longer needed. Modified Paths: -------------- trunk/bogofilter/src/wordhash.c trunk/bogofilter/src/wordhash.h Modified: trunk/bogofilter/src/wordhash.c =================================================================== --- trunk/bogofilter/src/wordhash.c 2009-02-14 18:27:45 UTC (rev 6793) +++ trunk/bogofilter/src/wordhash.c 2009-02-14 18:28:58 UTC (rev 6794) @@ -397,10 +397,6 @@ case WH_NORMAL: val = wh->iter_ptr = wh->iter_head; break; - case WH_ORDERED: - wh->index = 0; - val = wh->order[wh->index]; - break; case WH_PROPS: wh->index = 0; val = &wh->props[wh->index]; @@ -424,10 +420,6 @@ if (wh->iter_ptr != NULL) val = wh->iter_ptr = wh->iter_ptr->iter_next; break; - case WH_ORDERED: - if (++wh->index < wh->count) - val = wh->order[wh->index]; - break; case WH_PROPS: if (++wh->index < wh->count) val = &wh->props[wh->index]; Modified: trunk/bogofilter/src/wordhash.h =================================================================== --- trunk/bogofilter/src/wordhash.h 2009-02-14 18:27:45 UTC (rev 6793) +++ trunk/bogofilter/src/wordhash.h 2009-02-14 18:28:58 UTC (rev 6794) @@ -29,7 +29,9 @@ typedef /*@null@*/ hashnode_t *hashnode_pt; -typedef enum wh_e { WH_NORMAL, WH_ORDERED, WH_PROPS, WH_CNTS } wh_t; +typedef enum wh_e { WH_NORMAL, + WH_PROPS, + WH_CNTS } wh_t; typedef struct wordhash_s { /*@null@*/ /*@dependent@*/ wh_t type; /* normal, ordered, props, or cnts */ This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <re...@us...> - 2009-02-14 18:29:49
|
Revision: 6795 http://bogofilter.svn.sourceforge.net/bogofilter/?rev=6795&view=rev Author: relson Date: 2009-02-14 18:29:46 +0000 (Sat, 14 Feb 2009) Log Message: ----------- Replace qsort with listsort. Modified Paths: -------------- trunk/bogofilter/src/common.h trunk/bogofilter/src/rstats.c trunk/bogofilter/src/score.c trunk/bogofilter/src/tests/outputs/token.count.ref Modified: trunk/bogofilter/src/common.h =================================================================== --- trunk/bogofilter/src/common.h 2009-02-14 18:28:58 UTC (rev 6794) +++ trunk/bogofilter/src/common.h 2009-02-14 18:29:46 UTC (rev 6795) @@ -160,7 +160,8 @@ typedef struct { wordcnts_t cnts; double prob; - int freq; + int freq; + bool used; } wordprop_t; extern void bf_exit(void); Modified: trunk/bogofilter/src/rstats.c =================================================================== --- trunk/bogofilter/src/rstats.c 2009-02-14 18:28:58 UTC (rev 6794) +++ trunk/bogofilter/src/rstats.c 2009-02-14 18:29:46 UTC (rev 6795) @@ -25,19 +25,6 @@ #include "score.h" #include "xmalloc.h" -#if 1 -#define TRACE(n,p) -#else -#define TRACE(n,p) \ - { \ - const rstats_t *r = (const rstats_t *)p; \ - printf("%s:%d %s: %p %-16s %8.6f\n", __FILE__, __LINE__, \ - n, r, \ - r ? (char *)r->token->u.text : "nil", \ - r ? r->prob : 0.0); \ - } -#endif - typedef struct rstats_s rstats_t; struct rstats_s { rstats_t *next; @@ -82,8 +69,8 @@ { if (stats_head == NULL) { stats_head = xcalloc(1, sizeof(header_t)); - stats_tail = NULL; - stats_head->list = NULL; + stats_tail = (rstats_t *)xcalloc(1, sizeof(rstats_t)); + stats_head->list = stats_tail; } } @@ -103,24 +90,11 @@ void rstats_add(const word_t *token, double prob, bool used, wordcnts_t *cnts) { - rstats_t *stats_new; - if (token == NULL) return; -// fprintf(dbgout, "%s:%d %-16s %8.6f\n", __FILE__, __LINE__, -// token->u.text, prob); - - stats_new = (rstats_t *)xcalloc(1, sizeof(rstats_t)); - - if (stats_head->count == 0) { - stats_head->list = stats_new; - } - if (stats_tail != NULL) - stats_tail->next = stats_new; - stats_tail = stats_new; - stats_head->count += 1; + stats_tail->next = NULL; /* Using externally controlled data; token must not be freed before calling rstats_cleanup() @@ -132,15 +106,17 @@ stats_tail->bad = cnts->bad; stats_tail->msgs_good = cnts->msgs_good; stats_tail->msgs_bad = cnts->msgs_bad; - stats_tail = stats_new; -// fprintf(dbgout, "%s:%d %2d %p %8.6f %-16s\n", __FILE__, __LINE__, stats_head->count, stats_new, stats_new->prob, stats_new->token->u.text); + stats_tail->next = (rstats_t *)xcalloc(1, sizeof(rstats_t)); + stats_tail = stats_tail->next; } -static int compare_rstats_t(const void *const ir1, const void *const ir2) +/* compare_rstats_t - sort by ascending spamicity */ + +static int compare_rstats_t(const void *const pv1, const void *const pv2) { - const rstats_t *r1 = (const rstats_t *)ir1; - const rstats_t *r2 = (const rstats_t *)ir2; + const rstats_t *r1 = (const rstats_t *)pv1; + const rstats_t *r2 = (const rstats_t *)pv2; if (r1->prob > r2->prob) return 1; if (r2->prob > r1->prob) return -1; @@ -161,9 +137,9 @@ void rstats_print(bool unsure) { size_t robn = stats_head->robn; - size_t count = stats_head->count; - stats_head->list = (rstats_t *)listsort((element *)stats_head->list, (fcn_compare *)&compare_rstats_t, false, false); + /* sort by ascending spamicity */ + stats_head->list = listsort(stats_head->list, &compare_rstats_t, false, false); if (Rtable || verbose>=3) rstats_print_rtable(stats_head->list); @@ -215,13 +191,13 @@ if (robn == 0) h->spamicity = robx; - else + else { double invproduct, product; invproduct = 1.0 - exp(invlogsum / invn); product = 1.0 - exp(logsum / invn); - h->spamicity = (invproduct + product < EPS) - ? 0.0 + h->spamicity = (invproduct + product < EPS) + ? 0.0 : (1.0 + (invproduct - product) / (invproduct + product)) / 2.0; } h->count=cnt; @@ -266,7 +242,7 @@ pfx, max_token_len+2, "", "n", "pgood", "pbad", "fw", "invfwlog", "fwlog", "U"); /* Print 1 line per token */ - for (cur=rstats_head; cur != NULL; cur=cur->next) + for (cur=rstats_head->next; cur != NULL; cur=cur->next) { int len = (cur->token->leng >= max_token_len) ? 0 : (max_token_len - cur->token->leng); double fw = calc_prob(cur->good, cur->bad, cur->msgs_good, cur->msgs_bad); Modified: trunk/bogofilter/src/score.c =================================================================== --- trunk/bogofilter/src/score.c 2009-02-14 18:28:58 UTC (rev 6794) +++ trunk/bogofilter/src/score.c 2009-02-14 18:29:46 UTC (rev 6795) @@ -17,6 +17,7 @@ #include "bogofilter.h" #include "collect.h" #include "datastore.h" +#include "listsort.h" #include "msgcounts.h" #include "prob.h" #include "rand_sleep.h" @@ -71,8 +72,9 @@ static double get_spamicity(size_t robn, FLOAT P, FLOAT Q); static bool need_scoring_boundary(wordhash_t *wh); static double find_scoring_boundary(wordhash_t *wh); +static void compute_scores(wordhash_t *wh); static void compute_spamicity(wordhash_t *wh, FLOAT *P, FLOAT *Q, size_t *robn, bool need_stats); -static int compare_probnode_t(const void *const ipn1, const void *const ipn2); +static int compare_hashnode_t(const void *const pv1, const void *const pv2); /* Static Variables */ @@ -233,6 +235,8 @@ if (DEBUG_ALGORITHM(2)) fprintf(dbgout, "min_dev: %f, robs: %f, robx: %f\n", min_dev, robs, robx); + compute_scores(wh); + score.min_dev = !need_scoring_boundary(wh) ? min_dev : find_scoring_boundary(wh); compute_spamicity(wh, &P, &Q, &robn, need_stats); @@ -252,6 +256,37 @@ } /* +** compute_scores() +** compute the token probabilities from the linked list of tokens +*/ +void compute_scores(wordhash_t *wh) +{ + hashnode_t *node; + + for (node = wordhash_first(wh); node != NULL; node = wordhash_next(wh)) + { + wordcnts_t *cnts; + wordprop_t *props; + + if (!fBogotune) { + props = (wordprop_t *) node->buf; + cnts = &props->cnts; + props->prob = calc_prob(cnts->good, cnts->bad, + cnts->msgs_good, cnts->msgs_bad); + props->used = fabs(props->prob - EVEN_ODDS) > min_dev; + } else { + /* unneeded - remove */ + double prob; + bool useflag; + cnts = (wordcnts_t *) node; + prob = calc_prob(cnts->good, cnts->bad, + cnts->msgs_good, cnts->msgs_bad); + useflag = fabs(prob - EVEN_ODDS) > score.min_dev; + } + } +} + +/* ** compute_spamicity() ** compute the spamicity from the linked list of tokens using ** min_dev to select tokens @@ -263,6 +298,7 @@ hashnode_t *node; size_t count = 0; + for (node = wordhash_first(wh); node != NULL; node = wordhash_next(wh)) { bool useflag; @@ -272,19 +308,19 @@ wordprop_t *props; if (!fBogotune) { + token = node->key; props = (wordprop_t *) node->buf; cnts = &props->cnts; - token = node->key; + prob = props->prob; + useflag = props->used; } else { + token = NULL; cnts = (wordcnts_t *) node; - token = NULL; + prob = calc_prob(cnts->good, cnts->bad, + cnts->msgs_good, cnts->msgs_bad); + useflag = fabs(prob - EVEN_ODDS) > score.min_dev; } - prob = calc_prob(cnts->good, cnts->bad, - cnts->msgs_good, cnts->msgs_bad); - - useflag = fabs(prob - EVEN_ODDS) > score.min_dev; - if (need_stats) rstats_add(token, prob, useflag, cnts); @@ -337,24 +373,17 @@ // Count scorable tokens for (node = wordhash_first(wh); node != NULL; node = wordhash_next(wh)) { - double prob; - wordcnts_t *cnts; - wordprop_t *props; - if (!fBogotune) { - props = (wordprop_t *) node->buf; - cnts = &props->cnts; + wordprop_t *props = (wordprop_t *) node->buf; + if (props->used) + count += 1; } else { - cnts = (wordcnts_t *) node; + wordcnts_t *cnts = (wordcnts_t *) node; + double prob = calc_prob(cnts->good, cnts->bad, + cnts->msgs_good, cnts->msgs_bad); + if (fabs(prob - EVEN_ODDS) >= min_dev) + count += 1; } - - prob = calc_prob(cnts->good, cnts->bad, - cnts->msgs_good, cnts->msgs_bad); - - if (fabs(prob - EVEN_ODDS) >= min_dev) - { - count += 1; - } } // Compare count to limits @@ -375,60 +404,80 @@ */ double find_scoring_boundary(wordhash_t *wh) { - size_t node_index = 0; - size_t node_count = wh->count; + size_t count = 0; double min_prob = (token_count_max == 0.0) ? min_dev : 1.0; hashnode_t *node; - probnode_t *node_array = calloc(node_count, sizeof(probnode_t)); - /* create array from linked list to allow sorting */ - for (node = wordhash_first(wh); node != NULL; node = wordhash_next(wh)) - { - double prob, dev; - word_t *token; + /* sort by ascending score difference (from 0.5) */ + wh->iter_head = listsort(wh->iter_head, &compare_hashnode_t, false, false); + + count = max(token_count_fix, max(token_count_min, token_count_max)); + + for (node = wordhash_first(wh); node != NULL; node = wordhash_next(wh)) { wordcnts_t *cnts; wordprop_t *props; + double prob; + double dev; if (!fBogotune) { props = (wordprop_t *) node->buf; cnts = &props->cnts; - token = node->key; } else { cnts = (wordcnts_t *) node; - token = NULL; } - prob = calc_prob(cnts->good, cnts->bad, cnts->msgs_good, cnts->msgs_bad); dev = fabs(prob - EVEN_ODDS); - node_array[node_index].node = node; - node_array[node_index].prob = prob; - node_array[node_index].dev = dev; - node_index += 1; + if (count > 0) { + count -= 1; + props->used = true; + min_prob = dev; + } + else if (dev >= min_prob) { + props->used = true; + } + else { + props->used = false; + } } - qsort(node_array, node_count, sizeof(probnode_t), compare_probnode_t); - - node_index = max(token_count_fix, max(token_count_min, token_count_max)); - min_prob = node_array[ node_index ].dev; - - free(node_array); - return min_prob; } -static int compare_probnode_t(const void *const ipn1, const void *const ipn2) +/* compare_hashnode_t - sort by ascending score difference (from 0.5) */ + +static int compare_hashnode_t(const void *const pv1, const void *const pv2) { - const probnode_t *pn1 = (const probnode_t const *)ipn1; - const probnode_t *pn2 = (const probnode_t const *)ipn2; + double d1; + double d2; - if (pn1->dev < pn2->dev) + if (!fBogotune) { + const hashnode_t *hn1 = (const hashnode_t const *)pv1; + const hashnode_t *hn2 = (const hashnode_t const *)pv2; + d1 = fabs(((wordprop_t *) hn1->buf)->prob - EVEN_ODDS); + d2 = fabs(((wordprop_t *) hn2->buf)->prob - EVEN_ODDS); + } else { + const wordcnts_t *cnts; + double prob; + cnts = (const wordcnts_t *) pv1; + prob = calc_prob(cnts->good, cnts->bad, + cnts->msgs_good, cnts->msgs_bad); + d1 = fabs(prob - EVEN_ODDS); + + cnts = (const wordcnts_t *) pv2; + prob = calc_prob(cnts->good, cnts->bad, + cnts->msgs_good, cnts->msgs_bad); + d2 = fabs(prob - EVEN_ODDS); + } + + if (d1 < d2) return +1; - if (pn1->dev > pn2->dev) + if (d1 > d2) return -1; + return 0; } Modified: trunk/bogofilter/src/tests/outputs/token.count.ref =================================================================== --- trunk/bogofilter/src/tests/outputs/token.count.ref 2009-02-14 18:28:58 UTC (rev 6794) +++ trunk/bogofilter/src/tests/outputs/token.count.ref 2009-02-14 18:29:46 UTC (rev 6795) @@ -57,7 +57,7 @@ "dealer" 2 0.000000 0.095238 0.995766 + "agree" 3 0.000000 0.142857 0.997169 + N_P_Q_S_s_x_md 4 0.000085 0.065746 0.532831 - 0.017800 0.520000 0.490906 + 0.017800 0.520000 0.491605 #### --min-dev=0.496 --token-count-max=8 #### X-Bogosity: Unsure, tests=bogofilter, spamicity=0.493025 n pgood pbad fw U @@ -177,7 +177,7 @@ "dealer" 2 0.000000 0.095238 0.995766 + "agree" 3 0.000000 0.142857 0.997169 + N_P_Q_S_s_x_md 8 0.006086 0.006252 0.500083 - 0.017800 0.520000 0.253238 + 0.017800 0.520000 0.277190 #### --min-dev=0.100 --token-count=20 #### X-Bogosity: Unsure, tests=bogofilter, spamicity=0.495886 n pgood pbad fw U @@ -207,7 +207,7 @@ "dealer" 2 0.000000 0.095238 0.995766 + "agree" 3 0.000000 0.142857 0.997169 + N_P_Q_S_s_x_md 20 0.107193 0.098964 0.495886 - 0.017800 0.520000 0.059390 + 0.017800 0.520000 0.067490 #### U 0.493025 --min-dev=0.496 #### U 0.532831 --min-dev=0.496 --token-count-min=4 #### U 0.493025 --min-dev=0.496 --token-count-max=8 This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <re...@us...> - 2009-02-14 21:13:03
|
Revision: 6797 http://bogofilter.svn.sourceforge.net/bogofilter/?rev=6797&view=rev Author: relson Date: 2009-02-14 21:13:00 +0000 (Sat, 14 Feb 2009) Log Message: ----------- Rename hashnode_t's 'buf' field. Modified Paths: -------------- trunk/bogofilter/src/register.c trunk/bogofilter/src/score.c trunk/bogofilter/src/wordhash.c trunk/bogofilter/src/wordhash.h trunk/bogofilter/src/wordhash.main.c Modified: trunk/bogofilter/src/register.c =================================================================== --- trunk/bogofilter/src/register.c 2009-02-14 21:12:52 UTC (rev 6796) +++ trunk/bogofilter/src/register.c 2009-02-14 21:13:00 UTC (rev 6797) @@ -87,7 +87,7 @@ for (node = wordhash_first(h); node != NULL; node = wordhash_next(h)) { - wordprop = node->buf; + wordprop = node->data; switch (ds_read(list->dsh, node->key, &val)) { case DS_ABORT_RETRY: rand_sleep(4*1000,1000*1000); Modified: trunk/bogofilter/src/score.c =================================================================== --- trunk/bogofilter/src/score.c 2009-02-14 21:12:52 UTC (rev 6796) +++ trunk/bogofilter/src/score.c 2009-02-14 21:13:00 UTC (rev 6797) @@ -206,7 +206,7 @@ for (node = wordhash_first(wh); node != NULL; node = wordhash_next(wh)) { word_t *token = node->key; - wordprop_t *props = (wordprop_t *) node->buf; + wordprop_t *props = (wordprop_t *) node->data; wordcnts_t *cnts = &props->cnts; ret = lookup(token, cnts); if (ret == DS_ABORT_RETRY) @@ -269,7 +269,7 @@ wordprop_t *props; if (!fBogotune) { - props = (wordprop_t *) node->buf; + props = (wordprop_t *) node->data; cnts = &props->cnts; props->prob = calc_prob(cnts->good, cnts->bad, cnts->msgs_good, cnts->msgs_bad); @@ -309,7 +309,7 @@ if (!fBogotune) { token = node->key; - props = (wordprop_t *) node->buf; + props = (wordprop_t *) node->data; cnts = &props->cnts; prob = props->prob; useflag = props->used; @@ -374,7 +374,7 @@ for (node = wordhash_first(wh); node != NULL; node = wordhash_next(wh)) { if (!fBogotune) { - wordprop_t *props = (wordprop_t *) node->buf; + wordprop_t *props = (wordprop_t *) node->data; if (props->used) count += 1; } else { @@ -422,7 +422,7 @@ double dev; if (!fBogotune) { - props = (wordprop_t *) node->buf; + props = (wordprop_t *) node->data; cnts = &props->cnts; } else { cnts = (wordcnts_t *) node; @@ -457,8 +457,8 @@ if (!fBogotune) { const hashnode_t *hn1 = (const hashnode_t const *)pv1; const hashnode_t *hn2 = (const hashnode_t const *)pv2; - d1 = fabs(((wordprop_t *) hn1->buf)->prob - EVEN_ODDS); - d2 = fabs(((wordprop_t *) hn2->buf)->prob - EVEN_ODDS); + d1 = fabs(((wordprop_t *) hn1->data)->prob - EVEN_ODDS); + d2 = fabs(((wordprop_t *) hn2->data)->prob - EVEN_ODDS); } else { const wordcnts_t *cnts; double prob; Modified: trunk/bogofilter/src/wordhash.c =================================================================== --- trunk/bogofilter/src/wordhash.c 2009-02-14 21:12:52 UTC (rev 6796) +++ trunk/bogofilter/src/wordhash.c 2009-02-14 21:13:00 UTC (rev 6797) @@ -170,7 +170,7 @@ if (wh->freeable) { uint i; for (i=0; i<wh->size; i++) - xfree(wh->props[i].buf); + xfree(wh->props[i].data); } xfree (wh->props); } @@ -242,7 +242,7 @@ static void display_node(hashnode_t *n, const char *str) { - wordprop_t *p = (wordprop_t *)n->buf; + wordprop_t *p = (wordprop_t *)n->data; if (verbose > 2) printf( "%20.20s %5u %5u%s", n->key->u.text, p->cnts.bad, p->cnts.good, str); } @@ -264,7 +264,7 @@ } for (s = wordhash_first(src); s != NULL; s = wordhash_next(src)) { - wordprop_t *p = (wordprop_t *)s->buf; + wordprop_t *p = (wordprop_t *)s->data; word_t *key = s->key; wordprop_t *d; if (key == NULL) @@ -287,7 +287,7 @@ hashnode_t *hn; for (hn = wordhash_first(wh); hn != NULL; hn = wordhash_next(wh)) { - (*hook)(hn->key, hn->buf, userdata); + (*hook)(hn->key, hn->data, userdata); } return; @@ -313,7 +313,7 @@ for (hn = wh->bin[idx]; hn != NULL; hn = hn->next) { word_t *key = hn->key; if (key->leng == t->leng && memcmp (t->u.text, key->u.text, t->leng) == 0) { - wordprop_t *p = (wordprop_t *)hn->buf; + wordprop_t *p = (wordprop_t *)hn->data; return p; } } @@ -331,11 +331,11 @@ return buf; hn = nmalloc (wh); - hn->buf = smalloc (wh, n); + hn->data = smalloc (wh, n); if (initializer) - initializer(hn->buf); + initializer(hn->data); else - memset(hn->buf, '\0', n); + memset(hn->data, '\0', n); hn->key = word_dup(t); @@ -355,7 +355,7 @@ wh->count += 1; wh->size += 1; - return hn->buf; + return hn->data; } static void * @@ -445,7 +445,7 @@ static wordcnts_t *wordhash_get_counts(wordhash_t *wh, hashnode_t *n) { if (wh->cnts == NULL) { - wordprop_t *p = (wordprop_t *)n->buf; + wordprop_t *p = (wordprop_t *)n->data; wordcnts_t *c = &p->cnts; return c; } @@ -537,11 +537,11 @@ } else { wp = xcalloc(1, sizeof(wordprop_t)); - memcpy(wp, node->buf, sizeof(wordprop_t)); + memcpy(wp, node->data, sizeof(wordprop_t)); if (!who->freeable) wh_trap(); } - who->props[who->count].buf = wp; + who->props[who->count].data = wp; xfree(node->key); node->key = NULL; who->count += 1; Modified: trunk/bogofilter/src/wordhash.h =================================================================== --- trunk/bogofilter/src/wordhash.h 2009-02-14 21:12:52 UTC (rev 6796) +++ trunk/bogofilter/src/wordhash.h 2009-02-14 21:13:00 UTC (rev 6797) @@ -10,7 +10,7 @@ /*@dependent@*/ struct hashnode_t *iter_next; /* Next item added to hash. For fast traversal */ struct hashnode_t *next; /* Next item in linked list of items with same hash */ word_t *key; /* word key */ - void *buf; /* Associated buffer. To be used by caller. */ + void *data; /* Associated data. To be used by caller. */ } hashnode_t; typedef struct wh_alloc_node { Modified: trunk/bogofilter/src/wordhash.main.c =================================================================== --- trunk/bogofilter/src/wordhash.main.c 2009-02-14 21:12:52 UTC (rev 6796) +++ trunk/bogofilter/src/wordhash.main.c 2009-02-14 21:13:00 UTC (rev 6797) @@ -25,7 +25,7 @@ { word_t *key = p->key; (void)word_puts(key, 0, stdout); - (void)printf (" %d\n", ((wh_elt_t *) p->buf)->count); + (void)printf (" %d\n", ((wh_elt_t *) p->data)->count); } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <re...@us...> - 2009-02-14 23:22:22
|
Revision: 6799 http://bogofilter.svn.sourceforge.net/bogofilter/?rev=6799&view=rev Author: relson Date: 2009-02-14 22:10:54 +0000 (Sat, 14 Feb 2009) Log Message: ----------- Remove some cruft. Add some comments. Modified Paths: -------------- trunk/bogofilter/src/score.c trunk/bogofilter/src/wordhash.c trunk/bogofilter/src/wordhash.h Modified: trunk/bogofilter/src/score.c =================================================================== --- trunk/bogofilter/src/score.c 2009-02-14 21:58:29 UTC (rev 6798) +++ trunk/bogofilter/src/score.c 2009-02-14 22:10:54 UTC (rev 6799) @@ -46,7 +46,6 @@ typedef struct probnode_t { hashnode_t * node; double prob; - double dev; } probnode_t; /* struct for saving stats for printing. */ @@ -60,13 +59,6 @@ double q_pr; /* Robinson Q */ } score_t; -/* struct for printing doubles as hex. */ -typedef union -{ - double d; - long long q; -} t_DOUBLE_QUAD; - /* Function Prototypes */ static double get_spamicity(size_t robn, FLOAT P, FLOAT Q); @@ -235,16 +227,18 @@ if (DEBUG_ALGORITHM(2)) fprintf(dbgout, "min_dev: %f, robs: %f, robx: %f\n", min_dev, robs, robx); + /* compute scores for the wordhash's tokens */ compute_scores(wh); + /* recalculate min_dev if necessary to satisfy token_count settings */ score.min_dev = !need_scoring_boundary(wh) ? min_dev : find_scoring_boundary(wh); + /* compute message spamicity from the wordhash's scores */ compute_spamicity(wh, &P, &Q, &robn, need_stats); /* Robinson's P, Q and S ** S = (P - Q) / (P + Q) [combined indicator] */ - spamicity = get_spamicity(robn, P, Q); if (need_stats && robn != 0) Modified: trunk/bogofilter/src/wordhash.c =================================================================== --- trunk/bogofilter/src/wordhash.c 2009-02-14 21:58:29 UTC (rev 6798) +++ trunk/bogofilter/src/wordhash.c 2009-02-14 22:10:54 UTC (rev 6799) @@ -166,8 +166,6 @@ wordhash_free_alloc_nodes(wh); wordhash_free_strings(wh); - xfree (wh->order); - switch (wh->type) { case WH_NORMAL: Modified: trunk/bogofilter/src/wordhash.h =================================================================== --- trunk/bogofilter/src/wordhash.h 2009-02-14 21:58:29 UTC (rev 6798) +++ trunk/bogofilter/src/wordhash.h 2009-02-14 22:10:54 UTC (rev 6799) @@ -41,20 +41,19 @@ /*@null@*/ /*@dependent@*/ uint size; /* size of array */ hashnode_pt *bin; - /*@null@*/ /*@owned@*/ wh_alloc_node *nodes; /*list of node buffers */ + /*@null@*/ /*@owned@*/ wh_alloc_node *nodes; /* list of node buffers */ /*@null@*/ wh_alloc_str *strings; /* list of string buffers */ /*@null@*/ /*@dependent@*/ hashnode_t *iter_ptr; /*@null@*/ /*@dependent@*/ hashnode_t *iter_head; /*@null@*/ /*@dependent@*/ hashnode_t *iter_tail; - /*@null@*/ /*@dependent@*/ hashnode_t **order; /* array of nodes */ - /*@null@*/ /*@dependent@*/ hashnode_t *props; /* array of nodes */ - /*@null@*/ /*@dependent@*/ wordcnts_t *cnts; /* array of counts */ + /*@null@*/ /*@dependent@*/ hashnode_t *props; /* array of nodes */ + /*@null@*/ /*@dependent@*/ wordcnts_t *cnts; /* array of counts */ } wordhash_t; /*@only@*/ wordhash_t *wordhash_new(void); -/*@only@*/ wordhash_t *wordhash_init(wh_t t, uint c); +/*@only@*/ wordhash_t *wordhash_init(wh_t type, uint count); void wordhash_free(/*@only@*/ wordhash_t *); size_t wordhash_count(wordhash_t * h); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <re...@us...> - 2009-02-19 04:41:55
|
Revision: 6806 http://bogofilter.svn.sourceforge.net/bogofilter/?rev=6806&view=rev Author: relson Date: 2009-02-19 04:41:50 +0000 (Thu, 19 Feb 2009) Log Message: ----------- Add token-count options to bogotune. Modified Paths: -------------- trunk/bogofilter/src/bogoconfig.c trunk/bogofilter/src/bogotune.c trunk/bogofilter/src/longoptions.h Modified: trunk/bogofilter/src/bogoconfig.c =================================================================== --- trunk/bogofilter/src/bogoconfig.c 2009-02-19 01:03:34 UTC (rev 6805) +++ trunk/bogofilter/src/bogoconfig.c 2009-02-19 04:41:50 UTC (rev 6806) @@ -94,6 +94,7 @@ LONGOPTIONS_COMMON /* longoptions.h - bogofilter */ LONGOPTIONS_MAIN + LONGOPTIONS_MAIN_TUNE /* longoptions.h - bogofilter/bogolexer options */ LONGOPTIONS_LEX /* longoptions.h - bogofilter/bogoutil options */ Modified: trunk/bogofilter/src/bogotune.c =================================================================== --- trunk/bogofilter/src/bogotune.c 2009-02-19 01:03:34 UTC (rev 6805) +++ trunk/bogofilter/src/bogotune.c 2009-02-19 04:41:50 UTC (rev 6806) @@ -947,6 +947,7 @@ static struct option longopts_bogotune[] = { /* longoptions.h - common options */ LONGOPTIONS_COMMON + LONGOPTIONS_MAIN_TUNE /* longoptions.h - bogofilter/-lexer options */ LONGOPTIONS_LEX /* end of list */ @@ -1125,6 +1126,18 @@ replace_nonascii_characters = get_bool(name, val); break; + case O_TOKEN_COUNT_FIX: + token_count_fix = atoi(val); + break; + + case O_TOKEN_COUNT_MIN: + token_count_min = atoi(val); + break; + + case O_TOKEN_COUNT_MAX: + token_count_max = atoi(val); + break; + default: help(); exit(EX_ERROR); Modified: trunk/bogofilter/src/longoptions.h =================================================================== --- trunk/bogofilter/src/longoptions.h 2009-02-19 01:03:34 UTC (rev 6805) +++ trunk/bogofilter/src/longoptions.h 2009-02-19 04:41:50 UTC (rev 6806) @@ -99,7 +99,10 @@ /* options for bogofilter */ #define LONGOPTIONS_MAIN \ - { "ham-true" , N, 0, O_HAM_TRUE }, \ + { "ham-true" , N, 0, O_HAM_TRUE }, + +/* options for bogofilter */ +#define LONGOPTIONS_MAIN_TUNE \ { "token-count" , R, 0, O_TOKEN_COUNT_FIX }, \ { "token-count-min" , R, 0, O_TOKEN_COUNT_MIN }, \ { "token-count-max" , R, 0, O_TOKEN_COUNT_MAX }, This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <re...@us...> - 2009-02-21 21:04:53
|
Revision: 6814 http://bogofilter.svn.sourceforge.net/bogofilter/?rev=6814&view=rev Author: relson Date: 2009-02-21 21:04:46 +0000 (Sat, 21 Feb 2009) Log Message: ----------- Remove listsort's unneeded is_circular and is_double parameters. Modified Paths: -------------- trunk/bogofilter/src/listsort.c trunk/bogofilter/src/listsort.h trunk/bogofilter/src/rstats.c trunk/bogofilter/src/score.c trunk/bogofilter/src/wordhash.c Modified: trunk/bogofilter/src/listsort.c =================================================================== --- trunk/bogofilter/src/listsort.c 2009-02-21 20:41:42 UTC (rev 6813) +++ trunk/bogofilter/src/listsort.c 2009-02-21 21:04:46 UTC (rev 6814) @@ -71,7 +71,7 @@ * list = listsort(mylist); */ -void *listsort(void *list, fcn_compare *compare, bool is_circular, bool is_double) { +void *listsort(void *list, fcn_compare *compare) { element *p, *q, *e, *tail, *oldhead; int insize, nmerges, psize, qsize, i; @@ -99,10 +99,7 @@ psize = 0; for (i = 0; i < insize; i++) { psize++; - if (is_circular) - q = (q->next == oldhead ? NULL : q->next); - else - q = q->next; + q = q->next; if (!q) break; } @@ -116,20 +113,16 @@ if (psize == 0) { /* p is empty; e must come from q. */ e = q; q = q->next; qsize--; - if (is_circular && q == oldhead) q = NULL; } else if (qsize == 0 || !q) { /* q is empty; e must come from p. */ e = p; p = p->next; psize--; - if (is_circular && p == oldhead) p = NULL; } else if (compare(p,q) <= 0) { /* First element of p is lower (or same); * e must come from p. */ e = p; p = p->next; psize--; - if (is_circular && p == oldhead) p = NULL; } else { /* First element of q is lower; e must come from q. */ e = q; q = q->next; qsize--; - if (is_circular && q == oldhead) q = NULL; } /* add the next element to the merged list */ @@ -138,22 +131,13 @@ } else { list = e; } - if (is_double) { - /* Maintain reverse pointers in a doubly linked list. */ - e->prev = tail; - } tail = e; } /* now p has stepped `insize' places along, and q has too */ p = q; } - if (is_circular) { - tail->next = list; - if (is_double) - ((element *)list)->prev = tail; - } else - tail->next = NULL; + tail->next = NULL; /* If we have done only one merge, we're finished. */ if (nmerges <= 1) /* allow for nmerges==0, the empty list case */ @@ -174,7 +158,6 @@ int main(void) { #define n 13 element k[n], *head, *p; - bool is_circular, is_double; int order[][n] = { { 0,1,2,3,4,5,6,7,8,9,10,11,12 }, @@ -188,50 +171,30 @@ listsort(NULL, cmp, 0, 0); - for (is_circular = false; is_circular <= true; is_circular++) { - for (is_double = false; is_double < true; is_double++) { for (i = 0; i < sizeof(order)/sizeof(*order); i++) { int *ord = order[i]; head = &k[ord[0]]; for (j = 0; j < n; j++) { if (j == n-1) - k[ord[j]].next = (is_circular ? &k[ord[0]] : - NULL); + k[ord[j]].next = NULL; else k[ord[j]].next = &k[ord[j+1]]; - if (is_double) { - if (j == 0) - k[ord[j]].prev = (is_circular ? &k[ord[n-1]] : - NULL); - else - k[ord[j]].prev = &k[ord[j-1]]; - } } - printf("%s %s ", is_circular ? "cir" : " ", is_double ? "dbl" : " "); - printf("before:"); p = head; do { printf(" %d", p->i); - if (is_double) { - if (p->next && p->next->prev != p) - printf(" [REVERSE LINK ERROR!]"); - } p = p->next; - } while (is_circular ? (p != head) : (p != NULL)); + } while (p != NULL); printf("\t"); - head = listsort(head, cmp, is_circular, is_double); + head = listsort(head, cmp); printf(" after:"); p = head; do { printf(" %d", p->i); - if (is_double) { - if (p->next && p->next->prev != p) - printf(" [REVERSE LINK ERROR!]"); - } p = p->next; - } while (is_circular ? (p != head) : (p != NULL)); + } while (p != NULL); printf("\n"); } printf("\n"); Modified: trunk/bogofilter/src/listsort.h =================================================================== --- trunk/bogofilter/src/listsort.h 2009-02-21 20:41:42 UTC (rev 6813) +++ trunk/bogofilter/src/listsort.h 2009-02-21 21:04:46 UTC (rev 6814) @@ -7,6 +7,6 @@ typedef int fcn_compare(const void *a, const void *b); -extern void *listsort(void *list, fcn_compare *compare, bool is_circular, bool is_double); +extern void *listsort(void *list, fcn_compare *compare); #endif Modified: trunk/bogofilter/src/rstats.c =================================================================== --- trunk/bogofilter/src/rstats.c 2009-02-21 20:41:42 UTC (rev 6813) +++ trunk/bogofilter/src/rstats.c 2009-02-21 21:04:46 UTC (rev 6814) @@ -139,7 +139,7 @@ size_t robn = stats_head->robn; /* sort by ascending spamicity */ - stats_head->list = listsort(stats_head->list, &compare_rstats_t, false, false); + stats_head->list = listsort(stats_head->list, &compare_rstats_t); if (Rtable || verbose>=3) rstats_print_rtable(stats_head->list); Modified: trunk/bogofilter/src/score.c =================================================================== --- trunk/bogofilter/src/score.c 2009-02-21 20:41:42 UTC (rev 6813) +++ trunk/bogofilter/src/score.c 2009-02-21 21:04:46 UTC (rev 6814) @@ -380,7 +380,7 @@ hashnode_t *node; /* sort by ascending score difference (from 0.5) */ - wh->iter_head = listsort(wh->iter_head, &compare_hashnode_t, false, false); + wh->iter_head = listsort(wh->iter_head, &compare_hashnode_t); count = max(token_count_fix, max(token_count_min, token_count_max)); Modified: trunk/bogofilter/src/wordhash.c =================================================================== --- trunk/bogofilter/src/wordhash.c 2009-02-21 20:41:42 UTC (rev 6813) +++ trunk/bogofilter/src/wordhash.c 2009-02-21 21:04:46 UTC (rev 6814) @@ -481,7 +481,7 @@ void wordhash_sort (wordhash_t *wh) { - wh->iter_head = listsort(wh->iter_head, &compare_hashnode_t, false, false); + wh->iter_head = listsort(wh->iter_head, &compare_hashnode_t); return; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <re...@us...> - 2009-02-21 21:11:04
|
Revision: 6816 http://bogofilter.svn.sourceforge.net/bogofilter/?rev=6816&view=rev Author: relson Date: 2009-02-21 21:11:01 +0000 (Sat, 21 Feb 2009) Log Message: ----------- Restore listsort type info. Modified Paths: -------------- trunk/bogofilter/src/listsort.c trunk/bogofilter/src/listsort.h trunk/bogofilter/src/rstats.c trunk/bogofilter/src/score.c trunk/bogofilter/src/wordhash.c Modified: trunk/bogofilter/src/listsort.c =================================================================== --- trunk/bogofilter/src/listsort.c 2009-02-21 21:10:04 UTC (rev 6815) +++ trunk/bogofilter/src/listsort.c 2009-02-21 21:11:01 UTC (rev 6816) @@ -43,12 +43,6 @@ typedef unsigned char byte; #include "word.h" -typedef struct element element; -struct element { - element *next, *prev; - int i; -}; - #ifdef TEST static int cmp(const element *a, const element *b); @@ -71,7 +65,7 @@ * list = listsort(mylist); */ -void *listsort(void *list, fcn_compare *compare) { +element *listsort(element *list, fcn_compare *compare) { element *p, *q, *e, *tail, *oldhead; int insize, nmerges, psize, qsize, i; Modified: trunk/bogofilter/src/listsort.h =================================================================== --- trunk/bogofilter/src/listsort.h 2009-02-21 21:10:04 UTC (rev 6815) +++ trunk/bogofilter/src/listsort.h 2009-02-21 21:11:01 UTC (rev 6816) @@ -5,8 +5,14 @@ #include "bftypes.h" -typedef int fcn_compare(const void *a, const void *b); +typedef struct element element; +struct element { + element *next, *prev; + int i; +}; -extern void *listsort(void *list, fcn_compare *compare); +typedef int fcn_compare(const element *a, const element *b); +extern element *listsort(element *list, fcn_compare *compare); + #endif Modified: trunk/bogofilter/src/rstats.c =================================================================== --- trunk/bogofilter/src/rstats.c 2009-02-21 21:10:04 UTC (rev 6815) +++ trunk/bogofilter/src/rstats.c 2009-02-21 21:11:01 UTC (rev 6816) @@ -139,7 +139,7 @@ size_t robn = stats_head->robn; /* sort by ascending spamicity */ - stats_head->list = listsort(stats_head->list, &compare_rstats_t); + stats_head->list = (rstats_t *)listsort((element *)stats_head->list, (fcn_compare *)&compare_rstats_t); if (Rtable || verbose>=3) rstats_print_rtable(stats_head->list); Modified: trunk/bogofilter/src/score.c =================================================================== --- trunk/bogofilter/src/score.c 2009-02-21 21:10:04 UTC (rev 6815) +++ trunk/bogofilter/src/score.c 2009-02-21 21:11:01 UTC (rev 6816) @@ -380,7 +380,7 @@ hashnode_t *node; /* sort by ascending score difference (from 0.5) */ - wh->iter_head = listsort(wh->iter_head, &compare_hashnode_t); + wh->iter_head = (hashnode_t *)listsort((element *)wh->iter_head, (fcn_compare *)&compare_hashnode_t); count = max(token_count_fix, max(token_count_min, token_count_max)); Modified: trunk/bogofilter/src/wordhash.c =================================================================== --- trunk/bogofilter/src/wordhash.c 2009-02-21 21:10:04 UTC (rev 6815) +++ trunk/bogofilter/src/wordhash.c 2009-02-21 21:11:01 UTC (rev 6816) @@ -481,7 +481,7 @@ void wordhash_sort (wordhash_t *wh) { - wh->iter_head = listsort(wh->iter_head, &compare_hashnode_t); + wh->iter_head = (hashnode_t *)listsort((element *)wh->iter_head, (fcn_compare *)&compare_hashnode_t); return; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <re...@us...> - 2009-02-22 20:13:18
|
Revision: 6820 http://bogofilter.svn.sourceforge.net/bogofilter/?rev=6820&view=rev Author: relson Date: 2009-02-22 20:13:08 +0000 (Sun, 22 Feb 2009) Log Message: ----------- Move common variables spam_header_name and spam_header_place to globals.c Modified Paths: -------------- trunk/bogofilter/src/bogolexer.c trunk/bogofilter/src/bogotune.c trunk/bogofilter/src/bogoutil.c trunk/bogofilter/src/configtest.c trunk/bogofilter/src/format.c trunk/bogofilter/src/globals.c trunk/bogofilter/src/passthrough.c Modified: trunk/bogofilter/src/bogolexer.c =================================================================== --- trunk/bogofilter/src/bogolexer.c 2009-02-22 04:21:33 UTC (rev 6819) +++ trunk/bogofilter/src/bogolexer.c 2009-02-22 20:13:08 UTC (rev 6820) @@ -32,11 +32,6 @@ const char *progname = "bogolexer"; -/* prevent larger inclusions */ - -const char *spam_header_name = SPAM_HEADER_NAME; -const char *spam_header_place = ""; - /* Function Definitions */ static void usage(void) Modified: trunk/bogofilter/src/bogotune.c =================================================================== --- trunk/bogofilter/src/bogotune.c 2009-02-22 04:21:33 UTC (rev 6819) +++ trunk/bogofilter/src/bogotune.c 2009-02-22 20:13:08 UTC (rev 6820) @@ -189,7 +189,6 @@ uint cMakeCheck = 50; /* ... for 50 cycles */ const char *logtag = NULL; -const char *spam_header_place = ""; /* Function Declarations */ Modified: trunk/bogofilter/src/bogoutil.c =================================================================== --- trunk/bogofilter/src/bogoutil.c 2009-02-22 04:21:33 UTC (rev 6819) +++ trunk/bogofilter/src/bogoutil.c 2009-02-22 20:13:08 UTC (rev 6820) @@ -53,8 +53,6 @@ bool maintain = false; bool onlyprint = false; -const char *spam_header_place = ""; - /* Function Definitions */ /* dummies to avoid score.o */ Modified: trunk/bogofilter/src/configtest.c =================================================================== --- trunk/bogofilter/src/configtest.c 2009-02-22 04:21:33 UTC (rev 6819) +++ trunk/bogofilter/src/configtest.c 2009-02-22 20:13:08 UTC (rev 6820) @@ -21,7 +21,6 @@ #include "wordlists.h" const char *progname = "configtest"; -const char *spam_header_place = ""; #ifndef DEBUG_CONFIG #define DEBUG_CONFIG(level) (verbose > level) Modified: trunk/bogofilter/src/format.c =================================================================== --- trunk/bogofilter/src/format.c 2009-02-22 04:21:33 UTC (rev 6819) +++ trunk/bogofilter/src/format.c 2009-02-22 20:13:08 UTC (rev 6820) @@ -46,7 +46,6 @@ /* initialized static variables */ -const char *spam_header_name = SPAM_HEADER_NAME; /* used by lexer */ const char *spam_subject_tag = NULL; /* used in passthrough mode */ const char *unsure_subject_tag = NULL; /* used in passthrough mode */ Modified: trunk/bogofilter/src/globals.c =================================================================== --- trunk/bogofilter/src/globals.c 2009-02-22 04:21:33 UTC (rev 6819) +++ trunk/bogofilter/src/globals.c 2009-02-22 20:13:08 UTC (rev 6820) @@ -57,6 +57,9 @@ /*@observer@*/ const char *stats_prefix; +const char *spam_header_name = SPAM_HEADER_NAME; +const char *spam_header_place = ""; + /* for lexer_v3.l */ bool header_line_markup = true; /* -H */ Modified: trunk/bogofilter/src/passthrough.c =================================================================== --- trunk/bogofilter/src/passthrough.c 2009-02-22 04:21:33 UTC (rev 6819) +++ trunk/bogofilter/src/passthrough.c 2009-02-22 20:13:08 UTC (rev 6820) @@ -31,7 +31,6 @@ char msg_register[256]; static char msg_bogofilter[256]; static char msg_spam_header[256]; -const char *spam_header_place = ""; size_t msg_register_size = sizeof(msg_register); /* Function Definitions */ This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <m-...@us...> - 2010-03-14 14:16:35
|
Revision: 6882 http://bogofilter.svn.sourceforge.net/bogofilter/?rev=6882&view=rev Author: m-a Date: 2010-03-14 14:16:28 +0000 (Sun, 14 Mar 2010) Log Message: ----------- Fix a few warnings. Modified Paths: -------------- trunk/bogofilter/src/convert_unicode.c trunk/bogofilter/src/convert_unicode.h trunk/bogofilter/src/db_lock.c trunk/bogofilter/src/iconvert.c trunk/bogofilter/src/lexer_v3.l Modified: trunk/bogofilter/src/convert_unicode.c =================================================================== --- trunk/bogofilter/src/convert_unicode.c 2010-03-14 14:16:07 UTC (rev 6881) +++ trunk/bogofilter/src/convert_unicode.c 2010-03-14 14:16:28 UTC (rev 6882) @@ -106,11 +106,11 @@ { "csgb2312", T }, }; -iconv_t *bf_iconv_open( const char *to_charset, const char *from_charset ) +iconv_t bf_iconv_open( const char *to_charset, const char *from_charset ) { - iconv_t *xd = iconv_open( to_charset, from_charset ); + iconv_t xd = iconv_open( to_charset, from_charset ); - if (xd == (iconv_t)(-1)) { + if (xd == (iconv_t)-1) { int err = errno; if (err == EINVAL) { if (DEBUG_ICONV(1)) @@ -120,9 +120,6 @@ xd = iconv_open( charset_unicode, charset_default ); } } - - if (xd == (iconv_t)(-1)) - xd = NULL; return xd; } Modified: trunk/bogofilter/src/convert_unicode.h =================================================================== --- trunk/bogofilter/src/convert_unicode.h 2010-03-14 14:16:07 UTC (rev 6881) +++ trunk/bogofilter/src/convert_unicode.h 2010-03-14 14:16:28 UTC (rev 6882) @@ -18,7 +18,7 @@ extern void init_charset_table_iconv(const char *from_charset, const char *to_charset); -extern iconv_t *bf_iconv_open( const char *to_charset, +extern iconv_t bf_iconv_open( const char *to_charset, const char *from_charset ); #if defined(CP866) && !defined(ENABLE_UNICODE) && !defined(DISABLE_UNICODE) Modified: trunk/bogofilter/src/db_lock.c =================================================================== --- trunk/bogofilter/src/db_lock.c 2010-03-14 14:16:07 UTC (rev 6881) +++ trunk/bogofilter/src/db_lock.c 2010-03-14 14:16:28 UTC (rev 6882) @@ -291,7 +291,7 @@ if (0 != check_zombies()) { const char *text = "bogofilter or related application has crashed or directory damaged, aborting.\n"; - write(STDERR_FILENO, text, strlen(text)); + (void)write(STDERR_FILENO, text, strlen(text)); _exit(EX_ERROR); /* use _exit, not exit, to avoid running the atexit handler that might deadlock */ } alarm(chk_intval); Modified: trunk/bogofilter/src/iconvert.c =================================================================== --- trunk/bogofilter/src/iconvert.c 2010-03-14 14:16:07 UTC (rev 6881) +++ trunk/bogofilter/src/iconvert.c 2010-03-14 14:16:28 UTC (rev 6882) @@ -58,15 +58,15 @@ } } -static void convert(iconv_t xd, buff_t *src, buff_t *dst) +static void convert(iconv_t xd, buff_t *restrict src, buff_t *restrict dst) { bool done = false; while (!done) { - char *inbuf; + char * inbuf; size_t inbytesleft; - char *outbuf; + char * outbuf; size_t outbytesleft; size_t count; Modified: trunk/bogofilter/src/lexer_v3.l =================================================================== --- trunk/bogofilter/src/lexer_v3.l 2010-03-14 14:16:07 UTC (rev 6881) +++ trunk/bogofilter/src/lexer_v3.l 2010-03-14 14:16:28 UTC (rev 6882) @@ -61,6 +61,8 @@ ** incorrectly because line 24 isn't base64 decoded. */ +#define YY_NO_INPUT + #include "common.h" #include <ctype.h> This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <m-...@us...> - 2010-03-14 16:24:46
|
Revision: 6888 http://bogofilter.svn.sourceforge.net/bogofilter/?rev=6888&view=rev Author: m-a Date: 2010-03-14 16:24:40 +0000 (Sun, 14 Mar 2010) Log Message: ----------- Fix further fallout from the iconv_t cleanup. Modified Paths: -------------- trunk/bogofilter/src/convert_unicode.c trunk/bogofilter/src/iconvert.c trunk/bogofilter/src/iconvert.h Modified: trunk/bogofilter/src/convert_unicode.c =================================================================== --- trunk/bogofilter/src/convert_unicode.c 2010-03-14 15:06:04 UTC (rev 6887) +++ trunk/bogofilter/src/convert_unicode.c 2010-03-14 16:24:40 UTC (rev 6888) @@ -33,7 +33,7 @@ #define SP ' ' #include <iconv.h> -iconv_t cd = NULL; +iconv_t cd = (iconv_t)-1; static void map_nonascii_characters(void) { @@ -128,7 +128,7 @@ { uint idx; - if (cd != NULL) + if (cd != (iconv_t)-1) iconv_close(cd); if (DEBUG_ICONV(1)) Modified: trunk/bogofilter/src/iconvert.c =================================================================== --- trunk/bogofilter/src/iconvert.c 2010-03-14 15:06:04 UTC (rev 6887) +++ trunk/bogofilter/src/iconvert.c 2010-03-14 16:24:40 UTC (rev 6888) @@ -177,14 +177,14 @@ src->t.u.text, src->read, src->t.leng, src->size); } -static void copy(buff_t *src, buff_t *dst) +static void copy(buff_t *restrict src, buff_t *restrict dst) { /* if conversion not available, use memcpy */ dst->t.leng = min(dst->size, src->t.leng); memcpy(dst->t.u.text, src->t.u.text, dst->t.leng+D); } -void iconvert(buff_t *src, buff_t *dst) +void iconvert(buff_t *restrict src, buff_t *restrict dst) { if (cd == NULL) copy(src, dst); @@ -192,9 +192,9 @@ convert(cd, src, dst); } -void iconvert_cd(iconv_t xd, buff_t *src, buff_t *dst) +void iconvert_cd(iconv_t xd, buff_t *restrict src, buff_t *restrict dst) { - if (xd == NULL) + if (xd == (iconv_t)-1) copy(src, dst); else convert(xd, src, dst); Modified: trunk/bogofilter/src/iconvert.h =================================================================== --- trunk/bogofilter/src/iconvert.h 2010-03-14 15:06:04 UTC (rev 6887) +++ trunk/bogofilter/src/iconvert.h 2010-03-14 16:24:40 UTC (rev 6888) @@ -13,9 +13,11 @@ #ifndef ICONVERT_H #define ICONVERT_H +#include "config.h" + #include <iconv.h> -extern void iconvert(buff_t *src, buff_t *dst); -extern void iconvert_cd(iconv_t cd, buff_t *src, buff_t *dst); +extern void iconvert(buff_t *restrict src, buff_t *restrict dst); +extern void iconvert_cd(iconv_t cd, buff_t *restrict src, buff_t *restrict dst); #endif This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <m-...@us...> - 2010-03-17 23:52:54
|
Revision: 6890 http://bogofilter.svn.sourceforge.net/bogofilter/?rev=6890&view=rev Author: m-a Date: 2010-03-17 23:52:48 +0000 (Wed, 17 Mar 2010) Log Message: ----------- Fix a few pedantic compiler warnings. Modified Paths: -------------- trunk/bogofilter/src/score.c trunk/bogofilter/src/wordhash.c trunk/bogofilter/src/wordlists.c Modified: trunk/bogofilter/src/score.c =================================================================== --- trunk/bogofilter/src/score.c 2010-03-14 18:14:36 UTC (rev 6889) +++ trunk/bogofilter/src/score.c 2010-03-17 23:52:48 UTC (rev 6890) @@ -358,7 +358,7 @@ */ static bool need_scoring_boundary(size_t count) { - // Early out if no token count limits are set + /* Early out if no token count limits are set */ if (((token_count_min == 0) || (token_count_min <= count)) && ((token_count_max == 0) || (token_count_max >= count)) && ((token_count_fix == 0) || (token_count_fix == count))) @@ -427,8 +427,8 @@ double d2; if (!fBogotune) { - const hashnode_t *hn1 = (const hashnode_t const *)pv1; - const hashnode_t *hn2 = (const hashnode_t const *)pv2; + const hashnode_t *hn1 = (const hashnode_t *const)pv1; + const hashnode_t *hn2 = (const hashnode_t *const)pv2; d1 = fabs(((wordprop_t *) hn1->data)->prob - EVEN_ODDS); d2 = fabs(((wordprop_t *) hn2->data)->prob - EVEN_ODDS); } else { @@ -507,7 +507,7 @@ void score_cleanup(void) { -// rstats_cleanup(); +/* rstats_cleanup(); */ } #ifdef GSL_INTEGRATE_PDF Modified: trunk/bogofilter/src/wordhash.c =================================================================== --- trunk/bogofilter/src/wordhash.c 2010-03-14 18:14:36 UTC (rev 6889) +++ trunk/bogofilter/src/wordhash.c 2010-03-17 23:52:48 UTC (rev 6890) @@ -89,7 +89,7 @@ case WH_NORMAL: wh->bin = xcalloc (NHASH, sizeof (hashnode_t **)); break; - case WH_CNTS: // used for bogotune with msg_count files + case WH_CNTS: /* used for bogotune with msg_count files */ wh->cnts = (wordcnts_t *) xcalloc(wh->size, sizeof(wordcnts_t)); break; case WH_PROPS: Modified: trunk/bogofilter/src/wordlists.c =================================================================== --- trunk/bogofilter/src/wordlists.c 2010-03-14 18:14:36 UTC (rev 6889) +++ trunk/bogofilter/src/wordlists.c 2010-03-17 23:52:48 UTC (rev 6890) @@ -141,7 +141,7 @@ fprintf(stderr, "error #%d - %s.\n", err, strerror(err)); - // print error and exit + /* print error and exit */ wordlist_error(err); } /* switch */ } else { /* ds_open */ @@ -359,7 +359,7 @@ return true; } -// print error and exit +/* print error and exit */ void wordlist_error(int err) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <m-...@us...> - 2010-03-23 17:32:55
|
Revision: 6894 http://bogofilter.svn.sourceforge.net/bogofilter/?rev=6894&view=rev Author: m-a Date: 2010-03-23 17:32:46 +0000 (Tue, 23 Mar 2010) Log Message: ----------- Add missing documentation. Modified Paths: -------------- trunk/bogofilter/src/buff.h trunk/bogofilter/src/word.h Modified: trunk/bogofilter/src/buff.h =================================================================== --- trunk/bogofilter/src/buff.h 2010-03-23 17:23:48 UTC (rev 6893) +++ trunk/bogofilter/src/buff.h 2010-03-23 17:32:46 UTC (rev 6894) @@ -49,9 +49,9 @@ /** print the unread part of the buff_t \a self to the stdio stream fp * by means of word_puts(), which see for meaning of \a width. */ -extern void buff_puts(const buff_t *self, +extern void buff_puts(const buff_t *self, /**< buff struct to print */ uint width, /**< passed verbatim to word_puts() */ - FILE *fp); + FILE *fp /**< stdio.h stream to print to */); extern void buff_shift(buff_t *self, uint start, uint length); Modified: trunk/bogofilter/src/word.h =================================================================== --- trunk/bogofilter/src/word.h 2010-03-23 17:23:48 UTC (rev 6893) +++ trunk/bogofilter/src/word.h 2010-03-23 17:32:46 UTC (rev 6894) @@ -26,7 +26,7 @@ /** create a new word_t from the \a leng bytes at address \a text */ extern word_t *word_new(const byte *text, /**< may be NULL, to create a blank word_t */ - uint leng); + uint leng /**< length of input string */); /** create a new word_t from the NUL-terminated \a cstring */ extern word_t *word_news(const char *cstring); @@ -45,11 +45,11 @@ /** output \a self onto the stream \a fp, formatted to \a width * characters. */ -extern void word_puts(const word_t *self, +extern void word_puts(const word_t *self, /**< word structure to print */ uint width, /**< if 0, use actual width, if > 0 then either * truncate the string or fill it with blanks to print * exactly \a width characters */ - FILE *fp); + FILE *fp /**< stdio stream to print word to */); /** Compare word \a w to string \a s. */ extern int word_cmps(const word_t *w, const char *s); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <m-...@us...> - 2010-03-23 17:48:59
|
Revision: 6895 http://bogofilter.svn.sourceforge.net/bogofilter/?rev=6895&view=rev Author: m-a Date: 2010-03-23 17:48:52 +0000 (Tue, 23 Mar 2010) Log Message: ----------- Align/document variables to fix Doxygen warnings. Modified Paths: -------------- trunk/bogofilter/src/datastore.h trunk/bogofilter/src/datastore_db.c trunk/bogofilter/src/datastore_db.h trunk/bogofilter/src/datastore_sqlite.c trunk/bogofilter/src/fgetsl.c trunk/bogofilter/src/fgetsl.h trunk/bogofilter/src/word.h Modified: trunk/bogofilter/src/datastore.h =================================================================== --- trunk/bogofilter/src/datastore.h 2010-03-23 17:32:46 UTC (rev 6894) +++ trunk/bogofilter/src/datastore.h 2010-03-23 17:48:52 UTC (rev 6895) @@ -176,9 +176,9 @@ * passed as the first parameter in all subsequent database function calls. */ /*@only@*/ /*@null@*/ -extern void *ds_open(void *dbev, /**< parent environment */ +extern void *ds_open(void *dbe, /**< parent environment */ bfpath *bfp, /**< path to database file */ - dbmode_t mode /**< open mode, DS_READ or DS_WRITE */); + dbmode_t open_mode /**< open mode, DS_READ or DS_WRITE */); /** Close file and clean up. */ extern void ds_close(/*@only@*/ void *vhandle); Modified: trunk/bogofilter/src/datastore_db.c =================================================================== --- trunk/bogofilter/src/datastore_db.c 2010-03-23 17:32:46 UTC (rev 6894) +++ trunk/bogofilter/src/datastore_db.c 2010-03-23 17:48:52 UTC (rev 6895) @@ -503,7 +503,7 @@ * guess a page size. As this is a safety margin for the file size, * we'll return 0 and let the caller guess some size instead. */ /* return page size, of 0xffffffff for trouble */ -static uint32_t get_psize(DB *dbp, +static uint32_t get_psize(DB *dbp, /**< Berkeley DB pointer */ bool wanted /** if set, try harder to get the page size, even if * it requires to read the whole database */) { Modified: trunk/bogofilter/src/datastore_db.h =================================================================== --- trunk/bogofilter/src/datastore_db.h 2010-03-23 17:32:46 UTC (rev 6894) +++ trunk/bogofilter/src/datastore_db.h 2010-03-23 17:48:52 UTC (rev 6895) @@ -52,8 +52,8 @@ * \return zero if the word does not exist in the database. */ int db_get_dbvalue( - void *handle, /**< database handle */ - const dbv_t *token, /**< key to look for */ + void *vhandle, /**< database handle */ + const dbv_t *token, /**< key (token) to look for */ /*@out@*/ dbv_t *val /** output, note: this must be * pre-allocated and val->leng must * specify how many bytes val->data can Modified: trunk/bogofilter/src/datastore_sqlite.c =================================================================== --- trunk/bogofilter/src/datastore_sqlite.c 2010-03-23 17:32:46 UTC (rev 6894) +++ trunk/bogofilter/src/datastore_sqlite.c 2010-03-23 17:48:52 UTC (rev 6895) @@ -152,7 +152,11 @@ return rc; } -static sqlite3_stmt *sqlprep(dbh_t *dbh, const char *cmd, bool bailout /** exit on error? */) { +/** Compile SQL statement \a cmd for database handle \a dbh, exiting on + * failure if \a bailout is true. */ +static sqlite3_stmt *sqlprep(dbh_t *dbh /** data base handle */, + const char *cmd /** sqlite command to compile */, + bool bailout /** exit on error? */) { const char *tail; /* dummy */ sqlite3_stmt *ptr; if (sqlite3_prepare_v2(dbh->db, cmd, strlen(cmd), &ptr, &tail) != SQLITE_OK) { @@ -525,10 +529,10 @@ return sql_fastpath(dbh, "db_set_dbvalue", dbh->insert, NULL, 0); } -int db_get_dbvalue(void *vhandle, const dbv_t* key, /*@out@*/ dbv_t *val) { +int db_get_dbvalue(void *vhandle, const dbv_t* token, /*@out@*/ dbv_t *val) { dbh_t *dbh = vhandle; - sqlite3_bind_blob(dbh->select, 1, key->data, key->leng, SQLITE_STATIC); + sqlite3_bind_blob(dbh->select, 1, token->data, token->leng, SQLITE_STATIC); return sql_fastpath(dbh, "db_get_dbvalue", dbh->select, val, DS_NOTFOUND); } Modified: trunk/bogofilter/src/fgetsl.c =================================================================== --- trunk/bogofilter/src/fgetsl.c 2010-03-23 17:32:46 UTC (rev 6894) +++ trunk/bogofilter/src/fgetsl.c 2010-03-23 17:48:52 UTC (rev 6895) @@ -13,9 +13,9 @@ #include "fgetsl.h" /* calls exit(EX_ERROR) on read error or when max_size < 2 */ -int fgetsl(char *buf, int max_size, /*@null@*/ FILE *in) +int fgetsl(char *buf, int siz, /*@null@*/ FILE *in) { - return xfgetsl(buf, max_size, in, 0); + return xfgetsl(buf, siz, in, 0); } int xfgetsl(char *buf, int max_size, FILE *in, bool no_nul_terminate) Modified: trunk/bogofilter/src/fgetsl.h =================================================================== --- trunk/bogofilter/src/fgetsl.h 2010-03-23 17:32:46 UTC (rev 6894) +++ trunk/bogofilter/src/fgetsl.h 2010-03-23 17:48:52 UTC (rev 6895) @@ -9,7 +9,7 @@ #include <stdio.h> -/** This function reads up to \a siz-1 characters from \a stream into \a buf +/** This function reads up to \a siz-1 characters from the stdio stream \a in into \a buf * and adds a terminating NUL character. When the buffer cannot hold at * least one character of payload, the program is aborted. * \return @@ -19,10 +19,10 @@ */ extern int fgetsl(/*@out@*/ char *buf /** output buffer */, int siz /** capacity of buffer */, - FILE *stream /** input stream */); + FILE *in /** input stream */); -/** This function reads up to \p siz or \p siz-1 (depending on \p - * no_NUL_terminate) characters from \p stream into \p buf and +/** This function reads up to \p max_size or \p max_size-1 (depending on \p + * no_nul_terminate) characters from stdio stream \p in into \p buf and * optionally adds a terminating NUL character. When the buffer * cannot hold at least one character of payload, the program is * aborted. @@ -31,11 +31,10 @@ * - zero or positive: number of characters read (not counting the * trailing NUL) */ - extern int xfgetsl(/*@out@*/ char *buf /** output buffer */, - int siz /** capacity of buffer */, - /*@null@*/ FILE *stream /** input stream */, - bool no_NUL_terminate /** \li if false, the maximum amount of bytes read is size-1 and the buffer is NUL terminated. + int max_size /** capacity of buffer */, + /*@null@*/ FILE *in /** input stream */, + bool no_nul_terminate /** \li if false, the maximum amount of bytes read is size-1 and the buffer is NUL terminated. \li if true, the maximum amount of bytes read is size and the buffer WILL NOT BE NUL terminated. */); #endif Modified: trunk/bogofilter/src/word.h =================================================================== --- trunk/bogofilter/src/word.h 2010-03-23 17:32:46 UTC (rev 6894) +++ trunk/bogofilter/src/word.h 2010-03-23 17:48:52 UTC (rev 6895) @@ -43,9 +43,9 @@ /** create a new word_t that is the concatenation of \a w1 and \a w2 */ extern word_t *word_concat(const word_t *w1, const word_t *w2); -/** output \a self onto the stream \a fp, formatted to \a width +/** output \a word onto the stream \a fp, formatted to \a width * characters. */ -extern void word_puts(const word_t *self, /**< word structure to print */ +extern void word_puts(const word_t *word, /**< word structure to print */ uint width, /**< if 0, use actual width, if > 0 then either * truncate the string or fill it with blanks to print * exactly \a width characters */ This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |