[cvs] bogofilter/src bogohist.c,1.11.2.2,1.11.2.3 bogotune.c,1.127.2.4,1.127.2.5 bogoutil.c,1.110.2.
Fast Bayesian spam filter along lines suggested by Paul Graham
Brought to you by:
m-a
Update of /cvsroot/bogofilter/bogofilter/src In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv9978/src Modified Files: Tag: branch-db-txn bogohist.c bogotune.c bogoutil.c datastore.c datastore.h datastore_db.c datastore_db.h datastore_qdbm.c datastore_tdb.c lexer.c main.c maint.c msgcounts.c passthrough.c register.c robx.c score.c wordlists.c wordlists_base.c Log Message: Merge up to db-txn-mergepoint12. Index: bogohist.c =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/src/bogohist.c,v retrieving revision 1.11.2.2 retrieving revision 1.11.2.3 diff -u -d -r1.11.2.2 -r1.11.2.3 --- bogohist.c 4 Jun 2004 22:03:03 -0000 1.11.2.2 +++ bogohist.c 29 Jun 2004 23:01:14 -0000 1.11.2.3 @@ -136,7 +136,6 @@ build_wordlist_path(filepath, sizeof(filepath), path); - ds_init(); dsh = ds_open(CURDIR_S, filepath, DS_READ); if (dsh == NULL) return EX_ERROR; @@ -160,6 +159,7 @@ ds_cleanup(); memset(&hist, 0, sizeof(hist)); + ds_init(); rc = ds_oper(filepath, DS_READ, ds_histogram_hook, &hist); count = print_histogram(&hist); Index: bogotune.c =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/src/bogotune.c,v retrieving revision 1.127.2.4 retrieving revision 1.127.2.5 diff -u -d -r1.127.2.4 -r1.127.2.5 --- bogotune.c 4 Jun 2004 22:03:03 -0000 1.127.2.4 +++ bogotune.c 29 Jun 2004 23:01:14 -0000 1.127.2.5 @@ -1667,6 +1667,8 @@ check_wordlist_path(); } + ds_init(); + bogotune(); bogotune_free(); Index: bogoutil.c =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/src/bogoutil.c,v retrieving revision 1.110.2.5 retrieving revision 1.110.2.6 diff -u -d -r1.110.2.5 -r1.110.2.6 --- bogoutil.c 4 Jun 2004 22:03:03 -0000 1.110.2.5 +++ bogoutil.c 29 Jun 2004 23:01:15 -0000 1.110.2.6 @@ -79,7 +79,6 @@ token_count = 0; - set_bogohome(ds_file); rc = ds_oper(ds_file, DS_READ, ds_dump_hook, NULL); if (rc) @@ -114,10 +113,6 @@ unsigned long count[IX_SIZE], date; YYYYMMDD today_save = today; - set_bogohome(ds_file); - - ds_init(); - dsh = ds_open(CURDIR_S, ds_file, DS_WRITE | DS_LOAD); if (dsh == NULL) return EX_ERROR; @@ -188,21 +183,32 @@ load_count += 1; /* Slower, but allows multiple lists to be concatenated */ set_date(date); - ds_read(dsh, token, &data); + switch (ds_read(dsh, token, &data)) { + case 0: + case 1: + break; + default: + rv = 1; + } data.spamcount += spamcount; data.goodcount += goodcount; - ds_write(dsh, token, &data); + if (ds_write(dsh, token, &data)) rv = 1; } word_free(token); } - switch (ds_txn_commit(dsh)) { - case DST_FAILURE: - case DST_TEMPFAIL: - fprintf(stderr, "commit failed\n"); - exit(EXIT_FAILURE); - case DST_OK: - break; + if (rv) { + fprintf(stderr, "read or write error, aborting.\n"); + ds_txn_abort(dsh); + } else { + switch (ds_txn_commit(dsh)) { + case DST_FAILURE: + case DST_TEMPFAIL: + fprintf(stderr, "commit failed\n"); + exit(EXIT_FAILURE); + case DST_OK: + break; + } } ds_close(dsh, false); @@ -256,6 +262,7 @@ void *dsh = NULL; /* initialize to silence bogus gcc warning */ struct stat sb; + int rv = 0; /* protect against broken stat(2) that succeeds for empty names */ if (path == NULL || *path == '\0') { @@ -263,8 +270,6 @@ return EX_ERROR; } - ds_init(); - if ( stat(path, &sb) == 0 ) { /* XXX FIXME: deadlock possible */ if ( ! S_ISDIR(sb.st_mode)) { /* words from file */ @@ -306,6 +311,7 @@ { dsv_t val; word_t *token; + int rc; unsigned long spam_count; unsigned long good_count; @@ -323,33 +329,43 @@ token = word_new(word, (uint) strlen((const char *)word)); } - ds_read(dsh, token, &val); - spam_count = val.spamcount; - good_count = val.goodcount; + rc = ds_read(dsh, token, &val); + switch (rc) { + case 0: + spam_count = val.spamcount; + good_count = val.goodcount; - if (!show_probability) - printf(data_format, token->text, spam_count, good_count); - else - { - rob_prob = calc_prob(good_count, spam_count); - printf(data_format, token->text, spam_count, good_count, rob_prob); + if (!show_probability) + printf(data_format, token->text, spam_count, good_count); + else + { + rob_prob = calc_prob(good_count, spam_count); + printf(data_format, token->text, spam_count, good_count, rob_prob); + } + break; + case 1: + break; + default: + fprintf(stderr, "Cannot read from data base.\n"); + rv = EX_ERROR; + goto finish; } if (token != &buff->t) word_free(token); } - if (DST_OK != ds_txn_commit(dsh)) { - ds_close(dsh, false); - fprintf(stderr, "Cannot commit transaction.\n"); - return EX_ERROR; +finish: + if (DST_OK != rv ? ds_txn_abort(dsh) : ds_txn_commit(dsh)) { + fprintf(stderr, "Cannot %s transaction.\n", rv ? "abort" : "commit"); + rv = EX_ERROR; } ds_close(dsh, false); ds_cleanup(); buff_free(buff); - return 0; + return rv; } static int get_robx(char *path) @@ -358,6 +374,8 @@ int ret = 0; rx = compute_robinson_x(path); + if (rx < 0) + return EX_ERROR; if (onlyprint) printf("%f\n", rx); @@ -373,7 +391,7 @@ run_type = REG_SPAM; set_bogohome(filepath); - ds_init(); + dsh = ds_open(CURDIR_S, filepath, DS_WRITE); if (dsh == NULL) return EX_ERROR; @@ -621,6 +639,7 @@ int main(int argc, char *argv[]) { + int rc; progtype = build_progtype(progname, DB_TYPE); set_today(); /* compute current date for token age */ @@ -635,29 +654,38 @@ atexit(bf_exit); + set_bogohome(ds_file); + ds_init(); + switch(flag) { case M_DUMP: - return dump_wordlist(ds_file); + rc = dump_wordlist(ds_file); + break; case M_LOAD: - return load_wordlist(ds_file); + rc = load_wordlist(ds_file); + break; case M_MAINTAIN: maintain = true; - set_bogohome(ds_file); - return maintain_wordlist_file(ds_file); + rc = maintain_wordlist_file(ds_file); + break; case M_WORD: argc -= optind; argv += optind; - set_bogohome(ds_file); - return display_words(ds_file, argc, argv, prob); + rc = display_words(ds_file, argc, argv, prob); + break; case M_HIST: - set_bogohome(ds_file); - return histogram(ds_file); + rc = histogram(ds_file); + break; case M_ROBX: - set_bogohome(ds_file); - return get_robx(ds_file); + rc = get_robx(ds_file); + break; case M_NONE: default: /* should have been handled above */ abort(); + break; } + + ds_cleanup(); + return rc; } Index: datastore.c =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/src/datastore.c,v retrieving revision 1.28.2.9 retrieving revision 1.28.2.10 diff -u -d -r1.28.2.9 -r1.28.2.10 --- datastore.c 28 Jun 2004 14:48:00 -0000 1.28.2.9 +++ datastore.c 29 Jun 2004 23:01:16 -0000 1.28.2.10 @@ -23,6 +23,7 @@ #include "error.h" #include "maint.h" +#include "rand_sleep.h" #include "swap.h" #include "word.h" #include "xmalloc.h" @@ -120,21 +121,25 @@ void *ds_open(const char *path, const char *name, dbmode_t open_mode) { dsh_t *dsh; - bool create = false; - void *v = db_open(path, name, open_mode); + void *v; - if (v == NULL && open_mode != DS_READ) { - create = true; - v = db_open(path, name, DS_CREATE); - } + v = db_open(path, name, open_mode); if (!v) return NULL; dsh = dsh_init(v); - if (create && (open_mode & DS_WRITE) && ! (open_mode & DS_LOAD)) - ds_set_wordlist_version(dsh, NULL); + if (db_created(v) && ! (open_mode & DS_LOAD)) { + if (DST_OK == ds_txn_begin(dsh)) { + ds_set_wordlist_version(dsh, NULL); + if (DST_OK == ds_txn_commit(dsh)) + return dsh; + } + db_close(v, false); + dsh_free(dsh); + dsh = NULL; + } return dsh; } @@ -189,18 +194,18 @@ (unsigned long)val->spamcount, (unsigned long)val->goodcount); } - break; + return 0; case DS_NOTFOUND: if (DEBUG_DATABASE(3)) { fprintf(dbgout, "ds_read: [%.*s] not found\n", CLAMP_INT_MAX(word->leng), (char *) word->text); } - break; + return 1; case DS_ABORT_RETRY: - if (DEBUG_DATABASE(3)) { - fprintf(dbgout, "ds_read: [%.*s] returned abort-retry\n", + if (DEBUG_DATABASE(1)) { + print_error(__FILE__, __LINE__, "ds_read('%.*s') was aborted to recover from a deadlock.", CLAMP_INT_MAX(word->leng), (char *) word->text); } break; @@ -329,7 +334,6 @@ int ret = 0; void *dsh; - ds_init(); dsh = ds_open(CURDIR_S, path, open_mode); if (dsh == NULL) { @@ -346,7 +350,6 @@ } ds_close(dsh, false); - ds_cleanup(); return ret; } @@ -356,13 +359,13 @@ void ds_init() { + db_init(); if (msg_count_tok == NULL) { msg_count_tok = word_new((const byte *)MSG_COUNT, strlen(MSG_COUNT)); } if (wordlist_version_tok == NULL) { wordlist_version_tok = word_new((const byte *)WORDLIST_VERSION, strlen(WORDLIST_VERSION)); } - db_init(); } /* Cleanup storage allocation */ @@ -388,7 +391,7 @@ /* Set the number of messages associated with database. */ -int ds_set_msgcounts(void *vhandle, dsv_t *val) +int ds_set_msgcounts(void *vhandle, dsv_t *val) { dsh_t *dsh = vhandle; @@ -400,20 +403,17 @@ /* Get the wordlist version associated with database. */ -bool ds_get_wordlist_version(void *vhandle, dsv_t *val) +int ds_get_wordlist_version(void *vhandle, dsv_t *val) { - int rc; dsh_t *dsh = vhandle; - rc = ds_read(dsh, wordlist_version_tok, val); - - return rc == 0; + return ds_read(dsh, wordlist_version_tok, val); } /* Set the wordlist version associated with database. */ -void ds_set_wordlist_version(void *vhandle, dsv_t *val) +int ds_set_wordlist_version(void *vhandle, dsv_t *val) { dsh_t *dsh = vhandle; dsv_t tmp; @@ -427,9 +427,7 @@ val->date = today; - ds_write(dsh, wordlist_version_tok, val); - - return; + return ds_write(dsh, wordlist_version_tok, val); } const char *ds_version_str(void) Index: datastore.h =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/src/datastore.h,v retrieving revision 1.27.2.9 retrieving revision 1.27.2.10 diff -u -d -r1.27.2.9 -r1.27.2.10 --- datastore.h 20 Jun 2004 17:02:38 -0000 1.27.2.9 +++ datastore.h 29 Jun 2004 23:01:16 -0000 1.27.2.10 @@ -59,7 +59,8 @@ /** Status value used when a key is not found in the data base. */ #define DS_NOTFOUND (-1) -/** Status value used when the transaction must be aborted. */ +/** Status value when the transaction was aborted to resolve a deadlock + * and should be retried. */ #define DS_ABORT_RETRY (-2) /** Macro that clamps its argument to INT_MAX and casts it to int. */ @@ -211,10 +212,10 @@ #define DST_FAILURE (2) /** Get the database version */ -extern bool ds_get_wordlist_version(void *vhandle, dsv_t *val); +extern int ds_get_wordlist_version(void *vhandle, dsv_t *val); /** set the database version */ -extern void ds_set_wordlist_version(void *vhandle, dsv_t *val); +extern int ds_set_wordlist_version(void *vhandle, dsv_t *val); /** Get the current process ID. */ extern unsigned long ds_handle_pid(void *vhandle); Index: datastore_db.c =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/src/datastore_db.c,v retrieving revision 1.93.2.16 retrieving revision 1.93.2.17 diff -u -d -r1.93.2.16 -r1.93.2.17 --- datastore_db.c 28 Jun 2004 14:48:01 -0000 1.93.2.16 +++ datastore_db.c 29 Jun 2004 23:01:17 -0000 1.93.2.17 @@ -68,7 +68,6 @@ static const DBTYPE dbtype = DB_BTREE; static bool init = false; -bool create_flag = false; /* For datastore.c (to add .WORDLIST_VERSION) */ typedef struct { char *path; @@ -87,6 +86,9 @@ #define DB_AT_MOST(maj, min) ((DB_VERSION_MAJOR < (maj)) || ((DB_VERSION_MAJOR == (maj)) && (DB_VERSION_MINOR <= (min)))) #define DB_EQUAL(maj, min) ((DB_VERSION_MAJOR == (maj)) && (DB_VERSION_MINOR == (min))) +/* dummy infrastructure, to be expanded by environment + * or transactional initialization/shutdown */ + /* Function definitions */ /** translate BerkeleyDB \a flags bitfield back to symbols */ @@ -289,26 +291,18 @@ char *t; dbh_t *handle = NULL; - uint32_t open_flags = 0; + uint32_t opt_flags = 0; - if (!init) - /* internal error: must be called only after initialization */ - internal_error; + assert(init); check_db_version(); - if (open_mode & DS_READ ) - open_flags = DB_RDONLY; - if (open_mode & DS_CREATE ) - open_flags = DB_CREATE | DB_EXCL; - { #if DB_AT_LEAST(4,1) int flags; #endif DB *dbp; uint32_t pagesize; - bool err = false; handle = dbh_init(path, name); @@ -350,29 +344,13 @@ t = handle->name; retry_db_open: - ret = DB_OPEN(dbp, t, NULL, dbtype, open_flags, 0664); - - if (ret != 0) { - err = (ret != ENOENT) || (open_flags & DB_RDONLY); - if (!err) { - ret = DB_OPEN(dbp, t, NULL, dbtype, open_flags | DB_CREATE | DB_EXCL, 0664); - if (ret != 0) - err = true; - else - handle->created = true; - } - } - - if (ret != 0) { - if (ret == ENOENT && open_flags != DB_RDONLY) - return NULL; - else - err = true; - } - - if (err) + handle->created = false; + if ((ret = DB_OPEN(dbp, t, NULL, dbtype, opt_flags, 0664)) != 0 + && ( ret != ENOENT || opt_flags == DB_RDONLY || + (handle->created = true), + (ret = DB_OPEN(dbp, t, NULL, dbtype, opt_flags | DB_CREATE | DB_EXCL, 0664)) != 0)) { - if (open_flags != DB_RDONLY && ret == EEXIST && --retries) { + if (open_mode != DB_RDONLY && ret == EEXIST && --retries) { /* sleep for 4 to 100 ms - this is just to give up the CPU * to another process and let it create the data base * file in peace */ @@ -518,11 +496,7 @@ dbh_t *handle = vhandle; DB_TXN *t = handle->txn; assert(dbe); - if (!t) { - print_error(__FILE__, __LINE__, - "db_txn_commit called without transaction open."); - return DST_FAILURE; - } + assert(t); ret = BF_TXN_COMMIT(t, 0); if (ret) @@ -612,6 +586,7 @@ ret = DS_NOTFOUND; break; case DB_LOCK_DEADLOCK: + db_txn_abort(handle); ret = DS_ABORT_RETRY; break; default: @@ -647,21 +622,22 @@ ret = dbp->put(dbp, handle->txn, &db_key, &db_data, 0); - if (ret) { + if (ret == DB_LOCK_DEADLOCK) { + db_txn_abort(handle); + return DS_ABORT_RETRY; + } + + if (ret != 0) { print_error(__FILE__, __LINE__, "(db) db_set_dbvalue( '%.*s' ), err: %d, %s", - CLAMP_INT_MAX(token->leng), (char *)token->data, ret, db_strerror(ret)); - if (ret == DB_LOCK_DEADLOCK) { - db_txn_abort(handle); - return DS_ABORT_RETRY; - } else - exit(EX_ERROR); + CLAMP_INT_MAX(token->leng), (char *)token->data, ret, db_strerror(ret)); + exit(EX_ERROR); } if (DEBUG_DATABASE(3)) fprintf(dbgout, "DB->put(%.*s): %s\n", CLAMP_INT_MAX(token->leng), (char *) token->data, db_strerror(ret)); - return ret; + return 0; } Index: datastore_db.h =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/src/datastore_db.h,v retrieving revision 1.8.4.6 retrieving revision 1.8.4.7 diff -u -d -r1.8.4.6 -r1.8.4.7 --- datastore_db.h 28 Jun 2004 14:48:01 -0000 1.8.4.6 +++ datastore_db.h 29 Jun 2004 23:01:18 -0000 1.8.4.7 @@ -38,13 +38,6 @@ /** Flush pending writes to disk */ void db_flush(void *handle); -/** Do global initializations. \return 0 for success, non-zero for - * error. */ -int db_init(void); - -/** Cleanup storage allocation */ -void db_cleanup(void); - /** Retrieve the value associated with a given word in a list. * \return zero if the word does not exist in the database. Front-end */ @@ -93,6 +86,9 @@ /* Returns created flag */ bool db_created(void *vhandle); +int db_init(void); +void db_cleanup(void); + /* This is not currently used ... * #define db_write_lock(fd) db_lock(fd, F_SETLKW, F_WRLCK) Index: datastore_qdbm.c =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/src/datastore_qdbm.c,v retrieving revision 1.21.2.4 retrieving revision 1.21.2.5 diff -u -d -r1.21.2.4 -r1.21.2.5 --- datastore_qdbm.c 28 Jun 2004 14:48:01 -0000 1.21.2.4 +++ datastore_qdbm.c 29 Jun 2004 23:01:18 -0000 1.21.2.5 @@ -47,8 +47,6 @@ * or transactional initialization/shutdown */ static bool init = false; -int db_init(void) { init = true; return 0; } -void db_cleanup(void) { init = false; } /* Function definitions */ @@ -128,8 +126,6 @@ if (handle == NULL) return NULL; - db_init(); - dbp = handle->dbp = dpopen(handle->name, open_flags, DB_INITBNUM); if ((dbp == NULL) && (open_mode & DS_WRITE)) { @@ -276,8 +272,6 @@ handle->dbp = NULL; dbh_free(handle); - - db_cleanup(); } @@ -347,6 +341,9 @@ /* dummy infrastructure, to be expanded by environment * or transactional initialization/shutdown */ +int db_init(void) { init = true; return 0; } +void db_cleanup(void) { init = false; } + int db_txn_begin(void *d) { (void)d; return 0; } int db_txn_abort(void *d) { (void)d; return 0; } int db_txn_commit(void *d) { (void)d; return 0; } Index: datastore_tdb.c =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/src/datastore_tdb.c,v retrieving revision 1.24.4.4 retrieving revision 1.24.4.5 diff -u -d -r1.24.4.4 -r1.24.4.5 --- datastore_tdb.c 28 Jun 2004 14:48:01 -0000 1.24.4.4 +++ datastore_tdb.c 29 Jun 2004 23:01:19 -0000 1.24.4.5 @@ -37,8 +37,6 @@ * or transactional initialization/shutdown */ static bool init = false; -int db_init(void) { init = true; return 0; } -void db_cleanup(void) { init = false; } /* Function definitions */ @@ -119,8 +117,6 @@ if (handle == NULL) return NULL; - db_init(); - dbp = handle->dbp = tdb_open(handle->name, 0, tdb_flags, open_flags, 0664); if ((dbp == NULL) && (open_mode & DS_WRITE)) { @@ -258,8 +254,6 @@ } dbh_free(handle); - - db_cleanup(); } /* @@ -361,6 +355,8 @@ /* dummy infrastructure, to be expanded by environment * or transactional initialization/shutdown */ +int db_init(void) { init = true; return 0; } +void db_cleanup(void) { init = false; } int db_txn_begin(void *d) { (void)d; return 0; } int db_txn_abort(void *d) { (void)d; return 0; } Index: lexer.c =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/src/lexer.c,v retrieving revision 1.88.2.4 retrieving revision 1.88.2.5 diff -u -d -r1.88.2.4 -r1.88.2.5 --- lexer.c 28 Jun 2004 21:40:23 -0000 1.88.2.4 +++ lexer.c 29 Jun 2004 23:01:19 -0000 1.88.2.5 @@ -192,7 +192,7 @@ if (!isspace(buff->t.text[0])) return count; /* Check for empty line which terminates message header */ - if (is_eol((char *)buff->t.text, count)) + if (is_eol(buff->t.text, count)) return count; } Index: main.c =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/src/main.c,v retrieving revision 1.87.2.2 retrieving revision 1.87.2.3 diff -u -d -r1.87.2.2 -r1.87.2.3 --- main.c 4 Jun 2004 22:03:19 -0000 1.87.2.2 +++ main.c 29 Jun 2004 23:01:19 -0000 1.87.2.3 @@ -52,6 +52,7 @@ openlog("bogofilter", LOG_PID, LOG_MAIL); /* open all wordlists */ + ds_init(); open_wordlists((run_type == RUN_NORMAL) ? DS_READ : DS_WRITE); output_setup(); @@ -73,6 +74,7 @@ close_wordlists(false); free_wordlists(); + ds_cleanup(); /* cleanup storage */ mime_cleanup(); Index: maint.c =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/src/maint.c,v retrieving revision 1.41.2.4 retrieving revision 1.41.2.5 diff -u -d -r1.41.2.4 -r1.41.2.5 --- maint.c 4 Jun 2004 22:03:20 -0000 1.41.2.4 +++ maint.c 29 Jun 2004 23:01:19 -0000 1.41.2.5 @@ -142,60 +142,6 @@ return change; } -#ifdef DEAD_CODE -void maintain_wordlists(void) -{ - wordlist_t *list; - - for (list = word_lists; list != NULL; list = list->next) { - maintain_wordlist(list->dsh); - list = list->next; - } -} - -static bool check_wordlist_version(dsh_t *dsh) -{ - dsv_t val; - ds_get_wordlist_version(dsh, &val); - if (val.count[0] >= CURRENT_VERSION) - return true; - else - return false; -} - -int maintain_wordlist_file(const char *db_file) -{ - int rc = 0; - dsh_t *dsh; - bool done = false; - - ds_init(); - dsh = ds_open(CURDIR_S, db_file, DS_WRITE); - - if (dsh == NULL) - return EX_ERROR; - - if (upgrade_wordlist_version) { - done = check_wordlist_version(dsh); - if (!done) - fprintf(dbgout, "Upgrading wordlist.\n"); - else - fprintf(dbgout, "Wordlist has already been upgraded.\n"); - } - - if (!done) - rc = maintain_wordlist(dsh); - - if (!done && upgrade_wordlist_version) - ds_set_wordlist_version(dsh, NULL); - - ds_close(dsh, false); - ds_cleanup(); - - return rc; -} - -#endif struct userdata_t { void *vhandle; ta_t *transaction; @@ -335,10 +281,8 @@ int rc = 0; dsh_t *dsh; - ds_init(); dsh = ds_open(CURDIR_S, db_file, DS_WRITE); - if (dsh == NULL) return EX_ERROR; else Index: msgcounts.c =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/src/msgcounts.c,v retrieving revision 1.27.2.1 retrieving revision 1.27.2.2 diff -u -d -r1.27.2.1 -r1.27.2.2 --- msgcounts.c 10 Apr 2004 10:35:34 -0000 1.27.2.1 +++ msgcounts.c 29 Jun 2004 23:01:19 -0000 1.27.2.2 @@ -12,13 +12,9 @@ #include "common.h" -#include <ctype.h> #include <stdlib.h> -#include "datastore.h" -#include "fgetsl.h" #include "msgcounts.h" -#include "wordlists.h" /* Globals */ Index: passthrough.c =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/src/passthrough.c,v retrieving revision 1.29.2.1 retrieving revision 1.29.2.2 diff -u -d -r1.29.2.1 -r1.29.2.2 --- passthrough.c 28 Jun 2004 14:48:01 -0000 1.29.2.1 +++ passthrough.c 29 Jun 2004 23:01:20 -0000 1.29.2.2 @@ -96,7 +96,7 @@ b += i; cap -= i; - s = xfgetsl(b, cap, inf, 1); + s = xfgetsl(b, cap, inf, true); if (s == EOF) { if (i) s = i; } else { Index: register.c =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/src/register.c,v retrieving revision 1.28.2.11 retrieving revision 1.28.2.12 diff -u -d -r1.28.2.11 -r1.28.2.12 --- register.c 20 Jun 2004 17:02:38 -0000 1.28.2.11 +++ register.c 29 Jun 2004 23:01:20 -0000 1.28.2.12 @@ -65,6 +65,11 @@ run_type |= _run_type; retry: + if (retrycount-- == 0) { + fprintf(stderr, "retry count exceeded, giving up.\n"); + exit(EX_ERROR); + } + if (ds_txn_begin(list->dsh)) { fprintf(stderr, "ds_txn_begin error.\n"); exit(EX_ERROR); @@ -73,8 +78,17 @@ for (node = wordhash_first(h); node != NULL; node = wordhash_next(h)) { wordprop = node->buf; - if (DS_ABORT_RETRY == ds_read(list->dsh, node->key, &val)) - goto abort_retry; + switch (ds_read(list->dsh, node->key, &val)) { + case DS_ABORT_RETRY: + rand_sleep(4*1000,1000*1000); + goto retry; + case 0: + case 1: + break; + default: + fprintf(stderr, "cannot read from data base.\n"); + exit(EX_ERROR); + } if (incr != IX_UNDF) { u_int32_t *counts = val.count; counts[incr] += wordprop->freq; @@ -83,12 +97,29 @@ u_int32_t *counts = val.count; counts[decr] = ((long)counts[decr] < wordprop->freq) ? 0 : counts[decr] - wordprop->freq; } - if (DS_ABORT_RETRY == ds_write(list->dsh, node->key, &val)) - goto abort_retry; + switch (ds_write(list->dsh, node->key, &val)) { + case DS_ABORT_RETRY: + rand_sleep(4*1000,1000*1000); + goto retry; + case 0: + break; + default: + fprintf(stderr, "cannot write to data base.\n"); + exit(EX_ERROR); + } } - if (DS_ABORT_RETRY == ds_get_msgcounts(list->dsh, &val)) - goto abort_retry; + switch (ds_get_msgcounts(list->dsh, &val)) { + case 0: + case 1: + break; + case DS_ABORT_RETRY: + rand_sleep(4 * 1000, 1000 * 1000); + goto retry; + default: + fprintf(stderr, "cannot get message count values.\n"); + exit(EX_ERROR); + } list->msgcount[IX_SPAM] = val.spamcount; list->msgcount[IX_GOOD] = val.goodcount; @@ -105,8 +136,17 @@ val.spamcount = list->msgcount[IX_SPAM]; val.goodcount = list->msgcount[IX_GOOD]; - if (DS_ABORT_RETRY == ds_set_msgcounts(list->dsh, &val)) - goto abort_retry; + switch (ds_set_msgcounts(list->dsh, &val)) { + case 0: + break; + case DS_ABORT_RETRY: + fprintf(stderr, "cannot set message count values, retrying\n"); + rand_sleep(4 * 1000, 1000 * 1000); + goto retry; + default: + fprintf(stderr, "cannot set message count values\n"); + exit(EX_ERROR); + } set_msg_counts(val.goodcount, val.spamcount); switch(ds_txn_commit(list->dsh)) { @@ -114,8 +154,8 @@ break; case DST_TEMPFAIL: if (--retrycount) { - fprintf(stderr, "commit was aborted, retrying (%d tries left)...\n", retrycount); - rand_sleep(4 * 1000, 3000 * 1000); + fprintf(stderr, "commit was aborted, retrying...\n"); + rand_sleep(4 * 1000, 1000 * 1000); goto retry; } fprintf(stderr, "giving up on this transaction.\n"); @@ -135,20 +175,4 @@ list->listname, list->filepath, val.spamcount, val.goodcount); run_type = save_run_type; - return; - -abort_retry: - if (ds_txn_abort(list->dsh) != DST_OK) { - fprintf(stderr, "abort failed.\n"); - exit(EX_ERROR); - } - - if (--retrycount) { - fprintf(stderr, "transaction was aborted, retrying (%d tries left)...\n", retrycount); - rand_sleep(4 * 1000, 3000 * 1000); - goto retry; - } - - fprintf(stderr, "giving up on this transaction.\n"); - exit(EX_ERROR); } Index: robx.c =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/src/robx.c,v retrieving revision 1.7.8.4 retrieving revision 1.7.8.5 diff -u -d -r1.7.8.4 -r1.7.8.5 --- robx.c 28 Jun 2004 14:48:01 -0000 1.7.8.4 +++ robx.c 29 Jun 2004 23:01:20 -0000 1.7.8.5 @@ -14,7 +14,6 @@ #include "common.h" #include "datastore.h" -#include "rand_sleep.h" #include "robx.h" #include "wordlists.h" @@ -73,21 +72,16 @@ double rx; dsv_t val; + bool ok; uint32_t good_cnt, spam_cnt; struct robhook_data rh; - int ret, retrycount = 5; + int ret; -retry: - ret = ds_get_msgcounts(dsh, &val); - if (ret != 0 && --retrycount) { - fprintf(stderr, "transaction was aborted, retrying (%d tries left)...\n", retrycount); - rand_sleep(4*1000,1000*1000); - goto retry; - } + ok = ds_get_msgcounts(dsh, &val) == 0; - if (ret) { + if (!ok) { fprintf(stderr, "Can't find message counts.\n"); - exit(EX_ERROR); + return -1; } spam_cnt = val.spamcount; Index: score.c =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/src/score.c,v retrieving revision 1.11.2.9 retrieving revision 1.11.2.10 diff -u -d -r1.11.2.9 -r1.11.2.10 --- score.c 28 Jun 2004 14:48:01 -0000 1.11.2.9 +++ score.c 29 Jun 2004 23:01:20 -0000 1.11.2.10 @@ -19,6 +19,7 @@ #include "datastore.h" #include "msgcounts.h" #include "prob.h" +#include "rand_sleep.h" #include "rstats.h" #include "score.h" #include "wordhash.h" @@ -113,14 +114,22 @@ if (override > list->override) /* if already found */ break; +retry: if (ds_txn_begin(list->dsh) != DST_OK) { fprintf(stderr, "Problem starting transaction!\n"); exit(EX_ERROR); } ret = ds_read(list->dsh, token, &val); + if (ret == DS_ABORT_RETRY) { + rand_sleep(4*1000,1000*1000); + goto retry; + } - ds_txn_commit(list->dsh); /* reading shouldn't fail... */ + if (ds_txn_commit(list->dsh) == DST_TEMPFAIL) { + rand_sleep(4*1000,1000*1000); + goto retry; + } if (ret) continue; /* not found */ @@ -295,19 +304,24 @@ if (fabs(robx) < EPS) { - int ret; - dsv_t val; - /* Assign default value in case there's no wordlist * or no wordlist entry */ robx = ROBX; + if (list->dsh != NULL) + { + int ret; + dsv_t val; - if (list->dsh) { +retry: /* Note: .ROBX is scaled by 1000000 in the wordlist */ if (DST_OK != ds_txn_begin(list->dsh)) ret = -1; else { ret = ds_read(list->dsh, word_robx, &val); + if (ret == DS_ABORT_RETRY) { + rand_sleep(4*1000,1000*1000); + goto retry; + } if (ret != 0) robx = ROBX; else { Index: wordlists.c =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/src/wordlists.c,v retrieving revision 1.57.2.6 retrieving revision 1.57.2.7 diff -u -d -r1.57.2.6 -r1.57.2.7 --- wordlists.c 28 Jun 2004 14:48:01 -0000 1.57.2.6 +++ wordlists.c 29 Jun 2004 23:01:20 -0000 1.57.2.7 @@ -68,15 +68,30 @@ } /* switch */ } else { /* ds_open */ dsv_t val; - ds_txn_begin(list->dsh); - ds_get_msgcounts(list->dsh, &val); - list->msgcount[IX_GOOD] = val.goodcount; - list->msgcount[IX_SPAM] = val.spamcount; - if (wordlist_version == 0 && - ds_get_wordlist_version(list->dsh, &val)) - wordlist_version = val.count[0]; - if (DST_OK != ds_txn_commit(list->dsh)) - abort(); +retry: + if (DST_OK == ds_txn_begin(list->dsh)) { + switch (ds_get_msgcounts(list->dsh, &val)) { + case 0: + case 1: + list->msgcount[IX_GOOD] = val.goodcount; + list->msgcount[IX_SPAM] = val.spamcount; + if (wordlist_version == 0 && + ds_get_wordlist_version(list->dsh, &val)) + wordlist_version = val.count[0]; + if (DST_OK == ds_txn_commit(list->dsh)) + return retry; + break; + case DS_ABORT_RETRY: + fprintf(stderr, "Transaction reading message count/wordlist version failed, retrying.\n"); + rand_sleep(4000,3000*1000); + goto retry; + break; + default: + break; + } + } + fprintf(stderr, "Transaction reading message count/wordlist version failed.\n"); + exit(EX_ERROR); } /* ds_open */ return retry; @@ -89,8 +104,6 @@ if (word_lists == NULL) init_wordlist("word", WORDLIST, 0, WL_REGULAR); - ds_init(); - while (retry) { if (run_type & (REG_SPAM | REG_GOOD | UNREG_SPAM | UNREG_GOOD)) retry = open_wordlist(default_wordlist(), mode); @@ -114,8 +127,6 @@ if (list->dsh) ds_close(list->dsh, nosync); list->dsh = NULL; } - - ds_cleanup(); } bool build_wordlist_path(char *filepath, size_t size, const char *path) Index: wordlists_base.c =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/src/wordlists_base.c,v retrieving revision 1.14.2.3 retrieving revision 1.14.2.4 diff -u -d -r1.14.2.3 -r1.14.2.4 --- wordlists_base.c 6 Jun 2004 23:49:33 -0000 1.14.2.3 +++ wordlists_base.c 29 Jun 2004 23:01:20 -0000 1.14.2.4 @@ -12,6 +12,9 @@ bool config_setup = false; +static wordlist_t *free_wordlist(wordlist_t *list); +static bool dup_wordlist(wordlist_t *a, wordlist_t *b); + /* Default wordlist mode is now wordlist.db - a single wordlist containing ham and spam tokens */ @@ -43,6 +46,11 @@ } while(1) { + if (dup_wordlist(n, list_ptr)) { + free_wordlist(n); + return; + } + if (list_ptr->next == NULL) { /* end of list */ list_ptr->next=n; @@ -59,6 +67,24 @@ } } +static bool dup_wordlist(wordlist_t *a, wordlist_t *b) +{ + if (a->type != b->type) + return false; + + if (a->override!= b->override) + return false; + + if (strcmp(a->listname, b->listname) != 0) + return false; + + if (strcmp(a->filepath, b->filepath) != 0) + return false; + + return true; +} + + /* Set default wordlist for registering messages, finding robx, etc */ wordlist_t *default_wordlist(void) |