[cvs] SF.net SVN: bogofilter:[7045] trunk/bogofilter
Fast Bayesian spam filter along lines suggested by Paul Graham
Brought to you by:
m-a
From: <m-...@us...> - 2016-01-26 02:14:35
|
Revision: 7045 http://sourceforge.net/p/bogofilter/code/7045 Author: m-a Date: 2016-01-26 02:14:32 +0000 (Tue, 26 Jan 2016) Log Message: ----------- Support Kyoto Cabinet databases, code by Denny Lin. Apply patch from Denny Lin, with one fix, to add KyotoCabinet support. To enable, run configure --with-database=kyotocabinet when building bogofilter. Thanks! Modified Paths: -------------- trunk/bogofilter/AUTHORS trunk/bogofilter/INSTALL trunk/bogofilter/NEWS trunk/bogofilter/README trunk/bogofilter/RELEASE.NOTES trunk/bogofilter/configure.ac trunk/bogofilter/src/Makefile.am trunk/bogofilter/src/tests/t.frame Added Paths: ----------- trunk/bogofilter/src/datastore_kc.c Modified: trunk/bogofilter/AUTHORS =================================================================== --- trunk/bogofilter/AUTHORS 2016-01-26 01:20:47 UTC (rev 7044) +++ trunk/bogofilter/AUTHORS 2016-01-26 02:14:32 UTC (rev 7045) @@ -56,3 +56,4 @@ Paul Mangan Roman Trunov Julius Plenz +Denny Lin (KyotoCabinet support) Modified: trunk/bogofilter/INSTALL =================================================================== --- trunk/bogofilter/INSTALL 2016-01-26 01:20:47 UTC (rev 7044) +++ trunk/bogofilter/INSTALL 2016-01-26 02:14:32 UTC (rev 7045) @@ -11,10 +11,13 @@ database, with version (patchlevel omitted) URL to home page --------------------------------------------- --------------------- -1a.Berkeley DB (3.1 - 4.4) with transactions http://sleepycat.com/ -1b.Berkeley DB (3.1 - 4.4) without transactions http://sleepycat.com/ +1a.Berkeley DB (3.1 - 6.x) with transactions +1b.Berkeley DB (3.1 - 6.x) without transactions + http://www.oracle.com/technetwork/database/database-technologies/berkeleydb/ 2. QDBM (1.7.23 or newer) http://qdbm.sf.net/ 3. SQLite (3.2.6 or newer) http://sqlite.org/ +4. TokyoCabinet http://fallabs.com/tokyocabinet/ +5. 
KyotoCabinet http://fallabs.com/kyotocabinet/ You can use --with-database=ARG (choose from db (for Berkeley DB), qdbm, sqlite) to pick the database backend (you must have installed the Modified: trunk/bogofilter/NEWS =================================================================== --- trunk/bogofilter/NEWS 2016-01-26 01:20:47 UTC (rev 7044) +++ trunk/bogofilter/NEWS 2016-01-26 02:14:32 UTC (rev 7045) @@ -17,6 +17,10 @@ 2016-01-26 + * Apply patch from Denny Lin, with one fix, to add KyotoCabinet + support. To enable, run configure --with-database=kyotocabinet when + building bogofilter. Thanks! + * Apply patch from Denny Lin to plug a few memory leaks in the TokyoCabinet implementation contributed through the bogofilter-dev mailing list. Thanks! Modified: trunk/bogofilter/README =================================================================== --- trunk/bogofilter/README 2016-01-26 01:20:47 UTC (rev 7044) +++ trunk/bogofilter/README 2016-01-26 02:14:32 UTC (rev 7045) @@ -173,7 +173,7 @@ --prefix=PREFIX install architecture-independent files in PREFIX [default: /usr/local] - --with-database=ENGINE Choose database engine {db|qdbm|sqlite3} + --with-database=ENGINE Choose database engine {db|qdbm|sqlite3|tokyocabinet|kyotocabinet} [default: db] CPPFLAGS=-I/opt/csw/include Choose additional include file path /opt/csw/include Modified: trunk/bogofilter/RELEASE.NOTES =================================================================== --- trunk/bogofilter/RELEASE.NOTES 2016-01-26 01:20:47 UTC (rev 7044) +++ trunk/bogofilter/RELEASE.NOTES 2016-01-26 02:14:32 UTC (rev 7045) @@ -28,6 +28,14 @@ NOTE: the NEWS document has greater detail on some of these changes. ------------------------------------------------------------------------ +[Major 1.2.5] Kyoto Cabinet support added. + +Bogofilter, as of release 1.2.5, supports Kyoto Cabinet databases, +courtesy of Denny Lin. 
The Kyoto Cabinet database is written and +maintained by the same author as the Tokyo Cabinet database, and they +recommend to use Kyoto Cabinet instead of Tokyo Cabinet. + +------------------------------------------------------------------------ [Major 1.1.6] Tokyo Cabinet support (B+-trees with transactions) added Bogofilter, as of release 1.1.6, supports Tokyo Cabinet databases, Modified: trunk/bogofilter/configure.ac =================================================================== --- trunk/bogofilter/configure.ac 2016-01-26 01:20:47 UTC (rev 7044) +++ trunk/bogofilter/configure.ac 2016-01-26 02:14:32 UTC (rev 7045) @@ -465,7 +465,7 @@ WITH_DB_ENGINE=db AC_ARG_WITH(database, AS_HELP_STRING([--with-database=ENGINE], - [choose database engine {db|qdbm|sqlite3|tokyocabinet} [[db]]]), + [choose database engine {db|qdbm|sqlite3|tokyocabinet|kyotocabinet} [[db]]]), [ WITH_DB_ENGINE=$withval ] ) @@ -502,6 +502,21 @@ ])],,AC_MSG_ERROR(Cannot link to tokyocabinet library.)) LIBS="$saveLIBS" ;; + xkyotocabinet) + AC_DEFINE(ENABLE_KYOTOCABINET_DATASTORE,1, [Enable kyotocabinet datastore]) + DB_TYPE=kyotocabinet + DB_EXT=.kct + AC_LIB_LINKFLAGS([kyotocabinet]) + LIBDB="$LIBKYOTOCABINET" + saveLIBS="$LIBS" + LIBS="$LIBS $LIBDB" + AC_LINK_IFELSE([AC_LANG_PROGRAM([ +#include <kclangc.h> + ], [ + KCDB *db = kcdbnew(); + ])],,AC_MSG_ERROR(Cannot link to kyotocabinet library.)) + LIBS="$saveLIBS" + ;; xqdbm) AC_DEFINE(ENABLE_QDBM_DATASTORE,1, [Enable qdbm datastore]) DB_TYPE=qdbm @@ -652,7 +667,7 @@ LIBS="$saveLIBS" ;; *) - AC_MSG_ERROR([Invalid --with-database argument.
Supported engines are db, qdbm, sqlite3, tokyocabinet, kyotocabinet.]) ;; esac @@ -678,6 +693,7 @@ AM_CONDITIONAL(ENABLE_QDBM_DATASTORE, test "x$WITH_DB_ENGINE" = "xqdbm") AM_CONDITIONAL(ENABLE_SQLITE_DATASTORE, test "x$WITH_DB_ENGINE" = "xsqlite3") AM_CONDITIONAL(ENABLE_TOKYOCABINET_DATASTORE, test "x$WITH_DB_ENGINE" = "xtokyocabinet") +AM_CONDITIONAL(ENABLE_KYOTOCABINET_DATASTORE, test "x$WITH_DB_ENGINE" = "xkyotocabinet") dnl Use TRIO to replace missing snprintf/vsnprintf. needtrio=0 Modified: trunk/bogofilter/src/Makefile.am =================================================================== --- trunk/bogofilter/src/Makefile.am 2016-01-26 01:20:47 UTC (rev 7044) +++ trunk/bogofilter/src/Makefile.am 2016-01-26 02:14:32 UTC (rev 7045) @@ -190,6 +190,11 @@ datastore_opthelp_dummies.c \ datastore_dummies.c else +if ENABLE_KYOTOCABINET_DATASTORE +datastore_SOURCE = datastore_kc.c \ + datastore_opthelp_dummies.c \ + datastore_dummies.c +else if ENABLE_TRANSACTIONS datastore_SOURCE = datastore_db.c datastore_db_trans.c else @@ -203,6 +208,7 @@ endif endif endif +endif datastore_OBJECT = $(datastore_SOURCE:.c=.o) Added: trunk/bogofilter/src/datastore_kc.c =================================================================== --- trunk/bogofilter/src/datastore_kc.c (rev 0) +++ trunk/bogofilter/src/datastore_kc.c 2016-01-26 02:14:32 UTC (rev 7045) @@ -0,0 +1,314 @@ +/* $Id$ */ + +/***************************************************************************** + +NAME: +datastore_kc.c -- implements the datastore, using kyotocabinet. 
+ +AUTHORS: +Gyepi Sam <gy...@pr...> 2003 +Matthias Andree <mat...@gm...> 2003 +Stefan Bellon <sb...@sb...> 2003-2004 +Pierre Habouzit <mad...@de...> 2007 +Denny Lin <den...@hs...> 2015 + +******************************************************************************/ + +#include "common.h" + +#include <kclangc.h> +#include <stdbool.h> +#include <stddef.h> +#include <string.h> + +#include "datastore.h" +#include "datastore_db.h" +#include "error.h" +#include "paths.h" +#include "xmalloc.h" +#include "xstrdup.h" + +#define UNUSED(x) ((void)(x)) + +typedef struct { + char *name; + bool created; + bool writable; + KCDB *dbp; +} dbh_t; + +static int kc_txn_begin(void *vhandle) { + dbh_t *dbh = vhandle; + if (!dbh->writable || kcdbbegintran(dbh->dbp, false)) + return DST_OK; + print_error(__FILE__, __LINE__, "kcdbbegintran(%p), err: %d, %s", + dbh->dbp, + kcdbecode(dbh->dbp), kcdbemsg(dbh->dbp)); + return DST_FAILURE; +} + +static int kc_txn_abort(void *vhandle) { + dbh_t *dbh = vhandle; + if (!dbh->writable || kcdbendtran(dbh->dbp, false)) + return DST_OK; + print_error(__FILE__, __LINE__, "kcdbendtran(%p, false), err: %d, %s", + dbh->dbp, + kcdbecode(dbh->dbp), kcdbemsg(dbh->dbp)); + return DST_FAILURE; +} + +static int kc_txn_commit(void *vhandle) { + dbh_t *dbh = vhandle; + if (!dbh->writable || kcdbendtran(dbh->dbp, true)) + return DST_OK; + print_error(__FILE__, __LINE__, "kc_txn_commit(%p, true), err: %d, %s", + dbh->dbp, + kcdbecode(dbh->dbp), kcdbemsg(dbh->dbp)); + return DST_FAILURE; +} + +static dsm_t dsm_kc = { + /* public -- used in datastore.c */ + &kc_txn_begin, + &kc_txn_abort, + &kc_txn_commit, + /* private -- used in datastore_db_*.c */ + NULL, /* dsm_env_init */ + NULL, /* dsm_cleanup */ + NULL, /* dsm_cleanup_lite */ + NULL, /* dsm_get_env_dbe */ + NULL, /* dsm_database_name */ + NULL, /* dsm_recover_open */ + NULL, /* dsm_auto_commit_flags */ + NULL, /* dsm_get_rmw_flag */ + NULL, /* dsm_lock */ + NULL, /* dsm_common_close */ + NULL, /* dsm_sync */ 
+ NULL, /* dsm_log_flush */ + NULL, /* dsm_pagesize */ + NULL, /* dsm_purgelogs */ + NULL, /* dsm_checkpoint */ + NULL, /* dsm_recover */ + NULL, /* dsm_remove */ + NULL, /* dsm_verify */ + NULL, /* dsm_list_logfiles */ + NULL /* dsm_leafpages */ +}; + +dsm_t *dsm = &dsm_kc; + +const char *db_version_str(void) +{ + static char v[80]; + if (v[0] == '\0') + snprintf(v, sizeof(v) - 1, "Kyoto Cabinet %s (TreeDB)", KCVERSION); + return v; +} + + +static dbh_t *dbh_init(bfpath *bfp) +{ + dbh_t *handle; + + handle = xmalloc(sizeof(dbh_t)); + memset(handle, 0, sizeof(dbh_t)); + + handle->name = xstrdup(bfp->filepath); + handle->created = false; + handle->writable = false; + handle->dbp = kcdbnew(); + + return handle; +} + + +static void dbh_free(dbh_t *handle) +{ + if (handle != NULL) { + xfree(handle->name); + kcdbdel(handle->dbp); + xfree(handle); + } +} + + +bool db_is_swapped(void *vhandle) +{ + UNUSED(vhandle); + + return false; +} + + +bool db_created(void *vhandle) +{ + dbh_t *handle = vhandle; + + return handle->created; +} + + +void *db_open(void *env, bfpath *bfp, dbmode_t open_mode) +{ + dbh_t *handle; + uint32_t mode; + bool ret; + + UNUSED(env); + + handle = dbh_init(bfp); + + handle->writable = open_mode & DS_WRITE; + mode = handle->writable ? 
KCOWRITER : KCOREADER; + ret = kcdbopen(handle->dbp, handle->name, mode); + if (!ret && handle->writable) { + ret = kcdbopen(handle->dbp, handle->name, mode | KCOCREATE); + handle->created = ret; + } + + if (!ret) + goto open_err; + + if (DEBUG_DATABASE(1)) + fprintf(dbgout, "kcdbopen(%s, %u)\n", handle->name, mode); + + return handle; + +open_err: + print_error(__FILE__, __LINE__, "kcdbopen(%s, %u), err: %d, %s", + handle->name, mode, + kcdbecode(handle->dbp), kcdbemsg(handle->dbp)); + dbh_free(handle); + + return NULL; +} + + +int db_delete(void *vhandle, const dbv_t *token) +{ + dbh_t *handle = vhandle; + bool ret; + + ret = kcdbremove(handle->dbp, token->data, token->leng); + if (!ret) { + print_error(__FILE__, __LINE__, "kcdbremove(\"%.*s\"), err: %d, %s", + CLAMP_INT_MAX(token->leng), (char *)token->data, + kcdbecode(handle->dbp), kcdbemsg(handle->dbp)); + exit(EX_ERROR); + } + + return 0; +} + + +int db_get_dbvalue(void *vhandle, const dbv_t *token, dbv_t *val) +{ + dbh_t *handle = vhandle; + char *data; + size_t dsiz; + + data = kcdbget(handle->dbp, token->data, token->leng, &dsiz); + if (data == NULL) + return DS_NOTFOUND; + + val->leng = min(val->leng, dsiz); + memcpy(val->data, data, val->leng); + kcfree(data); + + return 0; +} + +int db_set_dbvalue(void *vhandle, const dbv_t *token, const dbv_t *val) +{ + dbh_t *handle = vhandle; + bool ret; + + ret = kcdbset(handle->dbp, token->data, token->leng, val->data, val->leng); + if (!ret) { + print_error(__FILE__, __LINE__, + "kcdbset: (%.*s, %.*s), err: %d, %s", + CLAMP_INT_MAX(token->leng), (char *)token->data, + CLAMP_INT_MAX(val->leng), (char *)val->data, + kcdbecode(handle->dbp), kcdbemsg(handle->dbp)); + exit(EX_ERROR); + } + + return 0; +} + + +void db_close(void *vhandle) +{ + dbh_t *handle = vhandle; + + if (handle == NULL) + return; + + if (DEBUG_DATABASE(1)) + fprintf(dbgout, "kcdbclose: %s\n", handle->name); + + if (!kcdbclose(handle->dbp)) + print_error(__FILE__, __LINE__, "kcdbclose: %s, err: %d, 
%s", + handle->name, + kcdbecode(handle->dbp), kcdbemsg(handle->dbp)); + + dbh_free(handle); +} + + +void db_flush(void *vhandle) +{ + dbh_t *handle = vhandle; + + if (!kcdbsync(handle->dbp, false, NULL, NULL)) + print_error(__FILE__, __LINE__, "kcdbsync(), err: %d, %s", + kcdbecode(handle->dbp), kcdbemsg(handle->dbp)); +} + +ex_t db_foreach(void *vhandle, db_foreach_t hook, void *userdata) +{ + dbh_t *handle = vhandle; + KCCUR *cursor; + dbv_t dbv_key, dbv_data; + size_t ksiz, dsiz; + int ret; + ex_t retval = EX_OK; + char *key; + const char *data; + + cursor = kcdbcursor(handle->dbp); + if (!kccurjump(cursor)) { + print_error(__FILE__, __LINE__, "kccurjump(), err: %d, %s", + kcdbecode(handle->dbp), kcdbemsg(handle->dbp)); + retval = EX_ERROR; + goto done; + } + + while ((key = kccurget(cursor, &ksiz, &data, &dsiz, true)) != NULL) { + /* Copy to dbv_key and dbv_data */ + dbv_key.data = xstrdup(key); + dbv_key.leng = ksiz; + dbv_data.data = data; + dbv_data.leng = dsiz; + + /* Call function */ + ret = hook(&dbv_key, &dbv_data, userdata); + + xfree(dbv_key.data); + kcfree(key); + + if (ret != 0) + break; + } + +done: + kccurdel(cursor); + + return retval; +} + +const char *db_str_err(int e) +{ + UNUSED(e); + return "unknown error"; +} Modified: trunk/bogofilter/src/tests/t.frame =================================================================== --- trunk/bogofilter/src/tests/t.frame 2016-01-26 01:20:47 UTC (rev 7044) +++ trunk/bogofilter/src/tests/t.frame 2016-01-26 02:14:32 UTC (rev 7045) @@ -50,6 +50,7 @@ esac ;; *QDBM*) DB_TXN=false ;; *Tokyo*) DB_TXN=true ;; + *Kyoto*) DB_TXN=true ;; *SQLite*) DB_TXN=true ;; *TrivialDB*) DB_TXN=false ;; *) echo >&2 "Unknown data base type in bogofilter -V: $DB_NAME" This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |