[cvs] SF.net SVN: bogofilter: [6683] branches/datastore_tc
Fast Bayesian spam filter along lines suggested by Paul Graham
Brought to you by:
m-a
From: <cl...@us...> - 2007-11-23 02:50:33
|
Revision: 6683 http://bogofilter.svn.sourceforge.net/bogofilter/?rev=6683&view=rev Author: clint Date: 2007-11-22 18:50:37 -0800 (Thu, 22 Nov 2007) Log Message: ----------- tokyocabinet support from Pierre Habouzit Modified Paths: -------------- branches/datastore_tc/configure.ac branches/datastore_tc/src/Makefile.am branches/datastore_tc/src/tests/t.frame Added Paths: ----------- branches/datastore_tc/src/datastore_tc.c Modified: branches/datastore_tc/configure.ac =================================================================== --- branches/datastore_tc/configure.ac 2007-11-23 02:42:57 UTC (rev 6682) +++ branches/datastore_tc/configure.ac 2007-11-23 02:50:37 UTC (rev 6683) @@ -459,7 +459,7 @@ WITH_DB_ENGINE=db AC_ARG_WITH(database, AS_HELP_STRING([--with-database=ENGINE], - [choose database engine {db|qdbm|sqlite3} [[db]]]), + [choose database engine {db|qdbm|tokyocabinet|sqlite3} [[db]]]), [ WITH_DB_ENGINE=$withval ] ) @@ -473,6 +473,27 @@ LIBDB="$LIBSQLITE3" WITH_DB_ENGINE="sqlite3" ;; + xtokyocabinet) + AC_DEFINE(ENABLE_TOKYOCABINET_DATASTORE,1, [Enable tokyocabinet datastore]) + DB_TYPE=tokyocabinet + DB_EXT=.tc + AC_LIB_LINKFLAGS([tokyocabinet]) + LIBDB="$LIBTOKYOCABINET" + saveLIBS="$LIBS" + LIBS="$LIBS $LIBDB" + AC_LINK_IFELSE([AC_LANG_PROGRAM([ +#include <tcutil.h> +#include <tchdb.h> +#include <tcbdb.h> +#include <stdlib.h> +#include <time.h> +#include <stdbool.h> +#include <stdint.h> + ], [ + TCBDB *d = tcbdbnew(); + ])],,AC_MSG_ERROR(Cannot link to tokyocabinet library.)) + LIBS="$saveLIBS" + ;; xqdbm) AC_DEFINE(ENABLE_QDBM_DATASTORE,1, [Enable qdbm datastore]) DB_TYPE=qdbm @@ -624,7 +645,7 @@ LIBS="$saveLIBS" ;; *) - AC_MSG_ERROR([Invalid --with-database argument. Supported engines are db, qdbm, sqlite3.]) + AC_MSG_ERROR([Invalid --with-database argument. Supported engines are db, qdbm, tokyocabinet, sqlite3.]) ;; esac @@ -648,6 +669,7 @@ AC_SUBST(STATIC_DB) AM_CONDITIONAL(ENABLE_QDBM_DATASTORE, test "x$WITH_DB_ENGINE" = "xqdbm") +AM_CONDITIONAL(ENABLE_TOKYOCABINET_DATASTORE, test "x$WITH_DB_ENGINE" = "xtokyocabinet") AM_CONDITIONAL(ENABLE_SQLITE_DATASTORE, test "x$WITH_DB_ENGINE" = "xsqlite3") dnl Use TRIO to replace missing snprintf/vsnprintf. Modified: branches/datastore_tc/src/Makefile.am =================================================================== --- branches/datastore_tc/src/Makefile.am 2007-11-23 02:42:57 UTC (rev 6682) +++ branches/datastore_tc/src/Makefile.am 2007-11-23 02:50:37 UTC (rev 6683) @@ -180,6 +180,11 @@ bogoQDBMupgrade_SOURCES = bogoQDBMupgrade.c datastore_qdbm_cmpkey.c bogoQDBMupgrade_LDADD = $(LDADD) $(LIBDB) else +if ENABLE_TOKYOCABINET_DATASTORE +datastore_SOURCE = datastore_tc.c \ + datastore_txn_dummies.c datastore_opthelp_dummies.c \ + datastore_dummies.c +else if ENABLE_SQLITE_DATASTORE datastore_SOURCE = datastore_sqlite.c datastore_opthelp_dummies.c \ datastore_dummies.c @@ -196,6 +201,7 @@ endif endif endif +endif datastore_OBJECT = $(datastore_SOURCE:.c=.o) Added: branches/datastore_tc/src/datastore_tc.c =================================================================== --- branches/datastore_tc/src/datastore_tc.c (rev 0) +++ branches/datastore_tc/src/datastore_tc.c 2007-11-23 02:50:37 UTC (rev 6683) @@ -0,0 +1,333 @@ +/* $Id: datastore_qdbm.c,v 1.49 2005/04/04 11:16:21 relson Exp $ */ + +/***************************************************************************** + +NAME: +datastore_tc.c -- implements the datastore, using tokyocabinet. + +AUTHORS: +Gyepi Sam <gy...@pr...> 2003 +Matthias Andree <mat...@gm...> 2003 +Stefan Bellon <sb...@sb...> 2003-2004 +Pierre Habouzit <mad...@de...> 2007 + +******************************************************************************/ + +#include "common.h" + +#include <tcutil.h> +#include <tchdb.h> +#include <tcbdb.h> +#include <stdlib.h> +#include <time.h> +#include <stdbool.h> +#include <stdint.h> + +#include "datastore.h" +#include "datastore_db.h" +#include "error.h" +#include "paths.h" +#include "xmalloc.h" +#include "xstrdup.h" + +#define UNUSED(x) ((void)&x) + +typedef struct { + char *path; + char *name; + bool locked; + bool created; + TCBDB *dbp; +} dbh_t; + +/* Function definitions */ + +const char *db_version_str(void) +{ + static char v[80]; + if (!v[0]) + snprintf(v, sizeof(v), "TokyoCabinet (version %s, B+tree API)", tcversion); + return v; +} + + +static dbh_t *dbh_init(bfpath *bfp) +{ + dbh_t *handle; + + handle = xmalloc(sizeof(dbh_t)); + memset(handle, 0, sizeof(dbh_t)); /* valgrind */ + + handle->name = xstrdup(bfp->filepath); + + handle->locked = false; + handle->created = false; + + return handle; +} + + +static void dbh_free(/*@only@*/ dbh_t *handle) +{ + if (handle != NULL) { + xfree(handle->name); + xfree(handle->path); + xfree(handle); + } + return; +} + + +/* Returns is_swapped flag */ +bool db_is_swapped(void *vhandle) +{ + UNUSED(vhandle); + + return false; +} + + +/* Returns created flag */ +bool db_created(void *vhandle) +{ + dbh_t *handle = vhandle; + return handle->created; +} + + +/* + Initialize database. + Returns: pointer to database handle on success, NULL otherwise. +*/ +void *db_open(void * dummy, bfpath *bfp, dbmode_t open_mode) +{ + dbh_t *handle; + + bool res; + int open_flags; + TCBDB *dbp; + + UNUSED(dummy); + + if (open_mode & DS_WRITE) + open_flags = BDBOWRITER; + else + open_flags = BDBOREADER; + + handle = dbh_init(bfp); + + if (handle == NULL) return NULL; + + dbp = handle->dbp = tcbdbnew(); + res = tcbdbopen(dbp, handle->name, open_flags); + if (!res && (open_mode & DS_WRITE)) { + res = tcbdbopen(dbp, handle->name, open_flags | BDBOCREAT); + handle->created |= res; + } + + if (!res) + goto open_err; + + if (DEBUG_DATABASE(1)) + fprintf(dbgout, "(tc) tcbdbopen( %s, %d )\n", handle->name, open_mode); + + return handle; + + open_err: + print_error(__FILE__, __LINE__, "(tc) tcbdbopen(%s, %d), err: %d, %s", + handle->name, open_flags, + tcbdbecode(dbp), tcbdberrmsg(tcbdbecode(dbp))); + dbh_free(handle); + + return NULL; +} + + +int db_delete(void *vhandle, const dbv_t *token) +{ + int ret; + dbh_t *handle = vhandle; + TCBDB *dbp; + + dbp = handle->dbp; + ret = tcbdbout(dbp, token->data, token->leng); + + if (ret == 0) { + print_error(__FILE__, __LINE__, "(tc) tcbdbout('%.*s'), err: %d, %s", + CLAMP_INT_MAX(token->leng), + (char *)token->data, + tcbdbecode(dbp), tcbdberrmsg(tcbdbecode(dbp))); + exit(EX_ERROR); + } + ret = ret ^ 1; /* ok is 1 in qdbm and 0 in bogofilter */ + + return ret; /* 0 if ok */ +} + + +int db_get_dbvalue(void *vhandle, const dbv_t *token, /*@out@*/ dbv_t *val) +{ + char *data; + int dsiz; + + dbh_t *handle = vhandle; + TCBDB *dbp = handle->dbp; + + data = tcbdbget(dbp, token->data, token->leng, &dsiz); + + if (data == NULL) + return DS_NOTFOUND; + + if (val->leng < (unsigned)dsiz) { + print_error(__FILE__, __LINE__, + "(tc) db_get_dbvalue( '%.*s' ), size error %lu: %lu", + CLAMP_INT_MAX(token->leng), + (char *)token->data, (unsigned long)val->leng, + (unsigned long)dsiz); + exit(EX_ERROR); + } + + val->leng = dsiz; /* read count */ + memcpy(val->data, data, dsiz); + + free(data); /* not xfree() as allocated by tcbdbget() */ + + return 0; +} + + +/* + Re-organize database according to some heuristics +*/ +static inline void db_optimize(TCBDB *dbp, char *name) +{ + UNUSED(dbp); + UNUSED(name); + + /* The Villa API doesn't need optimizing like the formerly used + Depot API because Villa uses B+ trees and Depot uses hash tables. + Database size may grow larger and could get compacted with + tcbdboptimize() however as the database size with Villa is smaller + anyway, I don't think it is worth it. */ +} + + +int db_set_dbvalue(void *vhandle, const dbv_t *token, const dbv_t *val) +{ + int ret; + dbh_t *handle = vhandle; + TCBDB *dbp = handle->dbp; + + ret = tcbdbput(dbp, token->data, token->leng, val->data, val->leng); + + if (ret == 0) { + print_error(__FILE__, __LINE__, + "(tc) db_set_dbvalue( '%.*s' ) err: %d, %s", + CLAMP_INT_MAX(token->leng), (char *)token->data, + tcbdbecode(dbp), tcbdberrmsg(tcbdbecode(dbp))); + exit(EX_ERROR); + } + + db_optimize(dbp, handle->name); + + return 0; +} + + +/* + Close files and clean up. +*/ +void db_close(void *vhandle) +{ + dbh_t *handle = vhandle; + TCBDB *dbp; + + if (handle == NULL) return; + + if (DEBUG_DATABASE(1)) + fprintf(dbgout, "(tc) tcbdbclose(%s)\n", handle->name); + + dbp = handle->dbp; + + db_optimize(dbp, handle->name); + + if (!tcbdbclose(dbp)) + print_error(__FILE__, __LINE__, "(tc) tcbdbclose for %s err: %d, %s", + handle->name, + tcbdbecode(dbp), tcbdberrmsg(tcbdbecode(dbp))); + + tcbdbdel(dbp); + handle->dbp = NULL; + + dbh_free(handle); +} + + +/* + Flush any data in memory to disk +*/ +void db_flush(void *vhandle) +{ + dbh_t *handle = vhandle; + TCBDB * dbp = handle->dbp; + + if (!tcbdbsync(dbp)) + print_error(__FILE__, __LINE__, "(tc) tcbdbsync err: %d, %s", + tcbdbecode(dbp), tcbdberrmsg(tcbdbecode(dbp))); +} + + +ex_t db_foreach(void *vhandle, db_foreach_t hook, void *userdata) +{ + int ret = 0; + + dbh_t *handle = vhandle; + TCBDB *dbp = handle->dbp; + BDBCUR *cursor; + + dbv_t dbv_key, dbv_data; + int ksiz, dsiz; + char *key, *data; + + cursor = tcbdbcurnew(dbp); + ret = tcbdbcurfirst(cursor); + if (ret) { + while ((key = tcbdbcurkey(cursor, &ksiz))) { + data = tcbdbcurval(cursor, &dsiz); + if (data) { + /* switch to "dbv_t *" variables */ + dbv_key.leng = ksiz; + dbv_key.data = xmalloc(dbv_key.leng+1); + memcpy(dbv_key.data, key, ksiz); + ((char *)dbv_key.data)[dbv_key.leng] = '\0'; + + dbv_data.data = data; + dbv_data.leng = dsiz; /* read count */ + + /* call user function */ + ret = hook(&dbv_key, &dbv_data, userdata); + + xfree(dbv_key.data); + + if (ret != 0) + break; + free(data); /* not xfree() as allocated by dpget() */ + } + free(key); /* not xfree() as allocated by dpiternext() */ + + tcbdbcurnext(cursor); + } + } else { + print_error(__FILE__, __LINE__, "(tc) tcbdbcurfirst err: %d, %s", + tcbdbecode(dbp), tcbdberrmsg(tcbdbecode(dbp))); + exit(EX_ERROR); + } + + tcbdbcurdel(cursor); + return EX_OK; +} + +const char *db_str_err(int e) +{ + return tcbdberrmsg(e); +} Modified: branches/datastore_tc/src/tests/t.frame =================================================================== --- branches/datastore_tc/src/tests/t.frame 2007-11-23 02:42:57 UTC (rev 6682) +++ branches/datastore_tc/src/tests/t.frame 2007-11-23 02:50:37 UTC (rev 6683) @@ -44,6 +44,7 @@ *) DB_TXN=false ;; esac ;; *QDBM*) DB_TXN=false ;; + *Tokyo*) DB_TXN=false ;; *SQLite*) DB_TXN=true ;; *TrivialDB*) DB_TXN=false ;; *) echo >&2 "Unknown data base type in bogofilter -V: $DB_NAME" This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |