[cvs] bogofilter strlcat.3,NONE,1.1.2.1 strlcat.c,NONE,1.2.2.1 strlcpy.3,NONE,1.1.2.1 strlcpy.c,NONE
Fast Bayesian spam filter along lines suggested by Paul Graham
Brought to you by:
m-a
Update of /cvsroot/bogofilter/bogofilter In directory sc8-pr-cvs1:/tmp/cvs-serv9299 Modified Files: Tag: bogofilter-0_9_1_1_dev AUTHORS Makefile.am bogofilter.c bogofilter.h bogofilter.xml bogoutil.c bogowordfreq.c common.h config.c configtest.c configure.in find_home_tildeexpand.c fisher.c graham.c main.c method.h robinson.c robinson.h system.h wordlists.c Added Files: Tag: bogofilter-0_9_1_1_dev strlcat.3 strlcat.c strlcpy.3 strlcpy.c Log Message: Synchronize tests and reference results with cvs. --- NEW FILE: strlcat.3 --- .so strlcpy.3 --- NEW FILE: strlcat.c --- /* $Id: strlcat.c,v 1.2.2.1 2002/12/05 17:20:43 relson Exp $ */ /* from NetBSD: strlcat.c,v 1.5.8.2 2002/04/26 13:17:23 he Exp */ /* from OpenBSD: strlcat.c,v 1.2 1999/06/17 16:28:58 millert Exp */ /* * Copyright (c) 1998 Todd C. Miller <Tod...@co...> * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include <sys/types.h> #include <string.h> #include "system.h" /* * Appends src to string dst of size siz (unlike strncat, siz is the * full size of dst, not space left). At most siz-1 characters * will be copied. Always NUL terminates (unless siz <= strlen(dst)). * Returns strlen(src) + MIN(siz, strlen(initial dst)); if retval >= siz, * truncation occurred. */ size_t strlcat(char *dst, const char *src, size_t siz) { register char *d = dst; register const char *s = src; register size_t n = siz; size_t dlen; /* Find the end of dst and adjust bytes left but don't go past end */ while (*d != '\0' && n-- != 0) d++; dlen = d - dst; n = siz - dlen; if (n == 0) return(dlen + strlen(s)); while (*s != '\0') { if (n != 1) { *d++ = *s; n--; } s++; } *d = '\0'; return(dlen + (s - src)); /* count does not include NUL */ } --- NEW FILE: strlcpy.3 --- .\" $NetBSD: strlcpy.3,v 1.9 2002/02/07 07:00:32 ross Exp $ .\" from OpenBSD: strlcpy.3,v 1.11 2000/11/16 23:27:41 angelos Exp .\" .\" Copyright (c) 1998, 2000 Todd C. Miller <Tod...@co...> .\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. 
Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 3. The name of the author may not be used to endorse or promote products .\" derived from this software without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, .\" INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY .\" AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL .\" THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, .\" EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, .\" PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; .\" OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, .\" WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR .\" OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF .\" ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .\" .Dd March 1, 2001 .Dt STRLCPY 3 .Os .Sh NAME .Nm strlcpy , .Nm strlcat .Nd size-bounded string copying and concatenation .Sh LIBRARY .Lb libc .Sh SYNOPSIS .Fd #include \*[Lt]string.h\*[Gt] .Ft size_t .Fn strlcpy "char *dst" "const char *src" "size_t size" .Ft size_t .Fn strlcat "char *dst" "const char *src" "size_t size" .Sh DESCRIPTION The .Fn strlcpy and .Fn strlcat functions copy and concatenate strings respectively. They are designed to be safer, more consistent, and less error prone replacements for .Xr strncpy 3 and .Xr strncat 3 . Unlike those functions, .Fn strlcpy and .Fn strlcat take the full size of the buffer (not just the length) and guarantee to NUL-terminate the result (as long as .Fa size is larger than 0 or, in the case of .Fn strlcat , as long as there is at least one byte free in .Fa dst ) . Note that you should include a byte for the NUL in .Fa size . 
Also note that .Fn strlcpy and .Fn strlcat only operate on true .Dq C strings. This means that for .Fn strlcpy .Fa src must be NUL-terminated and for .Fn strlcat both .Fa src and .Fa dst must be NUL-terminated. .Pp The .Fn strlcpy function copies up to .Fa size - 1 characters from the NUL-terminated string .Fa src to .Fa dst , NUL-terminating the result. .Pp The .Fn strlcat function appends the NUL-terminated string .Fa src to the end of .Fa dst . It will append at most .Fa size - strlen(dst) - 1 bytes, NUL-terminating the result. .Sh RETURN VALUES The .Fn strlcpy and .Fn strlcat functions return the total length of the string they tried to create. For .Fn strlcpy that means the length of .Fa src . For .Fn strlcat that means the initial length of .Fa dst plus the length of .Fa src . While this may seem somewhat confusing it was done to make truncation detection simple. .Pp Note however, that if .Fn strlcat traverses .Fa size characters without finding a NUL, the length of the string is considered to be .Fa size and the destination string will not be NUL-terminated (since there was no space for the NUL). This keeps .Fn strlcat from running off the end of a string. In practice this should not happen (as it means that either .Fa size is incorrect or that .Fa dst is not a proper .Dq C string). The check exists to prevent potential security problems in incorrect code. .Sh EXAMPLES The following code fragment illustrates the simple case: .Bd -literal -offset indent char *s, *p, buf[BUFSIZ]; \&... (void)strlcpy(buf, s, sizeof(buf)); (void)strlcat(buf, p, sizeof(buf)); .Ed .Pp To detect truncation, perhaps while building a pathname, something like the following might be used: .Bd -literal -offset indent char *dir, *file, pname[MAXPATHLEN]; \&... 
if (strlcpy(pname, dir, sizeof(pname)) \*[Ge] sizeof(pname)) goto toolong; if (strlcat(pname, file, sizeof(pname)) \*[Ge] sizeof(pname)) goto toolong; .Ed .Pp Since we know how many characters we copied the first time, we can speed things up a bit by using a copy instead of an append: .Bd -literal -offset indent char *dir, *file, pname[MAXPATHLEN]; size_t n; \&... n = strlcpy(pname, dir, sizeof(pname)); if (n \*[Ge] sizeof(pname)) goto toolong; if (strlcpy(pname + n, file, sizeof(pname) - n) \*[Ge] sizeof(pname) - n) goto toolong; .Ed .Pp However, one may question the validity of such optimizations, as they defeat the whole purpose of .Fn strlcpy and .Fn strlcat . .Sh SEE ALSO .Xr snprintf 3 , .Xr strncat 3 , .Xr strncpy 3 .Sh HISTORY .Fn strlcpy and .Fn strlcat first appeared in .Ox 2.4 , then in .Nx 1.4.3 and .Fx 3.3.0 . --- NEW FILE: strlcpy.c --- /* $Id: strlcpy.c,v 1.2.2.1 2002/12/05 17:20:44 relson Exp $ */ /* from NetBSD: strlcpy.c,v 1.5.8.2 2002/04/26 13:17:42 he Exp */ /* from OpenBSD: strlcpy.c,v 1.4 1999/05/01 18:56:41 millert Exp */ /* * Copyright (c) 1998 Todd C. Miller <Tod...@co...> * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include <sys/types.h> #include <string.h> #include "system.h" /* * Copy src to string dst of size siz. At most siz-1 characters * will be copied. Always NUL terminates (unless siz == 0). * Returns strlen(src); if retval >= siz, truncation occurred. */ size_t strlcpy(char *dst, const char *src, size_t siz) { register char *d = dst; register const char *s = src; register size_t n = siz; /* Copy as many bytes as will fit */ if (n != 0 && --n != 0) { do { if ((*d++ = *s++) == 0) break; } while (--n != 0); } /* Not enough room in dst, add NUL and traverse rest of src */ if (n == 0) { if (siz != 0) *d = '\0'; /* NUL-terminate dst */ while (*s++) ; } return(s - src - 1); /* count does not include NUL */ } Index: AUTHORS =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/AUTHORS,v retrieving revision 1.6 retrieving revision 1.6.22.1 diff -u -d -r1.6 -r1.6.22.1 --- AUTHORS 21 Nov 2002 17:07:29 -0000 1.6 +++ AUTHORS 5 Dec 2002 17:20:27 -0000 1.6.22.1 @@ -9,3 +9,7 @@ Allyn Fratkin (bug fixes, bug database maintainer) Clint Adams (bug fixes, portability, debian packaging) Greg Louis (spamicity calculation methods) + +The strlcpy and strlcat functions are + (C) 1998 by Todd C. Miller <Tod...@co...> + and were taken from the NetBSD CVS repository. 
Index: Makefile.am =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/Makefile.am,v retrieving revision 1.81.2.1 retrieving revision 1.81.2.1.8.1 diff -u -d -r1.81.2.1 -r1.81.2.1.8.1 --- Makefile.am 30 Nov 2002 18:06:49 -0000 1.81.2.1 +++ Makefile.am 5 Dec 2002 17:20:31 -0000 1.81.2.1.8.1 @@ -5,6 +5,7 @@ AUTOMAKE_OPTIONS = foreign 1.6 SUBDIRS = dcdflib . tests SYSCONFDIR = @sysconfdir@ +DISTCHECK_CONFIGURE_FLAGS= @DISTCHECK_CONFIGURE_FLAGS@ if ENABLE_GRAHAM_METHOD GRAHAM_SRC = graham.c graham.h @@ -29,7 +30,7 @@ sysconf_DATA = bogofilter.cf.example noinst_LIBRARIES = libbogofilter.a -LDADD = libbogofilter.a +LDADD = libbogofilter.a @LIBOBJS@ # what to build that from libbogofilter_a_SOURCES= \ @@ -81,7 +82,9 @@ README.cvs README.freebsd README.hp-ux \ README.dcdflib README.Robinson \ $(extradistdirs) \ - bogofilter-SA-2002-01 + bogofilter-SA-2002-01 \ + strlcat.3 strlcpy.3 + # VERSION_FROM=main.c bogofilter.c bogoutil.c lexer.l version.c: version.sh $(VERSION_FROM) @@ -99,7 +102,7 @@ xmlto html-nochunks $< .xml.1: - xmlto man $< + { xmllint --noout --postvalid $< && xmlto man $< ; } || { rm -f $@ ; exit 1 ; } .PHONY: rpm cvs-check Index: bogofilter.c =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/bogofilter.c,v retrieving revision 1.89 retrieving revision 1.89.14.1 diff -u -d -r1.89 -r1.89.14.1 --- bogofilter.c 25 Nov 2002 20:54:25 -0000 1.89 +++ bogofilter.c 5 Dec 2002 17:20:33 -0000 1.89.14.1 @@ -74,22 +74,22 @@ ++msgcount; } while(cont); - if (msgcount > 1) { - fprintf(stderr, "%s: must get only one message to calculate spamicity!\n", progname); - exit(2); - } - spamicity = method->compute_spamicity(wordhash, NULL); db_lock_release_list(word_lists); - status = (spamicity > spam_cutoff) ? RC_SPAM : RC_NONSPAM; + status = method->status(); if (xss != NULL) *xss = spamicity; - if (run_type == RUN_UPDATE) - register_words((status==RC_SPAM) ? 
REG_SPAM : REG_GOOD, wordhash, msgcount, wordcount); + if (run_type == RUN_UPDATE) /* Note: don't register if RC_UNSURE */ + { + if (status == RC_SPAM) + register_words(REG_SPAM, wordhash, msgcount, wordcount); + if (status == RC_HAM) + register_words(REG_GOOD, wordhash, msgcount, wordcount); + } wordhash_free(wordhash); Index: bogofilter.h =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/bogofilter.h,v retrieving revision 1.32 retrieving revision 1.32.22.1 diff -u -d -r1.32 -r1.32.22.1 --- bogofilter.h 21 Nov 2002 15:42:53 -0000 1.32 +++ bogofilter.h 5 Dec 2002 17:20:35 -0000 1.32.22.1 @@ -8,7 +8,7 @@ #define UNKNOWN_WORD 0.4f /* odds that unknown word is spammish */ #define DEVIATION(n) fabs((n) - EVEN_ODDS) /* deviation from average */ -typedef enum rc_e {RC_SPAM=0, RC_NONSPAM=1} rc_t; +typedef enum rc_e {RC_SPAM=0, RC_HAM=1, RC_UNSURE=2} rc_t; extern void initialize_constants(void); extern rc_t bogofilter(/*@out@*/ double *xss); Index: bogofilter.xml =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/bogofilter.xml,v retrieving revision 1.34 retrieving revision 1.34.22.1 diff -u -d -r1.34 -r1.34.22.1 --- bogofilter.xml 22 Nov 2002 00:39:09 -0000 1.34 +++ bogofilter.xml 5 Dec 2002 17:20:36 -0000 1.34.22.1 @@ -80,8 +80,7 @@ url="http://radio.weblogs.com/0101454/stories/2002/09/16/spamDetection.html"> Spam Detection</ulink>.</para> -<para><existing>Another seeming improvement...Spam -Detection.</existing> Since then, Robinson and others have +<para>Since then, Robinson and others have realized that the S calculation can be further optimized: if a vector of length k contains random, uniformly-distributed probabilities p, then -2 * sum(ln(p)) is distributed as chi-squared with 2n degrees of @@ -123,6 +122,13 @@ on standard input as non-spam and to undo a prior registration of the same message as spam.</para> +<para>The <option>-u</option> option 
tells +<application>bogofilter</application> to register the message's text +after classifying it as spam or non-spam. A spam message will be registered +on the spamlist and a non-spam message on the goodlist. If using the +Robinson-Fisher method and the classification is "unsure", the message will +not be registered.</para> + <para>The <option>-d</option><replaceable> dir</replaceable> option allows you to set the directory under which wordlists will be found to <replaceable>dir</replaceable>. If omitted, the default directory @@ -185,12 +191,15 @@ lists the tokens with highest deviation from a mean of 0.5 association with spam.</para> +<para>The Robinson method is the default algorithm used for computing a +message's spamicity score, unless <application>bogofilter</application> +has been compiled without it, by using the +<option>--disable-robinson-method</option> option to the configure +script. The method to be used can be specified on the command line or +in the configuration file.</para> + <para>The <option>-g</option> option selects the original Graham form -of the calculation method. This is the default unless the -<option>-r</option> option is specified, or unless -<application>bogofilter</application> has been compiled with the -<option>--disable-graham-method</option> option to the configure -script.</para> +of the calculation method.</para> <para>The <option>-r</option> option selects the Robinson modifications to the calculation method. This is automatically enabled when the @@ -232,7 +241,7 @@ <refsect1 id='environment'><title>ENVIRONMENT</title> <para> - Bogofilter will initialize its data base directory to + Bogofilter will initialize its data base directory to <filename>$BOGOFILTER_DIR</filename> if <envar>BOGOFILTER_DIR</envar> is set. If it is not set, bogofilter will use <filename>$HOME/.bogofilter</filename> instead. If neither @@ -240,6 +249,28 @@ <replaceable>dir</replaceable></option> option must be present. 
</para></refsect1> +<refsect1 id='configuration'><title>CONFIGURATION</title> +<para>The <application>bogofilter</application> command line allows setting of many +options that determine how <application>bogofilter</application> operates. +File <filename>/etc/bogofilter.cf</filename> can be used to set additional +parameters that affect its operation. File +<filename>/etc/bogofilter.cf.example</filename> has samples of all of the parameters. +Status and logging messages can be customized for each site (see +<filename>/etc/bogofilter.cf.example</filename>). +</para> +</refsect1> + +<refsect1 id='returns'><title>RETURN VALUES</title> +<para>0 for spam; 1 for non-spam; 2 for I/O or other errors.</para> +<para>If both <option>-p</option> and <option>-e</option> are used, the + return values are: 0 for spam or non-spam; 2 for I/O or other + errors.</para> + +<para>Error 2 usually means that the wordlist files +<application>bogofilter</application> wants to read at startup +are missing or the hard disk has filled up in <option>-p</option> mode.</para> +</refsect1> + <refsect1 id='integration'><title>INTEGRATION WITH OTHER TOOLS</title> <para>Use with Procmail</para> @@ -368,17 +399,6 @@ the form of comments.</para> </refsect1> -<refsect1 id='returns'><title>RETURN VALUES</title> -<para>0 for spam; 1 for non-spam; 2 for I/O or other errors.</para> -<para>If both <option>-p</option> and <option>-e</option> are used, the - return values are: 0 for spam or non-spam; 2 for I/O or other - errors.</para> - -<para>Error 2 usually means that the wordlist files -<application>bogofilter</application> wants to read at startup -are missing or the hard disk has filled up in <option>-p</option> mode.</para> -</refsect1> - <refsect1 id='files'><title>FILES</title> <variablelist> <varlistentry> @@ -386,7 +406,7 @@ <listitem><para>System configuration file.</para></listitem> </varlistentry> <varlistentry> -<term><filename>~/.bogofiltercf</filename></term> 
+<term><filename>~/.bogofilter.cf</filename></term> <listitem><para>User configuration file.</para></listitem> </varlistentry> <varlistentry> Index: bogoutil.c =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/bogoutil.c,v retrieving revision 1.37.6.1 retrieving revision 1.37.6.1.8.1 diff -u -d -r1.37.6.1 -r1.37.6.1.8.1 --- bogoutil.c 30 Nov 2002 18:06:49 -0000 1.37.6.1 +++ bogoutil.c 5 Dec 2002 17:20:36 -0000 1.37.6.1.8.1 @@ -242,11 +242,15 @@ const char *head_format = !show_probability ? "%-20s %6s %6s\n" : "%-20s %6s %6s %6s %6s\n"; const char *data_format = !show_probability ? "%-20s %6ld %6ld\n" : "%-20s %6ld %6ld %f %f\n"; - build_path(filepath, PATH_LEN, dir, GOODFILE); + if (build_path(filepath, sizeof(filepath), dir, GOODFILE) < 0) + return 2; + if ((dbh_good = db_open_and_lock_file(filepath, GOODFILE, DB_READ)) == NULL) return 2; - build_path(filepath, PATH_LEN, dir, SPAMFILE); + if (build_path(filepath, sizeof(filepath), dir, SPAMFILE) < 0) + return 2; + if ((dbh_spam = db_open_and_lock_file(filepath, SPAMFILE, DB_READ)) == NULL) return 2; @@ -423,16 +427,20 @@ static int compute_robinson_x(char *path) { + int e; + dbh_t *dbh_good; dbh_t *dbh_spam; - char db_good_file[PATH_LEN]; char db_spam_file[PATH_LEN]; + char db_good_file[PATH_LEN]; double robx; - sprintf( db_spam_file, "%s/%s", path, "spamlist.db" ); - sprintf( db_good_file, "%s/%s", path, "goodlist.db" ); + e = build_path(db_spam_file, sizeof(db_spam_file), path, "spamlist.db"); + if (e < 0) goto overflow; + e = build_path(db_good_file, sizeof(db_good_file), path, "goodlist.db"); + if (e < 0) goto overflow; dbh_good = db_open(db_good_file, "good", DB_READ); dbh_spam = db_open(db_spam_file, "spam", DB_WRITE); @@ -451,18 +459,22 @@ db_close(dbh_good); return 0; + +overflow: + fprintf(stderr, "%s: string too long creating .db file name.\n", PROGNAME); + exit(2); } static void print_version(void) { fprintf(stderr, - PROGNAME ": version: %s\n" 
+ "%s: version: %s\n" "Copyright (C) 2002 Gyepi Sam\n\n" - PROGNAME " comes with ABSOLUTELY NO WARRANTY.\n" + "%s comes with ABSOLUTELY NO WARRANTY.\n" "This is free software, and you are welcome to redistribute\n" "it under the General Public License.\n" "See the COPYING file with the source distribution for details.\n\n", - version); + PROGNAME, version, PROGNAME); } static void usage(void) @@ -482,7 +494,7 @@ "\t-h\tPrint this message.\n" "\t-R\tCompute Robinson's X for specified directory.\n" "\t-V\tPrint program version.\n" - PROGNAME " is part of the bogofilter package.\n"); + "%s is part of the bogofilter package.\n", PROGNAME); } #undef ROBX @@ -550,8 +562,9 @@ if (count != 1) { - fprintf(stderr, PROGNAME ": Exactly one of the -d, -l, or -w flags must be present.\n"); - exit(1); + fprintf(stderr, "%s: Exactly one of the -d, -l, or -w flags " + "must be present.\n", PROGNAME); + exit(1); } /* Extra or missing parameters */ Index: bogowordfreq.c =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/bogowordfreq.c,v retrieving revision 1.3.6.1 retrieving revision 1.3.6.1.8.1 diff -u -d -r1.3.6.1 -r1.3.6.1.8.1 --- bogowordfreq.c 30 Nov 2002 18:06:49 -0000 1.3.6.1 +++ bogowordfreq.c 5 Dec 2002 17:20:36 -0000 1.3.6.1.8.1 @@ -21,7 +21,9 @@ #include "system.h" #include "collect.h" +const char *progname = "bogowordfreq.c"; const char *spam_header_name = "X-Bogosity:"; /* unused */ +bool logflag; /* unused */ int passthrough = 0; /* unused */ bool quiet = 0; /* unused */ int verbose = 0; /* unused */ Index: common.h =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/common.h,v retrieving revision 1.23 retrieving revision 1.23.22.1 diff -u -d -r1.23 -r1.23.22.1 --- common.h 22 Nov 2002 16:31:03 -0000 1.23 +++ common.h 5 Dec 2002 17:20:36 -0000 1.23.22.1 @@ -9,6 +9,8 @@ #include <sys/param.h> #endif +#include <stdio.h> + #include "debug.h" #include 
"system.h" /* defines bool */ @@ -53,6 +55,8 @@ int exp; } FLOAT; -void build_path(char* dest, int size, const char* dir, const char* file); +extern int build_path(char* dest, size_t size, const char* dir, const char* file); + +#define internal_error do { fprintf(stderr, "Internal error in %s:%u\n", __FILE__, __LINE__); abort(); } while(0) #endif Index: config.c =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/config.c,v retrieving revision 1.58.6.1 retrieving revision 1.58.6.1.8.1 diff -u -d -r1.58.6.1 -r1.58.6.1.8.1 --- config.c 30 Nov 2002 18:06:49 -0000 1.58.6.1 +++ config.c 5 Dec 2002 17:20:37 -0000 1.58.6.1.8.1 @@ -96,7 +96,7 @@ static enum algorithm_e algorithm = AL_DEFAULT; -double spam_cutoff; +double spam_cutoff = 0.0; /* set during method initialization */ double min_dev = 0.0f; double thresh_stats = 0.0f; @@ -135,6 +135,9 @@ { "robx", CP_DOUBLE, { (void *) NULL } }, /* Robinson */ { "robs", CP_DOUBLE, { (void *) NULL } }, /* Robinson */ #endif +#ifdef ENABLE_ROBINSON_FISHER + { "ham_cutoff", CP_FUNCTION, { (void *) NULL } }, /* Robinson-Fisher */ +#endif { NULL, CP_NONE, { (void *) NULL } }, }; @@ -142,7 +145,7 @@ static bool select_algorithm(const unsigned char *s) { - enum algorithm_e al = tolower(*s); + enum algorithm_e al = s ? (unsigned) tolower(*s) : algorithm; bool ok = true; switch (al) { @@ -203,7 +206,7 @@ int sign = (*val == '-') ? -1 : 1; if (*val == '-' || *val == '+') val += 1; - *arg->addr.i = atoi(val) * sign; + *arg->addr.i = atoi((const char *)val) * sign; if (DEBUG_CONFIG(0)) fprintf( stderr, "%s -> %d\n", arg->name, *arg->addr.i ); break; @@ -213,7 +216,7 @@ double sign = (*val == '-') ? 
-1.0f : 1.0f; if (*val == '-' || *val == '+') val += 1; - *arg->addr.d = atof(val) * sign; + *arg->addr.d = atof((const char *)val) * sign; if (DEBUG_CONFIG(0)) fprintf( stderr, "%s -> %f\n", arg->name, *arg->addr.d ); break; @@ -267,7 +270,7 @@ { if (DEBUG_CONFIG(1)) fprintf( stderr, "Testing: %s\n", arg->name); - if (strncmp(arg->name, line, len) == 0) + if (strncmp(arg->name, (const char *)line, len) == 0) { bool ok = process_config_parameter(arg, val); if (DEBUG_CONFIG(1) && ok ) @@ -309,10 +312,12 @@ size_t len; unsigned char buff[MAXBUFFLEN]; + memset(buff, '\0', sizeof(buff)); /* for debugging */ + lineno += 1; if (fgets((char *)buff, sizeof(buff), fp) == NULL) break; - len = strlen(buff); + len = strlen((char *)buff); if ( buff[0] == '#' || buff[0] == ';' || buff[0] == '\n' ) continue; while (iscntrl(buff[len-1])) @@ -387,6 +392,7 @@ (void)printf( "\t-e\t- in -p mode, exit with code 0 when the mail is not spam.\n"); (void)printf( "\t-s\t- register message as spam.\n" ); (void)printf( "\t-n\t- register message as non-spam.\n" ); + (void)printf( "\t-o cutoff\t- set user defined spamicity cutoff.\n" ); (void)printf( "\t-u\t- classify message as spam or non-spam and register appropriately.\n" ); (void)printf( "\t-S\t- move message's words from non-spam list to spam list.\n" ); (void)printf( "\t-N\t- move message's words from spam list to spam non-list.\n" ); @@ -440,12 +446,14 @@ int option; int exitcode; - while ((option = getopt(argc, argv, "d:ehlsnSNvVpuc:CgrRx:fqt" G R F)) != EOF) + select_algorithm(NULL); /* select default algorithm */ + + while ((option = getopt(argc, argv, "d:eFhlo:snSNvVpuc:CgrRx:fqt" G R F)) != EOF) { switch(option) { case 'd': - strncpy(directory, optarg, PATH_LEN); + strlcpy(directory, optarg, PATH_LEN); break; case 'e': @@ -498,6 +506,7 @@ #ifdef GRAHAM_AND_ROBINSON case 'g': algorithm = AL_GRAHAM; + select_algorithm(NULL); break; #endif @@ -509,6 +518,7 @@ /* fall through to force Robinson calculations */ case 'r': algorithm = 
AL_ROBINSON; + select_algorithm(NULL); #endif break; #endif @@ -516,6 +526,7 @@ #ifdef ENABLE_ROBINSON_FISHER case 'f': algorithm = AL_FISHER; + select_algorithm(NULL); break; #endif @@ -540,6 +551,10 @@ suppress_config_file = true; break; + case 'o': + spam_cutoff = atof( optarg ); + break; + case 't': terse = true; break; @@ -559,10 +574,6 @@ /* exported */ void process_config_files(void) { - char buff[2]; - sprintf(buff, "%c", algorithm); - select_algorithm(buff); - if (! suppress_config_file) { read_config_file(system_config_file, false); Index: configtest.c =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/configtest.c,v retrieving revision 1.15 retrieving revision 1.15.12.1 diff -u -d -r1.15 -r1.15.12.1 --- configtest.c 26 Nov 2002 23:44:13 -0000 1.15 +++ configtest.c 5 Dec 2002 17:20:39 -0000 1.15.12.1 @@ -33,13 +33,13 @@ /* Dummy struct definitions to support config.c */ method_t graham_method = { - NULL, NULL, NULL, NULL, NULL, NULL + NULL, NULL, NULL, NULL, NULL, NULL, NULL } ; method_t rf_robinson_method = { - NULL, NULL, NULL, NULL, NULL, NULL + NULL, NULL, NULL, NULL, NULL, NULL, NULL } ; method_t rf_fisher_method = { - NULL, NULL, NULL, NULL, NULL, NULL + NULL, NULL, NULL, NULL, NULL, NULL, NULL } ; #ifdef COMPILE_DEAD_CODE Index: configure.in =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/configure.in,v retrieving revision 1.54.2.3 retrieving revision 1.54.2.3.8.1 diff -u -d -r1.54.2.3 -r1.54.2.3.8.1 --- configure.in 30 Nov 2002 21:12:46 -0000 1.54.2.3 +++ configure.in 5 Dec 2002 17:20:40 -0000 1.54.2.3.8.1 @@ -1,5 +1,5 @@ # $Id$ -AC_INIT(bogofilter, 0.9.1) +AC_INIT(bogofilter, 0.9.1.1) AC_PREREQ(2.55) AC_CONFIG_SRCDIR([bogofilter.c]) AM_INIT_AUTOMAKE @@ -44,10 +44,13 @@ AH_TEMPLATE([HAVE_DB_H], [Have suitable db.h header]) +WITH_DB= + AC_ARG_WITH(db, [ --with-db=PATH Specify path to BerkelyDB install directory ], [ if test 
"x$withval" != "xno" ; then + WITH_DB=$withval if test -d "$withval/lib"; then if test -n "${need_dash_r}"; then LDFLAGS="-R${withval}/lib ${LDFLAGS}" @@ -65,7 +68,11 @@ CPPFLAGS="-I${withval} ${CPPFLAGS}" fi fi - ]) + ] +) +if test "x$WITH_DB" != "x" ; then + DISTCHECK_CONFIGURE_FLAGS="$DISTCHECK_CONFIGURE_FLAGS --with-db=$WITH_DB" +fi AC_EGREP_HEADER(db_create, [db.h], AC_DEFINE(HAVE_DB_H), AC_MSG_ERROR([Can not locate a suitable BerkeleyDB db.h header file. @@ -190,7 +197,9 @@ # AC_FUNC_STAT wants to replace stat(2) if stat("", &st) succeeds. # we catch this. -AC_CHECK_FUNCS(alarm bzero ftruncate memset mkdir socket strdup strerror flock lockf fcntl strchr strrchr memcpy) +AC_CHECK_FUNCS(alarm bzero ftruncate memset mkdir socket strdup) +AC_CHECK_FUNCS(strerror flock lockf fcntl strchr strrchr memcpy) +AC_REPLACE_FUNCS(strlcpy strlcat) if test "$sysconfdir" = "\${prefix}/etc" \ && { test "$prefix" = "NONE" || test "$prefix" = "/usr" \ @@ -206,6 +215,7 @@ ) fi +AC_SUBST(DISTCHECK_CONFIGURE_FLAGS) AC_CONFIG_FILES([Makefile tests/Makefile tests/bogoutil/Makefile bogofilter.spec dcdflib/Makefile dcdflib/src/Makefile]) AC_OUTPUT Index: find_home_tildeexpand.c =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/find_home_tildeexpand.c,v retrieving revision 1.4 retrieving revision 1.4.22.1 diff -u -d -r1.4 -r1.4.22.1 --- find_home_tildeexpand.c 21 Nov 2002 15:42:53 -0000 1.4 +++ find_home_tildeexpand.c 5 Dec 2002 17:20:41 -0000 1.4.22.1 @@ -34,19 +34,22 @@ char *tildeexpand(const char *name) { char *tmp; const char *home; - size_t l; + size_t l, tl; if (name[0] != '~') return xstrdup(name); + /* figure length of user name */ l = strspn(&name[1], "abcdefghijklmnopqrstuvwxyz" "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "0123456789._-"); /* Portable Filename Character Set */ - if (l) { + if (l > 0) { /* got a parameter to the tilde */ tmp = xmalloc(l + 1); - strncpy(tmp, &name[1], l); + strlcpy(tmp, &name[1], l+1); + /* 
robustness: we only want the first l characters, so truncate + * here just in case */ tmp[l] = '\0'; home = find_home_user(tmp); @@ -64,9 +67,10 @@ } xfree(tmp); - tmp = xmalloc(strlen(name) - l + strlen(home)); - strcpy(tmp, home); - /* no need to insert a slash here, name[l] should contain one */ - strcat(tmp, name + l + 1); + tl = strlen(name) + strlen(home) - l + 1; + tmp = xmalloc(tl); + (void)strlcpy(tmp, home, tl); + /* no need to insert a slash here, name[l] contains one */ + if (strlcat(tmp, name + l + 1, tl) >= tl) internal_error; return tmp; } Index: fisher.c =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/fisher.c,v retrieving revision 1.6 retrieving revision 1.6.18.1 diff -u -d -r1.6 -r1.6.18.1 --- fisher.c 23 Nov 2002 18:42:19 -0000 1.6 +++ fisher.c 5 Dec 2002 17:20:41 -0000 1.6.18.1 @@ -20,19 +20,39 @@ #define RF_DEBUG #undef RF_DEBUG -#define FISHER_SPAM_CUTOFF 0.952f -#define FISHER_MIN_DEV 0.1f +#define FISHER_HAM_CUTOFF 0.10f +#define FISHER_SPAM_CUTOFF 0.95f +#define FISHER_MIN_DEV 0.10f void fis_initialize_constants(void); double fis_get_spamicity(size_t robn, FLOAT P, FLOAT Q); void fis_print_summary(void); +rc_t fis_status(void); + +/* Static Variables */ + +double ham_cutoff = 0.0; + +extern double robx; /* in robinson.c */ +extern double robs; /* in robinson.c */ +extern double thresh_rtable; /* in robinson.c */ + +const parm_desc fis_parm_table[] = +{ + { "robx", CP_DOUBLE, { (void *) &robx } }, + { "robs", CP_DOUBLE, { (void *) &robs } }, + { "thresh_rtable", CP_DOUBLE, { (void *) &thresh_rtable } }, + { "ham_cutoff", CP_DOUBLE, { (void *) &ham_cutoff } }, + { NULL, CP_NONE, { (void *) NULL } }, +}; rf_method_t rf_fisher_method = { { "fisher", /* const char *name; */ - rob_parm_table, /* m_parm_table *parm_table */ + fis_parm_table, /* m_parm_table *parm_table */ fis_initialize_constants, /* m_initialize_constants *initialize_constants */ rob_bogofilter, /* m_compute_spamicity 
*compute_spamicity */ + fis_status, /* m_status *status */ rob_print_bogostats, /* m_print_bogostats *print_stats */ rob_cleanup, /* m_free *cleanup */ }, @@ -40,7 +60,7 @@ fis_print_summary /* rf_print_summary *print_summary */ }; -static stats_t stats; +static rob_stats_t stats; double prbf(double x, double df) { @@ -79,6 +99,18 @@ void fis_initialize_constants(void) { rob_initialize_with_parameters(FISHER_MIN_DEV, FISHER_SPAM_CUTOFF); +} + +rc_t fis_status(void) +{ + if ( stats.spamicity >= spam_cutoff ) + return RC_SPAM; + + + if (ham_cutoff < EPS || (stats.spamicity - ham_cutoff < EPS)) + return RC_HAM; + + return RC_UNSURE; } /* Done */ Index: graham.c =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/graham.c,v retrieving revision 1.9.4.1 retrieving revision 1.9.4.1.8.1 diff -u -d -r1.9.4.1 -r1.9.4.1.8.1 --- graham.c 30 Nov 2002 18:06:49 -0000 1.9.4.1 +++ graham.c 5 Dec 2002 17:20:41 -0000 1.9.4.1.8.1 @@ -37,17 +37,22 @@ int thresh_index = 0; +stats_t stats; + static const parm_desc gra_parm_table[] = { { "thresh_index", CP_INTEGER, { (void *) &thresh_index } }, { NULL, CP_NONE, { (void *) NULL } }, }; +static rc_t gra_status(void); + method_t graham_method = { "graham", /* const char *name; */ gra_parm_table, /* m_parm_table *parm_table */ gra_initialize_constants, /* m_initialize_constants *initialize_constants */ gra_bogofilter, /* m_compute_spamicity *compute_spamicity */ + gra_status, /* m_status *status */ gra_print_bogostats, /* m_print_bogostats *print_stats */ gra_cleanup /* m_free *cleanup */ } ; @@ -138,8 +143,12 @@ DEVIATION(hit->prob), hit->prob, curkey); } hit->prob = prob; - strncpy(hit->key, text, MAXTOKENLEN); - hit->key[MAXTOKENLEN] = '\0'; + if (strlcpy(hit->key, text, MAXTOKENLEN) >= MAXTOKENLEN) { + /* The lexer should not have returned a token longer than + * MAXTOKENLEN */ + internal_error; + abort(); + } } return; } @@ -293,6 +302,8 @@ } } + stats.spamicity = spamicity; + 
return spamicity; } @@ -304,6 +315,12 @@ if (spam_cutoff < EPS) spam_cutoff = GRAHAM_SPAM_CUTOFF; set_good_weight( GRAHAM_GOOD_BIAS ); +} + +rc_t gra_status(void) +{ + rc_t status = ( stats.spamicity >= spam_cutoff ) ? RC_SPAM : RC_HAM; + return status; } double gra_bogofilter(wordhash_t *wordhash, FILE *fp) /*@globals errno@*/ Index: main.c =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/main.c,v retrieving revision 1.85.14.1 retrieving revision 1.85.14.1.8.1 diff -u -d -r1.85.14.1 -r1.85.14.1.8.1 --- main.c 30 Nov 2002 18:06:49 -0000 1.85.14.1 +++ main.c 5 Dec 2002 17:20:42 -0000 1.85.14.1.8.1 @@ -30,6 +30,7 @@ #include "lexer.h" #include "bogofilter.h" #include "bogoconfig.h" +#include "method.h" #include "register.h" #include "wordlists.h" @@ -47,26 +48,28 @@ /* if the given environment variable 'var' exists, copy it to 'dest' and tack on the optional 'subdir' value. + return value: 0 - success, no copy done + 1 - success, copied + -1 - error (overflow) */ -static void set_dir_from_env(/*@reldef@*/ /*@unique@*/ char* dest, +static int set_dir_from_env(/*@reldef@*/ /*@unique@*/ char* dest, const char *var, - /*@null@*/ const char *subdir) + /*@null@*/ const char *subdir, + size_t path_size /* size of the full buffer */) { char *env; - size_t path_left=PATH_LEN-1; env = getenv(var); - if (env == NULL) return; + if (env == NULL) return 0; - strncpy(dest, env, path_left-1); /* leave one char left for '/' */ - path_left -= strlen(env); + if (strlcpy(dest, env, path_size) >= path_size) return -1; if ('/' != dest[strlen(dest)-1]) { - strcat(dest, "/"); - path_left--; + if (strlcat(dest, "/", path_size) >= path_size) return -1; } - if (subdir && (path_left > 0)) { - strncat(dest, subdir, path_left); + if (subdir != NULL) { + if (strlcat(dest, subdir, path_size) >= path_size) return -1; } + return 1; } /* check that our directory exists and try to create it if it doesn't @@ -90,18 +93,15 @@ if(mkdir(path, 
S_IRUSR|S_IWUSR|S_IXUSR)) { perror("Error creating directory"); return -1; - } - else if (verbose > 0) { + } else if (verbose > 0) { (void)fprintf(stderr, "Created directory %s .\n", path); } return 0; - } - else { + } else { perror("Error accessing directory"); return -1; } - } - else { + } else { if (! S_ISDIR(sb.st_mode)) { (void)fprintf(stderr, "Error: %s is not a directory.\n", path); } @@ -113,8 +113,11 @@ { int exitcode; - set_dir_from_env(directory, "HOME", BOGODIR); - set_dir_from_env(directory, "BOGOFILTER_DIR", NULL); + if ((set_dir_from_env(directory, "HOME", BOGODIR, sizeof(directory)) < 0) + || (set_dir_from_env(directory, "BOGOFILTER_DIR", NULL, sizeof(directory)) < 0)) { + fprintf(stderr, "HOME or BOGOFILTER_DIR too long\n"); + exit(2); + } exitcode = process_args(argc, argv); if (exitcode != 0) @@ -134,6 +137,7 @@ { double spamicity; rc_t status = bogofilter(&spamicity); + const char *yes_no_unsure = (status==RC_SPAM) ? "Yes" : ((status==RC_HAM) ? "No" : "Unsure"); if (passthrough) { @@ -155,16 +159,14 @@ { if ( terse ) { - (void)printf("%c %f\n", (status==RC_SPAM) ? 'Y' : 'N', spamicity); + (void)printf("%1.1s %f\n", yes_no_unsure, spamicity); } else { /* print spam-status at the end of the header * then mark the beginning of the message body */ (void)printf("%s: %s, tests=bogofilter, spamicity=%0.6f, version=%s\n", - spam_header_name, - (status==RC_SPAM) ? "Yes" : "No", - spamicity, version); + spam_header_name, yes_no_unsure, spamicity, version); } } @@ -195,13 +197,12 @@ if (fflush(stdout) || ferror(stdout)) exit(2); } - exitcode = status; - if (nonspam_exits_zero && passthrough && exitcode == 1) + exitcode = (status == RC_SPAM) ? 0 : 1; + if (nonspam_exits_zero && passthrough && exitcode != 0) exitcode = 0; (void)sprintf(msg_bogofilter, "%s: %s, spamicity=%0.6f, version=%s", - spam_header_name, (status==RC_SPAM) ? 
"Yes" : "No", - spamicity, version); + spam_header_name, yes_no_unsure, spamicity, version); } break; default: Index: method.h =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/method.h,v retrieving revision 1.5 retrieving revision 1.5.22.1 diff -u -d -r1.5 -r1.5.22.1 --- method.h 21 Nov 2002 15:24:47 -0000 1.5 +++ method.h 5 Dec 2002 17:20:42 -0000 1.5.22.1 @@ -9,6 +9,7 @@ #define HAVE_METHOD_H #include <bogoconfig.h> +#include <bogofilter.h> #include <wordhash.h> typedef struct bogostat_s bogostat_t; @@ -17,6 +18,7 @@ typedef double m_compute_spamicity(wordhash_t *wordhash, FILE *fp); /*@globals errno@*/ typedef void m_print_bogostats(FILE *fp, double spamicity); typedef void m_cleanup(void); +typedef rc_t m_status(void); /* ** This defines an object oriented API for accessing @@ -28,10 +30,19 @@ const parm_desc *config_parms; m_initialize *initialize; m_compute_spamicity *compute_spamicity; + m_status *status; /* string - Yes, No, ... */ m_print_bogostats *print_stats; m_cleanup *cleanup; } method_t; extern method_t *method; + +/* +** Define instance storage ... 
+*/ + +typedef struct stats_s { + double spamicity; +} stats_t; #endif /* HAVE_METHOD_H */ Index: robinson.c =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/robinson.c,v retrieving revision 1.9 retrieving revision 1.9.18.1 diff -u -d -r1.9 -r1.9.18.1 --- robinson.c 23 Nov 2002 18:42:19 -0000 1.9 +++ robinson.c 5 Dec 2002 17:20:42 -0000 1.9.18.1 @@ -40,11 +40,11 @@ extern int Rtable; static double scalefactor; -static double thresh_rtable = 0.0f; -static double robx = 0.0f; -static double robs = 0.0f; +double thresh_rtable = 0.0f; +double robx = 0.0f; +double robs = 0.0f; -static stats_t stats; +static rob_stats_t stats; const parm_desc rob_parm_table[] = /* needed by fisher.c */ { @@ -57,6 +57,7 @@ void rob_initialize_constants(void); double rob_get_spamicity(size_t robn, FLOAT P, FLOAT Q); void rob_print_summary(void); +rc_t rob_status(void); #ifdef ENABLE_ROBINSON_METHOD rf_method_t rf_robinson_method = { @@ -65,6 +66,7 @@ rob_parm_table, /* m_parm_table *parm_table */ rob_initialize_constants, /* m_initialize_constants *initialize_constants */ rob_bogofilter, /* m_compute_spamicity *compute_spamicity */ + rob_status, /* m_status *status */ rob_print_bogostats, /* m_print_bogostats *print_stats */ rob_cleanup, /* m_free *cleanup */ }, @@ -227,6 +229,8 @@ } else spamicity = robx; + stats.spamicity = spamicity; + return (spamicity); } @@ -271,6 +275,12 @@ void rob_initialize_constants(void) { rob_initialize_with_parameters(ROBINSON_MIN_DEV, ROBINSON_SPAM_CUTOFF); +} + +rc_t rob_status(void) +{ + rc_t status = ( stats.spamicity >= spam_cutoff ) ? 
RC_SPAM : RC_HAM; + return status; } double rob_bogofilter(wordhash_t *wordhash, FILE *fp) /*@globals errno@*/ Index: robinson.h =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/robinson.h,v retrieving revision 1.7 retrieving revision 1.7.18.1 diff -u -d -r1.7 -r1.7.18.1 --- robinson.h 23 Nov 2002 18:42:19 -0000 1.7 +++ robinson.h 5 Dec 2002 17:20:42 -0000 1.7.18.1 @@ -29,14 +29,14 @@ ** Define a struct so stats can be saved for printing. */ -typedef struct stats_s { +typedef struct rob_stats_s { size_t robn; double p_ln; /* Robinson P, as a log*/ double q_ln; /* Robinson Q, as a log*/ double p_pr; /* Robinson P */ double q_pr; /* Robinson Q */ double spamicity; -} stats_t; +} rob_stats_t; extern double rob_bogofilter(wordhash_t *wordhash, FILE *fp); /*@globals errno@*/ extern double rob_compute_spamicity(wordhash_t *wordhash, FILE *fp); /*@globals errno@*/ Index: system.h =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/system.h,v retrieving revision 1.6 retrieving revision 1.6.18.1 diff -u -d -r1.6 -r1.6.18.1 --- system.h 22 Nov 2002 21:57:02 -0000 1.6 +++ system.h 5 Dec 2002 17:20:43 -0000 1.6.18.1 @@ -37,5 +37,12 @@ # endif #endif +#if !HAVE_STRLCPY +size_t strlcpy(char *dst, const char *src, size_t size); +#endif + +#if !HAVE_STRLCAT +size_t strlcat(char *dst, const char *src, size_t size); +#endif #endif Index: wordlists.c =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/wordlists.c,v retrieving revision 1.29 retrieving revision 1.29.14.1 diff -u -d -r1.29 -r1.29.14.1 --- wordlists.c 25 Nov 2002 22:19:36 -0000 1.29 +++ wordlists.c 5 Dec 2002 17:20:43 -0000 1.29.14.1 @@ -78,24 +78,22 @@ return 0; } -/* build an absolute path to a file given a directory and file name +/* build a path to a file given a directory and file name, + * concatenating dir and file, adding a slash if
necessary + * + * returns: -1 for overflow + * 0 for success */ -void build_path(char* dest, int size, const char* dir, const char* file) +int build_path(char* dest, size_t size, const char* dir, const char* file) { - int path_left=size-1; - - *dest = '\0'; - strncat(dest, dir, path_left); - path_left -= strlen(dir); - if (path_left <= 0) return; + if (strlcpy(dest, dir, size) >= size) return -1; if ('/' != dest[strlen(dest)-1]) { - strcat(dest, "/"); - path_left--; - if (path_left <= 0) return; + if (strlcat(dest, "/", size) >= size) return -1; /* RATS: ignore */ } - strncat(dest, file, path_left); + if (strlcat(dest, file, size) >= size) return -1; + return 0; } /* returns -1 for error, 0 for success */ @@ -104,10 +102,10 @@ int rc = 0; char filepath[PATH_LEN]; - build_path(filepath, PATH_LEN, dir, GOODFILE); + if (build_path(filepath, sizeof(filepath), dir, GOODFILE) < 0) rc = -1; if (init_list(&good_list, "good", filepath, good_weight, false, 0, 0)) rc = -1; - build_path(filepath, PATH_LEN, dir, SPAMFILE); + if (build_path(filepath, sizeof(filepath), dir, SPAMFILE) < 0) rc = -1; if (init_list(&spam_list, "spam", filepath, bad_weight, true, 0, 0)) rc = -1; return rc; |