[cvs] bogofilter/src lexer.c,1.99,1.100

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 454-5900

Update of /cvsroot/bogofilter/bogofilter/src
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv7975/src

Modified Files:
	lexer.c 
Log Message:
Bugfix: Remove bogus adding of NUL bytes which might be past the buffer's end.
Cleanup: Rename variable copy to adjacent, to mean what it reads.
Bugfix: Switch str* -> mem* functions, we're treating word_t, not C strings.
Document text_decode function. (Still not bug-free.)

Index: lexer.c
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/src/lexer.c,v
retrieving revision 1.99
retrieving revision 1.100
diff -u -d -r1.99 -r1.100

--- lexer.c	8 Oct 2004 23:50:04 -0000	1.99
+++ lexer.c	9 Oct 2004 02:17:53 -0000	1.100
@@ -18,6 +18,7 @@
 #include "charset.h"
 #include "error.h"
 #include "lexer.h"
+#include "memstr.h"
 #include "mime.h"
 #include "msgcounts.h"
 #include "qp.h"
@@ -289,22 +290,28 @@
 
 size_t text_decode(word_t *w)
 {
-    byte *beg = w->text;
-    byte *fin = beg + w->leng;
-    byte *txt = (byte *) strstr((char *)beg, "=?");	/* skip past leading text */
-    uint size = (uint) (txt - beg);
+    byte *const beg = w->text;		/* base pointer, fixed */
+    byte *const fin = beg + w->leng;	/* end+1 position */
+
+    byte *txt = (byte *) memstr(w->text, w->leng, "=?"); /* input position */
+    uint size = (uint) (txt - beg); /* output offset */
+
+    if (txt == NULL)
+	return w->leng;
 
     while (txt < fin) {
 	word_t n;
-	byte *typ = (byte *) strchr((char *)txt+2, '?');/* Encoding type - 'B' or 'Q' */
-	byte *tmp = typ + 3;
-	byte *end = (byte *) strstr((char *)tmp, "?=");	/* last byte of encoded word  */
-	uint len = end - tmp;
-	bool copy;
+	byte *typ, *tmp, *end;
+	uint len;
+	bool adjacent;
+
+	typ = (byte *) memchr((char *)txt+2, '?', fin-(txt+2));/* Encoding type - 'B' or 'Q' */
+	tmp = typ + 3;	/* start of encoded word */
+	end = (byte *) memstr((char *)tmp, fin-tmp, "?=");	/* last byte of encoded word  */
+	len = end - tmp;
 
 	n.text = tmp;				/* Start of encoded word */
 	n.leng = len;				/* Length of encoded word */
-	n.text[n.leng] = (byte) '\0';
 
 	if (DEBUG_LEXER(2)) {
 	    fputs("***  ", dbgout);
@@ -324,7 +331,6 @@
 	}
 
 	n.leng = len;
-	n.text[n.leng] = '\0';
 
 	if (DEBUG_LEXER(3)) {
 	    fputs("***  ", dbgout);
@@ -332,28 +338,38 @@
 	    fputs("\n", dbgout);
 	}
 
+	/* move decoded word to where the encoded used to be */
 	memmove(beg+size, n.text, len+1);
-	size += len;
-	txt = end + 2;
+	size += len; /* bump output pointer */
+	txt = end + 2; /* skip ?= trailer */
 	if (txt >= fin)
 	    break;
-	end = (byte *) strstr((char *)txt, "=?");
-	copy = end != NULL;
 
-	if (copy) {
+	/* check for next encoded word */
+	end = (byte *) memstr((char *)txt, fin-txt, "=?");
+	adjacent = end != NULL;
+
+	/* clear adjacent flag if non-whitespace character found between
+	 * adjacent encoded words */
+	if (adjacent) {
 	    tmp = txt;
-	    while (copy && tmp < end) {
+	    while (adjacent && tmp < end) {
 		if (isspace(*tmp))
 		    tmp += 1;
 		else
-		    copy = false;
+		    adjacent = false;
 	    }
 	}
 
-	if (copy)
+	/* we have a next encoded word and we've had only whitespace
+	 * between the current and the next */
+	if (adjacent)
+	    /* just skip whitespace */
 	    txt = end;
-	else while (txt < end)
-	    beg[size++] = *txt++;
+	else
+	    /* copy everything that was between the encoded words */
+	    while (txt < end)
+		beg[size++] = *txt++;
     }
 
     return size;





[cvs] bogofilter/src lexer.c,1.99,1.100

Fast Bayesian spam filter along lines suggested by Paul Graham

[cvs] bogofilter/src lexer.c,1.99,1.100