[cvs] bogofilter/src mime.c,1.27,1.28
Fast Bayesian spam filter along the lines suggested by Paul Graham
Brought to you by:
m-a
From: <m-...@us...> - 2004-12-08 00:44:37
|
Update of /cvsroot/bogofilter/bogofilter/src In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv26519 Modified Files: mime.c Log Message: Reformat to use less screen real estate (indent -kr -i4). Index: mime.c =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/src/mime.c,v retrieving revision 1.27 retrieving revision 1.28 diff -u -d -r1.27 -r1.28 --- mime.c 7 Nov 2004 22:00:23 -0000 1.27 +++ mime.c 8 Dec 2004 00:44:28 -0000 1.28 @@ -43,360 +43,324 @@ mime_t *msg_state = msg_stack; mime_t *msg_top = msg_stack; -static struct type_s -{ - enum mimetype type; - const char *name; - size_t len; -} -mime_type_table[] = -{ - { MIME_TEXT_HTML, "text/html", 9, } , - { MIME_TEXT_PLAIN, "text/plain", 10, } , - { MIME_TEXT, "text", 4, } , /* NON-COMPLIANT; should be "text/" */ - { MIME_APPLICATION, "application/", 12, } , - { MIME_IMAGE, "image/", 6, } , - { MIME_MESSAGE, "message/", 8, } , - { MIME_MULTIPART, "multipart/", 10, } , -}; +static struct type_s { + enum mimetype type; + const char *name; + size_t len; +} mime_type_table[] = { + { + MIME_TEXT_HTML, "text/html", 9,}, { + MIME_TEXT_PLAIN, "text/plain", 10,}, { + MIME_TEXT, "text", 4,}, /* NON-COMPLIANT; should be "text/" */ + { + MIME_APPLICATION, "application/", 12,}, { + MIME_IMAGE, "image/", 6,}, { + MIME_MESSAGE, "message/", 8,}, { +MIME_MULTIPART, "multipart/", 10,},}; -static struct encoding_s -{ - enum mimeencoding encoding; - const char *name; -} -mime_encoding_table[] = -{ - { MIME_7BIT, "7BIT", } , - { MIME_8BIT, "8BIT", } , - { MIME_BINARY, "BINARY", } , - { MIME_QP, "QUOTED-PRINTABLE", } , - { MIME_BASE64, "BASE64", } , - { MIME_UUENCODE, "X-UUENCODE", } , -}; +static struct encoding_s { + enum mimeencoding encoding; + const char *name; +} mime_encoding_table[] = { + { + MIME_7BIT, "7BIT",}, { + MIME_8BIT, "8BIT",}, { + MIME_BINARY, "BINARY",}, { + MIME_QP, "QUOTED-PRINTABLE",}, { + MIME_BASE64, "BASE64",}, { +MIME_UUENCODE, "X-UUENCODE",},}; 
-static struct disposition_s -{ - enum mimedisposition disposition; - const char *name; -} -mime_disposition_table[] = -{ - { MIME_INLINE, "inline", } , - { MIME_ATTACHMENT, "attachment", } , -}; +static struct disposition_s { + enum mimedisposition disposition; + const char *name; +} mime_disposition_table[] = { + { + MIME_INLINE, "inline",}, { +MIME_ATTACHMENT, "attachment",},}; /* boundary properties */ typedef struct { - bool is_valid; - bool is_final; - int depth; + bool is_valid; + bool is_final; + int depth; } boundary_t; /* Function Prototypes */ -static void mime_disposition (word_t *text); -static void mime_encoding (word_t *text); -static void mime_type (word_t *text); +static void mime_disposition(word_t * text); +static void mime_encoding(word_t * text); +static void mime_type(word_t * text); -static const byte *skipws (const byte *t, const byte *e); -static byte *getword (const byte *t, const byte *e); +static const byte *skipws(const byte * t, const byte * e); +static byte *getword(const byte * t, const byte * e); #if 0 -static char *getparam (const byte *t, char *e, const byte *param); +static char *getparam(const byte * t, char *e, const byte * param); #endif -static void mime_push (mime_t * parent); -static void mime_pop (void); +static void mime_push(mime_t * parent); +static void mime_pop(void); /* Function Definitions */ -#if 0 /* Unused */ +#if 0 /* Unused */ const char *mime_type_name(enum mimetype type) { - size_t i; - for (i = 0; i < COUNTOF (mime_type_table); i += 1) - { - struct type_s *typ = mime_type_table + i; - if (typ->type == type) - return typ->name; - } - return "unknown"; + size_t i; + for (i = 0; i < COUNTOF(mime_type_table); i += 1) { + struct type_s *typ = mime_type_table + i; + if (typ->type == type) + return typ->name; + } + return "unknown"; } #endif -static void -mime_init (mime_t * parent) +static void mime_init(mime_t * parent) { - msg_state->mime_type = MIME_TEXT; - msg_state->mime_encoding = MIME_7BIT; - 
msg_state->boundary = NULL; - msg_state->boundary_len = 0; - msg_state->parent = parent; - msg_state->charset = xstrdup ("US-ASCII"); - msg_state->child_count = 0; + msg_state->mime_type = MIME_TEXT; + msg_state->mime_encoding = MIME_7BIT; + msg_state->boundary = NULL; + msg_state->boundary_len = 0; + msg_state->parent = parent; + msg_state->charset = xstrdup("US-ASCII"); + msg_state->child_count = 0; } -static void -mime_free (mime_t * t) +static void mime_free(mime_t * t) { - if (t == NULL) - return; + if (t == NULL) + return; - if (t->boundary) - { - xfree (t->boundary); - t->boundary = NULL; - } + if (t->boundary) { + xfree(t->boundary); + t->boundary = NULL; + } - if (t->charset) - { - xfree (t->charset); - t->charset = NULL; - } + if (t->charset) { + xfree(t->charset); + t->charset = NULL; + } - t->parent = NULL; + t->parent = NULL; } /** Cleanup storage allocation */ void mime_cleanup() { while (stackp > -1) - mime_pop (); + mime_pop(); } -static void -mime_push (mime_t * parent) +static void mime_push(mime_t * parent) { - if (stackp < MIME_STACK_MAX) - { - /* Top level is its own parent, but does not increase child_count */ - if (parent == NULL) - { - if (stackp == -1) - parent = &msg_stack[0]; - else - { - fprintf (stderr, "**mime_push: expecting non-null parent\n"); - exit(EX_ERROR); - } - } - else - { - parent->child_count++; - } + if (stackp < MIME_STACK_MAX) { + /* Top level is its own parent, but does not increase child_count */ + if (parent == NULL) { + if (stackp == -1) + parent = &msg_stack[0]; + else { + fprintf(stderr, + "**mime_push: expecting non-null parent\n"); + exit(EX_ERROR); + } + } else { + parent->child_count++; + } - msg_state = &msg_stack[++stackp]; + msg_state = &msg_stack[++stackp]; - mime_init (parent); + mime_init(parent); - if (DEBUG_MIME (1)) - fprintf (dbgout, "*** mime_push. stackp: %d\n", stackp); - } - else - { - fprintf (stderr, "Attempt to overflow mime stack\n"); - } + if (DEBUG_MIME(1)) + fprintf(dbgout, "*** mime_push. 
stackp: %d\n", stackp); + } else { + fprintf(stderr, "Attempt to overflow mime stack\n"); + } } -static void -mime_pop (void) +static void mime_pop(void) { - if (DEBUG_MIME (1)) - fprintf (dbgout, "*** mime_pop. stackp: %d\n", stackp); + if (DEBUG_MIME(1)) + fprintf(dbgout, "*** mime_pop. stackp: %d\n", stackp); - if (stackp > -1) - { - mime_t *parent = msg_state->parent; + if (stackp > -1) { + mime_t *parent = msg_state->parent; - if (parent == msg_state) - parent = NULL; + if (parent == msg_state) + parent = NULL; - mime_free (msg_state); - stackp--; + mime_free(msg_state); + stackp--; - msg_state = stackp == -1 ? NULL : &msg_stack[stackp]; - - if (msg_state && parent && parent->child_count > 0) - parent->child_count--; - } - else - { - fprintf (stderr, "Attempt to underflow mime stack\n"); - } + msg_state = stackp == -1 ? NULL : &msg_stack[stackp]; + + if (msg_state && parent && parent->child_count > 0) + parent->child_count--; + } else { + fprintf(stderr, "Attempt to underflow mime stack\n"); + } } -static int -is_mime_container (mime_t * m) +static int is_mime_container(mime_t * m) { - return (m && ((m->mime_type == MIME_MESSAGE) || (m->mime_type == MIME_MULTIPART))); + return (m && ((m->mime_type == MIME_MESSAGE) + || (m->mime_type == MIME_MULTIPART))); } -void -mime_reset (void) +void mime_reset(void) { - if (DEBUG_MIME (0)) - fprintf (dbgout, "*** mime_reset\n"); + if (DEBUG_MIME(0)) + fprintf(dbgout, "*** mime_reset\n"); - while (stackp > -1) - mime_pop (); + while (stackp > -1) + mime_pop(); - mime_push (NULL); + mime_push(NULL); } -void -mime_add_child (mime_t * parent) +void mime_add_child(mime_t * parent) { - mime_push (parent); + mime_push(parent); } static -bool get_boundary_props(const word_t *boundary, boundary_t *b) +bool get_boundary_props(const word_t * boundary, boundary_t * b) { - int i; - const byte *buf = boundary->text; - size_t blen = boundary->leng; + int i; + const byte *buf = boundary->text; + size_t blen = boundary->leng; - b->is_valid 
= false; + b->is_valid = false; - if (blen > 2 && buf[0] == '-' && buf[1] == '-') { + if (blen > 2 && buf[0] == '-' && buf[1] == '-') { - while (blen > 2 && - (buf[blen - 1] == '\r' || - buf[blen - 1] == '\n')) - blen--; + while (blen > 2 && + (buf[blen - 1] == '\r' || buf[blen - 1] == '\n')) + blen--; - /* skip initial -- */ - buf += 2; - blen -= 2; + /* skip initial -- */ + buf += 2; + blen -= 2; - /* skip and note ending --, if any */ - if (blen > 2 && - buf[blen-1] == '-' && - buf[blen-2] == '-') { - b->is_final = true; - blen -= 2; - } else { - b->is_final = false; - } + /* skip and note ending --, if any */ + if (blen > 2 && buf[blen - 1] == '-' && buf[blen - 2] == '-') { + b->is_final = true; + blen -= 2; + } else { + b->is_final = false; + } - for (i = stackp; i > -1; i--) { - if (is_mime_container (&msg_stack[i]) && - msg_stack[i].boundary && - (memcmp (msg_stack[i].boundary, buf, blen) == 0)) { - b->depth = i; - b->is_valid = true; - break; - } + for (i = stackp; i > -1; i--) { + if (is_mime_container(&msg_stack[i]) && + msg_stack[i].boundary && + (memcmp(msg_stack[i].boundary, buf, blen) == 0)) { + b->depth = i; + b->is_valid = true; + break; + } + } } - } - return b->is_valid; + return b->is_valid; } -bool -mime_is_boundary(word_t *boundary) +bool mime_is_boundary(word_t * boundary) { boundary_t b; get_boundary_props(boundary, &b); return b.is_valid; } -bool -got_mime_boundary(word_t *boundary) +bool got_mime_boundary(word_t * boundary) { - mime_t *parent; - boundary_t b; + mime_t *parent; + boundary_t b; - get_boundary_props(boundary, &b); + get_boundary_props(boundary, &b); - if (!b.is_valid) - return false; + if (!b.is_valid) + return false; - if (DEBUG_MIME (0)) - fprintf (dbgout, "*** got_mime_boundary: stackp: %d, boundary: '%s'\n", - stackp, boundary->text); + if (DEBUG_MIME(0)) + fprintf(dbgout, + "*** got_mime_boundary: stackp: %d, boundary: '%s'\n", + stackp, boundary->text); - if (stackp > 0) - { - /* This handles explicit and implicit 
boundaries */ - while (stackp > 0 && stackp > b.depth) - mime_pop (); + if (stackp > 0) { + /* This handles explicit and implicit boundaries */ + while (stackp > 0 && stackp > b.depth) + mime_pop(); - /* explicit end boundary */ - if (b.is_final) - return true; - } + /* explicit end boundary */ + if (b.is_final) + return true; + } - parent = is_mime_container (msg_state) ? msg_state : msg_state->parent; - mime_push (parent); - return true; + parent = is_mime_container(msg_state) ? msg_state : msg_state->parent; + mime_push(parent); + return true; } /* skips whitespace, returns NULL when ran into end of string */ -static const byte * -skipws (const byte *t, const byte *e) +static const byte *skipws(const byte * t, const byte * e) { - while (t < e && isspace (*t)) - t++; - if (t < e) - return t; - return NULL; + while (t < e && isspace(*t)) + t++; + if (t < e) + return t; + return NULL; } /* skips [ws]";"[ws] */ #if 0 -char * -skipsemi (const byte *t, const byte *e) +char *skipsemi(const byte * t, const byte * e) { - if (!(t = skipws (t, e))) - return NULL; - if (*t == ';') - t++; - return skipws (t, e); + if (!(t = skipws(t, e))) + return NULL; + if (*t == ';') + t++; + return skipws(t, e); } #endif /* get next MIME word, NULL when none found. * caller must free returned string with xfree() */ -static byte * -getword (const byte *t, const byte *e) +static byte *getword(const byte * t, const byte * e) { - int quote = 0; - int l; - const byte *ts; - byte *n; + int quote = 0; + int l; + const byte *ts; + byte *n; - t = skipws (t, e); - if (!t) - return NULL; - if (*t == '"') { - quote++; - t++; - } - ts = t; - while ((t < e) && (quote ? *t != '"' : !isspace(*t))) { - t++; - } - l = t - ts; - n = (byte *)xmalloc(l + 1); - memcpy(n, ts, l); - n[l] = (byte) '\0'; - return n; + t = skipws(t, e); + if (!t) + return NULL; + if (*t == '"') { + quote++; + t++; + } + ts = t; + while ((t < e) && (quote ? 
*t != '"' : !isspace(*t))) { + t++; + } + l = t - ts; + n = (byte *) xmalloc(l + 1); + memcpy(n, ts, l); + n[l] = (byte) '\0'; + return n; } #if 0 -char * -getparam (char *t, char *e, const byte *param) +char *getparam(char *t, char *e, const byte * param) { /* char *w, *u; */ - return NULL; /* NOT YET IMPLEMENTED */ + return NULL; /* NOT YET IMPLEMENTED */ } #endif -void -mime_content(word_t *text) +void mime_content(word_t * text) { char *key = (char *) text->text; switch (tolower(key[9])) { @@ -412,34 +376,33 @@ } } -static void -mime_disposition (word_t *text) +static void mime_disposition(word_t * text) { - size_t i; - size_t l; - char *w; + size_t i; + size_t l; + char *w; - l = strlen("Content-Disposition:"); - w = (char *)getword (text->text + l, text->text + text->leng); + l = strlen("Content-Disposition:"); + w = (char *) getword(text->text + l, text->text + text->leng); - if (!w) return; + if (!w) + return; - msg_state->mime_disposition = MIME_DISPOSITION_UNKNOWN; - for (i = 0; i < COUNTOF (mime_disposition_table); i += 1 ) - { - struct disposition_s *dis = mime_disposition_table + i; - if (strcasecmp (w, dis->name) == 0) - { - msg_state->mime_disposition = dis->disposition; - if (DEBUG_MIME (1)) - fprintf (dbgout, "*** mime_disposition: %s\n", text->text); - break; + msg_state->mime_disposition = MIME_DISPOSITION_UNKNOWN; + for (i = 0; i < COUNTOF(mime_disposition_table); i += 1) { + struct disposition_s *dis = mime_disposition_table + i; + if (strcasecmp(w, dis->name) == 0) { + msg_state->mime_disposition = dis->disposition; + if (DEBUG_MIME(1)) + fprintf(dbgout, "*** mime_disposition: %s\n", text->text); + break; + } } - } - if (DEBUG_MIME (0) && msg_state->mime_disposition == MIME_DISPOSITION_UNKNOWN) - fprintf (stderr, "Unknown mime disposition - '%s'\n", w); - xfree (w); - return; + if (DEBUG_MIME(0) + && msg_state->mime_disposition == MIME_DISPOSITION_UNKNOWN) + fprintf(stderr, "Unknown mime disposition - '%s'\n", w); + xfree(w); + return; } 
/********* @@ -454,151 +417,148 @@ ** *********/ -static void -mime_encoding (word_t *text) +static void mime_encoding(word_t * text) { - size_t i; - size_t l; - char *w; + size_t i; + size_t l; + char *w; - l = strlen("Content-Transfer-Encoding:"); - w = (char *)getword(text->text + l, text->text + text->leng); + l = strlen("Content-Transfer-Encoding:"); + w = (char *) getword(text->text + l, text->text + text->leng); - if (!w) return; + if (!w) + return; - msg_state->mime_encoding = MIME_ENCODING_UNKNOWN; - for (i = 0; i < COUNTOF (mime_encoding_table); i += 1 ) - { - struct encoding_s *enc = mime_encoding_table + i; - if (strcasecmp (w, enc->name) == 0) - { - msg_state->mime_encoding = enc->encoding; - if (DEBUG_MIME (1)) - fprintf (dbgout, "*** mime_encoding: %s\n", text->text); - break; + msg_state->mime_encoding = MIME_ENCODING_UNKNOWN; + for (i = 0; i < COUNTOF(mime_encoding_table); i += 1) { + struct encoding_s *enc = mime_encoding_table + i; + if (strcasecmp(w, enc->name) == 0) { + msg_state->mime_encoding = enc->encoding; + if (DEBUG_MIME(1)) + fprintf(dbgout, "*** mime_encoding: %s\n", text->text); + break; + } } - } - if (DEBUG_MIME (0) && msg_state->mime_encoding == MIME_ENCODING_UNKNOWN) - fprintf (stderr, "Unknown mime encoding - '%s'\n", w); - xfree (w); - return; + if (DEBUG_MIME(0) + && msg_state->mime_encoding == MIME_ENCODING_UNKNOWN) + fprintf(stderr, "Unknown mime encoding - '%s'\n", w); + xfree(w); + return; } -static void -mime_type (word_t *text) +static void mime_type(word_t * text) { - size_t l; - char *w; - struct type_s *typ; + size_t l; + char *w; + struct type_s *typ; - l = strlen("Content-Type:"); - w = (char *)getword(text->text + l, text->text + text->leng); + l = strlen("Content-Type:"); + w = (char *) getword(text->text + l, text->text + text->leng); - if (!w) return; + if (!w) + return; - msg_state->mime_type = MIME_TYPE_UNKNOWN; - for (typ = mime_type_table; typ < mime_type_table + COUNTOF (mime_type_table); typ += 1) - { - if 
(strncasecmp (w, typ->name, typ->len) == 0) - { - msg_state->mime_type = typ->type; - if (DEBUG_MIME (1) || DEBUG_LEXER(1)) - fprintf (dbgout, "*** mime_type: %s\n", text->text); - break; + msg_state->mime_type = MIME_TYPE_UNKNOWN; + for (typ = mime_type_table; + typ < mime_type_table + COUNTOF(mime_type_table); typ += 1) { + if (strncasecmp(w, typ->name, typ->len) == 0) { + msg_state->mime_type = typ->type; + if (DEBUG_MIME(1) || DEBUG_LEXER(1)) + fprintf(dbgout, "*** mime_type: %s\n", text->text); + break; + } } - } - if (DEBUG_MIME (0) && msg_state->mime_type == MIME_TYPE_UNKNOWN) - fprintf (stderr, "Unknown mime type - '%s'\n", w); - xfree (w); + if (DEBUG_MIME(0) && msg_state->mime_type == MIME_TYPE_UNKNOWN) + fprintf(stderr, "Unknown mime type - '%s'\n", w); + xfree(w); - switch (msg_state->mime_type) - { - case MIME_TEXT: - case MIME_TEXT_HTML: - case MIME_TEXT_PLAIN: - /* XXX: read charset */ - return; - case MIME_TYPE_UNKNOWN: - return; - case MIME_MULTIPART: - return; - case MIME_MESSAGE: - /* XXX: read boundary */ - return; - case MIME_APPLICATION: - /* XXX: read boundary */ - return; - case MIME_IMAGE: - /* XXX: read boundary */ + switch (msg_state->mime_type) { + case MIME_TEXT: + case MIME_TEXT_HTML: + case MIME_TEXT_PLAIN: + /* XXX: read charset */ + return; + case MIME_TYPE_UNKNOWN: + return; + case MIME_MULTIPART: + return; + case MIME_MESSAGE: + /* XXX: read boundary */ + return; + case MIME_APPLICATION: + /* XXX: read boundary */ + return; + case MIME_IMAGE: + /* XXX: read boundary */ + return; + } return; - } - return; } -void -mime_boundary_set (word_t *text) +void mime_boundary_set(word_t * text) { - byte *boundary = text->text; - size_t blen = text->leng; + byte *boundary = text->text; + size_t blen = text->leng; - if (DEBUG_MIME (1)) { - int len = blen; - if (blen > INT_MAX) len = INT_MAX; - fprintf (dbgout, "*** --> mime_boundary_set: %d '%-.*s'\n", stackp, - len, boundary); - } + if (DEBUG_MIME(1)) { + int len = blen; + if (blen > INT_MAX) 
+ len = INT_MAX; + fprintf(dbgout, "*** --> mime_boundary_set: %d '%-.*s'\n", + stackp, len, boundary); + } - boundary = getword(boundary + strlen("boundary="), boundary + blen); - xfree(msg_state->boundary); - msg_state->boundary = (char *)boundary; - msg_state->boundary_len = strlen((char *)boundary); + boundary = getword(boundary + strlen("boundary="), boundary + blen); + xfree(msg_state->boundary); + msg_state->boundary = (char *) boundary; + msg_state->boundary_len = strlen((char *) boundary); - if (DEBUG_MIME (1)) - fprintf (dbgout, "*** <-- mime_boundary_set: %d '%s'\n", stackp, - boundary); + if (DEBUG_MIME(1)) + fprintf(dbgout, "*** <-- mime_boundary_set: %d '%s'\n", + stackp, boundary); - return; + return; } -uint -mime_decode (word_t *text) +uint mime_decode(word_t * text) { - uint count = text->leng; + uint count = text->leng; - /* early out for the uninteresting cases */ - if (msg_state->mime_encoding == MIME_7BIT || - msg_state->mime_encoding == MIME_8BIT || - msg_state->mime_encoding == MIME_BINARY || - msg_state->mime_encoding == MIME_ENCODING_UNKNOWN) - return count; + /* early out for the uninteresting cases */ + if (msg_state->mime_encoding == MIME_7BIT || + msg_state->mime_encoding == MIME_8BIT || + msg_state->mime_encoding == MIME_BINARY || + msg_state->mime_encoding == MIME_ENCODING_UNKNOWN) + return count; - if (DEBUG_MIME (3)) - fprintf (dbgout, "*** mime_decode %lu \"%-.*s\"\n", (unsigned long)count, count > INT_MAX ? INT_MAX : (int)(count - 1), text->text); + if (DEBUG_MIME(3)) + fprintf(dbgout, "*** mime_decode %lu \"%-.*s\"\n", + (unsigned long) count, + count > INT_MAX ? 
INT_MAX : (int) (count - 1), text->text); - /* Do not decode "real" boundary lines */ - if (mime_is_boundary(text) == true) - return count; + /* Do not decode "real" boundary lines */ + if (mime_is_boundary(text) == true) + return count; - switch (msg_state->mime_encoding) - { - case MIME_QP: - count = qp_decode(text, RFC2045); - break; - case MIME_BASE64: - if (count > 4) - count = base64_decode(text); - break; - case MIME_UUENCODE: - count = uudecode(text); - break; - case MIME_7BIT: - case MIME_8BIT: - case MIME_BINARY: - case MIME_ENCODING_UNKNOWN: - break; - } + switch (msg_state->mime_encoding) { + case MIME_QP: + count = qp_decode(text, RFC2045); + break; + case MIME_BASE64: + if (count > 4) + count = base64_decode(text); + break; + case MIME_UUENCODE: + count = uudecode(text); + break; + case MIME_7BIT: + case MIME_8BIT: + case MIME_BINARY: + case MIME_ENCODING_UNKNOWN: + break; + } - return count; + return count; } enum mimetype get_content_type(void) |