From: Rainer W. <rwe...@ms...> - 2010-06-08 23:58:01
|
The file included below is a diff of the 'running' uid_db code against the 6.3.17 fetchmail tree. I hope that it is ok in this form, because I'm still in extreme 'now, we just need to deliver'-mode ... ----------- --- fetchmail/Makefile.am 7 May 2010 13:49:15 -0000 1.1.1.6 +++ fetchmail/Makefile.am 25 May 2010 17:28:29 -0000 1.1.1.6.2.3 @@ -38,7 +38,7 @@ smbencrypt.h smbdes.c smbencrypt.c smbmd4.c smbutil.c \ libesmtp/gethostbyname.h libesmtp/gethostbyname.c \ smbtypes.h fm_getaddrinfo.c tls.c rfc822valid.c \ - xmalloc.h sdump.h sdump.c + xmalloc.h sdump.h sdump.c uid_db.h uid_db.c libfm_a_LIBADD= $(EXTRAOBJ) libfm_a_DEPENDENCIES= $(EXTRAOBJ) LDADD = libfm.a @LIBINTL@ $(LIBOBJS) --- fetchmail/fetchmail.c 7 May 2010 13:49:14 -0000 1.1.1.4 +++ fetchmail/fetchmail.c 24 May 2010 16:02:10 -0000 1.1.1.4.2.3 @@ -1537,6 +1537,14 @@ return(st); } +static int print_id_of(struct uid_db_record *rec, void *unused) +{ + (void)unused; + + printf("\t%s\n", rec->id); + return 0; +} + static void dump_params (struct runctl *runp, struct query *querylist, flag implicit) /* display query parameters in English */ @@ -1938,20 +1946,14 @@ if (ctl->server.protocol > P_POP2 && MAILBOX_PROTOCOL(ctl)) { - if (!ctl->oldsaved) + int count; + + if (!(count = uid_db_n_records(&ctl->oldsaved))) printf(GT_(" No UIDs saved from this host.\n")); else { - struct idlist *idp; - int count = 0; - - for (idp = ctl->oldsaved; idp; idp = idp->next) - ++count; - printf(GT_(" %d UIDs saved.\n"), count); - if (outlevel >= O_VERBOSE) - for (idp = ctl->oldsaved; idp; idp = idp->next) - printf("\t%s\n", idp->id); + traverse_uid_db(&ctl->oldsaved, print_id_of, NULL); } } --- fetchmail/fetchmail.h 7 May 2010 13:49:13 -0000 1.1.1.4 +++ fetchmail/fetchmail.h 24 May 2010 16:02:10 -0000 1.1.1.4.2.3 @@ -39,6 +39,8 @@ # include "trio/trio.h" #endif +#include "uid_db.h" + /* We need this for strstr */ #if !defined(HAVE_STRSTR) && !defined(strstr) char *strstr(const char *, const char *); @@ -387,8 +389,7 @@ int smtp_socket; /* socket descriptor for SMTP connection */ unsigned int uid; /* UID of user to deliver to */ struct idlist *skipped; /* messages skipped on the mail server */ - struct idlist *oldsaved, *newsaved; - struct idlist **oldsavedend; + struct uid_db oldsaved, newsaved; char lastdigest[DIGESTLEN]; /* last MD5 hash seen on this connection */ char *folder; /* folder currently being polled */ --- fetchmail/pop3.c 7 May 2010 13:49:19 -0000 1.1.1.4 +++ fetchmail/pop3.c 24 May 2010 16:02:10 -0000 1.1.1.4.2.6 @@ -21,6 +21,7 @@ #include "fetchmail.h" #include "socket.h" #include "i18n.h" +#include "uid_db.h" #ifdef OPIE_ENABLE #ifdef __cplusplus @@ -845,20 +846,20 @@ last_nr = count + 1; while (first_nr < last_nr - 1) { - struct idlist *newl; + struct uid_db_record *rec; try_nr = (first_nr + last_nr) / 2; if ((ok = pop3_getuidl(sock, try_nr, id, sizeof(id))) != 0) return ok; - if ((newl = str_in_list(&ctl->oldsaved, id, FALSE))) + if ((rec = find_uid_by_id(&ctl->oldsaved, id))) { - flag mark = newl->val.status.mark; + flag mark = rec->status; if (mark == UID_DELETED || mark == UID_EXPUNGED) { if (outlevel >= O_VERBOSE) report(stderr, GT_("id=%s (num=%u) was deleted, but is still present!\n"), id, try_nr); /* just mark it as seen now! */ - newl->val.status.mark = mark = UID_SEEN; + rec->status = mark = UID_SEEN; } /* narrow the search region! */ @@ -872,7 +873,7 @@ first_nr = try_nr; /* save the number */ - newl->val.status.num = try_nr; + set_uid_db_num(&ctl->oldsaved, rec, try_nr); } else { @@ -881,8 +882,8 @@ last_nr = try_nr; /* save it */ - newl = save_str(&ctl->oldsaved, id, UID_UNSEEN); - newl->val.status.num = try_nr; + rec = uid_db_insert(&ctl->oldsaved, id, UID_UNSEEN); + set_uid_db_num(&ctl->oldsaved, rec, try_nr); } } if (outlevel >= O_DEBUG && last_nr <= count) @@ -910,34 +911,38 @@ * of messages to get. * + Otherwise run a binary search to determine the last known message */ + struct uid_db_record *rec; int ok, nolinear = 0; - int first_nr, list_len, try_id, try_nr, add_id; + int first_nr, n_recs, try_id, try_nr, add_id; int num; char id [IDLEN+1]; if ((ok = pop3_gettopid(sock, 1, id, sizeof(id))) != 0) return ok; - if( ( first_nr = str_nr_in_list(&ctl->oldsaved, id) ) == -1 ) { + rec = first_uid_in_db(&ctl->oldsaved, id); + if (!rec) { /* the first message is unknown -> all messages are new */ *newp = *countp; return 0; } - + first_nr = rec->pos; + /* check where we expect the latest known message */ - list_len = count_list( &ctl->oldsaved ); - try_id = list_len - first_nr; /* -1 + 1 */ + n_recs = uid_db_n_records(&ctl->oldsaved); + try_id = n_recs - first_nr; /* -1 + 1 */ if( try_id > 1 ) { if( try_id <= *countp ) { if ((ok = pop3_gettopid(sock, try_id, id, sizeof(id))) != 0) return ok; - - try_nr = str_nr_last_in_list(&ctl->oldsaved, id); + + rec = last_uid_in_db(&ctl->oldsaved, id); + try_nr = rec ? rec->pos : -1; } else { try_id = *countp+1; try_nr = -1; } - if( try_nr != list_len -1 ) { + if( try_nr != n_recs -1 ) { /* some messages inbetween have been deleted... */ if( try_nr == -1 ) { nolinear = 1; @@ -955,7 +960,9 @@ if ((ok = pop3_gettopid(sock, try_id, id, sizeof(id))) != 0) return ok; - try_nr = str_nr_in_list(&ctl->oldsaved, id); + + rec = find_uid_by_id(&ctl->oldsaved, id); + try_nr = rec ? rec->pos : -1; } if( try_nr == -1 ) { try_id--; @@ -968,17 +975,16 @@ } } /* the first try_id messages are known -> copy them to the newsaved list */ - for( num = first_nr; num < list_len; num++ ) + for( num = first_nr; num < n_recs; num++ ) { - struct idlist *newl = save_str(&ctl->newsaved, - str_from_nr_list(&ctl->oldsaved, num), - UID_UNSEEN); - newl->val.status.num = num - first_nr + 1; + rec = find_uid_by_pos(&ctl->oldsaved, num); + + rec = uid_db_insert(&ctl->newsaved, rec->id, rec->status); + set_uid_db_num(&ctl->newsaved, rec, num - first_nr + 1); } if( nolinear ) { - free_str_list(&ctl->oldsaved); - ctl->oldsaved = 0; + clear_uid_db(&ctl->oldsaved); last = try_id; } @@ -997,7 +1003,7 @@ (void)folder; /* Ensure that the new list is properly empty */ - ctl->newsaved = (struct idlist *)NULL; + clear_uid_db(&ctl->newsaved); #ifdef MBOX /* Alain Knaff suggests this, but it's not RFC standard */ @@ -1032,6 +1038,9 @@ int fastuidl; char id [IDLEN+1]; + set_uid_db_num_pos_0(&ctl->oldsaved, *countp); + set_uid_db_num_pos_0(&ctl->newsaved, *countp); + /* should we do fast uidl this time? */ fastuidl = ctl->fastuidl; if (*countp > 7 && /* linear search is better if there are few mails! */ @@ -1091,14 +1100,13 @@ if (parseuid(buf, &unum, id, sizeof(id)) == PS_SUCCESS) { - struct idlist *old, *newl; + struct uid_db_record *old_rec, *new_rec; - newl = save_str(&ctl->newsaved, id, UID_UNSEEN); - newl->val.status.num = unum; + new_rec = uid_db_insert(&ctl->newsaved, id, UID_UNSEEN); - if ((old = str_in_list(&ctl->oldsaved, id, FALSE))) + if ((old_rec = find_uid_by_id(&ctl->oldsaved, id))) { - flag mark = old->val.status.mark; + flag mark = old_rec->status; if (mark == UID_DELETED || mark == UID_EXPUNGED) { /* XXX FIXME: switch 3 occurrences from @@ -1108,9 +1116,9 @@ if (outlevel >= O_VERBOSE) report(stderr, GT_("id=%s (num=%d) was deleted, but is still present!\n"), id, (int)unum); /* just mark it as seen now! */ - old->val.status.mark = mark = UID_SEEN; + old_rec->status = mark = UID_SEEN; } - newl->val.status.mark = mark; + new_rec->status = mark; if (mark == UID_UNSEEN) { (*newp)++; @@ -1127,10 +1135,14 @@ * swap the lists (say, due to socket error), * the same mail will not be downloaded again. */ - old = save_str(&ctl->oldsaved, id, UID_UNSEEN); + old_rec = uid_db_insert(&ctl->oldsaved, id, UID_UNSEEN); + } /* save the number */ - old->val.status.num = unum; + if (new_rec->status == UID_UNSEEN || !ctl->keep) { + set_uid_db_num(&ctl->oldsaved, old_rec, unum); + set_uid_db_num(&ctl->newsaved, new_rec, unum); + } } else return PS_ERROR; } /* multi-line loop for UIDL reply */ @@ -1199,8 +1211,9 @@ static int pop3_is_old(int sock, struct query *ctl, int num) /* is the given message old? */ { - struct idlist *newl; - if (!ctl->oldsaved) + struct uid_db_record *rec; + + if (!uid_db_n_records(&ctl->oldsaved)) return (num <= last); else if (dofastuidl) { @@ -1211,30 +1224,31 @@ /* in fast uidl, we manipulate the old list only! */ - if ((newl = id_find(&ctl->oldsaved, num))) + if ((rec = find_uid_by_num(&ctl->oldsaved, num))) { /* we already have the id! */ - return(newl->val.status.mark != UID_UNSEEN); + return(rec->status != UID_UNSEEN); } /* get the uidl first! */ if (pop3_getuidl(sock, num, id, sizeof(id)) != PS_SUCCESS) return(TRUE); - if ((newl = str_in_list(&ctl->oldsaved, id, FALSE))) { + if ((rec = find_uid_by_id(&ctl->oldsaved, id))) { /* we already have the id! */ - newl->val.status.num = num; - return(newl->val.status.mark != UID_UNSEEN); + set_uid_db_num(&ctl->oldsaved, rec, num); + return(rec->status != UID_UNSEEN); } /* save it */ - newl = save_str(&ctl->oldsaved, id, UID_UNSEEN); - newl->val.status.num = num; + rec = uid_db_insert(&ctl->oldsaved, id, UID_UNSEEN); + set_uid_db_num(&ctl->oldsaved, rec, num); + return(FALSE); + } else { + rec = find_uid_by_num(&ctl->newsaved, num); + return !rec || rec->status != UID_UNSEEN; } - else - return ((newl = id_find(&ctl->newsaved, num)) != NULL && - newl->val.status.mark != UID_UNSEEN); } #ifdef UNUSED @@ -1353,28 +1367,31 @@ static void mark_uid_seen(struct query *ctl, int number) /* Tell the UID code we've seen this. */ { - struct idlist *sdp; + struct uid_db_record *rec; - if ((sdp = id_find(&ctl->newsaved, number))) - sdp->val.status.mark = UID_SEEN; + if ((rec = find_uid_by_num(&ctl->newsaved, number))) + rec->status = UID_SEEN; /* mark it as seen in oldsaved also! In case, we do not swap the lists * (say, due to socket error), the same mail will not be downloaded * again. */ - if ((sdp = id_find(&ctl->oldsaved, number))) - sdp->val.status.mark = UID_SEEN; + if ((rec = find_uid_by_num(&ctl->oldsaved, number))) + rec->status = UID_SEEN; } static int pop3_delete(int sock, struct query *ctl, int number) /* delete a given message */ { + struct uid_db_record *rec; int ok; mark_uid_seen(ctl, number); /* actually, mark for deletion -- doesn't happen until QUIT time */ ok = gen_transact(sock, "DELE %d", number); if (ok != PS_SUCCESS) return(ok); - delete_str(dofastuidl ? &ctl->oldsaved : &ctl->newsaved, number); + + rec = find_uid_by_num(dofastuidl ? &ctl->oldsaved : &ctl->newsaved, number); + rec->status = UID_DELETED; return(PS_SUCCESS); } --- fetchmail/uid.c 7 May 2010 13:49:16 -0000 1.1.1.2 +++ fetchmail/uid.c 24 May 2010 16:02:10 -0000 1.1.1.2.2.6 @@ -108,6 +108,19 @@ static struct idlist *scratchlist; /** Read saved IDs from \a idfile and attach to each host in \a hostlist. */ +static int dump_saved_uid(struct uid_db_record *rec, void *unused) +{ + char *t; + + (void)unused; + + t = sdump(rec->id, rec->id_len); + report_build(stdout, " %s", t); + free(t); + + return 0; +} + void initialize_saved_lists(struct query *hostlist, const char *idfile) { struct stat statbuf; @@ -117,9 +130,9 @@ /* make sure lists are initially empty */ for (ctl = hostlist; ctl; ctl = ctl->next) { ctl->skipped = (struct idlist *)NULL; - ctl->oldsaved = (struct idlist *)NULL; - ctl->newsaved = (struct idlist *)NULL; - ctl->oldsavedend = &ctl->oldsaved; + + init_uid_db(&ctl->oldsaved); + init_uid_db(&ctl->newsaved); } errno = 0; @@ -212,11 +225,11 @@ *atsign = '\0'; host = atsign + 1; - /* find proper list and save it */ + /* find uidl db and save it */ for (ctl = hostlist; ctl; ctl = ctl->next) { if (strcasecmp(host, ctl->server.queryname) == 0 && strcasecmp(user, ctl->remotename) == 0) { - save_str(&ctl->oldsaved, id, UID_SEEN); + uid_db_insert(&ctl->oldsaved, id, UID_SEEN); break; } } @@ -248,14 +261,12 @@ { report_build(stdout, GT_("Old UID list from %s:"), ctl->server.pollname); - idp = ctl->oldsaved; - if (!idp) + + if (!uid_db_n_records(&ctl->oldsaved)) report_build(stdout, GT_(" <empty>")); - else for (idp = ctl->oldsaved; idp; idp = idp->next) { - char *t = sdump(idp->id, strlen(idp->id)); - report_build(stdout, " %s", t); - free(t); - } + else + traverse_uid_db(&ctl->oldsaved, dump_saved_uid, NULL); + report_complete(stdout, "\n"); } @@ -273,13 +284,18 @@ /** Assert that all UIDs marked deleted in query \a ctl have actually been expunged. */ -void expunge_uids(struct query *ctl) +static int mark_as_expunged_if(struct uid_db_record *rec, void *unused) { - struct idlist *idl; + (void)unused; + + if (rec->status == UID_DELETED) rec->status = UID_EXPUNGED; + return 0; +} - for (idl = dofastuidl ? ctl->oldsaved : ctl->newsaved; idl; idl = idl->next) - if (idl->val.status.mark == UID_DELETED) - idl->val.status.mark = UID_EXPUNGED; +void expunge_uids(struct query *ctl) +{ + traverse_uid_db(dofastuidl ? &ctl->oldsaved : &ctl->newsaved, + mark_as_expunged_if, NULL); } static const char *str_uidmark(int mark) @@ -303,16 +319,32 @@ } } -static void dump_list(const struct idlist *idp) +static int dump_uid_db_record(struct uid_db_record *rec, void *arg) +{ + unsigned *n_recs; + char *t; + + n_recs = arg; + --*n_recs; + + t = sdump(rec->id, rec->id_len); + report_build(stdout, " %s = %s%s", t, str_uidmark(rec->status), *n_recs ? "," : ""); + free(t); + + return 0; +} + +static void dump_uid_db(struct uid_db *db) { - if (!idp) { + unsigned n_recs; + + n_recs = uid_db_n_records(db); + if (!n_recs) { report_build(stdout, GT_(" <empty>")); - } else while (idp) { - char *t = sdump(idp->id, strlen(idp->id)); - report_build(stdout, " %s = %s%s", t, str_uidmark(idp->val.status.mark), idp->next ? "," : ""); - free(t); - idp = idp->next; + return; } + + traverse_uid_db(db, dump_uid_db_record, &n_recs); } /* finish a query */ @@ -323,10 +355,10 @@ { if (dofastuidl) { report_build(stdout, GT_("Merged UID list from %s:"), ctl->server.pollname); - dump_list(ctl->oldsaved); + dump_uid_db(&ctl->oldsaved); } else { report_build(stdout, GT_("New UID list from %s:"), ctl->server.pollname); - dump_list(ctl->newsaved); + dump_uid_db(&ctl->newsaved); } report_complete(stdout, "\n"); } @@ -347,15 +379,10 @@ * with UIDLs from that account in .fetchids, there is no way for * them to ever get garbage-collected. */ - if (ctl->newsaved) + if (uid_db_n_records(&ctl->newsaved)) { - /* old state of mailbox may now be irrelevant */ - struct idlist *temp = ctl->oldsaved; - if (outlevel >= O_DEBUG) - report(stdout, GT_("swapping UID lists\n")); - ctl->oldsaved = ctl->newsaved; - ctl->newsaved = (struct idlist *) NULL; - free_str_list(&temp); + swap_uid_db_data(&ctl->newsaved, &ctl->oldsaved); + clear_uid_db(&ctl->newsaved); } /* in fast uidl, there is no need to swap lists: the old state of * mailbox cannot be discarded! */ @@ -372,29 +399,53 @@ /* this is now a merged list! the mails which were seen in this * poll are marked here. */ report_build(stdout, GT_("Merged UID list from %s:"), ctl->server.pollname); - dump_list(ctl->oldsaved); + dump_uid_db(&ctl->oldsaved); report_complete(stdout, "\n"); } - if (ctl->newsaved) + if (uid_db_n_records(&ctl->newsaved)) { /* new state of mailbox is not reliable */ if (outlevel >= O_DEBUG) report(stdout, GT_("discarding new UID list\n")); - free_str_list(&ctl->newsaved); - ctl->newsaved = (struct idlist *) NULL; + clear_uid_db(&ctl->newsaved); } } /** Reset the number associated with each id */ void uid_reset_num(struct query *ctl) { - struct idlist *idp; - for (idp = ctl->oldsaved; idp; idp = idp->next) - idp->val.status.num = 0; + reset_uid_db_nums(&ctl->oldsaved); } /** Write list of seen messages, at end of run. */ +static int count_seen_deleted(struct uid_db_record *rec, void *arg) +{ + if (rec->status == UID_SEEN || rec->status == UID_DELETED) + ++*(long *)arg; + return 0; +} + +struct write_saved_info { + struct query *ctl; + FILE *fp; +}; + +static int write_uid_db_record(struct uid_db_record *rec, void *arg) +{ + struct write_saved_info *info; + int rc; + + if (!(rec->status == UID_SEEN || rec->status == UID_DELETED)) + return 0; + + info = arg; + rc = fprintf(info->fp, "%s@%s %s\n", + info->ctl->remotename, info->ctl->server.queryname, + rec->id); + return rc < 0 ? -1 : 0; +} + void write_saved_lists(struct query *hostlist, const char *idfile) { long idcount; @@ -404,12 +455,8 @@ /* if all lists are empty, nuke the file */ idcount = 0; - for (ctl = hostlist; ctl; ctl = ctl->next) { - for (idp = ctl->oldsaved; idp; idp = idp->next) - if (idp->val.status.mark == UID_SEEN - || idp->val.status.mark == UID_DELETED) - idcount++; - } + for (ctl = hostlist; ctl; ctl = ctl->next) + traverse_uid_db(&ctl->oldsaved, count_seen_deleted, &idcount); /* either nuke the file or write updated last-seen IDs */ if (!idcount && !scratchlist) @@ -428,19 +475,22 @@ report(stdout, GT_("Writing fetchids file.\n")); (void)unlink(newnam); /* remove file/link first */ if ((tmpfp = fopen(newnam, "w")) != (FILE *)NULL) { + struct write_saved_info info; int errflg = 0; + + info.fp = tmpfp; + for (ctl = hostlist; ctl; ctl = ctl->next) { - for (idp = ctl->oldsaved; idp; idp = idp->next) - if (idp->val.status.mark == UID_SEEN - || idp->val.status.mark == UID_DELETED) - if (fprintf(tmpfp, "%s@%s %s\n", - ctl->remotename, ctl->server.queryname, idp->id) < 0) { - int e = errno; - report(stderr, GT_("Write error on fetchids file %s: %s\n"), newnam, strerror(e)); - errflg = 1; - goto bailout; - } + info.ctl = ctl; + + if (traverse_uid_db(&ctl->oldsaved, write_uid_db_record, &info) < 0) { + int e = errno; + report(stderr, GT_("Write error on fetchids file %s: %s\n"), newnam, strerror(e)); + errflg = 1; + goto bailout; + } } + for (idp = scratchlist; idp; idp = idp->next) if (EOF == fputs(idp->id, tmpfp)) { int e = errno; --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ fetchmail/uid_db.c 27 May 2010 21:14:18 -0000 1.23.2.39 @@ -0,0 +1,547 @@ +/* + POP3 UID db + + Copyright (c) 2010 MAD Partners, Ltd. (rwe...@ms...) + + This file is being published in accordance with the GPLv2 terms + contained in the COPYING file being part of the fetchmail + 6.3.17 release, including the OpenSSL exemption. +*/ + +/* + * includes + */ +#include <inttypes.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "xmalloc.h" +#include "uid_db.h" + +/* + * constants + */ +enum { + MIN_RECORDS = 16 /* arbitrary */ +}; + +/* + * types + */ +struct pat_node { + struct pat_node *ptrs_[3]; + unsigned bit_ndx; + uint16_t bit_ofs, bit_mask; + + struct uid_db_record *rec; +}; + +/* + The idea behind this is that the 'left' pointer of + a node is accessible as ptrs(np)[-1] and the right + one a ptrs(np)[1]. This implies that no separate codepaths + for 'symmetric left- and right-cases' are needed. +*/ +#define ptrs(np) ((np)->ptrs_ + 1) + +/* + * routines + */ +/* + ** various helpers + */ +static inline unsigned bit_ofs(unsigned bit_ndx) +{ + return bit_ndx >> 3; +} + +static inline unsigned bit_mask(unsigned bit_ndx) +{ + return 1 << (bit_ndx & 7); +} + +/* + ** PATRICIA trie insertion + */ +/* + *** walkers + */ +static struct pat_node *walk_down(struct uid_db *db, struct uid_db_record *rec, + struct pat_node ***edgep, struct pat_node **parentp) +{ + /* + Find the pat node whose id is 'most similar' to the id + stored in rec->id. Return a pointer to this node. + 'parentp' and 'edgep' are output-only parameters which + will point to the parent of returned node and to the edge + pointer going from the parent to the returned node after + the call has returned. + + This routine is intended for inserts only. + */ + struct pat_node *cur, **edge; + unsigned bit_ndx, v, ofs; + + cur = db->pat_root; + ofs = -1; + do { + if (ofs != cur->bit_ofs) { + ofs = cur->bit_ofs; + v = ofs < rec->id_len ? rec->id[ofs] : 0; + } + + bit_ndx = cur->bit_ndx; + edge = ptrs(cur) + (v & cur->bit_mask ? 1 : -1); + } while ((cur = *edge) && cur->bit_ndx > bit_ndx); + + *parentp = + (struct pat_node *) + ((unsigned char *)edge - (v & bit_mask(bit_ndx) ? + offsetof(struct pat_node, ptrs_[2]) + : offsetof(struct pat_node, ptrs_[0]))); + *edgep = edge; + return cur; +} + +static inline struct pat_node *walk_up(unsigned diff_ndx, struct pat_node **parent) +{ + struct pat_node *p, *np; + + np = *parent; + + while ((p = *ptrs(np)) && p->bit_ndx > diff_ndx) + np = p; + + *parent = p; + return np; +} + +/* + *** bit fiddling + */ +static inline unsigned first_set_bit_in_char(unsigned v) +{ + return ffs(v) - 1; +} + +static int find_first_diff_bit(struct uid_db_record const *r0, + struct uid_db_record const *r1) +{ + /* + Return the bit_ndx of the first differing bit in + r0->id and r1->id or -1 if the strings are identical. + */ + struct uid_db_record const *long_id; + unsigned ofs, max; + unsigned char v; + + max = r0->id_len; + if (max > r1->id_len) { + max = r1->id_len; + long_id = r0; + } else + long_id = r1; + + ofs = 0; + do + v = r0->id[ofs] ^ r1->id[ofs]; + while (!v && ++ofs < max); + + if (!v) { + if (r0->id_len == r1->id_len) return -1; + v = long_id->id[ofs]; + } + + return first_set_bit_in_char(v) + ofs * 8; +} + +static inline unsigned bit_set(struct pat_node const *np, + struct uid_db_record const *rec) +{ + return np->bit_ofs < rec->id_len ? + rec->id[np->bit_ofs] & np->bit_mask : 0; +} + +/* + *** node allocation + */ +static struct pat_node *get_pat_node(struct uid_db_record *rec) +{ + struct pat_node *np; + + np = xmalloc(sizeof(*np)); + np->rec = rec; + rec->next = NULL; + return np; +} + +static struct pat_node *get_standalone_node(struct uid_db_record *rec) +{ + /* + Return a pat_node suitable for being inserted on the 'left edge' + of the trie, ie either linked to a node whose left pointer was zero + or being inserted as root node into an empty trie. The bit_ndx of + the pat_node is set to the index corresponding with the highest + set bit in rec->id. + + NB: This is a bad choice when UIDs share a common prefix because + this implies that the root node will cause a bit to be tested which + is non-zero in all other nodes, adding a theoretically redundant + level to the trie. This is (to the best of my knowledge) un- + fortunately unavoidable if nodes with different key lengths need + to be supported. + */ + struct pat_node *np; + + np = get_pat_node(rec); + + np->bit_ndx = first_set_bit_in_char(*rec->id); + np->bit_mask = bit_mask(np->bit_ndx); + np->bit_ofs = bit_ofs(np->bit_ndx); + + return np; +} + +/* + *** various helpers + */ +static inline int record_id_equal(struct uid_db_record const *r0, + struct uid_db_record const *r1) +{ + return + r0->id_len == r1->id_len + && memcmp(r0->id, r1->id, r0->id_len) == 0; +} + +static struct uid_db_record *append_to_list(struct uid_db_record **recp, + struct uid_db_record *rec) +{ + while (*recp) recp = &(*recp)->next; + *recp = rec; + + rec->next = NULL; + return rec; +} + +/* + *** insert routine + */ +static struct uid_db_record *pat_insert(struct uid_db *db, + struct uid_db_record *rec) +{ + /* + Insert the record pointed to by rec in the (potentially empty) + PATRICIA trie pointed to by db->pat_root and return rec. + */ + struct pat_node *np, *closest, *parent, **edge; + int me, bit_ndx; + + if (!db->pat_root) { + np = get_standalone_node(rec); + ptrs(np)[-1] = *ptrs(np) = NULL; + ptrs(np)[1] = np; + + db->pat_root = np; + return rec; + } + + closest = walk_down(db, rec, &edge, &parent); + + if (closest) { + bit_ndx = find_first_diff_bit(closest->rec, rec); + if (bit_ndx < 0) + return append_to_list(&closest->rec->next, rec); + + np = get_pat_node(rec); + + np->bit_ndx = bit_ndx; + np->bit_mask = bit_mask(bit_ndx); + np->bit_ofs = bit_ofs(bit_ndx); + } else + np = get_standalone_node(rec); + + if (parent->bit_ndx > np->bit_ndx) { + closest = walk_up(np->bit_ndx, &parent); + + if (!parent) edge = &db->pat_root; + else edge = ptrs(parent)[-1] == closest ? + ptrs(parent) - 1 : ptrs(parent) + 1; + *ptrs(closest) = np; + } + + *edge = np; + *ptrs(np) = parent; + + me = bit_set(np, rec) ? 1 : -1; + ptrs(np)[me] = np; + ptrs(np)[-me] = closest; + + return rec; +} + +/* + ** general db insertion + */ +static struct uid_db_record *get_uid_db_record(char const *id, unsigned status) +{ + struct uid_db_record *rec; + size_t id_len; + + rec = xmalloc(sizeof(*rec)); + + id_len = strlen(id); + rec->id = memcpy(xmalloc(id_len + 1), id, id_len + 1); + rec->id_len = id_len; + rec->status = status; + rec->num = 0; + + return rec; +} + +static void insert_into_records(struct uid_db *db, + struct uid_db_record *rec) +{ + unsigned next, want; + + next = db->records_next; + + if (next == db->records_max) { + want = db->records_max *= 2; + db->records = xrealloc(db->records, want * sizeof(rec)); + } + + rec->pos = next; + db->records[next] = rec; + db->records_next = next + 1; +} + +struct uid_db_record *uid_db_insert(struct uid_db *db, + char const *id, unsigned status) +{ + struct uid_db_record *rec; + + rec = get_uid_db_record(id, status); + insert_into_records(db, rec); + return pat_insert(db, rec); +} + +/* + ** message number index + */ +void set_uid_db_num(struct uid_db *db, struct uid_db_record *rec, + unsigned num) +{ + /* + Set the message number of the record pointed to by rec to num + and insert it into the num_ndx of the uid_db pointed to by db + at position corresponding with num. The num_ndx lookup array + is grown as needed. Message numbers are expected to 'generally' + be recorded in ascending order and hence, no provisions are + made to deal with the potentially quadratic complexity of + inserting a sequence of numbers into an array such that it + needs to be grown continuously. + */ + struct num_ndx *num_ndx; + unsigned have, want; + + num_ndx = &db->num_ndx; + + if (num_ndx->end_value > num) { + have = num_ndx->pos_0_value - num_ndx->end_value + 1; + want = num_ndx->pos_0_value - num + 1; + num_ndx->end_value = num; + + num_ndx->records = xrealloc(num_ndx->records, want * sizeof(rec)); + do num_ndx->records[--want] = NULL; while (want > have); + } + + num_ndx->records[uid_db_num_ofs(num_ndx, num)] = rec; +} + +void reset_uid_db_nums(struct uid_db *db) +{ + /* + Reset the message numbers of all uid_db_records stored + in the uid_db pointed to by db. The corresponding num_ndx + lookup array is afterwards freed and the num_ndx end_value + adjusted in order to indicate an 'empty' message number + index. + */ + struct uid_db_record **rec; + struct num_ndx *num_ndx; + unsigned ndx; + + num_ndx = &db->num_ndx; + + if (num_ndx->end_value < num_ndx->pos_0_value) { + ndx = num_ndx->pos_0_value - num_ndx->end_value; + while (ndx) { + rec = num_ndx->records + --ndx; + if (*rec) (*rec)->num = 0; + } + + num_ndx->end_value = num_ndx->pos_0_value + 1; + + free(num_ndx->records); + num_ndx->records = NULL; + } +} + +/* + ** search routines + */ +struct uid_db_record *find_uid_by_id(struct uid_db *db, char const *id) +{ + struct pat_node *np; + struct uid_db_record *rec; + unsigned v, bit_ndx, ofs; + size_t len; + + np = db->pat_root; + if (np) { + len = strlen(id); + ofs = -1; + do { + if (ofs != np->bit_ofs) { + ofs = np->bit_ofs; + v = ofs < len ? id[ofs] : 0; + } + + bit_ndx = np->bit_ndx; + np = ptrs(np)[v & bit_mask(bit_ndx) ? 1 : -1]; + } while (np && np->bit_ndx > bit_ndx); + + if (!np) return NULL; + + rec = np->rec; + return rec->id_len == len && memcmp(id, rec->id, len) == 0 ? + rec : NULL; + } + + return NULL; +} + +struct uid_db_record *last_uid_in_db(struct uid_db *db, char const *id) +{ + struct uid_db_record *rec; + + rec = find_uid_by_id(db, id); + if (!rec) return NULL; + + while (rec->next) rec = rec->next; + return rec; +} + +/* + ** destruction + */ +static void free_uid_list(struct uid_db_record *rec) +{ + if (rec->next) free_uid_list(rec->next); + + xfree(rec->id); + xfree(rec); +} + +static void free_pat_trie(struct pat_node *np) +{ + /* + Free the PATRCIA-trie pointed to by np and all + uid_db_records contained in it. + + The algorithm implemented below is: + + 1. Load the left pointer of the node pointed to by + np into next. + + 2. If the result is not NULL, + 2a) Set the left pointer to NULL. + 2b) Goto 1 if next points to a child of np. + + 3. Load the right pointer of the node pointed to by + np into next. + + 4. If the result is not NULL, + 4a) Set the right pointer to NULL. + 4b) Goto 1 id next points to a child of np. + + 5. Load next with the parent pointer of np. + + 6. Free np->rec and np. + + 7. Set np to next and goto 1 if it is not null. + */ + struct pat_node *next; + + do { + next = ptrs(np)[-1]; + if (next) { + ptrs(np)[-1] = NULL; + if (next->bit_ndx > np->bit_ndx) continue; + } + + next = ptrs(np)[1]; + if (next) { + ptrs(np)[1] = NULL; + if (next->bit_ndx > np->bit_ndx) continue; + } + + next = *ptrs(np); + + free_uid_list(np->rec); + free(np); + } while ((np = next)); +} + +void free_uid_db(struct uid_db *db) +{ + if (db->pat_root) free_pat_trie(db->pat_root); + + xfree(db->records); + xfree(db->num_ndx.records); +} + +/* + ** various public interfaces + */ +void init_uid_db(struct uid_db *db) +{ + struct num_ndx *num_ndx; + + db->pat_root = NULL; + + db->records = xmalloc(MIN_RECORDS * sizeof(*db->records)); + db->records_max = MIN_RECORDS; + db->records_next = 0; + + num_ndx = &db->num_ndx; + num_ndx->pos_0_value = num_ndx->end_value = -1; + num_ndx->records = NULL; +} + +void swap_uid_db_data(struct uid_db *db_0, struct uid_db *db_1) +{ + struct uid_db tmp; + + tmp = *db_0; + *db_0 = *db_1; + *db_1 = tmp; +} + +int traverse_uid_db(struct uid_db *db, + uid_db_traversal_routine *r, void *arg) +{ + struct uid_db_record **recs; + unsigned ndx, max; + int rc; + + rc = 0; + ndx = 0; + max = db->records_next; + recs = db->records; + while (ndx < max && (rc = r(recs[ndx], arg)) == 0) + ++ndx; + + return rc; +} --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ fetchmail/uid_db.h 27 May 2010 21:09:47 -0000 1.3.2.20 @@ -0,0 +1,155 @@ +/* + POP3 UID database + + Copyright (c) 2010 MAD Partners, Ltd. (rwe...@ms...) + + This file is being published in accordance with the GPLv2 terms + contained in the COPYING file being part of the fetchmail + 6.3.17 release, including the OpenSSL exemption. +*/ +#ifndef fetchmail_uid_db_h +#define fetchmail_uid_db_h + +/* + * includes + */ +#include <stddef.h> + +/* types */ +struct uid_db_record { + char *id; + size_t id_len; + + /* + num - message number assigned by server + status - message status (eg seen, deleted, ...) + pos - position in record list + */ + unsigned num, status, pos; + + struct uid_db_record *next; +}; + +struct num_ndx { + /* + Used to find uid records by message number. + + pos_0_value - highest message number + end_value - lowest known message number + + Grows downwards because the fastuidl-code may record + message numbers in non-ascending order but the + lookup array should ideally only be large enough to + store pointers to interesting ('new') messages. + */ + struct uid_db_record **records; + unsigned pos_0_value, end_value; +}; + +struct uid_db +{ + struct pat_node *pat_root; + + struct uid_db_record **records; + unsigned records_max, records_next; + + struct num_ndx num_ndx; +}; + +typedef int uid_db_traversal_routine(struct uid_db_record *, void *); + +/* + * routines + */ +/* + ** initialization/ finalization + */ +void init_uid_db(struct uid_db *db); + +void free_uid_db(struct uid_db *db); + +static inline void clear_uid_db(struct uid_db *db) +{ + free_uid_db(db); + init_uid_db(db); +} + +/* + ** message number index handling + */ +static inline unsigned uid_db_num_ofs(struct num_ndx const *num_ndx, unsigned num) +{ + return num_ndx->pos_0_value - num; +} + +void set_uid_db_num(struct uid_db *db, struct uid_db_record *rec, + unsigned num); + +static inline void set_uid_db_num_pos_0(struct uid_db *db, unsigned num) +{ + db->num_ndx.pos_0_value = num; + db->num_ndx.end_value = num + 1; +} + +void reset_uid_db_nums(struct uid_db *db); + +/* + ** various uidl db manipulatiors + */ +struct uid_db_record *uid_db_insert(struct uid_db *db, + char const *id, unsigned status); + +void swap_uid_db_data(struct uid_db *db_0, struct uid_db *db_1); + +/* + ** search routines + */ +struct uid_db_record *find_uid_by_id(struct uid_db *db, char const *id); + +static inline struct uid_db_record * +find_uid_by_num(struct uid_db *db, unsigned num) +{ + struct num_ndx *num_ndx; + + num_ndx = &db->num_ndx; + return num >= num_ndx->end_value ? + num_ndx->records[uid_db_num_ofs(num_ndx, num)] : NULL; +} + +static inline struct uid_db_record * +find_uid_by_pos(struct uid_db *db, unsigned pos) +{ + return pos < db->records_next ? db->records[pos] : NULL; +} + +static inline struct uid_db_record * +first_uid_in_db(struct uid_db *db, char const *id) +{ + return find_uid_by_id(db, id); +} + +struct uid_db_record *last_uid_in_db(struct uid_db *db, char const *id); + +/* + ** various accessors + */ +static inline unsigned uid_db_n_records(struct uid_db const *db) +{ + return db->records_next; +} + +/* + Traverses the struct uid_db records array in insert order, + invoking the subroutine pointed to by r with a pointer to + each record and the arg pointer as arguments. If the return + value of that is non-zero, traverse_uid_db immediately returns + with this value. Otherwise, zero is returned after the last + record was visited. + + The uid_db_traversal_routine must not modify the uid_db during + traversal. +*/ +int traverse_uid_db(struct uid_db *db, + uid_db_traversal_routine *r, void *arg); + +#endif |
From: Matthias A. <mat...@gm...> - 2010-06-09 09:17:52
|
Am 08.06.2010, 23:57 Uhr, schrieb Rainer Weikusat: > The file included below is a diff of the 'running' uid_db code against > the 6.3.17 fetchmail tree. I hope that it is ok in this form, because > I'm still in extreme 'now, we just need to deliver'-mode ... Hi Rainer, thanks a lot. Is that the final submission to be expected, or do you still want to polish some bits of it? I'm asking because I plan to use the code in a new minor release (6.4 or 6.5, not yet sure how I'll name it) rather than 6.3.X, which means I may need to tweak a few ends (because I already purged old compatibility cruft from that tree) and that might conflict with your future changes. If you want to modify it, I'll just push the patch to a new branch in Gitorious so it's there for the public, but hold off with merging until around June 20, so that I don't need to re-merge later. You also mentioned Disconnected IMAP earlier, is that a project you are still pursuing and/or want to submit later? If so, what's the status? I absolutely do not mean to urge here, this is only so that we can align planning a bit. Best regards Matthias |
From: Rainer W. <rwe...@ms...> - 2010-06-12 00:06:53
|
"Matthias Andree" <mat...@gm...> writes: > Am 08.06.2010, 23:57 Uhr, schrieb Rainer Weikusat: > >> The file included below is a diff of the 'running' uid_db code against >> the 6.3.17 fetchmail tree. I hope that it is ok in this form, because >> I'm still in extreme 'now, we just need to deliver'-mode ... [...] > Is that the final submission to be expected, or do you still want to > polish some bits of it? This is the code which is used for the POP3-using customers of 'my' application. Because of this, I will fix or mitigate any issues with it which come to my attention and affect me. As always, I'm meanwhile working on something completely different with the usual 'Yesterday!' [emphatically!] as most wanted release date. This means the short answer is 'Yes' and the medium-sized one would be "I can want things all day but that doesn't mean I can actually do them :-(". [...] > You also mentioned Disconnected IMAP earlier, is that a project you > are still pursuing and/or want to submit later? > If so, what's the status? Using \Seen to track 'message newness' reportedly caused the gmail-interface to no longer bring up a whole thread of communication when using it to access a message for the first time. This was deemed serious enough (by the powers which be) to warrant a fix and because of this, I am running a fetchmail-variant which support DIMAP exactly to the degree needed for this particular application. In particular, it doesn't keep a database of actually downloaded messages but just assumes that everything between (<last recorded uid>, <UIDNEXT>) is interesting and anything else isn't. An additional unpleasant workaround would be a hack intended to deal with so-called 'IMAP4rev1-servers' supporting UIDVALIDITY but not UIDNEXT (eg, mail.amuni.com): It is assumed that the new UID space can be queried in batches of thousands such that SEARCH UID ... never returns an empty response except if the end of the mailbox was reached. This is non-compliant with the IPMAP4rev1 RFC. Independently of these technical problems, this is code I have written with the intent that no one except myself ever looks at it and it contains too many of my usual opinions regarding how code should or shouldn't be written, such as using mutable function pointers as parser state variables and things like that, that I would dare to show it to anyone. |
From: Matthias A. <mat...@gm...> - 2010-06-12 15:50:26
|
Am 12.06.2010, 00:06 Uhr, schrieb Rainer Weikusat: > "Matthias Andree" <mat...@gm...> writes: > >> Am 08.06.2010, 23:57 Uhr, schrieb Rainer Weikusat: >> >>> The file included below is a diff of the 'running' uid_db code against >>> the 6.3.17 fetchmail tree. I hope that it is ok in this form, because >>> I'm still in extreme 'now, we just need to deliver'-mode ... > > [...] > >> Is that the final submission to be expected, or do you still want to >> polish some bits of it? > > This is the code which is used for the POP3-using customers of 'my' > application. Because of this, I will fix or mitigate any issues with > it which come to my attention and affect me. As always, I'm meanwhile > working on something completely different with the usual 'Yesterday!' > [emphatically!] as most wanted release date. This means the short > answer is 'Yes' and the medium-sized one would be "I can want things > all day but that doesn't mean I can actually do them :-(". The usual there's more work than time problem that most of us share (and that I'd prefer to being bored all the time!). Thank you. I will integrate the code as is for a new minor branch and see to publishing it shortly after the merge in Git (I plan that there be a series of alpha- and beta-releases). I can't say how soon this will start, but I will create a Git branch for that. That new branch will have substantially less portability hacks and will require somewhat recent tool chains (expecting some of the more sophisticated C99 features but omitting the arcane ones) and operating systems (Single Unix Specification v3 aka IEEE Std 1003.1-2001, 2004 edition -- which has been superseded by v4 aka IEEE Std 1003.1-2008, but OS support for v4 isn't widely available yet), but is supposed to have little practical impact. I currently have FreeBSD 6, 7, 8, NetBSD 5.1, Solaris 10, Ubuntu 10.04, Fedora 13 and openSUSE 11.2 for testing and can occasionally test compiles on Cygwin 1.7, Ubuntu 8.04, and Darwin (the Unix OS underneath MacOS X). I do not care about half-baked or dying/dead non-IEEE-Std-1003 systems and have in fact removed support for them from the code to make it more readable and maintainable. If someone wants to provide me with an SSH account (no root privileges required) on another operating system that is under full vendor support, such as DragonflyBSD, OpenBSD, AIX, HP/UX, please contact me off-list. I think I don't care much about SCO, OpenServer, Unixware currently, but would take patches if they integrate cleanly and don't cause regressions on supported systems. > [...] > >> You also mentioned Disconnected IMAP earlier, is that a project you >> are still pursuing and/or want to submit later? >> If so, what's the status? > > Using \Seen to track 'message newness' reportedly caused the > gmail-interface to no longer bring up a whole thread of communication > when using it to access a message for the first time. This was deemed > serious enough (by the powers which be) to warrant a fix and because > of this, I am running a fetchmail-variant which support DIMAP exactly > to the degree needed for this particular application. In particular, > it doesn't keep a database of actually downloaded messages but just > assumes that everything between (<last recorded uid>, <UIDNEXT>) is > interesting and anything else isn't. An additional unpleasant > workaround would be a hack intended to deal with so-called > 'IMAP4rev1-servers' supporting UIDVALIDITY but not UIDNEXT (eg, > mail.amuni.com): It is assumed that the new UID space can be queried > in batches of thousands such that SEARCH UID ... never returns an empty > response except if the end of the mailbox was reached. This is > non-compliant with the IPMAP4rev1 RFC. > > Independently of these technical problems, this is code I have written > with the intent that no one except myself ever looks at it and it > contains too many of my usual opinions regarding how code should or > shouldn't be written, such as using mutable function pointers as > parser state variables and things like that, that I would dare to show > it to anyone. What's wrong with that? Function pointers are a regular feature of standard C. -- Matthias Andree |
From: Matthias A. <mat...@gm...> - 2010-12-12 17:52:34
|
Am 12.06.2010 00:06, schrieb Rainer Weikusat: > "Matthias Andree" <mat...@gm...> writes: > >> Am 08.06.2010, 23:57 Uhr, schrieb Rainer Weikusat: >> >>> The file included below is a diff of the 'running' uid_db code against >>> the 6.3.17 fetchmail tree. I hope that it is ok in this form, because >>> I'm still in extreme 'now, we just need to deliver'-mode ... > > [...] > >> Is that the final submission to be expected, or do you still want to >> polish some bits of it? > > This is the code which is used for the POP3-using customers of 'my' > application. Because of this, I will fix or mitigate any issues with > it which come to my attention and affect me. As always, I'm meanwhile > working on something completely different with the usual 'Yesterday!' > [emphatically!] as most wanted release date. This means the short > answer is 'Yes' and the medium-sized one would be "I can want things > all day but that doesn't mean I can actually do them :-(". Hi Rainer, thanks again for the contribution. I have finally rebased your patch onto 6.3.19, and then merged it for 6.4.0-alpha1 (in Git's next branch, see http://gitorious.org/fetchmail/ for the repositories). I'd like you to look over the changes; specifically I had to initialize "v" in two functions to 0 explicitly to fix compiler warnings. I'd also appreciate if you could read the code and check for merge errors. Thanks again. Best regards -- Matthias Andree |
From: Rainer W. <rwe...@ms...> - 2010-12-16 15:16:12
|
Matthias Andree <mat...@gm...> writes: > Am 12.06.2010 00:06, schrieb Rainer Weikusat: >> "Matthias Andree" <mat...@gm...> writes: >> >>> Am 08.06.2010, 23:57 Uhr, schrieb Rainer Weikusat: >>> >>>> The file included below is a diff of the 'running' uid_db code against >>>> the 6.3.17 fetchmail tree. I hope that it is ok in this form, because >>>> I'm still in extreme 'now, we just need to deliver'-mode ... >> >> [...] >> >>> Is that the final submission to be expected, or do you still want to >>> polish some bits of it? >> >> This is the code which is used for the POP3-using customers of 'my' >> application. Because of this, I will fix or mitigate any issues with >> it which come to my attention and affect me. As always, I'm meanwhile >> working on something completely different with the usual 'Yesterday!' >> [emphatically!] as most wanted release date. This means the short >> answer is 'Yes' and the medium-sized one would be "I can want things >> all day but that doesn't mean I can actually do them :-(". > > Hi Rainer, > > thanks again for the contribution. I have finally rebased your patch onto > 6.3.19, and then merged it for 6.4.0-alpha1 (in Git's next branch, see > http://gitorious.org/fetchmail/ for the repositories). > > I'd like you to look over the changes; specifically I had to initialize "v" in > two functions to 0 explicitly to fix compiler warnings. > > I'd also appreciate if you could read the code and check for merge errors. I apologize for the delayed reply, I've moved to the UK last Friday (2010/12/10) and I'm still in the process of forcing the resulting chaos out of my life again. I'll be busy with catching up on work I had leave on its own since then but I'll try to have a look at this in the next few days. |