From: Rainer W. <rwe...@ms...> - 2010-05-24 21:30:50
|
Below is a diff of my modified fetchmail tree against the 6.3.17 release tree. This should still be considered somewhat experimental because it has so far only been tested with a single e-mail account in 'oneshot' mode. According to prelimnary measurement, the modification seems to be advantageous if there are sixty or more e-mail kept on a server. Additional changes I haven't mentioned so far: - I've dropped the l from the uid in all the names. 'UIDL' likely means 'uid listing' and hence, what is listed must be an uid - The message number index now grows downwards from the message count to the lowest-numbered 'interesting' ('new') message seen. In order to be able to also use this, I've changed the POP3 'stock uidl code' to only record the message numbers of 'new' messages in keep mode and to treat a NULL return to the 'find by message number' operations as 'message is old'. The message number index will either only be large enough to hold the new message numbers or (at worst) be large enough to hold the lowest number of a 'seen' message tested by the fastuidl code - I've tried to document everything not completely trivial in form of comments in the uid_db.c file. IMO, this is a mixed blessing because an algorithm is not necessarily easier to understand when described in natural language and everybody contemplating to do something with code needs to understand that, anyway ... After sending this mail, I will integrate this into our 'product fetchmail' and I may be able to publish some real comparisons between old and new code tomorrow. NB: The Makefile.am change should probably be moved to the POP3 section of Makefile.am -------------- --- fetchmail/Makefile.am 7 May 2010 13:49:15 -0000 1.1.1.6 +++ fetchmail/Makefile.am 23 May 2010 21:23:09 -0000 1.1.1.6.2.2 @@ -68,7 +68,7 @@ driver.c transact.c sink.c smtp.c \ idlist.c uid.c mxget.c md5ify.c cram.c gssapi.c \ opie.c interface.c netrc.c \ - unmime.c conf.c checkalias.c \ + unmime.c conf.c checkalias.c uid_db.h uid_db.c\ lock.h lock.c \ rcfile_l.l rcfile_y.y ucs/norm_charmap.c if POP2_ENABLE --- fetchmail/fetchmail.c 7 May 2010 13:49:14 -0000 1.1.1.4 +++ fetchmail/fetchmail.c 24 May 2010 16:02:10 -0000 1.1.1.4.2.3 @@ -1537,6 +1537,14 @@ return(st); } +static int print_id_of(struct uid_db_record *rec, void *unused) +{ + (void)unused; + + printf("\t%s\n", rec->id); + return 0; +} + static void dump_params (struct runctl *runp, struct query *querylist, flag implicit) /* display query parameters in English */ @@ -1938,20 +1946,14 @@ if (ctl->server.protocol > P_POP2 && MAILBOX_PROTOCOL(ctl)) { - if (!ctl->oldsaved) + int count; + + if (!(count = uid_db_n_records(&ctl->oldsaved))) printf(GT_(" No UIDs saved from this host.\n")); else { - struct idlist *idp; - int count = 0; - - for (idp = ctl->oldsaved; idp; idp = idp->next) - ++count; - printf(GT_(" %d UIDs saved.\n"), count); - if (outlevel >= O_VERBOSE) - for (idp = ctl->oldsaved; idp; idp = idp->next) - printf("\t%s\n", idp->id); + traverse_uid_db(&ctl->oldsaved, print_id_of, NULL); } } --- fetchmail/fetchmail.h 7 May 2010 13:49:13 -0000 1.1.1.4 +++ fetchmail/fetchmail.h 24 May 2010 16:02:10 -0000 1.1.1.4.2.3 @@ -39,6 +39,8 @@ # include "trio/trio.h" #endif +#include "uid_db.h" + /* We need this for strstr */ #if !defined(HAVE_STRSTR) && !defined(strstr) char *strstr(const char *, const char *); @@ -387,8 +389,7 @@ int smtp_socket; /* socket descriptor for SMTP connection */ unsigned int uid; /* UID of user to deliver to */ struct idlist *skipped; /* messages skipped on the mail server */ - struct idlist *oldsaved, *newsaved; - struct idlist **oldsavedend; + struct uid_db oldsaved, newsaved; char lastdigest[DIGESTLEN]; /* last MD5 hash seen on this connection */ char *folder; /* folder currently being polled */ --- fetchmail/pop3.c 7 May 2010 13:49:19 -0000 1.1.1.4 +++ fetchmail/pop3.c 24 May 2010 16:02:10 -0000 1.1.1.4.2.6 @@ -21,6 +21,7 @@ #include "fetchmail.h" #include "socket.h" #include "i18n.h" +#include "uid_db.h" #ifdef OPIE_ENABLE #ifdef __cplusplus @@ -845,20 +846,20 @@ last_nr = count + 1; while (first_nr < last_nr - 1) { - struct idlist *newl; + struct uid_db_record *rec; try_nr = (first_nr + last_nr) / 2; if ((ok = pop3_getuidl(sock, try_nr, id, sizeof(id))) != 0) return ok; - if ((newl = str_in_list(&ctl->oldsaved, id, FALSE))) + if ((rec = find_uid_by_id(&ctl->oldsaved, id))) { - flag mark = newl->val.status.mark; + flag mark = rec->status; if (mark == UID_DELETED || mark == UID_EXPUNGED) { if (outlevel >= O_VERBOSE) report(stderr, GT_("id=%s (num=%u) was deleted, but is still present!\n"), id, try_nr); /* just mark it as seen now! */ - newl->val.status.mark = mark = UID_SEEN; + rec->status = mark = UID_SEEN; } /* narrow the search region! */ @@ -872,7 +873,7 @@ first_nr = try_nr; /* save the number */ - newl->val.status.num = try_nr; + set_uid_db_num(&ctl->oldsaved, rec, try_nr); } else { @@ -881,8 +882,8 @@ last_nr = try_nr; /* save it */ - newl = save_str(&ctl->oldsaved, id, UID_UNSEEN); - newl->val.status.num = try_nr; + rec = uid_db_insert(&ctl->oldsaved, id, UID_UNSEEN); + set_uid_db_num(&ctl->oldsaved, rec, try_nr); } } if (outlevel >= O_DEBUG && last_nr <= count) @@ -910,34 +911,38 @@ * of messages to get. * + Otherwise run a binary search to determine the last known message */ + struct uid_db_record *rec; int ok, nolinear = 0; - int first_nr, list_len, try_id, try_nr, add_id; + int first_nr, n_recs, try_id, try_nr, add_id; int num; char id [IDLEN+1]; if ((ok = pop3_gettopid(sock, 1, id, sizeof(id))) != 0) return ok; - if( ( first_nr = str_nr_in_list(&ctl->oldsaved, id) ) == -1 ) { + rec = first_uid_in_db(&ctl->oldsaved, id); + if (!rec) { /* the first message is unknown -> all messages are new */ *newp = *countp; return 0; } - + first_nr = rec->pos; + /* check where we expect the latest known message */ - list_len = count_list( &ctl->oldsaved ); - try_id = list_len - first_nr; /* -1 + 1 */ + n_recs = uid_db_n_records(&ctl->oldsaved); + try_id = n_recs - first_nr; /* -1 + 1 */ if( try_id > 1 ) { if( try_id <= *countp ) { if ((ok = pop3_gettopid(sock, try_id, id, sizeof(id))) != 0) return ok; - - try_nr = str_nr_last_in_list(&ctl->oldsaved, id); + + rec = last_uid_in_db(&ctl->oldsaved, id); + try_nr = rec ? rec->pos : -1; } else { try_id = *countp+1; try_nr = -1; } - if( try_nr != list_len -1 ) { + if( try_nr != n_recs -1 ) { /* some messages inbetween have been deleted... */ if( try_nr == -1 ) { nolinear = 1; @@ -955,7 +960,9 @@ if ((ok = pop3_gettopid(sock, try_id, id, sizeof(id))) != 0) return ok; - try_nr = str_nr_in_list(&ctl->oldsaved, id); + + rec = find_uid_by_id(&ctl->oldsaved, id); + try_nr = rec ? rec->pos : -1; } if( try_nr == -1 ) { try_id--; @@ -968,17 +975,16 @@ } } /* the first try_id messages are known -> copy them to the newsaved list */ - for( num = first_nr; num < list_len; num++ ) + for( num = first_nr; num < n_recs; num++ ) { - struct idlist *newl = save_str(&ctl->newsaved, - str_from_nr_list(&ctl->oldsaved, num), - UID_UNSEEN); - newl->val.status.num = num - first_nr + 1; + rec = find_uid_by_pos(&ctl->oldsaved, num); + + rec = uid_db_insert(&ctl->newsaved, rec->id, rec->status); + set_uid_db_num(&ctl->newsaved, rec, num - first_nr + 1); } if( nolinear ) { - free_str_list(&ctl->oldsaved); - ctl->oldsaved = 0; + clear_uid_db(&ctl->oldsaved); last = try_id; } @@ -997,7 +1003,7 @@ (void)folder; /* Ensure that the new list is properly empty */ - ctl->newsaved = (struct idlist *)NULL; + clear_uid_db(&ctl->newsaved); #ifdef MBOX /* Alain Knaff suggests this, but it's not RFC standard */ @@ -1032,6 +1038,9 @@ int fastuidl; char id [IDLEN+1]; + set_uid_db_num_pos_0(&ctl->oldsaved, *countp); + set_uid_db_num_pos_0(&ctl->newsaved, *countp); + /* should we do fast uidl this time? */ fastuidl = ctl->fastuidl; if (*countp > 7 && /* linear search is better if there are few mails! */ @@ -1091,14 +1100,13 @@ if (parseuid(buf, &unum, id, sizeof(id)) == PS_SUCCESS) { - struct idlist *old, *newl; + struct uid_db_record *old_rec, *new_rec; - newl = save_str(&ctl->newsaved, id, UID_UNSEEN); - newl->val.status.num = unum; + new_rec = uid_db_insert(&ctl->newsaved, id, UID_UNSEEN); - if ((old = str_in_list(&ctl->oldsaved, id, FALSE))) + if ((old_rec = find_uid_by_id(&ctl->oldsaved, id))) { - flag mark = old->val.status.mark; + flag mark = old_rec->status; if (mark == UID_DELETED || mark == UID_EXPUNGED) { /* XXX FIXME: switch 3 occurrences from @@ -1108,9 +1116,9 @@ if (outlevel >= O_VERBOSE) report(stderr, GT_("id=%s (num=%d) was deleted, but is still present!\n"), id, (int)unum); /* just mark it as seen now! */ - old->val.status.mark = mark = UID_SEEN; + old_rec->status = mark = UID_SEEN; } - newl->val.status.mark = mark; + new_rec->status = mark; if (mark == UID_UNSEEN) { (*newp)++; @@ -1127,10 +1135,14 @@ * swap the lists (say, due to socket error), * the same mail will not be downloaded again. */ - old = save_str(&ctl->oldsaved, id, UID_UNSEEN); + old_rec = uid_db_insert(&ctl->oldsaved, id, UID_UNSEEN); + } /* save the number */ - old->val.status.num = unum; + if (new_rec->status == UID_UNSEEN || !ctl->keep) { + set_uid_db_num(&ctl->oldsaved, old_rec, unum); + set_uid_db_num(&ctl->newsaved, new_rec, unum); + } } else return PS_ERROR; } /* multi-line loop for UIDL reply */ @@ -1199,8 +1211,9 @@ static int pop3_is_old(int sock, struct query *ctl, int num) /* is the given message old? */ { - struct idlist *newl; - if (!ctl->oldsaved) + struct uid_db_record *rec; + + if (!uid_db_n_records(&ctl->oldsaved)) return (num <= last); else if (dofastuidl) { @@ -1211,30 +1224,31 @@ /* in fast uidl, we manipulate the old list only! */ - if ((newl = id_find(&ctl->oldsaved, num))) + if ((rec = find_uid_by_num(&ctl->oldsaved, num))) { /* we already have the id! */ - return(newl->val.status.mark != UID_UNSEEN); + return(rec->status != UID_UNSEEN); } /* get the uidl first! */ if (pop3_getuidl(sock, num, id, sizeof(id)) != PS_SUCCESS) return(TRUE); - if ((newl = str_in_list(&ctl->oldsaved, id, FALSE))) { + if ((rec = find_uid_by_id(&ctl->oldsaved, id))) { /* we already have the id! */ - newl->val.status.num = num; - return(newl->val.status.mark != UID_UNSEEN); + set_uid_db_num(&ctl->oldsaved, rec, num); + return(rec->status != UID_UNSEEN); } /* save it */ - newl = save_str(&ctl->oldsaved, id, UID_UNSEEN); - newl->val.status.num = num; + rec = uid_db_insert(&ctl->oldsaved, id, UID_UNSEEN); + set_uid_db_num(&ctl->oldsaved, rec, num); + return(FALSE); + } else { + rec = find_uid_by_num(&ctl->newsaved, num); + return !rec || rec->status != UID_UNSEEN; } - else - return ((newl = id_find(&ctl->newsaved, num)) != NULL && - newl->val.status.mark != UID_UNSEEN); } #ifdef UNUSED @@ -1353,28 +1367,31 @@ static void mark_uid_seen(struct query *ctl, int number) /* Tell the UID code we've seen this. */ { - struct idlist *sdp; + struct uid_db_record *rec; - if ((sdp = id_find(&ctl->newsaved, number))) - sdp->val.status.mark = UID_SEEN; + if ((rec = find_uid_by_num(&ctl->newsaved, number))) + rec->status = UID_SEEN; /* mark it as seen in oldsaved also! In case, we do not swap the lists * (say, due to socket error), the same mail will not be downloaded * again. */ - if ((sdp = id_find(&ctl->oldsaved, number))) - sdp->val.status.mark = UID_SEEN; + if ((rec = find_uid_by_num(&ctl->oldsaved, number))) + rec->status = UID_SEEN; } static int pop3_delete(int sock, struct query *ctl, int number) /* delete a given message */ { + struct uid_db_record *rec; int ok; mark_uid_seen(ctl, number); /* actually, mark for deletion -- doesn't happen until QUIT time */ ok = gen_transact(sock, "DELE %d", number); if (ok != PS_SUCCESS) return(ok); - delete_str(dofastuidl ? &ctl->oldsaved : &ctl->newsaved, number); + + rec = find_uid_by_num(dofastuidl ? &ctl->oldsaved : &ctl->newsaved, number); + rec->status = UID_DELETED; return(PS_SUCCESS); } --- fetchmail/uid.c 7 May 2010 13:49:16 -0000 1.1.1.2 +++ fetchmail/uid.c 24 May 2010 16:02:10 -0000 1.1.1.2.2.6 @@ -108,6 +108,19 @@ static struct idlist *scratchlist; /** Read saved IDs from \a idfile and attach to each host in \a hostlist. */ +static int dump_saved_uid(struct uid_db_record *rec, void *unused) +{ + char *t; + + (void)unused; + + t = sdump(rec->id, rec->id_len); + report_build(stdout, " %s", t); + free(t); + + return 0; +} + void initialize_saved_lists(struct query *hostlist, const char *idfile) { struct stat statbuf; @@ -117,9 +130,9 @@ /* make sure lists are initially empty */ for (ctl = hostlist; ctl; ctl = ctl->next) { ctl->skipped = (struct idlist *)NULL; - ctl->oldsaved = (struct idlist *)NULL; - ctl->newsaved = (struct idlist *)NULL; - ctl->oldsavedend = &ctl->oldsaved; + + init_uid_db(&ctl->oldsaved); + init_uid_db(&ctl->newsaved); } errno = 0; @@ -212,11 +225,11 @@ *atsign = '\0'; host = atsign + 1; - /* find proper list and save it */ + /* find uidl db and save it */ for (ctl = hostlist; ctl; ctl = ctl->next) { if (strcasecmp(host, ctl->server.queryname) == 0 && strcasecmp(user, ctl->remotename) == 0) { - save_str(&ctl->oldsaved, id, UID_SEEN); + uid_db_insert(&ctl->oldsaved, id, UID_SEEN); break; } } @@ -248,14 +261,12 @@ { report_build(stdout, GT_("Old UID list from %s:"), ctl->server.pollname); - idp = ctl->oldsaved; - if (!idp) + + if (!uid_db_n_records(&ctl->oldsaved)) report_build(stdout, GT_(" <empty>")); - else for (idp = ctl->oldsaved; idp; idp = idp->next) { - char *t = sdump(idp->id, strlen(idp->id)); - report_build(stdout, " %s", t); - free(t); - } + else + traverse_uid_db(&ctl->oldsaved, dump_saved_uid, NULL); + report_complete(stdout, "\n"); } @@ -273,13 +284,18 @@ /** Assert that all UIDs marked deleted in query \a ctl have actually been expunged. */ -void expunge_uids(struct query *ctl) +static int mark_as_expunged_if(struct uid_db_record *rec, void *unused) { - struct idlist *idl; + (void)unused; + + if (rec->status == UID_DELETED) rec->status = UID_EXPUNGED; + return 0; +} - for (idl = dofastuidl ? ctl->oldsaved : ctl->newsaved; idl; idl = idl->next) - if (idl->val.status.mark == UID_DELETED) - idl->val.status.mark = UID_EXPUNGED; +void expunge_uids(struct query *ctl) +{ + traverse_uid_db(dofastuidl ? &ctl->oldsaved : &ctl->newsaved, + mark_as_expunged_if, NULL); } static const char *str_uidmark(int mark) @@ -303,16 +319,32 @@ } } -static void dump_list(const struct idlist *idp) +static int dump_uid_db_record(struct uid_db_record *rec, void *arg) +{ + unsigned *n_recs; + char *t; + + n_recs = arg; + --*n_recs; + + t = sdump(rec->id, rec->id_len); + report_build(stdout, " %s = %s%s", t, str_uidmark(rec->status), *n_recs ? "," : ""); + free(t); + + return 0; +} + +static void dump_uid_db(struct uid_db *db) { - if (!idp) { + unsigned n_recs; + + n_recs = uid_db_n_records(db); + if (!n_recs) { report_build(stdout, GT_(" <empty>")); - } else while (idp) { - char *t = sdump(idp->id, strlen(idp->id)); - report_build(stdout, " %s = %s%s", t, str_uidmark(idp->val.status.mark), idp->next ? "," : ""); - free(t); - idp = idp->next; + return; } + + traverse_uid_db(db, dump_uid_db_record, &n_recs); } /* finish a query */ @@ -323,10 +355,10 @@ { if (dofastuidl) { report_build(stdout, GT_("Merged UID list from %s:"), ctl->server.pollname); - dump_list(ctl->oldsaved); + dump_uid_db(&ctl->oldsaved); } else { report_build(stdout, GT_("New UID list from %s:"), ctl->server.pollname); - dump_list(ctl->newsaved); + dump_uid_db(&ctl->newsaved); } report_complete(stdout, "\n"); } @@ -347,15 +379,10 @@ * with UIDLs from that account in .fetchids, there is no way for * them to ever get garbage-collected. */ - if (ctl->newsaved) + if (uid_db_n_records(&ctl->newsaved)) { - /* old state of mailbox may now be irrelevant */ - struct idlist *temp = ctl->oldsaved; - if (outlevel >= O_DEBUG) - report(stdout, GT_("swapping UID lists\n")); - ctl->oldsaved = ctl->newsaved; - ctl->newsaved = (struct idlist *) NULL; - free_str_list(&temp); + swap_uid_db_data(&ctl->newsaved, &ctl->oldsaved); + clear_uid_db(&ctl->newsaved); } /* in fast uidl, there is no need to swap lists: the old state of * mailbox cannot be discarded! */ @@ -372,29 +399,53 @@ /* this is now a merged list! the mails which were seen in this * poll are marked here. */ report_build(stdout, GT_("Merged UID list from %s:"), ctl->server.pollname); - dump_list(ctl->oldsaved); + dump_uid_db(&ctl->oldsaved); report_complete(stdout, "\n"); } - if (ctl->newsaved) + if (uid_db_n_records(&ctl->newsaved)) { /* new state of mailbox is not reliable */ if (outlevel >= O_DEBUG) report(stdout, GT_("discarding new UID list\n")); - free_str_list(&ctl->newsaved); - ctl->newsaved = (struct idlist *) NULL; + clear_uid_db(&ctl->newsaved); } } /** Reset the number associated with each id */ void uid_reset_num(struct query *ctl) { - struct idlist *idp; - for (idp = ctl->oldsaved; idp; idp = idp->next) - idp->val.status.num = 0; + reset_uid_db_nums(&ctl->oldsaved); } /** Write list of seen messages, at end of run. */ +static int count_seen_deleted(struct uid_db_record *rec, void *arg) +{ + if (rec->status == UID_SEEN || rec->status == UID_DELETED) + ++*(long *)arg; + return 0; +} + +struct write_saved_info { + struct query *ctl; + FILE *fp; +}; + +static int write_uid_db_record(struct uid_db_record *rec, void *arg) +{ + struct write_saved_info *info; + int rc; + + if (!(rec->status == UID_SEEN || rec->status == UID_DELETED)) + return 0; + + info = arg; + rc = fprintf(info->fp, "%s@%s %s\n", + info->ctl->remotename, info->ctl->server.queryname, + rec->id); + return rc < 0 ? -1 : 0; +} + void write_saved_lists(struct query *hostlist, const char *idfile) { long idcount; @@ -404,12 +455,8 @@ /* if all lists are empty, nuke the file */ idcount = 0; - for (ctl = hostlist; ctl; ctl = ctl->next) { - for (idp = ctl->oldsaved; idp; idp = idp->next) - if (idp->val.status.mark == UID_SEEN - || idp->val.status.mark == UID_DELETED) - idcount++; - } + for (ctl = hostlist; ctl; ctl = ctl->next) + traverse_uid_db(&ctl->oldsaved, count_seen_deleted, &idcount); /* either nuke the file or write updated last-seen IDs */ if (!idcount && !scratchlist) @@ -428,19 +475,22 @@ report(stdout, GT_("Writing fetchids file.\n")); (void)unlink(newnam); /* remove file/link first */ if ((tmpfp = fopen(newnam, "w")) != (FILE *)NULL) { + struct write_saved_info info; int errflg = 0; + + info.fp = tmpfp; + for (ctl = hostlist; ctl; ctl = ctl->next) { - for (idp = ctl->oldsaved; idp; idp = idp->next) - if (idp->val.status.mark == UID_SEEN - || idp->val.status.mark == UID_DELETED) - if (fprintf(tmpfp, "%s@%s %s\n", - ctl->remotename, ctl->server.queryname, idp->id) < 0) { - int e = errno; - report(stderr, GT_("Write error on fetchids file %s: %s\n"), newnam, strerror(e)); - errflg = 1; - goto bailout; - } + info.ctl = ctl; + + if (traverse_uid_db(&ctl->oldsaved, write_uid_db_record, &info) < 0) { + int e = errno; + report(stderr, GT_("Write error on fetchids file %s: %s\n"), newnam, strerror(e)); + errflg = 1; + goto bailout; + } } + for (idp = scratchlist; idp; idp = idp->next) if (EOF == fputs(idp->id, tmpfp)) { int e = errno; --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ fetchmail/uid_db.c 24 May 2010 19:18:14 -0000 1.23.2.36 @@ -0,0 +1,588 @@ +/* + POP3 UID db + + Copyright (c) 2010 MAD Partners, Ltd. (rwe...@ms...) + + This file is being published in accordance with the GPLv2 terms + contained in the COPYING file being part of the fetchmail + 6.3.17 release, including the OpenSSL exemption. +*/ + +/* includes */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "xmalloc.h" +#include "uid_db.h" + +/* constants */ +enum { + MIN_RECORDS = 16 /* arbitrary */ +}; + +/* types */ +struct pat_node { + struct pat_node *ptrs_[3]; + + /* + The bit mask is stored in the nodes (as opposed to the + offset, which is (re-)calculated on demand) because + calculating the mask is a non-trivial operation (at + least on x86). + */ + unsigned bit_ndx, bit_mask; + + struct uid_db_record *rec; +}; + +/* + The idea behind this is that the 'left' pointer of + a node is accessible as ptrs(np)[-1] and the right + one a ptrs(np)[1]. This implies that no separate codepaths + for 'symmetric left- and right-cases' are needed. +*/ +#define ptrs(np) ((np)->ptrs_ + 1) + +/* routines */ +/** various helpers */ +static inline unsigned bit_ofs(unsigned bit_ndx) +{ + return bit_ndx >> 3; +} + +static inline unsigned bit_mask(unsigned bit_ndx) +{ + return 1 << (bit_ndx & 7); +} + +/** PATRICIA trie insertion */ +/*** walkers */ +static struct pat_node *walk_down(struct uid_db *db, struct uid_db_record *rec, + struct pat_node ***edgep, struct pat_node **parentp) +{ + /* + Find the pat node whose id is 'most similar' to the id + stored in rec->id. Return a pointer to this node. + 'parentp' and 'edgep' are output-only parameters which + will point to the parent of returned node and to the edge + pointer going from the parent to the returned node after + the call has returned. + + This routine is intended for inserts only. + */ + struct pat_node *cur, **edge; + unsigned bit_ndx, v, ofs; + + cur = db->pat_root; + ofs = -1; + do { + bit_ndx = cur->bit_ndx; + + if (bit_ofs(bit_ndx) != ofs) { + ofs = bit_ofs(bit_ndx); + v = ofs < rec->id_len ? rec->id[ofs] : 0; + } + + edge = ptrs(cur) + (v & cur->bit_mask ? 1 : -1); + } while ((cur = *edge) && cur->bit_ndx > bit_ndx); + + *parentp = + (struct pat_node *) + ((unsigned char *)edge - (v & bit_mask(bit_ndx) ? + offsetof(struct pat_node, ptrs_[2]) + : offsetof(struct pat_node, ptrs_[0]))); + *edgep = edge; + return cur; +} + +static inline struct pat_node *walk_up(unsigned diff_ndx, struct pat_node **parent) +{ + /* + Walk the chain of parent pointers starting with *parent until a node + is found whose parent has a bit_ndx smaller than diff_ndx. Return + a pointer to this node and update *parent to point to its parent. + */ + struct pat_node *p, *np; + + np = *parent; + + while ((p = *ptrs(np)) && p->bit_ndx > diff_ndx) + np = p; + + *parent = p; + return np; +} + +/*** bit fiddling */ +static inline unsigned first_set_bit_in_char(unsigned v) +{ + return ffs(v) - 1; +} + +static int find_first_diff_bit(struct uid_db_record const *r0, + struct uid_db_record const *r1) +{ + /* + Return the bit_ndx of the first differing bit in + r0->id and r1->id or -1 if the strings are identical. + */ + struct uid_db_record const *long_id; + unsigned ofs, max; + unsigned char v; + + max = r0->id_len; + if (max > r1->id_len) { + max = r1->id_len; + long_id = r0; + } else + long_id = r1; + + ofs = 0; + do + v = r0->id[ofs] ^ r1->id[ofs]; + while (!v && ++ofs < max); + + if (!v) { + if (r0->id_len == r1->id_len) return -1; + v = long_id->id[ofs]; + } + + return first_set_bit_in_char(v) + ofs * 8; +} + +static inline unsigned bit_set(unsigned bit_ndx, struct uid_db_record const *rec) +{ + /* + Return non-zero if the bit corresponding with bit_ndx is set + in rec->id + */ + unsigned ofs; + + ofs = bit_ofs(bit_ndx); + if (ofs >= rec->id_len) return 0; + return rec->id[ofs] & bit_mask(bit_ndx); +} + +/*** node allocation */ +static struct pat_node *get_pat_node(struct uid_db_record *rec) +{ + /* + Allocate a pat_node, set its rec pointer to rec and the + next pointer of rec to NULL. Return pointer to the pat_node. + */ + struct pat_node *np; + + np = xmalloc(sizeof(*np)); + np->rec = rec; + rec->next = NULL; + return np; +} + +static struct pat_node *get_standalone_node(struct uid_db_record *rec) +{ + /* + Return a pat_node suitable for being inserted on the 'left edge' + of the trie, ie either linked to a node whose left pointer was zero + or being inserted as root node into an empty trie. The bit_ndx of + the pat_node is set to the index corresponding with the highest + set bit in rec->id. + + NB: This is a bad choice when UIDs share a common prefix because + this implies that the root node will cause a bit to be tested which + is non-zero in all other nodes, adding a theoretically redundant + level to the trie. This is (to the best of my knowledge) un- + fortunately unavoidable if nodes with different key lengths need + to be supported. + */ + struct pat_node *np; + + np = get_pat_node(rec); + np->bit_ndx = first_set_bit_in_char(*rec->id); + np->bit_mask = bit_mask(np->bit_ndx); + return np; +} + +/*** various helpers */ +static inline int record_id_equal(struct uid_db_record const *r0, + struct uid_db_record const *r1) +{ + return + r0->id_len == r1->id_len + && memcmp(r0->id, r1->id, r0->id_len) == 0; +} + +static struct uid_db_record *append_to_list(struct uid_db_record **recp, + struct uid_db_record *rec) +{ + /* + Append the uid_db_record pointed to by rec to the uid_db_record + list accessible as *recp and return rec. + */ + while (*recp) recp = &(*recp)->next; + *recp = rec; + + rec->next = NULL; + return rec; +} + +/*** insert routine */ +static struct uid_db_record *pat_insert(struct uid_db *db, + struct uid_db_record *rec) +{ + /* + Insert the record pointed to by rec in the (potentially empty) + PATRICIA trie pointed to by db->pat_root and return rec. + */ + struct pat_node *np, *closest, *parent, **edge; + int me, bit_ndx; + + if (!db->pat_root) { + np = get_standalone_node(rec); + ptrs(np)[-1] = *ptrs(np) = NULL; + ptrs(np)[1] = np; + + db->pat_root = np; + return rec; + } + + closest = walk_down(db, rec, &edge, &parent); + + if (closest) { + bit_ndx = find_first_diff_bit(closest->rec, rec); + if (bit_ndx < 0) + return append_to_list(&closest->rec->next, rec); + + np = get_pat_node(rec); + np->bit_ndx = bit_ndx; + np->bit_mask = bit_mask(bit_ndx); + } else + np = get_standalone_node(rec); + + if (parent->bit_ndx > np->bit_ndx) { + closest = walk_up(np->bit_ndx, &parent); + + if (!parent) edge = &db->pat_root; + else edge = ptrs(parent)[-1] == closest ? + ptrs(parent) - 1 : ptrs(parent) + 1; + *ptrs(closest) = np; + } + + *edge = np; + *ptrs(np) = parent; + + me = bit_set(np->bit_ndx, rec) ? 1 : -1; + ptrs(np)[me] = np; + ptrs(np)[-me] = closest; + + return rec; +} + +/** general db insertion */ +static struct uid_db_record *get_uid_db_record(char const *id, unsigned status) +{ + /* + Allocate a uid_db_record structure and set its id pointer to a + dynamically allocated copy of id. The status member of the + new record is set to status and its message number to zero (invalid). + A pointer to it is then returned. + */ + struct uid_db_record *rec; + size_t id_len; + + rec = xmalloc(sizeof(*rec)); + + id_len = strlen(id); + rec->id = memcpy(xmalloc(id_len + 1), id, id_len + 1); + rec->id_len = id_len; + rec->status = status; + rec->num = 0; + + return rec; +} + +static void insert_into_records(struct uid_db *db, + struct uid_db_record *rec) +{ + /* + Insert rec into the records array of the uid_db pointed + to by db. The array is grown as necessary and the + corresponding state variables of the db are updated + accordingly. The pos member of rec is set to its position + in the array. + */ + unsigned next, want; + + next = db->records_next; + + if (next == db->records_max) { + want = db->records_max *= 2; + db->records = xrealloc(db->records, want * sizeof(rec)); + } + + rec->pos = next; + db->records[next] = rec; + db->records_next = next + 1; +} + +struct uid_db_record *uid_db_insert(struct uid_db *db, + char const *id, unsigned status) +{ + /* + Create an uid_db_record whose id is id and whose status is + status and insert it into the uid_db pointed to by db. + Return a pointer to the newly created record. + */ + struct uid_db_record *rec; + + rec = get_uid_db_record(id, status); + insert_into_records(db, rec); + return pat_insert(db, rec); +} + +/** message number index */ +void set_uid_db_num(struct uid_db *db, struct uid_db_record *rec, + unsigned num) +{ + /* + Set the message number of the record pointed to by rec to num + and insert it into the num_ndx of the uid_db pointed to by db + at position corresponding with num. The num_ndx lookup array + is grown as needed. Message numbers are expected to 'generally' + be recorded in ascending order and hence, no provisions are + made to deal with the potentially quadratic complexity of + inserting a sequence of numbers into an array such that it + needs to be grown continuously. + */ + struct num_ndx *num_ndx; + unsigned have, want; + + num_ndx = &db->num_ndx; + + if (num_ndx->end_value > num) { + have = num_ndx->pos_0_value - num_ndx->end_value + 1; + want = num_ndx->pos_0_value - num + 1; + num_ndx->end_value = num; + + num_ndx->records = xrealloc(num_ndx->records, want * sizeof(rec)); + do num_ndx->records[--want] = NULL; while (want > have); + } + + num_ndx->records[uid_db_num_ofs(num_ndx, num)] = rec; +} + +void reset_uid_db_nums(struct uid_db *db) +{ + /* + Reset the message numbers of all uid_db_records stored + in the uid_db pointed to by db. The corresponding num_ndx + lookup array is afterwards freed and the num_ndx end_value + adjusted in order to indicate an 'empty' message number + index. + */ + struct uid_db_record **rec; + struct num_ndx *num_ndx; + unsigned ndx; + + num_ndx = &db->num_ndx; + + if (num_ndx->end_value < num_ndx->pos_0_value) { + ndx = num_ndx->pos_0_value - num_ndx->end_value; + while (ndx) { + rec = num_ndx->records + --ndx; + if (*rec) (*rec)->num = 0; + } + + num_ndx->end_value = num_ndx->pos_0_value + 1; + + free(num_ndx->records); + num_ndx->records = NULL; + } +} + +/** search routines */ +struct uid_db_record *find_uid_by_id(struct uid_db *db, char const *id) +{ + /* + Search for an uid_db_record whose id is id in the uid_db pointed + to by db and return a pointer to it or NULL if no such record was + found. + */ + struct pat_node *np; + struct uid_db_record *rec; + unsigned v, bit_ndx, ofs; + size_t len; + + np = db->pat_root; + if (np) { + len = strlen(id); + ofs = -1; + do { + bit_ndx = np->bit_ndx; + + if (bit_ofs(bit_ndx) != ofs) { + ofs = bit_ofs(bit_ndx); + v = ofs < len ? id[ofs] : 0; + } + + np = ptrs(np)[v & np->bit_mask ? 1 : -1]; + } while (np && np->bit_ndx > bit_ndx); + + if (!np) return NULL; + + rec = np->rec; + return rec->id_len == len && memcmp(id, rec->id, len) == 0 ? + rec : NULL; + } + + return NULL; +} + +struct uid_db_record *last_uid_in_db(struct uid_db *db, char const *id) +{ + /* + Return a pointer to the 'last' (insert order) uid_db_record + contained in the uid_db pointed to by db whose id is id or + NULL if no such record exists. + */ + struct uid_db_record *rec; + + rec = find_uid_by_id(db, id); + if (!rec) return NULL; + + while (rec->next) rec = rec->next; + return rec; +} + +/** destruction */ +static void free_uid_list(struct uid_db_record *rec) +{ + /* + Free the list of uid_db_records starting with + the record pointed to by rec. + */ + if (rec->next) free_uid_list(rec->next); + + xfree(rec->id); + xfree(rec); +} + +static void free_pat_trie(struct pat_node *np) +{ + /* + Free the PATRCIA-trie pointed to by np and all + uid_db_records contained in it. + + The algorithm implemented below is: + + 1. Load the left pointer of the node pointed to by + np into next. + + 2. If the result is not NULL, + 2a) Set the left pointer to NULL. + 2b) Goto 1 if next points to a child of np. + + 3. Load the right pointer of the node pointed to by + np into next. + + 4. If the result is not NULL, + 4a) Set the right pointer to NULL. + 4b) Goto 1 id next points to a child of np. + + 5. Load next with the parent pointer of np. + + 6. Free np->rec and np. + + 7. Set np to next and goto 1 if it is not null. + */ + struct pat_node *next; + + do { + next = ptrs(np)[-1]; + if (next) { + ptrs(np)[-1] = NULL; + if (next->bit_ndx > np->bit_ndx) continue; + } + + next = ptrs(np)[1]; + if (next) { + ptrs(np)[1] = NULL; + if (next->bit_ndx > np->bit_ndx) continue; + } + + next = *ptrs(np); + + free_uid_list(np->rec); + free(np); + } while ((np = next)); +} + +void free_uid_db(struct uid_db *db) +{ + /* + Free all dynamically allocated memory of the uid_db + pointed to by db. The structure is not reinitialized. + */ + if (db->pat_root) free_pat_trie(db->pat_root); + + xfree(db->records); + xfree(db->num_ndx.records); +} + +/** various public interfaces */ +void init_uid_db(struct uid_db *db) +{ + /* + Initialize the uid_db structure pointed to by db 'properly' + such that it represents an empty database. An array of + size MIN_RECORDS is allocated and assigned to db->records. + */ + struct num_ndx *num_ndx; + + db->pat_root = NULL; + + db->records = xmalloc(MIN_RECORDS * sizeof(*db->records)); + db->records_max = MIN_RECORDS; + db->records_next = 0; + + num_ndx = &db->num_ndx; + num_ndx->pos_0_value = num_ndx->end_value = -1; + num_ndx->records = NULL; +} + +void swap_uid_db_data(struct uid_db *db_0, struct uid_db *db_1) +{ + struct uid_db tmp; + + tmp = *db_0; + *db_0 = *db_1; + *db_1 = tmp; +} + +int traverse_uid_db(struct uid_db *db, + uid_db_traversal_routine *r, void *arg) +{ + /* + Traverses the struct uid_db records array in insert order, + invoking the subroutine pointed to by r with a pointer to + each record and the arg pointer as arguments. If the return + value of that is non-zero, traverse_uid_db immediately returns + with this value. Otherwise, zero is returned after the last + record was visited. + + The uid_db_traversal_routine must not modify the uid_db during + traversal. + */ + struct uid_db_record **recs; + unsigned ndx, max; + int rc; + + rc = 0; + ndx = 0; + max = db->records_next; + recs = db->records; + while (ndx < max && (rc = r(recs[ndx], arg)) == 0) + ++ndx; + + return rc; +} --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ fetchmail/uid_db.h 24 May 2010 19:18:38 -0000 1.3.2.19 @@ -0,0 +1,141 @@ +/* + POP3 UID database + + Copyright (c) 2010 MAD Partners, Ltd. (rwe...@ms...) + + This file is being published in accordance with the GPLv2 terms + contained in the COPYING file being part of the fetchmail + 6.3.17 release, including the OpenSSL exemption. +*/ +#ifndef fetchmail_uid_db_h +#define fetchmail_uid_db_h + +/* includes */ +#include <stddef.h> + +/* types */ +struct uid_db_record { + char *id; + size_t id_len; + + /* + num - message number assigned by server + status - message status (eg seen, deleted, ...) + pos - position in record list + */ + unsigned num, status, pos; + + struct uid_db_record *next; +}; + +struct num_ndx { + /* + Used to find uid records by message number. + + pos_0_value - highest message number + end_value - lowest known message number + + Grows downwards because the fastuidl-code may record + message numbers in non-ascending order but the + lookup array should ideally only be large enough to + store pointers to interesting ('new') messages. + */ + struct uid_db_record **records; + unsigned pos_0_value, end_value; +}; + +struct uid_db +{ + struct pat_node *pat_root; + + struct uid_db_record **records; + unsigned records_max, records_next; + + struct num_ndx num_ndx; +}; + +typedef int uid_db_traversal_routine(struct uid_db_record *, void *); + +/* routines */ +/** initialization/ finalization */ +void init_uid_db(struct uid_db *db); + +void free_uid_db(struct uid_db *db); + +static inline void clear_uid_db(struct uid_db *db) +{ + free_uid_db(db); + init_uid_db(db); +} + +/** message number index handling */ +static inline unsigned uid_db_num_ofs(struct num_ndx const *num_ndx, unsigned num) +{ + return num_ndx->pos_0_value - num; +} + +void set_uid_db_num(struct uid_db *db, struct uid_db_record *rec, + unsigned num); + +static inline void set_uid_db_num_pos_0(struct uid_db *db, unsigned num) +{ + db->num_ndx.pos_0_value = num; + db->num_ndx.end_value = num + 1; +} + +void reset_uid_db_nums(struct uid_db *db); + +/** various uidl db manipulatiors */ +struct uid_db_record *uid_db_insert(struct uid_db *db, + char const *id, unsigned status); + +void swap_uid_db_data(struct uid_db *db_0, struct uid_db *db_1); + +/** search routines */ +struct uid_db_record *find_uid_by_id(struct uid_db *db, char const *id); + +static inline struct uid_db_record * +find_uid_by_num(struct uid_db *db, unsigned num) +{ + struct num_ndx *num_ndx; + + num_ndx = &db->num_ndx; + return num >= num_ndx->end_value ? + num_ndx->records[uid_db_num_ofs(num_ndx, num)] : NULL; +} + +static inline struct uid_db_record * +find_uid_by_pos(struct uid_db *db, unsigned pos) +{ + return pos < db->records_next ? db->records[pos] : NULL; +} + +static inline struct uid_db_record * +first_uid_in_db(struct uid_db *db, char const *id) +{ + return find_uid_by_id(db, id); +} + +struct uid_db_record *last_uid_in_db(struct uid_db *db, char const *id); + +/** various accessors */ +static inline unsigned uid_db_n_records(struct uid_db const *db) +{ + return db->records_next; +} + +/* + Traverses the struct uid_db records array in insert order, + invoking the subroutine pointed to by r with a pointer to + each record and the arg pointer as arguments. If the return + value of that is non-zero, traverse_uid_db immediately returns + with this value. Otherwise, zero is returned after the last + record was visited. + + The uid_db_traversal_routine must not modify the uid_db during + traversal. +*/ +int traverse_uid_db(struct uid_db *db, + uid_db_traversal_routine *r, void *arg); + +#endif |