From: Rainer W. <rwe...@ms...> - 2010-05-19 20:30:58
|
"Matthias Andree" <mat...@gm...> writes: [...] > please do (show an advance/preview version) - if you don't want to > expose it to the list yet for whatever reason, but want my comments, > feel free to send personal mail. I'll happily listen to anyone who has something more constructive to say about this than "I hate it!". This is my last 'standalone test program'. It can be compiled and used to do testing/ measurement of the trie-code based on reading 'unique message IDs' from stdin which must point to 'something seekable'. The interface is known to be incomplete and the halfway integrated version contains everything in here and whatever was additionally needed in order to use this as 'backend' for the code in uid.c. My present approach to doing lookups based on 'message numbers' is very simplistic --- just an array going from 0 .. <highest message number>. There's a potential scalability issue in there: Realloc with a constant increment is O(n^2) when the memory area needs to be copied. If the pop3.c code works as I believe it does (inserts message numbers in increasing order), I will likely change this to additionally store the offset of the first element in the array to keep this shorter for the case when a large mailbox contains a few new messages at the end. I will also change the realloc-calculation to something more sensible (usually, this means doubling the allocated area every time it needs to grow. Any feedback on the implementation would be very welcome. ------------- uidl_db.h ------------------- /* uidl db Copyright (c) 2010 MAD Partners, Ltd. This file is being published according to the terms of the GPLv2 license contained in the 6.3.17 release of fetchmail. */ #ifndef fetchmail_uidl_db_h #define fetchmail_uidl_db_h /* includes */ #include <stddef.h> /* types */ struct uidl_record { char *id; unsigned id_len; unsigned num, status, pos; struct uidl_record *next; }; struct pat_node; struct uidl_db { struct pat_node *pat_root; struct uidl_record **records; unsigned records_max, records_next; struct uidl_record **num_ndx; unsigned num_ndx_max; }; /* routines */ void init_uidl_db(struct uidl_db *db); struct uidl_record *insert_uidl(struct uidl_db *db, char const *id, unsigned status); struct uidl_record *insert_uidl_noindex(struct uidl_db *db, char const *id, unsigned status); void set_uidl_num(struct uidl_db *db, struct uidl_record *rec, unsigned num); struct uidl_record *find_uidl_by_id(struct uidl_db *db, char const *id); static inline struct uidl_record * find_uidl_by_num(struct uidl_db *db, unsigned num) { return num < db->num_ndx_max ? db->num_ndx[num] : NULL; } static inline struct uidl_record * find_uidl_by_pos(struct uidl_db *db, unsigned pos) { return pos < db->records_next ? db->records[pos] : NULL; } static inline struct uidl_record * first_uidl_in_db(struct uidl_db *db, char const *id) { return find_uidl_by_id(db, id); } struct uidl_record *last_uidl_in_db(struct uidl_db *db, char const *id); #endif --------------------------------------- ------------------- uidl_db.c ----------------- /* uidl db Copyright (c) 2010 MAD Partners, Ltd. This file is being published according to the terms of the GPLv2 license contained in the 6.3.17 release of fetchmail. */ /* includes */ #include <stdlib.h> #include <stdio.h> #include <string.h> #include "uidl_db.h" /* constants */ enum { MIN_RECORDS = 16 }; /* types */ struct pat_node { struct pat_node *ptrs_[3]; unsigned bit_ndx, bit_mask; struct uidl_record *rec; }; /* The idea behind this is that the 'left' pointer is at -1 and the 'right' pointer at 1. This implies that dealing with 'symmetric cases' for left and right pointers isn't necessary. There's always just 'this pointer' (offset n) and 'the other pointer' (offset -n). */ #define ptrs(p) ((p)->ptrs_ + 1) /* routines */ void *xmalloc(size_t len) { return malloc(len); } void *xrealloc(void *p, size_t new_size) { return realloc(p, new_size); } static inline unsigned bit_ofs(unsigned bit_ndx) { return bit_ndx >> 3; } static inline unsigned bit_mask(unsigned bit_ndx) { return 1 << (bit_ndx & 7); } void init_uidl_db(struct uidl_db *db) { db->pat_root = NULL; db->records = xmalloc(MIN_RECORDS * sizeof(*db->records)); db->records_max = MIN_RECORDS; db->records_next = 0; db->num_ndx = NULL; db->num_ndx_max = 0; }; static struct pat_node *walk_down(struct uidl_db *db, struct uidl_record *rec, struct pat_node ***edgep, struct pat_node **parentp) { struct pat_node *cur, **edge; unsigned bit_ndx, v; int ofs; cur = db->pat_root; ofs = -1; do { bit_ndx = cur->bit_ndx; if (bit_ofs(bit_ndx) != ofs) { ofs = bit_ofs(bit_ndx); v = ofs < rec->id_len ? rec->id[ofs] : 0; } edge = ptrs(cur) + (v & cur->bit_mask ? 1 : -1); } while ((cur = *edge) && cur->bit_ndx > bit_ndx); *parentp = (void *)((char *)edge - (v & bit_mask(bit_ndx) ? offsetof(struct pat_node, ptrs_[2]) : offsetof(struct pat_node, ptrs_[0]))); *edgep = edge; return cur; } static inline unsigned first_set_bit_in_char(unsigned v) { return ffs(v) - 1; } static int find_first_diff_bit(struct uidl_record const *r0, struct uidl_record const *r1) { struct uidl_record const *long_id; unsigned ofs, max; unsigned char v; max = r0->id_len; if (max > r1->id_len) { max = r1->id_len; long_id = r0; } else long_id = r1; ofs = 0; do v = r0->id[ofs] ^ r1->id[ofs]; while (!v && ++ofs < max); if (!v) { if (r0->id_len == r1->id_len) return -1; v = long_id->id[ofs]; } return first_set_bit_in_char(v) + ofs * 8; } static inline int record_id_equal(struct uidl_record const *r0, struct uidl_record const *r1) { return r0->id_len == r1->id_len && memcmp(r0->id, r1->id, r0->id_len) == 0; } static struct uidl_record *append_to_list(struct uidl_record **recp, struct uidl_record *rec) { while (*recp) recp = &(*recp)->next; *recp = rec; rec->next = NULL; return rec; } static struct pat_node *get_pat_node(struct uidl_record *rec) { struct pat_node *np; np = xmalloc(sizeof(*np)); np->rec = rec; rec->next = NULL; return np; } static struct pat_node *get_standalone_node(struct uidl_record *rec) { struct pat_node *np; np = get_pat_node(rec); np->bit_ndx = first_set_bit_in_char(*rec->id); np->bit_mask = bit_mask(np->bit_ndx); return np; } static inline unsigned bit_set(unsigned bit_ndx, struct uidl_record const *rec) { unsigned ofs; ofs = bit_ofs(bit_ndx); if (ofs >= rec->id_len) return 0; return rec->id[ofs] & bit_mask(bit_ndx); } static inline struct pat_node *walk_up(unsigned diff_ndx, struct pat_node **parent) { struct pat_node *p, *np; np = *parent; while ((p = *ptrs(np)) && p->bit_ndx > diff_ndx) np = p; *parent = p; return np; } static struct uidl_record *pat_insert(struct uidl_db *db, struct uidl_record *rec) { struct pat_node *np, *closest, *parent, **edge; int me, bit_ndx; if (!db->pat_root) { np = get_standalone_node(rec); ptrs(np)[-1] = *ptrs(np) = NULL; ptrs(np)[1] = np; db->pat_root = np; return rec; } closest = walk_down(db, rec, &edge, &parent); if (closest) { bit_ndx = find_first_diff_bit(closest->rec, rec); if (bit_ndx < 0) return append_to_list(&closest->rec->next, rec); np = get_pat_node(rec); np->bit_ndx = bit_ndx; np->bit_mask = bit_mask(bit_ndx); } else np = get_standalone_node(rec); if (parent->bit_ndx > np->bit_ndx) { closest = walk_up(np->bit_ndx, &parent); if (!parent) edge = &db->pat_root; else edge = ptrs(parent)[-1] == closest ? ptrs(parent) - 1 : ptrs(parent) + 1; *ptrs(closest) = np; } *edge = np; *ptrs(np) = parent; me = bit_set(np->bit_ndx, rec) ? 1 : -1; ptrs(np)[me] = np; ptrs(np)[-me] = closest; return rec; } static struct uidl_record *get_uidl_record(char const *id, unsigned status) { struct uidl_record *rec; unsigned id_len; rec = xmalloc(sizeof(*rec)); id_len = strlen(id); rec->id = memcpy(xmalloc(id_len + 1), id, id_len + 1); rec->id_len = id_len; rec->status = status; rec->num = 0; return rec; } static struct uidl_record *insert_into_records(struct uidl_db *db, struct uidl_record *rec) { unsigned next, want; next = db->records_next; if (next == db->records_max) { want = db->records_max *= 2; db->records = xrealloc(db->records, want * sizeof(rec)); } rec->pos = next; db->records[next] = rec; db->records_next = next + 1; return rec; } struct uidl_record *insert_uidl(struct uidl_db *db, char const *id, unsigned status) { struct uidl_record *rec; rec = insert_uidl_noindex(db, id, status); return pat_insert(db, rec); } struct uidl_record *insert_uidl_noindex(struct uidl_db *db, char const *id, unsigned status) { struct uidl_record *rec; rec = get_uidl_record(id, status); return insert_into_records(db, rec); } void set_uidl_num(struct uidl_db *db, struct uidl_record *rec, unsigned num) { unsigned want, have; have = db->num_ndx_max; if (num >= have) { want = num + MIN_RECORDS; db->num_ndx = xrealloc(db->num_ndx, want * sizeof(*db->num_ndx)); db->num_ndx_max = want; do db->num_ndx[--want] = NULL; while (want > have); } db->num_ndx[num] = rec; rec->num = num; } struct uidl_record *find_uidl_by_id(struct uidl_db *db, char const *id) { static unsigned step_sum, searches; struct pat_node *np; struct uidl_record *rec; unsigned v, bit_ndx, len; int ofs; len = strlen(id); np = db->pat_root; if (np) { ofs = -1; do { bit_ndx = np->bit_ndx; if (bit_ofs(bit_ndx) != ofs) { ofs = bit_ofs(bit_ndx); v = ofs < len ? id[ofs] : 0; } np = ptrs(np)[v & np->bit_mask ? 1 : -1]; } while (np && np->bit_ndx > bit_ndx); if (!np) return NULL; rec = np->rec; return rec->id_len == len && memcmp(id, rec->id, len) == 0 ? rec : NULL; } #if 0 report(stderr, GT_("id search in unindexed db\n")); #endif ofs = db->records_next; while (ofs) { rec = db->records[--ofs]; if (rec->id_len == len && memcmp(rec->id, id, len) == 0) return rec; } return 0; } struct uidl_record *last_uidl_in_db(struct uidl_db *db, char const *id) { struct uidl_record *rec; rec = find_uidl_by_id(db, id); if (!rec) return NULL; while (rec->next) rec = rec->next; return rec; } static void free_uidl_list(struct uidl_record *rec) { if (rec->next) free_uidl_list(rec->next); free(rec->id); free(rec); } static void free_pat_trie(struct pat_node *np) { struct pat_node *child; child = ptrs(np)[-1]; if (child && child->bit_ndx > np->bit_ndx) free_pat_trie(child); child = ptrs(np)[1]; if (child->bit_ndx > np->bit_ndx) free_pat_trie(child); free_uidl_list(np->rec); free(np); } void free_uidl_db(struct uidl_db *db) { struct uidl_record *rec; unsigned ndx; if (db->pat_root) free_pat_trie(db->pat_root); free(db->records); free(db->num_ndx); } /* test code */ int main(void) { struct uidl_db db; struct uidl_record *rec; unsigned n; char buf[80]; init_uidl_db(&db); while (gets(buf)) insert_uidl(&db, buf, 0); n = 1000; while (n--) { fseek(stdin, 0, 0); while (gets(buf)) { rec = find_uidl_by_id(&db, buf); if (!rec || strcmp(rec->id, buf) != 0) fprintf(stderr, "not found\n"); } } return 0; } --------------------------------------------- |