You can subscribe to this list here.
2000 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
|
Dec
(65) |
---|---|---|---|---|---|---|---|---|---|---|---|---|
2001 |
Jan
(61) |
Feb
(111) |
Mar
(98) |
Apr
(33) |
May
(31) |
Jun
(64) |
Jul
(35) |
Aug
(50) |
Sep
(170) |
Oct
(89) |
Nov
(72) |
Dec
(214) |
2002 |
Jan
(74) |
Feb
(21) |
Mar
(5) |
Apr
(4) |
May
(61) |
Jun
(13) |
Jul
(3) |
Aug
(39) |
Sep
(14) |
Oct
(80) |
Nov
(22) |
Dec
(76) |
2003 |
Jan
(14) |
Feb
(59) |
Mar
(7) |
Apr
(5) |
May
|
Jun
(4) |
Jul
(6) |
Aug
(78) |
Sep
(68) |
Oct
(23) |
Nov
(25) |
Dec
(107) |
2004 |
Jan
(82) |
Feb
(75) |
Mar
(13) |
Apr
(9) |
May
(21) |
Jun
(2) |
Jul
(1) |
Aug
(52) |
Sep
(23) |
Oct
(15) |
Nov
(6) |
Dec
(60) |
2005 |
Jan
(125) |
Feb
(94) |
Mar
(32) |
Apr
(68) |
May
|
Jun
|
Jul
(11) |
Aug
(3) |
Sep
(15) |
Oct
(3) |
Nov
|
Dec
(58) |
2006 |
Jan
(46) |
Feb
(29) |
Mar
(1) |
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
|
Oct
(1) |
Nov
(9) |
Dec
(9) |
2007 |
Jan
(62) |
Feb
(60) |
Mar
|
Apr
|
May
|
Jun
(2) |
Jul
(6) |
Aug
(3) |
Sep
(4) |
Oct
(2) |
Nov
(4) |
Dec
(46) |
2008 |
Jan
(3) |
Feb
(2) |
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
(2) |
Sep
|
Oct
|
Nov
(10) |
Dec
(49) |
2009 |
Jan
(14) |
Feb
(12) |
Mar
(37) |
Apr
(8) |
May
|
Jun
|
Jul
|
Aug
(6) |
Sep
(25) |
Oct
(48) |
Nov
(7) |
Dec
(45) |
2010 |
Jan
(15) |
Feb
(14) |
Mar
(7) |
Apr
|
May
|
Jun
(1) |
Jul
(1) |
Aug
(1) |
Sep
|
Oct
(28) |
Nov
|
Dec
(44) |
2011 |
Jan
(22) |
Feb
|
Mar
|
Apr
(1) |
May
|
Jun
(1) |
Jul
|
Aug
|
Sep
(1) |
Oct
|
Nov
|
Dec
(70) |
2012 |
Jan
(6) |
Feb
|
Mar
|
Apr
|
May
(2) |
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
|
Dec
(128) |
2013 |
Jan
(8) |
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
|
Oct
(2) |
Nov
(2) |
Dec
(150) |
2014 |
Jan
(70) |
Feb
(44) |
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
|
Dec
|
From: Sebastian B. <sb...@us...> - 2014-01-19 18:44:45
|
Update of /cvsroot/simplemail/simplemail In directory sfp-cvs-1.v30.ch3.sourceforge.com:/tmp/cvs-serv24076 Modified Files: index_external.c Log Message: Grouped the assignment of the path elements together. Index: index_external.c =================================================================== RCS file: /cvsroot/simplemail/simplemail/index_external.c,v retrieving revision 1.33 retrieving revision 1.34 diff -u -d -r1.33 -r1.34 --- index_external.c 19 Jan 2014 18:44:19 -0000 1.33 +++ index_external.c 19 Jan 2014 18:44:43 -0000 1.34 @@ -254,9 +254,6 @@ bnode_read_block(idx, tmp, block); - path->max_level = level; - path->node[level].block = block; - /* Find the slot with the separation key for the given text. The slot with * the separation key for a given text is the slot whose value is not * lexicographically smaller than the text but whose left neighbor slot value @@ -282,6 +279,8 @@ if (i == 0) lchild = tmp->lchild; else lchild = bnode_get_ith_element_of_node(idx, tmp, i - 1)->gchild; + path->max_level = level; + path->node[level].block = block; path->node[level].key_index = i; /* Leave early if this was a direct match with a separation key. |
From: Sebastian B. <sb...@us...> - 2014-01-19 18:44:21
|
Update of /cvsroot/simplemail/simplemail In directory sfp-cvs-1.v30.ch3.sourceforge.com:/tmp/cvs-serv24029 Modified Files: index_external.c Log Message: Define direct_match earlier. Index: index_external.c =================================================================== RCS file: /cvsroot/simplemail/simplemail/index_external.c,v retrieving revision 1.32 retrieving revision 1.33 diff -u -d -r1.32 -r1.33 --- index_external.c 19 Jan 2014 18:43:51 -0000 1.32 +++ index_external.c 19 Jan 2014 18:44:19 -0000 1.33 @@ -250,11 +250,10 @@ do { int lchild; + int direct_match = 0; bnode_read_block(idx, tmp, block); - int direct_match = 0; - path->max_level = level; path->node[level].block = block; |
From: Sebastian B. <sb...@us...> - 2014-01-19 18:43:54
|
Update of /cvsroot/simplemail/simplemail In directory sfp-cvs-1.v30.ch3.sourceforge.com:/tmp/cvs-serv23968 Modified Files: index_external.c Log Message: Determine the lchild after we have found the separation key. Index: index_external.c =================================================================== RCS file: /cvsroot/simplemail/simplemail/index_external.c,v retrieving revision 1.31 retrieving revision 1.32 diff -u -d -r1.31 -r1.32 --- index_external.c 19 Jan 2014 18:43:29 -0000 1.31 +++ index_external.c 19 Jan 2014 18:43:51 -0000 1.32 @@ -249,9 +249,10 @@ do { + int lchild; + bnode_read_block(idx, tmp, block); - int lchild = tmp->lchild; int direct_match = 0; path->max_level = level; @@ -277,10 +278,11 @@ direct_match = cmp == 0; break; } - - lchild = e->gchild; } + if (i == 0) lchild = tmp->lchild; + else lchild = bnode_get_ith_element_of_node(idx, tmp, i - 1)->gchild; + path->node[level].key_index = i; /* Leave early if this was a direct match with a separation key. |
From: Sebastian B. <sb...@us...> - 2014-01-19 18:43:31
|
Update of /cvsroot/simplemail/simplemail In directory sfp-cvs-1.v30.ch3.sourceforge.com:/tmp/cvs-serv23916 Modified Files: index_external.c Log Message: Do not use goto within the loop. Instead we keep the information one scope longer before leaving the function early. Also improved comments. Index: index_external.c =================================================================== RCS file: /cvsroot/simplemail/simplemail/index_external.c,v retrieving revision 1.30 retrieving revision 1.31 diff -u -d -r1.30 -r1.31 --- index_external.c 19 Jan 2014 18:43:07 -0000 1.30 +++ index_external.c 19 Jan 2014 18:43:29 -0000 1.31 @@ -252,11 +252,18 @@ bnode_read_block(idx, tmp, block); int lchild = tmp->lchild; + int direct_match = 0; path->max_level = level; path->node[level].block = block; - /* Find the appropriate slot. TODO: Use binary search */ + /* Find the slot with the separation key for the given text. The slot with + * the separation key for a given text is the slot whose value is not + * lexicographically smaller than the text but whose left neighbor slot value + * is lexicographically smaller than the text. + * + * TODO: Use binary search + */ for (i=0; i < tmp->num_elements; i++) { int cmp; @@ -264,27 +271,33 @@ if (!bnode_compare_string(idx, e, text, &cmp)) return 0; - if (!cmp) - { - path->node[level].key_index = i; - /* Direct match with a separation key */ - goto out; - } - /* if str > text then we ascend to lchild (except if we are at a leaf, of course) */ - if (cmp > 0) + if (cmp >= 0) + { + direct_match = cmp == 0; break; + } lchild = e->gchild; } path->node[level].key_index = i; + /* Leave early if this was a direct match with a separation key. + * In this case, we do not need to descend to the child */ + if (direct_match) + goto out; + if (block == lchild && !tmp->leaf) { fprintf(stderr, "Endless loop detected!\n"); return 0; } + + /* Otherwise, it was no direct match, and the separation key is lexicographically strictly + * greater hence the sub tree defined by the left child, which is lexicographically strictly + * smaller. will contain the key (or a better suited value if the key is not contained). + */ block = lchild; level++; if (level == BNODE_PATH_MAX_NODES) |
From: Sebastian B. <sb...@us...> - 2014-01-19 18:43:09
|
Update of /cvsroot/simplemail/simplemail In directory sfp-cvs-1.v30.ch3.sourceforge.com:/tmp/cvs-serv23883 Modified Files: index_external.c Log Message: Refactored the comparision of the bnode string with a given string into an own function. Index: index_external.c =================================================================== RCS file: /cvsroot/simplemail/simplemail/index_external.c,v retrieving revision 1.29 retrieving revision 1.30 diff -u -d -r1.29 -r1.30 --- index_external.c 19 Jan 2014 18:42:21 -0000 1.29 +++ index_external.c 19 Jan 2014 18:43:07 -0000 1.30 @@ -196,6 +196,26 @@ return str; } +/** + * Compares contents represented by the given element with the given text. If str is the contents, + * then the function places the return value of strcmp(str, text) into *out_cmp. + * + * @param idx + * @param e + * @param text + * @param out_cmp + * @return 0 on failure, 1 on success. + */ +static int bnode_compare_string(struct index_external *idx, struct bnode_element *e, const char *text, int *out_cmp) +{ + char *str = bnode_read_string(idx, e); + if (!str) return 0; + + *out_cmp = strcmp(str, text); + free(str); + return 1; +} + #define BNODE_PATH_MAX_NODES 24 /** @@ -241,11 +261,8 @@ { int cmp; struct bnode_element *e = bnode_get_ith_element_of_node(idx, tmp, i); - char *str = bnode_read_string(idx, e); - if (!str) return 0; - - cmp = strcmp(str, text); - free(str); + if (!bnode_compare_string(idx, e, text, &cmp)) + return 0; if (!cmp) { |
From: Sebastian B. <sb...@us...> - 2014-01-19 18:42:47
|
Update of /cvsroot/simplemail/simplemail/tests In directory sfp-cvs-1.v30.ch3.sourceforge.com:/tmp/cvs-serv23825/tests Modified Files: index_unittest.c Log Message: Added function to check whether all suffixes of a string are contained in a index. Use it for Zauberlehrling. Index: index_unittest.c =================================================================== RCS file: /cvsroot/simplemail/simplemail/tests/index_unittest.c,v retrieving revision 1.10 retrieving revision 1.11 diff -u -d -r1.10 -r1.11 --- index_unittest.c 19 Jan 2014 18:35:24 -0000 1.10 +++ index_unittest.c 19 Jan 2014 18:42:44 -0000 1.11 @@ -183,6 +183,31 @@ CU_ASSERT(0); } +/*******************************************************/ + +static int test_index_contains_all_suffixes_callback(int did, void *userdata) +{ + (*(int*)userdata) = 1; +} + +static void test_index_contains_all_suffixes(struct index *index, const char *text, int did) +{ + int i; + int textl = strlen(text); + for (i=0; i<textl; i++) + { + int called = 0; + index_find_documents(index, test_index_contains_all_suffixes_callback, &called, 1, text + i); + CU_ASSERT(called == 1); + if (called != 1) + { + fprintf(stderr, "Offset %d couldn't be found\n", i); + } + } +} + +/*******************************************************/ + static void test_index_for_algorithm(struct index_algorithm *alg, const char *name) { struct index *index; @@ -204,6 +229,8 @@ ok = index_put_document(index,20,zauberlehrling); CU_ASSERT(ok != 0); + test_index_contains_all_suffixes(index, zauberlehrling, 20); + text = read_file_contents("of-human-bondage.txt"); CU_ASSERT(text != NULL); |
From: Sebastian B. <sb...@us...> - 2014-01-19 18:42:24
|
Update of /cvsroot/simplemail/simplemail In directory sfp-cvs-1.v30.ch3.sourceforge.com:/tmp/cvs-serv23771 Modified Files: index_external.c Log Message: When splitting a node, the second node's lchild must point to the rchild of the former median. Index: index_external.c =================================================================== RCS file: /cvsroot/simplemail/simplemail/index_external.c,v retrieving revision 1.28 retrieving revision 1.29 diff -u -d -r1.28 -r1.29 --- index_external.c 19 Jan 2014 18:41:50 -0000 1.28 +++ index_external.c 19 Jan 2014 18:42:21 -0000 1.29 @@ -513,7 +513,7 @@ /* Second node */ idx->tmp3->num_elements = idx->max_elements_per_node - (median + 1); - + idx->tmp3->lchild = me->gchild; idx->tmp3->leaf = tmp->leaf; memcpy(bnode_get_ith_element_of_node(idx, idx->tmp3, 0), bnode_get_ith_element_of_node(idx, tmp, median + 1), idx->tmp3->num_elements * sizeof(struct bnode_element)); bnode_clear_elements(idx, idx->tmp3, idx->tmp3->num_elements); |
From: Sebastian B. <sb...@us...> - 2014-01-19 18:41:53
|
Update of /cvsroot/simplemail/simplemail In directory sfp-cvs-1.v30.ch3.sourceforge.com:/tmp/cvs-serv23698 Modified Files: index_external.c Log Message: Fixed a memory leak. Index: index_external.c =================================================================== RCS file: /cvsroot/simplemail/simplemail/index_external.c,v retrieving revision 1.27 retrieving revision 1.28 diff -u -d -r1.27 -r1.28 --- index_external.c 19 Jan 2014 18:41:26 -0000 1.27 +++ index_external.c 19 Jan 2014 18:41:50 -0000 1.28 @@ -585,6 +585,7 @@ str = bnode_read_string(idx, be); if (!str) return 0; cmp = strncmp(text, str, strlen(text)); + free(str); if (!cmp) { callback(be->did, userdata); |
From: Sebastian B. <sb...@us...> - 2014-01-19 18:41:28
|
Update of /cvsroot/simplemail/simplemail In directory sfp-cvs-1.v30.ch3.sourceforge.com:/tmp/cvs-serv23657 Modified Files: index_external.c Log Message: When splitting a node, the first node must contain all elements until the median. Index: index_external.c =================================================================== RCS file: /cvsroot/simplemail/simplemail/index_external.c,v retrieving revision 1.26 retrieving revision 1.27 diff -u -d -r1.26 -r1.27 --- index_external.c 19 Jan 2014 18:41:04 -0000 1.26 +++ index_external.c 19 Jan 2014 18:41:26 -0000 1.27 @@ -509,7 +509,7 @@ struct bnode_element me_copy = *me; /* First node */ - tmp->num_elements = median - 1; + tmp->num_elements = median; /* Second node */ idx->tmp3->num_elements = idx->max_elements_per_node - (median + 1); |
From: Sebastian B. <sb...@us...> - 2014-01-19 18:41:06
|
Update of /cvsroot/simplemail/simplemail In directory sfp-cvs-1.v30.ch3.sourceforge.com:/tmp/cvs-serv23615 Modified Files: index_external.c Log Message: Improved the dumping of the index. For instance, newlines are no longer shown. Index: index_external.c =================================================================== RCS file: /cvsroot/simplemail/simplemail/index_external.c,v retrieving revision 1.25 retrieving revision 1.26 diff -u -d -r1.25 -r1.26 --- index_external.c 19 Jan 2014 18:40:46 -0000 1.25 +++ index_external.c 19 Jan 2014 18:41:04 -0000 1.26 @@ -330,11 +330,30 @@ char buf[16]; e = bnode_get_ith_element_of_node(idx, tmp, i); - str = bnode_read_string(idx, e); + if (!(str = bnode_read_string(idx, e))) + { + fprintf(stderr, "Couldn't read string!\n"); + exit(-1); + } snprintf(buf,sizeof(buf),"%s", str); - printf("%d: %d: %d: %s\n", level, block, i, buf); + { + int k; + for (k=0;k<level;k++) + printf(" "); + } + printf("l%d: b%d: i%d: o%04d ", level, block, i, e->str_offset); + { + int k; + for (k=0;k<strlen(buf);k++) + { + if (!buf[k]) break; + if (buf[k] == 10) buf[k] = ' '; + printf("%c", buf[k]); + } + printf("\n"); + } if (!tmp->leaf) dump_index(idx, e->gchild, level + 1); |
From: Sebastian B. <sb...@us...> - 2014-01-19 18:40:48
|
Update of /cvsroot/simplemail/simplemail In directory sfp-cvs-1.v30.ch3.sourceforge.com:/tmp/cvs-serv23557 Modified Files: index_external.c Log Message: Implemented the case that non-direct-root-children can be splitted. Index: index_external.c =================================================================== RCS file: /cvsroot/simplemail/simplemail/index_external.c,v retrieving revision 1.24 retrieving revision 1.25 diff -u -d -r1.24 -r1.25 --- index_external.c 19 Jan 2014 18:40:23 -0000 1.24 +++ index_external.c 19 Jan 2014 18:40:46 -0000 1.25 @@ -443,105 +443,92 @@ int block; bnode *tmp = idx->tmp; struct bnode_path path; + struct bnode_element new_element; if (!bnode_lookup(idx, text, &path)) return 0; current_level = path.max_level; - block = path.node[current_level].block; - i = path.node[current_level].key_index; - - bnode_read_block(idx, tmp, block); + new_element.str_offset = offset; + new_element.str_len = strlen(text); + new_element.did = did; + new_element.gchild = 0; - if (1) + while (current_level >= 0) { + struct bnode_element *e; + + block = path.node[current_level].block; + i = path.node[current_level].key_index; + + bnode_read_block(idx, tmp, block); + + if (!tmp->leaf && current_level == path.max_level) + { + fprintf(stderr, "Cannot insert into a non-leaf\n"); + exit(1); + } + /* Recall that we allocate in our temps one more entry than it would fit in the block, so we surly can * insert the entry now */ - struct bnode_element *e = bnode_get_ith_element_of_node(idx, tmp, i); + e = bnode_get_ith_element_of_node(idx, tmp, i); memmove(e + 1, e, (tmp->num_elements - i) * sizeof(struct bnode_element)); /* New element */ - e->str_offset = offset; - e->str_len = strlen(text); - e->gchild = 0; - e->did = did; + *e = new_element; tmp->num_elements++; - } - if (tmp->num_elements == idx->max_elements_per_node) - { - /* Now we split the node into two nodes. We keep the median out as we - * insert it as a separation value for the two nodes on the parent. - */ - - int median = tmp->num_elements / 2; - struct bnode_element *me = bnode_get_ith_element_of_node(idx, tmp, median); - struct bnode_element me_copy = *me; - - /* First node */ - tmp->num_elements = median - 1; - tmp->leaf = 1; - - /* Second node */ - idx->tmp3->num_elements = idx->max_elements_per_node - median; - idx->tmp3->leaf = 1; - memcpy(bnode_get_ith_element_of_node(idx, idx->tmp3, 0), bnode_get_ith_element_of_node(idx, tmp, median), idx->tmp3->num_elements * sizeof(struct bnode_element)); - bnode_clear_elements(idx, idx->tmp3, idx->tmp3->num_elements); - int tmp3block = bnode_add_block(idx, idx->tmp3); - - /* This should be done as late as possible */ - bnode_clear_elements(idx, tmp, median); - bnode_write_block(idx, tmp, block); - - if (block == idx->root_node) - { - /* Create a new root block if the root was getting full */ - tmp->num_elements = 1; - tmp->lchild = idx->root_node; - tmp->leaf = 0; - me_copy.gchild = tmp3block; - *bnode_get_ith_element_of_node(idx, tmp, 0) = me_copy; - bnode_clear_elements(idx, tmp, 1); - idx->root_node = bnode_add_block(idx, tmp); - } else + if (tmp->num_elements == idx->max_elements_per_node) { - int lchild; + /* Now we split the node into two nodes. We keep the median out as we + * insert it as a separation value for the two nodes on the parent. + */ - /* Read root node, we want to add the new element */ - bnode_read_block(idx, tmp, idx->root_node); + int median = tmp->num_elements / 2; + struct bnode_element *me = bnode_get_ith_element_of_node(idx, tmp, median); + struct bnode_element me_copy = *me; - lchild = tmp->lchild; - i = 0; - /* Find the separation key whose left child points to the splitted block */ - for (i=0; i<tmp->num_elements && lchild != block; i++) - lchild = bnode_get_ith_element_of_node(idx, tmp, i)->gchild; + /* First node */ + tmp->num_elements = median - 1; - assert(lchild == path.node[current_level].block); - assert(i == path.node[current_level - 1].key_index); + /* Second node */ + idx->tmp3->num_elements = idx->max_elements_per_node - (median + 1); - lchild = path.node[current_level].block; - i = path.node[current_level - 1].key_index; + idx->tmp3->leaf = tmp->leaf; + memcpy(bnode_get_ith_element_of_node(idx, idx->tmp3, 0), bnode_get_ith_element_of_node(idx, tmp, median + 1), idx->tmp3->num_elements * sizeof(struct bnode_element)); + bnode_clear_elements(idx, idx->tmp3, idx->tmp3->num_elements); + int tmp3block = bnode_add_block(idx, idx->tmp3); - if (tmp->num_elements == idx->max_elements_per_node) + /* This should be done as late as possible */ + bnode_clear_elements(idx, tmp, median); + bnode_write_block(idx, tmp, block); + + if (block == idx->root_node) { - fprintf(stderr, "Splitting a non-direct-child of the root node %d is not supported for now!\n", block); - exit(1); + assert(current_level == 0); + + /* Create a new root block if the root was getting full */ + tmp->num_elements = 1; + tmp->lchild = idx->root_node; + tmp->leaf = 0; + me_copy.gchild = tmp3block; + *bnode_get_ith_element_of_node(idx, tmp, 0) = me_copy; + bnode_clear_elements(idx, tmp, 1); + idx->root_node = bnode_add_block(idx, tmp); + break; } - struct bnode_element *e = bnode_get_ith_element_of_node(idx, tmp, i); - memmove(e + 1, e, (tmp->num_elements - i) * sizeof(struct bnode_element)); - *e = me_copy; - e->gchild = tmp3block; - tmp->num_elements++; - bnode_write_block(idx, tmp, idx->root_node); + new_element = me_copy; + new_element.gchild = tmp3block; + } else + { + /* There was enough space in the block */ + bnode_write_block(idx, tmp, block); + break; } - } else - { - /* There was enough space in the block, so we simply write it out */ - bnode_write_block(idx, tmp, block); + current_level--; } - return 1; } |
From: Sebastian B. <sb...@us...> - 2014-01-19 18:40:26
|
Update of /cvsroot/simplemail/simplemail In directory sfp-cvs-1.v30.ch3.sourceforge.com:/tmp/cvs-serv23517 Modified Files: index_external.c Log Message: Added another debug function to count the number of leaves. Index: index_external.c =================================================================== RCS file: /cvsroot/simplemail/simplemail/index_external.c,v retrieving revision 1.23 retrieving revision 1.24 diff -u -d -r1.23 -r1.24 --- index_external.c 19 Jan 2014 18:40:05 -0000 1.23 +++ index_external.c 19 Jan 2014 18:40:23 -0000 1.24 @@ -394,6 +394,40 @@ bnode_free(idx, tmp); } + +/** + * Verify whether the index is consitent, i.e., whether all strings are in increasing order. + * + * @param + * @param block + * @param level + */ +static int count_index(struct index_external *idx, int block, int level) +{ + int i, count = 0; + bnode *tmp = bnode_create(idx); + + if (!bnode_read_block(idx, tmp, block)) + return; + + if (!tmp->leaf) + count += count_index(idx, tmp->lchild, level + 1); + + for (i=0; i<tmp->num_elements; i++) + { + struct bnode_element *e; + int rc; + + e = bnode_get_ith_element_of_node(idx, tmp, i); + if (!tmp->leaf) + count += count_index(idx, e->gchild, level + 1); + count++; + } + + bnode_free(idx, tmp); + return count; +} + /** * Inserts the given string into the bnode tree. * |
From: Sebastian B. <sb...@us...> - 2014-01-19 18:40:07
|
Update of /cvsroot/simplemail/simplemail In directory sfp-cvs-1.v30.ch3.sourceforge.com:/tmp/cvs-serv23475 Modified Files: index_external.c Log Message: Clear the proper tmp3 block when splitting. Index: index_external.c =================================================================== RCS file: /cvsroot/simplemail/simplemail/index_external.c,v retrieving revision 1.22 retrieving revision 1.23 diff -u -d -r1.22 -r1.23 --- index_external.c 19 Jan 2014 18:39:31 -0000 1.22 +++ index_external.c 19 Jan 2014 18:40:05 -0000 1.23 @@ -453,7 +453,7 @@ idx->tmp3->num_elements = idx->max_elements_per_node - median; idx->tmp3->leaf = 1; memcpy(bnode_get_ith_element_of_node(idx, idx->tmp3, 0), bnode_get_ith_element_of_node(idx, tmp, median), idx->tmp3->num_elements * sizeof(struct bnode_element)); - bnode_clear_elements(idx, tmp, idx->tmp3->num_elements); + bnode_clear_elements(idx, idx->tmp3, idx->tmp3->num_elements); int tmp3block = bnode_add_block(idx, idx->tmp3); /* This should be done as late as possible */ |
From: Sebastian B. <sb...@us...> - 2014-01-19 18:39:34
|
Update of /cvsroot/simplemail/simplemail In directory sfp-cvs-1.v30.ch3.sourceforge.com:/tmp/cvs-serv23403 Modified Files: index_external.c Log Message: Clear the tmp block after copying elements from it. Index: index_external.c =================================================================== RCS file: /cvsroot/simplemail/simplemail/index_external.c,v retrieving revision 1.21 retrieving revision 1.22 diff -u -d -r1.21 -r1.22 --- index_external.c 19 Jan 2014 18:39:02 -0000 1.21 +++ index_external.c 19 Jan 2014 18:39:31 -0000 1.22 @@ -448,9 +448,6 @@ /* First node */ tmp->num_elements = median - 1; tmp->leaf = 1; - bnode_clear_elements(idx, tmp, median); - /* This should be done as late as possible */ - bnode_write_block(idx, tmp, block); /* Second node */ idx->tmp3->num_elements = idx->max_elements_per_node - median; @@ -459,6 +456,10 @@ bnode_clear_elements(idx, tmp, idx->tmp3->num_elements); int tmp3block = bnode_add_block(idx, idx->tmp3); + /* This should be done as late as possible */ + bnode_clear_elements(idx, tmp, median); + bnode_write_block(idx, tmp, block); + if (block == idx->root_node) { /* Create a new root block if the root was getting full */ |
From: Sebastian B. <sb...@us...> - 2014-01-19 18:39:05
|
Update of /cvsroot/simplemail/simplemail In directory sfp-cvs-1.v30.ch3.sourceforge.com:/tmp/cvs-serv23343 Modified Files: index_external.c Log Message: Determine the index (and lchild) via information stored in the btree path. When we go up to the root after a node became full, we use now the information stored in the bnode path. The old variant is still there, but will vanish soon. Index: index_external.c =================================================================== RCS file: /cvsroot/simplemail/simplemail/index_external.c,v retrieving revision 1.20 retrieving revision 1.21 diff -u -d -r1.20 -r1.21 --- index_external.c 19 Jan 2014 18:38:19 -0000 1.20 +++ index_external.c 19 Jan 2014 18:39:02 -0000 1.21 @@ -27,6 +27,7 @@ * times from the external media. */ +#include <assert.h> #include <errno.h> #include <stdlib.h> #include <stdio.h> @@ -477,11 +478,16 @@ lchild = tmp->lchild; i = 0; - /* Find the separation key whose left child points to the splitted block */ for (i=0; i<tmp->num_elements && lchild != block; i++) lchild = bnode_get_ith_element_of_node(idx, tmp, i)->gchild; + assert(lchild == path.node[current_level].block); + assert(i == path.node[current_level - 1].key_index); + + lchild = path.node[current_level].block; + i = path.node[current_level - 1].key_index; + if (tmp->num_elements == idx->max_elements_per_node) { fprintf(stderr, "Splitting a non-direct-child of the root node %d is not supported for now!\n", block); |
From: Sebastian B. <sb...@us...> - 2014-01-19 18:38:21
|
Update of /cvsroot/simplemail/simplemail In directory sfp-cvs-1.v30.ch3.sourceforge.com:/tmp/cvs-serv23267 Modified Files: index_external.c Log Message: Introduce current_level variable. Index: index_external.c =================================================================== RCS file: /cvsroot/simplemail/simplemail/index_external.c,v retrieving revision 1.19 retrieving revision 1.20 diff -u -d -r1.19 -r1.20 --- index_external.c 19 Jan 2014 18:37:09 -0000 1.19 +++ index_external.c 19 Jan 2014 18:38:19 -0000 1.20 @@ -404,6 +404,7 @@ static int bnode_insert_string(struct index_external *idx, int did, int offset, const char *text) { int i; + int current_level; int block; bnode *tmp = idx->tmp; struct bnode_path path; @@ -411,8 +412,9 @@ if (!bnode_lookup(idx, text, &path)) return 0; - block = path.node[path.max_level].block; - i = path.node[path.max_level].key_index; + current_level = path.max_level; + block = path.node[current_level].block; + i = path.node[current_level].key_index; bnode_read_block(idx, tmp, block); |
From: Sebastian B. <sb...@us...> - 2014-01-19 18:37:12
|
Update of /cvsroot/simplemail/simplemail In directory sfp-cvs-1.v30.ch3.sourceforge.com:/tmp/cvs-serv23153 Modified Files: index_external.c Log Message: Minor code simplification. Index: index_external.c =================================================================== RCS file: /cvsroot/simplemail/simplemail/index_external.c,v retrieving revision 1.18 retrieving revision 1.19 diff -u -d -r1.18 -r1.19 --- index_external.c 19 Jan 2014 18:36:50 -0000 1.18 +++ index_external.c 19 Jan 2014 18:37:09 -0000 1.19 @@ -232,6 +232,9 @@ int lchild = tmp->lchild; + path->max_level = level; + path->node[level].block = block; + /* Find the appropriate slot. TODO: Use binary search */ for (i=0; i < tmp->num_elements; i++) { @@ -245,8 +248,6 @@ if (!cmp) { - path->max_level = level; - path->node[level].block = block; path->node[level].key_index = i; /* Direct match with a separation key */ goto out; @@ -259,8 +260,6 @@ lchild = e->gchild; } - path->max_level = level; - path->node[level].block = block; path->node[level].key_index = i; if (block == lchild && !tmp->leaf) |
From: Sebastian B. <sb...@us...> - 2014-01-19 18:36:52
|
Update of /cvsroot/simplemail/simplemail In directory sfp-cvs-1.v30.ch3.sourceforge.com:/tmp/cvs-serv23094 Modified Files: index_external.c Log Message: Remember the complete path when ascending while looking up. Index: index_external.c =================================================================== RCS file: /cvsroot/simplemail/simplemail/index_external.c,v retrieving revision 1.17 retrieving revision 1.18 diff -u -d -r1.17 -r1.18 --- index_external.c 19 Jan 2014 18:36:31 -0000 1.17 +++ index_external.c 19 Jan 2014 18:36:50 -0000 1.18 @@ -195,26 +195,40 @@ return str; } +#define BNODE_PATH_MAX_NODES 24 + +/** + * Represents a path in the bnode tree. + */ +struct bnode_path +{ + int max_level; + struct + { + int block; + int key_index; + } node[BNODE_PATH_MAX_NODES]; +}; + /** * Lookup the given text. * * @param idx * @param text - * @param out_block + * @param out_block_array * @param out_index * @return */ -static int bnode_lookup(struct index_external *idx, const char *text, int *out_block, int *out_index) +static int bnode_lookup(struct index_external *idx, const char *text, struct bnode_path *path) { int i; bnode *tmp = idx->tmp; - int prev_block; int block = idx->root_node; + int level = 0; do { bnode_read_block(idx, tmp, block); - prev_block = block; int lchild = tmp->lchild; @@ -231,6 +245,9 @@ if (!cmp) { + path->max_level = level; + path->node[level].block = block; + path->node[level].key_index = i; /* Direct match with a separation key */ goto out; } @@ -242,18 +259,22 @@ lchild = e->gchild; } + path->max_level = level; + path->node[level].block = block; + path->node[level].key_index = i; + if (block == lchild && !tmp->leaf) { fprintf(stderr, "Endless loop detected!\n"); return 0; } block = lchild; + level++; + if (level == BNODE_PATH_MAX_NODES) + return 0; } while (!tmp->leaf); out: - *out_block = prev_block; - *out_index = i; - return 1; } @@ -386,10 +407,14 @@ int i; int block; bnode *tmp = idx->tmp; + struct bnode_path path; - if (!bnode_lookup(idx, text, &block, &i)) + if (!bnode_lookup(idx, text, &path)) return 0; + block = path.node[path.max_level].block; + i = path.node[path.max_level].key_index; + bnode_read_block(idx, tmp, block); if (1) @@ -494,10 +519,14 @@ struct bnode_element *be; int text_len = strlen(text); int nd = 0; + struct bnode_path path; - if (!bnode_lookup(idx, text, &block, &i)) + if (!bnode_lookup(idx, text, &path)) return 0; + block = path.node[path.max_level].block; + i = path.node[path.max_level].key_index; + if (!bnode_read_block(idx, tmp, block)) return 0; |
From: Sebastian B. <sb...@us...> - 2014-01-19 18:36:33
|
Update of /cvsroot/simplemail/simplemail In directory sfp-cvs-1.v30.ch3.sourceforge.com:/tmp/cvs-serv23054 Modified Files: index_external.c Log Message: Function index_external_find_documents() now uses bnode_find_string(). Index: index_external.c =================================================================== RCS file: /cvsroot/simplemail/simplemail/index_external.c,v retrieving revision 1.16 retrieving revision 1.17 diff -u -d -r1.16 -r1.17 --- index_external.c 19 Jan 2014 18:34:45 -0000 1.16 +++ index_external.c 19 Jan 2014 18:36:31 -0000 1.17 @@ -488,24 +488,35 @@ static int bnode_find_string(struct index_external *idx, const char *text, int (*callback)(int did, void *userdata), void *userdata) { int i; + int block; + char *str; bnode *tmp = idx->tmp; + struct bnode_element *be; int text_len = strlen(text); + int nd = 0; - bnode_read_block(idx, tmp, 0); + if (!bnode_lookup(idx, text, &block, &i)) + return 0; - /* Find the appropriate slot. TODO: Use binary search */ - for (i=0; i < tmp->num_elements; i++) + if (!bnode_read_block(idx, tmp, block)) + return 0; + + for (;i<tmp->num_elements;i++) { - struct bnode_element *e = bnode_get_ith_element_of_node(idx, tmp, i); - if (text_len > tmp->num_elements) - continue; - char *str = bnode_read_string(idx, e); + int cmp; + be = bnode_get_ith_element_of_node(idx, tmp, i); + str = bnode_read_string(idx, be); if (!str) return 0; - if (!strncmp(str, text, text_len)) + cmp = strncmp(text, str, strlen(text)); + if (!cmp) { - callback(e->did, userdata); + callback(be->did, userdata); + nd++; } + else + break; } + return nd; } void index_external_dispose(struct index *index) @@ -599,17 +610,17 @@ idx = (struct index_external*)index; + if (num_substrings != 1) + { + fprintf(stderr, "Searching with only one sub string is supported for now.\n"); + exit(-1); + } + va_copy(substrings_copy,substrings); for (i=0;i<num_substrings;i++) { - bnode_find_string(idx, va_arg(substrings_copy, char *), callback, userdata); -// bnode_insert_string( -// if (!strstr(d->txt,va_arg(substrings_copy,char *))) -// { -// take = 0; -// break; -// } + nd = bnode_find_string(idx, va_arg(substrings_copy, char *), callback, userdata); } va_end(substrings_copy); |
From: Sebastian B. <sb...@us...> - 2014-01-19 18:35:26
|
Update of /cvsroot/simplemail/simplemail/tests In directory sfp-cvs-1.v30.ch3.sourceforge.com:/tmp/cvs-serv22931/tests Modified Files: index_unittest.c makefile Added Files: of-human-bondage.txt.gz Log Message: Added "Of Human Bondage" for testing purposes. --- NEW FILE: of-human-bondage.txt.gz --- (This appears to be a binary file; contents omitted.) Index: index_unittest.c =================================================================== RCS file: /cvsroot/simplemail/simplemail/tests/index_unittest.c,v retrieving revision 1.9 retrieving revision 1.10 diff -u -d -r1.9 -r1.10 --- index_unittest.c 19 Jan 2014 18:34:17 -0000 1.9 +++ index_unittest.c 19 Jan 2014 18:35:24 -0000 1.10 @@ -128,6 +128,48 @@ /*******************************************************/ + +/** + * Read the content of the file given by the filename. + * + * @param filename the name of the file which should be read. + * + * @return the contents or NULL on an error. The returned + * value must be freed with free() when no longer in use. + */ +static char *read_file_contents(const char *filename) +{ + long size; + char *ret = NULL; + char *contents = NULL; + FILE *fh; + + if (!(fh = fopen(filename,"r"))) + return NULL; + + fseek(fh,0,SEEK_END); + size = ftell(fh); + if (size < 1) + goto out; + fseek(fh,0,SEEK_SET); + + if (!(contents = malloc(size+1))) + goto out; + if ((fread(contents, 1, size, fh) != size)) + goto out; + contents[size] = 0; + + ret = contents; + contents = NULL; +out: + fclose(fh); + free(contents); + return ret; +} + + +/*******************************************************/ + static int test_index_naive_callback_called; static int test_index_naive_callback(int did, void *userdata) @@ -144,6 +186,7 @@ static void test_index_for_algorithm(struct index_algorithm *alg, const char *name) { struct index *index; + char *text; int ok; int nd; @@ -161,6 +204,12 @@ ok = index_put_document(index,20,zauberlehrling); CU_ASSERT(ok != 0); + text = read_file_contents("of-human-bondage.txt"); + CU_ASSERT(text != NULL); + + ok = index_put_document(index,32,text); + CU_ASSERT(ok != 0); + nd = index_find_documents(index,test_index_naive_callback,NULL,1,"very"); CU_ASSERT(test_index_naive_callback_called == 1); CU_ASSERT(nd == 1); @@ -172,6 +221,7 @@ CU_ASSERT(nd == 0); index_dispose(index); + free(text); } /*******************************************************/ Index: makefile =================================================================== RCS file: /cvsroot/simplemail/simplemail/tests/makefile,v retrieving revision 1.41 retrieving revision 1.42 diff -u -d -r1.41 -r1.42 --- makefile 24 Dec 2013 09:23:38 -0000 1.41 +++ makefile 19 Jan 2014 18:35:24 -0000 1.42 @@ -45,6 +45,11 @@ perl gen-stubs.pl <test-objs/$@error >$@stubs.c $(CC) $@stubs.c unittest_tmpl.c -DUNITTEST_TABLE=\"$@_table.c\" -DUNITTEST_FILE=\"$<\" $(CFLAGS) -lcunit -L. -l$(SIMPLEMAIL_LIB) $(GLIB_LDFLAGS) $(GTK_LDFLAGS) -o $@ +of-human-bondage.txt: of-human-bondage.txt.gz + gzip -c -d $< >$@ + +index_unittest: of-human-bondage.txt + .PHONY: all all: files $(TESTEXES) memleaks |
From: Sebastian B. <sb...@us...> - 2014-01-19 18:34:47
|
Update of /cvsroot/simplemail/simplemail In directory sfp-cvs-1.v30.ch3.sourceforge.com:/tmp/cvs-serv22855 Modified Files: index_external.c Log Message: Added short description and fixed header comment. Index: index_external.c =================================================================== RCS file: /cvsroot/simplemail/simplemail/index_external.c,v retrieving revision 1.15 retrieving revision 1.16 diff -u -d -r1.15 -r1.16 --- index_external.c 19 Jan 2014 18:33:17 -0000 1.15 +++ index_external.c 19 Jan 2014 18:34:45 -0000 1.16 @@ -1,6 +1,6 @@ /** - * index_external.c - a external string index implementation for SimpleMail. - * Copyright (C) 2012 Sebastian Bauer + * index_external.c - an external string index implementation for SimpleMail. + * Copyright (C) 2013 Sebastian Bauer * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as @@ -18,6 +18,13 @@ /** * @file index_external.c + * + * This is a naive implementation of an external memory data structure that + * can be used for full text search. It is basically a b tree that stores all + * suffixes of each document that is inserted. + * + * It is very slow at the moment, in particular as the strings are read multiple + * times from the external media. */ #include <errno.h> |
From: Sebastian B. <sb...@us...> - 2014-01-19 18:34:19
|
Update of /cvsroot/simplemail/simplemail/tests In directory sfp-cvs-1.v30.ch3.sourceforge.com:/tmp/cvs-serv22802/tests Modified Files: index_unittest.c Log Message: Test whether the callback has really been called. Index: index_unittest.c =================================================================== RCS file: /cvsroot/simplemail/simplemail/tests/index_unittest.c,v retrieving revision 1.8 retrieving revision 1.9 diff -u -d -r1.8 -r1.9 --- index_unittest.c 19 Jan 2014 18:28:07 -0000 1.8 +++ index_unittest.c 19 Jan 2014 18:34:17 -0000 1.9 @@ -133,6 +133,7 @@ static int test_index_naive_callback(int did, void *userdata) { CU_ASSERT(did==4); + test_index_naive_callback_called = 1; } static int test_index_naive_callback2(int did, void *userdata) @@ -146,6 +147,8 @@ int ok; int nd; + test_index_naive_callback_called = 0; + index = index_create(alg, name); CU_ASSERT(index != NULL); @@ -159,6 +162,7 @@ CU_ASSERT(ok != 0); nd = index_find_documents(index,test_index_naive_callback,NULL,1,"very"); + CU_ASSERT(test_index_naive_callback_called == 1); CU_ASSERT(nd == 1); ok = index_remove_document(index,4); @@ -183,5 +187,5 @@ /* @Test */ void test_index_external(void) { - test_index_for_algorithm(&index_external, "external-index.dat"); + test_index_for_algorithm(&index_external, "/tmp/external-index.dat"); } |
From: Sebastian B. <sb...@us...> - 2014-01-19 18:33:19
|
Update of /cvsroot/simplemail/simplemail In directory sfp-cvs-1.v30.ch3.sourceforge.com:/tmp/cvs-serv22696 Modified Files: index_external.c Log Message: Added two debug functions: dump_index() and verify_index(). Index: index_external.c =================================================================== RCS file: /cvsroot/simplemail/simplemail/index_external.c,v retrieving revision 1.14 retrieving revision 1.15 diff -u -d -r1.14 -r1.15 --- index_external.c 19 Jan 2014 18:32:50 -0000 1.14 +++ index_external.c 19 Jan 2014 18:33:17 -0000 1.15 @@ -278,6 +278,95 @@ } /** + * Dump the entire index. + * + * @param idx + * @param block + * @param level + */ +static void dump_index(struct index_external *idx, int block, int level) +{ + int i; + bnode *tmp = bnode_create(idx); + if (!bnode_read_block(idx, tmp, block)) + return; + + printf("dump_index(block=%d, level=%d, leaf=%d) elements=%d\n", block, level, tmp->leaf, tmp->num_elements); + if (!tmp->leaf) + dump_index(idx, tmp->lchild, level + 1); + + for (i=0; i<tmp->num_elements; i++) + { + char *str; + struct bnode_element *e; + char buf[16]; + + e = bnode_get_ith_element_of_node(idx, tmp, i); + str = bnode_read_string(idx, e); + snprintf(buf,sizeof(buf),"%s", str); + + printf("%d: %d: %d: %s\n", level, block, i, buf); + + if (!tmp->leaf) + dump_index(idx, e->gchild, level + 1); + + free(str); + } + + bnode_free(idx, tmp); +} + +/** + * Verify whether the index is consitent, i.e., whether all strings are in increasing order. + * + * @param + * @param block + * @param level + */ +static void verify_index(struct index_external *idx, int block, int level) +{ + int i; + bnode *tmp = bnode_create(idx); + char *str1 = strdup(""); + char *str2; + int offset1 = -1; + + if (!bnode_read_block(idx, tmp, block)) + return; + + if (!tmp->leaf) + verify_index(idx, tmp->lchild, level + 1); + + for (i=0; i<tmp->num_elements; i++) + { + struct bnode_element *e; + int rc; + + e = bnode_get_ith_element_of_node(idx, tmp, i); + str2 = bnode_read_string(idx, e); + if ((rc = strcmp(str1,str2)) > 0) + { + int *a = 0; + printf("violation\n"); + printf("%d: %d: %d: %d-%d=%d |%s |%s (%d %d)\n", level, block, i, str1[0], str2[0], rc, str1, str2, offset1, e->str_offset); + *a = 0; + exit(1); + } + + + if (!tmp->leaf) + verify_index(idx, e->gchild, level + 1); + + free(str1); + str1 = str2; + offset1= e->str_offset; + } + + free(str1); + bnode_free(idx, tmp); +} + +/** * Inserts the given string into the bnode tree. * * @param idx |
From: Sebastian B. <sb...@us...> - 2014-01-19 18:32:52
|
Update of /cvsroot/simplemail/simplemail In directory sfp-cvs-1.v30.ch3.sourceforge.com:/tmp/cvs-serv22601 Modified Files: index_external.c Log Message: Seek at the end of the file before determing the offset of the new string. The offset would be wrong otherwise as we seek in the same file when reading the strings. Index: index_external.c =================================================================== RCS file: /cvsroot/simplemail/simplemail/index_external.c,v retrieving revision 1.13 retrieving revision 1.14 diff -u -d -r1.13 -r1.14 --- index_external.c 19 Jan 2014 18:32:01 -0000 1.13 +++ index_external.c 19 Jan 2014 18:32:50 -0000 1.14 @@ -475,6 +475,8 @@ idx = (struct index_external*)index; /* Determine position and write text */ + if (fseek(idx->string_file, 0, SEEK_END) != 0) + return 0; long offset = ftell(idx->string_file); fputs(text, idx->string_file); |
From: Sebastian B. <sb...@us...> - 2014-01-19 18:32:03
|
Update of /cvsroot/simplemail/simplemail In directory sfp-cvs-1.v30.ch3.sourceforge.com:/tmp/cvs-serv22538 Modified Files: index_external.c Log Message: Ensure 0-byte when reading a string. Index: index_external.c =================================================================== RCS file: /cvsroot/simplemail/simplemail/index_external.c,v retrieving revision 1.12 retrieving revision 1.13 diff -u -d -r1.12 -r1.13 --- index_external.c 19 Jan 2014 18:31:43 -0000 1.12 +++ index_external.c 19 Jan 2014 18:32:01 -0000 1.13 @@ -184,6 +184,7 @@ return 0; if (fread(str, 1, element->str_len, idx->string_file) != element->str_len) return 0; + str[element->str_len] = 0; return str; } |