From: <tho...@us...> - 2011-05-11 15:12:27
Revision: 4483
          http://bigdata.svn.sourceforge.net/bigdata/?rev=4483&view=rev
Author:   thompsonbry
Date:     2011-05-11 15:12:20 +0000 (Wed, 11 May 2011)

Log Message:
-----------
Refactored slightly and extended the test case and the worksheet to force a
split of a buddy bucket where global depth == local depth. This case will
force the introduction of a new directory page. I will work on the logic for
introducing the new directory page next.

Modified Paths:
--------------
    branches/QUADS_QUERY_BRANCH/bigdata/src/architecture/htree.xls
    branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HTree.java
    branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/TestHTree.java

Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/architecture/htree.xls
===================================================================
(Binary files differ)

Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HTree.java
===================================================================
--- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HTree.java	2011-05-11 14:26:34 UTC (rev 4482)
+++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HTree.java	2011-05-11 15:12:20 UTC (rev 4483)
@@ -479,28 +479,43 @@
      * method signature to pass an insert enum {ALLDUPS,DUPKEYS,NODUPS}.
      */
     public byte[] insert(final byte[] key, final byte[] value) {
+
+        if (key == null)
+            throw new IllegalArgumentException();
+
+        // the current directory page.
         DirectoryPage current = getRoot(); // start at the root.
+
+        // #of prefix bits already consumed.
         int prefixLength = 0;// prefix length of the root is always zero.
+
+        // buddyOffset into [current].
         int buddyOffset = 0; // buddyOffset of the root is always zero.
+
         while (true) {
+
             // skip prefixLength bits and then extract globalDepth bits.
             final int hashBits = current.getLocalHashCode(key, prefixLength);
+
             // find the child directory page or bucket page.
             final AbstractPage child = current.getChild(hashBits, buddyOffset);
+
             if (child.isLeaf()) {
+
                 /*
                  * Found the bucket page, update it.
                  */
+
                 final BucketPage bucketPage = (BucketPage) child;
+
                 // attempt to insert the tuple into the bucket.
+                if (!bucketPage.insert(key, value, current, buddyOffset)) {
-
+                    // TODO if(parent.isReadOnly()) parent = copyOnWrite();
-
+
                     if (current.globalDepth == child.globalDepth) {
-
+
                         /*
                          * There is only one buddy hash bucket on the page. To
                          * split the page, we have to introduce a new directory
@@ -509,8 +524,12 @@
                          * TODO Introduce new directory page if sole buddy
                          * bucket is full.
                          */
+                        addDirectoryPageAndSplitBucketPage(current,
+                                buddyOffset, bucketPage);
 
-                        throw new UnsupportedOperationException();
+                        // The children of [current] have changed so we will
+                        // search current again.
+                        continue;
 
                     }
 
@@ -519,22 +538,36 @@
                     // Try again. The children have changed.
                     continue;
+
                 }
+
                 return null; // TODO should be Void return? or depends on enum controlling dups behavior?
+
             }
+
             /*
              * Recursive descent into a child directory page. We have to update
              * the prefixLength and compute the offset of the buddy hash table
              * within the child before descending into the child.
             */
+
+            // increase prefix length by the #of address bits consumed by the
+            // buddy hash table. TODO child.globalDepth might always be
+            // [addressBits] for a directory page...
             prefixLength = prefixLength + child.globalDepth;
+
+            // find the offset of the buddy hash table in the child.
             buddyOffset = HTreeUtil
                     .getBuddyOffset(hashBits, current.globalDepth,
                             child.globalDepth/* localDepthOfChild */);
+
+            // update current so we can search in the child.
             current = (DirectoryPage) child;
+
         }
 
-    }
+    } // insert()
+
     public byte[] remove(final byte[] key) {
         // TODO Remove 1st match, returning value.
         throw new UnsupportedOperationException();
@@ -622,103 +655,135 @@
 
         nleaves++; // One more bucket page in the hash tree.
 
-        /*
-         * Update pointers in buddy hash table in the parent.
-         *
-         * There will be [npointers] slots in the appropriate buddy hash table
-         * in the parent directory which point to the old bucket page. The upper
-         * 1/2 of those pointers will be modified to point to the new bucket
-         * page. The lower 1/2 of the pointers will be unchanged.
-         */
-        {
+        // update the pointers in the parent.
+        updatePointersInParent(parent, buddyOffset, oldDepth, oldBucket,
+                newBucket);
+
+        // redistribute buddy buckets between old and new pages.
+        redistributeBuddyBuckets(oldDepth, newDepth, oldBucket, newBucket);
 
-            // #of address slots in the parent buddy hash table.
-            final int slotsPerBuddy = (1 << parent.globalDepth);
+        // TODO assert invariants?
+
+    }
 
-            // #of pointers in the parent buddy hash table to the old bucket.
-            final int npointers = 1 << (parent.globalDepth - oldDepth);
-
-            // Must be at least two slots since we will change at least one.
-            assert slotsPerBuddy > 1 : "slotsPerBuddy=" + slotsPerBuddy;
+    /**
+     * Update pointers in buddy hash table in the parent in order to link the
+     * new {@link BucketPage} into the parent {@link DirectoryPage}.
+     * <p>
+     * There will be [npointers] slots in the appropriate buddy hash table in
+     * the parent {@link DirectoryPage} which point to the old
+     * {@link BucketPage}. The upper 1/2 of those pointers will be modified to
+     * point to the new {@link BucketPage}. The lower 1/2 of the pointers will
+     * be unchanged.
+     *
+     * @param parent
+     *            The parent {@link DirectoryPage}.
+     * @param buddyOffset
+     *            The buddyOffset within the <i>parent</i>. This identifies
+     *            which buddy hash table in the parent must have its pointers
+     *            updated such that it points to both the original child and
+     *            the new child.
+     * @param oldDepth
+     *            The depth of the oldBucket before the split.
+     * @param oldBucket
+     *            The old {@link BucketPage}.
+     * @param newBucket
+     *            The new {@link BucketPage}.
+     */
+    private void updatePointersInParent(final DirectoryPage parent,
+            final int buddyOffset, final int oldDepth,
+            final BucketPage oldBucket, final BucketPage newBucket) {
 
-            // Must be at least two pointers since we will change at least one.
-            assert npointers > 1 : "npointers=" + npointers;
-
-            // The first slot in the buddy hash table in the parent.
-            final int firstSlot = buddyOffset;
-
-            // The last slot in the buddy hash table in the parent.
-            final int lastSlot = buddyOffset + slotsPerBuddy;
+        // #of address slots in the parent buddy hash table.
+        final int slotsPerBuddy = (1 << parent.globalDepth);
 
-            /*
-             * Count pointers to the old bucket page. There should be
-             * [npointers] of them and they should be contiguous.
-             *
-             * Note: We can test References here rather than comparing addresses
-             * because we know that the parent and the old bucket are both
-             * mutable. This means that their childRef is defined and their
-             * storage address is NULL.
-             *
-             * TODO This logic should be in DirectoryPage#dump()
-             */
-            int firstPointer = -1;
-            int nfound = 0;
-            boolean discontiguous = false;
-            for (int i = firstSlot; i < lastSlot; i++) {
-                if (parent.childRefs[i] == oldBucket.self) {
-                    if (firstPointer == -1)
-                        firstPointer = i;
-                    nfound++;
-                    if (((MutableDirectoryPageData) parent.data).childAddr[i] != IRawStore.NULL) {
-                        throw new RuntimeException(
-                                "Child address should be NULL since child is dirty");
-                    }
-                } else {
-                    if (firstPointer != -1 && nfound != npointers) {
-                        discontiguous = true;
-                    }
+        // #of pointers in the parent buddy hash table to the old bucket.
+        final int npointers = 1 << (parent.globalDepth - oldDepth);
+
+        // Must be at least two slots since we will change at least one.
+        assert slotsPerBuddy > 1 : "slotsPerBuddy=" + slotsPerBuddy;
+
+        // Must be at least two pointers since we will change at least one.
+        assert npointers > 1 : "npointers=" + npointers;
+
+        // The first slot in the buddy hash table in the parent.
+        final int firstSlot = buddyOffset;
+
+        // The last slot in the buddy hash table in the parent.
+        final int lastSlot = buddyOffset + slotsPerBuddy;
+
+        /*
+         * Count pointers to the old bucket page. There should be
+         * [npointers] of them and they should be contiguous.
+         *
+         * Note: We can test References here rather than comparing addresses
+         * because we know that the parent and the old bucket are both
+         * mutable. This means that their childRef is defined and their
+         * storage address is NULL.
+         *
+         * TODO This logic should be in DirectoryPage#dump()
+         */
+        int firstPointer = -1;
+        int nfound = 0;
+        boolean discontiguous = false;
+        for (int i = firstSlot; i < lastSlot; i++) {
+            if (parent.childRefs[i] == oldBucket.self) {
+                if (firstPointer == -1)
+                    firstPointer = i;
+                nfound++;
+                if (((MutableDirectoryPageData) parent.data).childAddr[i] != IRawStore.NULL) {
+                    throw new RuntimeException(
+                            "Child address should be NULL since child is dirty");
                 }
+            } else {
+                if (firstPointer != -1 && nfound != npointers) {
+                    discontiguous = true;
+                }
             }
-            if (firstPointer == -1)
-                throw new RuntimeException("No pointers to child");
-            if (nfound != npointers)
-                throw new RuntimeException("Expected " + npointers
-                        + " pointers to child, but found=" + nfound);
-            if (discontiguous)
-                throw new RuntimeException(
-                        "Pointers to child are discontiguous in parent's buddy hash table.");
+        }
+        if (firstPointer == -1)
+            throw new RuntimeException("No pointers to child");
+        if (nfound != npointers)
+            throw new RuntimeException("Expected " + npointers
+                    + " pointers to child, but found=" + nfound);
+        if (discontiguous)
+            throw new RuntimeException(
+                    "Pointers to child are discontiguous in parent's buddy hash table.");
 
-            // Update the upper 1/2 of the pointers to the new bucket.
-            for (int i = firstPointer + (npointers >> 1); i < npointers; i++) {
+        // Update the upper 1/2 of the pointers to the new bucket.
+        for (int i = firstPointer + (npointers >> 1); i < npointers; i++) {
 
-                if (parent.childRefs[i] != oldBucket.self)
-                    throw new RuntimeException("Does not point to old child.");
-
-                // update the references to the new bucket.
-                parent.childRefs[i] = (Reference) newBucket.self;
-
-            }
+            if (parent.childRefs[i] != oldBucket.self)
+                throw new RuntimeException("Does not point to old child.");
+            // update the references to the new bucket.
+            parent.childRefs[i] = (Reference) newBucket.self;
+        }
-
-        // redistribute buddy buckets between old and new pages.
-        redistributeBuddyBuckets(oldDepth, newDepth, oldBucket, newBucket);
-
-        // TODO assert invariants?
-
+    }
 
     /**
      * Redistribute the buddy buckets.
      * <p>
-     * Note: We are not changing the #of buddy buckets, just their size and
-     * the page on which they are found. Any tuples in a source bucket will
-     * wind up in the same bucket afterwards, but the page and offset on the
-     * page of the buddy bucket may have been changed.
+     * Note: We are not changing the #of buddy buckets, just their size and the
+     * page on which they are found. Any tuples in a source bucket will wind up
+     * in the same bucket afterwards, but the page and offset on the page of the
+     * buddy bucket may have been changed.
      * <p>
-     * We proceed backwards, moving the upper half of the buddy buckets to
-     * the new bucket page first and then spreading out the lower half of
-     * the source page among the new bucket boundaries on the page.
+     * We proceed backwards, moving the upper half of the buddy buckets to the
+     * new bucket page first and then spreading out the lower half of the source
+     * page among the new bucket boundaries on the page.
+     *
+     * @param oldDepth
+     *            The depth of the old {@link BucketPage} before the split.
+     * @param newDepth
+     *            The depth of the old and new {@link BucketPage} after the
+     *            split (this is just oldDepth+1).
+     * @param oldBucket
+     *            The old {@link BucketPage}.
+     * @param newBucket
+     *            The new {@link BucketPage}.
      */
     private void redistributeBuddyBuckets(final int oldDepth,
             final int newDepth, final BucketPage oldBucket,
@@ -869,6 +934,24 @@
         }
 
     }
+
+    /**
+     * Split when <code>globalDepth == localDepth</code>. This case requires the
+     * introduction of a new {@link DirectoryPage}.
+     *
+     * @param parent
+     *            The parent.
+     * @param buddyOffset
+     *            The offset of the buddy hash table within the parent.
+     * @param oldBucket
+     *            The {@link BucketPage} to be split.
+     */
+    private void addDirectoryPageAndSplitBucketPage(final DirectoryPage parent,
+            final int buddyOffset, final BucketPage oldBucket) {
+
+        throw new UnsupportedOperationException();
+
+    }
 
     /**
      * Persistence capable abstract base class for HTree pages.

Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/TestHTree.java
===================================================================
--- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/TestHTree.java	2011-05-11 14:26:34 UTC (rev 4482)
+++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/TestHTree.java	2011-05-11 15:12:20 UTC (rev 4483)
@@ -296,7 +296,109 @@
                 .lookupAll(new byte[] { 0x01 }));
         AbstractBTreeTestCase.assertSameIterator(//
                 new byte[][] {}, htree.lookupAll(new byte[] { 0x02 }));
-
+
+        /*
+         * 4. Insert another duplicate key. The buddy bucket is now full
+         * again. It is the only buddy bucket on the page.
+         *
+         * Note: At this point if we insert another duplicate key the page
+         * size will be doubled because all keys on the page are duplicates
+         * and there is only one buddy bucket on the page.
+         *
+         * However, rather than test the doubling of the page size, this
+         * example is written to test a split where global depth == local
+         * depth, which will cause the introduction of a new directory page
+         * in the htree.
+         */
+        htree.insert(new byte[] { 0x01 }, new byte[] { 0x01 });
+        assertEquals("nnodes", 1, htree.getNodeCount());
+        assertEquals("nleaves", 3, htree.getLeafCount());
+        assertEquals("nentries", 4, htree.getEntryCount());
+        htree.dump(Level.ALL, System.err, true/* materialize */);
+        assertTrue(root == htree.getRoot());
+        assertEquals(4, root.childRefs.length);
+        assertTrue(a == (BucketPage) root.childRefs[0].get());
+        assertTrue(c == (BucketPage) root.childRefs[1].get());
+        assertTrue(b == (BucketPage) root.childRefs[2].get());
+        assertTrue(b == (BucketPage) root.childRefs[3].get());
+        assertEquals(2, root.getGlobalDepth());
+        assertEquals(2, a.getGlobalDepth());// localDepth has increased.
+        assertEquals(1, b.getGlobalDepth());//
+        assertEquals(2, c.getGlobalDepth());// localDepth is same as [a].
+        assertTrue(htree.contains(new byte[] { 0x01 }));
+        assertFalse(htree.contains(new byte[] { 0x02 }));
+        assertEquals(new byte[] { 0x01 }, htree
+                .lookupFirst(new byte[] { 0x01 }));
+        assertNull(htree.lookupFirst(new byte[] { 0x02 }));
+        AbstractBTreeTestCase.assertSameIterator(
+                //
+                new byte[][] { new byte[] { 0x01 }, new byte[] { 0x01 },
+                        new byte[] { 0x01 }, new byte[] { 0x01 } }, htree
+                        .lookupAll(new byte[] { 0x01 }));
+        AbstractBTreeTestCase.assertSameIterator(//
+                new byte[][] {}, htree.lookupAll(new byte[] { 0x02 }));
+
+        /*
+         * 5. Insert 0x20. This key is directed into the same buddy bucket
+         * as the 0x01 keys that we have been inserting. That buddy bucket
+         * is already full and, further, it is the only buddy bucket on the
+         * page. Since we are not inserting a duplicate key we can split the
+         * buddy bucket. However, since global depth == local depth (i.e.,
+         * only one buddy bucket on the page), this split will introduce a
+         * new directory page. The new directory page basically codes for
+         * the additional prefix bits required to differentiate the two
+         * distinct keys such that they are directed into the appropriate
+         * buckets.
+         *
+         * Note: The #of new directory levels which have to be introduced
+         * here is a function of the #of prefix bits which have to be
+         * consumed before a distinction can be made between the existing
+         * key (0x01) and the new key (0x20). With an address space of 2
+         * bits, each directory level examines the next 2 bits of the key.
+         * The key (0x20) was chosen since the distinction can be made by
+         * adding only one directory level (the keys differ in the first 4
+         * bits).
+         *
+         * TODO At this point we should also prune the buckets [b] and [c]
+         * since they are empty and replace them with null references.
+         */
+        htree.insert(new byte[] { 0x20 }, new byte[] { 0x20 });
+        assertEquals("nnodes", 2, htree.getNodeCount());
+        assertEquals("nleaves", 4, htree.getLeafCount());
+        assertEquals("nentries", 5, htree.getEntryCount());
+        htree.dump(Level.ALL, System.err, true/* materialize */);
+        assertTrue(root == htree.getRoot());
+        assertEquals(4, root.childRefs.length);
+        final DirectoryPage d = (DirectoryPage) root.childRefs[0].get();
+        assertTrue(d == (DirectoryPage) root.childRefs[0].get());
+        assertTrue(d == (DirectoryPage) root.childRefs[1].get());
+        assertTrue(b == (BucketPage) root.childRefs[2].get());
+        assertTrue(b == (BucketPage) root.childRefs[3].get());
+        assertEquals(4, d.childRefs.length);
+        final BucketPage e = (BucketPage) d.childRefs[1].get();
+        assertTrue(a == (BucketPage) d.childRefs[0].get());
+        assertTrue(e == (BucketPage) d.childRefs[1].get());
+        assertTrue(c == (BucketPage) d.childRefs[2].get());
+        assertTrue(c == (BucketPage) d.childRefs[3].get());
+        assertEquals(2, root.getGlobalDepth());
+        assertEquals(2, d.getGlobalDepth());
+        assertEquals(2, a.getGlobalDepth());// unchanged
+        assertEquals(2, e.getGlobalDepth());// same as [a].
+        assertEquals(1, b.getGlobalDepth());// unchanged.
+        assertEquals(2, c.getGlobalDepth());// unchanged.
+        assertTrue(htree.contains(new byte[] { 0x01 }));
+        assertFalse(htree.contains(new byte[] { 0x02 }));
+        assertEquals(new byte[] { 0x01 }, htree
+                .lookupFirst(new byte[] { 0x01 }));
+        assertNull(htree.lookupFirst(new byte[] { 0x02 }));
+        AbstractBTreeTestCase.assertSameIterator(
+                //
+                new byte[][] { new byte[] { 0x01 }, new byte[] { 0x01 },
+                        new byte[] { 0x01 }, new byte[] { 0x01 } }, htree
+                        .lookupAll(new byte[] { 0x01 }));
+        AbstractBTreeTestCase.assertSameIterator(//
+                new byte[][] {}, htree.lookupAll(new byte[] { 0x02 }));
+
         // TODO REMOVE (or test suite for remove).
 
         // TODO Continue progression here?
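The arithmetic behind the split case exercised above can be illustrated with a small standalone sketch. This is only an illustration of the general scheme described in the log message and the test comments, not the HTree/HTreeUtil implementation: the class below and its localHashCode() helper are hypothetical stand-ins, and it assumes, as the test comments do, that the key bytes are consumed MSB-first in addressBits-sized slices (addressBits = 2 here). It shows why keys 0x01 and 0x20 first diverge one directory level below the root, and why the ordinary split (redirect the upper half of the parent's pointers) is impossible when globalDepth == localDepth: the npointers formula from the diff then yields a single pointer, so a new DirectoryPage must be introduced instead.

/**
 * Standalone sketch (not part of r4483) of the bit arithmetic behind the
 * buddy bucket split forced by the new test step. Names here are hypothetical
 * illustrations, not the bigdata HTree API.
 */
public class BuddyBucketSplitSketch {

    /** Address bits per directory level; the unit test uses 2. */
    static final int ADDRESS_BITS = 2;

    /**
     * Skip prefixLength bits of the key (MSB first) and return the next
     * globalDepth bits, in the spirit of getLocalHashCode() in the diff.
     */
    static int localHashCode(final byte[] key, final int prefixLength,
            final int globalDepth) {
        int v = 0;
        for (int i = 0; i < globalDepth; i++) {
            final int bit = prefixLength + i;
            final int b = key[bit >> 3] & 0xff;
            // take bit [bit] counting from the most significant bit of the key.
            v = (v << 1) | ((b >> (7 - (bit & 7))) & 1);
        }
        return v;
    }

    public static void main(final String[] args) {

        final byte[] k1 = new byte[] { 0x01 }; // the duplicate key in the test
        final byte[] k2 = new byte[] { 0x20 }; // the key that forces the new directory page

        // Root level: both keys present hash bits 00, so both target the same buddy bucket.
        System.out.printf("root level : %d vs %d%n",
                localHashCode(k1, 0, ADDRESS_BITS),
                localHashCode(k2, 0, ADDRESS_BITS));

        // Next level: 00 vs 10, so one additional directory level separates
        // them (the keys differ within their first 4 bits, as the test notes).
        System.out.printf("next level : %d vs %d%n",
                localHashCode(k1, ADDRESS_BITS, ADDRESS_BITS),
                localHashCode(k2, ADDRESS_BITS, ADDRESS_BITS));

        // Pointer count used by the ordinary split path (formula from the diff):
        // npointers = 1 << (parent.globalDepth - oldDepth).
        final int globalDepth = 2;
        for (int oldDepth = 1; oldDepth <= globalDepth; oldDepth++) {
            final int npointers = 1 << (globalDepth - oldDepth);
            System.out.printf("globalDepth=%d, localDepth=%d -> npointers=%d (%s)%n",
                    globalDepth, oldDepth, npointers,
                    npointers > 1 ? "redirect upper half of pointers"
                            : "only one pointer: must add a DirectoryPage");
        }
    }
}

Running main() prints "root level : 0 vs 0", "next level : 0 vs 2", and npointers=2 for localDepth=1 versus npointers=1 for localDepth=2, which mirrors the situation the new test step sets up before calling addDirectoryPageAndSplitBucketPage().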