From: <tho...@us...> - 2014-01-14 16:06:29
|
Revision: 7795 http://bigdata.svn.sourceforge.net/bigdata/?rev=7795&view=rev Author: thompsonbry Date: 2014-01-14 16:06:20 +0000 (Tue, 14 Jan 2014) Log Message: ----------- Commit provides partial fix for #763. Martyn has modified the CustomByteArrayFrontCodedList to support search against a front-coded list with duplicate keys. However, the tests developed for the HTree show a problem recovering all keys when duplicates are inserted. I have extended the tests that he developed to cover more dups and a variety of valued for the #of address bits in the HTree. The dups test is now integrated into CI and will show 2 new test failures. I have generated a new lgpl-utils dependency, published it to our maven repo, and updated the top-level build.properties, .classpath, and pom.xml files. mvn deploy:deploy-file \ -DgroupId=com.bigdata \ -DartifactId=lgpl-utils \ -Dversion=1.0.7-011414 \ -Dpackaging=jar \ -DrepositoryId=bigdata.releases \ -Durl=scpexe://www.systap.com/srv/www/htdocs/systap.com/maven/releases/ \ -Dfile=bigdata/lib/lgpl-utils-1.0.7-011414.jar Modified Paths: -------------- branches/BIGDATA_RELEASE_1_3_0/.classpath branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/htree/TestAll_HTree.java branches/BIGDATA_RELEASE_1_3_0/build.properties branches/BIGDATA_RELEASE_1_3_0/lgpl-utils/build.properties branches/BIGDATA_RELEASE_1_3_0/lgpl-utils/build.xml branches/BIGDATA_RELEASE_1_3_0/lgpl-utils/src/java/it/unimi/dsi/fastutil/bytes/custom/CustomByteArrayFrontCodedList.java branches/BIGDATA_RELEASE_1_3_0/pom.xml Added Paths: ----------- branches/BIGDATA_RELEASE_1_3_0/bigdata/lib/lgpl-utils-1.0.7-140114.jar branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/htree/TestDuplicates.java Removed Paths: ------------- branches/BIGDATA_RELEASE_1_3_0/bigdata/lib/lgpl-utils-1.0.6-020610.jar Modified: branches/BIGDATA_RELEASE_1_3_0/.classpath =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/.classpath 2014-01-14 13:57:27 UTC (rev 7794) +++ branches/BIGDATA_RELEASE_1_3_0/.classpath 2014-01-14 16:06:20 UTC (rev 7795) @@ -32,7 +32,7 @@ <classpathentry kind="src" path="bigdata-gas/src/java"/> <classpathentry kind="src" path="bigdata-gas/src/test"/> <classpathentry exported="true" kind="lib" path="bigdata/lib/dsi-utils-1.0.6-020610.jar"/> - <classpathentry exported="true" kind="lib" path="bigdata/lib/lgpl-utils-1.0.6-020610.jar"/> + <classpathentry exported="true" kind="lib" path="bigdata/lib/lgpl-utils-1.0.7-140114.jar"/> <classpathentry kind="lib" path="bigdata-jini/lib/apache/zookeeper-3.3.3.jar"/> <classpathentry exported="true" kind="lib" path="bigdata/lib/jetty/jetty-continuation-7.2.2.v20101205.jar"/> <classpathentry exported="true" kind="lib" path="bigdata/lib/jetty/jetty-http-7.2.2.v20101205.jar"/> Deleted: branches/BIGDATA_RELEASE_1_3_0/bigdata/lib/lgpl-utils-1.0.6-020610.jar =================================================================== (Binary files differ) Added: branches/BIGDATA_RELEASE_1_3_0/bigdata/lib/lgpl-utils-1.0.7-140114.jar =================================================================== (Binary files differ) Property changes on: branches/BIGDATA_RELEASE_1_3_0/bigdata/lib/lgpl-utils-1.0.7-140114.jar ___________________________________________________________________ Added: svn:mime-type + application/octet-stream Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/htree/TestAll_HTree.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/htree/TestAll_HTree.java 2014-01-14 13:57:27 UTC (rev 7794) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/htree/TestAll_HTree.java 2014-01-14 16:06:20 UTC (rev 7795) @@ -92,6 +92,8 @@ suite.addTestSuite(TestReopen.class); // test of storing null values under a key with persistence. suite.addTestSuite(TestNullValues.class); + // test duplicate keys (with index checkpoint). + suite.addTestSuite(TestDuplicates.class); /* * test of transient HTree's (no backing store). Added: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/htree/TestDuplicates.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/htree/TestDuplicates.java (rev 0) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/htree/TestDuplicates.java 2014-01-14 16:06:20 UTC (rev 7795) @@ -0,0 +1,297 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2011. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +package com.bigdata.htree; + +import java.io.IOException; + +import com.bigdata.btree.ITupleIterator; +import com.bigdata.rawstore.IRawStore; +import com.bigdata.rawstore.SimpleMemoryRawStore; + +/** + * Test {@link HTree} with duplicate Keys. + * + * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/763" > + * Stochastic Results With Analytic Query Mode </a> + * + * @author Martyn Cutcher + */ +public class TestDuplicates extends AbstractHTreeTestCase { + + /** + * + */ + public TestDuplicates() { + + } + + /** + * @param name + */ + public TestDuplicates(String name) { + super(name); + } + +// private static final boolean bufferNodes = true; + + /** + * Tests the ability to store values against duplicate + * keys. + * + * @throws IOException + * @throws Exception + */ + public void test_duplicateKeys() throws IOException, Exception { + + final IRawStore store = new SimpleMemoryRawStore(); + + try { + + HTree htree = getHTree(store, 3/* addressBits */, + false/* rawRecords */, true/* persistent */); + + final byte[] k1 = new byte[] { 1 }; + final byte[] v2 = new byte[] { 2 }; + final byte[] v3 = new byte[] { 3 }; + + assertNull(htree.lookupFirst(k1)); + assertFalse(htree.contains(k1)); + + assertNull(htree.insert(k1, v2)); + + assertEquals(htree.lookupFirst(k1), v2); + assertTrue(htree.contains(k1)); + + assertNull(htree.insert(k1, v3)); + + // test before checkpoint. + assertEquals(2, getCount(htree, k1)); + + final long addrCheckpoint1 = htree.writeCheckpoint(); + + htree = HTree.load(store, addrCheckpoint1, true/* readOnly */); + + assertEquals(htree.lookupFirst(k1), v3); + assertTrue(htree.contains(k1)); + + // test after checkpoint. + assertEquals(2, getCount(htree, k1)); + + } finally { + + store.destroy(); + + } + + } + + public void test_duplicateKeyRangeScans_3bits_500dups() { + + doTest(3, 500); + + } + + public void test_duplicateKeyRangeScans_4bits_500dups() { + + doTest(4, 500); + + } + + public void test_duplicateKeyRangeScans_10bits_500dups() { + + doTest(10, 500); + + } + + public void test_duplicateKeyRangeScans_3bits_5000dups() { + + doTest(3, 5000); + + } + + public void test_duplicateKeyRangeScans_4bits_5000dups() { + + doTest(4, 5000); + + } + + public void test_duplicateKeyRangeScans_10bits_5000dups() { + + doTest(10, 5000); + + } + + /** + * Test helper for a test based on duplicate keys. The test is designed to + * verify that the keys are stored correctly and that a scan of the records + * having that key returns all entries stored under that key. + * + * @param addressBits + * The #of address bits. + * @param ndups + * The #of duplicate entries for a given key. + * + * FIXME If the addressBits is 10 then this test can fail for + * specific dups values - eg 500 or 2000. This appears to be an + * independent failure at the {@link HTree} layer rather than in + * the key buffer search layer. + */ + private void doTest(final int addressBits, final int ndups) { + + final IRawStore store = new SimpleMemoryRawStore(); + // final IRawStore store = getRWStore(); + + try { + HTree htree = getHTree(store, addressBits, false/* rawRecords */, + true/* persistent */); + + final byte[] k1 = new byte[] { 1, 2, 3, 4 }; + final byte[] k2 = new byte[] { 2, 3, 4 }; + final byte[] k3 = new byte[] { 3, 4 }; + final byte[] v1 = new byte[] { 1 }; + final byte[] v2 = new byte[] { 2 }; + final byte[] v3 = new byte[] { 3 }; + + for (int i = 0; i < ndups; i++) { + assertNull(htree.insert(k1, v1)); + assertNull(htree.insert(k2, v2)); + assertNull(htree.insert(k3, v3)); + } + + assertTrue(htree.contains(k1)); // 2000 dups fails here with 10 bit depth + assertTrue(htree.contains(k2)); + assertTrue(htree.contains(k3)); + + // Check first with MutableBuckets + assertEquals(ndups, getCount(htree, k1)); + assertEquals(ndups, getCount(htree, k2)); + assertEquals(ndups, getCount(htree, k3)); + + final long addrCheckpoint1 = htree.writeCheckpoint(); + + htree = HTree.load(store, addrCheckpoint1, true/* readOnly */); + + assertTrue(htree.contains(k1)); + assertTrue(htree.contains(k2)); + assertTrue(htree.contains(k3)); + + // Test after checkpoint. + assertEquals(ndups, getCount(htree, k1)); + assertEquals(ndups, getCount(htree, k2)); + assertEquals(ndups, getCount(htree, k3));// 500 dups fails here with 10bit depth + + } finally { + + store.destroy(); + + } + + } + +// private IRawStore getRWStore() { +// +// final Properties properties = getProperties(); +// +// properties.setProperty(Options.CREATE_TEMP_FILE, "true"); +// +// properties.setProperty(Options.DELETE_ON_EXIT, "true"); +// +// properties.setProperty(Options.BUFFER_MODE, BufferMode.DiskRW.toString()); +// +// properties.setProperty(Options.WRITE_CACHE_ENABLED, "" + true); +// +// return new Journal(properties);//.getBufferStrategy(); +// +// } + + /** + * Return the #of entries having the specified key. + * + * @param htree + * The index. + * @param k1 + * The key. + * @return + */ + private int getCount(final HTree htree, final byte[] k1) { + final ITupleIterator<?> iter = htree.lookupAll(k1); + int count = 0; + while (iter.hasNext()) { + iter.next(); + count++; + } + + return count; + } + + /** + * Simple test where the keys and the values are dups. + * + * @throws IOException + * @throws Exception + */ + public void test_duplicateKeyValues() throws IOException, Exception { + + final IRawStore store = new SimpleMemoryRawStore(); + + try { + + HTree htree = getHTree(store, 3/* addressBits */, + false/* rawRecord */, true/* persistent */); + + final byte[] k1 = new byte[] { 1 }; + final byte[] v2 = new byte[] { 2 }; + + assertNull(htree.lookupFirst(k1)); + assertFalse(htree.contains(k1)); + + assertNull(htree.insert(k1, v2)); + + assertEquals(htree.lookupFirst(k1), v2); + assertTrue(htree.contains(k1)); + + assertNull(htree.insert(k1, v2)); + + // before checkpoint. + assertEquals(2, getCount(htree, k1)); + + final long addrCheckpoint1 = htree.writeCheckpoint(); + + htree = HTree.load(store, addrCheckpoint1, true/* readOnly */); + + assertEquals(htree.lookupFirst(k1), v2); + assertTrue(htree.contains(k1)); + + // after checkpoint. + assertEquals(2, getCount(htree, k1)); + + } finally { + + store.destroy(); + + } + + } + +} Modified: branches/BIGDATA_RELEASE_1_3_0/build.properties =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/build.properties 2014-01-14 13:57:27 UTC (rev 7794) +++ branches/BIGDATA_RELEASE_1_3_0/build.properties 2014-01-14 16:06:20 UTC (rev 7795) @@ -65,7 +65,7 @@ log4j.version=1.2.17 fastutil.version=5.1.5 dsiutils.version=1.0.6-020610 -lgplutils.version=1.0.6-020610 +lgplutils.version=1.0.7-040114 ganglia-version=1.0.1 gas-version=0.1.0 Modified: branches/BIGDATA_RELEASE_1_3_0/lgpl-utils/build.properties =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/lgpl-utils/build.properties 2014-01-14 13:57:27 UTC (rev 7794) +++ branches/BIGDATA_RELEASE_1_3_0/lgpl-utils/build.properties 2014-01-14 16:06:20 UTC (rev 7795) @@ -39,7 +39,7 @@ release.dir=ant-release # The build version. -build.ver=1.0.6 +build.ver=1.0.7 # Set true to do a snapshot build. This changes the value of ${version} to # include the date. Modified: branches/BIGDATA_RELEASE_1_3_0/lgpl-utils/build.xml =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/lgpl-utils/build.xml 2014-01-14 13:57:27 UTC (rev 7794) +++ branches/BIGDATA_RELEASE_1_3_0/lgpl-utils/build.xml 2014-01-14 16:06:20 UTC (rev 7795) @@ -124,7 +124,7 @@ <bottom> <![CDATA[ <i> -Portions copyright © 2006-2009 SYSTAP, LLC. All Rights Reserved.<br> +Portions copyright © 2006-2014 SYSTAP, LLC. All Rights Reserved.<br> Portions copyright © 2005-2009 Sebastiano Vigna </i>]]></bottom> <tag name="todo" scope="all" description="TODO:" /> Modified: branches/BIGDATA_RELEASE_1_3_0/lgpl-utils/src/java/it/unimi/dsi/fastutil/bytes/custom/CustomByteArrayFrontCodedList.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/lgpl-utils/src/java/it/unimi/dsi/fastutil/bytes/custom/CustomByteArrayFrontCodedList.java 2014-01-14 13:57:27 UTC (rev 7794) +++ branches/BIGDATA_RELEASE_1_3_0/lgpl-utils/src/java/it/unimi/dsi/fastutil/bytes/custom/CustomByteArrayFrontCodedList.java 2014-01-14 16:06:20 UTC (rev 7795) @@ -1373,7 +1373,7 @@ // final int base = 0; - final BackingBuffer bb = this.bb; +// final BackingBuffer bb = this.bb; /* * We will test each entry having an index that is an even multiple of @@ -1396,23 +1396,8 @@ /* * Compare the probe with the full length byte[] at index [mid]. */ - final int tmp; - { + final int tmp = comparePos(mid, key); - // The index into the backing buffer of index [mid]. - int pos = p[mid]; - - // The #of bytes in the full length byte[] at index [mid]. - final int blen = bb.readInt(pos); - - // Skip the #of bytes required to code that length. - pos += count(blen); - - // Compare key vs actual (in buffer). - tmp = compareBytes(key, 0, key.length, bb, pos, blen); - - } - if (tmp > 0) { // Actual GT probe, restrict lower bound and try again. @@ -1425,10 +1410,19 @@ } else { - // Found: return offset. + // duplicate check to see if previous is also a match + if (mid > 0 && comparePos(mid - 1, key) == 0) { - return offset; + // in which case set it as the highest + high = mid - 1; + } else { + + // Found: return offset. + return offset; + + } + } } @@ -1440,7 +1434,35 @@ return -(offset + 1); } + + /** + * Compares the caller's key to a full length key at a specific offset + * in the {@link BackingBuffer}. + * + * @param index + * The index into the full length keys. + * @param key + * The probe key. + * + * @return A value which indicates whether the key at that offset into the + * backing buffer is LT, GT, or EQ to the caller's key. + */ + private int comparePos(final int index, final byte[] key) { + // The index into the backing buffer of index [index]. + int pos = p[index]; + + // The #of bytes in the full length byte[] at index [index]. + final int blen = bb.readInt(pos); + + // Skip the #of bytes required to code that length. + pos += count(blen); + + // Compare key vs actual (in buffer). + return compareBytes(key, 0, key.length, bb, pos, blen); + + } + /** * Compare up to <i>len</i> bytes in <i>a</i> interpreted as unsigned bytes * against the bytes in the {@link BackingBuffer} starting at offset Modified: branches/BIGDATA_RELEASE_1_3_0/pom.xml =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/pom.xml 2014-01-14 13:57:27 UTC (rev 7794) +++ branches/BIGDATA_RELEASE_1_3_0/pom.xml 2014-01-14 16:06:20 UTC (rev 7795) @@ -94,7 +94,7 @@ <log4j.version>1.2.17</log4j.version> <fastutil.version>5.1.5</fastutil.version> <dsiutils.version>1.0.6-020610</dsiutils.version> - <lgplutils.version>1.0.6-020610</lgplutils.version> + <lgplutils.version>1.0.7-140114</lgplutils.version> <bigdata.ganglia.version>1.0.1</bigdata.ganglia.version> </properties> <!-- TODO Can we declare the versions of the dependencies here as @@ -387,11 +387,11 @@ mvn deploy:deploy-file \ -DgroupId=com.bigdata \ -DartifactId=lgpl-utils \ - -Dversion=1.0.6-020610 \ + -Dversion=1.0.7-140114 \ -Dpackaging=jar \ -DrepositoryId=bigdata.releases \ -Durl=scpexe://www.systap.com/srv/www/htdocs/systap.com/maven/releases/ \ - -Dfile=bigdata/lib/lgpl-utils-1.0.6-020610.jar + -Dfile=bigdata/lib/lgpl-utils-1.0.7-140114.jar --> <groupId>com.bigdata</groupId> <artifactId>lgpl-utils</artifactId> This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |