|
From: <tho...@us...> - 2014-01-14 16:06:29
|
Revision: 7795
http://bigdata.svn.sourceforge.net/bigdata/?rev=7795&view=rev
Author: thompsonbry
Date: 2014-01-14 16:06:20 +0000 (Tue, 14 Jan 2014)
Log Message:
-----------
Commit provides partial fix for #763. Martyn has modified the CustomByteArrayFrontCodedList to support search against a front-coded list with duplicate keys.
However, the tests developed for the HTree show a problem recovering all keys when duplicates are inserted. I have extended the tests that he developed to cover more dups and a variety of valued for the #of address bits in the HTree. The dups test is now integrated into CI and will show 2 new test failures.
I have generated a new lgpl-utils dependency, published it to our maven repo, and updated the top-level build.properties, .classpath, and pom.xml files.
mvn deploy:deploy-file \
-DgroupId=com.bigdata \
-DartifactId=lgpl-utils \
-Dversion=1.0.7-011414 \
-Dpackaging=jar \
-DrepositoryId=bigdata.releases \
-Durl=scpexe://www.systap.com/srv/www/htdocs/systap.com/maven/releases/ \
-Dfile=bigdata/lib/lgpl-utils-1.0.7-011414.jar
Modified Paths:
--------------
branches/BIGDATA_RELEASE_1_3_0/.classpath
branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/htree/TestAll_HTree.java
branches/BIGDATA_RELEASE_1_3_0/build.properties
branches/BIGDATA_RELEASE_1_3_0/lgpl-utils/build.properties
branches/BIGDATA_RELEASE_1_3_0/lgpl-utils/build.xml
branches/BIGDATA_RELEASE_1_3_0/lgpl-utils/src/java/it/unimi/dsi/fastutil/bytes/custom/CustomByteArrayFrontCodedList.java
branches/BIGDATA_RELEASE_1_3_0/pom.xml
Added Paths:
-----------
branches/BIGDATA_RELEASE_1_3_0/bigdata/lib/lgpl-utils-1.0.7-140114.jar
branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/htree/TestDuplicates.java
Removed Paths:
-------------
branches/BIGDATA_RELEASE_1_3_0/bigdata/lib/lgpl-utils-1.0.6-020610.jar
Modified: branches/BIGDATA_RELEASE_1_3_0/.classpath
===================================================================
--- branches/BIGDATA_RELEASE_1_3_0/.classpath 2014-01-14 13:57:27 UTC (rev 7794)
+++ branches/BIGDATA_RELEASE_1_3_0/.classpath 2014-01-14 16:06:20 UTC (rev 7795)
@@ -32,7 +32,7 @@
<classpathentry kind="src" path="bigdata-gas/src/java"/>
<classpathentry kind="src" path="bigdata-gas/src/test"/>
<classpathentry exported="true" kind="lib" path="bigdata/lib/dsi-utils-1.0.6-020610.jar"/>
- <classpathentry exported="true" kind="lib" path="bigdata/lib/lgpl-utils-1.0.6-020610.jar"/>
+ <classpathentry exported="true" kind="lib" path="bigdata/lib/lgpl-utils-1.0.7-140114.jar"/>
<classpathentry kind="lib" path="bigdata-jini/lib/apache/zookeeper-3.3.3.jar"/>
<classpathentry exported="true" kind="lib" path="bigdata/lib/jetty/jetty-continuation-7.2.2.v20101205.jar"/>
<classpathentry exported="true" kind="lib" path="bigdata/lib/jetty/jetty-http-7.2.2.v20101205.jar"/>
Deleted: branches/BIGDATA_RELEASE_1_3_0/bigdata/lib/lgpl-utils-1.0.6-020610.jar
===================================================================
(Binary files differ)
Added: branches/BIGDATA_RELEASE_1_3_0/bigdata/lib/lgpl-utils-1.0.7-140114.jar
===================================================================
(Binary files differ)
Property changes on: branches/BIGDATA_RELEASE_1_3_0/bigdata/lib/lgpl-utils-1.0.7-140114.jar
___________________________________________________________________
Added: svn:mime-type
+ application/octet-stream
Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/htree/TestAll_HTree.java
===================================================================
--- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/htree/TestAll_HTree.java 2014-01-14 13:57:27 UTC (rev 7794)
+++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/htree/TestAll_HTree.java 2014-01-14 16:06:20 UTC (rev 7795)
@@ -92,6 +92,8 @@
suite.addTestSuite(TestReopen.class);
// test of storing null values under a key with persistence.
suite.addTestSuite(TestNullValues.class);
+ // test duplicate keys (with index checkpoint).
+ suite.addTestSuite(TestDuplicates.class);
/*
* test of transient HTree's (no backing store).
Added: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/htree/TestDuplicates.java
===================================================================
--- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/htree/TestDuplicates.java (rev 0)
+++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/htree/TestDuplicates.java 2014-01-14 16:06:20 UTC (rev 7795)
@@ -0,0 +1,297 @@
+/**
+
+Copyright (C) SYSTAP, LLC 2006-2011. All rights reserved.
+
+Contact:
+ SYSTAP, LLC
+ 4501 Tower Road
+ Greensboro, NC 27410
+ lic...@bi...
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+package com.bigdata.htree;
+
+import java.io.IOException;
+
+import com.bigdata.btree.ITupleIterator;
+import com.bigdata.rawstore.IRawStore;
+import com.bigdata.rawstore.SimpleMemoryRawStore;
+
+/**
+ * Test {@link HTree} with duplicate Keys.
+ *
+ * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/763" >
+ * Stochastic Results With Analytic Query Mode </a>
+ *
+ * @author Martyn Cutcher
+ */
+public class TestDuplicates extends AbstractHTreeTestCase {
+
+ /**
+ *
+ */
+ public TestDuplicates() {
+
+ }
+
+ /**
+ * @param name
+ */
+ public TestDuplicates(String name) {
+ super(name);
+ }
+
+// private static final boolean bufferNodes = true;
+
+ /**
+ * Tests the ability to store values against duplicate
+ * keys.
+ *
+ * @throws IOException
+ * @throws Exception
+ */
+ public void test_duplicateKeys() throws IOException, Exception {
+
+ final IRawStore store = new SimpleMemoryRawStore();
+
+ try {
+
+ HTree htree = getHTree(store, 3/* addressBits */,
+ false/* rawRecords */, true/* persistent */);
+
+ final byte[] k1 = new byte[] { 1 };
+ final byte[] v2 = new byte[] { 2 };
+ final byte[] v3 = new byte[] { 3 };
+
+ assertNull(htree.lookupFirst(k1));
+ assertFalse(htree.contains(k1));
+
+ assertNull(htree.insert(k1, v2));
+
+ assertEquals(htree.lookupFirst(k1), v2);
+ assertTrue(htree.contains(k1));
+
+ assertNull(htree.insert(k1, v3));
+
+ // test before checkpoint.
+ assertEquals(2, getCount(htree, k1));
+
+ final long addrCheckpoint1 = htree.writeCheckpoint();
+
+ htree = HTree.load(store, addrCheckpoint1, true/* readOnly */);
+
+ assertEquals(htree.lookupFirst(k1), v3);
+ assertTrue(htree.contains(k1));
+
+ // test after checkpoint.
+ assertEquals(2, getCount(htree, k1));
+
+ } finally {
+
+ store.destroy();
+
+ }
+
+ }
+
+ public void test_duplicateKeyRangeScans_3bits_500dups() {
+
+ doTest(3, 500);
+
+ }
+
+ public void test_duplicateKeyRangeScans_4bits_500dups() {
+
+ doTest(4, 500);
+
+ }
+
+ public void test_duplicateKeyRangeScans_10bits_500dups() {
+
+ doTest(10, 500);
+
+ }
+
+ public void test_duplicateKeyRangeScans_3bits_5000dups() {
+
+ doTest(3, 5000);
+
+ }
+
+ public void test_duplicateKeyRangeScans_4bits_5000dups() {
+
+ doTest(4, 5000);
+
+ }
+
+ public void test_duplicateKeyRangeScans_10bits_5000dups() {
+
+ doTest(10, 5000);
+
+ }
+
+ /**
+ * Test helper for a test based on duplicate keys. The test is designed to
+ * verify that the keys are stored correctly and that a scan of the records
+ * having that key returns all entries stored under that key.
+ *
+ * @param addressBits
+ * The #of address bits.
+ * @param ndups
+ * The #of duplicate entries for a given key.
+ *
+ * FIXME If the addressBits is 10 then this test can fail for
+ * specific dups values - eg 500 or 2000. This appears to be an
+ * independent failure at the {@link HTree} layer rather than in
+ * the key buffer search layer.
+ */
+ private void doTest(final int addressBits, final int ndups) {
+
+ final IRawStore store = new SimpleMemoryRawStore();
+ // final IRawStore store = getRWStore();
+
+ try {
+ HTree htree = getHTree(store, addressBits, false/* rawRecords */,
+ true/* persistent */);
+
+ final byte[] k1 = new byte[] { 1, 2, 3, 4 };
+ final byte[] k2 = new byte[] { 2, 3, 4 };
+ final byte[] k3 = new byte[] { 3, 4 };
+ final byte[] v1 = new byte[] { 1 };
+ final byte[] v2 = new byte[] { 2 };
+ final byte[] v3 = new byte[] { 3 };
+
+ for (int i = 0; i < ndups; i++) {
+ assertNull(htree.insert(k1, v1));
+ assertNull(htree.insert(k2, v2));
+ assertNull(htree.insert(k3, v3));
+ }
+
+ assertTrue(htree.contains(k1)); // 2000 dups fails here with 10 bit depth
+ assertTrue(htree.contains(k2));
+ assertTrue(htree.contains(k3));
+
+ // Check first with MutableBuckets
+ assertEquals(ndups, getCount(htree, k1));
+ assertEquals(ndups, getCount(htree, k2));
+ assertEquals(ndups, getCount(htree, k3));
+
+ final long addrCheckpoint1 = htree.writeCheckpoint();
+
+ htree = HTree.load(store, addrCheckpoint1, true/* readOnly */);
+
+ assertTrue(htree.contains(k1));
+ assertTrue(htree.contains(k2));
+ assertTrue(htree.contains(k3));
+
+ // Test after checkpoint.
+ assertEquals(ndups, getCount(htree, k1));
+ assertEquals(ndups, getCount(htree, k2));
+ assertEquals(ndups, getCount(htree, k3));// 500 dups fails here with 10bit depth
+
+ } finally {
+
+ store.destroy();
+
+ }
+
+ }
+
+// private IRawStore getRWStore() {
+//
+// final Properties properties = getProperties();
+//
+// properties.setProperty(Options.CREATE_TEMP_FILE, "true");
+//
+// properties.setProperty(Options.DELETE_ON_EXIT, "true");
+//
+// properties.setProperty(Options.BUFFER_MODE, BufferMode.DiskRW.toString());
+//
+// properties.setProperty(Options.WRITE_CACHE_ENABLED, "" + true);
+//
+// return new Journal(properties);//.getBufferStrategy();
+//
+// }
+
+ /**
+ * Return the #of entries having the specified key.
+ *
+ * @param htree
+ * The index.
+ * @param k1
+ * The key.
+ * @return
+ */
+ private int getCount(final HTree htree, final byte[] k1) {
+ final ITupleIterator<?> iter = htree.lookupAll(k1);
+ int count = 0;
+ while (iter.hasNext()) {
+ iter.next();
+ count++;
+ }
+
+ return count;
+ }
+
+ /**
+ * Simple test where the keys and the values are dups.
+ *
+ * @throws IOException
+ * @throws Exception
+ */
+ public void test_duplicateKeyValues() throws IOException, Exception {
+
+ final IRawStore store = new SimpleMemoryRawStore();
+
+ try {
+
+ HTree htree = getHTree(store, 3/* addressBits */,
+ false/* rawRecord */, true/* persistent */);
+
+ final byte[] k1 = new byte[] { 1 };
+ final byte[] v2 = new byte[] { 2 };
+
+ assertNull(htree.lookupFirst(k1));
+ assertFalse(htree.contains(k1));
+
+ assertNull(htree.insert(k1, v2));
+
+ assertEquals(htree.lookupFirst(k1), v2);
+ assertTrue(htree.contains(k1));
+
+ assertNull(htree.insert(k1, v2));
+
+ // before checkpoint.
+ assertEquals(2, getCount(htree, k1));
+
+ final long addrCheckpoint1 = htree.writeCheckpoint();
+
+ htree = HTree.load(store, addrCheckpoint1, true/* readOnly */);
+
+ assertEquals(htree.lookupFirst(k1), v2);
+ assertTrue(htree.contains(k1));
+
+ // after checkpoint.
+ assertEquals(2, getCount(htree, k1));
+
+ } finally {
+
+ store.destroy();
+
+ }
+
+ }
+
+}
Modified: branches/BIGDATA_RELEASE_1_3_0/build.properties
===================================================================
--- branches/BIGDATA_RELEASE_1_3_0/build.properties 2014-01-14 13:57:27 UTC (rev 7794)
+++ branches/BIGDATA_RELEASE_1_3_0/build.properties 2014-01-14 16:06:20 UTC (rev 7795)
@@ -65,7 +65,7 @@
log4j.version=1.2.17
fastutil.version=5.1.5
dsiutils.version=1.0.6-020610
-lgplutils.version=1.0.6-020610
+lgplutils.version=1.0.7-040114
ganglia-version=1.0.1
gas-version=0.1.0
Modified: branches/BIGDATA_RELEASE_1_3_0/lgpl-utils/build.properties
===================================================================
--- branches/BIGDATA_RELEASE_1_3_0/lgpl-utils/build.properties 2014-01-14 13:57:27 UTC (rev 7794)
+++ branches/BIGDATA_RELEASE_1_3_0/lgpl-utils/build.properties 2014-01-14 16:06:20 UTC (rev 7795)
@@ -39,7 +39,7 @@
release.dir=ant-release
# The build version.
-build.ver=1.0.6
+build.ver=1.0.7
# Set true to do a snapshot build. This changes the value of ${version} to
# include the date.
Modified: branches/BIGDATA_RELEASE_1_3_0/lgpl-utils/build.xml
===================================================================
--- branches/BIGDATA_RELEASE_1_3_0/lgpl-utils/build.xml 2014-01-14 13:57:27 UTC (rev 7794)
+++ branches/BIGDATA_RELEASE_1_3_0/lgpl-utils/build.xml 2014-01-14 16:06:20 UTC (rev 7795)
@@ -124,7 +124,7 @@
<bottom>
<![CDATA[
<i>
-Portions copyright © 2006-2009 SYSTAP, LLC. All Rights Reserved.<br>
+Portions copyright © 2006-2014 SYSTAP, LLC. All Rights Reserved.<br>
Portions copyright © 2005-2009 Sebastiano Vigna
</i>]]></bottom>
<tag name="todo" scope="all" description="TODO:" />
Modified: branches/BIGDATA_RELEASE_1_3_0/lgpl-utils/src/java/it/unimi/dsi/fastutil/bytes/custom/CustomByteArrayFrontCodedList.java
===================================================================
--- branches/BIGDATA_RELEASE_1_3_0/lgpl-utils/src/java/it/unimi/dsi/fastutil/bytes/custom/CustomByteArrayFrontCodedList.java 2014-01-14 13:57:27 UTC (rev 7794)
+++ branches/BIGDATA_RELEASE_1_3_0/lgpl-utils/src/java/it/unimi/dsi/fastutil/bytes/custom/CustomByteArrayFrontCodedList.java 2014-01-14 16:06:20 UTC (rev 7795)
@@ -1373,7 +1373,7 @@
// final int base = 0;
- final BackingBuffer bb = this.bb;
+// final BackingBuffer bb = this.bb;
/*
* We will test each entry having an index that is an even multiple of
@@ -1396,23 +1396,8 @@
/*
* Compare the probe with the full length byte[] at index [mid].
*/
- final int tmp;
- {
+ final int tmp = comparePos(mid, key);
- // The index into the backing buffer of index [mid].
- int pos = p[mid];
-
- // The #of bytes in the full length byte[] at index [mid].
- final int blen = bb.readInt(pos);
-
- // Skip the #of bytes required to code that length.
- pos += count(blen);
-
- // Compare key vs actual (in buffer).
- tmp = compareBytes(key, 0, key.length, bb, pos, blen);
-
- }
-
if (tmp > 0) {
// Actual GT probe, restrict lower bound and try again.
@@ -1425,10 +1410,19 @@
} else {
- // Found: return offset.
+ // duplicate check to see if previous is also a match
+ if (mid > 0 && comparePos(mid - 1, key) == 0) {
- return offset;
+ // in which case set it as the highest
+ high = mid - 1;
+ } else {
+
+ // Found: return offset.
+ return offset;
+
+ }
+
}
}
@@ -1440,7 +1434,35 @@
return -(offset + 1);
}
+
+ /**
+ * Compares the caller's key to a full length key at a specific offset
+ * in the {@link BackingBuffer}.
+ *
+ * @param index
+ * The index into the full length keys.
+ * @param key
+ * The probe key.
+ *
+ * @return A value which indicates whether the key at that offset into the
+ * backing buffer is LT, GT, or EQ to the caller's key.
+ */
+ private int comparePos(final int index, final byte[] key) {
+ // The index into the backing buffer of index [index].
+ int pos = p[index];
+
+ // The #of bytes in the full length byte[] at index [index].
+ final int blen = bb.readInt(pos);
+
+ // Skip the #of bytes required to code that length.
+ pos += count(blen);
+
+ // Compare key vs actual (in buffer).
+ return compareBytes(key, 0, key.length, bb, pos, blen);
+
+ }
+
/**
* Compare up to <i>len</i> bytes in <i>a</i> interpreted as unsigned bytes
* against the bytes in the {@link BackingBuffer} starting at offset
Modified: branches/BIGDATA_RELEASE_1_3_0/pom.xml
===================================================================
--- branches/BIGDATA_RELEASE_1_3_0/pom.xml 2014-01-14 13:57:27 UTC (rev 7794)
+++ branches/BIGDATA_RELEASE_1_3_0/pom.xml 2014-01-14 16:06:20 UTC (rev 7795)
@@ -94,7 +94,7 @@
<log4j.version>1.2.17</log4j.version>
<fastutil.version>5.1.5</fastutil.version>
<dsiutils.version>1.0.6-020610</dsiutils.version>
- <lgplutils.version>1.0.6-020610</lgplutils.version>
+ <lgplutils.version>1.0.7-140114</lgplutils.version>
<bigdata.ganglia.version>1.0.1</bigdata.ganglia.version>
</properties>
<!-- TODO Can we declare the versions of the dependencies here as
@@ -387,11 +387,11 @@
mvn deploy:deploy-file \
-DgroupId=com.bigdata \
-DartifactId=lgpl-utils \
- -Dversion=1.0.6-020610 \
+ -Dversion=1.0.7-140114 \
-Dpackaging=jar \
-DrepositoryId=bigdata.releases \
-Durl=scpexe://www.systap.com/srv/www/htdocs/systap.com/maven/releases/ \
- -Dfile=bigdata/lib/lgpl-utils-1.0.6-020610.jar
+ -Dfile=bigdata/lib/lgpl-utils-1.0.7-140114.jar
-->
<groupId>com.bigdata</groupId>
<artifactId>lgpl-utils</artifactId>
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|