This list is closed, nobody may subscribe to it.
| Year | Jan | Feb | Mar | Apr | May | Jun | Jul | Aug | Sep | Oct | Nov | Dec |
|------|-----|-----|-----|-----|-----|-----|-----|-----|-----|-----|-----|-----|
| 2010 |     |     |     |     |     |     | 139 | 94  | 232 | 143 | 138 | 55  |
| 2011 | 127 | 90  | 101 | 74  | 148 | 241 | 169 | 121 | 157 | 199 | 281 | 75  |
| 2012 | 107 | 122 | 184 | 73  | 14  | 49  | 26  | 103 | 133 | 61  | 51  | 55  |
| 2013 | 59  | 72  | 99  | 62  | 92  | 19  | 31  | 138 | 47  | 83  | 95  | 111 |
| 2014 | 125 | 60  | 119 | 136 | 270 | 83  | 88  | 30  | 47  | 27  | 23  |     |
| 2015 |     |     |     |     |     |     |     |     | 3   |     |     |     |
| 2016 |     |     | 4   | 1   |     |     |     |     |     |     |     |     |
From: <btm...@us...> - 2010-12-07 23:03:44
Revision: 3999 http://bigdata.svn.sourceforge.net/bigdata/?rev=3999&view=rev Author: btmurphy Date: 2010-12-07 23:03:37 +0000 (Tue, 07 Dec 2010) Log Message: ----------- [branch dev-btm]: CHECKPOINT - added info to testzoo.config for com.bigdata.quorum.ServiceImpl component so that the zookeeper tests now using smart proxy zookeeper wrapper can start the QuorumPeerService and the tests can pass; changed AbstractZooTestCase to now start the smart proxy based com.bigdata.quorum.ServiceImpl instead of org.apache.zookeeper.server.quorum.QuorumPeerMain; and cleaned up debug statements in AbstractFedZooTestCase from the last checkpoint checkin Modified Paths: -------------- branches/dev-btm/bigdata-jini/src/test/com/bigdata/jini/start/AbstractFedZooTestCase.java branches/dev-btm/bigdata-jini/src/test/com/bigdata/zookeeper/AbstractZooTestCase.java branches/dev-btm/bigdata-jini/src/test/com/bigdata/zookeeper/testzoo.config Modified: branches/dev-btm/bigdata-jini/src/test/com/bigdata/jini/start/AbstractFedZooTestCase.java =================================================================== --- branches/dev-btm/bigdata-jini/src/test/com/bigdata/jini/start/AbstractFedZooTestCase.java 2010-12-07 22:31:55 UTC (rev 3998) +++ branches/dev-btm/bigdata-jini/src/test/com/bigdata/jini/start/AbstractFedZooTestCase.java 2010-12-07 23:03:37 UTC (rev 3999) @@ -56,7 +56,6 @@ import net.jini.admin.Administrable; import net.jini.core.lookup.ServiceItem; import net.jini.core.lookup.ServiceTemplate; -import net.jini.discovery.DiscoveryGroupManagement; import net.jini.lookup.ServiceDiscoveryManager; //BTM - FOR_ZOOKEEPER_SMART_PROXY - END @@ -188,10 +187,8 @@ //BTM - FOR_ZOOKEEPER_SMART_PROXY - BEGIN // Graceful shutdown of QuorumPeerService ServiceDiscoveryManager sdm = fed.getServiceDiscoveryManager(); - DiscoveryGroupManagement ldm = - (DiscoveryGroupManagement)(sdm.getDiscoveryManager()); Class[] quorumServiceType = - new Class[] {com.bigdata.service.QuorumPeerService.class}; + new Class[] {QuorumPeerService.class}; ServiceTemplate quorumServiceTmpl = new ServiceTemplate(null, quorumServiceType, null); ServiceItem[] items = Modified: branches/dev-btm/bigdata-jini/src/test/com/bigdata/zookeeper/AbstractZooTestCase.java =================================================================== --- branches/dev-btm/bigdata-jini/src/test/com/bigdata/zookeeper/AbstractZooTestCase.java 2010-12-07 22:31:55 UTC (rev 3998) +++ branches/dev-btm/bigdata-jini/src/test/com/bigdata/zookeeper/AbstractZooTestCase.java 2010-12-07 23:03:37 UTC (rev 3999) @@ -58,6 +58,18 @@ import com.bigdata.jini.util.ConfigMath; import com.bigdata.resources.ResourceFileFilter; +//BTM - FOR_ZOOKEEPER_SMART_PROXY - BEGIN +import com.bigdata.service.QuorumPeerService; +import com.sun.jini.admin.DestroyAdmin; +import net.jini.admin.Administrable; +import net.jini.core.discovery.LookupLocator; +import net.jini.core.lookup.ServiceItem; +import net.jini.core.lookup.ServiceTemplate; +import net.jini.discovery.DiscoveryGroupManagement; +import net.jini.discovery.LookupDiscoveryManager; +import net.jini.lookup.ServiceDiscoveryManager; +//BTM - FOR_ZOOKEEPER_SMART_PROXY - END + /** * Abstract base class for zookeeper integration tests. * @@ -147,6 +159,10 @@ // the chosen client port. 
int clientPort = -1; +//BTM - FOR_ZOOKEEPER_SMART_PROXY - BEGIN + String hostname; +//BTM - FOR_ZOOKEEPER_SMART_PROXY - END + public void setUp() throws Exception { try { @@ -160,7 +176,7 @@ final int leaderPort = getPort(3888/* suggestedPort */); //BTM - PRE_ZOOKEEPER_SMART_PROXY - BEGIN //BTM - PRE_ZOOKEEPER_SMART_PROXY final String servers = "1=localhost:" + peerPort + ":" + leaderPort; - String hostname = com.bigdata.util.config.NicUtil.getIpAddress("default.nic", "default", true); + hostname = com.bigdata.util.config.NicUtil.getIpAddress("default.nic", "default", true); final String servers = "1="+hostname+":" + peerPort + ":" + leaderPort; //BTM - PRE_ZOOKEEPER_SMART_PROXY - END @@ -255,6 +271,42 @@ } +//BTM - FOR_ZOOKEEPER_SMART_PROXY - BEGIN + // Gracefully shutdown QuorumPeerService + // For graceful shutdown of QuorumPeerService + LookupLocator[] locs = + new LookupLocator[] + { new LookupLocator("jini://"+hostname) }; + LookupDiscoveryManager ldm = + new LookupDiscoveryManager + (DiscoveryGroupManagement.NO_GROUPS, + locs, null); + //wait no more than N secs for lookup to be discovered + int nWait = 3; + for (int i=0; i<nWait; i++) { + if ( (ldm.getRegistrars()).length > 0 ) break; + com.bigdata.util.Util.delayMS(1L*1000L); + } + ServiceDiscoveryManager sdm = + new ServiceDiscoveryManager(ldm, null); + Class[] quorumServiceType = + new Class[] {QuorumPeerService.class}; + ServiceTemplate quorumServiceTmpl = + new ServiceTemplate(null, quorumServiceType, null); + ServiceItem[] items = + sdm.lookup(quorumServiceTmpl, Integer.MAX_VALUE, null); + for (int i=0; i<items.length; i++) { + QuorumPeerService zk = (QuorumPeerService)(items[i].service); + try { + Object admin = ((Administrable)zk).getAdmin(); + ((DestroyAdmin)admin).destroy(); + } catch(Exception e) { + log.warn("failure on zookeeper destroy ["+zk+"]", e); + } + } + sdm.terminate(); +//BTM - FOR_ZOOKEEPER_SMART_PROXY - END + for (ProcessHelper h : listener.running) { // destroy zookeeper service iff we started it. Modified: branches/dev-btm/bigdata-jini/src/test/com/bigdata/zookeeper/testzoo.config =================================================================== --- branches/dev-btm/bigdata-jini/src/test/com/bigdata/zookeeper/testzoo.config 2010-12-07 22:31:55 UTC (rev 3998) +++ branches/dev-btm/bigdata-jini/src/test/com/bigdata/zookeeper/testzoo.config 2010-12-07 23:03:37 UTC (rev 3999) @@ -6,6 +6,31 @@ import com.bigdata.jini.util.ConfigMath; import com.bigdata.util.config.NicUtil; +bigdata { + + fedname = System.getProperty("federation.name","testFed"); + zrootname = System.getProperty("bigdata.zrootname","testZroot"); + + // logging configuration (value is a URI!) + log4j="file:"+System.getProperty("log4j.path", + ConfigMath.getAbsolutePath(new File("bigdata/src/resources/logging")) + )+"/log4j.properties"; + + private static localIpAddr = + NicUtil.getIpAddress("default.nic", "default", true); +} + +com.bigdata.jini.start.config.ServiceConfiguration { + defaultJavaArgs = new String[]{ + "-server", + "-ea", + "-Djava.security.policy="+ConfigMath.getAbsolutePath(new File("policy.all")) + }; + + serviceDir = ConfigMath.getAbsoluteFile(new File(bigdata.fedname)); + log4j = bigdata.log4j; +} + /** * Zookeeper server configuration. 
*/ @@ -33,8 +58,6 @@ private static appHome = System.getProperty("app.home", ConfigMath.getAbsolutePath(new File(".")) ); - private static localIpAddr = - NicUtil.getIpAddress("default.nic", "default", true); /* A comma delimited list of the known zookeeper servers together * with their assigned "myid". @@ -60,7 +83,7 @@ * syntax is obvious, if ugly. */ // standalone - servers = ConfigUtil.concat( new String[] { "1=", localIpAddr, ":2888:3888" } ); + servers = ConfigUtil.concat( new String[] { "1=", bigdata.localIpAddr, ":2888:3888" } ); // ensemble //servers="1=zoo1:2888:3888, 2=zoo2:2888:3888, 3=zoo3:2888:3888"; @@ -98,8 +121,16 @@ * located since we change to the service directory before starting * the service. */ - log4j="file:"+System.getProperty("log4j.path", - ConfigMath.getAbsolutePath(new File("bigdata/src/resources/logging")) - )+"/log4j.properties"; + log4j=bigdata.log4j; +} -} +com.bigdata.quorum.ServiceImpl { + serviceDir = org.apache.zookeeper.server.quorum.QuorumPeerMain.serviceDir; + clientPort=org.apache.zookeeper.server.quorum.QuorumPeerMain.clientPort; + tickTime=org.apache.zookeeper.server.quorum.QuorumPeerMain.tickTime; + initLimit=org.apache.zookeeper.server.quorum.QuorumPeerMain.initLimit; + syncLimit=org.apache.zookeeper.server.quorum.QuorumPeerMain.syncLimit; + servers=org.apache.zookeeper.server.quorum.QuorumPeerMain.servers; + + log4j=bigdata.log4j; +} \ No newline at end of file This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
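Editor's note: the tearDown() change above stops the smart-proxy zookeeper wrapper by discovering the QuorumPeerService through a Jini lookup service and calling DestroyAdmin on its admin proxy. The stand-alone sketch below condenses that discover-then-destroy pattern from the diff; the `lookupHost` parameter, the three-second discovery wait, and the class name are illustrative assumptions rather than committed code.

```java
import com.bigdata.service.QuorumPeerService;
import com.sun.jini.admin.DestroyAdmin;
import net.jini.admin.Administrable;
import net.jini.core.discovery.LookupLocator;
import net.jini.core.lookup.ServiceItem;
import net.jini.core.lookup.ServiceTemplate;
import net.jini.discovery.DiscoveryGroupManagement;
import net.jini.discovery.LookupDiscoveryManager;
import net.jini.lookup.ServiceDiscoveryManager;

public class QuorumPeerShutdown {

    /** Discover every registered QuorumPeerService and destroy it via its admin proxy. */
    public static void shutdown(final String lookupHost) throws Exception {
        // Unicast discovery of lookup services on the given host (no group discovery).
        final LookupLocator[] locs = { new LookupLocator("jini://" + lookupHost) };
        final LookupDiscoveryManager ldm = new LookupDiscoveryManager(
                DiscoveryGroupManagement.NO_GROUPS, locs, null);

        // Wait up to ~3 seconds for at least one lookup service to be discovered.
        for (int i = 0; i < 3 && ldm.getRegistrars().length == 0; i++) {
            Thread.sleep(1000L);
        }

        final ServiceDiscoveryManager sdm = new ServiceDiscoveryManager(ldm, null);
        try {
            // Match any service registered under the QuorumPeerService interface.
            final ServiceTemplate tmpl = new ServiceTemplate(
                    null, new Class[] { QuorumPeerService.class }, null);
            final ServiceItem[] items = sdm.lookup(tmpl, Integer.MAX_VALUE, null);
            for (ServiceItem item : items) {
                try {
                    // Ask the smart proxy for its admin proxy, then destroy the service.
                    final Object admin = ((Administrable) item.service).getAdmin();
                    ((DestroyAdmin) admin).destroy();
                } catch (Exception e) {
                    System.err.println("failure on zookeeper destroy [" + item.service + "]: " + e);
                }
            }
        } finally {
            sdm.terminate();
        }
    }
}
```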
From: <mrp...@us...> - 2010-12-07 22:32:03
Revision: 3998 http://bigdata.svn.sourceforge.net/bigdata/?rev=3998&view=rev Author: mrpersonick Date: 2010-12-07 22:31:55 +0000 (Tue, 07 Dec 2010) Log Message: ----------- adding test for optional join groups Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestNestedOptionals.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngineOptionalJoins.java Added: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngineOptionalJoins.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngineOptionalJoins.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngineOptionalJoins.java 2010-12-07 22:31:55 UTC (rev 3998) @@ -0,0 +1,980 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Aug 23, 2010 + */ + +package com.bigdata.bop.engine; + +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.UUID; +import java.util.concurrent.CancellationException; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.Executor; +import java.util.concurrent.FutureTask; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicInteger; + +import junit.framework.TestCase2; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.BOpEvaluationContext; +import com.bigdata.bop.Constant; +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IConstant; +import com.bigdata.bop.IConstraint; +import com.bigdata.bop.IVariable; +import com.bigdata.bop.IVariableOrConstant; +import com.bigdata.bop.NV; +import com.bigdata.bop.PipelineOp; +import com.bigdata.bop.Var; +import com.bigdata.bop.IPredicate.Annotations; +import com.bigdata.bop.ap.E; +import com.bigdata.bop.ap.Predicate; +import com.bigdata.bop.ap.R; +import com.bigdata.bop.bindingSet.ArrayBindingSet; +import com.bigdata.bop.bindingSet.HashBindingSet; +import com.bigdata.bop.bset.ConditionalRoutingOp; +import com.bigdata.bop.bset.StartOp; +import com.bigdata.bop.constraint.EQ; +import com.bigdata.bop.constraint.EQConstant; +import com.bigdata.bop.constraint.NEConstant; +import com.bigdata.bop.fed.TestFederatedQueryEngine; +import com.bigdata.bop.join.PipelineJoin; +import com.bigdata.bop.solutions.SliceOp; +import com.bigdata.bop.solutions.SliceOp.SliceStats; +import com.bigdata.io.DirectBufferPoolAllocator.IAllocationContext; +import com.bigdata.journal.BufferMode; +import com.bigdata.journal.ITx; +import com.bigdata.journal.Journal; +import 
com.bigdata.relation.accesspath.IAsynchronousIterator; +import com.bigdata.relation.accesspath.ThickAsynchronousIterator; +import com.bigdata.striterator.ChunkedArrayIterator; +import com.bigdata.striterator.Dechunkerator; +import com.bigdata.striterator.ICloseableIterator; +import com.bigdata.util.InnerCause; +import com.bigdata.util.concurrent.LatchedExecutor; +import com.ibm.icu.impl.ByteBuffer; + +/** + * Test suite for the {@link QueryEngine} against a local database instance. + * <p> + * Note: The {@link BOp}s are unit tested separately. This test suite is focused + * on interactions when {@link BOp}s are chained together in a query, such as a + * sequence of pipeline joins, a slice applied to a query, etc. + * + * <pre> + * -Dlog4j.configuration=bigdata/src/resources/logging/log4j.properties + * </pre> + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id: TestQueryEngine.java 3950 2010-11-17 02:14:08Z thompsonbry $ + * + * @see TestFederatedQueryEngine + * + * @todo write a unit and stress tests for deadlines. + */ +public class TestQueryEngineOptionalJoins extends TestCase2 { + + /** + * + */ + public TestQueryEngineOptionalJoins() { + } + + /** + * @param name + */ + public TestQueryEngineOptionalJoins(String name) { + super(name); + } + + @Override + public Properties getProperties() { + + final Properties p = new Properties(super.getProperties()); + + p.setProperty(Journal.Options.BUFFER_MODE, BufferMode.Transient + .toString()); + + return p; + + } + + static private final String namespace = "ns"; + Journal jnl; + QueryEngine queryEngine; + + public void setUp() throws Exception { + + jnl = new Journal(getProperties()); + + loadData(jnl); + + queryEngine = new QueryEngine(jnl); + + queryEngine.init(); + + } + + /** + * Create and populate relation in the {@link #namespace}. + */ + private void loadData(final Journal store) { + + // create the relation. + final R rel = new R(store, namespace, ITx.UNISOLATED, new Properties()); + rel.create(); + + // data to insert (in key order for convenience). + final E[] a = {// + new E("Paul", "Mary"),// [0] + new E("Paul", "Brad"),// [1] + + new E("John", "Mary"),// [0] + new E("John", "Brad"),// [1] + + new E("Mary", "Brad"),// [1] + + new E("Brad", "Fred"),// [1] + new E("Brad", "Leon"),// [1] + }; + + // insert data (the records are not pre-sorted). + rel.insert(new ChunkedArrayIterator<E>(a.length, a, null/* keyOrder */)); + + // Do commit since not scale-out. + store.commit(); + + } + + public void tearDown() throws Exception { + + if (queryEngine != null) { + queryEngine.shutdownNow(); + queryEngine = null; + } + + if (jnl != null) { + jnl.destroy(); + jnl = null; + } + + } + + /** + * Return an {@link IAsynchronousIterator} that will read a single, + * empty {@link IBindingSet}. + * + * @param bindingSet + * the binding set. + */ + protected ThickAsynchronousIterator<IBindingSet[]> newBindingSetIterator( + final IBindingSet bindingSet) { + + return new ThickAsynchronousIterator<IBindingSet[]>( + new IBindingSet[][] { new IBindingSet[] { bindingSet } }); + + } + + /** + * Return an {@link IAsynchronousIterator} that will read a single, chunk + * containing all of the specified {@link IBindingSet}s. + * + * @param bindingSets + * the binding sets. 
+ */ + protected ThickAsynchronousIterator<IBindingSet[]> newBindingSetIterator( + final IBindingSet[] bindingSets) { + + return new ThickAsynchronousIterator<IBindingSet[]>( + new IBindingSet[][] { bindingSets }); + + } + + /** + * Return an {@link IAsynchronousIterator} that will read a single, chunk + * containing all of the specified {@link IBindingSet}s. + * + * @param bindingSetChunks + * the chunks of binding sets. + */ + protected ThickAsynchronousIterator<IBindingSet[]> newBindingSetIterator( + final IBindingSet[][] bindingSetChunks) { + + return new ThickAsynchronousIterator<IBindingSet[]>(bindingSetChunks); + + } + + /** + * Unit test for optional join group. Three joins are used and target a + * {@link SliceOp}. The 2nd and 3rd joins are an optional join group. + * Intermediate results which do not succeed on the optional join are + * forwarded to the {@link SliceOp} which is the target specified by the + * {@link PipelineOp.Annotations#ALT_SINK_REF}. + * + * The optional join group takes the form: + * (a b) + * optional { + * (b c) + * (c d) + * } + * + * The (a b) tail will match everything in the knowledge base. The join + * group takes us two hops out from ?b. There should be four solutions + * that succeed the optional join group: + * + * (paul mary brad fred) + * (paul mary brad leon) + * (john mary brad fred) + * (john mary brad leon) + * + * and five more that don't succeed the optional join group: + * + * (paul brad) * + * (john brad) * + * (mary brad) * + * (brad fred) + * (brad leon) + * + * In this cases marked with a *, ?c will become temporarily bound to fred + * and leon (since brad knows fred and leon), but the (c d) tail will fail + * since fred and leon don't know anyone else. At this point, the ?c binding + * must be removed from the solution. 
+ */ + public void test_query_join2_optionals() throws Exception { + + final int startId = 1; + final int joinId1 = 2; + final int predId1 = 3; + final int joinId2 = 4; + final int predId2 = 5; + final int joinId3 = 6; + final int predId3 = 7; + final int sliceId = 8; + + final IVariable<?> a = Var.var("a"); + final IVariable<?> b = Var.var("b"); + final IVariable<?> c = Var.var("c"); + final IVariable<?> d = Var.var("d"); + + final PipelineOp startOp = new StartOp(new BOp[] {}, + NV.asMap(new NV[] {// + new NV(Predicate.Annotations.BOP_ID, startId),// + new NV(SliceOp.Annotations.EVALUATION_CONTEXT, + BOpEvaluationContext.CONTROLLER),// + })); + + final Predicate<?> pred1Op = new Predicate<E>( + new IVariableOrConstant[] { a, b }, NV + .asMap(new NV[] {// + new NV(Predicate.Annotations.RELATION_NAME, + new String[] { namespace }),// + new NV(Predicate.Annotations.BOP_ID, predId1),// + new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// + })); + + final Predicate<?> pred2Op = new Predicate<E>( + new IVariableOrConstant[] { b, c }, NV + .asMap(new NV[] {// + new NV(Predicate.Annotations.RELATION_NAME, + new String[] { namespace }),// + new NV(Predicate.Annotations.BOP_ID, predId2),// + new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// + })); + + final Predicate<?> pred3Op = new Predicate<E>( + new IVariableOrConstant[] { c, d }, NV + .asMap(new NV[] {// + new NV(Predicate.Annotations.RELATION_NAME, + new String[] { namespace }),// + new NV(Predicate.Annotations.BOP_ID, predId3),// + new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// + })); + + final PipelineOp join1Op = new PipelineJoin<E>(// + new BOp[]{startOp},// + new NV(Predicate.Annotations.BOP_ID, joinId1),// + new NV(PipelineJoin.Annotations.PREDICATE,pred1Op)); + + final PipelineOp join2Op = new PipelineJoin<E>(// + new BOp[] { join1Op },// + new NV(Predicate.Annotations.BOP_ID, joinId2),// + new NV(PipelineJoin.Annotations.PREDICATE, pred2Op),// + // join is optional. + new NV(PipelineJoin.Annotations.OPTIONAL, true),// + // optional target is the same as the default target. + new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId)); + + final PipelineOp join3Op = new PipelineJoin<E>(// + new BOp[] { join2Op },// + new NV(Predicate.Annotations.BOP_ID, joinId3),// + new NV(PipelineJoin.Annotations.PREDICATE, pred3Op),// + // join is optional. + new NV(PipelineJoin.Annotations.OPTIONAL, true),// + // optional target is the same as the default target. + new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId)); + + final PipelineOp sliceOp = new SliceOp(// + new BOp[]{join3Op}, + NV.asMap(new NV[] {// + new NV(BOp.Annotations.BOP_ID, sliceId),// + new NV(BOp.Annotations.EVALUATION_CONTEXT, + BOpEvaluationContext.CONTROLLER),// + })); + + final PipelineOp query = sliceOp; + + // start the query. + final UUID queryId = UUID.randomUUID(); + final IChunkMessage<IBindingSet> initialChunkMessage; + { + + final IBindingSet initialBindings = new HashBindingSet(); + +// initialBindings.set(Var.var("x"), new Constant<String>("Mary")); + + initialChunkMessage = new LocalChunkMessage<IBindingSet>(queryEngine, + queryId, startId,// + -1, // partitionId + newBindingSetIterator(initialBindings)); + } + final RunningQuery runningQuery = queryEngine.eval(queryId, query, + initialChunkMessage); + + // verify solutions. + { + + // the expected solutions. + final IBindingSet[] expected = new IBindingSet[] {// + // four solutions where the optional join succeeds. 
+ new ArrayBindingSet(// + new IVariable[] { a, b, c, d },// + new IConstant[] { new Constant<String>("Paul"), + new Constant<String>("Mary"), + new Constant<String>("Brad"), + new Constant<String>("Fred") }// + ), + new ArrayBindingSet(// + new IVariable[] { a, b, c, d },// + new IConstant[] { new Constant<String>("Paul"), + new Constant<String>("Mary"), + new Constant<String>("Brad"), + new Constant<String>("Leon") }// + ), + new ArrayBindingSet(// + new IVariable[] { a, b, c, d },// + new IConstant[] { new Constant<String>("John"), + new Constant<String>("Mary"), + new Constant<String>("Brad"), + new Constant<String>("Fred") }// + ), + new ArrayBindingSet(// + new IVariable[] { a, b, c, d },// + new IConstant[] { new Constant<String>("John"), + new Constant<String>("Mary"), + new Constant<String>("Brad"), + new Constant<String>("Leon") }// + ), + // plus anything we read from the first access path which did not + // pass the optional join + new ArrayBindingSet(// + new IVariable[] { a, b },// + new IConstant[] { new Constant<String>("Paul"), + new Constant<String>("Brad") }// + ), + new ArrayBindingSet(// + new IVariable[] { a, b },// + new IConstant[] { new Constant<String>("John"), + new Constant<String>("Brad") }// + ), + new ArrayBindingSet(// + new IVariable[] { a, b },// + new IConstant[] { new Constant<String>("Mary"), + new Constant<String>("Brad") }// + ), + new ArrayBindingSet(// + new IVariable[] { a, b },// + new IConstant[] { new Constant<String>("Brad"), + new Constant<String>("Fred") }// + ), + new ArrayBindingSet(// + new IVariable[] { a, b },// + new IConstant[] { new Constant<String>("Brad"), + new Constant<String>("Leon") }// + ) + }; + + assertSameSolutionsAnyOrder(expected, + new Dechunkerator<IBindingSet>(runningQuery.iterator())); + + } + + // Wait until the query is done. + runningQuery.get(); + final Map<Integer, BOpStats> statsMap = runningQuery.getStats(); + { + // validate the stats map. + assertNotNull(statsMap); + assertEquals(5, statsMap.size()); + if (log.isInfoEnabled()) + log.info(statsMap.toString()); + } + + } + + /** + * Unit test for optional join group with a filter. Three joins are used + * and target a {@link SliceOp}. The 2nd and 3rd joins are an optional join + * group. Intermediate results which do not succeed on the optional join are + * forwarded to the {@link SliceOp} which is the target specified by the + * {@link PipelineOp.Annotations#ALT_SINK_REF}. The optional join group + * contains a filter. + * + * The optional join group takes the form: + * (a b) + * optional { + * (b c) + * (c d) + * filter(d != Leon) + * } + * + * The (a b) tail will match everything in the knowledge base. The join + * group takes us two hops out from ?b. There should be two solutions + * that succeed the optional join group: + * + * (paul mary brad fred) + * (john mary brad fred) + * + * and five more that don't succeed the optional join group: + * + * (paul brad) * + * (john brad) * + * (mary brad) * + * (brad fred) + * (brad leon) + * + * In this cases marked with a *, ?c will become temporarily bound to fred + * and leon (since brad knows fred and leon), but the (c d) tail will fail + * since fred and leon don't know anyone else. At this point, the ?c binding + * must be removed from the solution. + * + * The filter (d != Leon) will prune the two solutions: + * + * (paul mary brad leon) + * (john mary brad leon) + * + * since ?d is bound to Leon in those cases. 
+ */ + public void test_query_optionals_filter() throws Exception { + + final int startId = 1; + final int joinId1 = 2; + final int predId1 = 3; + final int joinId2 = 4; + final int predId2 = 5; + final int joinId3 = 6; + final int predId3 = 7; + final int sliceId = 8; + + final IVariable<?> a = Var.var("a"); + final IVariable<?> b = Var.var("b"); + final IVariable<?> c = Var.var("c"); + final IVariable<?> d = Var.var("d"); + + final PipelineOp startOp = new StartOp(new BOp[] {}, + NV.asMap(new NV[] {// + new NV(Predicate.Annotations.BOP_ID, startId),// + new NV(SliceOp.Annotations.EVALUATION_CONTEXT, + BOpEvaluationContext.CONTROLLER),// + })); + + final Predicate<?> pred1Op = new Predicate<E>( + new IVariableOrConstant[] { a, b }, NV + .asMap(new NV[] {// + new NV(Predicate.Annotations.RELATION_NAME, + new String[] { namespace }),// + new NV(Predicate.Annotations.BOP_ID, predId1),// + new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// + })); + + final Predicate<?> pred2Op = new Predicate<E>( + new IVariableOrConstant[] { b, c }, NV + .asMap(new NV[] {// + new NV(Predicate.Annotations.RELATION_NAME, + new String[] { namespace }),// + new NV(Predicate.Annotations.BOP_ID, predId2),// + new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// + })); + + final Predicate<?> pred3Op = new Predicate<E>( + new IVariableOrConstant[] { c, d }, NV + .asMap(new NV[] {// + new NV(Predicate.Annotations.RELATION_NAME, + new String[] { namespace }),// + new NV(Predicate.Annotations.BOP_ID, predId3),// + new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// + })); + + final PipelineOp join1Op = new PipelineJoin<E>(// + new BOp[]{startOp},// + new NV(Predicate.Annotations.BOP_ID, joinId1),// + new NV(PipelineJoin.Annotations.PREDICATE,pred1Op)); + + final PipelineOp join2Op = new PipelineJoin<E>(// + new BOp[] { join1Op },// + new NV(Predicate.Annotations.BOP_ID, joinId2),// + new NV(PipelineJoin.Annotations.PREDICATE, pred2Op),// + // join is optional. + new NV(PipelineJoin.Annotations.OPTIONAL, true),// + // optional target is the same as the default target. + new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId)); + + final PipelineOp join3Op = new PipelineJoin<E>(// + new BOp[] { join2Op },// + new NV(Predicate.Annotations.BOP_ID, joinId3),// + new NV(PipelineJoin.Annotations.PREDICATE, pred3Op),// + // constraint d != Leon + new NV(PipelineJoin.Annotations.CONSTRAINTS, + new IConstraint[] { new NEConstant(d, new Constant<String>("Leon")) }), + // join is optional. + new NV(PipelineJoin.Annotations.OPTIONAL, true),// + // optional target is the same as the default target. + new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId)); + + final PipelineOp sliceOp = new SliceOp(// + new BOp[]{join3Op}, + NV.asMap(new NV[] {// + new NV(BOp.Annotations.BOP_ID, sliceId),// + new NV(BOp.Annotations.EVALUATION_CONTEXT, + BOpEvaluationContext.CONTROLLER),// + })); + + final PipelineOp query = sliceOp; + + // start the query. + final UUID queryId = UUID.randomUUID(); + final IChunkMessage<IBindingSet> initialChunkMessage; + { + + final IBindingSet initialBindings = new HashBindingSet(); + +// initialBindings.set(Var.var("x"), new Constant<String>("Mary")); + + initialChunkMessage = new LocalChunkMessage<IBindingSet>(queryEngine, + queryId, startId,// + -1, // partitionId + newBindingSetIterator(initialBindings)); + } + final RunningQuery runningQuery = queryEngine.eval(queryId, query, + initialChunkMessage); + + // verify solutions. + { + + // the expected solutions. 
+ final IBindingSet[] expected = new IBindingSet[] {// + // two solutions where the optional join succeeds. + new ArrayBindingSet(// + new IVariable[] { a, b, c, d },// + new IConstant[] { new Constant<String>("Paul"), + new Constant<String>("Mary"), + new Constant<String>("Brad"), + new Constant<String>("Fred") }// + ), + new ArrayBindingSet(// + new IVariable[] { a, b, c, d },// + new IConstant[] { new Constant<String>("John"), + new Constant<String>("Mary"), + new Constant<String>("Brad"), + new Constant<String>("Fred") }// + ), + // plus anything we read from the first access path which did not + // pass the optional join + new ArrayBindingSet(// + new IVariable[] { a, b },// + new IConstant[] { new Constant<String>("Paul"), + new Constant<String>("Brad") }// + ), + new ArrayBindingSet(// + new IVariable[] { a, b },// + new IConstant[] { new Constant<String>("John"), + new Constant<String>("Brad") }// + ), + new ArrayBindingSet(// + new IVariable[] { a, b },// + new IConstant[] { new Constant<String>("Mary"), + new Constant<String>("Brad") }// + ), + new ArrayBindingSet(// + new IVariable[] { a, b },// + new IConstant[] { new Constant<String>("Brad"), + new Constant<String>("Fred") }// + ), + new ArrayBindingSet(// + new IVariable[] { a, b },// + new IConstant[] { new Constant<String>("Brad"), + new Constant<String>("Leon") }// + ) + }; + + assertSameSolutionsAnyOrder(expected, + new Dechunkerator<IBindingSet>(runningQuery.iterator())); + + } + + // Wait until the query is done. + runningQuery.get(); + final Map<Integer, BOpStats> statsMap = runningQuery.getStats(); + { + // validate the stats map. + assertNotNull(statsMap); + assertEquals(5, statsMap.size()); + if (log.isInfoEnabled()) + log.info(statsMap.toString()); + } + + } + + /** + * Unit test for optional join group with a filter on a variable outside + * the optional join group. Three joins are used and target a + * {@link SliceOp}. The 2nd and 3rd joins are an optional join + * group. Intermediate results which do not succeed on the optional join are + * forwarded to the {@link SliceOp} which is the target specified by the + * {@link PipelineOp.Annotations#ALT_SINK_REF}. The optional join group + * contains a filter that uses a variable outside the optional join group. + * + * The query takes the form: + * (a b) + * optional { + * (b c) + * (c d) + * filter(a != Paul) + * } + * + * The (a b) tail will match everything in the knowledge base. The join + * group takes us two hops out from ?b. There should be two solutions + * that succeed the optional join group: + * + * (john mary brad fred) + * (john mary brad leon) + * + * and six more that don't succeed the optional join group: + * + * (paul mary) * + * (paul brad) * + * (john brad) + * (mary brad) + * (brad fred) + * (brad leon) + * + * In this cases marked with a *, ?a is bound to Paul even though there is + * a filter that specifically prohibits a = Paul. This is because the filter + * is inside the optional join group, which means that solutions can still + * include a = Paul, but the optional join group should not run in that + * case. 
+ */ + public void test_query_optionals_filter2() throws Exception { + + final int startId = 1; + final int joinId1 = 2; + final int predId1 = 3; + final int joinId2 = 4; + final int predId2 = 5; + final int joinId3 = 6; + final int predId3 = 7; + final int sliceId = 8; + + final IVariable<?> a = Var.var("a"); + final IVariable<?> b = Var.var("b"); + final IVariable<?> c = Var.var("c"); + final IVariable<?> d = Var.var("d"); + + /* + * Not quite sure how to write this one. I think it probably goes + * something like this: + * + * 1. startOp + * 2. join1Op(a b) + * 3. conditionalRoutingOp( if a = Paul then goto sliceOp ) + * 4. join2Op(b c) + * 5. join3Op(c d) + * 6. sliceOp + */ + + final PipelineOp startOp = new StartOp(new BOp[] {}, + NV.asMap(new NV[] {// + new NV(Predicate.Annotations.BOP_ID, startId),// + new NV(SliceOp.Annotations.EVALUATION_CONTEXT, + BOpEvaluationContext.CONTROLLER),// + })); + + final PipelineOp sliceOp = new SliceOp(// + new BOp[]{startOp}, + NV.asMap(new NV[] {// + new NV(BOp.Annotations.BOP_ID, sliceId),// + new NV(BOp.Annotations.EVALUATION_CONTEXT, + BOpEvaluationContext.CONTROLLER),// + })); + + final PipelineOp query = sliceOp; + + // start the query. + final UUID queryId = UUID.randomUUID(); + final IChunkMessage<IBindingSet> initialChunkMessage; + { + + final IBindingSet initialBindings = new HashBindingSet(); + +// initialBindings.set(Var.var("x"), new Constant<String>("Mary")); + + initialChunkMessage = new LocalChunkMessage<IBindingSet>(queryEngine, + queryId, startId,// + -1, // partitionId + newBindingSetIterator(initialBindings)); + } + final RunningQuery runningQuery = queryEngine.eval(queryId, query, + initialChunkMessage); + + // verify solutions. + { + + // the expected solutions. + final IBindingSet[] expected = new IBindingSet[] {// + // two solutions where the optional join succeeds. + new ArrayBindingSet(// + new IVariable[] { a, b, c, d },// + new IConstant[] { new Constant<String>("John"), + new Constant<String>("Mary"), + new Constant<String>("Brad"), + new Constant<String>("John") }// + ), + new ArrayBindingSet(// + new IVariable[] { a, b, c, d },// + new IConstant[] { new Constant<String>("John"), + new Constant<String>("Mary"), + new Constant<String>("Brad"), + new Constant<String>("Fred") }// + ), + // plus anything we read from the first access path which did not + // pass the optional join + new ArrayBindingSet(// + new IVariable[] { a, b },// + new IConstant[] { new Constant<String>("Paul"), + new Constant<String>("Mary") }// + ), + new ArrayBindingSet(// + new IVariable[] { a, b },// + new IConstant[] { new Constant<String>("Paul"), + new Constant<String>("Brad") }// + ), + new ArrayBindingSet(// + new IVariable[] { a, b },// + new IConstant[] { new Constant<String>("John"), + new Constant<String>("Brad") }// + ), + new ArrayBindingSet(// + new IVariable[] { a, b },// + new IConstant[] { new Constant<String>("Mary"), + new Constant<String>("Brad") }// + ), + new ArrayBindingSet(// + new IVariable[] { a, b },// + new IConstant[] { new Constant<String>("Brad"), + new Constant<String>("Fred") }// + ), + new ArrayBindingSet(// + new IVariable[] { a, b },// + new IConstant[] { new Constant<String>("Brad"), + new Constant<String>("Leon") }// + ) + }; + + assertSameSolutionsAnyOrder(expected, + new Dechunkerator<IBindingSet>(runningQuery.iterator())); + + } + + // Wait until the query is done. + runningQuery.get(); + final Map<Integer, BOpStats> statsMap = runningQuery.getStats(); + { + // validate the stats map. 
+ assertNotNull(statsMap); + assertEquals(6, statsMap.size()); + if (log.isInfoEnabled()) + log.info(statsMap.toString()); + } + + } + + /** + * Verify the expected solutions. + * + * @param expected + * @param itr + */ + static public void assertSameSolutions(final IBindingSet[] expected, + final IAsynchronousIterator<IBindingSet[]> itr) { + try { + int n = 0; + while (itr.hasNext()) { + final IBindingSet[] e = itr.next(); + if (log.isInfoEnabled()) + log.info(n + " : chunkSize=" + e.length); + for (int i = 0; i < e.length; i++) { + if (log.isInfoEnabled()) + log.info(n + " : " + e[i]); + if (n >= expected.length) { + fail("Willing to deliver too many solutions: n=" + n + + " : " + e[i]); + } + if (!expected[n].equals(e[i])) { + fail("n=" + n + ", expected=" + expected[n] + + ", actual=" + e[i]); + } + n++; + } + } + assertEquals("Wrong number of solutions", expected.length, n); + } finally { + itr.close(); + } + } + + /** + * Verifies that the iterator visits the specified objects in some arbitrary + * ordering and that the iterator is exhausted once all expected objects + * have been visited. The implementation uses a selection without + * replacement "pattern". + * <p> + * Note: If the objects being visited do not correctly implement hashCode() + * and equals() then this can fail even if the desired objects would be + * visited. When this happens, fix the implementation classes. + */ + static public <T> void assertSameSolutionsAnyOrder(final T[] expected, + final Iterator<T> actual) { + + assertSameSolutionsAnyOrder("", expected, actual); + + } + + /** + * Verifies that the iterator visits the specified objects in some arbitrary + * ordering and that the iterator is exhausted once all expected objects + * have been visited. The implementation uses a selection without + * replacement "pattern". + * <p> + * Note: If the objects being visited do not correctly implement hashCode() + * and equals() then this can fail even if the desired objects would be + * visited. When this happens, fix the implementation classes. + */ + static public <T> void assertSameSolutionsAnyOrder(final String msg, + final T[] expected, final Iterator<T> actual) { + + try { + + /* + * Populate a map that we will use to realize the match and + * selection without replacement logic. The map uses counters to + * handle duplicate keys. This makes it possible to write tests in + * which two or more binding sets which are "equal" appear. + */ + + final int nrange = expected.length; + + final java.util.Map<T, AtomicInteger> range = new java.util.LinkedHashMap<T, AtomicInteger>(); + + for (int j = 0; j < nrange; j++) { + + AtomicInteger count = range.get(expected[j]); + + if (count == null) { + + count = new AtomicInteger(); + + } + + range.put(expected[j], count); + + count.incrementAndGet(); + + } + + // Do selection without replacement for the objects visited by + // iterator. 
+ + for (int j = 0; j < nrange; j++) { + + if (!actual.hasNext()) { + + fail(msg + + ": Iterator exhausted while expecting more object(s)" + + ": index=" + j); + + } + + final T actualObject = actual.next(); + + if (log.isInfoEnabled()) + log.info("visting: " + actualObject); + + AtomicInteger counter = range.get(actualObject); + + if (counter == null || counter.get() == 0) { + + fail("Object not expected" + ": index=" + j + ", object=" + + actualObject); + + } + + counter.decrementAndGet(); + + } + + if (actual.hasNext()) { + + fail("Iterator will deliver too many objects."); + + } + + } finally { + + if (actual instanceof ICloseableIterator<?>) { + + ((ICloseableIterator<T>) actual).close(); + + } + + } + + } + +} Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestNestedOptionals.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestNestedOptionals.java 2010-12-06 22:09:42 UTC (rev 3997) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestNestedOptionals.java 2010-12-07 22:31:55 UTC (rev 3998) @@ -26,11 +26,11 @@ package com.bigdata.rdf.sail; -import java.util.Arrays; import java.util.Collection; import java.util.LinkedList; import java.util.List; import java.util.Properties; + import org.apache.log4j.Logger; import org.openrdf.model.Literal; import org.openrdf.model.URI; @@ -49,6 +49,13 @@ import org.openrdf.query.algebra.TupleExpr; import org.openrdf.query.algebra.ValueExpr; import org.openrdf.query.algebra.Var; +import org.openrdf.repository.Repository; +import org.openrdf.repository.RepositoryConnection; +import org.openrdf.repository.sail.SailRepository; +import org.openrdf.repository.sail.SailTupleQuery; +import org.openrdf.sail.Sail; +import org.openrdf.sail.memory.MemoryStore; + import com.bigdata.rdf.axioms.NoAxioms; import com.bigdata.rdf.store.BD; import com.bigdata.rdf.vocab.NoVocabulary; @@ -92,9 +99,160 @@ public TestNestedOptionals(String arg0) { super(arg0); } + + public void testNestedOptionals() throws Exception { + + final Sail sail = new MemoryStore(); + sail.initialize(); + final Repository repo = new SailRepository(sail); + final RepositoryConnection cxn = repo.getConnection(); + cxn.setAutoCommit(false); + + try { + + final ValueFactory vf = sail.getValueFactory(); - public void testNestedOptionals1() throws Exception { + /* + * Create some terms. + */ + final URI john = vf.createURI(BD.NAMESPACE + "john"); + final URI mary = vf.createURI(BD.NAMESPACE + "mary"); + final URI leon = vf.createURI(BD.NAMESPACE + "leon"); + final URI paul = vf.createURI(BD.NAMESPACE + "paul"); + final URI brad = vf.createURI(BD.NAMESPACE + "brad"); + final URI fred = vf.createURI(BD.NAMESPACE + "fred"); + final URI knows = vf.createURI(BD.NAMESPACE + "knows"); + /* + * Create some statements. + */ + cxn.add(paul, knows, mary); + cxn.add(paul, knows, brad); + + cxn.add(john, knows, mary); + cxn.add(john, knows, brad); + + cxn.add(mary, knows, brad); + cxn.add(brad, knows, fred); + cxn.add(brad, knows, leon); + + /* + * Note: The either flush() or commit() is required to flush the + * statement buffers to the database before executing any operations + * that go around the sail. + */ + cxn.commit(); + + { + + String query = + "prefix bd: <"+BD.NAMESPACE+"> " + + "select * " + + "where { " + + " ?a bd:knows ?b . " + + " OPTIONAL { " + + " ?b bd:knows ?c . " + + " ?c bd:knows ?d . 
" + + " } " + + "}"; + + final SailTupleQuery tupleQuery = (SailTupleQuery) + cxn.prepareTupleQuery(QueryLanguage.SPARQL, query); + tupleQuery.setIncludeInferred(false /* includeInferred */); + + if (INFO) { + log.info(query); + final TupleQueryResult result = tupleQuery.evaluate(); + while (result.hasNext()) { + log.info(result.next()); + } + } + + final Collection<BindingSet> answer = new LinkedList<BindingSet>(); + answer.add(createBindingSet()); + + // result = tupleQuery.evaluate(); + // compare(result, answer); + + } + + { + + String query = + "prefix bd: <"+BD.NAMESPACE+"> " + + "select * " + + "where { " + + " ?a bd:knows ?b . " + + " OPTIONAL { " + + " ?b bd:knows ?c . " + + " ?c bd:knows ?d . " + + " filter(?a != bd:paul) " + + " } " + + "}"; + + final SailTupleQuery tupleQuery = (SailTupleQuery) + cxn.prepareTupleQuery(QueryLanguage.SPARQL, query); + tupleQuery.setIncludeInferred(false /* includeInferred */); + + if (INFO) { + log.info(query); + final TupleQueryResult result = tupleQuery.evaluate(); + while (result.hasNext()) { + log.info(result.next()); + } + } + + final Collection<BindingSet> answer = new LinkedList<BindingSet>(); + answer.add(createBindingSet()); + + // result = tupleQuery.evaluate(); + // compare(result, answer); + + } + + { + + String query = + "prefix bd: <"+BD.NAMESPACE+"> " + + "select * " + + "where { " + + " ?a bd:knows ?b . " + + " OPTIONAL { " + + " ?b bd:knows ?c . " + + " ?c bd:knows ?d . " + + " filter(?d != bd:leon) " + + " } " + + "}"; + + final SailTupleQuery tupleQuery = (SailTupleQuery) + cxn.prepareTupleQuery(QueryLanguage.SPARQL, query); + tupleQuery.setIncludeInferred(false /* includeInferred */); + + if (INFO) { + log.info(query); + final TupleQueryResult result = tupleQuery.evaluate(); + while (result.hasNext()) { + log.info(result.next()); + } + } + + final Collection<BindingSet> answer = new LinkedList<BindingSet>(); + answer.add(createBindingSet()); + + // result = tupleQuery.evaluate(); + // compare(result, answer); + + } + + } finally { + cxn.close(); + sail.shutDown(); + } + + } + + private void __testNestedOptionals1() throws Exception { + final BigdataSail sail = getSail(); sail.initialize(); final BigdataSailRepository repo = new BigdataSailRepository(sail); @@ -197,7 +355,7 @@ } - public void testNestedOptionals2() throws Exception { + private void __testNestedOptionals2() throws Exception { final BigdataSail sail = getSail(); sail.initialize(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <btm...@us...> - 2010-12-06 22:09:50
Revision: 3997 http://bigdata.svn.sourceforge.net/bigdata/?rev=3997&view=rev Author: btmurphy Date: 2010-12-06 22:09:42 +0000 (Mon, 06 Dec 2010) Log Message: ----------- [branch dev-btm]: CHECKPOINT - changes to allow smart proxy wrapper for zookeeper to be started by ServicesManagerService; also changes to allow TestServiceStarter test to start smart proxy zookeeper [more fixes to junit tests needed] Modified Paths: -------------- branches/dev-btm/bigdata-jini/src/java/com/bigdata/jini/start/ServicesManagerStartupTask.java branches/dev-btm/bigdata-jini/src/java/com/bigdata/jini/start/config/ServicesManagerConfiguration.java branches/dev-btm/bigdata-jini/src/java/com/bigdata/jini/start/config/ZookeeperServerConfiguration.java branches/dev-btm/bigdata-jini/src/java/com/bigdata/jini/start/process/ZookeeperProcessHelper.java branches/dev-btm/bigdata-jini/src/java/com/bigdata/quorum/ServiceImpl.java branches/dev-btm/bigdata-jini/src/java/com/bigdata/service/jini/util/JiniServicesHelper.java branches/dev-btm/bigdata-jini/src/test/com/bigdata/jini/start/AbstractFedZooTestCase.java branches/dev-btm/bigdata-jini/src/test/com/bigdata/jini/start/TestServiceStarter.java branches/dev-btm/bigdata-jini/src/test/com/bigdata/zookeeper/AbstractZooTestCase.java branches/dev-btm/src/resources/config/bigdataCluster.config Modified: branches/dev-btm/bigdata-jini/src/java/com/bigdata/jini/start/ServicesManagerStartupTask.java =================================================================== --- branches/dev-btm/bigdata-jini/src/java/com/bigdata/jini/start/ServicesManagerStartupTask.java 2010-12-06 21:29:19 UTC (rev 3996) +++ branches/dev-btm/bigdata-jini/src/java/com/bigdata/jini/start/ServicesManagerStartupTask.java 2010-12-06 22:09:42 UTC (rev 3997) @@ -141,7 +141,12 @@ if (serviceConfig instanceof ZookeeperServerConfiguration) { System.out.println("\n---- ServicesManagerStartupTask.doStartup: startZookeeperService() ----"); - startZookeeperService(config); +//BTM - PRE_ZOOKEEPER_SMART_PROXY - BEGIN +//BTM - PRE_ZOOKEEPER_SMART_PROXY startZookeeperService(config); + startZookeeperService + ( ((ZookeeperServerConfiguration)serviceConfig).classType, + config ); +//BTM - PRE_ZOOKEEPER_SMART_PROXY - END } @@ -227,23 +232,38 @@ * * @return <code>true</code> if an instance was started successfully. 
*/ - protected boolean startZookeeperService(final Configuration config) - throws ConfigurationException, IOException { - +//BTM - PRE_ZOOKEEPER_SMART_PROXY - BEGIN +//BTM - PRE_ZOOKEEPER_SMART_PROXY protected boolean startZookeeperService(final Configuration config) +//BTM - PRE_ZOOKEEPER_SMART_PROXY throws ConfigurationException, IOException { +//BTM - PRE_ZOOKEEPER_SMART_PROXY +//BTM - PRE_ZOOKEEPER_SMART_PROXY try { +//BTM - PRE_ZOOKEEPER_SMART_PROXY +//BTM - PRE_ZOOKEEPER_SMART_PROXY return ZookeeperProcessHelper.startZookeeper(config, service) > 0; +//BTM - PRE_ZOOKEEPER_SMART_PROXY +//BTM - PRE_ZOOKEEPER_SMART_PROXY } catch (Throwable t) { +//BTM - PRE_ZOOKEEPER_SMART_PROXY +//BTM - PRE_ZOOKEEPER_SMART_PROXY log.error( +//BTM - PRE_ZOOKEEPER_SMART_PROXY "Could not start zookeeper service: " + t, t); +//BTM - PRE_ZOOKEEPER_SMART_PROXY +//BTM - PRE_ZOOKEEPER_SMART_PROXY return false; +//BTM - PRE_ZOOKEEPER_SMART_PROXY +//BTM - PRE_ZOOKEEPER_SMART_PROXY } +//BTM - PRE_ZOOKEEPER_SMART_PROXY +//BTM - PRE_ZOOKEEPER_SMART_PROXY } +//BTM - PRE_ZOOKEEPER_SMART_PROXY + protected boolean startZookeeperService + (Class classType, Configuration config) + throws ConfigurationException, IOException + { try { - - return ZookeeperProcessHelper.startZookeeper(config, service) > 0; - + return ZookeeperProcessHelper.startZookeeper + (classType, config, service) > 0; } catch (Throwable t) { - - log.error( - "Could not start zookeeper service: " + t, t); - + log.error("Could not start zookeeper service: " + t, t); return false; - } - } +//BTM - PRE_ZOOKEEPER_SMART_PROXY - END /** * If necessary, start the jini core services on this host. Modified: branches/dev-btm/bigdata-jini/src/java/com/bigdata/jini/start/config/ServicesManagerConfiguration.java =================================================================== --- branches/dev-btm/bigdata-jini/src/java/com/bigdata/jini/start/config/ServicesManagerConfiguration.java 2010-12-06 21:29:19 UTC (rev 3996) +++ branches/dev-btm/bigdata-jini/src/java/com/bigdata/jini/start/config/ServicesManagerConfiguration.java 2010-12-06 22:09:42 UTC (rev 3997) @@ -225,7 +225,7 @@ } else if (a.equals(QuorumPeerMain.class.getName())) { System.out.println("\n*** ServicesManagerConfiguration.getConfigurations: QuorumPeerMain BEGIN"); - v.add(new ZookeeperServerConfiguration(config)); + v.add(new ZookeeperServerConfiguration(QuorumPeerMain.class, config)); System.out.println("*** ServicesManagerConfiguration.getConfigurations: QuorumPeerMain END\n"); } else if (a.equals(TransactionServer.class.getName())) { @@ -261,6 +261,12 @@ //BTM - BEGIN: smart proxy impls ------------------------------------------------------------ + } else if (a.equals(com.bigdata.quorum.ServiceImpl.class.getName())) { + +System.out.println("\n*** ServicesManagerConfiguration.getConfigurations: com.bigdata.quorum.ServiceImpl BEGIN"); + v.add(new ZookeeperServerConfiguration(com.bigdata.quorum.ServiceImpl.class, config)); +System.out.println("*** ServicesManagerConfiguration.getConfigurations: com.bigdata.quorum.ServiceImpl END\n"); + } else if (a.equals(com.bigdata.transaction.ServiceImpl.class.getName())) {//transaction service System.out.println("\n*** ServicesManagerConfiguration.getConfigurations: com.bigdata.transaction.ServiceImpl BEGIN"); Modified: branches/dev-btm/bigdata-jini/src/java/com/bigdata/jini/start/config/ZookeeperServerConfiguration.java =================================================================== --- 
branches/dev-btm/bigdata-jini/src/java/com/bigdata/jini/start/config/ZookeeperServerConfiguration.java 2010-12-06 21:29:19 UTC (rev 3996) +++ branches/dev-btm/bigdata-jini/src/java/com/bigdata/jini/start/config/ZookeeperServerConfiguration.java 2010-12-06 22:09:42 UTC (rev 3997) @@ -257,15 +257,32 @@ } +//BTM - PRE_ZOOKEEPER_SMART_PROXY - BEGIN + public Class classType; + private Configuration jiniConfig; +//BTM - PRE_ZOOKEEPER_SMART_PROXY - END + /** * @param className * @param config * @throws ConfigurationException */ - public ZookeeperServerConfiguration(final Configuration config) - throws ConfigurationException { +//BTM - PRE_ZOOKEEPER_SMART_PROXY - BEGIN +//BTM - PRE_ZOOKEEPER_SMART_PROXY public ZookeeperServerConfiguration(final Configuration config) +//BTM - PRE_ZOOKEEPER_SMART_PROXY throws ConfigurationException { +//BTM - PRE_ZOOKEEPER_SMART_PROXY +//BTM - PRE_ZOOKEEPER_SMART_PROXY super(QuorumPeerMain.class.getName(), config); + public ZookeeperServerConfiguration(Class classType, + Configuration config) + throws ConfigurationException + { - super(QuorumPeerMain.class.getName(), config); + super(classType.getName(), config); + this.classType = classType; + this.jiniConfig = config; +//BTM - PRE_ZOOKEEPER_SMART_PROXY - END +//BTM +System.out.println("*** ZookeeperServerConfiguration: constructor ***"); servers = (String) config.getEntry(Options.NAMESPACE, Options.SERVERS, String.class); @@ -455,6 +472,7 @@ public ZookeeperServiceStarter newServiceStarter(IServiceListener listener, ZookeeperServerEntry entry) { +System.out.println("*** ZookeeperServerConfiguration ---> newServiceStarter ***"); return new ZookeeperServiceStarter(listener, entry); } @@ -511,6 +529,7 @@ ZookeeperServerEntry entry) { super(listener); +System.out.println("*** ZookeeperServerConfiguration.ZookeeperServiceStarter: constructor - BEGIN ***"); if (entry == null) throw new IllegalArgumentException(); @@ -524,6 +543,7 @@ */ dataDir = new File(ZookeeperServerConfiguration.this.dataDir, Integer.toString(entry.id)).getAbsoluteFile(); +System.out.println("*** ZookeeperServerConfiguration.ZookeeperServiceStarter: dataDir = "+dataDir); if (log.isInfoEnabled()) log.info(Options.DATA_DIR + "=" + dataDir); @@ -536,6 +556,7 @@ */ dataLogDir = new File(ZookeeperServerConfiguration.this.dataLogDir, Integer.toString(entry.id)).getAbsoluteFile(); +System.out.println("*** ZookeeperServerConfiguration.ZookeeperServiceStarter: dataLogDir = "+dataLogDir); if (log.isInfoEnabled()) log.info(Options.DATA_LOG_DIR + "=" + dataLogDir); @@ -544,9 +565,12 @@ configFile = new File(dataDir, ZookeeperServerConfiguration.this.configFile) .getAbsoluteFile(); +System.out.println("*** ZookeeperServerConfiguration.ZookeeperServiceStarter: generated configFile = "+configFile); // the server id is written on this file. myidFile = new File(dataDir, "myid"); +System.out.println("*** ZookeeperServerConfiguration.ZookeeperServiceStarter: myidFile = "+myidFile); +System.out.println("*** ZookeeperServerConfiguration.ZookeeperServiceStarter: constructor - END ***"); } @@ -618,7 +642,14 @@ /* * Write the zookeeper server configuration file. */ - writeZookeeperConfigFile(); +//BTM - PRE_ZOOKEEPER_SMART_PROXY - BEGIN +//BTM - PRE_ZOOKEEPER_SMART_PROXY writeZookeeperConfigFile(); + if ( (Options.NAMESPACE).equals(classType.getName()) ) { + writeZookeeperConfigFile(); + } else { + writeConfigFile();// Jini config file + } +//BTM - PRE_ZOOKEEPER_SMART_PROXY - END /* * Start the server. @@ -682,7 +713,9 @@ // the configuration file. 
cmds.add(configFile.toString()); - +//BTM - FOR_ZOOKEEPER_SMART_PROXY - BEGIN + cmds.add("com.bigdata.quorum."+getGroups("groupsToJoin")); +//BTM - FOR_ZOOKEEPER_SMART_PROXY - END } @Override @@ -859,6 +892,113 @@ } +//BTM - FOR_ZOOKEEPER_SMART_PROXY - BEGIN ------------------------------------ + protected void writeConfigFile() throws IOException { + + // 1. generate the contents to be written to the file + final String contents; + { + final StringWriter out = new StringWriter(); + writeConfigFile(out); + out.flush(); + contents = out.toString(); + } + + // 2. write the contents generated above to the file. + { + final Writer out2 = + new OutputStreamWriter + (new BufferedOutputStream + (new FileOutputStream(configFile))); + try { + out2.write(contents); + out2.flush(); + } finally { + out2.close(); + } + } + } + + protected void writeConfigFile(final Writer out) throws IOException { + // 1. Comments + writeComments(out); + out.write("\n"); + + // 2. import statements + for (String i : getImports()) {// write import statements + out.write("import " + i + ";\n"); + } + out.write("\n"); + + // 3. Open component name + out.write("\n\n" + classType.getPackage().getName() + " {\n"); + { + out.write(" "+Options.DATA_DIR+" = new File("+"\""+dataDir.toString()+"\""+");\n"); + out.write(" "+Options.DATA_LOG_DIR+" = new File("+"\""+dataLogDir.toString()+"\""+");\n"); + out.write(" "+Options.CLIENT_PORT+" = "+clientPort+";\n"); + out.write(" "+Options.SERVERS+" = "+"\""+servers+"\""+";\n"); + for(Map.Entry<String, String> entry : other.entrySet()) { + String key = entry.getKey(); + String val = entry.getValue(); + out.write(" "+key+" = "+val+";\n"); + } + + out.write(" "+getGroups("groupsToJoin")+";\n"); + out.write(" locsToJoin=new LookupLocator[]{ };\n"); + out.write(" static discoveryManager = new LookupDiscoveryManager(groupsToJoin,locsToJoin,null,this);\n"); + }// END BLOCK - 4. 
Close component name + out.write("}\n"); + } + + protected void writeComments(final Writer out) throws IOException { + out.write("// className=" + className + "\n"); + out.write("// date=" + new java.util.Date() + "\n"); + } + + public String[] getImports() { + return new String[] + { "java.net.NetworkInterface", + "com.sun.jini.config.ConfigUtil", + "net.jini.constraint.BasicMethodConstraints", + "net.jini.core.constraint.ConnectionRelativeTime", + "net.jini.core.constraint.InvocationConstraints", + "net.jini.jeri.BasicILFactory", + "net.jini.jeri.BasicJeriExporter", + "net.jini.jeri.tcp.TcpServerEndpoint", + "net.jini.core.discovery.LookupLocator", + "net.jini.discovery.LookupDiscoveryManager", + "com.bigdata.util.config.NicUtil", + "com.bigdata.util.config.ConfigDeployUtil" + }; + } + + protected String getGroups(String entryName) { + String fedname = null; + String zrootname = null;//in some test configs + String defaultGroupName = "UNKNOWN"; + try { + fedname = (String) jiniConfig.getEntry + ("bigdata", "fedname", + String.class, "fedname-unset"); + zrootname = (String) jiniConfig.getEntry + ("bigdata", "zrootname", + String.class, null); + } catch(Exception e) {//swallow + } + if (fedname == null) fedname = defaultGroupName; + + StringBuffer strBuf = + new StringBuffer(entryName+"=new String[]{"); + strBuf.append( com.bigdata.jini.util.ConfigMath.q(fedname) ); + if (zrootname != null) { + strBuf.append + (","+com.bigdata.jini.util.ConfigMath.q(zrootname)); + } + strBuf.append("}"); + + return strBuf.toString(); + } +//BTM - FOR_ZOOKEEPER_SMART_PROXY - END -------------------------------------- } /** Modified: branches/dev-btm/bigdata-jini/src/java/com/bigdata/jini/start/process/ZookeeperProcessHelper.java =================================================================== --- branches/dev-btm/bigdata-jini/src/java/com/bigdata/jini/start/process/ZookeeperProcessHelper.java 2010-12-06 21:29:19 UTC (rev 3996) +++ branches/dev-btm/bigdata-jini/src/java/com/bigdata/jini/start/process/ZookeeperProcessHelper.java 2010-12-06 22:09:42 UTC (rev 3997) @@ -147,13 +147,23 @@ * * @see ZookeeperServerConfiguration#newServiceStarter(IServiceListener, ZookeeperServerEntry) */ - static public int startZookeeper(final Configuration config, - final IServiceListener listener) throws ConfigurationException, - IOException { +//BTM - PRE_ZOOKEEPER_SMART_PROXY - BEGIN +//BTM - PRE_ZOOKEEPER_SMART_PROXY static public int startZookeeper(final Configuration config, +//BTM - PRE_ZOOKEEPER_SMART_PROXY final IServiceListener listener) throws ConfigurationException, +//BTM - PRE_ZOOKEEPER_SMART_PROXY IOException { +//BTM - PRE_ZOOKEEPER_SMART_PROXY +//BTM - PRE_ZOOKEEPER_SMART_PROXY final ZookeeperServerConfiguration serverConfig = new ZookeeperServerConfiguration( +//BTM - PRE_ZOOKEEPER_SMART_PROXY config); + static public int startZookeeper + (final Class classType, + final Configuration config, + final IServiceListener listener) + throws ConfigurationException, IOException + { + final ZookeeperServerConfiguration serverConfig = + new ZookeeperServerConfiguration(classType, config); +//BTM - PRE_ZOOKEEPER_SMART_PROXY - END - final ZookeeperServerConfiguration serverConfig = new ZookeeperServerConfiguration( - config); - //BTM log.warn("\n------------ ZookeeperProcessHelper.startZookeeper: [localhost="+thisInetAddr.getHostName()+", clientPort="+serverConfig.clientPort+"]\n"); //BTM com.bigdata.util.Util.printStr("TestBigdata.debug","\n------------ ZookeeperProcessHelper.startZookeeper: 
[localhost="+thisInetAddr.getHostName()+", clientPort="+serverConfig.clientPort+"]\n"); Modified: branches/dev-btm/bigdata-jini/src/java/com/bigdata/quorum/ServiceImpl.java =================================================================== --- branches/dev-btm/bigdata-jini/src/java/com/bigdata/quorum/ServiceImpl.java 2010-12-06 21:29:19 UTC (rev 3996) +++ branches/dev-btm/bigdata-jini/src/java/com/bigdata/quorum/ServiceImpl.java 2010-12-06 22:09:42 UTC (rev 3997) @@ -98,6 +98,7 @@ import java.net.SocketException; import java.net.UnknownHostException; import java.rmi.RemoteException; +import java.rmi.RMISecurityManager; import java.rmi.server.ExportException; import java.util.ArrayList; import java.util.Collections; @@ -117,11 +118,20 @@ /** * Backend (admin) zookeeper based implementation of the quorum peer service. + * + * Note: this class is currently declared public rather than the preferred + * package protected scope. This is so that the JiniServicesHelper + * utility can instantiate this class in the tests that are currently + * implemented to interact directly with the service's backend; + * as opposed to starting the service with the ServiceStarter and + * then interacting with the service through the discovered service + * frontend. */ +public class ServiceImpl implements PrivateInterface { - private static Logger logger = LogUtil.getLog4jLogger - ( (ServiceImpl.class).getName() ) ; + private static Logger logger = + LogUtil.getLog4jLogger(COMPONENT_NAME); private static String shutdownStr; private static String killStr; @@ -267,6 +277,9 @@ //Initialize the service from the config private void init(String[] args) throws Exception { + if(System.getSecurityManager() == null) { + System.setSecurityManager(new RMISecurityManager()); + } config = ConfigurationProvider.getInstance ( args, (this.getClass()).getClassLoader() ); @@ -1388,4 +1401,116 @@ } } } + + /** + * This main() method is provided because it may be desirable (for + * testing or other reasons) to be able to start this service using + * a command line that is either manually entered in a command window + * or supplied to the java ProcessBuilder class for execution. + * <p> + * The mechanism that currently employs the ProcessBuilder class to + * execute a dynamically generated command line will be referred to + * as the 'ServiceConfiguration mechanism', which involves the use of + * the following ServiceConfiguration class hierarchy, + * <p> + * <ul> + * <li> ZookeeperConfiguration + * <li> JavaServiceConfiguration + * <li> ServiceConfiguration + * </ul> + * </p> + * The ServicesConfiguration mechanism may involve the use of the + * ServicesManagerService directly to execute this service, or it may + * involve the use of the junit framework to start this service. In + * either case, a command line is constructed from information that is + * specified at each of the various ServiceConfiguration levels, and + * is ultimately executed in a ProcessBuilder instance (in the + * ProcessHelper class). + * <p> + * In order for this method to know whether or not the + * ServiceConfiguration mechanism is being used to start the service, + * this method must be told that the ServiceConfiguration mechanism is + * being used. This is done by setting the system property named + * <code>usingServiceConfiguration</code> to any non-null value. 
+ * <p> + * When the ServiceConfiguration mechanism is <i>not</i> used to start + * this service, this method assumes that the element at index 0 + * of the args array references the path to the jini configuration + * file that will be input by this method to this service's constructor. + * On the other hand, when the ServiceConfiguration mechanism <i>is</i> + * used to start this service, the service's configuration is handled + * differently, as described below. + * <p> + * When using the ServiceConfiguration mechanism, in addition to + * generating a command line to start the service, although an initial, + * pre-constructed jini configuration file is supplied (to the + * ServicesManagerService or the test framework, for example), a + * second jini configuration file is generated <i>on the fly</i> as + * well. When generating that new configuration file, a subset of the + * components and entries specified in the initial jini configuration + * are retrieved and placed in the new configuration being generated. + * It is that second, newly-generated configuration file that is input + * to this method through the args elements at index 0. + * <p> + * When the ServiceConfiguration mechanism is used to invoke this + * method, this method makes a number of assumptions. One assumption + * is that there is a component with name equal to the value, + * "org.apache.zookeeper.server.quorum.QuorumPeerMain", as well as + * either a component itself, or entries corresponding to a component, + * with name equal to the fully qualified name of this class (or both). + * Another assumption is that an entry named 'args' is associated with + * that component. The 'args' entry is assumed to be a <code>String</code> + * array in which one of the elments is specified to be a system + * property named 'config' whose value is equal to the path and + * filename of yet a third jini configuration file; that is, something + * of the form, "-Dconfig=<path-to-another-jini-config>". It is this + * third jini configuration file that the service will ultimately use + * to initialize itself when the ServiceConfiguration mechanism is + * being used to start the service. In that case then, this method + * will retrieve the path to the third jini configuration file from + * the configuration file supplied to this method in the args array + * at index 0, and then replace the element at index 0 with that + * path; so that when the service is instantiated (using this class' + * constructor), that third configuration file is made available to + * the service instance. + * <p> + * Note that, unlike the other service implementations, this service + * always generates its own service id the very first time it is + * started, persists that service id, and retrieves and reuses it + * on restarts. + * <p> + * Note that once an instance of this service implementation class + * has been created, that instance is stored in the <code>thisImpl</code> + * field to prevent the instance from being garbage collected until + * the service is actually shutdown. 
+ */ + + private static ServiceImpl thisImpl; + + public static void main(String[] args) { + logger.debug("[main]: appHome="+System.getProperty("appHome")); + try { + // If the system property with name "config" is set, then + // use the value of that property to override the value + // input in the first element of the args array + ArrayList<String> argsList = new ArrayList<String>(); + int begIndx = 0; + String configFile = System.getProperty("config"); + if(configFile != null) { + // Replace args[0] with config file location + argsList.add(configFile); + begIndx = 1; + } + for(int i=begIndx; i<args.length; i++) { + argsList.add(args[i]); + } + logger.debug("[main]: instantiating service [new ServiceImpl]"); + thisImpl = new ServiceImpl + ( argsList.toArray(new String[argsList.size()]), + new com.bigdata.service.jini.FakeLifeCycle() ); + } catch(Throwable t) { + logger.log(Level.WARN, + "failed to start callable executor service", t); + } + } } Modified: branches/dev-btm/bigdata-jini/src/java/com/bigdata/service/jini/util/JiniServicesHelper.java =================================================================== --- branches/dev-btm/bigdata-jini/src/java/com/bigdata/service/jini/util/JiniServicesHelper.java 2010-12-06 21:29:19 UTC (rev 3996) +++ branches/dev-btm/bigdata-jini/src/java/com/bigdata/service/jini/util/JiniServicesHelper.java 2010-12-06 22:09:42 UTC (rev 3997) @@ -555,8 +555,14 @@ // start zookeeper (a server instance). //BTM log.warn("\n---------------- JiniServicesHelper.innerStart >>> START ZOOKEEPER\n"); //BTM com.bigdata.util.Util.printStr("TestBigdata.debug","\n---------------- JiniServicesHelper.innerStart >>> START ZOOKEEPER\n"); - final int nstarted = ZookeeperProcessHelper.startZookeeper( - config, serviceListener); +//BTM - PRE_ZOOKEEPER_SMART_PROXY - BEGIN +//BTM - PRE_ZOOKEEPER_SMART_PROXY final int nstarted = ZookeeperProcessHelper.startZookeeper( +//BTM - PRE_ZOOKEEPER_SMART_PROXY config, serviceListener); + final int nstarted = + ZookeeperProcessHelper.startZookeeper + (com.bigdata.quorum.ServiceImpl.class, //BTM - was QuorumPeerMain.class + config, serviceListener); +//BTM - PRE_ZOOKEEPER_SMART_PROXY - END //BTM log.warn("\n---------------- JiniServicesHelper.innerStart >>> START ZOOKEEPER - DONE\n"); //BTM com.bigdata.util.Util.printStr("TestBigdata.debug","\n---------------- JiniServicesHelper.innerStart >>> START ZOOKEEPER - DONE\n"); Modified: branches/dev-btm/bigdata-jini/src/test/com/bigdata/jini/start/AbstractFedZooTestCase.java =================================================================== --- branches/dev-btm/bigdata-jini/src/test/com/bigdata/jini/start/AbstractFedZooTestCase.java 2010-12-06 21:29:19 UTC (rev 3996) +++ branches/dev-btm/bigdata-jini/src/test/com/bigdata/jini/start/AbstractFedZooTestCase.java 2010-12-06 22:09:42 UTC (rev 3997) @@ -50,6 +50,16 @@ import com.bigdata.service.jini.JiniClient; import com.bigdata.service.jini.JiniFederation; +//BTM - FOR_ZOOKEEPER_SMART_PROXY - BEGIN +import com.bigdata.service.QuorumPeerService; +import com.sun.jini.admin.DestroyAdmin; +import net.jini.admin.Administrable; +import net.jini.core.lookup.ServiceItem; +import net.jini.core.lookup.ServiceTemplate; +import net.jini.discovery.DiscoveryGroupManagement; +import net.jini.lookup.ServiceDiscoveryManager; +//BTM - FOR_ZOOKEEPER_SMART_PROXY - END + /** * Abstract base class for unit tests requiring a running zookeeper and a * running federation as configured from a test resource. 
@@ -143,7 +153,11 @@ //BTM - FOR_CLIENT_SERVICE - END // if necessary, start zookeeper (a server instance). - ZookeeperProcessHelper.startZookeeper(config, listener); +//BTM - PRE_ZOOKEEPER_SMART_PROXY - BEGIN +//BTM - PRE_ZOOKEEPER_SMART_PROXY ZookeeperProcessHelper.startZookeeper(config, listener); + ZookeeperProcessHelper.startZookeeper(com.bigdata.quorum.ServiceImpl.class, config, listener); +//ZookeeperProcessHelper.startZookeeper(org.apache.zookeeper.server.quorum.QuorumPeerMain.class, config, listener); +//BTM - PRE_ZOOKEEPER_SMART_PROXY - END /* * FIXME We need to start a jini lookup service for groups = {fedname} @@ -171,6 +185,29 @@ System.err.println(getName() + ": tearing down zrootname=" + zrootname); +//BTM - FOR_ZOOKEEPER_SMART_PROXY - BEGIN + // Graceful shutdown of QuorumPeerService + ServiceDiscoveryManager sdm = fed.getServiceDiscoveryManager(); + DiscoveryGroupManagement ldm = + (DiscoveryGroupManagement)(sdm.getDiscoveryManager()); + Class[] quorumServiceType = + new Class[] {com.bigdata.service.QuorumPeerService.class}; + ServiceTemplate quorumServiceTmpl = + new ServiceTemplate(null, quorumServiceType, null); + ServiceItem[] items = + sdm.lookup(quorumServiceTmpl, Integer.MAX_VALUE, null); + + for (int i=0; i<items.length; i++) { + QuorumPeerService zk = (QuorumPeerService)(items[i].service); + try { + Object admin = ((Administrable)zk).getAdmin(); + ((DestroyAdmin)admin).destroy(); + } catch(Exception e) { + log.warn("failure on zookeeper destroy ["+zk+"]", e); + } + } +//BTM - FOR_ZOOKEEPER_SMART_PROXY - END + // destroy any processes started by this test suite. for (ProcessHelper t : listener.running) { Modified: branches/dev-btm/bigdata-jini/src/test/com/bigdata/jini/start/TestServiceStarter.java =================================================================== --- branches/dev-btm/bigdata-jini/src/test/com/bigdata/jini/start/TestServiceStarter.java 2010-12-06 21:29:19 UTC (rev 3996) +++ branches/dev-btm/bigdata-jini/src/test/com/bigdata/jini/start/TestServiceStarter.java 2010-12-06 22:09:42 UTC (rev 3997) @@ -176,7 +176,8 @@ final ServiceItem serviceItem; IService proxy = null; Service smartProxy = null; - final String physicalServiceZPath; +//BTM (12/06/2010) final String physicalServiceZPath; +String physicalServiceZPath = null; { final List<String> children = zookeeper.getChildren( @@ -194,14 +195,52 @@ * Note: You could explicitly build the correct zpath using the * serviceUUID obtained from the service proxy. */ - physicalServiceZPath = logicalServiceZPath + "/" - + children.get(0); - - // get the serviceUUID from the physicalServiceZNode's data. - final UUID serviceUUID = (UUID) SerializerUtil - .deserialize(zookeeper.getData(physicalServiceZPath, - false/* watch */, new Stat())); - +//BTM - BEGIN (12/06/2010) ---------------------------------------------- +//BTM The note above appears to be no longer true. That is, +//BTM at some point, the tests or infrastructure were changed +//BTM in such a way that there are now 2 children instead of +//BTM the 1 child the note above says is expected. Currently, +//BTM the children are masterElection and physicalServices, +//BTM but the code below that retrieves the serviceUUID from +//BTM zookeeper must retrieve it from the physicalServices +//BTM children, not the masterElection child. 
The original +//BTM code invoked children.get(0), assuming either there +//BTM was only 1 child or assuming that the physicalServices +//BTM child would always be placed at index 0 of the +//BTM children list above zookeeper.getChildren() was called. +//BTM Unfortunately, this is not alway the case. And when/if +//BTM an attempt is made to deserialize the serviceUUID using +//BTM the masterElection child, an EOF exception is encountered +//BTM because serviceUUID information was never stored under +//BTM the masterElection znode. To address this then, the +//BTM code below was changed to loop through the children +//BTM list, catching the exception and exiting when a +//BTM a valid serviceUUID is successfully deserialized. +//BTM +//BTM physicalServiceZPath = logicalServiceZPath + "/" +//BTM + children.get(0); +//BTM // get the serviceUUID from the physicalServiceZNode's data. +//BTM final UUID serviceUUID = (UUID) SerializerUtil +//BTM .deserialize(zookeeper.getData(physicalServiceZPath, +//BTM false/* watch */, new Stat())); +//BTM + UUID serviceUUID = null; + for (String child : children) { + physicalServiceZPath = logicalServiceZPath+"/"+child; + try { + serviceUUID = + (UUID) SerializerUtil.deserialize + ( zookeeper.getData(physicalServiceZPath, + false,//watch + new Stat()) ); + break; + } catch(Exception e) {//swallow + } + } + assertTrue( "failed to deserialize serviceUUID " + +"[children="+children+"]", + (serviceUUID != null) ); +//BTM - END -------------------------------------------------------------- serviceItem = discoverService(serviceUUID); // verify that the service item is registered with jini. Modified: branches/dev-btm/bigdata-jini/src/test/com/bigdata/zookeeper/AbstractZooTestCase.java =================================================================== --- branches/dev-btm/bigdata-jini/src/test/com/bigdata/zookeeper/AbstractZooTestCase.java 2010-12-06 21:29:19 UTC (rev 3996) +++ branches/dev-btm/bigdata-jini/src/test/com/bigdata/zookeeper/AbstractZooTestCase.java 2010-12-06 22:09:42 UTC (rev 3997) @@ -153,12 +153,16 @@ if (log.isInfoEnabled()) log.info(getName()); - + // find ports that are not in use. clientPort = getPort(2181/* suggestedPort */); final int peerPort = getPort(2888/* suggestedPort */); final int leaderPort = getPort(3888/* suggestedPort */); - final String servers = "1=localhost:" + peerPort + ":" + leaderPort; +//BTM - PRE_ZOOKEEPER_SMART_PROXY - BEGIN +//BTM - PRE_ZOOKEEPER_SMART_PROXY final String servers = "1=localhost:" + peerPort + ":" + leaderPort; + String hostname = com.bigdata.util.config.NicUtil.getIpAddress("default.nic", "default", true); + final String servers = "1="+hostname+":" + peerPort + ":" + leaderPort; +//BTM - PRE_ZOOKEEPER_SMART_PROXY - END // create a temporary file for zookeeper's state. dataDir = File.createTempFile("test", ".zoo"); @@ -200,9 +204,14 @@ this.sessionTimeout = tickTime * 2; // if necessary, start zookeeper (a server instance). 
- ZookeeperProcessHelper.startZookeeper(config, listener); +//BTM - PRE_ZOOKEEPER_SMART_PROXY - BEGIN +//BTM - PRE_ZOOKEEPER_SMART_PROXY ZookeeperProcessHelper.startZookeeper(config, listener); +//BTM - PRE_ZOOKEEPER_SMART_PROXY +//BTM - PRE_ZOOKEEPER_SMART_PROXY zookeeperAccessor = new ZooKeeperAccessor("localhost:" + clientPort, sessionTimeout); - zookeeperAccessor = new ZooKeeperAccessor("localhost:" + clientPort, sessionTimeout); + zookeeperAccessor = new ZooKeeperAccessor(hostname+":" + clientPort, sessionTimeout); + ZookeeperProcessHelper.startZookeeper(com.bigdata.quorum.ServiceImpl.class, config, listener); +//BTM - PRE_ZOOKEEPER_SMART_PROXY - END zookeeper = zookeeperAccessor.getZookeeper(); Modified: branches/dev-btm/src/resources/config/bigdataCluster.config =================================================================== --- branches/dev-btm/src/resources/config/bigdataCluster.config 2010-12-06 21:29:19 UTC (rev 3996) +++ branches/dev-btm/src/resources/config/bigdataCluster.config 2010-12-06 22:09:42 UTC (rev 3997) @@ -1371,6 +1371,21 @@ properties = new NV[] { }; } + +com.bigdata.quorum.ServiceImpl { + + args = new String[]{ + "-Xmx200m", + + "-Djava.util.logging.config.file=@NAS@/dist/bigdata/var/config/logging/quorum-logging.properties", + "-Dlog4j.configuration=@NAS@/dist/bigdata/var/config/logging/quorum-logging.properties", + "-Dlog4j.primary.configuration=@NAS@/dist/bigdata/var/config/logging/quorum-logging.properties", + "-DusingServiceConfiguration=true", + "-Dbigdata.logDir=@NAS@/dist/bigdata/var/log", + "-DappHome=@APP_HOME@", + "-Dconfig=@NAS@/dist/bigdata/var/config/jini/quorum.config" + }; +} //BTM - END /** This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
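For orientation, here is a minimal, hypothetical sketch (not part of this commit) of how a component section like the one generated by writeConfigFile() above could be read back through the Jini configuration API. Since writeConfigFile() opens the component with classType.getPackage().getName(), the component name is assumed to be com.bigdata.quorum for classType com.bigdata.quorum.ServiceImpl. The entry names groupsToJoin and locsToJoin are written literally by writeConfigFile(); dataDir, clientPort and servers below are assumed stand-ins for the Options.* constants, whose string values are not shown in this diff.

import java.io.File;
import java.util.Arrays;

import net.jini.config.Configuration;
import net.jini.config.ConfigurationException;
import net.jini.config.ConfigurationProvider;

public class ReadGeneratedQuorumConfig {

    public static void main(String[] args) throws ConfigurationException {

        // args[0] points at the generated jini configuration file, just as it
        // does when ServiceImpl is started through the ServiceConfiguration
        // mechanism described in the main() javadoc above.
        final Configuration config = ConfigurationProvider.getInstance(args);

        // writeConfigFile() opens the component block using the package name.
        final String component = "com.bigdata.quorum";

        // Assumed entry names standing in for Options.DATA_DIR, CLIENT_PORT, SERVERS.
        final File dataDir = (File) config.getEntry(component, "dataDir", File.class);
        final int clientPort = (Integer) config.getEntry(component, "clientPort", int.class);
        final String servers = (String) config.getEntry(component, "servers", String.class);

        // This entry name is written literally by writeConfigFile().
        final String[] groupsToJoin = (String[]) config.getEntry(
                component, "groupsToJoin", String[].class);

        System.out.println("dataDir=" + dataDir + ", clientPort=" + clientPort
                + ", servers=" + servers + ", groups=" + Arrays.toString(groupsToJoin));
    }
}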
From: <mrp...@us...> - 2010-12-06 21:29:26
Revision: 3996 http://bigdata.svn.sourceforge.net/bigdata/?rev=3996&view=rev Author: mrpersonick Date: 2010-12-06 21:29:19 +0000 (Mon, 06 Dec 2010) Log Message: ----------- added an ant task to run the print queries helper Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm/build.xml Modified: branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm/build.xml =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm/build.xml 2010-12-06 21:28:37 UTC (rev 3995) +++ branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm/build.xml 2010-12-06 21:29:19 UTC (rev 3996) @@ -1,3 +1,4 @@ + <!-- $Id: build.xml 2266 2009-10-26 18:21:50Z mrpersonick $ --> <!-- --> <!-- do "ant bundle-jar" in the parent directory first. --> @@ -246,7 +247,46 @@ </classpath> </java> </target> + + <target name="run-print-queries" depends="compile" description="Prints some sample benchmark queries to be run against the loaded data."> + <java classname="benchmark.bigdata.TestDriver" fork="true" failonerror="true" dir="${build.dir}/bin"> + <!-- -runs # is the #of query mix runs (default is 500). --> + <arg value="-runs" /> + <arg value="${bsbm.runs}" /> + <!-- -w # is the #of warmup query mixes (default is 50). --> + <arg value="-w" /> + <arg value="${bsbm.w}" /> + + <!-- -mt # is the #of concurrent clients. --> + <arg value="-mt" /> + <arg value="${bsbm.mt}" /> + + <!-- -qdir dir is the query directory (default is queries). --> + <!--<arg value="-qdir"/><arg value="src/resources/bsbm_data"/>--> + + <!-- -idir dir is the test data directory (default td_data). --> + <arg value="-idir" /> + <arg value="${bsbm.dataDir}" /> + + <!-- The randomizer seed. --> + <arg value="-seed" /> + <arg value="${bsbm.seed}" /> + + <!-- -o file is the name of the xml output file. --> + <arg value="-o" /> + <arg value="${bsbm.resultsDir}/benchmark_result_pc${bsbm.pc}_runs${bsbm.runs}_mt${bsbm.mt}.xml" /> + + <!-- The SPARQL endpoint. --> + <arg value="http://localhost:${bsbm.nanoServerPort}" /> + + <classpath> + <path refid="runtime.classpath" /> + </classpath> + </java> + </target> + + <target name="set-properties" depends="compile" description="Set or change properties for a kb instance. The new values are read from stdin."> <java classname="com.bigdata.rdf.sail.BigdataSailHelper" fork="true" failonerror="true"> <!-- This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <mrp...@us...> - 2010-12-06 21:28:44
Revision: 3995 http://bigdata.svn.sourceforge.net/bigdata/?rev=3995&view=rev Author: mrpersonick Date: 2010-12-06 21:28:37 +0000 (Mon, 06 Dec 2010) Log Message: ----------- helper class to print bsbm queries Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm/src/java/benchmark/bigdata/ branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm/src/java/benchmark/bigdata/TestDriver.java Added: branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm/src/java/benchmark/bigdata/TestDriver.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm/src/java/benchmark/bigdata/TestDriver.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm/src/java/benchmark/bigdata/TestDriver.java 2010-12-06 21:28:37 UTC (rev 3995) @@ -0,0 +1,50 @@ + package benchmark.bigdata; + +import java.io.IOException; + +import org.apache.log4j.xml.DOMConfigurator; + +import benchmark.testdriver.Query; + +public class TestDriver extends benchmark.testdriver.TestDriver { + + public TestDriver(String[] args) { + super(args); + } + + public void printQueries(final int nrRun) { + + queryMix.setRun(nrRun); + while(queryMix.hasNext()) { + Query next = queryMix.getNext(); + Object[] queryParameters = parameterPool.getParametersForQuery(next); + next.setParameters(queryParameters); +// if(ignoreQueries[next.getNr()-1]) + queryMix.setCurrent(0, -1.0); +// else { +// server.executeQuery(next, next.getQueryType()); +// } + System.out.println("query " + next.getNr() + ":"); + System.out.println(next.getQueryString()); + System.out.println(""); +// try { +// System.in.read(); +// } catch (IOException ex) { +// ex.printStackTrace(); +// } + } + + } + + public static void main(String[] argv) { + DOMConfigurator.configureAndWatch( "log4j.xml", 60*1000 ); + TestDriver testDriver = new TestDriver(argv); + testDriver.init(); + +// for (int i = 0; i < 3; i++) +// testDriver.printQueries(i); + testDriver.printQueries(0); + + } + +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
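A brief usage sketch (not part of the commit): the helper can be driven programmatically the same way its own main() does. The argument values below are placeholders; the run-print-queries ant target earlier in this thread shows the full argument list actually used, and init() expects generated BSBM test data under the -idir directory.

import benchmark.bigdata.TestDriver;

public class PrintBsbmQueries {

    public static void main(String[] argv) {
        // Placeholder arguments; see the ant target for the complete set.
        final String[] args = new String[] {
                "-runs", "100",
                "-idir", "td_data",
                "http://localhost:8080"
        };
        final TestDriver driver = new TestDriver(args);
        driver.init();          // builds the parameter pool and query mix
        driver.printQueries(0); // prints the parameterized queries for run 0
    }
}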
From: <tho...@us...> - 2010-12-05 20:38:46
Revision: 3994 http://bigdata.svn.sourceforge.net/bigdata/?rev=3994&view=rev Author: thompsonbry Date: 2010-12-05 20:38:35 +0000 (Sun, 05 Dec 2010) Log Message: ----------- javadoc Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/MutableBucketData.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/MutableBucketData.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/MutableBucketData.java 2010-12-03 22:26:40 UTC (rev 3993) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/MutableBucketData.java 2010-12-05 20:38:35 UTC (rev 3994) @@ -100,7 +100,37 @@ * @param data */ public MutableBucketData(final IBucketData data) { - + + /* + * @todo this is the initial capacity. the buffer can grow as necessary + * until we have reached the maximum number of entries permitted by the + * branching factor (and for HTree we should let the branching factor + * grow as well if we are under the target page size!). + * + * The caller should be passing in the estimated bufferSize to achieve a + * target page size. + */ + buf = new ByteArrayBuffer(Bytes.kilobyte32*4); + + /* + * @todo this is an exact fit. The caller should be passing in the + * configured branchingFactor. + * + * Actually, an ArrayList or simply an extendable or pre-extended array + * would do fine here. All we need to do is translate the index of the + * entry into the offset into the buffer. There are very specialized + * data structures which can do better for this than an array by leaving + * "holes" to minimize the amount of copying when inserting, but that + * should not be a problem with a compacting pass over the buffer. We + * can either compact over each hole, copy down things into "good" fits + * in the holes, or copy things into a new array. This would be a pretty + * natural fit with a single free buffer which is available on the + * HTree/BTree and swapped with the buffer in a leaf/bucket each time it + * is compacted. Any of these schemes will do less copying that the + * current B+Tree scheme, which copies up/down all the time. + */ + index = new OpenIntIntHashMap(data.getKeyCount()); + } /** @@ -122,14 +152,25 @@ * fit a model which estimates the size of the resulting page based on * the size of the buffer and then grow the buffer until we can * satisfy the target page size. + * + * FIXME The buffer should be LARGER than the target value required to + * model a full page in order to have some efficiency with update + * operations when the page is nearly full. Probably we should use + * 1.5x to 2x the target page size for the bufferSize. */ public MutableBucketData(final int bufferSize, final int branchingFactor) { - final int initialBufferSize = Math - .min(Bytes.kilobyte32 * 4, bufferSize); + /* + * Initialize the buffer and the index using the given values. + * + * The bufferSize should be adaptive. + * + * The branchingFactor could be adaptive as well (but the B+Tree logic + * would have to be updated to handle leaves which are over or under the + * nominal branching factor). + */ + buf = new ByteArrayBuffer(bufferSize); - buf = new ByteArrayBuffer(initialBufferSize); - index = new OpenIntIntHashMap(branchingFactor/* initialCapacity */); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
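The buffer budget and compaction scheme sketched in the javadoc above can be pictured with a short, self-contained example. It is deliberately simplified and hypothetical: it keeps {offset, length} slots in a plain list where MutableBucketData uses cern.colt's OpenIntIntHashMap over a ByteArrayBuffer, and it ignores keys, hash values and version metadata entirely. The point it illustrates is that entries are appended into one backing byte[], deletes only leave holes, and a compacting pass copies the live entries into a fresh buffer without changing their entry numbers.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class CompactingBucketSketch {

    private byte[] buf = new byte[4 * 1024]; // initial capacity; grows toward the page budget
    private int extent = 0;                  // #of bytes of buf in use
    private final List<int[]> index = new ArrayList<int[]>(); // entry -> {offset, length}; offset -1 if deleted

    public int insert(final byte[] encodedTuple) {
        while (extent + encodedTuple.length > buf.length)
            buf = Arrays.copyOf(buf, buf.length * 2);
        System.arraycopy(encodedTuple, 0, buf, extent, encodedTuple.length);
        index.add(new int[] { extent, encodedTuple.length });
        extent += encodedTuple.length;
        return index.size() - 1; // the entry number
    }

    public byte[] get(final int entry) {
        final int[] slot = index.get(entry);
        return slot[0] < 0 ? null : Arrays.copyOfRange(buf, slot[0], slot[0] + slot[1]);
    }

    public void delete(final int entry) {
        index.get(entry)[0] = -1; // leave a hole; space is reclaimed by compact()
    }

    // Copy the live entries into a new buffer; entry numbers are not changed.
    public void compact() {
        final byte[] newBuf = new byte[buf.length];
        int newExtent = 0;
        for (final int[] slot : index) {
            if (slot[0] < 0)
                continue;
            System.arraycopy(buf, slot[0], newBuf, newExtent, slot[1]);
            slot[0] = newExtent;
            newExtent += slot[1];
        }
        buf = newBuf;
        extent = newExtent;
    }

    public static void main(String[] args) {
        final CompactingBucketSketch b = new CompactingBucketSketch();
        final int e0 = b.insert("tuple-0".getBytes());
        final int e1 = b.insert("tuple-1".getBytes());
        b.delete(e0);
        b.compact();
        System.out.println(new String(b.get(e1))); // still addressable as entry e1
    }
}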
From: <tho...@us...> - 2010-12-03 22:26:48
Revision: 3993 http://bigdata.svn.sourceforge.net/bigdata/?rev=3993&view=rev Author: thompsonbry Date: 2010-12-03 22:26:40 +0000 (Fri, 03 Dec 2010) Log Message: ----------- Continued work on the HTree, especially on the interface, persistent data record, and mutable data record for the hash bucket. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/data/DefaultLeafCoder.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashBucket.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashDirectory.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashTree.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/data/IBucketData.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/btree/AbstractBTreeTestCase.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/btree/data/AbstractLeafDataRecordTestCase.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/MockBucketData.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestBucketDataRecord_Simple_Simple.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/IHashTuple.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/MutableBucketData.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/AbstractHashBucketDataRecordTestCase.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestBucketDataRecord_CanonicalHuffman_CanonicalHuffman.java Removed Paths: ------------- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestLeafDataRecord_CanonicalHuffman_CanonicalHuffman.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/data/DefaultLeafCoder.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/data/DefaultLeafCoder.java 2010-12-03 22:25:13 UTC (rev 3992) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/data/DefaultLeafCoder.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -35,6 +35,8 @@ import java.io.IOException; import java.io.ObjectInput; import java.io.ObjectOutput; +import java.util.Iterator; +import java.util.NoSuchElementException; import org.apache.log4j.Logger; @@ -368,7 +370,7 @@ if (nkeys > 0) { final int byteLength = BytesUtil - .bitFlagByteLength((lengthMSB + (nkeys * lengthLSB)) * 8/* nbits */); + .bitFlagByteLength((lengthMSB + (nkeys * lengthLSB))/* nbits */); final byte[] a = new byte[byteLength]; @@ -443,22 +445,45 @@ return encodeLive(leaf, buf).data(); } - - /** - * A read-only view of the data for a B+Tree leaf based on a compact record - * format. While some fields are cached, for the most part the various data - * fields, including the keys and values, are accessed in place in the data - * record in order to minimize the memory footprint of the leaf. The keys and - * values are coded using a caller specified {@link IRabaCoder}. The specific - * coding scheme is specified by the {@link IndexMetadata} for the B+Tree - * instance and is not stored within the leaf data record. - * <p> - * Note: The leading byte of the record format codes for a leaf, a double-linked - * leaf or a node in a manner which is compatible with {@link ReadOnlyNodeData}. 
- * - * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id$ - */ + + /** + * A read-only view of the data for a B+Tree leaf based on a compact record + * format. While some fields are cached, for the most part the various data + * fields, including the keys and values, are accessed in place in the data + * record in order to minimize the memory footprint of the leaf. The keys + * and values are coded using a caller specified {@link IRabaCoder}. The + * specific coding scheme is specified by the {@link IndexMetadata} for the + * B+Tree instance and is not stored within the leaf data record. The use of + * prefix coding for keys is a good general choices, but should not be used + * in combination with a hash tree unless an order preserving hashing + * function is being used. + * <p> + * Note: The leading byte of the record format codes for a leaf, a + * double-linked leaf or a node in a manner which is compatible with + * {@link ReadOnlyNodeData}. + * <p> + * The {@link DefaultLeafCoder} automatically maintains hash values for keys + * for an {@link IBucketData} record. The hash values of the keys in the + * bucket will have a shared prefix (the MSB hash prefix) which corresponds + * to the globalDepth of the path through the hash tree leading to this + * bucket less the localDepth of this bucket. It is therefore possible to + * store only the LSB bits of the hash values in the page and reconstruct + * the hash values using the MSB bits from the path through the hash tree. + * In order to be able to reconstruct the full hash code key based solely on + * local information, the MSB bits can be written out once and the LSB bits + * can be written out once per tuple. Testing the hash value of a key may + * then be done considering only the LSB bits of the hash value. This + * storage scheme also has the advantage that the hash value is not + * restricted to an int32 and is therefore compatible with the use of + * cryptographic hash functions. (If hash values are stored in a B+Tree leaf + * they will not shared this prefix property and can not be compressed in + * this manner). + * + * @author <a href="mailto:tho...@us...">Bryan + * Thompson</a> + * @version $Id: DefaultLeafCoder.java 3991 2010-12-03 18:48:02Z thompsonbry + * $ + */ static private class ReadOnlyLeafData extends AbstractReadOnlyNodeData<ILeafData> implements ILeafData, IBucketData { @@ -645,7 +670,7 @@ O_hashKeys = pos; final int byteLength = BytesUtil - .bitFlagByteLength((lengthMSB + (nkeys * lengthLSB)) * 8/* nbits */); + .bitFlagByteLength((lengthMSB + (nkeys * lengthLSB))/* nbits */); if (nkeys > 0) { @@ -804,7 +829,7 @@ O_hashKeys = pos; final int byteLength = BytesUtil - .bitFlagByteLength((lengthMSB + (nkeys * lengthLSB)) * 8/* nbits */); + .bitFlagByteLength((lengthMSB + (nkeys * lengthLSB))/* nbits */); if (nkeys > 0) { @@ -994,7 +1019,7 @@ final int lengthMSB = 32/* hashBitLength */- lengthLSB; final int byteLength = BytesUtil.bitFlagByteLength(lengthMSB - + nkeys * lengthMSB/* nbits */); + + (nkeys * lengthLSB)/* nbits */); final InputBitStream ibs = b.slice(O_hashKeys, byteLength) .getInputBitStream(); @@ -1018,8 +1043,102 @@ } } - - final public IRaba getKeys() { + + public Iterator<Integer> hashIterator(final int h) { + + return new HashMatchIterator(h); + + } + + /** + * Visits the index of each bucket entry having a matching hash code. + * + * @todo a trie over the hash entries would provide much faster search. 
+ */ + private class HashMatchIterator implements Iterator<Integer> { + + private final int h; + private final int lengthMSB; + private final InputBitStream ibs; + private int currentIndex = 0; + private Integer nextResult = null; + + private HashMatchIterator(final int h) { + + this.h = h; + + lengthMSB = 32/* hashBitLength */- lengthLSB; + + final int byteLength = BytesUtil.bitFlagByteLength(lengthMSB + + (nkeys * lengthLSB)/* nbits */); + + ibs = b.slice(O_hashKeys, byteLength) + .getInputBitStream(); + + } + + public boolean hasNext() { + + final int n = getKeyCount(); + + while (nextResult == null && currentIndex < n) { + + final int index = currentIndex++; + + int h1; + try { + + // We do not need to re-position the ibs. +// final long position = lengthMSB + currentIndex +// * lengthLSB; +// ibs.position(position); + + h1 = ibs.readInt(lengthLSB); + + h1 |= hashMSB; + + } catch (IOException ex) { + + throw new RuntimeException(ex); + + } + + if (h1 == h) { + + nextResult = Integer.valueOf(index); + + break; + + } + + } + + return nextResult != null; + + } + + public Integer next() { + + if (!hasNext()) + throw new NoSuchElementException(); + + final Integer tmp = nextResult; + + nextResult = null; + + return tmp; + + } + + public void remove() { + + throw new UnsupportedOperationException(); + + } + + } + + final public IRaba getKeys() { return keys; Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashBucket.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashBucket.java 2010-12-03 22:25:13 UTC (rev 3992) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashBucket.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -32,6 +32,7 @@ import com.bigdata.btree.IOverflowHandler; import com.bigdata.btree.IndexSegment; +import com.bigdata.btree.data.DefaultLeafCoder; import com.bigdata.btree.data.IAbstractNodeDataCoder; import com.bigdata.btree.data.ILeafData; import com.bigdata.btree.raba.IRaba; @@ -46,6 +47,11 @@ * @todo The hash of the key should be part of the ITuple interface so it can be * passed along based on the application level encoding of the key. * + * @todo Consider organizing the hash values of the keys in the page using a + * trie for faster lookup. This could be done when they are serialized (in + * which case this decision disappears into the {@link DefaultLeafCoder}) + * or dynamically. + * * @todo Support out-of-line representations of the key and/or value for a tuple * when they are large. The definition of "large" can be a configuration * value for the index metadata. For example, 1/4 of the target page size @@ -124,8 +130,19 @@ * * @todo IRaba keys plus IRaba vals. */ - final int[] data; + final int[] entries; + /** + * The data record. A mutable is used for all mutation operations. The data + * record is replaced by a read-only record used when the hash bucket is + * made persistent. A read-only data record is automatically converted into + * a mutable record when a mutation operation is requested. + * <p> + * Note: This is package private in order to expose it to + * {@link HashDirectory}. 
+ */ + private IBucketData data; + protected void setLocalHashBits(final int localHashBits) { this.localHashBits = localHashBits; @@ -133,7 +150,9 @@ } public int getLocalHashBits() { + return localHashBits; + } /** @@ -148,7 +167,7 @@ for (int i = 0; i < size; i++) { if (i > 0) sb.append(','); - sb.append(Integer.toString(data[i])); + sb.append(Integer.toString(entries[i])); } sb.append("}}"); return sb.toString(); @@ -174,7 +193,7 @@ this.localHashBits = localHashBits; - this.data = new int[bucketSize]; + this.entries = new int[bucketSize]; // one more bucket. htbl.nbuckets++; @@ -200,7 +219,7 @@ for (int i = 0; i < size; i++) { - if (data[i] == key) + if (entries[i] == key) return true; } @@ -287,7 +306,7 @@ */ public void insert(final int h, final int key) { - if (size == data.length) { + if (size == entries.length) { /* * The bucket must be split, potentially recursively. @@ -375,7 +394,7 @@ } - data[size++] = key; + entries[size++] = key; // one more entry in the index. htbl.nentries++; @@ -397,7 +416,7 @@ for (int i = 0; i < size; i++) { - if (data[i] == key) { + if (entries[i] == key) { // #of tuples remaining beyond this point. final int length = size - i - 1; @@ -405,7 +424,7 @@ if (length > 0) { // Keep the array dense by copying down by one. - System.arraycopy(data, i + 1/* srcPos */, data/* dest */, + System.arraycopy(entries, i + 1/* srcPos */, entries/* dest */, i/* destPos */, length); } @@ -461,7 +480,7 @@ @Override public Integer next() { - return data[current++]; + return entries[current++]; } @Override @@ -568,14 +587,12 @@ * IBucketData */ - public int getHash(int index) { - // TODO Auto-generated method stub - return 0; + public int getHash(final int index) { + return data.getHash(index); } public int getLengthMSB() { - // TODO Auto-generated method stub - return 0; + return data.getLengthMSB(); } /* @@ -583,49 +600,39 @@ */ public boolean hasVersionTimestamps() { - // TODO Auto-generated method stub - return false; + return data.hasVersionTimestamps(); } public AbstractFixedByteArrayBuffer data() { - // TODO Auto-generated method stub - return null; + return data.data(); } public int getKeyCount() { - // TODO Auto-generated method stub - return 0; + return data.getKeyCount(); } public IRaba getKeys() { - // TODO Auto-generated method stub - return null; + return data.getKeys(); } public long getMaximumVersionTimestamp() { - // TODO Auto-generated method stub - return 0; + return data.getMaximumVersionTimestamp(); } public long getMinimumVersionTimestamp() { - // TODO Auto-generated method stub - return 0; + return data.getMinimumVersionTimestamp(); } public int getSpannedTupleCount() { - // TODO Auto-generated method stub - return 0; + return data.getKeyCount(); } public boolean isCoded() { - // TODO Auto-generated method stub - return false; + return data.isCoded(); } final public boolean isLeaf() { - return true; - } /** @@ -635,55 +642,43 @@ * {@link IBucketData} is automatically converted into a mutable instance. 
*/ final public boolean isReadOnly() { - -// return data.isReadOnly(); - // TODO Auto-generated method stub - return false; - + return data.isReadOnly(); } /* * ILeafData */ - public boolean getDeleteMarker(int index) { - // TODO Auto-generated method stub - return false; + public boolean getDeleteMarker(final int index) { + return data.getDeleteMarker(index); } public long getNextAddr() { - // TODO Auto-generated method stub - return 0; + return data.getNextAddr(); } public long getPriorAddr() { - // TODO Auto-generated method stub - return 0; + return data.getPriorAddr(); } public int getValueCount() { - // TODO Auto-generated method stub - return 0; + return data.getValueCount(); } public IRaba getValues() { - // TODO Auto-generated method stub - return null; + return data.getValues(); } - public long getVersionTimestamp(int index) { - // TODO Auto-generated method stub - return 0; + public long getVersionTimestamp(final int index) { + return data.getVersionTimestamp(index); } public boolean hasDeleteMarkers() { - // TODO Auto-generated method stub - return false; + return data.hasDeleteMarkers(); } public boolean isDoubleLinked() { - // TODO Auto-generated method stub - return false; + return data.isDoubleLinked(); } } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashDirectory.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashDirectory.java 2010-12-03 22:25:13 UTC (rev 3992) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashDirectory.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -775,7 +775,7 @@ // Adjust the #of local bits to be considered. bold.setLocalHashBits(bold.getLocalHashBits() + 1); // The new bucket. - bnew = new HashBucket(htbl, bold.getLocalHashBits(), bold.data.length/* bucketSize */); + bnew = new HashBucket(htbl, bold.getLocalHashBits(), bold.entries.length/* bucketSize */); // // The address for the new bucket. // final int addrBNew = htbl.buckets.size(); // Add to the chain of buckets. @@ -862,7 +862,7 @@ { // the new bucket. bnew = new HashBucket(htbl, bold.getLocalHashBits() + 1, - bold.data.length/* bucketSize */); + bold.entries.length/* bucketSize */); // // Add to the chain of buckets. // htbl.buckets.add(bnew); } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashTree.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashTree.java 2010-12-03 22:25:13 UTC (rev 3992) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashTree.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -528,8 +528,7 @@ * have access to the store or ITuple will have to have indirection * support. */ - public HashTree(final int initialCapacity, - final int bucketSize) { + public HashTree(final int initialCapacity, final int bucketSize) { // @todo pass in the store reference per AbstractBTree. this.store = new SimpleMemoryRawStore(); Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/IHashTuple.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/IHashTuple.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/IHashTuple.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -0,0 +1,32 @@ +package com.bigdata.htree; + +import com.bigdata.btree.ITuple; + +/** + * Extended interface to support hash buckets. 
+ * + * @author tho...@us... + * + * @param <E> + * + * @todo The reason for having this on ITuple is to make it practical for the + * hash code to be defined in terms of application specific data types + * rather than the unsigned byte[] key (but the latter could of course be + * decoded by the hash function before computing the hash of the + * application data type, except for things like Unicode keys). + * <p> + * This should probably be lifted onto {@link ITuple} and + * {@link #getKeyHash()} should be declared to throw an + * {@link UnsupportedOperationException} if the hash code of the key is + * not being stored. + */ +public interface IHashTuple<E> extends ITuple<E> { + +// int getHashBitLength(); + + /** + * The int32 hash value of the key. + */ + int getKeyHash(); + +} Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/MutableBucketData.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/MutableBucketData.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/MutableBucketData.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -0,0 +1,617 @@ +/* + +Copyright (C) SYSTAP, LLC 2006-2008. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Aug 25, 2009 + */ + +package com.bigdata.htree; + +import java.util.Iterator; + +import cern.colt.map.OpenIntIntHashMap; + +import com.bigdata.btree.ITuple; +import com.bigdata.btree.MutableLeafData; +import com.bigdata.btree.raba.IRaba; +import com.bigdata.htree.data.IBucketData; +import com.bigdata.io.AbstractFixedByteArrayBuffer; +import com.bigdata.io.ByteArrayBuffer; +import com.bigdata.io.IDataRecord; +import com.bigdata.rawstore.Bytes; + +/** + * Implementation maintains Java objects corresponding to the persistent data + * and defines methods for a variety of mutations on the {@link IBucketData} + * record which operate by direct manipulation of the Java objects. + * <p> + * Note: package private fields are used so that they may be directly accessed + * by the {@link HashBucket} class. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id: MutableLeafData.java 2265 2009-10-26 12:51:06Z thompsonbry $ + * + * @todo Consider mutable implementation based on a compacting record ala GOM. + * This is especially attractive for the hash tree. The implementation + * would have to be wholly different from the {@link MutableLeafData} + * class. Instead of managing the {@link IRaba} for the keys and values + * separately, each {@link ITuple} would be appended into a byte[] (or + * {@link IDataRecord}). There would be a budget for that backing buffer + * which is the maximum in-memory size of the bucket. 
An index would + * provide random access into the buffer for only those entries which are + * "live" and a counter is maintain of the #of entries in the buffer which + * are no longer in use. When the buffer capacity is reached, the buffer + * is compacted by copying all of the entries accessible from the index + * into a new buffer and the old buffer is released. + * <p> + * Records which are too large for the buffer should be moved out of line. + * <p> + * This can be used in combination with a dynamically maintained trie for + * fast hash lookup, or we could just scan the entries. + * <p> + * Lexicon key search can scan the entries using the index. Scanning can + * have a side-effect in which the position of the entry offsets in the + * index is swapped if the keys are out of order. This would give us + * MonetDB style "cracking". The keys would have to be put into full order + * no later than when the record is made persistent. + * <p> + * Even though mutation is not thread safe, compacting the data record + * must not cause the assignment of indices to tuples to change when the + * caller is iterating through the entries by index. + * + * @todo When the record is serialized, do we need to allow a decision function + * to examine the record and decide whether it must be split? Since we do + * not have a fixed target for the page size, but only a budget, and since + * compression of keys, values, metadata, and the encoded record can all + * be applied, it seems that the decision function should be in terms of + * the buffer budget and a maximum #of entries (e.g., B+Tree branching + * factor or an equivalent hash bucket threshold). + */ +public class MutableBucketData implements IBucketData { + + private IDataRecord buf; + + private /*@todo final*/ OpenIntIntHashMap index; + + /** + * Constructor used when converting a persistent data record into a mutable + * one. + * + * @param data + */ + public MutableBucketData(final IBucketData data) { + + } + + /** + * + * @param bufferSize + * The initial size of the backing byte[]. + * @param branchingFactor + * The maximum #of tuples which may be stored in the data record. + * + * @todo The buffer must be permitted grow until it is sufficient to encode + * approximately one page worth of tuples. + * <p> + * is typically on the order of the size of a page, e.g., 4k. Since + * the data record will be encoded and possible compressed before it + * is written onto the store, this can be larger than the target. + * <p> + * In order to avoid problems where the objects are much smaller than + * expected, we should allow the backing buffer to grow or we should + * fit a model which estimates the size of the resulting page based on + * the size of the buffer and then grow the buffer until we can + * satisfy the target page size. 
+ */ + public MutableBucketData(final int bufferSize, final int branchingFactor) { + + final int initialBufferSize = Math + .min(Bytes.kilobyte32 * 4, bufferSize); + + buf = new ByteArrayBuffer(initialBufferSize); + + index = new OpenIntIntHashMap(branchingFactor/* initialCapacity */); + + } + + @Override + public int getHash(int index) { + // TODO Auto-generated method stub + return 0; + } + + @Override + public int getKeyCount() { + // TODO Auto-generated method stub + return 0; + } + + @Override + public int getLengthMSB() { + // TODO Auto-generated method stub + return 0; + } + + @Override + public Iterator<Integer> hashIterator(int h) { + // TODO Auto-generated method stub + return null; + } + + @Override + public boolean getDeleteMarker(int index) { + // TODO Auto-generated method stub + return false; + } + + @Override + public long getNextAddr() { + // TODO Auto-generated method stub + return 0; + } + + @Override + public long getPriorAddr() { + // TODO Auto-generated method stub + return 0; + } + + @Override + public int getValueCount() { + // TODO Auto-generated method stub + return 0; + } + + @Override + public IRaba getValues() { + // TODO Auto-generated method stub + return null; + } + + @Override + public long getVersionTimestamp(int index) { + // TODO Auto-generated method stub + return 0; + } + + @Override + public boolean hasDeleteMarkers() { + // TODO Auto-generated method stub + return false; + } + + @Override + public boolean hasVersionTimestamps() { + // TODO Auto-generated method stub + return false; + } + + @Override + public boolean isDoubleLinked() { + // TODO Auto-generated method stub + return false; + } + + @Override + public AbstractFixedByteArrayBuffer data() { + // TODO Auto-generated method stub + return null; + } + + @Override + public IRaba getKeys() { + // TODO Auto-generated method stub + return null; + } + + @Override + public long getMaximumVersionTimestamp() { + // TODO Auto-generated method stub + return 0; + } + + @Override + public long getMinimumVersionTimestamp() { + // TODO Auto-generated method stub + return 0; + } + + @Override + public int getSpannedTupleCount() { + // TODO Auto-generated method stub + return 0; + } + + @Override + public boolean isCoded() { + // TODO Auto-generated method stub + return false; + } + + @Override + public boolean isLeaf() { + // TODO Auto-generated method stub + return false; + } + + @Override + public boolean isReadOnly() { + // TODO Auto-generated method stub + return false; + } + +// /** +// * The keys for the entries in the bucket. Unlike a B+Tree, the keys are NOT +// * maintained in a sorted order. Search proceeds by scanning for matching +// * hash codes and filtering for matching keys. +// */ +// final MutableKeyBuffer keys; +// +// /** +// * The values for the entries in the bucket. There is one value per key. The +// * value MAY be null. +// */ +// final MutableValueBuffer vals; +// +// /** +// * The deletion markers IFF isolation is supported by the {@link HTree}. +// */ +// final boolean[] deleteMarkers; +// +// /** +// * The version timestamps IFF isolation is supported by the {@link HTree}. +// */ +// final long[] versionTimestamps; +// +// /** +// * The minimum version timestamp. +// * +// * @todo these fields add 16 bytes to each {@link MutableBucketData} object +// * even when we do not use them. It would be better to use a subclass +// * or tack them onto the end of the {@link #versionTimestamps} array. 
+// */ +// long minimumVersionTimestamp; +// long maximumVersionTimestamp; +// +// /** +// * Create an empty data record with internal arrays dimensioned for the +// * specified branching factor. +// * +// * @param branchingFactor +// * The maximum #of entries in the hash bucket before it will +// * overflow or be split. Since the goal is to manage the size +// * of the bucket on the disk and since we do not known the size +// * of the bucket's data record until it is being evicted, this +// * value places an upper bound after which the bucket will be +// * @param hasVersionTimestamps +// * <code>true</code> iff version timestamps will be maintained. +// * @param hasDeleteMarkers +// * <code>true</code> iff delete markers will be maintained. +// */ +// public MutableBucketData(final int branchingFactor, +// final boolean hasVersionTimestamps, final boolean hasDeleteMarkers) { +// +// keys = new MutableKeyBuffer(branchingFactor + 1); +// +// vals = new MutableValueBuffer(branchingFactor + 1); +// +// versionTimestamps = (hasVersionTimestamps ? new long[branchingFactor + 1] +// : null); +// +// // init per API specification. +// minimumVersionTimestamp = Long.MAX_VALUE; +// maximumVersionTimestamp = Long.MIN_VALUE; +// +// deleteMarkers = (hasDeleteMarkers ? new boolean[branchingFactor + 1] +// : null); +// +// } +// +// /** +// * Copy ctor. +// * +// * @param branchingFactor +// * The branching factor for the owning B+Tree. +// * @param src +// * The source leaf. +// */ +// public MutableBucketData(final int branchingFactor, final ILeafData src) { +// +// keys = new MutableKeyBuffer(branchingFactor + 1, src.getKeys()); +// +// vals = new MutableValueBuffer(branchingFactor + 1, src.getValues()); +// +// versionTimestamps = (src.hasVersionTimestamps() ? new long[branchingFactor + 1] +// : null); +// +// deleteMarkers = (src.hasDeleteMarkers() ? new boolean[branchingFactor + 1] +// : null); +// +// final int nkeys = keys.size(); +// +// if (versionTimestamps != null) { +// +// for (int i = 0; i < nkeys; i++) { +// +// versionTimestamps[i] = src.getVersionTimestamp(i); +// +// } +// +// minimumVersionTimestamp = src.getMinimumVersionTimestamp(); +// +// maximumVersionTimestamp = src.getMaximumVersionTimestamp(); +// +// } else { +// +// minimumVersionTimestamp = Long.MAX_VALUE; +// +// maximumVersionTimestamp = Long.MIN_VALUE; +// +// +// } +// +// if (deleteMarkers != null) { +// +// for (int i = 0; i < nkeys; i++) { +// +// deleteMarkers[i] = src.getDeleteMarker(i); +// +// } +// +// } +// +// } +// +// /** +// * Ctor based on just "data" -- used by unit tests. +// * +// * @param keys +// * A representation of the defined keys in the leaf. +// * @param values +// * An array containing the values found in the leaf. +// * @param versionTimestamps +// * An array of the version timestamps (iff the version metadata +// * is being maintained). +// * @param deleteMarkers +// * An array of the delete markers (iff the version metadata is +// * being maintained). 
+// */ +// public MutableBucketData(final MutableKeyBuffer keys, +// final MutableValueBuffer values, final long[] versionTimestamps, +// final boolean[] deleteMarkers) { +// +// assert keys != null; +// assert values != null; +// assert keys.capacity() == values.capacity(); +// if (versionTimestamps != null) { +// assert versionTimestamps.length == keys.capacity(); +// } +// if (deleteMarkers != null) { +// assert deleteMarkers.length == keys.capacity(); +// } +// +// this.keys = keys; +// this.vals = values; +// this.versionTimestamps = versionTimestamps; +// this.deleteMarkers = deleteMarkers; +// +// if (versionTimestamps != null) +// recalcMinMaxVersionTimestamp(); +// +// } +// +// /** +// * Range check a tuple index. +// * +// * @param index +// * The index of a tuple in [0:nkeys]. +// * @return <code>true</code> +// * +// * @throws IndexOutOfBoundsException +// * if the index is not in the legal range. +// */ +// final protected boolean rangeCheckTupleIndex(final int index) { +// +// if (index < 0 || index > getKeys().size()) +// throw new IndexOutOfBoundsException(); +// +// return true; +// +// } +// +// /** +// * No - this is a mutable data record. +// */ +// final public boolean isReadOnly() { +// +// return false; +// +// } +// +// /** +// * No. +// */ +// final public boolean isCoded() { +// +// return false; +// +// } +// +// final public AbstractFixedByteArrayBuffer data() { +// +// throw new UnsupportedOperationException(); +// +// } +// +// public final long getVersionTimestamp(final int index) { +// +// if (versionTimestamps == null) +// throw new UnsupportedOperationException(); +// +// assert rangeCheckTupleIndex(index); +// +// return versionTimestamps[index]; +// +// } +// +// final public long getMinimumVersionTimestamp() { +// +// if (versionTimestamps == null) +// throw new UnsupportedOperationException(); +// +// return minimumVersionTimestamp; +// +// } +// +// final public long getMaximumVersionTimestamp() { +// +// if (versionTimestamps == null) +// throw new UnsupportedOperationException(); +// +// return maximumVersionTimestamp; +// +// } +// +// public final boolean getDeleteMarker(final int index) { +// +// if (deleteMarkers == null) +// throw new UnsupportedOperationException(); +// +// assert rangeCheckTupleIndex(index); +// +// return deleteMarkers[index]; +// +// } +// +// final public IRaba getValues() { +// +// return vals; +// +// } +// +// final public IRaba getKeys() { +// +// return keys; +// +// } +// +// /** +// * Always returns <code>true</code>. +// */ +// final public boolean isLeaf() { +// +// return true; +// +// } +// +// /** +// * For a leaf the #of entries is always the #of keys. +// */ +// final public int getSpannedTupleCount() { +// +// return getKeys().size(); +// +// } +// +// final public int getValueCount() { +// +// return vals.size(); +// +// } +// +// final public boolean hasDeleteMarkers() { +// +// return deleteMarkers != null; +// +// } +// +// final public boolean hasVersionTimestamps() { +// +// return versionTimestamps != null; +// +// } +// +// final public int getKeyCount() { +// +// return keys.size(); +// +// } +// +// /** +// * No - this class does not support double-linked leaves (only the +// * {@link IndexSegment} actually uses double-linked leaves). 
+// */ +// final public boolean isDoubleLinked() { +// +// return false; +// +// } +// +// final public long getNextAddr() { +// +// throw new UnsupportedOperationException(); +// +// } +// +// final public long getPriorAddr() { +// +// throw new UnsupportedOperationException(); +// +// } +// +// /** +// * Recalculate the min/max version timestamp on the leaf. The caller is +// * responsible for propagating the new min/max to the ancestors of the leaf. +// * +// * @throws UnsupportedOperationException +// * if the leaf is not maintaining per-tuple version timestamps. +// */ +// void recalcMinMaxVersionTimestamp() { +// +// // must be maintaining version timestamps. +// if (versionTimestamps == null) +// throw new UnsupportedOperationException(); +// +// final int nkeys = keys.nkeys; +// +// long min = Long.MAX_VALUE; +// long max = Long.MIN_VALUE; +// +// for (int i = 0; i < nkeys; i++) { +// +// final long t = versionTimestamps[i]; +// +// if (t < min) +// min = t; +// +// if (t > max) +// max = t; +// +// } +// +// minimumVersionTimestamp = min; +// maximumVersionTimestamp = max; +// +// } + +} Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/data/IBucketData.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/data/IBucketData.java 2010-12-03 22:25:13 UTC (rev 3992) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/data/IBucketData.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -26,40 +26,23 @@ */ package com.bigdata.htree.data; -import com.bigdata.btree.IOverflowHandler; -import com.bigdata.btree.IndexSegment; -import com.bigdata.btree.data.IAbstractNodeDataCoder; +import java.util.Iterator; + import com.bigdata.btree.data.ILeafData; -import com.bigdata.btree.raba.IRaba; -import com.bigdata.rawstore.IRawStore; /** - * Interface for the data record of a hash bucket. + * Interface for the data record of a hash bucket. The hash bucket extends the + * B+Tree leaf data record interface. A hash bucket page may be shared by + * multiple directory entries (this is one of the principle tenants of + * extendible hashing). However, the bucket is just a bucket to each such + * directory entry. There is no sense of offset addressing into the shared + * bucket. * <p> - * The hash bucket extends the B+Tree leaf node page. However, hash buckets must - * have the HASH_KEYS flag enabled and SHOULD NOT use prefix compression unless - * (a) an order preserving hash function is used; and (b) the tuples are in key - * order within the page. - * <p> - * The hash values of the keys in the bucket will have a shared prefix (when - * using an MSB hash prefix) which corresponds to the globalDepth of the path - * through the hash tree leading to this bucket less the localDepth of this - * bucket. It is therefore possible (in principle) to store only the LSB bits of - * the hash values in the page and reconstruct the hash values using the MSB - * bits from the path through the hash tree. In order to be able to reconstruct - * the full hash code key based solely on local information, the MSB bits can be - * written out once and the LSB bits can be written out once per tuple. Testing - * the hash value of a key may then be done considering only the LSB bits of the - * hash value. This storage scheme also has the advantage that the hash value is - * not restricted to an int32 and is therefore compatible with the use of - * cryptographic hash functions. 
(If hash values are stored in a B+Tree leaf - * they will not shared this prefix property and can not be compressed in this - * manner). - * <p> * The {@link ILeafData#getPriorAddr()} and {@link ILeafData#getNextAddr()} * fields of the {@link ILeafData} record are reserved by the hash tree to * encode the search order for range queries when used in combination with an * order preserving hash function. + * <p> * * @author tho...@us... */ @@ -72,10 +55,11 @@ // */ // int getLocalDepth(); -// /** -// * The total bit length of the hash values of the keys. -// */ -// int getHashBitLength(); + /** + * Return the #of entries in the hash bucket (all keys, not just the + * distinct keys). + */ + int getKeyCount(); /** * The length (in bits) of the MSB prefix shared by the hash values of the @@ -91,6 +75,29 @@ * @return The hash value of that key. */ int getHash(int index); + + /** + * Return an {@link Iterator} which visits the index of each entry in the + * hash bucket having the given hash code. + * + * @param h + * The hash code. + * + * @todo Note: There is a design tradeoff between autoboxing of the + * <code>int</code> index and allowing the {@link IBucketData} class + * to encapsulate the iterator pattern together with any setup which + * can be done once per scan for a given hash code. For example, using + * a BitInputStream. The iterator allows us to amortize the cost of + * that setup, but we pay for the autoboxing of the index values. + * However, autobox primitives tend to be quite cheap as they are + * rapidly reclaimed by GC. + * <p> + * It is possible to implement an extension interface which returns + * the [int] index without autoboxing. If this method signature is + * modified to return that interface then the implementation can avoid + * autoboxing. + */ + Iterator<Integer> hashIterator(int h); // /** // * The storage address of the last overflow page in the overflow chain. Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/btree/AbstractBTreeTestCase.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/btree/AbstractBTreeTestCase.java 2010-12-03 22:25:13 UTC (rev 3992) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/btree/AbstractBTreeTestCase.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -32,6 +32,8 @@ import java.io.IOException; import java.util.Arrays; import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; import java.util.Map; import java.util.Random; import java.util.TreeMap; @@ -53,6 +55,7 @@ import com.bigdata.btree.raba.IRaba; import com.bigdata.btree.raba.codec.RandomKeysGenerator; import com.bigdata.cache.HardReferenceQueue; +import com.bigdata.htree.data.IBucketData; import com.bigdata.io.SerializerUtil; import com.bigdata.rawstore.Bytes; import com.bigdata.rawstore.IRawStore; @@ -479,6 +482,12 @@ } + if(n1 instanceof IBucketData) { + + assertSameHashCodes((IBucketData) n1, (IBucketData) n2); + + } + assertSameRaba(n1.getValues(), n2.getValues()); } @@ -668,6 +677,68 @@ } + /** + * Verifies details for the {@link IBucketData} interface. + * + * @param b1 + * A hash bucket. + * @param b2 + * Another hash bucket. + */ + static public void assertSameHashCodes(final IBucketData b1, final IBucketData b2) { + + // The key and value counts must be the same. 
+ final int n = b1.getKeyCount(); + assertEquals("keyCount", n, b2.getKeyCount()); + assertEquals("valueCount", n, b1.getValueCount()); + assertEquals("valueCount", n, b2.getValueCount()); + + assertEquals("lengthMSB", b1.getLengthMSB(), b2.getLengthMSB()); + + /* + * Verify that the same hash codes are reported at each index position. + */ + for (int i = 0; i < n; i++) { + + final int h1 = b1.getHash(i); + + final int h2 = b2.getHash(i); + + if (h1 != h2) { + + assertEquals("getHash(" + i + ")", h1, h2); + + } + + } + + /* + * Now verify that the same hash matches are reported for each + * visited hash code. + */ + for (int i = 0; i < n; i++) { + + final int h1 = b1.getHash(i); + + final List<Integer> indices = new LinkedList<Integer>(); + + final Iterator<Integer> eitr = b1.hashIterator(h1); + + while (eitr.hasNext()) { + + indices.add(eitr.next()); + + } + + final Integer[] hashCodes = indices.toArray(new Integer[indices + .size()]); + + assertSameIterator("hashCodes", hashCodes, b2.hashIterator(h1)); + + } + + } + /** * Special purpose helper used to vet {@link Node#childAddr}. * @@ -677,7 +748,7 @@ * @param node * The node. */ - public void assertChildKeys(final long[] childAddr, final Node node) { + static public void assertChildKeys(final long[] childAddr, final Node node) { final int nChildAddr = childAddr.length; @@ -720,7 +791,7 @@ * @param node * The node. */ - public void assertKeys(final byte[][] keys, final AbstractNode<?> node) { + static public void assertKeys(final byte[][] keys, final AbstractNode<?> node) { // // verify the capacity of the keys[] on the node. // assertEquals("keys[] capacity", (node.maxKeys + 1) * stride, @@ -763,7 +834,7 @@ * @param node * The node. */ - public void assertEntryCounts(final int[] expected, final INodeData node) { + static public void assertEntryCounts(final int[] expected, final INodeData node) { final int len = expected.length; Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/btree/data/AbstractLeafDataRecordTestCase.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/btree/data/AbstractLeafDataRecordTestCase.java 2010-12-03 22:25:13 UTC (rev 3992) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/btree/data/AbstractLeafDataRecordTestCase.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -27,8 +27,6 @@ package com.bigdata.btree.data; - -import com.bigdata.btree.raba.IRaba; import com.bigdata.btree.raba.ReadOnlyKeysRaba; import com.bigdata.btree.raba.ReadOnlyValuesRaba; import com.bigdata.io.DataOutputBuffer; Added: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/TestAll.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/TestAll.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/TestAll.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -0,0 +1,68 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +package com.bigdata.htree; + +import junit.framework.Test; +import junit.framework.TestCase; +import junit.framework.TestSuite; + +/** + * Aggregates test suites into increasing dependency order. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public class TestAll extends TestCase { + + /** + * + */ + public TestAll() { + } + + /** + * @param arg0 + */ + public TestAll(String arg0) { + super(arg0); + } + + /** + * Returns a test that will run each of the implementation specific test + * suites in turn. + */ + public static Test suite() + { + + final TestSuite suite = new TestSuite("HTree"); + + suite.addTest(com.bigdata.htree.data.TestAll.suite()); + + suite.addTestSuite(TestExtensibleHashing.class); + + return suite; + + } + +} Added: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/AbstractHashBucketDataRecordTestCase.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/AbstractHashBucketDataRecordTestCase.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/AbstractHashBucketDataRecordTestCase.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -0,0 +1,49 @@ +package com.bigdata.htree.data; + +import com.bigdata.btree.data.AbstractLeafDataRecordTestCase; +import com.bigdata.btree.data.ILeafData; +import com.bigdata.btree.raba.IRaba; + +/** + * Abstract class for tests of {@link IBucketData} implementations. + */ +abstract public class AbstractHashBucketDataRecordTestCase extends + AbstractLeafDataRecordTestCase { + + public AbstractHashBucketDataRecordTestCase() { + + super(); + + } + + public AbstractHashBucketDataRecordTestCase(String name) { + + super(name); + + } + + protected ILeafData mockLeafFactory(final IRaba keys, final IRaba vals, + final boolean[] deleteMarkers, final long[] versionTimestamps) { + + /* + * Note: This computes the MSB prefix and the hash codes using the + * standard Java semantics for the hash of a byte[]. In practice, the + * hash value is normally computed from the key using an application + * specified hash function. 
+ */ + final int lengthMSB = 0; + + final int[] hashCodes = new int[keys.size()]; + + for (int i = 0; i < hashCodes.length; i++) { + + hashCodes[i] = keys.get(i).hashCode(); + + } + + return new MockBucketData(keys, vals, deleteMarkers, versionTimestamps, + lengthMSB, hashCodes); + + } + +} Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/MockBucketData.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/MockBucketData.java 2010-12-03 22:25:13 UTC (rev 3992) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/MockBucketData.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -1,5 +1,10 @@ package com.bigdata.htree.data; +import it.unimi.dsi.fastutil.Hash; + +import java.util.Iterator; +import java.util.NoSuchElementException; + import com.bigdata.btree.data.MockLeafData; import com.bigdata.btree.raba.IRaba; @@ -72,4 +77,57 @@ } + public Iterator<Integer> hashIterator(final int h) { + + return new HashMatchIterator(h); + + } + + /** + * Visits the index of each bucket entry having a matching hash code. + */ + private class HashMatchIterator implements Iterator<Integer> { + + private final int h; + private int currentIndex = 0; + private Integer nextResult = null; + + private HashMatchIterator(final int h) { + this.h = h; + } + + public boolean hasNext() { + final int n = getKeyCount(); + while (nextResult == null && currentIndex < n) { + final int index = currentIndex++; + final int h1 = getHash(index); + if (h1 == h) { + nextResult = Integer.valueOf(index); + break; + } + } + return nextResult != null; + } + + public Integer next() { + + if (!hasNext()) + throw new NoSuchElementException(); + + final Integer tmp = nextResult; + + nextResult = null; + + return tmp; + + } + + public void remove() { + + throw new UnsupportedOperationException(); + + } + + } + } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestAll.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestAll.java 2010-12-03 22:25:13 UTC (rev 3992) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestAll.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -67,6 +67,8 @@ /* * Test w/ all key and value coders suitable for leaves. * + * @todo test the mutable bucket data record + * * @todo test w/ linked-leaf (order preserving hash functions). * * @todo test w/ out-of-line tuples (when too large for the page). Copied: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestBucketDataRecord_CanonicalHuffman_CanonicalHuffman.java (from rev 3991, branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestLeafDataRecord_CanonicalHuffman_CanonicalHuffman.java) =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestBucketDataRecord_CanonicalHuffman_CanonicalHuffman.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestBucketDataRecord_CanonicalHuffman_CanonicalHuffman.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -0,0 +1,68 @@ +/* + +Copyright (C) SYSTAP, LLC 2006-2008. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... 
+ +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Aug 5, 2009 + */ + +package com.bigdata.htree.data; + +import com.bigdata.btree.data.DefaultLeafCoder; +import com.bigdata.btree.data.ILeafData; +import com.bigdata.btree.raba.codec.CanonicalHuffmanRabaCoder; + +/** + * Test suite for the HTree {@link ILeafData} records (accessing coded data in + * place). + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public class TestBucketDataRecord_CanonicalHuffman_CanonicalHuffman extends + AbstractHashBucketDataRecordTestCase { + + /** + * + */ + public TestBucketDataRecord_CanonicalHuffman_CanonicalHuffman() { + } + + /** + * @param name + */ + public TestBucketDataRecord_CanonicalHuffman_CanonicalHuffman(String name) { + super(name); + } + + protected void setUp() throws Exception { + + super.setUp(); + + coder = new DefaultLeafCoder(// + CanonicalHuffmanRabaCoder.INSTANCE,// keys + CanonicalHuffmanRabaCoder.INSTANCE // vals + ); + + } + +} Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestBucketDataRecord_Simple_Simple.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestBucketDataRecord_Simple_Simple.java 2010-12-03 22:25:13 UTC (rev 3992) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestBucketDataRecord_Simple_Simple.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -29,8 +29,6 @@ import com.bigdata.btree.data.AbstractLeafDataRecordTestCase; import com.bigdata.btree.data.DefaultLeafCoder; -import com.bigdata.btree.data.ILeafData; -import com.bigdata.btree.raba.IRaba; import com.bigdata.btree.raba.codec.SimpleRabaCoder; /** @@ -65,28 +63,4 @@ } - protected ILeafData mockLeafFactory(final IRaba keys, final IRaba vals, - final boolean[] deleteMarkers, final long[] versionTimestamps) { - - /* - * Note: This computes the MSB prefix and the hash codes using the - * standard Java semantics for the hash of a byte[]. In practice, the - * hash value is normally computed from the key using an application - * specified hash function. 
- */ - final int lengthMSB = 0; - - final int[] hashCodes = new int[keys.size()]; - - for (int i = 0; i < hashCodes.length; i++) { - - hashCodes[i] = keys.get(i).hashCode(); - - } - - return new MockBucketData(keys, vals, deleteMarkers, versionTimestamps, - lengthMSB, hashCodes); - - } - } Deleted: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestLeafDataRecord_CanonicalHuffman_CanonicalHuffman.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestLeafDataRecord_CanonicalHuffman_CanonicalHuffman.java 2010-12-03 22:25:13 UTC (rev 3992) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestLeafDataRecord_CanonicalHuffman_CanonicalHuffman.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -1,93 +0,0 @@ -/* - -Copyright (C) SYSTAP, LLC 2006-2008. All rights reserved. - -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ -/* - * Created on Aug 5, 2009 - */ - -package com.bigdata.htree.data; - -import com.bigdata.btree.data.AbstractLeafDataRecordTestCase; -import com.bigdata.btree.data.DefaultLeafCoder; -import com.bigdata.btree.data.ILeafData; -import com.bigdata.btree.raba.IRaba; -import com.bigdata.btree.raba.codec.CanonicalHuffmanRabaCoder; - -/** - * Test suite for the HTree {@link ILeafData} records (accessing coded data in - * place). - * - * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id$ - */ -public class TestLeafDataRecord_CanonicalHuffman_CanonicalHuffman extends AbstractLeafDataRecordTestCase { - - /** - * - */ - public TestLeafDataRecord_CanonicalHuffman_CanonicalHuffman() { - } - - /** - * @param name - */ - public TestLeafDataRecord_CanonicalHuffman_CanonicalHuffman(String name) { - super(name); - } - - protected void setUp() throws Exception { - - super.setUp(); - - coder = new DefaultLeafCoder(// - CanonicalHuffmanRabaCoder.INSTANCE,// keys - CanonicalHuffmanRabaCoder.INSTANCE // vals - ); - - } - - protected ILeafData mockLeafFactory(final IRaba keys, final IRaba vals, - final boolean[] deleteMarkers, final long[] versionTimestamps) { - - /* - * Note: This computes the MSB prefix and the hash codes using the - * standard Java semantics for the hash of a byte[]. In practice, the - * hash value is normally computed from the key using an application - * specified hash function. - */ - final int lengthMSB = 0; - - final int[] hashCodes = new int[keys.size()]; - - for (int i = 0; i < hashCodes.length; i++) { - - hashCodes[i] = keys.get(i).hashCode(); - - } - - return new MockBucketData(keys, vals, deleteMarkers, versionTimestamps, - lengthMSB, hashCodes); - - } - -} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
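The IBucketData record introduced in the diff above stores the shared MSB prefix of the tuple hash codes once per page and only the low-order (LSB) bits once per tuple, reconstructing the full 32-bit value from the two pieces. The following standalone sketch illustrates that decomposition; the class and method names are illustrative only (not part of the committed code), and it assumes the keys on a page do share the chosen MSB prefix, as the MockBucketData-based tests arrange.

/**
 * Minimal sketch of the MSB/LSB hash decomposition described for
 * IBucketData. Illustrative names only; not part of the commit.
 */
public class HashPrefixSketch {

    /** The low-order bits which would be stored once per tuple. */
    static int extractLSB(final int hash, final int lengthMSB) {
        final int lengthLSB = 32 - lengthMSB;
        // Mask off the shared prefix, keeping only the per-tuple bits.
        return hash & (int) ((1L << lengthLSB) - 1);
    }

    /** Recombine the page-level prefix with a per-tuple LSB fragment. */
    static int reconstruct(final int msbPrefix, final int lengthMSB,
            final int lsb) {
        final int lengthLSB = 32 - lengthMSB;
        return (msbPrefix << lengthLSB) | lsb;
    }

    public static void main(final String[] args) {
        final int lengthMSB = 8; // assumed prefix length for the page
        final int[] hashes = { "a".hashCode(), "b".hashCode(), "c".hashCode() };
        // The prefix shared by every hash code on the page.
        final int msbPrefix = hashes[0] >>> (32 - lengthMSB);
        for (int h : hashes) {
            final int lsb = extractLSB(h, lengthMSB);
            final int h2 = reconstruct(msbPrefix, lengthMSB, lsb);
            System.out.println(Integer.toHexString(h) + " -> lsb="
                    + Integer.toHexString(lsb) + ", reconstructed="
                    + Integer.toHexString(h2));
        }
    }
}

As the interface javadoc notes, testing a candidate hash against the page then only requires comparing the per-tuple LSB bits, and the scheme does not restrict the hash value to an int32.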
From: <tho...@us...> - 2010-12-03 22:25:20
Revision: 3992 http://bigdata.svn.sourceforge.net/bigdata/?rev=3992&view=rev Author: thompsonbry Date: 2010-12-03 22:25:13 +0000 (Fri, 03 Dec 2010) Log Message: ----------- javadoc Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/DumpIndexSegment.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/DumpIndexSegment.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/DumpIndexSegment.java 2010-12-03 18:48:02 UTC (rev 3991) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/DumpIndexSegment.java 2010-12-03 22:25:13 UTC (rev 3992) @@ -284,7 +284,7 @@ * The owning B+Tree. * @param addr * The address of the data record in the backing store. - * @param data + * @param buf * The data record. * * @return The node or leaf. This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2010-12-03 18:48:11
Revision: 3991 http://bigdata.svn.sourceforge.net/bigdata/?rev=3991&view=rev Author: thompsonbry Date: 2010-12-03 18:48:02 +0000 (Fri, 03 Dec 2010) Log Message: ----------- Modified the Checkpoint record to support HTree as well as BTree. This change introduces a new version for the checkpoint record and is backwards compatible. Modified the DefaultLeafCoder to support hash buckets with the optional inclusion of 32-bit hash codes into the record. This does not change the binary layout of the leaf when hash values are not included and is therefore backward compatible. Added unit tests for the DefaultLeafCoder when used to store the data for an HTree bucket. Moved the HTree classes out of test. They are not ready for use, but the modification to support the hash bucket data page require that the various interfaces are declared in the src/java rather than the src/test code paths. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/Checkpoint.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/data/AbstractReadOnlyNodeData.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/data/DefaultLeafCoder.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/btree/data/AbstractLeafDataRecordTestCase.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/btree/data/AbstractNodeOrLeafDataRecordTestCase.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/btree/data/MockLeafData.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/AbstractHashPage.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HTableMetadata.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashBucket.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashDirectory.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashFunction.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashTree.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/data/ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/data/IBucketData.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/data/IDirectoryData.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/TestExtensibleHashing.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/MockBucketData.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestBucketDataRecord_Simple_Simple.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestLeafDataRecord_CanonicalHuffman_CanonicalHuffman.java Removed Paths: ------------- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/AbstractHashPage.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/ExtensibleHashMap.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/HTableCheckpoint.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/HTableMetadata.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/HashBucket.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/HashDirectory.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/HashFunction.java 
branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/TestExtensibleHashing.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/Checkpoint.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/Checkpoint.java 2010-12-01 21:43:35 UTC (rev 3990) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/Checkpoint.java 2010-12-03 18:48:02 UTC (rev 3991) @@ -34,17 +34,59 @@ // persistent and immutable. private long addrMetadata; - private long addrRoot; - private int height; - private int nnodes; - private int nleaves; - private int nentries; + private long addrRoot; // of root node/leaf for BTree; rootDir for HTree. + private int height; // height for BTree; globalDepth for HTree. + private int nnodes; // #of directories for HTree + private int nleaves; // #of buckets for HTree. + private int nentries; // #of tuples in the index. private long counter; - /** Note: added in {@link #VERSION1} and presumed 0L in earlier versions. */ private long addrBloomFilter; - /** + /** + * Added in {@link #VERSION1}. This is a short field allowing for 65536 + * different possible index types. + */ + private IndexTypeEnum indexType; + + /** + * Type safe enumeration of index types. + */ + public static enum IndexTypeEnum { + + /** BTree. */ + BTree((short)0), + + /** Extendable hash tree. */ + HTree((short)1); + + private IndexTypeEnum(final short code) { + + this.code = code; + + } + + private final short code; + + public short getCode() { + + return code; + + } + + static public IndexTypeEnum valueOf(final short code) { + switch (code) { + case 0: + return BTree; + case 1: + return HTree; + default: + throw new IllegalArgumentException("code=" + code); + } + } + } + + /** * The address used to read this {@link Checkpoint} record from the * store. * <p> @@ -99,20 +141,45 @@ return addrBloomFilter; } - - /** - * The height of the tree - ZERO(0) means just a root leaf. Values - * greater than zero give the #of levels of abstract nodes. There is - * always one layer of leaves which is not included in this value. - */ + + /** + * The height of a B+Tree. ZERO(0) means just a root leaf. Values greater + * than zero give the #of levels of abstract nodes. There is always one + * layer of leaves which is not included in this value. + * + * @return The global depth and ZERO (0) unless the checkpoint record is for + * an {@link IndexTypeEnum#BTree} + */ public final int getHeight() { - return height; + switch (indexType) { + case BTree: + return height; + default: + throw new UnsupportedOperationException(); + } } + /** + * The global depth of the root directory (HTree only). + * + * @return The global depth and ZERO (0) unless the checkpoint record is for + * an {@link IndexTypeEnum#HTree} + */ + public final int getGlobalDepth() { + + switch (indexType) { + case HTree: + return height; + default: + throw new UnsupportedOperationException(); + } + + } + /** - * The #of non-leaf nodes. + * The #of non-leaf nodes (B+Tree) or directories (HTree). */ public final int getNodeCount() { @@ -121,7 +188,7 @@ } /** - * The #of leaves. + * The #of leaves (B+Tree) or hash buckets (HTree). */ public final int getLeafCount() { @@ -130,7 +197,7 @@ } /** - * The #of index entries. + * The #of index entries (aka tuple count). 
*/ public final int getEntryCount() { @@ -155,7 +222,10 @@ public final String toString() { return "Checkpoint" + // - "{height=" + height + // + "{indexType=" + indexType + // + (indexType == IndexTypeEnum.BTree ? ",height=" + height + : (indexType == IndexTypeEnum.HTree ? ",globalDepth=" + + height : "")) + ",nnodes=" + nnodes + // ",nleaves=" + nleaves + // ",nentries=" + nentries + // @@ -195,7 +265,9 @@ 0, // nnodes 0, // nleaves 0, // nentries - 0L // counter + 0L, // counter + IndexTypeEnum.BTree // indexType + ); } @@ -223,7 +295,8 @@ 0, // nnodes 0, // nleaves 0, // nentries - oldCheckpoint.counter + oldCheckpoint.counter,// + IndexTypeEnum.BTree// ); } @@ -276,15 +349,19 @@ btree.nnodes,// btree.nleaves,// btree.nentries,// - btree.counter.get()// + btree.counter.get(),// + IndexTypeEnum.BTree// ); } - private Checkpoint(final long addrMetadata, final long addrRoot, - final long addrBloomFilter, final int height, final int nnodes, - final int nleaves, final int nentries, final long counter) { + private Checkpoint(final long addrMetadata, final long addrRoot, + final long addrBloomFilter, final int height, final int nnodes, + final int nleaves, final int nentries, final long counter, + final IndexTypeEnum indexType) { + assert indexType != null; + /* * Note: The constraint on [addrMetadata] is relaxed in order to permit * a transient BTree (no backing store). @@ -313,6 +390,8 @@ this.counter = counter; + this.indexType = indexType; + } /** @@ -327,11 +406,17 @@ * {@link Checkpoint} record. */ private static transient final int VERSION0 = 0x0; + + /** + * Adds the {@link #indexType} field and the {@link #globalDepth} field, + * which is present only for {@link IndexTypeEnum#HTree}. + */ + private static transient final int VERSION1 = 0x1; /** * The current version. */ - private static transient final int VERSION = VERSION0; + private static transient final int VERSION = VERSION1; /** * Write the {@link Checkpoint} record on the store, setting @@ -386,8 +471,13 @@ final int version = in.readInt(); - if (version != VERSION0) - throw new IOException("Unknown version: " + version); + switch (version) { + case VERSION0: + case VERSION1: + break; + default: + throw new IOException("Unknown version: " + version); + } this.addrMetadata = in.readLong(); @@ -405,7 +495,19 @@ this.counter = in.readLong(); - in.readLong(); // unused. + switch (version) { + case VERSION0: + in.readLong(); // unused + indexType = IndexTypeEnum.BTree; + break; + case VERSION1: + this.indexType = IndexTypeEnum.valueOf(in.readShort()); + in.readShort();// ignored. + in.readInt();// ignored. + break; + default: + throw new AssertionError(); + } in.readLong(); // unused. @@ -431,10 +533,20 @@ out.writeLong(counter); - out.writeLong(0L/*unused*/); + /* + * 8 bytes follow. + */ - out.writeLong(0L/*unused*/); - - } + out.writeShort(indexType.getCode()); + out.writeShort(0/* unused */); + out.writeInt(0/* unused */); + /* + * 8 bytes follow. 
+ */ + + out.writeLong(0L/* unused */); + + } + } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/data/AbstractReadOnlyNodeData.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/data/AbstractReadOnlyNodeData.java 2010-12-01 21:43:35 UTC (rev 3990) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/data/AbstractReadOnlyNodeData.java 2010-12-03 18:48:02 UTC (rev 3991) @@ -129,6 +129,14 @@ */ protected static final short DELTA_VERSION_TIMESTAMPS = 1 << 2; + /** + * Bit flag indicating that the int32 hash of the key should be stored in + * the leaf data record. The function used to compute hash code will be + * known to the owning data structure. This is primarily intended for use + * with hash trees. + */ + protected static final short FLAG_HASH_KEYS = 1 << 3; + /** * The size of the field in the data record which encodes whether the data * record represents a B+Tree {@link #NODE}, a {@link #LEAF}, or a Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/data/DefaultLeafCoder.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/data/DefaultLeafCoder.java 2010-12-01 21:43:35 UTC (rev 3990) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/data/DefaultLeafCoder.java 2010-12-03 18:48:02 UTC (rev 3991) @@ -43,6 +43,7 @@ import com.bigdata.btree.raba.IRaba; import com.bigdata.btree.raba.codec.ICodedRaba; import com.bigdata.btree.raba.codec.IRabaCoder; +import com.bigdata.htree.data.IBucketData; import com.bigdata.io.AbstractFixedByteArrayBuffer; import com.bigdata.io.DataOutputBuffer; @@ -53,7 +54,7 @@ * @version $Id$ */ public class DefaultLeafCoder implements IAbstractNodeDataCoder<ILeafData>, - Externalizable { + Externalizable { /** * @@ -189,12 +190,16 @@ short flags = 0; final boolean hasDeleteMarkers = leaf.hasDeleteMarkers(); final boolean hasVersionTimestamps = leaf.hasVersionTimestamps(); + final boolean hasHashKeys = leaf instanceof IBucketData; // @todo add hasHashKeys() method? if (hasDeleteMarkers) { flags |= AbstractReadOnlyNodeData.FLAG_DELETE_MARKERS; } if (hasVersionTimestamps) { flags |= AbstractReadOnlyNodeData.FLAG_VERSION_TIMESTAMPS; } + if(hasHashKeys) { + flags |= AbstractReadOnlyNodeData.FLAG_HASH_KEYS; + } buf.putShort(flags); @@ -341,6 +346,88 @@ } + // hash codes of the keys (MSB prefix plus LSB coded). +// final int O_hashKeys; + if (hasHashKeys) { + + // The bit length of the hash values. + final int hashBitLength = 32;//((IBucketData)leaf).getHashBitLength(); + + // The bit length of the shared MSB prefix. + final int lengthMSB = ((IBucketData)leaf).getLengthMSB(); + + // The bit length of the LSB which differ for each hash value. + final int lengthLSB = hashBitLength - lengthMSB; + +// buf.putShort((short) hashBitLength); + + buf.putShort((short) lengthMSB); + +// O_hashKeys = buf.pos(); + + if (nkeys > 0) { + + final int byteLength = BytesUtil + .bitFlagByteLength((lengthMSB + (nkeys * lengthLSB)) * 8/* nbits */); + + final byte[] a = new byte[byteLength]; + + final OutputBitStream obs = new OutputBitStream(a); + + try { + + // The hash of the first key. + int h = ((IBucketData) leaf).getHash(0/* index */); + + // Drop off the LSB bits, leaving the MSB bits in the LSB position. + h = h >>> lengthLSB; + +// // Reverse bits to since obs writes the LSB of the int. +// h = Integer.reverse(h); + + // The MSB prefix. 
+ obs.writeInt(h, lengthMSB/* MSB bits */); + + // The LSB of the hash of each key. + for (int i = 0; i < nkeys; i++) { + + // The hash of this key. + h = ((IBucketData)leaf).getHash(i); + + // Drop off the MSB bits. + h = h >>> lengthMSB; + +// // Reverse bits since obs writes the LSB of the int. +// h = Integer.reverse(h); + + // The LSB. + obs.writeInt(h, lengthLSB); + + } + + // copy onto the buffer. + buf.put(a); + + } catch (IOException e) { + throw new RuntimeException(e); + // Note: close is not necessary if flushed and backed by + // byte[]. + // } finally { + // try { + // obs.close(); + // } catch (IOException e) { + // log.error(e); + // } + } + + } + +// } else { +// +// O_hashKeys = -1; + + } + // Slice containing the coded leaf. final AbstractFixedByteArrayBuffer slice = buf.slice(// O_origin, buf.pos() - O_origin); @@ -373,7 +460,7 @@ * @version $Id$ */ static private class ReadOnlyLeafData extends AbstractReadOnlyNodeData<ILeafData> - implements ILeafData { + implements ILeafData, IBucketData { /** The backing buffer. */ private final AbstractFixedByteArrayBuffer b; @@ -407,6 +494,25 @@ */ private final int versionTimestampBits; + /** + * Offset of the int32 hash values in the buffer encoding hash value of + * the tuple keys -or- <code>-1</code> if the leaf does not report those + * data. + */ + private final int O_hashKeys; + + /** + * The #of bits used to code the hash keys -or- ZERO (0) if they are not + * present. (The length of the MSB hash prefix is 32-lengthLSB.) + */ + private final int lengthLSB; + + /** + * The MSB hash prefix shared by all hash codes on this page -or- ZERO + * (0) if hash codes are not present in the page. + */ + private final int hashMSB; + public final AbstractFixedByteArrayBuffer data() { return b; @@ -469,6 +575,7 @@ pos += SIZEOF_FLAGS; final boolean hasVersionTimestamps = ((flags & FLAG_VERSION_TIMESTAMPS) != 0); final boolean hasDeleteMarkers = ((flags & FLAG_DELETE_MARKERS) != 0); + final boolean hasHashKeys = ((flags & FLAG_HASH_KEYS) != 0); this.nkeys = buf.getInt(pos); pos += SIZEOF_NKEYS; @@ -523,6 +630,49 @@ } + if(hasHashKeys) { + + final int lengthMSB = buf.getShort(pos); + pos += 2; + + lengthLSB = 32 /* hashBitLength */- lengthMSB; + + /* + * The byte offset to the start of the bit coded hash keys. The + * first bit coded value is the MSB prefix. You need to skip + * over that when indexing into the LSB array. + */ + O_hashKeys = pos; + + final int byteLength = BytesUtil + .bitFlagByteLength((lengthMSB + (nkeys * lengthLSB)) * 8/* nbits */); + + if (nkeys > 0) { + + final InputBitStream ibs = buf.slice(pos, byteLength) + .getInputBitStream(); + + try { + hashMSB = ibs.readInt(lengthMSB); + } catch (IOException ex) { + // Note: should not be thrown. + throw new RuntimeException(ex); + } + + } else { + + hashMSB = 0; + + } + + } else { + + O_hashKeys = -1; + lengthLSB = 0; + hashMSB = 0; + + } + // save reference to buffer this.b = buf; @@ -584,6 +734,7 @@ pos += SIZEOF_FLAGS; final boolean hasVersionTimestamps = ((flags & FLAG_VERSION_TIMESTAMPS) != 0); final boolean hasDeleteMarkers = ((flags & FLAG_DELETE_MARKERS) != 0); + final boolean hasHashKeys = ((flags & FLAG_HASH_KEYS) != 0); this.nkeys = buf.getInt(pos); pos += SIZEOF_NKEYS; @@ -638,6 +789,49 @@ } + if(hasHashKeys) { + + final int lengthMSB = buf.getShort(pos); + pos += 2; + + lengthLSB = 32 /* hashBitLength */- lengthMSB; + + /* + * The byte offset to the start of the bit coded hash keys. The + * first bit coded value is the MSB prefix. 
You need to skip + * over that when indexing into the LSB array. + */ + O_hashKeys = pos; + + final int byteLength = BytesUtil + .bitFlagByteLength((lengthMSB + (nkeys * lengthLSB)) * 8/* nbits */); + + if (nkeys > 0) { + + final InputBitStream ibs = buf.slice(pos, byteLength) + .getInputBitStream(); + + try { + hashMSB = ibs.readInt(lengthMSB); + } catch (IOException ex) { + // Note: should not be thrown. + throw new RuntimeException(ex); + } + + } else { + + hashMSB = 0; + + } + + } else { + + O_hashKeys = -1; + lengthLSB = 0; + hashMSB = 0; + + } + // save reference to buffer this.b = buf; @@ -709,6 +903,12 @@ } + final public boolean hasHashKeys() { + + return (flags & FLAG_HASH_KEYS) != 0; + + } + public long getMinimumVersionTimestamp() { if (!hasVersionTimestamps()) @@ -770,7 +970,55 @@ return b.getBit((O_deleteMarkers << 3) + index); } + + final public int getLengthMSB() { + + + if (!hasHashKeys()) + throw new UnsupportedOperationException(); + + final int lengthMSB = 32/* hashBitLength */- lengthLSB; + + return lengthMSB; + + } + final public int getHash(final int index) { + + if (index < 0 || index >= nkeys) + throw new IllegalArgumentException(); + + if (!hasHashKeys()) + throw new UnsupportedOperationException(); + + final int lengthMSB = 32/* hashBitLength */- lengthLSB; + + final int byteLength = BytesUtil.bitFlagByteLength(lengthMSB + + nkeys * lengthMSB/* nbits */); + + final InputBitStream ibs = b.slice(O_hashKeys, byteLength) + .getInputBitStream(); + + try { + + final long position = lengthMSB + index * lengthLSB; + + ibs.position(position); + + int h = ibs.readInt(lengthLSB); + + h |= hashMSB; + + return h; + + } catch(IOException ex) { + + throw new RuntimeException(ex); + + } + + } + final public IRaba getKeys() { return keys; @@ -942,6 +1190,26 @@ } + if (leaf instanceof IBucketData) { + + final IBucketData d = (IBucketData)leaf; + + sb.append(",\nhashCodes={lengthMSB=" + d.getLengthMSB() + + ",tuples=["); + + for (int i = 0; i < nkeys; i++) { + + if (i > 0) + sb.append(", "); + + sb.append(d.getHash(i)); + + } + + sb.append("]"); + + } + return sb; } Copied: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/AbstractHashPage.java (from rev 3990, branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/AbstractHashPage.java) =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/AbstractHashPage.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/AbstractHashPage.java 2010-12-03 18:48:02 UTC (rev 3991) @@ -0,0 +1,100 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Dec 1, 2010 + */ +package com.bigdata.htree; + +import java.lang.ref.Reference; + +import org.apache.log4j.Logger; + +import com.bigdata.btree.PO; + +/** + * Abstract class for both directory and data pages for a hash index. + */ +abstract public class AbstractHashPage <T extends AbstractHashPage +/* + * DO-NOT-USE-GENERIC-HERE. The compiler will fail under Linux (JDK 1.6.0_14, + * _16). + */ +> extends PO //implements IAbstractNode, IAbstractNodeData +{ + + private final static transient Logger log = Logger + .getLogger(AbstractHashPage.class); + + /** + * Transient back reference to the index to which this directory belongs. + */ + protected transient HashTree htbl; + + /** + * <p> + * A {@link Reference} to this page. This is created when the page is + * created and effectively provides a canonical {@link Reference} object for + * any given page. + * </p> + * + * @todo Do we need back references for recursive directories? + */ + transient protected final Reference<? extends AbstractHashPage<T>> self; + + /** + * Disallowed. + */ + private AbstractHashPage() { + + throw new UnsupportedOperationException(); + + } + + protected AbstractHashPage(final HashTree htbl, final boolean dirty) { + + if(htbl == null) + throw new IllegalArgumentException(); + + this.htbl = htbl; + + // reference to self: reused to link parents and children. + this.self = htbl.newRef(this); + + if (!dirty) { + + /* + * Nodes default to being dirty, so we explicitly mark this as + * clean. This is ONLY done for the de-serialization constructors. + */ + + setDirty(false); + + } + +// @todo Add to the hard reference queue. +// btree.touch(this); + + } + +} Copied: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HTableMetadata.java (from rev 3990, branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/HTableMetadata.java) =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HTableMetadata.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HTableMetadata.java 2010-12-03 18:48:02 UTC (rev 3991) @@ -0,0 +1,106 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Dec 1, 2010 + */ +package com.bigdata.htree; + +import java.io.Externalizable; +import java.io.IOException; +import java.io.ObjectInput; +import java.io.ObjectOutput; +import java.util.UUID; + +/** + * Configuration options. + * + * @todo Reconcile with IndexMetadata. 
+ */ +public class HTableMetadata implements Externalizable { + + /** + * The unique identifier for the index. + */ + private UUID uuid; + + /** + * Function used to generate hash values from keys. + */ + private HashFunction hashFunction; + + private Object directoryCoder; + + private Object bucketCoder; + + /** + * Function decides whether to split a page, link an overflow page, or + * expand the size of a page. + */ + // private SplitFunction splitFunction; + + /** + * De-serialization constructor. + */ + public HTableMetadata() { + + } + + /** + * Anonymous hash index. + * + * @param uuid + * The unique index identifier. + */ + public HTableMetadata(final UUID uuid) { + + this(null/* name */, uuid); + + } + + /** + * Named hash index + * + * @param name + * The index name. + * @param uuid + * The unique index identifier. + */ + public HTableMetadata(final String name, final UUID uuid) { + + } + + @Override + public void readExternal(ObjectInput in) throws IOException, + ClassNotFoundException { + // TODO Auto-generated method stub + throw new UnsupportedOperationException(); + } + + @Override + public void writeExternal(ObjectOutput out) throws IOException { + // TODO Auto-generated method stub + throw new UnsupportedOperationException(); + } + +} Copied: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashBucket.java (from rev 3990, branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/HashBucket.java) =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashBucket.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashBucket.java 2010-12-03 18:48:02 UTC (rev 3991) @@ -0,0 +1,689 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Nov 29, 2010 + */ +package com.bigdata.htree; + +import java.util.Iterator; + +import org.apache.log4j.Logger; + +import com.bigdata.btree.IOverflowHandler; +import com.bigdata.btree.IndexSegment; +import com.bigdata.btree.data.IAbstractNodeDataCoder; +import com.bigdata.btree.data.ILeafData; +import com.bigdata.btree.raba.IRaba; +import com.bigdata.htree.data.IBucketData; +import com.bigdata.htree.data.IDirectoryData; +import com.bigdata.io.AbstractFixedByteArrayBuffer; +import com.bigdata.rawstore.IRawStore; + +/** + * A (very) simple hash bucket. The bucket holds N int32 keys. + * + * @todo The hash of the key should be part of the ITuple interface so it can be + * passed along based on the application level encoding of the key. + * + * @todo Support out-of-line representations of the key and/or value for a tuple + * when they are large. The definition of "large" can be a configuration + * value for the index metadata. 
For example, 1/4 of the target page size + * or (1k assuming a target page size of 4k). It should also be possible + * to specify that the value is always out of line (this corresponds to + * the common practice in a relational database of indexing into a + * persistent heap rather than the perfect indices with their inline data + * which we use for RDF statements). + * <p> + * The easiest way to do this is to treat the key and value separately and + * write them as raw records onto the backing store if they exceed the + * configured threshold. For the B+Tree, we can not readily move the key + * out of line since we need it for search, but it is easy to do this for + * the HTree. (For now, I suggest that we live with the constraint that + * the key can not be moved out of line for the B+Tree.) For both index + * structures, it is easy to move the value out of line. The tuple + * metadata will stay inline regardless. + * <p> + * In order to resolve out of line keys and/or values the + * {@link ILeafData} will need access to the {@link IRawStore} reference. + * This may require an API change to {@link IRaba} and/or + * {@link IAbstractNodeDataCoder} (the latter also needs to be modified to + * work with {@link IDirectoryData} records) in order to made the + * {@link IRawStore} reference available when the record is serialized + * and/or deserialized. + * <p> + * When the tuple is deleted, the raw record reference for its key and/or + * value must also be deleted. + * <p> + * During a bulk index build, the raw record must be copied to the target + * index store, e.g., an {@link IndexSegment} using an + * {@link IOverflowHandler}. + */ +public class HashBucket extends AbstractHashPage<HashBucket>// +// implements IBucketData// +{ + + private final transient static Logger log = Logger + .getLogger(HashBucket.class); + + /** + * The #of hash code bits which are in use by this {@link HashBucket}. + * <p> + * Note: There are <code>2^(globalBits-localBits)</code> dictionary entries + * which address a given page. Initially, globalBits := 1 and localBits := + * 0. For these values, we have <code>2^(1-0) == 2</code> references to the + * initial page of the hash table. + * + * @todo If we need to examine this when we change the size of the address + * space then it makes more sense to have this as local metadata in + * the address table than as local data in the bucket (the latter + * would require us to visit each bucket when expanding the address + * space). This only needs to be 4 bits to express values in [0:31]. + * + * @todo When overflow buckets are chained together, does each bucket have + * {@link #localHashBits}? If they do, then we need to make sure that + * all buckets in the chain are updated. If {@link #localHashBits} is + * only marked on the first bucket in the chain then we need to + * correctly ignore it on overflow buckets. + * + * @todo adjusting this dirties the bucket (unless the #of local bits its + * stored in the address table entry, but that increases the in-memory + * burden of the address table). + */ + private int localHashBits; + + /** + * The #of keys stored in the bucket. The keys are stored in a dense array. + * For a given {@link #size}, the only indices of the array which have any + * data are [0:{@link #size}-1]. + */ + int size; + + /** + * The user data for the bucket. + * + * @todo IRaba keys plus IRaba vals. 
+ */ + final int[] data; + + protected void setLocalHashBits(final int localHashBits) { + + this.localHashBits = localHashBits; + + } + + public int getLocalHashBits() { + return localHashBits; + } + + /** + * Human friendly representation. + */ + public String toString() { + final StringBuilder sb = new StringBuilder(); + sb.append(super.toString()); + sb.append("{localHashBits=" + getLocalHashBits()); + sb.append(",size=" + size); + sb.append(",values={"); + for (int i = 0; i < size; i++) { + if (i > 0) + sb.append(','); + sb.append(Integer.toString(data[i])); + } + sb.append("}}"); + return sb.toString(); + } + + /** + * Create a new mutable bucket. + * + * @param htbl + * @param localHashBits + * @param bucketSize + */ + public HashBucket(final HashTree htbl, + final int localHashBits, final int bucketSize) { + + super(htbl, true/* dirty */); + + if (localHashBits < 0 || localHashBits > 32) + throw new IllegalArgumentException(); + + if (bucketSize <= 0) + throw new IllegalArgumentException(); + + this.localHashBits = localHashBits; + + this.data = new int[bucketSize]; + + // one more bucket. + htbl.nbuckets++; + + } + + /** + * Return <code>true</code> if the bucket contains the key. + * + * @param h + * The hash code of the key. + * @param key + * The key. + * + * @return <code>true</code> if the key was found in the bucket. + * + * @todo passing in the hash code here makes sense when the bucket stores + * the hash values, e.g., if we always do that or if we have an out of + * bucket reference to a raw record because the tuple did not fit in + * the bucket. + */ + public boolean contains(final int h, final int key) { + + for (int i = 0; i < size; i++) { + + if (data[i] == key) + return true; + + } + + return false; + + } + + /** + * Type safe enumeration reports on the various outcomes when attempting to + * insert a tuple into a page. + * + * @todo The problem with this enumeration (or with using a return code per + * the following javadoc) is that page splits are going to be deferred + * until the page is evicted unless there is an aspect of the split + * function which decides based on the #of tuples on the page. If a + * the split function reports that the page is over capacity when it + * is evicted, then we need to decide whether to split the page, chain + * an overflow page, or use a larger capacity page. + * <p> + * What we should do is scan the page if an insert would fail (or if + * the serialization of the page would fail) and determine what local + * depth we would need to successfully split the page (e.g., no more + * than 70% of the items would be in any prefix at a given depth). + * That can be used to guide the decision to use overflow pages or + * expand the directory. + * <p> + * What are some fast techniques for counting the #of bits which we + * need to make the necessary distinctions in the bucket? Should we + * build a trie over the hash codes? + */ + private static enum InsertEnum { + /** + * The tuple was inserted successfully into this page. + * + * @todo This could be reported as ZERO (0), which is an indication that + * NO expansions where required to insert the tuple into the page. + */ + OK, + /** + * The insert failed because the page is full. Further, the tuple has + * the same key value as all other tuples on the page. Therefore, either + * the insert must be directed into an overflow page or the page size + * must be allowed to increase. 
+ * + * @todo This could be reported as {@link Integer#MAX_VALUE}, which is + * an indication that infinite expansions will not make it + * possible to insert the key into this page (e.g., an overflow + * page is required). [Alternatively, this could report the + * necessary page size if we allow the page size to expand.] + */ + KEYS_ARE_IDENTICAL, + /** + * The insert failed because the page is full. Further, the hash + * associated with the tuple is the same as the hash for all other keys + * on the page. In this case, the insert operation will eventually + * succeed if the address space is expanded (one or more times). + * + * @todo This could be reported as the #of bits which are in common for + * the keys in this page. That could be used to determine how many + * expansions would be required before the key could be inserted. + * [If KEYS_ARE_IDENTICAL is handled by reporting the necessary + * page size, then this could report the #of hash bits which are + * identical using a negative integer (flipping the sign).] + */ + HASH_IS_IDENTICAL; + } + + /** + * Insert the key into the bucket (duplicates are allowed). It is an error + * if the bucket is full. + * + * @param h + * The hash code of the key. + * @param key + * The key. + * + * @return <code>false</code> iff the bucket must be split. + * + * @todo The caller needs to be careful that [h] is the full hash code for + * the key. Normally this is not a problem, but we sometimes wind up + * with masked off hash codes, especially during splits and merges, + * and those must not be passed in here. + */ + public void insert(final int h, final int key) { + + if (size == data.length) { + + /* + * The bucket must be split, potentially recursively. + * + * Note: Splits need to be triggered based on information which is + * only available to the bucket when it considers the insert of a + * specific tuple, including whether the tuple is promoted to a raw + * record reference, whether the bucket has deleted tuples which can + * be compacted, etc. + * + * @todo I need to figure out where the control logic goes to manage + * the split. If the bucket handles splits, then we need to pass in + * the table reference. + */ + + // split the bucket and insert the record (recursive?) + split(key, this); + + /* + * Insert the key into the expanded hash table (this will insert + * into either the old or the new bucket, depending on the hash code + * for the key). + * + * FIXME There are a variety of special conditions which need to be + * handled by insert(), especially all keys have the same value or + * the same int32 hash code or the tuple is too large for the + * bucket. Those conditions all need to be handled before requested + * a split. Since insert() has to handle all of this, it is also + * responsible for re-attempting the key insertion after the split. + * + * The next step is to handle cases where splitting the bucket once + * does not result in a bucket with sufficient space for the new + * key. There are actually two cases here: (1) the hash codes of the + * keys are distinct, so if we double the address space enough times + * the insert will succeed; (2) the hash codes of the keys are + * identical, so no amount of expansion of the address space will + * permit the insert to succeed and an overflow page must be used. + * For (1) we can also chose to use an overflow page in order to + * prevent run away expansion of the address space. + * + * This class needs to be converted to use persistence and to use an + * IRaba for keys/values. 
For the sake of the unit tests, it needs + * to be parameterized for the overflow versus expand decision and + * the IRaba for the keys needs to be defined such that we have a + * guaranteed split when there are three integer keys (or a split + * function could be used to make this decision based on more + * general criteria). [Could also use a pure- append binary raba w/ + * compacting if the raba is full and there are deleted tuples.] + */ + if (log.isDebugEnabled()) + log.debug("retrying insert: key=" + key); + + /* + * @todo This can recurse until the address space reaches the + * maximum possible address space and then throw an exception. The + * code should be modified to use a decision function for growing + * the page, chaining an overflow page, or splitting the page (when + * it would cause the address space to be doubled). + */ + htbl.insert(key); + +// { +// // the hash value of the key. +// final int h = htbl.hash(key); +// // the address of the bucket for that hash code. +// final int addr = htbl.getRoot().addrOf(h); +// // the bucket for that address. +// final SimpleBucket btmp = htbl.getBucketAtStoreAddr(addr); +// if (btmp.insert(h, key)) { +// // insert was successful. +// return; +// } +// /* +// */ +// +// log +// .fatal("Split of bucket did not map space available for new key: key=" +// + key + ", table=" + htbl.dump()); +// +// throw new UnsupportedOperationException(); +// +// } + + return; + + } + + data[size++] = key; + + // one more entry in the index. + htbl.nentries++; + + } + + /** + * Delete a tuple having the specified key. If there is more than one such + * tuple, then a random tuple having the key is deleted. + * + * @param h + * The hash code of the key. + * @param key + * The key. + * + * @todo return the delete tuple. + */ + public boolean delete(final int h, final int key) { + + for (int i = 0; i < size; i++) { + + if (data[i] == key) { + + // #of tuples remaining beyond this point. + final int length = size - i - 1; + + if (length > 0) { + + // Keep the array dense by copying down by one. + System.arraycopy(data, i + 1/* srcPos */, data/* dest */, + i/* destPos */, length); + + } + + size--; + + // one less entry in the index. + htbl.nentries--; + + return true; + + } + + } + + return false; + + } + + /** + * The #of entries in the bucket. + */ + public int getEntryCount() { + + return size; + + } + + /** + * Visit the entries in any order. + */ + public Iterator<Integer/* key */> getEntries() { + + return new EntryIterator(); + + } + + /** + * Visits the entries in the page. + */ + private class EntryIterator implements Iterator<Integer> { + + private int current = 0; + + private EntryIterator() { + + } + + @Override + public boolean hasNext() { + return current < size; + } + + @Override + public Integer next() { + return data[current++]; + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + + } + + @Override + public void delete() throws IllegalStateException { + // TODO Auto-generated method stub + throw new UnsupportedOperationException(); + } + + /** + * Split the bucket, adjusting the address map iff necessary. How this + * proceeds depends on whether the hash #of bits used in the bucket is equal + * to the #of bits used to index into the bucket address table. There are + * two cases: + * <p> + * Case 1: If {@link #globalHashBits} EQ the + * {@link HashBucket#localHashBits}, then the bucket address table is out + * of space and needs to be resized. 
+ * <p> + * Case 2: If {@link #globalHashBits} is GT + * {@link HashBucket#localHashBits}, then there will be at least two + * entries in the bucket address table which point to the same bucket. One + * of those entries is relabeled. The record is then inserted based on the + * new #of hash bits to be considered. If it still does not fit, then either + * handle by case (1) or case (2) as appropriate. + * <p> + * Note that records which are in themselves larger than the bucket size + * must eventually be handled by: (A) using an overflow record; (B) allowing + * the bucket to become larger than the target page size (using a larger + * allocation slot or becoming a blob); or (C) recording the tuple as a raw + * record and maintaining only the full hash code of the tuple and its raw + * record address in the bucket (this would allow us to automatically + * promote long literals out of the hash bucket and a similar approach might + * be used for a B+Tree leaf, except that a long key will still cause a + * problem [also, this implies that deleting a bucket or leaf on the + * unisolated index of the RWStore might require a scan of the IRaba to + * identify blob references which must also be deleted, so it makes sense to + * track those as part of the bucket/leaf metadata). + * + * @param h + * The key which triggered the split. + * @param bold + * The bucket lacking sufficient room for the key which triggered + * the split. + * + * @todo caller will need an exclusive lock if this is to be thread safe. + * + * @todo Overflow buckets (or oversize buckets) are required when all hash + * bits considered by the local bucket are the same, when all keys in + * the local bucket are the same, and when the record to be inserted + * is larger than the bucket. In order to handle these cases we may + * need to more closely integrate the insert/split logic since + * detecting some of these cases requires transparency into the + * bucket. + * + * FIXME The caller could decide to switch to a larger page size or + * chain overflow pages together in order to increase storage + * utilization or handle buckets having large populations of identical + * keys (or keys with the same int32 hash code). [This decision must + * be made before we decide to split.] + * + * FIXME The caller should handle the promotion of large tuples to raw + * records when they are inserted, so we do not need to handle that + * here either. + */ + private void split(final int key, final HashBucket bold) { + + final int globalHashBits = htbl.getGlobalHashBits(); + + if (log.isDebugEnabled()) + log.debug("globalBits=" + globalHashBits + ",localHashBits=" + + bold.getLocalHashBits() + ",key=" + key); + + if (globalHashBits < bold.getLocalHashBits()) { + // This condition should never arise. + throw new AssertionError(); + } + + if (globalHashBits == bold.getLocalHashBits()) { + /* + * The address table is out of space and needs to be resized. + */ + htbl.getRoot().doubleAddressSpaceAndSplitBucket(key, bold); + // fall through + } + + if (globalHashBits > bold.getLocalHashBits()) { + /* + * Split the bucket. + */ + htbl.getRoot().splitBucket(key, bold); + // fall through. 
+ } + + } + + /* + * IBucketData + */ + + public int getHash(int index) { + // TODO Auto-generated method stub + return 0; + } + + public int getLengthMSB() { + // TODO Auto-generated method stub + return 0; + } + + /* + * IAbstractNodeData + */ + + public boolean hasVersionTimestamps() { + // TODO Auto-generated method stub + return false; + } + + public AbstractFixedByteArrayBuffer data() { + // TODO Auto-generated method stub + return null; + } + + public int getKeyCount() { + // TODO Auto-generated method stub + return 0; + } + + public IRaba getKeys() { + // TODO Auto-generated method stub + return null; + } + + public long getMaximumVersionTimestamp() { + // TODO Auto-generated method stub + return 0; + } + + public long getMinimumVersionTimestamp() { + // TODO Auto-generated method stub + return 0; + } + + public int getSpannedTupleCount() { + // TODO Auto-generated method stub + return 0; + } + + public boolean isCoded() { + // TODO Auto-generated method stub + return false; + } + + final public boolean isLeaf() { + + return true; + + } + + /** + * The result depends on the implementation. The {@link HashBucket} will be + * mutable when it is first created and is made immutable when it is + * persisted. If there is a mutation operation, the backing + * {@link IBucketData} is automatically converted into a mutable instance. + */ + final public boolean isReadOnly() { + +// return data.isReadOnly(); + // TODO Auto-generated method stub + return false; + + } + + /* + * ILeafData + */ + + public boolean getDeleteMarker(int index) { + // TODO Auto-generated method stub + return false; + } + + public long getNextAddr() { + // TODO Auto-generated method stub + return 0; + } + + public long getPriorAddr() { + // TODO Auto-generated method stub + return 0; + } + + public int getValueCount() { + // TODO Auto-generated method stub + return 0; + } + + public IRaba getValues() { + // TODO Auto-generated method stub + return null; + } + + public long getVersionTimestamp(int index) { + // TODO Auto-generated method stub + return 0; + } + + public boolean hasDeleteMarkers() { + // TODO Auto-generated method stub + return false; + } + + public boolean isDoubleLinked() { + // TODO Auto-generated method stub + return false; + } + +} Copied: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashDirectory.java (from rev 3990, branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/HashDirectory.java) =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashDirectory.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashDirectory.java 2010-12-03 18:48:02 UTC (rev 3991) @@ -0,0 +1,989 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Dec 1, 2010 + */ +package com.bigdata.htree; + +import java.lang.ref.Reference; +import java.util.Formatter; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.LinkedHashSet; +import java.util.Map; +import java.util.NoSuchElementException; +import java.util.Set; + +import org.apache.log4j.Logger; + +import com.bigdata.rawstore.IRawStore; +import com.bigdata.util.concurrent.Memoizer; + +/** + * A simple (flat) directory for an extensible hashing. + */ +public class HashDirectory extends AbstractHashPage<HashDirectory> { + + private final transient static Logger log = Logger + .getLogger(HashDirectory.class); + +/* FIXME We need a data record (interface and implementations) for the directory + * page. The data record for a bucket is more similar to a B+Tree leaf than is + * the data record for a directory to a B+Tree node. + */ +// /** +// * The data record. {@link MutableNodeData} is used for all mutation +// * operations. {@link ReadOnlyNodeData} is used when the {@link Node} is +// * made persistent. A read-only data record is automatically converted into +// * a {@link MutableNodeData} record when a mutation operation is requested. +// * <p> +// * Note: This is package private in order to expose it to {@link Leaf}. +// * +// * @todo consider volatile and private for {@link Node#data} and +// * {@link Leaf#data} with accessors and settors at package private +// * where necessary. +// */ +// INodeData data; + + /** + * The #of hash code bits which are in use by the {@link #addressMap}. Each + * hash bucket also as a local #of hash bits. Given <code>i</code> is the + * #of global hash bits and <code>j</code> is the number of hash bits in + * some bucket, there will be <code>2^(i-j)</code> addresses which point to + * the same bucket. + */ + private int globalHashBits; + + /** + * The maximum number of directories entries which are permitted. + * + * @todo This can be ignored for a hash tree (recursive directories). In + * that case we are concerned with when to split the directory because + * the page is full rather than an absolute maximum on the address + * space size. + */ + private final int maximumCapacity; + + /** + * The address map. You index into this map using {@link #globalHashBits} + * out of the hash code for a probe key. The values are storage addresses + * for the backing {@link IRawStore}. The address will be {@link #NULL} if + * the corresponding child is dirty, in which case {@link #childRefs} will + * always have a {@link Reference} to the dirty child. This pattern is used + * in combination with either strong references or weak references and a + * ring buffer to manage the incremental eviction of dirty pages. + * + * @todo make this into a private IDirectoryData record. + * <p> + * It seems likely that we want to also record the local depth for + * each child in the IDataDirectory record and a flag indicating + * whether the child is a bucket or a directory page. + */ + private long[] addressMap; + + /** + * <p> + * Weak references to child pages (may be directories or buckets). The + * capacity of this array depends on the #of global bits for the directory. + * </p> + * <p> + * Note: This should not be marked as volatile. 
Volatile does not make the + * elements of the array volatile, only the array reference itself. The + * field would be final except that we clear the reference when stealing the + * array or deleting the node. + * </p> + * + * @todo document why package private (AbstractBTree.loadChild uses this but + * maybe that method could be moved to Node). + */ + private transient/* volatile */Reference<AbstractHashPage<?>>[] childRefs; + + public String toString() { + + return super.toString(); + + } + + /** + * Dumps the buckets in the directory along with metadata about the + * directory. + * + * @param sb + * Where to write the dump. + */ + protected void dump(final StringBuilder sb) { + + // used to remember the visited pages by their addresses (when non-NULL) + final Set<Long/* addrs */> visitedAddrs = new LinkedHashSet<Long>(); + + // used to remember the visited pages when they are transient. + final Map<AbstractHashPage/* children */, Integer/* label */> visitedChildren = new LinkedHashMap<AbstractHashPage, Integer>(); + + // used to format the address table. + final Formatter f = new Formatter(sb); + + // scan through the address table. + for (int index = 0; index < addressMap.length; index++) { + + boolean visited = false; + + long addr = addressMap[index]; + + if (addr != NULL && !visitedAddrs.add(addr)) { + + visited = true; + + } + + HashBucket b = (HashBucket) (childRefs[index]).get(); + + if (b != null && visitedChildren.containsKey(b)) { + + visited = true; + + } else { + + visitedChildren.put(b, index); + + } + + if(b == null) { + + // materialize the bucket. + b = getBucketFromEntryIndex(index); + + addr = b.getIdentity(); + + } + + /* + * The label will be either the storage address followed by "P" (for + * Persistent) or the index of the directory entry followed by "T" + * (for Transient). + */ + final String label = addr == 0L ? (visitedChildren.get(b) + "T") + : (addr + "P"); + + f.format("\n%2d [%" + globalHashBits + "s] => (%8s)", index, + Integer.toBinaryString(HashTree.maskOff(index, + globalHashBits)), label); + + if (!visited) { + + /* + * Show the bucket details the first time we visit it. + */ + + // The #of local hash bits for the target page. + final int localHashBits = b.getLocalHashBits(); + + // The #of entries in this directory for that target page. + final int nrefs = HashTree.pow2(globalHashBits + - localHashBits); + + sb.append(" [k=" + b.getLocalHashBits() + ", n=" + nrefs + + "] {"); + + final Iterator<Integer> eitr = b.getEntries(); + + boolean first = true; + + while(eitr.hasNext()) { + + if (!first) + sb.append(", "); + + sb.append(eitr.next()/*.getObject()*/); + + first = false; + + } + + sb.append("}"); + + } + + } + + sb.append('\n'); + + } + + /** + * Create a new mutable directory page. + * + * @param htbl + * @param initialCapacity + * The initial capacity is the #of buckets which may be stored in + * the hash table before it must be resized. It is expressed in + * buckets and not tuples because there is not (in general) a + * fixed relationship between the size of a bucket and the #of + * tuples which can be stored in that bucket. This will be + * rounded up to the nearest power of two. + * @param maximumCapacity + * @param bucketSize + * + * @todo both maximumCapacity and bucketSize will go away. The maximum + * capacity will be replaced by a decision function for splitting the + * directory page. The bucketSize will be replaced by a decision + * function for splitting, overflowing, or growing the bucket page. 
+ */ + @SuppressWarnings("unchecked") + protected HashDirectory(final HashTree htbl, + final int initialCapacity, final int maximumCapacity, + final int bucketSize) { + + super(htbl, true /* dirty */); + + if (initialCapacity <= 0) + throw new IllegalArgumentException(); + + if (maximumCapacity < initialCapacity) + throw new IllegalArgumentException(); + + this.maximumCapacity = maximumCapacity; + + /* + * Setup the hash table given the ini... [truncated message content] |
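For reference, a minimal self-contained sketch of the directory arithmetic described in the javadoc above, assuming a 32-bit hash code, hash(key) == key for int keys, and a flat directory addressed by the low globalHashBits of the hash. The class and method names below are illustrative only and do not appear in the commit.

/**
 * Minimal sketch of extendible-hashing directory arithmetic, assuming a
 * 32-bit hash code and hash(key) == key. Names are illustrative only.
 */
public class HashDirectoryArithmetic {

    /** Mask off all but the lower nbits of a 32-bit hash code. */
    static int maskOff(final int h, final int nbits) {
        if (nbits < 0 || nbits > 32)
            throw new IllegalArgumentException();
        return nbits == 0 ? 0 : h & (0xffffffff >>> (32 - nbits));
    }

    /**
     * The #of directory slots which reference a page having the given local
     * depth, which is 2^(globalBits - localBits).
     */
    static int slotsForPage(final int globalBits, final int localBits) {
        return 1 << (globalBits - localBits);
    }

    /**
     * Decide how to make room when a page with the given local depth
     * overflows: if the local depth equals the global depth there is only
     * one slot for the page, so the directory must double before the page
     * can be split; otherwise the page can be split in place by relabeling
     * one of the slots which reference it.
     */
    static String onOverflow(final int globalBits, final int localBits) {
        return (localBits == globalBits)
                ? "double directory, then split page"
                : "split page in place";
    }

    public static void main(final String[] args) {
        // globalBits = 3 => 8 directory slots; a page at local depth 1 is
        // referenced by 2^(3-1) == 4 of them.
        System.out.println(slotsForPage(3, 1));  // 4
        // The key 13 hashes to itself; the low 3 bits select slot 5.
        System.out.println(maskOff(13, 3));      // 13 & 0b111 == 5
        // A full page at local depth 3 in a depth-3 directory forces the
        // directory to double (to depth 4) before the page can be split.
        System.out.println(onOverflow(3, 3));
        System.out.println(onOverflow(3, 1));
    }
}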
From: <tho...@us...> - 2010-12-01 21:43:44
|
Revision: 3990 http://bigdata.svn.sourceforge.net/bigdata/?rev=3990&view=rev Author: thompsonbry Date: 2010-12-01 21:43:35 +0000 (Wed, 01 Dec 2010) Log Message: ----------- Checkpoint for the extendible hash index. This begins to migrate the code to a persistence capable index using an approach similar to the B+Tree to manage the directory page and buckets (data pages). Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/TestExtensibleHashing.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/AbstractHashPage.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/ExtensibleHashMap.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/HTableCheckpoint.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/HTableMetadata.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/HashBucket.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/HashDirectory.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/HashFunction.java Removed Paths: ------------- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/SimpleBucket.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/SimpleExtensibleHashMap.java Added: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/AbstractHashPage.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/AbstractHashPage.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/AbstractHashPage.java 2010-12-01 21:43:35 UTC (rev 3990) @@ -0,0 +1,100 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Dec 1, 2010 + */ +package com.bigdata.htbl; + +import java.lang.ref.Reference; + +import org.apache.log4j.Logger; + +import com.bigdata.btree.PO; + +/** + * Abstract class for both directory and data pages for a hash index. + */ +abstract public class AbstractHashPage <T extends AbstractHashPage +/* + * DO-NOT-USE-GENERIC-HERE. The compiler will fail under Linux (JDK 1.6.0_14, + * _16). + */ +> extends PO //implements IAbstractNode, IAbstractNodeData +{ + + private final static transient Logger log = Logger + .getLogger(AbstractHashPage.class); + + /** + * Transient back reference to the index to which this directory belongs. + */ + protected transient ExtensibleHashMap htbl; + + /** + * <p> + * A {@link Reference} to this page. This is created when the page is + * created and effectively provides a canonical {@link Reference} object for + * any given page. + * </p> + * + * @todo Do we need back references for recursive directories? + */ + transient protected final Reference<? extends AbstractHashPage<T>> self; + + /** + * Disallowed. 
+ */ + private AbstractHashPage() { + + throw new UnsupportedOperationException(); + + } + + protected AbstractHashPage(final ExtensibleHashMap htbl, final boolean dirty) { + + if(htbl == null) + throw new IllegalArgumentException(); + + this.htbl = htbl; + + // reference to self: reused to link parents and children. + this.self = htbl.newRef(this); + + if (!dirty) { + + /* + * Nodes default to being dirty, so we explicitly mark this as + * clean. This is ONLY done for the de-serialization constructors. + */ + + setDirty(false); + + } + +// @todo Add to the hard reference queue. +// btree.touch(this); + + } + +} Copied: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/ExtensibleHashMap.java (from rev 3988, branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/SimpleExtensibleHashMap.java) =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/ExtensibleHashMap.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/ExtensibleHashMap.java 2010-12-01 21:43:35 UTC (rev 3990) @@ -0,0 +1,849 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Nov 29, 2010 + */ +package com.bigdata.htbl; + +import java.lang.ref.Reference; +import java.lang.ref.SoftReference; +import java.lang.ref.WeakReference; +import java.util.Iterator; +import java.util.Map; + +import org.apache.log4j.Logger; + +import com.bigdata.btree.AbstractBTree; +import com.bigdata.btree.AbstractNode; +import com.bigdata.btree.ISimpleBTree; +import com.bigdata.btree.Node; +import com.bigdata.io.SerializerUtil; +import com.bigdata.rawstore.Bytes; +import com.bigdata.rawstore.IRawStore; +import com.bigdata.rawstore.SimpleMemoryRawStore; + +import cutthecrap.utils.striterators.Expander; +import cutthecrap.utils.striterators.IStriterator; +import cutthecrap.utils.striterators.Striterator; + +/** + * <p> + * An implementation of an extensible hash map using a 32 bit hash code and a + * fixed length int[] for the bucket. The keys are int32 values. The data stored + * in the hash map is just the key. Buckets provide a perfect fit for N keys. + * This is used to explore the dynamics of the extensible hashing algorithm + * using some well known examples. + * </p> + * <h2>Extensible Hashing</h2> + * <p> + * Extensible (or extendable) hashing uses a directory and pages (or buckets) to + * store the data. Directories make it possible to share pages, which can + * improve the storage efficiency, but directory schemes can suffer from + * exponential growth when the hash values are not uniformly distributed (a + * variety of schemes may be used to compensate for that growth, including + * overflow pages, multi-level directories, better hash functions, etc). 
+ * Extensible hashing uses a global depth, <i>d</i>, which corresponds to the + * <i>d</i>-bits of the hash value used to select the directory entry. Each + * bucket has a local depth which is used to track the #of directory entries + * which target the same bucket. When an insert would fail due to insufficient + * space in the target bucket, the bucket must either be split, increased in + * size, or linked with an overflow page. If the directory has only a single + * reference to a given page, then the directory must be doubled when the page + * is split. Overflow pages can be used to defer doubling the size of the + * directory (and are required when all hash bits are the same, such as when the + * keys are identical), but overflow pages can degrade point test performance + * since a miss on the primary bucket will cause a read through to the page(s) + * of the overflow chain. Increasing the size of the bucket can also be used to + * defer the doubling of the address space (or handle duplicate keys) and can + * translate into better IO efficiency. Extensible hash trees use recursive + * directory structures. Recursive directories essentially allow a hash table to + * be imposed on the overflow pages which would otherwise be associated with the + * primary page and thereby increase the efficiency of access to hash values + * with skewed distributions. Order preserving hash functions may be used to + * create hash indices which cluster keys (which can improve efficiency at all + * levels of the memory hierarchy) and support range scans. Order preserving + * multi-dimensional extensible hashing is a generalization of extensible + * hashing suitable for spatial data and clustered aggregations. The design of + * the directory (and handling of split/overflow decision) for both single and + * multi-dimensional extensible hashing can have an enormous impact on the size + * of the directory and the storage and retrieval efficiency of the index. + * </p> + * <h2>Implementation Design</h2> + * <p> + * </p> + * <p> + * This implementation is not thread-safe. I have not attempted to provide for + * visibility guarantees when resizing the map and I have not attempted to + * provide for concurrent updates. The implementation exists solely to explore + * the extensible hashing algorithm. + * </p> + * + * @todo Name htable implies single level directory where htree implies a + * multi-level directory. Figure out the directory scheme and lock in the + * name for the class. + * + * @todo Backfill in the notes on the implementation design as it gets locked + * in. + * + * @todo We can not directly implement {@link Map} unless the hash table is + * configured to NOT permit duplicates. + * + * @todo Integrate a ring buffer for retention of frequently accessed pages per + * the weak reference policy observed by the BTree with touch() + */ +public class ExtensibleHashMap +implements ISimpleBTree // @todo rename ISimpleBTree interface +// @todo implement IAutoBoxBTree and rename IAutoBoxBTree interface. +// @todo implements IRangeQuery (iff using order preserving hashing) +, HashFunction +{ + + private final transient static Logger log = Logger + .getLogger(ExtensibleHashMap.class); + +// /** +// * The buckets. The first bucket is pre-allocated when the address table is +// * setup and all addresses in the table are initialized to point to that +// * bucket. Thereafter, buckets are allocated when a bucket is split. 
+// * +// * @todo This needs to become an {@link IRawStore} reference, but first we +// * need to provide {@link Reference}s from the directory to the +// * buckets and then we can allow for persistence of dirty pages. +// */ +// protected final ArrayList<SimpleBucket> buckets; + + /** + * The backing store. + */ + private final IRawStore store; + + /** + * The root directory. This is replaced each time copy-on-write triggers a + * cascade of updates. + * <p> + * This hard reference is cleared to <code>null</code> if an index is + * {@link #close() closed}. {@link #getRoot()} automatically uses + * {@link #reopen()} to reload the root so that closed indices may be + * transparently made ready for further use (indices are closed to reduce + * their resource burden, not to make their references invalid). The + * {@link AbstractHashPage} and derived classes <em>assume</em> that the + * root is non-null. This assumption is valid if {@link #close()} is invoked + * by the application in a manner consistent with the single-threaded + * contract for the index. + * <p> + * Note: This field MUST be marked as [volatile] in order to guarantee + * correct semantics for double-checked locking in {@link #reopen()}. + * + * @see http://en.wikipedia.org/wiki/Double-checked_locking + */ + protected volatile HashDirectory root; + + /** + * The root directory. The root is replaced each time copy-on-write triggers + * a cascade of updates. + * <p> + * The hard reference to the root node is cleared if the index is + * {@link #close() closed}. This method automatically {@link #reopen()}s the + * index if it is closed, making it available for use. + */ + final protected HashDirectory getRoot() { + + // make sure that the root is defined. + if (root == null) + reopen(); + + return root; + + } + + /** + * This is part of a {@link #close()}/{@link #reopen()} protocol that may + * be used to reduce the resource burden of an {@link AbstractBTree}. The + * method delegates to {@link #_reopen()} if double-checked locking + * demonstrates that the {@link #root} is <code>null</code> (indicating + * that the index has been closed). This method is automatically invoked by + * a variety of methods that need to ensure that the index is available for + * use. + * + * @see #close() + * @see #isOpen() + * @see #getRoot() + * + * @todo import the rest of this protocol. + */ + final protected void reopen() { + + if (root == null) { + + /* + * reload the root node. + * + * Note: This is synchronized to avoid race conditions when + * re-opening the index from the backing store. + * + * Note: [root] MUST be marked as [volatile] to guarantee correct + * semantics. + * + * See http://en.wikipedia.org/wiki/Double-checked_locking + */ + + synchronized(this) { + + if (root == null) { + + // invoke with lock on [this]. + _reopen(); + + } + + } + + } + + } + + /** + * This method is invoked by {@link #reopen()} once {@link #root} has been + * show to be <code>null</code> with double-checked locking. When invoked in + * this context, the caller is guaranteed to hold a lock on <i>this</i>. + * This is done to ensure that at most one thread gets to re-open the index + * from the backing store. + */ + /*abstract*/ protected void _reopen() { + // @todo ... + } + + /* + * Checkpoint metadata. + */ + + /** + * The height of the index. The height is the #of levels minus one. + */ + final public int getHeight() { + + return 1; + + } + + /** + * The #of directory pages in the index. 
+ */ + final public int getDirectoryCount() { + + return 1; + + } + + /** + * The #of buckets in the index. + */ + final public int getBucketCount() { + + return nbuckets; +// return buckets.size(); + + } + protected int nbuckets; // @todo init from checkpoint + + /** + * The #of entries (aka tuples) in the index. When the index supports delete + * markers, this value also includes tuples which have been marked as + * deleted but not yet purged from the index. + */ + final public int getEntryCount() { + + return nentries; + + } + protected int nentries; // @todo init from checkpoint + + /** + * The backing store. + */ + final public IRawStore getStore() { + + return store; + + } + + /* + * Static utility methods and data. + */ + + /** + * An array of mask values. The index in the array is the #of bits of the + * hash code to be considered. The value at that index in the array is the + * mask to be applied to mask off to zero the high bits of the hash code + * which are to be ignored. + */ + static private final int[] masks; + static { + + masks = new int[32]; + + // Populate the array of masking values. + for (int i = 0; i < 32; i++) { + + masks[i] = getMaskBits(i); + + } + } + + /** + * Return a bit mask which reveals only the low N bits of an int32 value. + * + * @param nbits + * The #of bits to be revealed. + * @return The mask. + */ + static int getMaskBits(final int nbits) { + + if (nbits < 0 || nbits > 32) + throw new IllegalArgumentException(); + + int mask = 0; + int bit; + + for (int i = 0; i < nbits; i++) { + + bit = (1 << i); + + mask |= bit; + + } + + // System.err.println(nbits +" : "+Integer.toBinaryString(mask)); + + return mask; + + } + + /** + * Find the first power of two which is GTE the given value. This is used to + * compute the size of the address space (in bits) which is required to + * address a hash table with that many buckets. + */ + static int getMapSize(final int initialCapacity) { + + if (initialCapacity <= 0) + throw new IllegalArgumentException(); + + int i = 1; + + while ((1 << i) < initialCapacity) + i++; + + return i; + + } + + /** + * Mask off all but the lower <i>nbits</i> of the hash value. + * + * @param h + * The hash value. + * @param nbits + * The #of bits to consider. + * + * @return The hash value considering only the lower <i>nbits</i>. + */ + static protected int maskOff(final int h, final int nbits) { + + if (nbits < 0 || nbits > 32) + throw new IllegalArgumentException(); + + final int v = h & masks[nbits]; + + return v; + + } + + /** + * Return <code>2^n</code>. + * + * @param n + * The exponent. + * + * @return The result. + */ + static protected int pow2(final int n) { + +// return (int) Math.pow(2d, n); + return 1 << n; + + } + + /** + * Return the #of entries in the address map for a page having the given + * local depth. This is <code>2^(globalHashBits - localHashBits)</code>. The + * following table shows the relationship between the global hash bits (gb), + * the local hash bits (lb) for a page, and the #of directory entries for + * that page (nentries). + * + * <pre> + * gb lb nentries + * 1 0 2 + * 1 1 1 + * 2 0 4 + * 2 1 2 + * 2 2 1 + * 3 0 8 + * 3 1 4 + * 3 2 2 + * 3 3 1 + * 4 0 16 + * 4 1 8 + * 4 2 4 + * 4 3 2 + * 4 4 1 + * </pre> + * + * @param localHashBits + * The local depth of the page in [0:{@link #globalHashBits}]. + * + * @return The #of directory entries for that page. + * + * @throws IllegalArgumentException + * if either argument is less than ZERO (0). 
+ * @throws IllegalArgumentException + * if <i>localHashBits</i> is greater than + * <i>globalHashBits</i>. + */ + static protected int getSlotsForPage(final int globalHashBits, + final int localHashBits) { + + if(localHashBits < 0) + throw new IllegalArgumentException(); + + if(globalHashBits < 0) + throw new IllegalArgumentException(); + + if(localHashBits > globalHashBits) + throw new IllegalArgumentException(); + + // The #of address map entries for this page. + final int numSlotsForPage = pow2(globalHashBits - localHashBits); + + return numSlotsForPage; + + } + + /** + * Human friendly representation. + */ + public String toString() { + + final StringBuilder sb = new StringBuilder(); + + sb.append(getClass().getName()); + + sb.append("{globalHashBits=" + getGlobalHashBits()); + + sb.append(",addrSpaceSize=" + getAddressSpaceSize()); + + sb.append(",entryCount=" + getEntryCount()); + + sb.append(",bucketCount=" + getBucketCount()); + + // @todo checkpoint record. + + // @todo index metadata record. + + sb.append("}"); + + return sb.toString(); + + } + + /** + * Dump a representation of the hash index. + */ + public String dump() { + + final StringBuilder sb = new StringBuilder(); + + // basic information. + sb.append(toString()); + + // Dump the data in the index. + getRoot().dump(sb); + + return sb.toString(); + + } + + /** + * + * @param initialCapacity + * The initial capacity is the #of buckets which may be stored in + * the hash table before it must be resized. It is expressed in + * buckets and not tuples because there is not (in general) a + * fixed relationship between the size of a bucket and the #of + * tuples which can be stored in that bucket. This will be + * rounded up to the nearest power of two. + * @param bucketSize + * The #of int tuples which may be stored in a bucket. + * + * @todo Configuration options: + * <p> + * Split, Grow, or Overflow decision function (this subsumes the + * bucketSize parameter since a decision function could consider only + * the #of keys on the page). + * <p> + * Initial and maximum directory size (the latter only for hash tables + * rather than hash trees). [It is perfectly Ok to set the #of global + * bits initially to the #of distinctions which can be persisted in a + * directory page.] + * <p> + * Hash function. Order preserving hashing requires more than just an + * appropriate hashing function. The directory may need to be managed + * differently and pages may need to be chained. Also, hash functions + * with more than 32-bits will require other changes (hash:int to + * hash:Object, some tables must be larger, etc.) + * <p> + * Decision function to inline the key/value of the tuple or to write + * them as a raw record and link to that record using its in-store + * address. + * <p> + * Directory and page encoders. + * <p> + * IRaba implementations for the keys and values (including an option + * for a pure append binary representation with compaction of deleted + * tuples). [Some hash table schemes depend on page transparency for + * the dictionary.] + */ + public ExtensibleHashMap(final int initialCapacity, + final int bucketSize) { + + // @todo pass in the store reference per AbstractBTree. + this.store = new SimpleMemoryRawStore(); + + root = new HashDirectory(this, initialCapacity, + Bytes.kilobyte32 * 4/* maximumCapacity */, bucketSize); + +// /* +// * Now work backwards to determine the size of the address space (in +// * buckets). 
+// */ +// final int addressSpaceSize = SimpleExtensibleHashMap.pow2(root +// .getGlobalHashBits()); +// +// buckets = new ArrayList<SimpleBucket>(addressSpaceSize/* initialCapacity */); +// +// // Note: the local bits of the first bucket is set to ZERO (0). +// buckets.add(new SimpleBucket(this, 0/* localHashBits */, bucketSize)); + + } + + /** + * @todo Generalize w/ hash function from index metadata (the current + * implementation assumes that the hash of an int key is that int). + */ + public int hash(final Object o) { + return ((Integer)o).intValue(); + } + + /** + * @deprecated by {@link #hash(Object)} or maybe hash(byte[]). + */ + public int hash(final int key) { + + return key; + + } + + /** + * Return the pre-allocated bucket having the given address. + * <p> + * Note: The caller is responsible for ensuring that duplicate instances of + * a given bucket or directory are not loaded from the backing store for the + * same hash index object. + * + * @param addr + * The address of the bucket on the backing store. + * + * @return The bucket. + */ + protected HashBucket getBucketAtStoreAddr(final long addr) { + + // @todo various timing and counter stuff. + + return (HashBucket) SerializerUtil.deserialize(store.read(addr)); + + } + + /** + * The #of hash bits which are being used by the address table. + */ + public int getGlobalHashBits() { + + return getRoot().getGlobalHashBits(); + + } + + /** + * The size of the address space is <code>2^{@link #globalHashBits}</code>. + */ + public int getAddressSpaceSize() { + + return pow2(getGlobalHashBits()); + + } + + /* + * IAutoBoxBTree + * + * @todo API Alignment with IAutoBoxBTree + */ + + /** + * Return <code>true</code> iff the hash table contains the key. + * <p> + * Lookup: Compute h(K) and right shift (w/o sign extension) by i bits. Use + * this to index into the bucket address table. The address in the table is + * the bucket address and may be used to directly read the bucket. + * + * @param key + * The key. + * + * @return <code>true</code> iff the key was found. + */ + public boolean contains(final int key) { + + return getRoot().contains(key); + + } + + /** + * Insert the key into the hash table. Duplicates are allowed. + * <p> + * Insert: Per lookup. On overflow, we need to split the bucket moving the + * existing records (and the new record) into new buckets. + * + * @see #split(int, int, HashBucket) + * + * @param key + * The key. + * + * @todo rename as append() method. insert() should retain the semantics of + * replacing the existing tuple for the key. might rename insert to + * put(). + */ + public void insert(final int key) { + + getRoot().insert(key); + + } + + /** + * Delete the key from the hash table (in the case of duplicates, a random + * entry having that key is deleted). + * <p> + * Delete: Buckets may be removed no later than when they become empty and + * doing this is a local operation with costs similar to splitting a bucket. + * Likewise, it is clearly possible to coalesce buckets which underflow + * before they become empty by scanning the 2^(i-j) buckets indexed from the + * entries in the bucket address table using i bits from h(K). [I need to + * research handling deletes a little more, including under what conditions + * it is cost effective to reduce the size of the bucket address table + * itself.] + * + * @param key + * The key. + * + * @return <code>true</code> iff a tuple having that key was deleted. + * + * @todo return the deleted tuple. + * + * @todo merge buckets when they underflow/become empty? 
(but note that we + * do not delete anything from the hash map for a hash join, just + * insert, insert, insert). + */ + public boolean delete(final int key) { + + return getRoot().delete(key); + + } + + /* + * ISimpleBTree + */ + + @Override + public boolean contains(byte[] key) { + // TODO Auto-generated method stub + return false; + } + + @Override + public byte[] insert(byte[] key, byte[] value) { + // TODO Auto-generated method stub + return null; + } + + @Override + public byte[] lookup(byte[] key) { + // TODO Auto-generated method stub + return null; + } + + @Override + public byte[] remove(byte[] key) { + // TODO Auto-generated method stub + return null; + } + + /* + * Core CRUD methods. + * + * @todo The core B+Tree methods accept and return ITuple objects. The core + * hash index methods should do the same thing. + */ + + /** + * Visit the buckets. + * <p> + * Note: This is NOT thread-safe! + */ + public Iterator<HashBucket> buckets() { + + return getRoot().buckets(); + + } + + /** + * Return an iterator which visits all entries in the hash table. + */ + @SuppressWarnings("unchecked") + public Iterator<Integer> getEntries() { + final IStriterator sitr = new Striterator(buckets()) + .addFilter(new Expander() { + private static final long serialVersionUID = 1L; + + @Override + protected Iterator expand(final Object obj) { + return ((HashBucket) obj).getEntries(); + } + }); + return (Iterator<Integer>) sitr; + } + + /** + * Create the reference that will be used by a {@link Node} to refer to its + * children (nodes or leaves). + * + * @param child + * A node. + * + * @return A reference to that node. + * + * @see AbstractNode#self + * @see SoftReference + * @see WeakReference + */ + final <T extends AbstractHashPage<T>> Reference<AbstractHashPage<T>> newRef( + final AbstractHashPage<T> child) { + + /* + * Note: If the parent refers to its children using soft references the + * the presence of the parent will tend to keep the children wired into + * memory until the garbage collector is forced to sweep soft references + * in order to make room on the heap. Such major garbage collections + * tend to make the application "hesitate". + * + * @todo it may be that frequently used access paths in the btree should + * be converted dynamically from a weak reference to soft reference in + * order to bias the garbage collector to leave those paths alone. if we + * play this game then we should limit the #of soft references and make + * the node choose among its children for those it will hold with a soft + * reference so that the notion of frequent access is dynamic and can + * change as the access patterns on the index change. + */ + + if (store == null) { + + /* + * Note: Used for transient BTrees. + */ + + return new HardReference<AbstractHashPage<T>>(child); + + } else { + + return new WeakReference<AbstractHashPage<T>>( child ); +// return new SoftReference<AbstractNode>( child ); // causes significant GC "hesitations". + } + + + } + + /** + * A class that provides hard reference semantics for use with transient + * indices. While the class extends {@link WeakReference}, it internally + * holds a hard reference and thereby prevents the reference from being + * cleared. This approach is necessitated on the one hand by the use of + * {@link Reference} objects for linking directories and buckets, etc. and + * on the other hand by the impossibility of defining your own direct + * subclass of {@link Reference} (a runtime security manager exception will + * result). 
+ * + * @author <a href="mailto:tho...@us...">Bryan + * Thompson</a> + * + * @param <T> + */ + static class HardReference<T> extends WeakReference<T> { + + final private T ref; + + HardReference(final T ref) { + + super(null); + + this.ref = ref; + + } + + /** + * Returns the hard reference. + */ + public T get() { + + return ref; + + } + + /** + * Overridden as a NOP. + */ + public void clear() { + + // NOP + + } + + } + +} Added: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/HTableCheckpoint.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/HTableCheckpoint.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/HTableCheckpoint.java 2010-12-01 21:43:35 UTC (rev 3990) @@ -0,0 +1,468 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Dec 1, 2010 + */ +package com.bigdata.htbl; + +import java.io.Externalizable; +import java.io.IOException; +import java.io.ObjectInput; +import java.io.ObjectOutput; +import java.nio.ByteBuffer; + +import com.bigdata.btree.BTree; +import com.bigdata.btree.IBloomFilter; +import com.bigdata.btree.IndexMetadata; +import com.bigdata.io.SerializerUtil; +import com.bigdata.rawstore.IRawStore; + +/** + * A checkpoint record is written each time the hash index is flushed to the + * store. + * + * @todo Update for hash table, potentially using the same checkpoint record for + * the B+Tree and hash tables. + * <p> + * Note: In order to create a btree use + * {@link BTree#create(IRawStore, IndexMetadata)} to write the initial + * {@link IndexMetadata} record and the initial check point on the store. + * It will then load the {@link BTree} from the {@link Checkpoint} record + * and you can start using the index. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id: Checkpoint.java 3318 2010-07-27 18:36:58Z thompsonbry $ + */ +public class HTableCheckpoint implements Externalizable { + + // transient and set on write or read-back. + transient long addrCheckpoint; + + // persistent and immutable. + private long addrMetadata; + private long addrRoot; + private int height; + private int nnodes; + private int nleaves; + private int nentries; + private long counter; + + /** Note: added in {@link #VERSION1} and presumed 0L in earlier versions. */ + private long addrBloomFilter; + + /** + * The address used to read this {@link Checkpoint} record from the + * store. + * <p> + * Note: This is set as a side-effect by {@link #write(IRawStore)}. + * + * @throws IllegalStateException + * if the {@link Checkpoint} record has not been written on + * a store. 
+ */ + final public long getCheckpointAddr() { + + if (addrCheckpoint == 0L) { + + throw new IllegalStateException(); + + } + + return addrCheckpoint; + + } + + /** + * Address that can be used to read the {@link IndexMetadata} record for + * the index from the store. + */ + final public long getMetadataAddr() { + + return addrMetadata; + + } + + /** + * Address of the root node or leaf of the {@link BTree}. + * + * @return The address of the root -or- <code>0L</code> iff the btree + * does not have a root. + */ + final public long getRootAddr() { + + return addrRoot; + + } + + /** + * Address of the {@link IBloomFilter}. + * + * @return The address of the bloom filter -or- <code>0L</code> iff the + * btree does not have a bloom filter. + */ + final public long getBloomFilterAddr() { + + return addrBloomFilter; + + } + + /** + * The height of the tree - ZERO(0) means just a root leaf. Values + * greater than zero give the #of levels of abstract nodes. There is + * always one layer of leaves which is not included in this value. + */ + public final int getHeight() { + + return height; + + } + + /** + * The #of non-leaf nodes. + */ + public final int getNodeCount() { + + return nnodes; + + } + + /** + * The #of leaves. + */ + public final int getLeafCount() { + + return nleaves; + + } + + /** + * The #of index entries. + */ + public final int getEntryCount() { + + return nentries; + + } + + /** + * Return the value of the counter stored in the {@link Checkpoint} + * record. + */ + public final long getCounter() { + + return counter; + + } + + /** + * A human readable representation of the state of the {@link Checkpoint} + * record. + */ + public final String toString() { + + return "Checkpoint" + // + "{height=" + height + // + ",nnodes=" + nnodes + // + ",nleaves=" + nleaves + // + ",nentries=" + nentries + // + ",counter=" + counter + // + ",addrRoot=" + addrRoot + // + ",addrMetadata=" + addrMetadata + // + ",addrBloomFilter=" + addrBloomFilter + // + ",addrCheckpoint=" + addrCheckpoint + // + "}"; + + } + + /** + * De-serialization ctor. + */ + public HTableCheckpoint() { + + } + + /** + * Create the first checkpoint record for a new {@link BTree} from a + * {@link IndexMetadata} record. The root of the {@link BTree} will NOT + * exist (its address will be <code>0L</code>). Once written on the store + * the {@link Checkpoint} record may be used to obtain a corresponding + * instance of a {@link BTree} object. + * + * @param metadata + * The index metadata record. + */ + public HTableCheckpoint(final IndexMetadata metadata ) { + + this( // + metadata.getMetadataAddr(), // + 0L,// No root yet. + 0L,// No bloom filter yet. + 0, // height + 0, // nnodes + 0, // nleaves + 0, // nentries + 0L // counter + ); + + } + + /** + * Create the first checkpoint record for an existing {@link BTree} when it + * is propagated on overflow onto a new backing {@link IRawStore}. The + * {@link #counter} is propagated to the new {@link Checkpoint} but + * otherwise the initialization is as if for an empty {@link BTree}. + * + * @param metadata + * The index metadata record. + * @param oldCheckpoint + * The last {@link Checkpoint} for the index on the old backing + * store. The {@link Checkpoint#counter} is propagated to the new + * {@link Checkpoint} record. + */ + public HTableCheckpoint(final IndexMetadata metadata, final HTableCheckpoint oldCheckpoint ) { + + this( // + metadata.getMetadataAddr(), // + 0L,// No root yet. + 0L,// No bloom filter yet. 
+ 0, // height + 0, // nnodes + 0, // nleaves + 0, // nentries + oldCheckpoint.counter + ); + + } + +// /** +// * Creates a {@link Checkpoint} record from a {@link BTree}. +// * <p> +// * Pre-conditions: +// * <ul> +// * <li>The root is clean.</li> +// * <li>The metadata record is clean.</li> +// * <li>The optional bloom filter is clean if it is defined.</li> +// * </ul> +// * Note: if the root is <code>null</code> then the root is assumed to be +// * clean and the root address from the last {@link Checkpoint} record is +// * used. Otherwise the address of the root is used (in which case it MUST be +// * defined). +// * +// * @param btree +// * The btree. +// */ +// public HTableCheckpoint(final BTree btree) { +// +// this(btree.metadata.getMetadataAddr(),// +// /* +// * root node or leaf. +// * +// * Note: if the [root] reference is not defined then we use the +// * address in the last checkpoint record. if that is 0L then +// * there is no root and a new root leaf will be created on +// * demand. +// */ +// (btree.root == null ? btree.getCheckpoint().getRootAddr() +// : btree.root.getIdentity()),// +// /* +// * optional bloom filter. +// * +// * Note: if the [bloomFilter] reference is not defined then we +// * use the address in the last checkpoint record. if that is 0L +// * then there is no bloom filter. If the [bloomFilter] reference +// * is defined but the bloom filter has been disabled, then we +// * also write a 0L so that the bloom filter is no longer +// * reachable from the new checkpoint. +// */ +// (btree.bloomFilter == null ? btree.getCheckpoint() +// .getBloomFilterAddr() +// : btree.bloomFilter.isEnabled() ? btree.bloomFilter +// .getAddr() : 0L),// +// btree.height,// +// btree.nnodes,// +// btree.nleaves,// +// btree.nentries,// +// btree.counter.get()// +// ); +// +// } + + private HTableCheckpoint(final long addrMetadata, final long addrRoot, + final long addrBloomFilter, final int height, final int nnodes, + final int nleaves, final int nentries, final long counter) { + + /* + * Note: The constraint on [addrMetadata] is relaxed in order to permit + * a transient BTree (no backing store). + */ +// assert addrMetadata != 0L; + // MUST be valid addr. + this.addrMetadata = addrMetadata; + + // MAY be 0L (tree initially has no root) + this.addrRoot = addrRoot; + + /* + * MAY be 0L (bloom filter is optional and an new bloom filter is clear, + * so it will not be written out until something is written on the + * index). + */ + this.addrBloomFilter = addrBloomFilter; + + this.height = height; + + this.nnodes = nnodes; + + this.nleaves = nleaves; + + this.nentries = nentries; + + this.counter = counter; + + } + + /** + * Initial serialization version. + * <p> + * Note: The fields of the {@link Checkpoint} record use fixed length + * representations in order to support the possibility that we might do an + * in place update of a {@link Checkpoint} record as part of a data + * migration strategy. For the same reason, the {@link Checkpoint} record + * includes some unused fields. Those fields are available for future + * version changes without requiring us to change the length of the + * {@link Checkpoint} record. + */ + private static transient final int VERSION0 = 0x0; + + /** + * The current version. + */ + private static transient final int VERSION = VERSION0; + + /** + * Write the {@link Checkpoint} record on the store, setting + * {@link #addrCheckpoint} as a side effect. 
+ * + * @param store + * + * @throws IllegalStateException + * if the {@link Checkpoint} record has already been + * written. + */ + final public void write(final IRawStore store) { + + if (addrCheckpoint != 0L) { + + throw new IllegalStateException(); + + } + + final byte[] data = SerializerUtil.serialize(this); + + addrCheckpoint = store.write(ByteBuffer.wrap(data)); + + } + + /** + * Read a {@link Checkpoint} record from a store. + * + * @param store + * The store. + * @param addrCheckpoint + * The address from which to read the {@link Checkpoint} + * record. This address is set on the {@link Checkpoint} + * record as a side-effect. + */ + public static HTableCheckpoint load(final IRawStore store, final long addrCheckpoint) { + + if (store == null) + throw new IllegalArgumentException(); + + final ByteBuffer buf = store.read(addrCheckpoint); + + final HTableCheckpoint checkpoint = (HTableCheckpoint) SerializerUtil.deserialize(buf); + + checkpoint.addrCheckpoint = addrCheckpoint; + + return checkpoint; + + } + + public void readExternal(final ObjectInput in) throws IOException, ClassNotFoundException { + + final int version = in.readInt(); + + if (version != VERSION0) + throw new IOException("Unknown version: " + version); + + this.addrMetadata = in.readLong(); + + this.addrRoot = in.readLong(); + + this.addrBloomFilter = in.readLong(); + + this.height = in.readInt(); + + this.nnodes = in.readInt(); + + this.nleaves = in.readInt(); + + this.nentries = in.readInt(); + + this.counter = in.readLong(); + + in.readLong(); // unused. + + in.readLong(); // unused. + + } + + public void writeExternal(final ObjectOutput out) throws IOException { + + out.writeInt(VERSION); + + out.writeLong(addrMetadata); + + out.writeLong(addrRoot); + + out.writeLong(addrBloomFilter); + + out.writeInt(height); + + out.writeInt(nnodes); + + out.writeInt(nleaves); + + out.writeInt(nentries); + + out.writeLong(counter); + + out.writeLong(0L/*unused*/); + + out.writeLong(0L/*unused*/); + + } + +} + Added: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/HTableMetadata.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/HTableMetadata.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/HTableMetadata.java 2010-12-01 21:43:35 UTC (rev 3990) @@ -0,0 +1,102 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Dec 1, 2010 + */ +package com.bigdata.htbl; + +import java.io.Externalizable; +import java.io.IOException; +import java.io.ObjectInput; +import java.io.ObjectOutput; +import java.util.UUID; + +/** + * Configuration options. + * + * @todo Reconcile with IndexMetadata. 
+ */ +public class HTableMetadata implements Externalizable { + + /** + * The unique identifier for the index. + */ + private UUID uuid; + + /** + * Function used to generate hash values from keys. + */ + private HashFunction hashFunction; + + /** + * Function decides whether to split a page, link an overflow page, or + * expand the size of a page. + */ + // private SplitFunction splitFunction; + + /** + * De-serialization constructor. + */ + public HTableMetadata() { + + } + + /** + * Anonymous hash index. + * + * @param uuid + * The unique index identifier. + */ + public HTableMetadata(final UUID uuid) { + + this(null/* name */, uuid); + + } + + /** + * Named hash index + * + * @param name + * The index name. + * @param uuid + * The unique index identifier. + */ + public HTableMetadata(final String name, final UUID uuid) { + + } + + @Override + public void readExternal(ObjectInput in) throws IOException, + ClassNotFoundException { + // TODO Auto-generated method stub + throw new UnsupportedOperationException(); + } + + @Override + public void writeExternal(ObjectOutput out) throws IOException { + // TODO Auto-generated method stub + throw new UnsupportedOperationException(); + } + +} Copied: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/HashBucket.java (from rev 3988, branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/SimpleBucket.java) =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/HashBucket.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/HashBucket.java 2010-12-01 21:43:35 UTC (rev 3990) @@ -0,0 +1,518 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Nov 29, 2010 + */ +package com.bigdata.htbl; + +import java.util.Iterator; + +import org.apache.log4j.Logger; + +import com.bigdata.btree.data.ILeafData; + +/** + * A (very) simple hash bucket. The bucket holds N int32 keys. + * + * @todo Share the {@link ILeafData} interface? Or define a common base + * interface for hash buckets and b+tree leaves and then specialize it for + * the different kinds of indices? + */ +public class HashBucket extends AbstractHashPage<HashBucket> { + + private final transient static Logger log = Logger + .getLogger(HashBucket.class); + + /** + * The #of hash code bits which are in use by this {@link HashBucket}. + * <p> + * Note: There are <code>2^(globalBits-localBits)</code> dictionary entries + * which address a given page. Initially, globalBits := 1 and localBits := + * 0. For these values, we have <code>2^(1-0) == 2</code> references to the + * initial page of the hash table. 
+ * + * @todo If we need to examine this when we change the size of the address + * space then it makes more sense to have this as local metadata in + * the address table than as local data in the bucket (the latter + * would require us to visit each bucket when expanding the address + * space). This only needs to be 4 bits to express values in [0:31]. + * + * @todo When overflow buckets are chained together, does each bucket have + * {@link #localHashBits}? If they do, then we need to make sure that + * all buckets in the chain are updated. If {@link #localHashBits} is + * only marked on the first bucket in the chain then we need to + * correctly ignore it on overflow buckets. + * + * @todo adjusting this dirties the bucket (unless the #of local bits its + * stored in the address table entry, but that increases the in-memory + * burden of the address table). + */ + int localHashBits; + + /** + * The #of keys stored in the bucket. The keys are stored in a dense array. + * For a given {@link #size}, the only indices of the array which have any + * data are [0:{@link #size}-1]. + */ + int size; + + /** + * The user data for the bucket. + * + * @todo IRaba keys plus IRaba vals, but the encoded representation must + * support out of line keys/values. That means that the IRaba will + * have to have access to the store or ITuple will have to have + * indirection support. + */ + final int[] data; + + /** + * Human friendly representation. + */ + public String toString() { + final StringBuilder sb = new StringBuilder(); + sb.append(super.toString()); + sb.append("{localHashBits=" + localHashBits); + sb.append(",size=" + size); + sb.append(",values={"); + for (int i = 0; i < size; i++) { + if (i > 0) + sb.append(','); + sb.append(Integer.toString(data[i])); + } + sb.append("}}"); + return sb.toString(); + } + + /** + * Create a new mutable bucket. + * + * @param htbl + * @param localHashBits + * @param bucketSize + */ + public HashBucket(final ExtensibleHashMap htbl, + final int localHashBits, final int bucketSize) { + + super(htbl, true/* dirty */); + + if (localHashBits < 0 || localHashBits > 32) + throw new IllegalArgumentException(); + + if (bucketSize <= 0) + throw new IllegalArgumentException(); + + this.localHashBits = localHashBits; + + this.data = new int[bucketSize]; + + // one more bucket. + htbl.nbuckets++; + + } + + /** + * Return <code>true</code> if the bucket contains the key. + * + * @param h + * The hash code of the key. + * @param key + * The key. + * + * @return <code>true</code> if the key was found in the bucket. + * + * @todo passing in the hash code here makes sense when the bucket stores + * the hash values, e.g., if we always do that or if we have an out of + * bucket reference to a raw record because the tuple did not fit in + * the bucket. + */ + public boolean contains(final int h, final int key) { + + for (int i = 0; i < size; i++) { + + if (data[i] == key) + return true; + + } + + return false; + + } + + /** + * Type safe enumeration reports on the various outcomes when attempting to + * insert a tuple into a page. + * + * @todo The problem with this enumeration (or with using a return code per + * the following javadoc) is that page splits are going to be deferred + * until the page is evicted unless there is an aspect of the split + * function which decides based on the #of tuples on the page. 
If a + * the split function reports that the page is over capacity when it + * is evicted, then we need to decide whether to split the page, chain + * an overflow page, or use a larger capacity page. + * <p> + * What we should do is scan the page if an insert would fail (or if + * the serialization of the page would fail) and determine what local + * depth we would need to successfully split the page (e.g., no more + * than 70% of the items would be in any prefix at a given depth). + * That can be used to guide the decision to use overflow pages or + * expand the directory. + * <p> + * What are some fast techniques for counting the #of bits which we + * need to make the necessary distinctions in the bucket? Should we + * build a trie over the hash codes? + */ + private static enum InsertEnum { + /** + * The tuple was inserted successfully into this page. + * + * @todo This could be reported as ZERO (0), which is an indication that + * NO expansions where required to insert the tuple into the page. + */ + OK, + /** + * The insert failed because the page is full. Further, the tuple has + * the same key value as all other tuples on the page. Therefore, either + * the insert must be directed into an overflow page or the page size + * must be allowed to increase. + * + * @todo This could be reported as {@link Integer#MAX_VALUE}, which is + * an indication that infinite expansions will not make it + * possible to insert the key into this page (e.g., an overflow + * page is required). [Alternatively, this could report the + * necessary page size if we allow the page size to expand.] + */ + KEYS_ARE_IDENTICAL, + /** + * The insert failed because the page is full. Further, the hash + * associated with the tuple is the same as the hash for all other keys + * on the page. In this case, the insert operation will eventually + * succeed if the address space is expanded (one or more times). + * + * @todo This could be reported as the #of bits which are in common for + * the keys in this page. That could be used to determine how many + * expansions would be required before the key could be inserted. + * [If KEYS_ARE_IDENTICAL is handled by reporting the necessary + * page size, then this could report the #of hash bits which are + * identical using a negative integer (flipping the sign).] + */ + HASH_IS_IDENTICAL; + } + + /** + * Insert the key into the bucket (duplicates are allowed). It is an error + * if the bucket is full. + * + * @param h + * The hash code of the key. + * @param key + * The key. + * + * @return <code>false</code> iff the bucket must be split. + * + * @todo The caller needs to be careful that [h] is the full hash code for + * the key. Normally this is not a problem, but we sometimes wind up + * with masked off hash codes, especially during splits and merges, + * and those must not be passed in here. + */ + public void insert(final int h, final int key) { + + if (size == data.length) { + + /* + * The bucket must be split, potentially recursively. + * + * Note: Splits need to be triggered based on information which is + * only available to the bucket when it considers the insert of a + * specific tuple, including whether the tuple is promoted to a raw + * record reference, whether the bucket has deleted tuples which can + * be compacted, etc. + * + * @todo I need to figure out where the control logic goes to manage + * the split. If the bucket handles splits, then we need to pass in + * the table reference. + */ + + // split the bucket and insert the record (recursive?) 
+ split(key, this); + + /* + * Insert the key into the expanded hash table (this will insert + * into either the old or the new bucket, depending on the hash code + * for the key). + * + * FIXME There are a variety of special conditions which need to be + * handled by insert(), especially all keys have the same value or + * the same int32 hash code or the tuple is too large for the + * bucket. Those conditions all need to be handled before requested + * a split. Since insert() has to handle all of this, it is also + * responsible for re-attempting the key insertion after the split. + * + * The next step is to handle cases where splitting the bucket once + * does not result in a bucket with sufficient space for the new + * key. There are actually two cases here: (1) the hash codes of the + * keys are distinct, so if we double the address space enough times + * the insert will succeed; (2) the hash codes of the keys are + * identical, so no amount of expansion of the address space will + * permit the insert to succeed and an overflow page must be used. + * For (1) we can also chose to use an overflow page in order to + * prevent run away expansion of the address space. + * + * This class needs to be converted to use persistence and to use an + * IRaba for keys/values. For the sake of the unit tests, it needs + * to be parameterized for the overflow versus expand decision and + * the IRaba for the keys needs to be defined such that we have a + * guaranteed split when there are three integer keys (or a split + * function could be used to make this decision based on more + * general criteria). [Could also use a pure- append binary raba w/ + * compacting if the raba is full and there are deleted tuples.] + */ + if (log.isDebugEnabled()) + log.debug("retrying insert: key=" + key); + + /* + * @todo This can recurse until the address space reaches the + * maximum possible address space and then throw an exception. The + * code should be modified to use a decision function for growing + * the page, chaining an overflow page, or splitting the page (when + * it would cause the address space to be doubled). + */ + htbl.insert(key); + +// { +// // the hash value of the key. +// final int h = htbl.hash(key); +// // the address of the bucket for that hash code. +// final int addr = htbl.getRoot().addrOf(h); +//... [truncated message content] |
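The HashBucket javadoc in the patch above describes the split decision in terms of two quantities: a bucket whose local depth is localHashBits is referenced by 2^(globalHashBits - localHashBits) directory entries, and splitting a full bucket only helps when at least two of its keys differ once one more hash bit is considered (otherwise an overflow page, or a larger page, is required). The standalone sketch below illustrates just those two facts; it is not part of the committed code, and the class and method names (ExtensibleHashSketch, slotsForPage, splitCanHelp) are invented for illustration.

// Illustrative sketch only -- not part of the commit; names are invented.
public class ExtensibleHashSketch {

    // Mask off all but the low nbits of a hash code (the maskOff idea from the patch).
    static int maskOff(final int h, final int nbits) {
        return nbits == 32 ? h : h & ((1 << nbits) - 1);
    }

    // #of directory entries that reference a page: 2^(globalBits - localBits).
    static int slotsForPage(final int globalBits, final int localBits) {
        return 1 << (globalBits - localBits);
    }

    // A split can only relieve a full page if at least two keys differ once one
    // more hash bit is considered; otherwise an overflow page (or a larger page)
    // is required. Assumes keys is non-empty (the page is full).
    static boolean splitCanHelp(final int[] keys, final int localBits) {
        final int first = maskOff(keys[0], localBits + 1);
        for (int k : keys) {
            if (maskOff(k, localBits + 1) != first)
                return true;
        }
        return false;
    }

    public static void main(final String[] args) {
        // globalBits=3 gives an 8-entry directory; a page with localBits=1 is
        // referenced by 2^(3-1) = 4 of those entries.
        System.out.println(slotsForPage(3, 1)); // 4
        // A key is routed to directory entry maskOff(hash, globalBits).
        System.out.println(maskOff(22, 3)); // 6 (low three bits of binary 10110)
        // Keys 4 and 12 agree on their low 3 bits, so splitting a page with
        // localBits=2 cannot separate them; an overflow page would be needed.
        System.out.println(splitCanHelp(new int[] { 4, 12 }, 2)); // false
    }
}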
From: <tho...@us...> - 2010-12-01 19:22:16
Revision: 3989 http://bigdata.svn.sourceforge.net/bigdata/?rev=3989&view=rev Author: thompsonbry Date: 2010-12-01 19:22:10 +0000 (Wed, 01 Dec 2010) Log Message: ----------- Marked some things as final Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/ChildIterator.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/ChildIterator.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/ChildIterator.java 2010-11-30 16:12:58 UTC (rev 3988) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/ChildIterator.java 2010-12-01 19:22:10 UTC (rev 3989) @@ -73,7 +73,7 @@ * @exception IllegalArgumentException * if fromKey is given and is greater than toKey. */ - public ChildIterator(Node node, byte[] fromKey, byte[] toKey) { + public ChildIterator(final Node node, final byte[] fromKey, final byte[] toKey) { assert node != null;
From: <tho...@us...> - 2010-11-30 16:13:07
Revision: 3988 http://bigdata.svn.sourceforge.net/bigdata/?rev=3988&view=rev Author: thompsonbry Date: 2010-11-30 16:12:58 +0000 (Tue, 30 Nov 2010) Log Message: ----------- Worked through the split of a bucket and the split of a bucket where the address space must be doubled first. The next step is to handle cases where splitting the bucket once does not result in a bucket with sufficient space for the new key. There are actually two cases here: (1) the hash codes of the keys are distinct, so if we double the address space enough times the insert will succeed; (2) the hash codes of the keys are identical, so no amount of expansion of the address space will permit the insert to succeed and an overflow page must be used. For (1) we can also chose to use an overflow page in order to prevent run away expansion of the address space. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/TestExtensibleHashing.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/SimpleBucket.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/SimpleExtensibleHashMap.java Added: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/SimpleBucket.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/SimpleBucket.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/SimpleBucket.java 2010-11-30 16:12:58 UTC (rev 3988) @@ -0,0 +1,276 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Nov 29, 2010 + */ +package com.bigdata.htbl; + +import java.util.Iterator; + +import org.apache.log4j.Logger; + +import com.bigdata.btree.PO; + +/** + * A (very) simple hash bucket. The bucket holds N int32 keys. + * + * @todo There should be transient metadata for the address of the bucket (both + * the slot index and the store address). + * + * @todo Extend {@link PO} + */ +class SimpleBucket {// extends PO { + + private final transient static Logger log = Logger + .getLogger(SimpleExtensibleHashMap.class); + + /** + * The #of hash code bits which are in use by this {@link SimpleBucket}. + * <p> + * Note: There are <code>2^(globalBits-localBits)</code> dictionary entries + * which address a given page. Initially, globalBits := 1 and localBits := + * 0. For these values, we have <code>2^(1-0) == 2</code> references to the + * initial page of the hash table. + * + * @todo If we need to examine this when we change the size of the address + * space then it makes more sense to have this as local metadata in + * the address table than as local data in the bucket (the latter + * would require us to visit each bucket when expanding the address + * space). 
This only needs to be 4 bits to express values in [0:31]. + * + * @todo When overflow buckets are chained together, does each bucket have + * {@link #localHashBits}? If they do, then we need to make sure that + * all buckets in the chain are updated. If {@link #localHashBits} is + * only marked on the first bucket in the chain then we need to + * correctly ignore it on overflow buckets. + * + * @todo adjusting this dirties the bucket (unless the #of local bits its + * stored in the address table entry, but that increases the in-memory + * burden of the address table). + */ + int localHashBits; + + /** + * The #of keys stored in the bucket. The keys are stored in a dense array. + * For a given {@link #size}, the only indices of the array which have any + * data are [0:{@link #size}-1]. + */ + int size; + + /** + * The user data for the bucket. + * + * @todo IRaba keys plus IRaba vals, but the encoded representation must + * support out of line keys/values. That means that the IRaba will + * have to have access to the store or ITuple will have to have + * indirection support. + */ + final int[] data; + + /** + * Human friendly representation. + */ + public String toString() { + final StringBuilder sb = new StringBuilder(); + sb.append(getClass().getName()); // @todo super.toString() with PO. + // sb.append("{addr="+addr); + sb.append("{localHashBits=" + localHashBits); + sb.append(",size=" + size); + sb.append(",values={"); + for (int i = 0; i < size; i++) { + if (i > 0) + sb.append(','); + sb.append(Integer.toString(data[i])); + } + sb.append("}}"); + return sb.toString(); + } + + public SimpleBucket(final int localHashBits, final int bucketSize) { + + if (localHashBits < 0 || localHashBits > 32) + throw new IllegalArgumentException(); + + this.localHashBits = localHashBits; + + this.data = new int[bucketSize]; + + } + + /** + * Return <code>true</code> if the bucket contains the key. + * + * @param h + * The hash code of the key. + * @param key + * The key. + * + * @return <code>true</code> if the key was found in the bucket. + * + * @todo passing in the hash code here makes sense when the bucket stores + * the hash values, e.g., if we always do that or if we have an out of + * bucket reference to a raw record because the tuple did not fit in + * the bucket. + */ + public boolean contains(final int h, final int key) { + + for (int i = 0; i < size; i++) { + + if (data[i] == key) + return true; + + } + + return false; + + } + + /** + * Insert the key into the bucket (duplicates are allowed). It is an error + * if the bucket is full. + * + * @param h + * The hash code of the key. + * @param key + * The key. + * + * @return <code>false</code> iff the bucket must be split. + * + * @todo The caller needs to be careful that [h] is the full hash code for + * the key. Normally this is not a problem, but we sometimes wind up + * with masked off hash codes, especially during splits and merges, + * and those must not be passed in here. + */ + public boolean insert(final int h, final int key) { + + if (size == data.length) { + /* + * The bucket must be split, potentially recursively. + * + * Note: Splits need to be triggered based on information which is + * only available to the bucket when it considers the insert of a + * specific tuple, including whether the tuple is promoted to a raw + * record reference, whether the bucket has deleted tuples which can + * be compacted, etc. + * + * @todo I need to figure out where the control logic goes to manage + * the split. 
If the bucket handles splits, then we need to pass in + * the table reference. + */ + return false; + } + + data[size++] = key; + + return true; + + } + + /** + * Delete a tuple having the specified key. If there is more than one such + * tuple, then a random tuple having the key is deleted. + * + * @param h + * The hash code of the key. + * @param key + * The key. + * + * @todo return the delete tuple. + */ + public boolean delete(final int h, final int key) { + + for (int i = 0; i < size; i++) { + + if (data[i] == key) { + + // #of tuples remaining beyond this point. + final int length = size - i - 1; + + if (length > 0) { + + // Keep the array dense by copying down by one. + System.arraycopy(data, i + 1/* srcPos */, data/* dest */, + i/* destPos */, length); + + } + + size--; + + return true; + + } + + } + + return false; + + } + + /** + * The #of entries in the bucket. + */ + public int getEntryCount() { + + return size; + + } + + /** + * Visit the entries in any order. + */ + public Iterator<Integer/* key */> getEntries() { + + return new EntryIterator(); + + } + + /** + * Visits the entries in the page. + */ + private class EntryIterator implements Iterator<Integer> { + + private int current = 0; + + private EntryIterator() { + + } + + @Override + public boolean hasNext() { + return current < size; + } + + @Override + public Integer next() { + return data[current++]; + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + + } + +} Added: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/SimpleExtensibleHashMap.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/SimpleExtensibleHashMap.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/SimpleExtensibleHashMap.java 2010-11-30 16:12:58 UTC (rev 3988) @@ -0,0 +1,1079 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Nov 29, 2010 + */ +package com.bigdata.htbl; + +import java.util.ArrayList; +import java.util.Formatter; +import java.util.Iterator; +import java.util.LinkedHashSet; +import java.util.Map; +import java.util.Set; + +import org.apache.log4j.Logger; + +import com.bigdata.btree.BloomFilter; + +import cutthecrap.utils.striterators.Expander; +import cutthecrap.utils.striterators.IStriterator; +import cutthecrap.utils.striterators.Striterator; + +/** + * An implementation of an extensible hash map using a 32 bit hash code and a + * fixed length int[] for the bucket. The keys are int32 values. The data stored + * in the hash map is just the key. Buckets provide a perfect fit for N keys. + * This is used to explore the dynamics of the extensible hashing algorithm + * using some well known examples. 
+ * <p> + * This implementation is not thread-safe. I have not attempted to provide for + * visibility guarantees when resizing the map and I have not attempted to + * provide for concurrent updates. The implementation exists solely to explore + * the extensible hashing algorithm. + * <p> + * The hash code + * + * @todo We can not directly implement {@link Map} unless the hash table is + * configured to NOT permit duplicates. + */ +class SimpleExtensibleHashMap { + + private final transient static Logger log = Logger + .getLogger(SimpleExtensibleHashMap.class); + + /** + * The #of int32 positions which are available in a {@link SimpleBucket} . + */ + private final int bucketSize; + + /** + * The #of hash code bits which are in use by the {@link #addressMap}. Each + * hash bucket also as a local #of hash bits. Given <code>i</code> is the + * #of global hash bits and <code>j</code> is the number of hash bits in + * some bucket, there will be <code>2^(i-j)</code> addresses which point to + * the same bucket. + */ + private int globalHashBits; + + // /** + // * The size of the address space (#of buckets addressable given the #of + // * {@link #globalHashBits} in use). + // */ + // private int addressSpaceSize; + + /** + * The address map. You index into this map using {@link #globalHashBits} + * out of the hash code for a probe key. The value of the map is the index + * into the {@link #buckets} array of the bucket to which that key is + * hashed. + */ + private int[] addressMap; + + /** + * The buckets. The first bucket is pre-allocated when the address table is + * setup and all addresses in the table are initialized to point to that + * bucket. Thereafter, buckets are allocated when a bucket is split. + */ + private final ArrayList<SimpleBucket> buckets; + + /** + * An array of mask values. The index in the array is the #of bits of the + * hash code to be considered. The value at that index in the array is the + * mask to be applied to mask off to zero the high bits of the hash code + * which are to be ignored. + */ + static private final int[] masks; + static { + + masks = new int[32]; + + // Populate the array of masking values. + for (int i = 0; i < 32; i++) { + + masks[i] = getMaskBits(i); + + } + } + + // /** + // * The current mask for the current {@link #globalHashBits}. + // */ + // private int globalMask; + + /** + * Human friendly representation. + */ + public String toString() { + final StringBuilder sb = new StringBuilder(); + sb.append(getClass().getName()); // @todo super.toString() with PO. + sb.append("{bucketSize="+bucketSize); + sb.append(",globalHashBits=" + globalHashBits); + sb.append(",addrSpaceSize=" + addressMap.length); +// sb.append(",addressMap="+Arrays.toString(addressMap)); + sb.append(",buckets="+buckets.size()); + sb.append("}"); +// // used to assign labels to pages. +// final Map<Integer/*addr*/,String/*label*/> labels = new HashMap<Integer, String>(addressMap.length); + // used to remember the visited pages. + final Set<Integer/*addrs*/> visited = new LinkedHashSet<Integer>(addressMap.length); + final Formatter f = new Formatter(sb); + for (int i = 0; i < addressMap.length; i++) { + final int addr = addressMap[i]; + final SimpleBucket b = getBucketAtStoreAddr(addr); + f.format("\n%2d [%" + globalHashBits + "s] => (% 8d)", i, Integer + .toBinaryString(maskOff(i, globalHashBits)), addr); + if (visited.add(addr)) { + // Show the bucket details the first time we visit it. 
+ sb.append(" [k=" + b.localHashBits + "] {"); + for (int j = 0; j < b.size; j++) { + if (j > 0) + sb.append(", "); + sb.append(Integer.toString(b.data[j])); + } + sb.append("}"); + } + // Invariant. + assert b.localHashBits <= globalHashBits; + } + sb.append('\n'); + return sb.toString(); + } + + /** + * + * @param initialCapacity + * The initial capacity is the #of buckets which may be stored in + * the hash table before it must be resized. It is expressed in + * buckets and not tuples because there is not (in general) a + * fixed relationship between the size of a bucket and the #of + * tuples which can be stored in that bucket. This will be + * rounded up to the nearest power of two. + * @param bucketSize + * The #of int tuples which may be stored in a bucket. + * + * @todo Options to govern overflow chaining policy so we can test when + * overflows are not created, which is the simplest condition. + */ + public SimpleExtensibleHashMap(final int initialCapacity, + final int bucketSize) { + + if (initialCapacity <= 0) + throw new IllegalArgumentException(); + + if (bucketSize <= 0) + throw new IllegalArgumentException(); + + this.bucketSize = bucketSize; + + /* + * Setup the hash table given the initialCapacity (in buckets). We need + * to find the first power of two which is GTE the initialCapacity. + */ + globalHashBits = getMapSize(initialCapacity); + + if (globalHashBits > 32) { + /* + * The map is restricted to 32-bit hash codes so we can not address + * this many buckets. + */ + throw new IllegalArgumentException(); + } + + // // save the current masking value for the current #of global bits. + // globalMask = masks[globalHashBits]; + + /* + * Now work backwards to determine the size of the address space (in + * buckets). + */ + final int addressSpaceSize = 1 << globalHashBits; + + /* + * Allocate and initialize the address space. All indices are initially + * mapped onto the same bucket. + */ + addressMap = new int[addressSpaceSize]; + + buckets = new ArrayList<SimpleBucket>(addressSpaceSize/* initialCapacity */); + + // Note: the local bits of the first bucket is set to ZERO (0). + buckets.add(new SimpleBucket(0/* localHashBits */, bucketSize)); + + } + + // private void toString(StringBuilder sb) { + // sb.append("addressMap:"+Arrays.toString(addressMap)); + // } + + /** + * The hash of an int key is that int. + * + * @todo Consider the {@link BloomFilter} hash code logic. It is based on a + * table of functions. It might be a good fit here. + * + * @todo Is is worth while to keep the key and the hash code together in a + * small structure? It is inexpensive to mask off the bits that we do + * not want to consider, but we need to avoid passing the masked off + * hash code into routines which expect the full hash code. + */ + private int hash(final int key) { + + return key; + + } + + /** + * The index into the address table given that we use + * {@link #globalHashBits} of the given hash value. + * <p> + * Note: This is identical to maskOff(h,{@link #globalHashBits}). + */ + private int getIndexOf(final int h) { + + return maskOff(h, globalHashBits); + + } + + /** + * Mask off all but the lower <i>nbits</i> of the hash value. + * + * @param h + * The hash value. + * @param nbits + * The #of bits to consider. + * + * @return The hash value considering only the lower <i>nbits</i>. 
+ */ + static protected int maskOff(final int h, final int nbits) { + + if (nbits < 0 || nbits > 32) + throw new IllegalArgumentException(); + + final int v = h & masks[nbits]; + + return v; + + } + + /** + * The bucket address given the hash code of a key. + * + * @param h + * The hash code of the key. + * + * @todo Consider passing in the #of bits to be considered here. That way we + * always pass around the full hash code but specify how many bits are + * used to interpret it when calling this method. + */ + private int addrOf(final int h) { + + final int index = getIndexOf(h); + + return getAddressFromEntryIndex(index); + + } + + /** + * Return the store address from which the page for the given directory + * entry may be read from the backing store. + * + * @param indexOf + * The index into the address table. + * + * @return The address of the bucket recorded in the address table at that + * index. + */ + public int getAddressFromEntryIndex(final int indexOf) { + + return addressMap[indexOf]; + + } + + /** + * Return the pre-allocated bucket having the given offset into the address table. + * + * @param indexOf + * The index into the address table. + * + * @return The bucket. + */ + protected SimpleBucket getBucketFromEntryIndex(final int indexOf) { + + final int addr = getAddressFromEntryIndex(indexOf); + + return getBucketAtStoreAddr(addr); + + } + + /** + * Return the pre-allocated bucket having the given address. + * + * @param addr + * The address of the bucket on the backing store. + * + * @return The bucket. + */ + protected SimpleBucket getBucketAtStoreAddr(final int addr) { + + return buckets.get(addr); + + } + + /** + * The #of hash bits which are being used by the address table. + */ + public int getGlobalHashBits() { + + return globalHashBits; + + } + + /** + * The size of the address space is <code>2^{@link #globalHashBits}</code>. + */ + public int getAddressSpaceSize() { + + return addressMap.length; + + } + + /** + * The #of buckets backing the map. This is never less than one and never + * greater than the size of the address space. + */ + public int getBucketCount() { + + return buckets.size(); + + } + + /** + * The size of a bucket (the #of int32 values which may be stored in a + * bucket). + */ + public int getBucketSize() { + + return bucketSize; + + } + + /** + * Return the #of entries in the address map for a page having the given + * local depth. This is <code>2^(globalHashBits - localHashBits)</code>. The + * following table shows the relationship between the global hash bits (gb), + * the local hash bits (lb) for a page, and the #of directory entries for + * that page (nentries). + * + * <pre> + * gb lb nentries + * 1 0 2 + * 1 1 1 + * 2 0 4 + * 2 1 2 + * 2 2 1 + * 3 0 8 + * 3 1 4 + * 3 2 2 + * 3 3 1 + * 4 0 16 + * 4 1 8 + * 4 2 4 + * 4 3 2 + * 4 4 1 + * </pre> + * + * @param localHashBits + * The local depth of the page in [0:{@link #globalHashBits}]. + * + * @return The #of directory entries for that page. + * + * @throws IllegalArgumentException + * if either argument is less than ZERO (0). + * @throws IllegalArgumentException + * if <i>localHashBits</i> is greater than + * <i>globalHashBits</i>. + */ + static protected int getSlotsForPage(final int globalHashBits, + final int localHashBits) { + + if(localHashBits < 0) + throw new IllegalArgumentException(); + + if(globalHashBits < 0) + throw new IllegalArgumentException(); + + if(localHashBits > globalHashBits) + throw new IllegalArgumentException(); + + // The #of address map entries for this page. 
+ final int numSlotsForPage = (int) Math.pow(2d, + (globalHashBits - localHashBits)); + + return numSlotsForPage; + + } + + /** + * Return <code>true</code> iff the hash table contains the key. + * <p> + * Lookup: Compute h(K) and right shift (w/o sign extension) by i bits. Use + * this to index into the bucket address table. The address in the table is + * the bucket address and may be used to directly read the bucket. + * + * @param key + * The key. + * + * @return <code>true</code> iff the key was found. + */ + public boolean contains(final int key) { + + final int h = hash(key); + + final int addr = addrOf(h); + + final SimpleBucket b = getBucketAtStoreAddr(addr); + + return b.contains(h, key); + + } + + /** + * Insert the key into the hash table. Duplicates are allowed. + * <p> + * Insert: Per lookup. On overflow, we need to split the bucket moving the + * existing records (and the new record) into new buckets. + * + * @see #split(int, int, SimpleBucket) + * + * @param key + * The key. + * + * @todo define a put() method which returns the old value (no duplicates). + * this could be just sugar over contains(), delete() and insert(). + */ + public void insert(final int key) { + final int h = hash(key); + final int addr = addrOf(h); + final SimpleBucket b = getBucketAtStoreAddr(addr); + if (b.insert(h, key)) { + return; + } + // split the bucket and insert the record (recursive?) + split(key, b); + } + + /** + * Split the bucket, adjusting the address map iff necessary. How this + * proceeds depends on whether the hash #of bits used in the bucket is equal + * to the #of bits used to index into the bucket address table. There are + * two cases: + * <p> + * Case 1: If {@link #globalHashBits} EQ the + * {@link SimpleBucket#localHashBits}, then the bucket address table is out + * of space and needs to be resized. + * <p> + * Case 2: If {@link #globalHashBits} is GT + * {@link SimpleBucket#localHashBits}, then there will be at least two + * entries in the bucket address table which point to the same bucket. One + * of those entries is relabeled. The record is then inserted based on the + * new #of hash bits to be considered. If it still does not fit, then either + * handle by case (1) or case (2) as appropriate. + * <p> + * Note that records which are in themselves larger than the bucket size + * must eventually be handled by: (A) using an overflow record; (B) allowing + * the bucket to become larger than the target page size (using a larger + * allocation slot or becoming a blob); or (C) recording the tuple as a raw + * record and maintaining only the full hash code of the tuple and its raw + * record address in the bucket (this would allow us to automatically + * promote long literals out of the hash bucket and a similar approach might + * be used for a B+Tree leaf, except that a long key will still cause a + * problem [also, this implies that deleting a bucket or leaf on the + * unisolated index of the RWStore might require a scan of the IRaba to + * identify blob references which must also be deleted, so it makes sense to + * track those as part of the bucket/leaf metadata). + * + * @param h + * The key which triggered the split. + * @param bold + * The bucket lacking sufficient room for the key which triggered + * the split. + * + * @todo caller will need an exclusive lock if this is to be thread safe. 
+ * + * @todo Overflow buckets (or oversize buckets) are required when all hash + * bits considered by the local bucket are the same, when all keys in + * the local bucket are the same, and when the record to be inserted + * is larger than the bucket. In order to handle these cases we may + * need to more closely integrate the insert/split logic since + * detecting some of these cases requires transparency into the + * bucket. + * + * FIXME The caller could decide to switch to a larger page size or + * chain overflow pages together in order to increase storage + * utilization or handle buckets having large populations of identical + * keys (or keys with the same int32 hash code). [This decision must + * be made before we decide to split.] + * + * FIXME The caller should handle the promotion of large tuples to raw + * records when they are inserted, so we do not need to handle that + * here either. + */ + private void split(final int key, final SimpleBucket bold) { + + if (log.isDebugEnabled()) + log.debug("globalBits=" + globalHashBits + ",localHashBits=" + + bold.localHashBits + ",key=" + key); + + if (globalHashBits < bold.localHashBits) { + // This condition should never arise. + throw new AssertionError(); + } + + if (globalHashBits == bold.localHashBits) { + /* + * The address table is out of space and needs to be resized. + */ + doubleAddressSpaceAndSplitBucket(key, bold); + // fall through + } + + if (globalHashBits > bold.localHashBits) { + /* + * Split the bucket. + */ + splitBucket(key, bold); + // fall through. + } + + /* + * Insert the key into the expanded hash table (this will insert into + * either the old or the new bucket, depending on the hash code for the + * key). + */ + { + if (log.isDebugEnabled()) + log.debug("retrying insert: key=" + key); + // the hash value of the key. + final int h = hash(key); + // the address of the bucket for that hash code. + final int addr = addrOf(h); + // the bucket for that address. + final SimpleBucket btmp = getBucketAtStoreAddr(addr); + if (btmp.insert(h, key)) { + // insert was successful. + return; + } + /* + * FIXME This could be a variety of special conditions which need to + * be handled, especially all keys have the same value or the same + * int32 hash code or the tuple is too large for the bucket. Those + * conditions all need to be handled before requested a split. Since + * the caller has to handle all of this, we could just return and + * let the caller re-do the insert. + */ + log + .fatal("Split of bucket did not map space available for new key: key=" + + key + ", table=" + toString()); + throw new UnsupportedOperationException(); + } + } + + /** + * The address table is out of space and needs to be resized. + * <p> + * Let {@link #globalHashBits} := {@link #globalHashBits} + 1. This doubles + * the size of the bucket address table. Each original entry becomes two + * entries in the new table. For the specific bucket which is to be split, a + * new bucket is allocated and the 2nd bucket address table for that entry + * is set to the address of the new bucket. The tuples are then assigned to + * the original bucket and the new bucket by considering the additional bit + * of the hash code. Assuming that all keys are distinct, then one split + * will always be sufficient unless all tuples in the original bucket have + * the same hash code when their (i+1)th bit is considered (this can also + * occur if duplicate keys are allow). 
In this case, we resort to an + * "overflow" bucket (alternatively, the bucket is allowed to be larger than + * the target size and gets treated as a blob). + * <p> + * Note: The caller must re-do the insert. + */ + private void doubleAddressSpaceAndSplitBucket(final int key, + final SimpleBucket bold) { + if (log.isDebugEnabled()) + log.debug("key=" + key); + // the hash value of the key. + final int h = hash(key); +// final int oldGlobalHashBits = globalHashBits; + // The size of the address space before we double it. + final int oldAddrSize = getAddressSpaceSize(); + /* + * The index into the address space for the hash key given the #of bits + * considered before we double the address space. + */ + final int oldIndex = getIndexOf(h); + // // The address of the bucket to be split. + // final int addrOld = addressMap[oldIndex]; + /* + * The address into the new address map of the new bucket (once it gets + * created). + * + * Note: We find that entry by adding the size of the old address table + * to the index within the table of the bucket to be split. + */ + final int newIndex = oldIndex + oldAddrSize; + // final int addrNew = addressMap[newIndex]; + // double the address space. + doubleAddressSpace(); + /* + * Create a new bucket and wire it into the 2nd entry for the hash code + * for that key. + * + * Note: Doubling the address space only gives us TWO (2) address table + * entries for a bucket. Therefore, if we wind up repeatedly inserting + * into the either of the created buckets then the address space will + * have to be double again soon. In order to counter this exponential + * expansion of the address space, it may be necessary to have the + * primary page either overflow into a chain or expand into a larger + * page. + */ + // The #of local hash bits _before_ the page is split. + final int localHashBitsBefore = bold.localHashBits; + final SimpleBucket bnew; + { + // Adjust the #of local bits to be considered. + bold.localHashBits++; + // The new bucket. + bnew = new SimpleBucket(bold.localHashBits, bucketSize); + // The address for the new bucket. + final int addrBNew = buckets.size(); + // Add to the chain of buckets. + buckets.add(bnew); + // Update the address table to point to the new bucket. + addressMap[newIndex] = addrBNew; + } + // Redistribute the tuples between the old and new buckets. + redistributeTuples(bold, bnew, /*globalHashBits,*/ localHashBitsBefore); + /* + * Note: The caller must re-do the insert. + */ + } + + /** + * Split a bucket having more than one reference to the bucket in the + * address table. + * <p> + * There will be at least two entries in the address table which point to + * this bucket. The #of entries depends on the global #of hash bits in use + * and the bucket local #of hash bits in use. It will be 2 if there is a + * difference of one between those values but can be more than 2 and will + * always be an even number. More precisely, there will be exactly 2^( + * {@link #globalHashBits}- {@link SimpleBucket#localHashBits} such entries. + * <p> + * Both the original bucket and the new bucket have their + * {@link SimpleBucket#localHashBits} incremented by one, but the + * {@link #globalHashBits} is unchanged. Of the entries in the bucket + * address table which used to point to the original bucket, the 1st half + * are left alone and the 2nd half are updated to point to the new bucket. 
+ * The entries in the original bucket are rehashed and assigned based on the + * new #of hash bits to be considered to either the original bucket or the + * new bucket. + * <p> + * After invoking this method, the record is then inserted (by the caller) + * based on the new #of hash bits to be considered. The caller is + * responsible for detecting and handling cases which must be handled using + * overflow pages, etc. + * + * FIXME Implement this next. It handles the simpler split case when we only + * need to redistribute the keys but do not need to double the address + * space. + * + * @todo test when more than two references remain and recheck the logic for + * updating the address table. + */ + private void splitBucket(final int key, final SimpleBucket bold) { + if (log.isDebugEnabled()) + log.debug("key=" + key + ", globalHashBits=" + globalHashBits + + ", localHashBits=" + bold.localHashBits); + assert globalHashBits - bold.localHashBits > 0; + // The hash value of the key. + final int h = hash(key); + /* + * @todo add assert to verify that the correct #of address map entries + * were updated. + * + * FIXME When we evict a dirty primary page, we need to update each of + * the entries in the address table which point to that page. This means + * that we need to compute how many such entries there are and the + * update those entries. [This also implies that the address table must + * hold weak/strong references much like the BTree nodes and leaves.] + * Likewise, when we evict an overflow page we need to update the + * predecessor in the chain. This means that we will wire in buckets + * which are chains of pages and evict the entire chain at once, + * starting with the last page in the chain so we can get the addresses + * serialized into the page. If the serialized page is too large, then + * we could wind up splitting the bucket (or increasing the page size) + * when writing out the chain. In fact, we could chose to coalesce the + * chain into a single large page and then let the RWStore blob it if + * necessary. + */ + /* + * Create a new bucket and wire it into the 2nd entry for the hash code + * for that key. + */ + // The address for the new bucket. + final int addrBNew = buckets.size(); + // The new bucket. + final SimpleBucket bnew; + final int localHashBitsBefore = bold.localHashBits; + { + // the new bucket. + bnew = new SimpleBucket(bold.localHashBits + 1, bucketSize); + // Add to the chain of buckets. + buckets.add(bnew); + } + // Hash code with only the local bits showing. + final int localBitsHash = maskOff(h, bold.localHashBits); + // The #of address map entries for this page. + final int numSlotsForPage = (int) Math.pow(2d, globalHashBits + - bold.localHashBits); + // Loop over the upper 1/2 of those slots. + for (int i = (numSlotsForPage / 2); i < numSlotsForPage; i++) { + // Index into address table of an entry pointing to this page. + final int entryIndex = (i << bold.localHashBits) + localBitsHash; + // This entry is updated to point to the new page. + addressMap[entryIndex] = addrBNew; + } + // adjust the #of local bits to be considered on the old bucket. + bold.localHashBits++; + // redistribute the tuples between the old and new buckets. + redistributeTuples(bold, bnew, /*globalHashBits,*/ localHashBitsBefore); + /* + * The caller must re-do the insert. + */ + } + + /** + * Redistribute the keys in the old bucket between the old and new bucket by + * considering one more bit in their hash values. 
+ * <p> + * Note: The move has to be handled in a manner which does not have + * side-effects which put the visitation of the keys in the original bucket + * out of whack. The code below figures out which keys move and which stay + * and copies the ones that move in one step. It then goes back through and + * deletes all keys which are found in the new bucket from the original + * bucket. + * + * @param bold + * The old bucket. + * @param bnew + * The new bucket. + * @param localHashBitsBefore + * The #of local hash bits for the old bucket before the split. + * This is used to decide whether the tuple is being moved to the + * new bucket or left behind in the old bucket. + * + * @todo As a pre-condition to splitting the bucket, we need to verify that + * at least one key is not the same as the others in the bucket. If + * all keys are the same, then we should have followed an overflow + * path instead of a split path. [This needs to be tested by + * insert().] + * + * @todo There can be overflow pages for the bucket. Those pages also need + * to be processed here. The overflow pages then need to be released + * since they no longer have any data. + * + * @todo The index of the page (its slot in the directory) should be part of + * the transient metadata for the page, at which point it can be + * removed from this method signature. + */ + private void redistributeTuples(final SimpleBucket bold, + final SimpleBucket bnew, //final int globalHashBits, + final int localHashBitsBefore) { + /* + * First, run over the entries in the old page. Any entries which will + * be hashed to the new page with the modified address table are + * inserted into the new page. In order to avoid a requirement for an + * entry iterator which handles concurrent modification, we do not + * delete the entries from the old page as they are being copied. + * Instead, those entries will be deleted in a pass over the new page + * below. + * + * Note: The decision concerning whether the tuple will remain in the + * original page or be moved into the new page depends on whether it + * would be inserted into the lower 1/2 of the directory entries (which + * will continue to point to the old page) or the upper 1/2 of the + * directory entries (which will point to the new page). + */ + { + // The #of address map entries for this page. + final int numSlotsForPage = getSlotsForPage(globalHashBits, + localHashBitsBefore); + final int addrSpaceSize = getAddressSpaceSize(); + // The threshold at which the key will be inserted into the new + // page. + final int newPageThreshold = addrSpaceSize>>1;//numSlotsForPage >> 1; + // Iterator visiting the entries in the old page. + final Iterator<Integer> eitr = bold.getEntries(); + // Loop over the tuples in the old page. + while (eitr.hasNext()) { + // A key from the original bucket. + final int key = eitr.next(); + // The full hash code for that key. + final int h = hash(key); + // Mask off all but the global bits. + final int h1 = maskOff(h, globalHashBits); +// // Drop off the lower bits (w/o sign extension). +// final int h2 = h1 >>> localHashBitsBefore; + if (h1 >= newPageThreshold) { + // Move the key to the new bucket. + bnew.insert(h/* hash(key) */, key); + } + } + } + /* + * Now delete any keys which were moved to the new bucket. + */ + { + // Iterator visiting the entries in the new page. + final Iterator<Integer> eitr = bnew.getEntries(); + // Loop over the entries in the new page. + while (eitr.hasNext()) { + // a key from the new bucket. 
+ final int k1 = eitr.next(); + // delete the key from the old bucket. + bold.delete(hash(k1), k1); + } + } + } + + /** + * Doubles the address space. + * <p> + * This allocates a new address table and initializes it with TWO (2) + * identical copies of the current address table, one right after the other + * and increases {@link #globalHashBits} by ONE (1). + * <p> + * This operation preserves the current mapping of hash values into an + * address table when we consider one more bit in those hash values. For + * example, if we used to consider <code>3</code> bits of the hash value + * then we will now consider <code>4</code> bits. If the fourth bit of the + * hash value is ZERO (0) then it addresses into the first copy of the + * address table. If the fourth bit of the hash value is ONE (1) then it + * addresses into the second copy of the address table. Since the entries + * point to the same buckets as they did when we only considered + * <code>3</code> bits of the hash value the mapping of the keys onto the + * buckets is not changed by this operation. + */ + private void doubleAddressSpace() { + + if (log.isInfoEnabled()) + log.info("Doubling the address space: globalBits=" + globalHashBits + + ", addressSpaceSize=" + getAddressSpaceSize()); + + final int oldLength = addressMap.length; + + // allocate a new address table which is twice a large. + final int[] tmp = new int[oldLength << 1]; + + /* + * Copy the current address table into the lower half of the new table. + */ + System.arraycopy(addressMap/* src */, 0/* srcPos */, tmp/* dest */, + 0/* destPos */, oldLength); + + /* + * Copy the current address table into the upper half of the new table. + */ + System.arraycopy(addressMap/* src */, 0/* srcPos */, tmp/* dest */, + oldLength/* destPos */, oldLength); + + // Replace the address table. + addressMap = tmp; + + // Consider one more bit in the hash value of the keys. + globalHashBits += 1; + + } + + private void merge(final int h, final SimpleBucket b) { + throw new UnsupportedOperationException(); + } + + /** + * Delete the key from the hash table (in the case of duplicates, a random + * entry having that key is deleted). + * <p> + * Delete: Buckets may be removed no later than when they become empty and + * doing this is a local operation with costs similar to splitting a bucket. + * Likewise, it is clearly possible to coalesce buckets which underflow + * before they become empty by scanning the 2^(i-j) buckets indexed from the + * entries in the bucket address table using i bits from h(K). [I need to + * research handling deletes a little more, including under what conditions + * it is cost effective to reduce the size of the bucket address table + * itself.] + * + * @param key + * The key. + * + * @return <code>true</code> iff a tuple having that key was deleted. + * + * @todo return the deleted tuple. + * + * @todo merge buckets when they underflow/become empty? (but note that we + * do not delete anything from the hash map for a hash join, just + * insert, insert, insert). + */ + public boolean delete(final int key) { + final int h = hash(key); + final int addr = addrOf(h); + final SimpleBucket b = getBucketAtStoreAddr(addr); + return b.delete(h, key); + } + + /** + * Visit the buckets. + * <p> + * Note: This is NOT thread-safe! + */ + public Iterator<SimpleBucket> buckets() { + + return buckets.iterator(); + + } + + /** + * Return the #of entries in the hash table having the given key. + * + * @param key + * The key. + * + * @return The #of entries having that key. 
+ */ + public int[] getEntryCount(final int key) { + throw new UnsupportedOperationException(); + } + + /** + * Return all entries in the hash table having the given key. + * + * @param key + * The key. + * + * @return The entries in the hash table having that key. + * + * @todo this should return an iterator over the tuples for the real + * implementation. + */ + public int[] getEntries(final int key) { + throw new UnsupportedOperationException(); + } + + /** + * Return an iterator which visits all entries in the hash table. + */ + @SuppressWarnings("unchecked") + public Iterator<Integer> getEntries() { + final IStriterator sitr = new Striterator(buckets.iterator()) + .addFilter(new Expander() { + private static final long serialVersionUID = 1L; + + @Override + protected Iterator expand(final Object obj) { + final SimpleBucket b = (SimpleBucket) obj; + return b.getEntries(); + } + }); + return (Iterator<Integer>) sitr; + } + + /** + * Return an entry in the hash table having the given key. If there is more + * than one entry for that key, then any entry having that key may be + * returned. + * + * @param key + * The key. + * + * @return An entry having that key. + */ + public int getEntry(final int key) { + throw new UnsupportedOperationException(); + } + + /** + * Return a bit mask which reveals only the low N bits of an int32 value. + * + * @param nbits + * The #of bits to be revealed. + * @return The mask. + */ + static int getMaskBits(final int nbits) { + + if (nbits < 0 || nbits > 32) + throw new IllegalArgumentException(); + + // int mask = 1; // mask + // int pof2 = 1; // power of two. + // while (pof2 < nbits) { + // pof2 = pof2 << 1; + // mask |= pof2; + // } + + int mask = 0; + int bit; + + for (int i = 0; i < nbits; i++) { + + bit = (1 << i); + + mask |= bit; + + } + + // System.err.println(nbits +" : "+Integer.toBinaryString(mask)); + + return mask; + + } + + /** + * Find the first power of two which is GTE the given value. This is used to + * compute the size of the address space (in bits) which is required to + * address a hash table with that many buckets. + */ + static int getMapSize(final int initialCapacity) { + + if (initialCapacity <= 0) + throw new IllegalArgumentException(); + + int i = 1; + + while ((1 << i) < initialCapacity) + i++; + + return i; + + } + +} Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/TestExtensibleHashing.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/TestExtensibleHashing.java 2010-11-29 12:56:11 UTC (rev 3987) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/TestExtensibleHashing.java 2010-11-30 16:12:58 UTC (rev 3988) @@ -26,8 +26,11 @@ */ package com.bigdata.htbl; -import java.util.ArrayList; -import java.util.Iterator; +import java.util.Arrays; +import java.util.Formatter; +import java.util.HashMap; +import java.util.LinkedHashSet; +import java.util.Set; import junit.framework.TestCase2; @@ -163,105 +166,64 @@ } /** - * Find the first power of two which is GTE the given value. This is used to - * compute the size of the address space (in bits) which is required to - * address a hash table with that many buckets. + * Unit test for {@link SimpleExtensibleHashMap#getMapSize(int)}. 
*/ - private static int getMapSize(final int initialCapacity) { - - if (initialCapacity <= 0) - throw new IllegalArgumentException(); - - int i = 1; - - while ((1 << i) < initialCapacity) - i++; - - return i; - - } - - /** - * Unit test for {@link #getMapSize(int)}. - */ public void test_getMapSize() { - assertEquals(1/* addressSpaceSize */, getMapSize(1)/* initialCapacity */); - assertEquals(1/* addressSpaceSize */, getMapSize(2)/* initialCapacity */); - assertEquals(2/* addressSpaceSize */, getMapSize(3)/* initialCapacity */); - assertEquals(2/* addressSpaceSize */, getMapSize(4)/* initialCapacity */); - assertEquals(3/* addressSpaceSize */, getMapSize(5)/* initialCapacity */); - assertEquals(3/* addressSpaceSize */, getMapSize(6)/* initialCapacity */); - assertEquals(3/* addressSpaceSize */, getMapSize(7)/* initialCapacity */); - assertEquals(3/* addressSpaceSize */, getMapSize(8)/* initialCapacity */); - assertEquals(4/* addressSpaceSize */, getMapSize(9)/* initialCapacity */); + assertEquals(1/* addressSpaceSize */, SimpleExtensibleHashMap.getMapSize(1)/* initialCapacity */); + assertEquals(1/* addressSpaceSize */, SimpleExtensibleHashMap.getMapSize(2)/* initialCapacity */); + assertEquals(2/* addressSpaceSize */, SimpleExtensibleHashMap.getMapSize(3)/* initialCapacity */); + assertEquals(2/* addressSpaceSize */, SimpleExtensibleHashMap.getMapSize(4)/* initialCapacity */); + assertEquals(3/* addressSpaceSize */, SimpleExtensibleHashMap.getMapSize(5)/* initialCapacity */); + assertEquals(3/* addressSpaceSize */, SimpleExtensibleHashMap.getMapSize(6)/* initialCapacity */); + assertEquals(3/* addressSpaceSize */, SimpleExtensibleHashMap.getMapSize(7)/* initialCapacity */); + assertEquals(3/* addressSpaceSize */, SimpleExtensibleHashMap.getMapSize(8)/* initialCapacity */); + assertEquals(4/* addressSpaceSize */, SimpleExtensibleHashMap.getMapSize(9)/* initialCapacity */); - assertEquals(5/* addressSpaceSize */, getMapSize(32)/* initialCapacity */); + assertEquals(5/* addressSpaceSize */, SimpleExtensibleHashMap.getMapSize(32)/* initialCapacity */); - assertEquals(10/* addressSpaceSize */, getMapSize(1024)/* initialCapacity */); + assertEquals(10/* addressSpaceSize */, SimpleExtensibleHashMap.getMapSize(1024)/* initialCapacity */); } /** - * Return a bit mask which reveals only the low N bits of an int32 value. - * - * @param nbits - * The #of bits to be revealed. - * @return The mask. + * Unit test for {@link SimpleExtensibleHashMap#getMaskBits(int)} */ - private static int getMaskBits(final int nbits) { + public void test_getMaskBits() { - if (nbits < 0 || nbits > 32) - throw new IllegalArgumentException(); + assertEquals(0x00000001, SimpleExtensibleHashMap.getMaskBits(1)); + assertEquals(0x00000003, SimpleExtensibleHashMap.getMaskBits(2)); + assertEquals(0x00000007, SimpleExtensibleHashMap.getMaskBits(3)); + assertEquals(0x0000000f, SimpleExtensibleHashMap.getMaskBits(4)); + assertEquals(0x0000001f, SimpleExtensibleHashMap.getMaskBits(5)); + assertEquals(0x0000003f, SimpleExtensibleHashMap.getMaskBits(6)); + assertEquals(0x0000007f, SimpleExtensibleHashMap.getMaskBits(7)); + assertEquals(0x000000ff, SimpleExtensibleHashMap.getMaskBits(8)); -// int mask = 1; // mask -// int pof2 = 1; // power of two. 
-// while (pof2 < nbits) { -// pof2 = pof2 << 1; -// mask |= pof2; -// } + assertEquals(0x0000ffff, SimpleExtensibleHashMap.getMaskBits(16)); - int mask = 0; - int bit; + assertEquals(0xffffffff, SimpleExtensibleHashMap.getMaskBits(32)); - for (int i = 0; i < nbits; i++) { - - bit = (1 << i); - - mask |= bit; - - } - -// System.err.println(nbits +" : "+Integer.toBinaryString(mask)); - - return mask; - } /** - * Unit test for {@link #getMaskBits(int)} + * Unit test for {@link SimpleExtensibleHashMap#maskOff(int, int)} */ - public void test_getMaskBits() { + public void test_maskOff() { - assertEquals(0x00000001, getMaskBits(1)); - assertEquals(0x00000003, getMaskBits(2)); - assertEquals(0x00000007, getMaskBits(3)); - assertEquals(0x0000000f, getMaskBits(4)); - assertEquals(0x0000001f, getMaskBits(5)); - assertEquals(0x0000003f, getMaskBits(6)); - assertEquals(0x0000007f, getMaskBits(7)); - assertEquals(0x000000ff, getMaskBits(8)); +// SimpleExtensibleHashMap.class; + + assertEquals(0x00000000, SimpleExtensibleHashMap + .maskOff(0/* hash */, 1/* nbits */)); - assertEquals(0x0000ffff, getMaskBits(16)); + assertEquals(0x00000000, SimpleExtensibleHashMap + .maskOff(8/* hash */, 2/* nbits */)); - assertEquals(0xffffffff, getMaskBits(32)); - + assertEquals(0x00000002, SimpleExtensibleHashMap + .maskOff(18/* hash */, 2/* nbits */)); + } -// private static int[] getMaskArray() { -// -// } - /** * Extensible hashing data structure. * @@ -291,816 +253,6 @@ } /** - * An implementation of an extensible hash map using a 32 bit hash code and - * a fixed length int[] for the bucket. The keys are int32 values. The data - * stored in the hash map is just the key. Buckets provide a perfect fit for - * N keys. This is used to explore the dynamics of the extensible hashing - * algorithm using some well known examples. - * <p> - * This implementation is not thread-safe. I have not attempted to provide - * for visibility guarantees when resizing the map and I have not attempted - * to provide for concurrent updates. The implementation exists solely to - * explore the extensible hashing algorithm. - * <p> - * The hash code - */ - private static class SimpleExtensibleHashMap { - - // @todo static logger. -// final transient Logger log = SimpleExtensibleHashMap.class - - /** - * The #of int32 positions which are available in a {@link SimpleBucket} - * . - */ - private final int bucketSize; - - /** - * The #of hash code bits which are in use by the {@link #addressMap}. - * Each hash bucket also as a local #of hash bits. Given <code>i</code> - * is the #of global hash bits and <code>j</code> is the number of hash - * bits in some bucket, there will be <code>2^(i-j)</code> addresses - * which point to the same bucket. - */ - private int globalHashBits; - -// /** -// * The size of the address space (#of buckets addressable given the #of -// * {@link #globalHashBits} in use). -// */ -// private int addressSpaceSize; - - /** - * The address map. You index into this map using - * {@link #globalHashBits} out of the hash code for a probe key. The - * value of the map is the index into the {@link #buckets} array of the - * bucket to which that key is hashed. - */ - private int[] addressMap; - - /** - * The buckets. The first bucket is pre-allocated when the address table - * is setup and all addresses in the table are initialized to point to - * that bucket. Thereafter, buckets are allocated when a bucket is - * split. - */ - private final ArrayList<SimpleBucket> buckets; - - /** - * An array of mask values. 
The index in the array is the #of bits of - * the hash code to be considered. The value at that index in the array - * is the mask to be applied to mask off to zero the high bits of the - * hash code which are to be ignored. - */ - private final int[] masks; - -// /** -// * The current mask for the current {@link #globalHashBits}. -// */ -// private int globalMask; - - /** - * - * @param initialCapacity - * The initial capacity is the #of buckets which may be - * stored in the hash table before it must be resized. It is - * expressed in buckets and not tuples because there is not - * (in general) a fixed relationship between the size of a - * bucket and the #of tuples which can be stored in that - * bucket. This will be rounded up to the nearest power of - * two. - * @param bucketSize - * The #of int tuples which may be stored in a bucket. - */ - public SimpleExtensibleHashMap(final int initialCapacity, final int bucketSize) { - - if (initialCapacity <= 0) - throw new IllegalArgumentException(); - - if (bucketSize <= 0) - throw new IllegalArgumentException(); - - this.bucketSize = bucketSize; - - /* - * Setup the hash table given the initialCapacity (in buckets). We - * need to find the first power of two which is GTE the - * initialCapacity. - */ - globalHashBits = getMapSize(initialCapacity); - - if (globalHashBits > 32) { - /* - * The map is restricted to 32-bit hash codes so we can not - * address this many buckets. - */ - throw new IllegalArgumentException(); - } - - // Populate the array of masking values. - masks = new int[32]; - - for (int i = 0; i < 32; i++) { - - masks[i] = getMaskBits(i); - - } - -// // save the current masking value for the current #of global bits. -// globalMask = masks[globalHashBits]; - - /* - * Now work backwards to determine the size of the address space (in - * buckets). - */ - final int addressSpaceSize = 1 << globalHashBits; - - /* - * Allocate and initialize the address space. All indices are - * initially mapped onto the same bucket. - */ - addressMap = new int[addressSpaceSize]; - - buckets = new ArrayList<SimpleBucket>(addressSpaceSize/* initialCapacity */); - - // Note: the local bits of the first bucket is set to ZERO (0). - buckets.add(new SimpleBucket(0/* localHashBits */, bucketSize)); - - } - -// private void toString(StringBuilder sb) { -// sb.append("addressMap:"+Arrays.toString(addressMap)); -// } - - /** The hash of an int key is that int. */ - private int hash(final int key) { - - return key; - - } - - /** - * The index into the address table given that we use - * {@link #globalHashBits} of the given hash value. - * <p> - * Note: This is identical to maskOff(h,{@link #globalHashBits}). - */ - private int getIndexOf(final int h) { - - return maskOff(h, globalHashBits); - - } - - /** - * Mask off all but the lower <i>nbits</i> of the hash value. - * - * @param h - * The hash value. - * @param nbits - * The #of bits to consider. - * @return The hash value considering only the lower <i>nbits</i>. - */ - private int maskOff(final int h, final int nbits) { - - if (nbits < 0 || nbits > 32) - throw new IllegalArgumentException(); - - return h & masks[nbits]; - - } - - /** The bucket address given the hash code of a key. */ - private int addrOf(final int h) { - - final int index = getIndexOf(h); - - return addressMap[index]; - - } - - /** - * Return the pre-allocated bucket having the given address. - * - * @param addr - * The address. - * - * @return The bucket. 
- */ - private SimpleBucket getBucket(final int addr) { - - return buckets.get(addr); - - } - - /** - * The #of hash bits which are being used by the address table. - */ - public int getGlobalHashBits() { - - return globalHashBits; - - } - - /** - * The size of the address space (the #of positions in the address - * table, which is NOT of necessity the same as the #of distinct buckets - * since many address positions can point to the same bucket). - */ - public int getAddressSpaceSize() { - - return addressMap.length; - - } - - /** - * The #of buckets backing the map. - */ - public int getBucketCount() { - - return buckets.size(); - - } - - /** - * The size of a bucket (the #of int32 values which may be stored - * in a bucket). - */ - public int getBucketSize() { - - return bucketSize; - - } - - /** - * Return <code>true</code> iff the hash table contains the key. - * <p> - * Lookup: Compute h(K) and right shift (w/o sign extension) by i bits. - * Use this to index into the bucket address table. The address in the - * table is the bucket address and may be used to directly read the - * bucket. - * - * @param key - * The key. - * - * @return <code>true</code> iff the key was found. - */ - public boolean contains(final int key) { - - final int h = hash(key); - - final int addr = addrOf(h); - - final SimpleBucket b = getBucket(addr); - - return b.contains(h, key); - - } - - /** - * Insert the key into the hash table. Duplicates are allowed. - * <p> - * Insert: Per lookup. On overflow, we need to split the bucket moving - * the existing records (and the new record) into new buckets. - * - * @see #split(int, int, SimpleBucket) - * - * @param key - * The key. - * - * @todo define a put() method which returns the old value (no - * duplicates). this could be just sugar over contains(), delete() - * and insert(). - */ - public void insert(final int key) { - final int h = hash(key); - final int addr = addrOf(h); - final SimpleBucket b = getBucket(addr); - if (b.insert(h, key)) { - ... [truncated message content] |
From: <tho...@us...> - 2010-11-29 12:56:17
Revision: 3987 http://bigdata.svn.sourceforge.net/bigdata/?rev=3987&view=rev Author: thompsonbry Date: 2010-11-29 12:56:11 +0000 (Mon, 29 Nov 2010) Log Message: ----------- Some more work on the extensible/extendable hash table. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/TestExtensibleHashing.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/TestExtensibleHashing.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/TestExtensibleHashing.java 2010-11-29 09:55:04 UTC (rev 3986) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/TestExtensibleHashing.java 2010-11-29 12:56:11 UTC (rev 3987) @@ -698,9 +698,8 @@ // Update the address table to point to the new bucket. addressMap[addrNew] = addrBNew; /* - * FIXME Redistribute the keys in the old bucket between the old - * and new bucket by considering one more bit in their hash - * values. + * Redistribute the keys in the old bucket between the old and + * new bucket by considering one more bit in their hash values. * * Note: The move has to be handled in a manner which does not * have side-effects which put the visitation of the keys in the @@ -715,31 +714,29 @@ * followed an overflow path instead of a split path. */ { - // // flag for each key says whether it moves or stays. - // final boolean[] move = new boolean[bold.size]; + // consider the keys in the old bucket. for (int i = 0; i < bold.size; i++) { // a key from the original bucket. - final int k = bold.data[i]; - // the hash of the key with the #of of local bits. - final int h1 = maskOff(k, bold.localHashBits); + final int k1 = bold.data[i]; + // hash of the key with only the local bits visible + final int h1 = maskOff(k1, bold.localHashBits); if (h1 == oldIndex) { // The key does not move. continue; - // move[i] = false; } else if (h1 == newIndex) { - // The key will move. - // move[i] = true; + // Move the key to the new bucket. bnew.insert(h/* hash(key) */, key); } else { // Must be hashed to one of these two buckets!!! throw new AssertionError(); } } + // Now delete any keys which were moved to the new bucket. for (int i = 0; i < bnew.size; i++) { // a key from the new bucket. - final int k = bnew.data[i]; + final int k1 = bnew.data[i]; // delete the key from the old bucket. - bold.delete(h/* hash(key) */, key); + bold.delete(hash(k1), k1); } } /* @@ -762,25 +759,30 @@ } } if (globalHashBits > b.localHashBits) { - /* - * There will be at least two entries in the address table which - * point to this bucket. One of those entries is relabeled. Both - * the original bucket and the new bucket have their {@link - * SimpleBucket#localHashBits} incremented by one, but the - * {@link #globalHashBits}. Of the entries in the bucket address - * table which used to point to the original bucket, the 1st - * half are left alone and the 2nd half are updated to point to - * the new bucket. (Note that the #of entries depends on the - * global #of hash bits in use and the bucket local #of hash - * bits in use and will be 2 if there is a difference of one - * between those values but can be more than 2 and will always - * be an even number). The entries in the original bucket are - * rehashed and assigned based on the new #of hash bits to be - * considered to either the original bucket or the new bucket. - * The record is then inserted based on the new #of hash bits to - * be considered. 
If it still does not fit, then either handle - * by case (1) or case (2) as appropriate. - */ + /* + * FIXME Implement this next. It handles the simpler split case + * when we only need to redistribute the keys but do not need to + * double the address space. The logic above can just be + * refactored for this purpose. + * + * There will be at least two entries in the address table which + * point to this bucket. One of those entries is relabeled. Both + * the original bucket and the new bucket have their {@link + * SimpleBucket#localHashBits} incremented by one, but the + * {@link #globalHashBits}. Of the entries in the bucket address + * table which used to point to the original bucket, the 1st + * half are left alone and the 2nd half are updated to point to + * the new bucket. (Note that the #of entries depends on the + * global #of hash bits in use and the bucket local #of hash + * bits in use and will be 2 if there is a difference of one + * between those values but can be more than 2 and will always + * be an even number). The entries in the original bucket are + * rehashed and assigned based on the new #of hash bits to be + * considered to either the original bucket or the new bucket. + * The record is then inserted based on the new #of hash bits to + * be considered. If it still does not fit, then either handle + * by case (1) or case (2) as appropriate. + */ throw new UnsupportedOperationException(); } } @@ -993,17 +995,22 @@ } - /** - * Insert the key into the bucket (duplicates are allowed). It is an - * error if the bucket is full. - * - * @param h - * The hash code of the key. - * @param key - * The key. - * - * @return <code>false</code> iff the bucket must be split. - */ + /** + * Insert the key into the bucket (duplicates are allowed). It is an + * error if the bucket is full. + * + * @param h + * The hash code of the key. + * @param key + * The key. + * + * @return <code>false</code> iff the bucket must be split. + * + * @todo The caller needs to be careful that [h] is the full hash code + * for the key. Normally this is not a problem, but we sometimes + * wind up with masked off hash codes, especially during splits + * and merges, and those must not be passed in here. + */ public boolean insert(final int h, final int key) { if (size == data.length) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
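The redistribution step in rev 3987 above amounts to testing one additional hash bit per key once the bucket's local hash bits go from j to j+1. A minimal sketch of that principle follows (toy keys and names, not the actual SimpleBucket code, which compares masked directory indices against the old and new slots rather than a single bit):

import java.util.ArrayList;
import java.util.List;

public class SplitRedistributionSketch {

    public static void main(final String[] args) {
        final int j = 2; // local hash bits of the bucket before the split
        // Toy keys; hash(key) == key, and all share the same low j bits (01),
        // which is why they all live in the bucket being split.
        final int[] keys = { 0b00101, 0b00001, 0b01101, 0b10101 };

        final List<Integer> stays = new ArrayList<Integer>();
        final List<Integer> moves = new ArrayList<Integer>();

        for (final int k : keys) {
            if (((k >>> j) & 1) == 0) {
                stays.add(k); // bit j is ZERO: key stays in the old bucket
            } else {
                moves.add(k); // bit j is ONE: key moves to the new bucket
            }
        }

        System.out.println("stay: " + stays); // [1]
        System.out.println("move: " + moves); // [5, 13, 21]
    }
}

Because every key in the old bucket already agrees on the low j bits, the single extra bit fully determines which of the two post-split buckets a key lands in, which is why the split never needs to touch any other bucket.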
From: <mar...@us...> - 2010-11-29 09:55:10
Revision: 3986 http://bigdata.svn.sourceforge.net/bigdata/?rev=3986&view=rev Author: martyncutcher Date: 2010-11-29 09:55:04 +0000 (Mon, 29 Nov 2010) Log Message: ----------- Add allocation tests and adjust for history retention. Modified Paths: -------------- branches/JOURNAL_HA_BRANCH/bigdata/src/test/com/bigdata/rwstore/TestRWJournal.java Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/test/com/bigdata/rwstore/TestRWJournal.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/test/com/bigdata/rwstore/TestRWJournal.java 2010-11-29 09:53:27 UTC (rev 3985) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/test/com/bigdata/rwstore/TestRWJournal.java 2010-11-29 09:55:04 UTC (rev 3986) @@ -481,6 +481,33 @@ } + public void testAllocationReserves() { + final int cReserve16K = 16 * 1024; + final int cReserve128K = 32 * 1024; + + showAllocReserve(false, 64, cReserve16K, cReserve16K); + showAllocReserve(false, 128, cReserve16K, cReserve16K); + showAllocReserve(false, 1024, cReserve16K, cReserve16K); + showAllocReserve(false, 2048, cReserve16K, cReserve16K); + showAllocReserve(false, 3072, cReserve16K, cReserve16K); + showAllocReserve(false, 4096, cReserve16K, cReserve16K); + showAllocReserve(false, 8192, cReserve16K, cReserve16K); + + showAllocReserve(true, 64, cReserve128K, cReserve16K); + showAllocReserve(true, 128, cReserve128K, cReserve16K); + showAllocReserve(true, 1024, cReserve128K, cReserve16K); + showAllocReserve(true, 2048, cReserve128K, cReserve16K); + showAllocReserve(true, 3072, cReserve128K, cReserve16K); + showAllocReserve(true, 4096, cReserve16K, cReserve16K); + showAllocReserve(true, 8192, cReserve128K, cReserve16K); + } + private void showAllocReserve(final boolean optDensity, final int slotSize, final int reserve, final int mod) { + final int ints = FixedAllocator.calcBitSize(optDensity, slotSize, reserve, mod); + // there are max 126 ints available to a FixedAllocator + final int maxuse = (126/(ints+1)) * ints; + System.out.println("Allocate " + ints + ":" + (32 * ints * slotSize) + " for " + slotSize + " in " + reserve + " using " + maxuse + " of 126 possible"); + } + long allocBatch(RWStore rw, int bsize, int asze, int ainc) { long curAddress = rw.physicalAddress(rw.alloc(asze, null)); for (int i = 1; i < bsize; i++) { @@ -786,7 +813,7 @@ int[] faddrs = allocBatchBuffer(rw, 100, startBlob, endBlob); final StringBuilder str = new StringBuilder(); - rw.showAllocators(str); + rw.getStorageStats().showStats(str); System.out.println(str); } finally { @@ -844,8 +871,9 @@ /** * Test of blob allocation and read-back, firstly from cache and then from disk. + * @throws InterruptedException */ - public void test_blob_realloc() { + public void test_blob_realloc() throws InterruptedException { final Journal store = (Journal) getStore(); @@ -877,6 +905,7 @@ // allocate another address, might (or might not) be the same. faddr = bs.write(bb); // rw.alloc(buf, buf.length); + final long pa = bs.getPhysicalAddress(faddr); bb.position(0); System.out.println("Now commit to disk (1)"); @@ -892,8 +921,12 @@ // now delete the memory bs.delete(faddr); - // Must not have been immediately freed. - assertNotSame(0L, bs.getPhysicalAddress(faddr)); + // Must not have been immediately freed if history is retained. + if (rw.getHistoryRetention() != 0) + assertEquals(pa, bs.getPhysicalAddress(faddr)); + else + assertEquals(0L, bs.getPhysicalAddress(faddr)); + /* * Commit before testing for deferred frees. 
Since there is a @@ -906,6 +939,8 @@ store.commit(); + Thread.currentThread().sleep(10); + // Request release of deferred frees. rw.checkDeferredFrees(true/* freeNow */, store); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
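The showAllocReserve figures in the rev 3986 test above follow directly from the reservation arithmetic introduced alongside it in rev 3985. A standalone sketch of that arithmetic (constants are illustrative; the real method is FixedAllocator.calcBitSize):

public class AllocReserveSketch {

    // Mirrors the selection loop shown in the rev 3985 diff: one bitmap int
    // covers 32 slots, the block must reach the minimum reservation, and its
    // byte size must be a multiple of the allocation modulus.
    static int calcBitSize(final boolean optDensity, final int slotSize,
            final int minReserve, final int modAllocation) {
        final int intAllocation = 32 * slotSize;
        int nints = optDensity ? 8 : 1;
        while (nints * intAllocation < minReserve) nints++;
        while ((nints * intAllocation) % modAllocation != 0) nints++;
        return nints;
    }

    public static void main(final String[] args) {
        final int reserve = 16 * 1024; // 16K reservation, as in the test above
        for (final int slotSize : new int[] { 64, 128, 1024, 4096, 8192 }) {
            final int ints = calcBitSize(false, slotSize, reserve, reserve);
            // Each AllocBlock costs its bitmap ints plus one int for its start address.
            final int maxuse = (126 / (ints + 1)) * ints;
            System.out.println(slotSize + ": " + ints + " ints, "
                    + (32 * ints * slotSize) + " bytes/block, uses "
                    + maxuse + " of 126 header ints");
        }
    }
}

As the test comment notes, roughly 126 ints of an allocator's header remain available for bitmaps and block start addresses, so the (126 / (ints + 1)) * ints figure estimates how much of that budget a given slot size can actually use.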
From: <mar...@us...> - 2010-11-29 09:53:36
Revision: 3985 http://bigdata.svn.sourceforge.net/bigdata/?rev=3985&view=rev Author: martyncutcher Date: 2010-11-29 09:53:27 +0000 (Mon, 29 Nov 2010) Log Message: ----------- 1) Extend the metabits header. 2) Add more relevant data to the storage stats. 3) Enable more flexible allocations of contiguous reservations in preparation for direct buffer allocation. Modified Paths: -------------- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/AllocBlock.java branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/FixedAllocator.java branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/PSOutputStream.java branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/StorageStats.java Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/AllocBlock.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/AllocBlock.java 2010-11-24 21:40:07 UTC (rev 3984) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/AllocBlock.java 2010-11-29 09:53:27 UTC (rev 3985) @@ -237,4 +237,43 @@ m_commit = m_saveCommit; m_saveCommit = null; } + + /** + * Must release allocations made by this allocator. + * + * The commit bits are the old transient bits, so any allocated bits + * set in live, but not in commit, were set within this context. + * + * The m_commit is the m_transients bits at the point of the + * link of the allocationContext with this allocator, bits set in m_live + * that are not set in m_commit, were made by this allocator for the + * aborted context. + * + * L 1100 0110 AC 0111 AB 0110 + * T 1100 1110 1111 1110 + * C 1100 1100 1110 1100 + */ + public void abortshadow() { + for (int i = 0; i < m_live.length; i++) { + m_live[i] &= m_commit[i]; + m_transients[i] = m_live[i] | m_saveCommit[i]; + } + m_commit = m_saveCommit; + } + + /** + * When a session is active, the transient bits do not equate to an ORing + * of the committed bits and the live bits, but rather an ORing of the live + * with all the committed bits since the start of the session. + * When the session is released, the state is restored to an ORing of the + * live and the committed, thus releasing slots for re-allocation. + */ + public void releaseSession() { + if (m_addr != 0) { // check active! 
+ for (int i = 0; i < m_live.length; i++) { + m_transients[i] = m_live[i] | m_commit[i]; + } + } + } + } Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/FixedAllocator.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/FixedAllocator.java 2010-11-24 21:40:07 UTC (rev 3984) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/FixedAllocator.java 2010-11-29 09:53:27 UTC (rev 3985) @@ -42,8 +42,10 @@ public class FixedAllocator implements Allocator { private static final Logger log = Logger.getLogger(FixedAllocator.class); + + private final int cModAllocation = 1 << RWStore.ALLOCATION_SCALEUP; + private final int cMinAllocation = cModAllocation * 1; // must be multiple of cModAllocation -// final private RWWriteCacheService m_writeCache; volatile private int m_freeBits; volatile private int m_freeTransients; @@ -111,7 +113,9 @@ final int bit = offset % allocBlockRange; - if (RWStore.tstBit(block.m_live, bit)) { + if (RWStore.tstBit(block.m_live, bit) + || (this.m_sessionActive && RWStore.tstBit(block.m_transients, bit))) + { return RWStore.convertAddr(block.m_addr) + ((long) m_size * bit); } else { return 0L; @@ -145,6 +149,14 @@ } volatile private IAllocationContext m_context; + + /** + * Indicates whether session protection has been used to protect + * store from re-allocating allocations reachable from read-only + * requests and concurrent transactions. + */ + private boolean m_sessionActive; + public void setAllocationContext(final IAllocationContext context) { if (context == null && m_context != null) { // restore commit bits in AllocBlocks @@ -161,6 +173,21 @@ } /** + * Unwinds the allocations made within the context and clears + */ + public void abortAllocationContext(final IAllocationContext context) { + if (context != null && m_context == context) { + // restore commit bits in AllocBlocks + for (AllocBlock allocBlock : m_allocBlocks) { + allocBlock.abortshadow(); + } + m_context = null; + } else { + throw new IllegalArgumentException(); + } + } + + /** * write called on commit, so this is the point when "transient frees" - the * freeing of previously committed memory can be made available since we * are creating a new commit point - the condition being that m_freeBits @@ -174,6 +201,8 @@ final byte[] buf = new byte[1024]; final DataOutputStream str = new DataOutputStream(new FixedOutputStream(buf)); try { + m_sessionActive = m_store.isSessionProtected(); + str.writeInt(m_size); final Iterator<AllocBlock> iter = m_allocBlocks.iterator(); @@ -185,9 +214,9 @@ str.writeInt(block.m_live[i]); } -// if (!m_store.isSessionPreserved()) { + if (!m_sessionActive) { block.m_transients = block.m_live.clone(); -// } + } /** * If this allocator is shadowed then copy the new committed @@ -314,29 +343,12 @@ m_size = size; - /* - * For smaller allocations we'll allocate a larger span, this is needed - * to ensure the minimum allocation is large enough to guarantee - * a unique address for a BlobAllocator. - */ - if (m_size < 256) { - /* - * Note: 64 ints is 256 bytes is 2048 bits, so 2048 allocation - * slots. - */ - m_bitSize = 64; - } else { - /* - * Note: 32 ints is 128 bytes is 1024 bits, so 1024 allocation - * slots. 
- */ - m_bitSize = 32; - } + m_bitSize = calcBitSize(true, size, cMinAllocation, cModAllocation); // m_writeCache = cache; // number of blocks in this allocator, bitSize plus 1 for start address - final int numBlocks = 255 / (m_bitSize + 1); + final int numBlocks = 254 / (m_bitSize + 1); /* * Create AllocBlocks for this FixedAllocator, but do not allocate @@ -351,6 +363,82 @@ m_freeTransients = 0; m_freeBits = 32 * m_bitSize * numBlocks; } + + /** + * This determines the size of the reservation required in terms of + * the number of ints each holding bits for 32 slots. + * + * The minimum return value will be 1, for a single int holiding 32 bits. + * + * The maximum value will be the number of ints required to fill the minimum + * reservation. + * + * The minimum reservation will be some multiple of the + * address multiplier that allows alloction blocks to address large addresses + * with an INT32. For example, by setting a minimum reservation at 128K, the + * allocation blocks INT32 start address may be multiplied by 128K to provide + * a physical address. + * + * The minReserve must be a power of 2, eg 1K, 2k or 4K.. etc + * + * A standard minReserve of 16K is plenty big enough, enabling 32TB of + * addressable store. The logical maximum used store is calculated as the + * maximum fixed allocation size * MAX_INT. So a store with a maximum + * fixed slot size of 4K could only allocated 8TB. + * + * Since the allocation size must be MOD 0 the minReserve, the lower the + * minReserve the smaller the allocation may be required for larger + * slot sizes. + * + * Another consideration is file locality. In this case the emphasis is + * on larger contiguous areas to improve the likely locality of allocations + * made by a FixedAllocator. Here the addressability implied by the reserve + * is not an issue, and larger reserves are chosen to improve locality. The + * downside is a potential for more wasted space, but this + * reduces as the store size grows and in large stores (> 10GB) becomes + * insignificant. + * + * Therefore, if a FixedAllocator is to be used in a large store and + * locality needs to be optimised for SATA disk access then the minReserve + * should be high = say 128K, while if the allocator is tuned to ByteBuffer + * allocation, a minallocation of 8 to 16K is more suitable. + * + * A final consideration is allocator reference efficiency in the sense + * to maximise the amount of allocations that can be made. By this I mean + * just how close we can get to MAX_INT allocations. For example, if we + * allow for upto 8192 allocations from a single allocator, but in + * practice average closer to 4096 then the maximum number of allocations + * comes down from MAX_INT to MAX_INT/2. This is also a consideration when + * considering max fixed allocator size, since if we require a large number + * of Blobs this reduces the amount of "virtual" allocations by at least + * a factro of three for each blob (at least 2 fixed allocations for + * content and 1 more for the header). A variation on the current Blob + * implementation could include the header in the first allocation, thus + * reducing the minimum Blob allocations from 3 to 2, but the point still + * holds that too small a max fixed allocation could rmatically reduce the + * number of allocations that could be made. 
+ * + * @param alloc the slot size to be managed + * @param minReserve the minimum reservation in bytes + * @return the size of the int array + */ + public static int calcBitSize(final boolean optDensity, final int alloc, final int minReserve, final int modAllocation) { + final int intAllocation = 32 * alloc; // min 32 bits + + // we need to find smallest number of ints * the intAllocation + // such that totalAllocation % minReserve is 0 + // example 6K intAllocation would need 8 ints for 48K for 16K min + // likewise a 24K intAllocation would require 2 ints + // if optimising for density set min ints to 8 + int nints = optDensity ? 8 : 1; + while ((nints * intAllocation) < minReserve) nints++; + + while ((nints * intAllocation) % modAllocation != 0) nints++; + + System.out.println("calcBitSize for " + alloc + " returns " + nints); + + return nints; + } public String getStats(final AtomicLong counter) { @@ -675,4 +763,14 @@ public void setBucketStats(Bucket b) { m_statsBucket = b; } + + public void releaseSession() { + if (this.m_sessionActive) { + if (log.isTraceEnabled()) + log.trace("Allocator: #" + m_index + " releasing session protection"); + for (AllocBlock ab : m_allocBlocks) { + ab.releaseSession(); + } + } + } } Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/PSOutputStream.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/PSOutputStream.java 2010-11-24 21:40:07 UTC (rev 3984) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/PSOutputStream.java 2010-11-29 09:53:27 UTC (rev 3985) @@ -357,11 +357,6 @@ // + ", last alloc: " + precount); // } - if (log.isDebugEnabled()) - log.debug("Writing BlobHdrIdx with " + m_blobHdrIdx + " allocations"); - - // DO NOT USE BLOB ALLOCATOR - // addr = m_store.registerBlob(addr); // returns handle } catch (IOException e) { // e.printStackTrace(); throw new RuntimeException(e); Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java 2010-11-24 21:40:07 UTC (rev 3984) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java 2010-11-29 09:53:27 UTC (rev 3985) @@ -477,7 +477,7 @@ * <code>true</code> iff the backing store is open. 
*/ private volatile boolean m_open = true; - + class WriteCacheImpl extends WriteCache.FileChannelScatteredWriteCache { public WriteCacheImpl(final ByteBuffer buf, final boolean useChecksum, @@ -852,9 +852,10 @@ final int allocBlocks = strBuf.readInt(); m_storageStatsAddr = strBuf.readLong(); - strBuf.readInt(); // reserved7 - strBuf.readInt(); // reserved8 - strBuf.readInt(); // reserved9 + // and let's read in those reserved ints + for (int i = 0; i < cReservedMetaBits; i++) { + strBuf.readInt(); + } m_allocSizes = new int[allocBlocks]; for (int i = 0; i < allocBlocks; i++) { @@ -992,7 +993,8 @@ FileChannelUtility.readAll(m_reopener, ByteBuffer.wrap(buf), addr); - final DataInputStream strBuf = new DataInputStream(new ByteArrayInputStream(buf)); + final ByteArrayInputStream baBuf = new ByteArrayInputStream(buf); + final DataInputStream strBuf = new DataInputStream(baBuf); final int allocSize = strBuf.readInt(); // if Blob < 0 final FixedAllocator allocator; @@ -1009,6 +1011,14 @@ freeList = m_freeFixed[index]; allocator.read(strBuf); + final int chk = ChecksumUtility.getCHK().checksum(buf, + buf.length - baBuf.available()); + + int tstChk = strBuf.readInt(); + if (tstChk != chk) { + throw new IllegalStateException("FixedAllocator checksum error"); + } + allocator.setDiskAddr(i); // store bit, not physical // address! allocator.setFreeList(freeList); @@ -1420,7 +1430,7 @@ if (sze > m_maxFixedAlloc-4) { freeBlob(addr, sze, context); } else { - final Allocator alloc = getBlockByAddress(addr); + final FixedAllocator alloc = getBlockByAddress(addr); /* * There are a few conditions here. If the context owns the * allocator and the allocation was made by this context then it @@ -1429,22 +1439,29 @@ * AllocationContexts, in this situation, the free must ALWAYS * be deferred. * + * If the MIN_RELEASE_AGE is ZERO then we can protect allocations + * and read-only transactions with Session protection, avoiding + * the need to manage deferred frees. + * * FIXME We need unit tests when MIN_RELEASE_AGE is GT ZERO. * * FIXME We need unit test when MIN_RELEASE_AGE is ZERO AND * there are open read-only transactions. */ - boolean alwaysDefer = m_minReleaseAge > 0L - || m_activeTxCount > 0; - if (!alwaysDefer) - alwaysDefer = context == null && !m_contexts.isEmpty(); - if (alwaysDefer) - if (log.isDebugEnabled()) - log.debug("Should defer " + addr + " real: " + physicalAddress(addr)); - if (alwaysDefer || !alloc.canImmediatelyFree(addr, sze, context)) { - deferFree(addr, sze); + if (m_minReleaseAge == 0) { + immediateFree(addr, sze); } else { - immediateFree(addr, sze); + boolean alwaysDefer = m_activeTxCount > 0; + if (!alwaysDefer) + alwaysDefer = context == null && !m_contexts.isEmpty(); + if (alwaysDefer) + if (log.isDebugEnabled()) + log.debug("Should defer " + addr + " real: " + physicalAddress(addr)); + if (alwaysDefer || !alloc.canImmediatelyFree(addr, sze, context)) { + deferFree(addr, sze); + } else { + immediateFree(addr, sze); + } } } } finally { @@ -1452,7 +1469,31 @@ } } + + long getHistoryRetention() { + return m_minReleaseAge; + } + boolean isSessionProtected() { + return m_minReleaseAge == 0 && (m_activeTxCount > 0 || !m_contexts.isEmpty()); + } + + /** + * Sessions will only be used to protect transactions and read-only views + * when the m_minReleaseAge is no zero, otherwise the deferredFree + * approach will be used. + * + * When called, will call through to the Allocators to re-sync the + * transient bits with the committed and live. 
+ */ + void releaseSessions() { + if (m_minReleaseAge == 0) { + for (FixedAllocator fa : m_allocs) { + fa.releaseSession(); + } + } + } + private boolean freeBlob(final int hdr_addr, final int sze, final IAllocationContext context) { if (sze < (m_maxFixedAlloc-4)) throw new IllegalArgumentException("Unexpected address size"); @@ -1498,7 +1539,7 @@ m_allocationLock.lock(); try { - final Allocator alloc = getBlockByAddress(addr); + final FixedAllocator alloc = getBlockByAddress(addr); final int addrOffset = getOffset(addr); if (alloc == null) { throw new IllegalArgumentException("Invalid address provided to immediateFree: " + addr + ", size: " + sze); @@ -1879,9 +1920,10 @@ str.writeInt(m_allocSizes.length); str.writeLong(m_storageStatsAddr); - str.writeInt(0); // reserved7 - str.writeInt(0); // reserved8 - str.writeInt(0); // reserved9 + // Let's reserve ourselves some space + for (int i = 0; i < cReservedMetaBits; i++) { + str.writeInt(0); + } for (int i = 0; i < m_allocSizes.length; i++) { str.writeInt(m_allocSizes[i]); @@ -2126,18 +2168,24 @@ final private int cVersion = 0x0400; /** + * cReservedMetaBits is the reserved space in the metaBits header + * to alloc for binary compatibility moving forward. + * + * If we need to add int values to the header we can do so and reduce the + * reservation by 1 each time + */ + final int cReservedMetaBits = 20; + + /** * MetaBits Header * 0 int version * 1-2 int[2] long deferredFree * 3 int defaultMetaBitsSize * 4 int length of allocation sizes - * 5 int reserved - * 6 int reserved - * 7 int reserved - * 8 int reserved - * 9 int reserved + * 5-6 int[2] storage stats addr + * + 20 reserved */ - final private int cMetaHdrFields = 10; + final private int cMetaHdrFields = 7 + cReservedMetaBits; /** * @see Options#META_BITS_SIZE */ @@ -2697,7 +2745,7 @@ if (addr > 0) { return addr & 0xFFFFFFE0; } else { - final Allocator allocator = getBlock(addr); + final FixedAllocator allocator = getBlock(addr); final int offset = getOffset(addr); final long laddr = allocator.getPhysicalAddress(offset); @@ -2709,14 +2757,14 @@ * handle dual address format, if addr is positive then it is the physical * address, so the Allocators must be searched. **/ - Allocator getBlockByAddress(final int addr) { + FixedAllocator getBlockByAddress(final int addr) { if (addr < 0) { return getBlock(addr); } final Iterator<FixedAllocator> allocs = m_allocs.iterator(); - Allocator alloc = null; + FixedAllocator alloc = null; while (allocs.hasNext()) { alloc = allocs.next(); @@ -2733,10 +2781,10 @@ // return (-addr) >>> OFFSET_BITS; // } - private Allocator getBlock(final int addr) { + private FixedAllocator getBlock(final int addr) { final int index = (-addr) >>> OFFSET_BITS; - return (Allocator) m_allocs.get(index); + return m_allocs.get(index); } private int getOffset(final int addr) { @@ -3422,6 +3470,29 @@ } } + /** + * The ContextAllocation object manages a freeList of associated allocators + * and an overall list of allocators. When the context is detached, all + * allocators must be released and any that has available capacity will be + * assigned to the global free lists. + * + * @param context + * The context to be released from all FixedAllocators. 
+ */ + public void abortContext(final IAllocationContext context) { + assertOpen(); + m_allocationLock.lock(); + try { + final ContextAllocation alloc = m_contexts.remove(context); + + if (alloc != null) { + alloc.release(); + } + } finally { + m_allocationLock.unlock(); + } + } + /** * The ContextAllocation class manages a set of Allocators. * @@ -3487,6 +3558,24 @@ // freeBlobs.addAll(m_freeBlobs); } + void abort() { + final ArrayList<FixedAllocator> freeFixed[] = m_parent != null ? m_parent.m_freeFixed + : m_store.m_freeFixed; + + final IAllocationContext pcontext = m_parent == null ? null + : m_parent.m_context; + + for (FixedAllocator f : m_allFixed) { + f.abortAllocationContext(pcontext); + } + + for (int i = 0; i < m_freeFixed.length; i++) { + freeFixed[i].addAll(m_freeFixed[i]); + } + +// freeBlobs.addAll(m_freeBlobs); + } + FixedAllocator getFreeFixed(final int i) { final ArrayList<FixedAllocator> free = m_freeFixed[i]; if (free.size() == 0) { @@ -4242,6 +4331,10 @@ m_activeTxCount--; if(log.isInfoEnabled()) log.info("#activeTx="+m_activeTxCount); + + if (m_activeTxCount == 0) { + releaseSessions(); + } } finally { m_allocationLock.unlock(); } Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/StorageStats.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/StorageStats.java 2010-11-24 21:40:07 UTC (rev 3984) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/StorageStats.java 2010-11-29 09:53:27 UTC (rev 3985) @@ -31,6 +31,7 @@ import java.math.BigDecimal; import java.math.RoundingMode; import java.util.ArrayList; +import java.util.Formatter; /** * Maintains stats on the RWStore allocations, useful for tuning Allocator @@ -64,35 +65,63 @@ * */ public class StorageStats { + final int cVersion = 0x0100; + final int m_maxFixed; public class BlobBucket { final int m_size; + long m_allocationSize; long m_allocations; long m_deletes; + long m_deleteSize; public BlobBucket(final int size) { m_size = size; } public BlobBucket(DataInputStream instr) throws IOException { m_size = instr.readInt(); + m_allocationSize = instr.readLong(); m_allocations = instr.readLong(); + m_deleteSize = instr.readLong(); m_deletes = instr.readLong(); } public void write(DataOutputStream outstr) throws IOException { outstr.writeInt(m_size); + outstr.writeLong(m_allocationSize); outstr.writeLong(m_allocations); + outstr.writeLong(m_deleteSize); outstr.writeLong(m_deletes); } - public void delete() { + public void delete(int sze) { + m_deleteSize += sze; m_deletes++; } - public void allocate() { + public void allocate(int sze) { + m_allocationSize += sze; m_allocations++; } + public long active() { + return m_allocations - m_deletes; + } + public int meanAllocation() { + if (m_allocations == 0) + return 0; + return (int) (m_allocationSize / m_allocations); + } + public float churn() { + if (active() == 0) + return m_allocations; + + BigDecimal allocs = new BigDecimal(m_allocations); + BigDecimal used = new BigDecimal(active()); + + return allocs.divide(used, 2, RoundingMode.HALF_UP).floatValue(); + } } public class Bucket { + final int m_start; final int m_size; int m_allocators; long m_totalSlots; @@ -101,11 +130,13 @@ long m_sizeAllocations; long m_sizeDeletes; - public Bucket(final int size) { + public Bucket(final int size, final int startRange) { m_size = size; + m_start = startRange; } public Bucket(DataInputStream instr) throws IOException { m_size = instr.readInt(); + m_start 
= instr.readInt(); m_allocators = instr.readInt(); m_slotAllocations = instr.readLong(); m_slotDeletes = instr.readLong(); @@ -115,6 +146,7 @@ } public void write(DataOutputStream outstr) throws IOException { outstr.writeInt(m_size); + outstr.writeInt(m_start); outstr.writeInt(m_allocators); outstr.writeLong(m_slotAllocations); outstr.writeLong(m_slotDeletes); @@ -126,6 +158,12 @@ if (sze < 0) throw new IllegalArgumentException("delete requires positive size, got: " + sze); + if (m_size > 64 && sze < 64) { + // if called from deferFree then may not include size. If so then use + // average size of slots to date as best running estimate. + sze = meanAllocation(); + } + if (sze > m_size) { // sze = ((sze - 1 + m_maxFixed)/ m_maxFixed) * 4; // Blob header @@ -151,6 +189,10 @@ return m_slotAllocations - m_slotDeletes; } + public long emptySlots() { + return m_totalSlots - usedSlots(); + } + public long usedStore() { return m_sizeAllocations - m_sizeDeletes; } @@ -160,23 +202,21 @@ if (usedStore() == 0) return 0.0f; - BigDecimal size = new BigDecimal(m_size * usedSlots()); - BigDecimal store = new BigDecimal(100 * usedStore()); - store = store.divide(size, 2, RoundingMode.HALF_UP); - BigDecimal total = new BigDecimal(100); + BigDecimal size = new BigDecimal(reservedStore()); + BigDecimal store = new BigDecimal(100 * (reservedStore() - usedStore())); - return total.subtract(store).floatValue(); + return store.divide(size, 2, RoundingMode.HALF_UP).floatValue(); } - public float totalWaste() { + public float totalWaste(long total) { if (usedStore() == 0) return 0.0f; - BigDecimal size = new BigDecimal(m_size * m_totalSlots); - BigDecimal store = new BigDecimal(100 * usedStore()); - store = store.divide(size, 2, RoundingMode.HALF_UP); - BigDecimal total = new BigDecimal(100); + long slotWaste = reservedStore() - usedStore(); - return total.subtract(store).floatValue(); + BigDecimal localWaste = new BigDecimal(100 * slotWaste); + BigDecimal totalWaste = new BigDecimal(total); + + return localWaste.divide(totalWaste, 2, RoundingMode.HALF_UP).floatValue(); } public long reservedStore() { return m_size * m_totalSlots; @@ -184,6 +224,40 @@ public void addAlocator() { m_allocators++; } + public float slotChurn() { + // Handle case where we may have deleted all allocations + if (usedSlots() == 0) + return m_slotAllocations; + + BigDecimal allocs = new BigDecimal(m_slotAllocations); + BigDecimal used = new BigDecimal(usedSlots()); + + return allocs.divide(used, 2, RoundingMode.HALF_UP).floatValue(); + } + public float slotsUnused() { + BigDecimal used = new BigDecimal(100 * (m_totalSlots-usedSlots())); + BigDecimal total = new BigDecimal(m_totalSlots); + + return used.divide(total, 2, RoundingMode.HALF_UP).floatValue(); + } + public float percentAllocations(long totalAllocations) { + BigDecimal used = new BigDecimal(100 * m_slotAllocations); + BigDecimal total = new BigDecimal(totalAllocations); + + return used.divide(total, 2, RoundingMode.HALF_UP).floatValue(); + } + public float percentSlotsInuse(long totalInuse) { + BigDecimal used = new BigDecimal(100 * usedSlots()); + BigDecimal total = new BigDecimal(totalInuse); + + return used.divide(total, 2, RoundingMode.HALF_UP).floatValue(); + } + public int meanAllocation() { + if (m_slotAllocations == 0) + return 0; + + return (int) (m_sizeAllocations / m_slotAllocations); + } } final ArrayList<Bucket> m_buckets; @@ -199,8 +273,10 @@ */ public StorageStats(final int[] buckets) { m_buckets = new ArrayList<Bucket>(); + int prevLimit = 0; for (int i = 0; i < 
buckets.length; i++) { - m_buckets.add(new Bucket(buckets[i]*64)); // slot sizes are 64 multiples + m_buckets.add(new Bucket(buckets[i]*64, prevLimit)); // slot sizes are 64 multiples + prevLimit = buckets[i]*64; } // last fixed allocator needed to compute BlobBuckets m_maxFixed = m_buckets.get(buckets.length-1).m_size; @@ -223,6 +299,10 @@ * @throws IOException */ public StorageStats(final DataInputStream instr) throws IOException { + int version = instr.readInt(); + if (cVersion != version) { + throw new IllegalStateException("StorageStats object is wrong version"); + } m_buckets = new ArrayList<Bucket>(); int nbuckets = instr.readInt(); for (int i = 0; i < nbuckets; i++) { @@ -242,6 +322,8 @@ ByteArrayOutputStream outb = new ByteArrayOutputStream(); DataOutputStream outd = new DataOutputStream(outb); + outd.writeInt(cVersion); + outd.writeInt(m_buckets.size()); for (Bucket b : m_buckets) { @@ -266,14 +348,14 @@ m_blobAllocation += sze; // increment blob bucket - findBlobBucket(sze).allocate(); + findBlobBucket(sze).allocate(sze); } public void deleteBlob(int sze) { m_blobDeletion -= sze; // decrement blob bucket - findBlobBucket(sze).delete(); + findBlobBucket(sze).delete(sze); } private BlobBucket findBlobBucket(final int sze) { @@ -307,57 +389,88 @@ str.append("\n-------------------------\n"); str.append("RWStore Allocator Summary\n"); str.append("-------------------------\n"); - str.append(padRight("AllocatorSize", 16)); - str.append(padLeft("AllocatorCount", 16)); - str.append(padLeft("SlotsAllocated", 16)); - str.append(padLeft("SlotsRecycled", 16)); - str.append(padLeft("SlotsInUse", 16)); - str.append(padLeft("SlotsReserved", 16)); - str.append(padLeft("BytesReserved", 16)); - str.append(padLeft("BytesAppData", 16)); - str.append(padLeft("%SlotWaste", 16)); - str.append(padLeft("%StoreWaste", 16)); - str.append(padLeft("%AppData", 16)); - str.append(padLeft("%StoreFile", 16)); - str.append("\n"); + str.append(String.format("%-16s %16s %16s %16s %16s %16s %16s %16s %16s %16s %16s %16s %16s %16s %16s %16s %16s %16s \n", + "AllocatorSize", + "AllocatorCount", + "SlotsAllocated", + "%SlotsAllocated", + "SlotsRecycled", + "SlotChurn", + "SlotsInUse", + "%SlotsInUse", + "MeanAllocation", + "SlotsReserved", + "%SlotsUnused", + "BytesReserved", + "BytesAppData", + "%SlotWaste", + "%AppData", + "%StoreFile", + "%TotalWaste", + "%FileWaste" + )); long totalAppData = 0; long totalFileStore = 0; + long totalAllocations = 0; + long totalInuse = 0; for (Bucket b: m_buckets) { totalAppData += b.usedStore(); totalFileStore += b.reservedStore(); + totalAllocations += b.m_slotAllocations; + totalInuse += b.usedSlots(); } + long totalWaste = totalFileStore - totalAppData; + for (Bucket b: m_buckets) { - str.append(padRight("" + b.m_size, 16)); - str.append(padLeft("" + b.m_allocators, 16)); - str.append(padLeft("" + b.m_slotAllocations, 16)); - str.append(padLeft("" + b.m_slotDeletes, 16)); - str.append(padLeft("" + b.usedSlots(), 16)); - str.append(padLeft("" + b.m_totalSlots, 16)); - str.append(padLeft("" + b.reservedStore(), 16)); - str.append(padLeft("" + b.usedStore(), 16)); - str.append(padLeft("" + b.slotWaste() + "%", 16)); - str.append(padLeft("" + b.totalWaste() + "%", 16)); - str.append(padLeft("" + dataPercent(b.usedStore(), totalAppData) + "%", 16)); - str.append(padLeft("" + dataPercent(b.reservedStore(), totalFileStore) + "%", 16)); - str.append("\n"); + str.append(String.format("%-16d %16d %16d %16.2f %16d %16.2f %16d %16.2f %16d %16d %16.2f %16d %16d %16.2f %16.2f %16.2f %16.2f 
%16.2f \n", + b.m_size, + b.m_allocators, + b.m_slotAllocations, + b.percentAllocations(totalAllocations), + b.m_slotDeletes, + b.slotChurn(), + b.usedSlots(), + b.percentSlotsInuse(totalInuse), + b.meanAllocation(), + b.m_totalSlots, + b.slotsUnused(), + b.reservedStore(), + b.usedStore(), + b.slotWaste(), + dataPercent(b.usedStore(), totalAppData), + dataPercent(b.reservedStore(), totalFileStore), + b.totalWaste(totalWaste), + b.totalWaste(totalFileStore) + )); } str.append("\n-------------------------\n"); str.append("BLOBS\n"); str.append("-------------------------\n"); - str.append(padRight("Bucket", 10)); - str.append(padLeft("Allocations", 12)); - str.append(padLeft("Deletes", 12)); - str.append(padLeft("Current", 12)); - str.append("\n"); + str.append(String.format("%-10s %12s %12s %12s %12s %12s %12s %12s %12s\n", + "Bucket(K)", + "Allocations", + "Allocated", + "Deletes", + "Deleted", + "Current", + "Data", + "Mean", + "Churn")); for (BlobBucket b: m_blobBuckets) { - str.append(padRight("" + (b.m_size/1024) + "K", 10)); - str.append(padLeft("" + b.m_allocations, 12)); - str.append(padLeft("" + b.m_deletes, 12)); - str.append(padLeft("" + (b.m_allocations - b.m_deletes), 12)); - str.append("\n"); + str.append(String.format("%-10d %12d %12d %12d %12d %12d %12d %12d %12.2f\n", + b.m_size/1024, + b.m_allocations, + b.m_allocationSize, + b.m_deletes, + b.m_deleteSize, + (b.m_allocations - b.m_deletes), + (b.m_allocationSize - b.m_deleteSize), + b.meanAllocation(), + b.churn() + )); } } @@ -368,32 +481,4 @@ return used.divide(total, 2, RoundingMode.HALF_UP).floatValue(); } - - public static String padLeft(String str, int minlen) { - if (str.length() >= minlen) - return str; - - StringBuffer out = new StringBuffer(); - int pad = minlen - str.length(); - while (pad-- > 0) { - out.append(' '); - } - out.append(str); - - return out.toString(); - } - - public static String padRight(String str, int minlen) { - if (str.length() >= minlen) - return str; - - StringBuffer out = new StringBuffer(); - out.append(str); - int pad = minlen - str.length(); - while (pad-- > 0) { - out.append(' '); - } - - return out.toString(); - } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
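The free() changes in rev 3985 above hinge on the history-retention setting. A simplified decision sketch follows (illustrative enum and parameters; the real code also consults FixedAllocator.canImmediatelyFree and the per-context allocation ownership):

public class FreePolicySketch {

    enum Action { IMMEDIATE_FREE, DEFERRED_FREE }

    // minReleaseAge == 0 -> session protection covers readers, so free immediately.
    // otherwise          -> defer whenever open transactions or foreign allocation
    //                       contexts might still reach the slot.
    static Action decide(final long minReleaseAge, final int activeTxCount,
            final boolean otherContextsOpen, final boolean contextOwnsAllocation) {
        if (minReleaseAge == 0L) {
            return Action.IMMEDIATE_FREE;
        }
        final boolean alwaysDefer = activeTxCount > 0 || otherContextsOpen;
        if (alwaysDefer || !contextOwnsAllocation) {
            return Action.DEFERRED_FREE;
        }
        return Action.IMMEDIATE_FREE;
    }

    public static void main(final String[] args) {
        System.out.println(decide(0L, 2, true, false));    // IMMEDIATE_FREE (session protected)
        System.out.println(decide(5000L, 1, false, true)); // DEFERRED_FREE (history retained)
        System.out.println(decide(5000L, 0, false, true)); // IMMEDIATE_FREE
    }
}

Under the zero-retention branch, the transient allocation bits keep freed slots from being reallocated until the last active transaction or context ends, at which point releaseSessions() resyncs the transient bits with the live and committed bits and the slots become reusable.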
From: <btm...@us...> - 2010-11-24 21:40:14
Revision: 3984 http://bigdata.svn.sourceforge.net/bigdata/?rev=3984&view=rev Author: btmurphy Date: 2010-11-24 21:40:07 +0000 (Wed, 24 Nov 2010) Log Message: ----------- [branch dev-btm]: CHECKPOINT - modified shard, shardlocator, and executor ServiceImpl and related config files to use QuorumPeerManager to initialize the ZooKeeperAccessor utility instead of using only the 'servers' in the config file Modified Paths: -------------- branches/dev-btm/bigdata/src/java/com/bigdata/resources/LocalResourceManager.java branches/dev-btm/bigdata-jini/src/java/com/bigdata/executor/Constants.java branches/dev-btm/bigdata-jini/src/java/com/bigdata/executor/ServiceImpl.java branches/dev-btm/bigdata-jini/src/java/com/bigdata/executor/config/executor.config branches/dev-btm/bigdata-jini/src/java/com/bigdata/executor/config/logging.properties branches/dev-btm/bigdata-jini/src/java/com/bigdata/jini/quorum/QuorumPeerManager.java branches/dev-btm/bigdata-jini/src/java/com/bigdata/metadata/Constants.java branches/dev-btm/bigdata-jini/src/java/com/bigdata/metadata/ServiceImpl.java branches/dev-btm/bigdata-jini/src/java/com/bigdata/metadata/config/logging.properties branches/dev-btm/bigdata-jini/src/java/com/bigdata/metadata/config/shardlocator.config branches/dev-btm/bigdata-jini/src/java/com/bigdata/shard/Constants.java branches/dev-btm/bigdata-jini/src/java/com/bigdata/shard/ServiceImpl.java branches/dev-btm/bigdata-jini/src/java/com/bigdata/shard/config/logging.properties branches/dev-btm/bigdata-jini/src/java/com/bigdata/shard/config/shard.config Modified: branches/dev-btm/bigdata/src/java/com/bigdata/resources/LocalResourceManager.java =================================================================== --- branches/dev-btm/bigdata/src/java/com/bigdata/resources/LocalResourceManager.java 2010-11-23 21:39:34 UTC (rev 3983) +++ branches/dev-btm/bigdata/src/java/com/bigdata/resources/LocalResourceManager.java 2010-11-24 21:40:07 UTC (rev 3984) @@ -331,6 +331,8 @@ + ICounterSet.pathSeparator + IResourceManagerCounters.IndexManager ); +//BTM - FIX NullPointerException - BEGIN +if (tmp3 != null) { synchronized (tmp3) { // Note: detach and then attach since that wipes out @@ -355,6 +357,8 @@ } } }//end live indices +}//endif(tmp3 != null) +//BTM - FIX NullPointerException - BEGIN lastReattachMillis = now; Modified: branches/dev-btm/bigdata-jini/src/java/com/bigdata/executor/Constants.java =================================================================== --- branches/dev-btm/bigdata-jini/src/java/com/bigdata/executor/Constants.java 2010-11-23 21:39:34 UTC (rev 3983) +++ branches/dev-btm/bigdata-jini/src/java/com/bigdata/executor/Constants.java 2010-11-24 21:40:07 UTC (rev 3984) @@ -121,4 +121,13 @@ Boolean.FALSE; // Boolean.parseBoolean // (IBigdataClient.Options.DEFAULT_COLLECT_PLATFORM_STATISTICS); + + // ZooKeeper client session timeout in seconds. Note that for the + // typical tick time of 2 seconds per tick, the session timeout + // should/will be set to a value between 4 and 40 seconds; because + // ZooKeeper requires that the session timeout always fall between + // 2 and 20 ticks. 
+ int LOWER_BOUND_ZK_SESSION_TIMEOUT = 1; + int UPPER_BOUND_ZK_SESSION_TIMEOUT = Integer.MAX_VALUE; + int DEFAULT_UPPER_BOUND_ZK_SESSION_TIMEOUT = 40; } Modified: branches/dev-btm/bigdata-jini/src/java/com/bigdata/executor/ServiceImpl.java =================================================================== --- branches/dev-btm/bigdata-jini/src/java/com/bigdata/executor/ServiceImpl.java 2010-11-23 21:39:34 UTC (rev 3983) +++ branches/dev-btm/bigdata-jini/src/java/com/bigdata/executor/ServiceImpl.java 2010-11-24 21:40:07 UTC (rev 3984) @@ -29,6 +29,7 @@ import com.bigdata.attr.ServiceInfo; import com.bigdata.io.SerializerUtil; +import com.bigdata.jini.quorum.QuorumPeerManager; import com.bigdata.jini.start.BigdataZooDefs; import com.bigdata.jini.util.ConfigMath; import com.bigdata.service.IClientServiceCallable; @@ -139,8 +140,6 @@ private EmbeddedCallableExecutor embeddedCallableExecutor; -//BTM private Thread waitThread; - /* Constructor used by Service Starter Framework to start this service */ public ServiceImpl(String[] args, LifeCycle lifeCycle) throws Exception { @@ -310,11 +309,6 @@ serviceId = bootStateUtil.getServiceId(); logger.debug("smsProxyId = null - service generated & persisted " +"(or retreieved) its own proxy id ["+proxyId+"]"); - - setZookeeperConfigInfo(config); - zookeeperAccessor = - new ZooKeeperAccessor - (zookeeperServers, zookeeperSessionTimeout); } else {//ServicesConfiguration pre-generated the proxy id proxyId = smsProxyId; serviceId = com.bigdata.jini.util.JiniUtil.uuid2ServiceID(proxyId); @@ -362,7 +356,8 @@ //properties object for the EmbeddedCallableExecutor Properties props = new Properties(); - props.setProperty("com.bigdata.resources.StoreManager.dataDir", dataDir); + props.setProperty + ("com.bigdata.resources.StoreManager.dataDir", dataDir); int threadPoolSize = Config.getIntEntry(config, COMPONENT_NAME, "threadPoolSize", @@ -462,6 +457,12 @@ Boolean.FALSE); this.sdm = new ServiceDiscoveryManager(ldm, null, config); + if (zookeeperAccessor == null) { + setZookeeperConfigInfo(config, this.sdm); + zookeeperAccessor = + new ZooKeeperAccessor + (zookeeperServers, zookeeperSessionTimeout); + } embeddedCallableExecutor = new EmbeddedCallableExecutor @@ -505,9 +506,6 @@ +", locators=" +Util.writeArrayElementsToString(locatorsToJoin)); -//BTM waitThread = new Util.WaitOnInterruptThread(logger); -//BTM waitThread.start(); - readyState.ready();//ready to accept calls from clients } @@ -566,13 +564,14 @@ futureExporters.removeAll(removeSet); } -//BTM waitThread.interrupt(); -//BTM try { -//BTM waitThread.join(); -//BTM } catch (InterruptedException e) {/*exiting, so swallow*/} - + if (zookeeperAccessor != null) { + try { + zookeeperAccessor.close(); + } catch(InterruptedException e) {//swallow + } + } Util.cleanupOnExit - (innerProxy, serverExporter, futureExporters, joinMgr, sdm, ldm); + (innerProxy, serverExporter, futureExporters, joinMgr, sdm, ldm); // Tell the ServiceStarter framework it's ok to release for gc if(lifeCycle != null) { @@ -756,7 +755,22 @@ logger.debug("[main]: smsProxyId="+smsProxyId); } - setZookeeperConfigInfo(smsConfig); + String[] tmpGroups = + (String[])smsConfig.getEntry + ("com.bigdata.service.jini.JiniClient", "groups", + String[].class, DiscoveryGroupManagement.NO_GROUPS); + LookupLocator[] tmpLocs = + (LookupLocator[])smsConfig.getEntry + ("com.bigdata.service.jini.JiniClient", "locators", + LookupLocator[].class, new LookupLocator[]{ }); + DiscoveryManagement tmpLdm = + new LookupDiscoveryManager(tmpGroups, tmpLocs, null); + 
ServiceDiscoveryManager tmpSdm = + new ServiceDiscoveryManager(tmpLdm, null); + + setZookeeperConfigInfo(smsConfig, tmpSdm); + tmpLdm.terminate(); + tmpSdm.terminate(); zookeeperAccessor = new ZooKeeperAccessor (zookeeperServers, zookeeperSessionTimeout); @@ -779,7 +793,8 @@ ("[main]: logicalServiceZPath="+logicalServiceZPath); if(physicalServiceZPath != null) { byte[] data = SerializerUtil.serialize(smsEntries); - ZooKeeper zookeeperClient = zookeeperAccessor.getZookeeper(); + ZooKeeper zookeeperClient = + zookeeperAccessor.getZookeeper(); logger.debug("[main]: zookeeper client created"); try { zookeeperClient.create @@ -789,7 +804,7 @@ +"[physicalServiceZPath=" +physicalServiceZPath+"]"); } catch(NodeExistsException e) { - zookeeperClient.setData(physicalServiceZPath, data, -1); + zookeeperClient.setData(physicalServiceZPath,data,-1); logger.debug("[main]: zookeeper znode updated " +"[physicalServiceZPath=" +physicalServiceZPath+"]"); @@ -810,8 +825,10 @@ } } - private static void setZookeeperConfigInfo(Configuration zkConfig) - throws ConfigurationException + private static void setZookeeperConfigInfo + (Configuration zkConfig, + ServiceDiscoveryManager srvcDiscMgr) + throws ConfigurationException, IOException { String zkComponent = "org.apache.zookeeper.ZooKeeper"; @@ -823,18 +840,11 @@ } logger.debug("zookeepeRoot="+zookeeperRoot); - zookeeperServers = - (String)zkConfig.getEntry - (zkComponent, "servers", String.class, null); - if(zookeeperServers == null) { - throw new ConfigurationException - ("zookeeper servers not specified"); - } - logger.debug("zookeeperServers="+zookeeperServers); - - zookeeperSessionTimeout = - (Integer)zkConfig.getEntry - (zkComponent, "sessionTimeout", int.class, 300000); + zookeeperSessionTimeout = + Config.getIntEntry(zkConfig, zkComponent, "sessionTimeout", + DEFAULT_UPPER_BOUND_ZK_SESSION_TIMEOUT, + LOWER_BOUND_ZK_SESSION_TIMEOUT, + UPPER_BOUND_ZK_SESSION_TIMEOUT); logger.debug("zookeeperSessionTimeout="+zookeeperSessionTimeout); ACL[] acl = (ACL[])zkConfig.getEntry @@ -844,5 +854,22 @@ } zookeeperAcl = Arrays.asList(acl); logger.debug("zookeeperAcl="+zookeeperAcl); + +//BTM - if config contains "servers" then by-pass dynamic discovery for now + zookeeperServers = + (String)zkConfig.getEntry + (zkComponent, "servers", String.class, null); + if(zookeeperServers == null) { + QuorumPeerManager tmpPeerMgr = + new QuorumPeerManager + (srvcDiscMgr, zookeeperSessionTimeout, logger); + if (tmpPeerMgr == null) { + throw new IOException("zookeeper ensemble unavailable"); + } + ZooKeeper.States zkState = tmpPeerMgr.getState(); + logger.debug("zookeeper state="+zkState); + zookeeperServers = tmpPeerMgr.getConnectString(); + } + logger.debug("zookeeperServers="+zookeeperServers); } } Modified: branches/dev-btm/bigdata-jini/src/java/com/bigdata/executor/config/executor.config =================================================================== --- branches/dev-btm/bigdata-jini/src/java/com/bigdata/executor/config/executor.config 2010-11-23 21:39:34 UTC (rev 3983) +++ branches/dev-btm/bigdata-jini/src/java/com/bigdata/executor/config/executor.config 2010-11-24 21:40:07 UTC (rev 3984) @@ -111,13 +111,10 @@ com.bigdata.executor.serverILFactory, false, false); } -//NOTE: remove once dynamic discovery of zookeeper is added org.apache.zookeeper.ZooKeeper { zroot = ConfigDeployUtil.getString("federation.name"); - servers = com.bigdata.executor.serverExporterIpAddr+":2888:3888"; - acl = new ACL[] { new ACL(ZooDefs.Perms.ALL, new Id("world", "anyone")) }; Modified: 
branches/dev-btm/bigdata-jini/src/java/com/bigdata/executor/config/logging.properties =================================================================== --- branches/dev-btm/bigdata-jini/src/java/com/bigdata/executor/config/logging.properties 2010-11-23 21:39:34 UTC (rev 3983) +++ branches/dev-btm/bigdata-jini/src/java/com/bigdata/executor/config/logging.properties 2010-11-24 21:40:07 UTC (rev 3984) @@ -38,3 +38,4 @@ #log4j.logger.com.bigdata.executor=DEBUG #log4j.logger.com.bigdata.executor.EmbeddedCallableExecutor=DEBUG +#org.apache.zookeeper=DEBUG Modified: branches/dev-btm/bigdata-jini/src/java/com/bigdata/jini/quorum/QuorumPeerManager.java =================================================================== --- branches/dev-btm/bigdata-jini/src/java/com/bigdata/jini/quorum/QuorumPeerManager.java 2010-11-23 21:39:34 UTC (rev 3983) +++ branches/dev-btm/bigdata-jini/src/java/com/bigdata/jini/quorum/QuorumPeerManager.java 2010-11-24 21:40:07 UTC (rev 3984) @@ -406,6 +406,11 @@ } } + // Other public methods defined by this class, not defined by ZooKeeper + public String getConnectString() { + return connectString; + } + // Private methods private ZooKeeper getClient() throws IOException { Modified: branches/dev-btm/bigdata-jini/src/java/com/bigdata/metadata/Constants.java =================================================================== --- branches/dev-btm/bigdata-jini/src/java/com/bigdata/metadata/Constants.java 2010-11-23 21:39:34 UTC (rev 3983) +++ branches/dev-btm/bigdata-jini/src/java/com/bigdata/metadata/Constants.java 2010-11-24 21:40:07 UTC (rev 3984) @@ -122,4 +122,13 @@ Boolean.FALSE; // Boolean.parseBoolean // (IBigdataClient.Options.DEFAULT_COLLECT_PLATFORM_STATISTICS); + + // ZooKeeper client session timeout in seconds. Note that for the + // typical tick time of 2 seconds per tick, the session timeout + // should/will be set to a value between 4 and 40 seconds; because + // ZooKeeper requires that the session timeout always fall between + // 2 and 20 ticks. 
+ int LOWER_BOUND_ZK_SESSION_TIMEOUT = 1; + int UPPER_BOUND_ZK_SESSION_TIMEOUT = Integer.MAX_VALUE; + int DEFAULT_UPPER_BOUND_ZK_SESSION_TIMEOUT = 40; } Modified: branches/dev-btm/bigdata-jini/src/java/com/bigdata/metadata/ServiceImpl.java =================================================================== --- branches/dev-btm/bigdata-jini/src/java/com/bigdata/metadata/ServiceImpl.java 2010-11-23 21:39:34 UTC (rev 3983) +++ branches/dev-btm/bigdata-jini/src/java/com/bigdata/metadata/ServiceImpl.java 2010-11-24 21:40:07 UTC (rev 3984) @@ -33,6 +33,7 @@ import com.bigdata.btree.filter.IFilterConstructor; import com.bigdata.btree.proc.IIndexProcedure; import com.bigdata.io.SerializerUtil; +import com.bigdata.jini.quorum.QuorumPeerManager; import com.bigdata.jini.start.BigdataZooDefs; import com.bigdata.jini.util.ConfigMath; import com.bigdata.mdi.PartitionLocator; @@ -142,8 +143,6 @@ private EmbeddedShardLocator embeddedShardLocator; -//BTM private Thread waitThread; - /* Constructor used by Service Starter Framework to start this service */ public ServiceImpl(String[] args, LifeCycle lifeCycle) throws Exception { System.out.println("\nZZZZZ SHARD LOCATOR ServiceImpl: constructor"); @@ -440,11 +439,6 @@ serviceId = bootStateUtil.getServiceId(); logger.debug("smsProxyId = null - service generated & persisted " +"(or retreieved) its own proxy id ["+proxyId+"]"); - - setZookeeperConfigInfo(config); - zookeeperAccessor = - new ZooKeeperAccessor - (zookeeperServers, zookeeperSessionTimeout); } else {//ServicesConfiguration pre-generated the proxy id proxyId = smsProxyId; serviceId = com.bigdata.jini.util.JiniUtil.uuid2ServiceID(proxyId); @@ -590,6 +584,13 @@ Boolean.FALSE); this.sdm = new ServiceDiscoveryManager(ldm, null, config); + if (zookeeperAccessor == null) { + setZookeeperConfigInfo(config, this.sdm); + zookeeperAccessor = + new ZooKeeperAccessor + (zookeeperServers, zookeeperSessionTimeout); + } + embeddedShardLocator = new EmbeddedShardLocator (proxyId, hostname, @@ -631,9 +632,6 @@ +", locators=" +Util.writeArrayElementsToString(locatorsToJoin)); -//BTM waitThread = new Util.WaitOnInterruptThread(logger); -//BTM waitThread.start(); - readyState.ready();//ready to accept calls from clients } @@ -693,11 +691,12 @@ futureExporters.removeAll(removeSet); } -//BTM waitThread.interrupt(); -//BTM try { -//BTM waitThread.join(); -//BTM } catch (InterruptedException e) {/*exiting, so swallow*/} - + if (zookeeperAccessor != null) { + try { + zookeeperAccessor.close(); + } catch(InterruptedException e) {//swallow + } + } Util.cleanupOnExit (innerProxy, serverExporter, futureExporters, joinMgr, sdm, ldm); @@ -883,7 +882,22 @@ logger.debug("[main]: smsProxyId="+smsProxyId); } - setZookeeperConfigInfo(smsConfig); + String[] tmpGroups = + (String[])smsConfig.getEntry + ("com.bigdata.service.jini.JiniClient", "groups", + String[].class, DiscoveryGroupManagement.NO_GROUPS); + LookupLocator[] tmpLocs = + (LookupLocator[])smsConfig.getEntry + ("com.bigdata.service.jini.JiniClient", "locators", + LookupLocator[].class, new LookupLocator[]{ }); + DiscoveryManagement tmpLdm = + new LookupDiscoveryManager(tmpGroups, tmpLocs, null); + ServiceDiscoveryManager tmpSdm = + new ServiceDiscoveryManager(tmpLdm, null); + + setZookeeperConfigInfo(smsConfig, tmpSdm); + tmpLdm.terminate(); + tmpSdm.terminate(); zookeeperAccessor = new ZooKeeperAccessor (zookeeperServers, zookeeperSessionTimeout); @@ -906,7 +920,8 @@ ("[main]: logicalServiceZPath="+logicalServiceZPath); if(physicalServiceZPath != null) { byte[] data = 
SerializerUtil.serialize(smsEntries); - ZooKeeper zookeeperClient = zookeeperAccessor.getZookeeper(); + ZooKeeper zookeeperClient = + zookeeperAccessor.getZookeeper(); logger.debug("[main]: zookeeper client created"); try { zookeeperClient.create @@ -916,7 +931,7 @@ +"[physicalServiceZPath=" +physicalServiceZPath+"]"); } catch(NodeExistsException e) { - zookeeperClient.setData(physicalServiceZPath, data, -1); + zookeeperClient.setData(physicalServiceZPath,data,-1); logger.debug("[main]: zookeeper znode updated " +"[physicalServiceZPath=" +physicalServiceZPath+"]"); @@ -936,8 +951,10 @@ } } - private static void setZookeeperConfigInfo(Configuration zkConfig) - throws ConfigurationException + private static void setZookeeperConfigInfo + (Configuration zkConfig, + ServiceDiscoveryManager srvcDiscMgr) + throws ConfigurationException, IOException { String zkComponent = "org.apache.zookeeper.ZooKeeper"; @@ -949,18 +966,11 @@ } logger.debug("zookeepeRoot="+zookeeperRoot); - zookeeperServers = - (String)zkConfig.getEntry - (zkComponent, "servers", String.class, null); - if(zookeeperServers == null) { - throw new ConfigurationException - ("zookeeper servers not specified"); - } - logger.debug("zookeeperServers="+zookeeperServers); - - zookeeperSessionTimeout = - (Integer)zkConfig.getEntry - (zkComponent, "sessionTimeout", int.class, 300000); + zookeeperSessionTimeout = + Config.getIntEntry(zkConfig, zkComponent, "sessionTimeout", + DEFAULT_UPPER_BOUND_ZK_SESSION_TIMEOUT, + LOWER_BOUND_ZK_SESSION_TIMEOUT, + UPPER_BOUND_ZK_SESSION_TIMEOUT); logger.debug("zookeeperSessionTimeout="+zookeeperSessionTimeout); ACL[] acl = (ACL[])zkConfig.getEntry @@ -970,5 +980,22 @@ } zookeeperAcl = Arrays.asList(acl); logger.debug("zookeeperAcl="+zookeeperAcl); + +//BTM - if config contains "servers" then by-pass dynamic discovery for now + zookeeperServers = + (String)zkConfig.getEntry + (zkComponent, "servers", String.class, null); + if(zookeeperServers == null) { + QuorumPeerManager tmpPeerMgr = + new QuorumPeerManager + (srvcDiscMgr, zookeeperSessionTimeout, logger); + if (tmpPeerMgr == null) { + throw new IOException("zookeeper ensemble unavailable"); + } + ZooKeeper.States zkState = tmpPeerMgr.getState(); + logger.debug("zookeeper state="+zkState); + zookeeperServers = tmpPeerMgr.getConnectString(); + } + logger.debug("zookeeperServers="+zookeeperServers); } } Modified: branches/dev-btm/bigdata-jini/src/java/com/bigdata/metadata/config/logging.properties =================================================================== --- branches/dev-btm/bigdata-jini/src/java/com/bigdata/metadata/config/logging.properties 2010-11-23 21:39:34 UTC (rev 3983) +++ branches/dev-btm/bigdata-jini/src/java/com/bigdata/metadata/config/logging.properties 2010-11-24 21:40:07 UTC (rev 3984) @@ -38,3 +38,4 @@ #log4j.logger.com.bigdata.metadata=DEBUG #log4j.logger.com.bigdata.metadata.EmbeddedShardLocator=DEBUG +#org.apache.zookeeper=DEBUG Modified: branches/dev-btm/bigdata-jini/src/java/com/bigdata/metadata/config/shardlocator.config =================================================================== --- branches/dev-btm/bigdata-jini/src/java/com/bigdata/metadata/config/shardlocator.config 2010-11-23 21:39:34 UTC (rev 3983) +++ branches/dev-btm/bigdata-jini/src/java/com/bigdata/metadata/config/shardlocator.config 2010-11-24 21:40:07 UTC (rev 3984) @@ -94,13 +94,10 @@ com.bigdata.metadata.serverILFactory, false, false); } -//NOTE: remove once dynamic discovery of zookeeper is added org.apache.zookeeper.ZooKeeper { zroot = 
ConfigDeployUtil.getString("federation.name"); - servers = com.bigdata.metadata.serverExporterIpAddr+":2888:3888"; - acl = new ACL[] { new ACL(ZooDefs.Perms.ALL, new Id("world", "anyone")) }; Modified: branches/dev-btm/bigdata-jini/src/java/com/bigdata/shard/Constants.java =================================================================== --- branches/dev-btm/bigdata-jini/src/java/com/bigdata/shard/Constants.java 2010-11-23 21:39:34 UTC (rev 3983) +++ branches/dev-btm/bigdata-jini/src/java/com/bigdata/shard/Constants.java 2010-11-24 21:40:07 UTC (rev 3984) @@ -95,7 +95,7 @@ int UPPER_BOUND_MAX_PARALLEL_TASKS_PER_REQUEST = 1000; int DEFAULT_MAX_PARALLEL_TASKS_PER_REQUEST = Integer.parseInt - (IBigdataClient.Options.DEFAULT_CLIENT_MAX_PARALLEL_TASKS_PER_REQUEST); + (IBigdataClient.Options.DEFAULT_CLIENT_MAX_PARALLEL_TASKS_PER_REQUEST); long LOWER_BOUND_TASK_TIMEOUT = 0; long UPPER_BOUND_TASK_TIMEOUT = Long.MAX_VALUE; @@ -121,4 +121,13 @@ Boolean.FALSE; // Boolean.parseBoolean // (IBigdataClient.Options.DEFAULT_COLLECT_PLATFORM_STATISTICS); + + // ZooKeeper client session timeout in seconds. Note that for the + // typical tick time of 2 seconds per tick, the session timeout + // should/will be set to a value between 4 and 40 seconds; because + // ZooKeeper requires that the session timeout always fall between + // 2 and 20 ticks. + int LOWER_BOUND_ZK_SESSION_TIMEOUT = 1; + int UPPER_BOUND_ZK_SESSION_TIMEOUT = Integer.MAX_VALUE; + int DEFAULT_UPPER_BOUND_ZK_SESSION_TIMEOUT = 40; } Modified: branches/dev-btm/bigdata-jini/src/java/com/bigdata/shard/ServiceImpl.java =================================================================== --- branches/dev-btm/bigdata-jini/src/java/com/bigdata/shard/ServiceImpl.java 2010-11-23 21:39:34 UTC (rev 3983) +++ branches/dev-btm/bigdata-jini/src/java/com/bigdata/shard/ServiceImpl.java 2010-11-24 21:40:07 UTC (rev 3984) @@ -32,6 +32,7 @@ import com.bigdata.btree.ResultSet; import com.bigdata.btree.filter.IFilterConstructor; import com.bigdata.btree.proc.IIndexProcedure; +import com.bigdata.jini.quorum.QuorumPeerManager; import com.bigdata.jini.start.BigdataZooDefs; import com.bigdata.io.SerializerUtil; import com.bigdata.jini.util.ConfigMath; @@ -150,8 +151,6 @@ private EmbeddedShardService embeddedShardService; -//BTM private Thread waitThread; - /* Constructor used by Service Starter Framework to start this service */ public ServiceImpl(String[] args, LifeCycle lifeCycle) throws Exception { System.out.println("\nSSSSS SHARD SERVICE ServiceImpl: constructor"); @@ -456,11 +455,6 @@ serviceId = bootStateUtil.getServiceId(); logger.debug("smsProxyId = null - service generated & persisted " +"(or retreieved) its own proxy id ["+proxyId+"]"); - - setZookeeperConfigInfo(config); - zookeeperAccessor = - new ZooKeeperAccessor - (zookeeperServers, zookeeperSessionTimeout); } else {//ServicesConfiguration pre-generated the proxy id proxyId = smsProxyId; serviceId = com.bigdata.jini.util.JiniUtil.uuid2ServiceID(proxyId); @@ -621,6 +615,13 @@ UPPER_BOUND_HTTPD_PORT); this.sdm = new ServiceDiscoveryManager(ldm, null, config); + if (zookeeperAccessor == null) { + setZookeeperConfigInfo(config, this.sdm); + zookeeperAccessor = + new ZooKeeperAccessor + (zookeeperServers, zookeeperSessionTimeout); + } + embeddedShardService = new EmbeddedShardService (proxyId, hostname, @@ -663,9 +664,6 @@ +", locators=" +Util.writeArrayElementsToString(locatorsToJoin)); -//BTM waitThread = new Util.WaitOnInterruptThread(logger); -//BTM waitThread.start(); - readyState.ready();//ready to 
accept calls from clients } @@ -755,13 +753,14 @@ futureExporters.removeAll(removeSet); } -//BTM waitThread.interrupt(); -//BTM try { -//BTM waitThread.join(); -//BTM } catch (InterruptedException e) {/*exiting, so swallow*/} - + if (zookeeperAccessor != null) { + try { + zookeeperAccessor.close(); + } catch(InterruptedException e) {//swallow + } + } Util.cleanupOnExit - (innerProxy, serverExporter, futureExporters, joinMgr, sdm, ldm); + (innerProxy, serverExporter, futureExporters, joinMgr, sdm, ldm); // Tell the ServiceStarter framework it's ok to release for gc if(lifeCycle != null) { @@ -945,7 +944,22 @@ logger.debug("[main]: smsProxyId="+smsProxyId); } - setZookeeperConfigInfo(smsConfig); + String[] tmpGroups = + (String[])smsConfig.getEntry + ("com.bigdata.service.jini.JiniClient", "groups", + String[].class, DiscoveryGroupManagement.NO_GROUPS); + LookupLocator[] tmpLocs = + (LookupLocator[])smsConfig.getEntry + ("com.bigdata.service.jini.JiniClient", "locators", + LookupLocator[].class, new LookupLocator[]{ }); + DiscoveryManagement tmpLdm = + new LookupDiscoveryManager(tmpGroups, tmpLocs, null); + ServiceDiscoveryManager tmpSdm = + new ServiceDiscoveryManager(tmpLdm, null); + + setZookeeperConfigInfo(smsConfig, tmpSdm); + tmpLdm.terminate(); + tmpSdm.terminate(); zookeeperAccessor = new ZooKeeperAccessor (zookeeperServers, zookeeperSessionTimeout); @@ -968,7 +982,8 @@ ("[main]: logicalServiceZPath="+logicalServiceZPath); if(physicalServiceZPath != null) { byte[] data = SerializerUtil.serialize(smsEntries); - ZooKeeper zookeeperClient = zookeeperAccessor.getZookeeper(); + ZooKeeper zookeeperClient = + zookeeperAccessor.getZookeeper(); logger.debug("[main]: zookeeper client created"); try { zookeeperClient.create @@ -978,7 +993,7 @@ +"[physicalServiceZPath=" +physicalServiceZPath+"]"); } catch(NodeExistsException e) { - zookeeperClient.setData(physicalServiceZPath, data, -1); + zookeeperClient.setData(physicalServiceZPath,data,-1); logger.debug("[main]: zookeeper znode updated " +"[physicalServiceZPath=" +physicalServiceZPath+"]"); @@ -998,8 +1013,10 @@ } } - private static void setZookeeperConfigInfo(Configuration zkConfig) - throws ConfigurationException + private static void setZookeeperConfigInfo + (Configuration zkConfig, + ServiceDiscoveryManager srvcDiscMgr) + throws ConfigurationException, IOException { String zkComponent = "org.apache.zookeeper.ZooKeeper"; @@ -1011,18 +1028,11 @@ } logger.debug("zookeepeRoot="+zookeeperRoot); - zookeeperServers = - (String)zkConfig.getEntry - (zkComponent, "servers", String.class, null); - if(zookeeperServers == null) { - throw new ConfigurationException - ("zookeeper servers not specified"); - } - logger.debug("zookeeperServers="+zookeeperServers); - - zookeeperSessionTimeout = - (Integer)zkConfig.getEntry - (zkComponent, "sessionTimeout", int.class, 300000); + zookeeperSessionTimeout = + Config.getIntEntry(zkConfig, zkComponent, "sessionTimeout", + DEFAULT_UPPER_BOUND_ZK_SESSION_TIMEOUT, + LOWER_BOUND_ZK_SESSION_TIMEOUT, + UPPER_BOUND_ZK_SESSION_TIMEOUT); logger.debug("zookeeperSessionTimeout="+zookeeperSessionTimeout); ACL[] acl = (ACL[])zkConfig.getEntry @@ -1032,5 +1042,22 @@ } zookeeperAcl = Arrays.asList(acl); logger.debug("zookeeperAcl="+zookeeperAcl); + +//BTM - if config contains "servers" then by-pass dynamic discovery for now + zookeeperServers = + (String)zkConfig.getEntry + (zkComponent, "servers", String.class, null); + if(zookeeperServers == null) { + QuorumPeerManager tmpPeerMgr = + new QuorumPeerManager + (srvcDiscMgr, 
zookeeperSessionTimeout, logger); + if (tmpPeerMgr == null) { + throw new IOException("zookeeper ensemble unavailable"); + } + ZooKeeper.States zkState = tmpPeerMgr.getState(); + logger.debug("zookeeper state="+zkState); + zookeeperServers = tmpPeerMgr.getConnectString(); + } + logger.debug("zookeeperServers="+zookeeperServers); } } Modified: branches/dev-btm/bigdata-jini/src/java/com/bigdata/shard/config/logging.properties =================================================================== --- branches/dev-btm/bigdata-jini/src/java/com/bigdata/shard/config/logging.properties 2010-11-23 21:39:34 UTC (rev 3983) +++ branches/dev-btm/bigdata-jini/src/java/com/bigdata/shard/config/logging.properties 2010-11-24 21:40:07 UTC (rev 3984) @@ -38,3 +38,4 @@ #log4j.logger.com.bigdata.shard=DEBUG #log4j.logger.com.bigdata.shard.EmbeddedShardService=DEBUG +#org.apache.zookeeper=DEBUG Modified: branches/dev-btm/bigdata-jini/src/java/com/bigdata/shard/config/shard.config =================================================================== --- branches/dev-btm/bigdata-jini/src/java/com/bigdata/shard/config/shard.config 2010-11-23 21:39:34 UTC (rev 3983) +++ branches/dev-btm/bigdata-jini/src/java/com/bigdata/shard/config/shard.config 2010-11-24 21:40:07 UTC (rev 3984) @@ -94,13 +94,10 @@ com.bigdata.shard.serverILFactory, false, false); } -//NOTE: remove once dynamic discovery of zookeeper is added org.apache.zookeeper.ZooKeeper { zroot = ConfigDeployUtil.getString("federation.name"); - servers = com.bigdata.shard.serverExporterIpAddr+":2888:3888"; - acl = new ACL[] { new ACL(ZooDefs.Perms.ALL, new Id("world", "anyone")) }; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
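The new Constants comments in this revision describe ZooKeeper's rule that a client's session timeout is negotiated into the range of 2 to 20 ticks. A minimal sketch (not from the commit; the class and method names are made up) of that clamping with ZooKeeper's default server bounds, which is why a 2000 ms tick always yields an effective timeout between 4 and 40 seconds regardless of what the client requests:

    public final class SessionTimeoutSketch {

        // With ZooKeeper's defaults: minSessionTimeout = 2 * tickTime,
        // maxSessionTimeout = 20 * tickTime; the server clamps the request.
        static int negotiatedTimeoutMs(int requestedMs, int tickTimeMs) {
            int min = 2 * tickTimeMs;
            int max = 20 * tickTimeMs;
            return Math.max(min, Math.min(max, requestedMs));
        }

        public static void main(String[] args) {
            // A 300 s request (the old hard-coded default) is capped at 40 s.
            System.out.println(negotiatedTimeoutMs(300000, 2000)); // 40000
            // A too-small request is raised to 2 ticks.
            System.out.println(negotiatedTimeoutMs(1000, 2000));   // 4000
        }
    }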
From: <btm...@us...> - 2010-11-23 21:39:41
|
Revision: 3983 http://bigdata.svn.sourceforge.net/bigdata/?rev=3983&view=rev Author: btmurphy Date: 2010-11-23 21:39:34 +0000 (Tue, 23 Nov 2010) Log Message: ----------- [branch dev-btm]: CHECKPOINT - added first cut of the QuorumPeerManager class that wraps the ZooKeeper client for discovery and error handling, added the testQuorumPeerManager method to the QuorumPeerServiceTest class (note also that as part of the previous checkpoint, the zookeeper jar file was upgraded to the latest zookeeper-3.3.2.jar release, which included changing build.xml to reference the new jar file in the compile classpath) Modified Paths: -------------- branches/dev-btm/bigdata-jini/src/test/com/bigdata/quorum/QuorumPeerServiceTest.java Added Paths: ----------- branches/dev-btm/bigdata-jini/src/java/com/bigdata/jini/quorum/ branches/dev-btm/bigdata-jini/src/java/com/bigdata/jini/quorum/QuorumPeerManager.java Added: branches/dev-btm/bigdata-jini/src/java/com/bigdata/jini/quorum/QuorumPeerManager.java =================================================================== --- branches/dev-btm/bigdata-jini/src/java/com/bigdata/jini/quorum/QuorumPeerManager.java (rev 0) +++ branches/dev-btm/bigdata-jini/src/java/com/bigdata/jini/quorum/QuorumPeerManager.java 2010-11-23 21:39:34 UTC (rev 3983) @@ -0,0 +1,694 @@ +/* + +Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +*/ +package com.bigdata.jini.quorum; + +import com.bigdata.attr.QuorumPeerAttr; +import com.bigdata.service.QuorumPeerService; +import com.bigdata.service.Service; +import com.bigdata.util.EntryUtil; +import com.bigdata.util.Util; +import com.bigdata.util.config.LogUtil; + +import net.jini.core.entry.Entry; +import net.jini.core.lookup.ServiceID; +import net.jini.core.lookup.ServiceItem; +import net.jini.core.lookup.ServiceTemplate; +import net.jini.lookup.LookupCache; +import net.jini.lookup.ServiceDiscoveryEvent; +import net.jini.lookup.ServiceDiscoveryListener; +import net.jini.lookup.ServiceDiscoveryManager; + +import org.apache.log4j.Level; +import org.apache.log4j.Logger; + +import org.apache.zookeeper.AsyncCallback; +import org.apache.zookeeper.CreateMode; +import org.apache.zookeeper.WatchedEvent; +import org.apache.zookeeper.Watcher; +import org.apache.zookeeper.ZooKeeper; +import org.apache.zookeeper.KeeperException; +import org.apache.zookeeper.KeeperException.SessionExpiredException; +import org.apache.zookeeper.Watcher.Event.KeeperState; +import org.apache.zookeeper.data.ACL; +import org.apache.zookeeper.data.Stat; + +import java.io.IOException; +import java.net.InetAddress; +import java.rmi.RemoteException; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.UUID; +import java.util.concurrent.ConcurrentHashMap; + +/** + * Helper class that wraps the <code>org.apache.zookeeper.ZooKeeper</code> + * client class, providing additional covenient functionality related to + * dynamic discovery of the connection information associated with the + * peers in the federation's ensemble, as well as exception and session + * expiry handling. 
+ */ +public class QuorumPeerManager { + + private Logger logger; + + private ServiceDiscoveryManager sdm; + private int sessionTimeout; + + // How long to wait for the ensemble to be discovered (and the + // connectString to be constructed) + // 0L ==> don't wait, the caller will provide its own retry logic + // Long.MAX_VALUE ==> wait forever + // negative ==> use backoff strategy specified by this class + private long discoverWait; + + // How long to wait for a connection before declaring failure + // 0L ==> try to connect only once + // Long.MAX_VALUE ==> wait forever + // negative ==> use backoff strategy specified by this class + private long connectWait; + + private LookupCache quorumServiceCache; + private Map<UUID, String> hostPortMap = + new ConcurrentHashMap<UUID, String>(); + + private volatile String connectString = null; + private volatile ZooKeeper zkClient; + private volatile boolean terminated = false; + + private Object syncObj = new Object(); + private static long[] discoverBackoff = + {1L, 2L, 4L, 8L, 16L, 32L, 64L, 128L, 256L};//seconds + private static long[] connectBackoff = + {1L, 2L, 4L, 8L, 16L, 32L, 64L, 128L};//seconds + + public QuorumPeerManager(ServiceDiscoveryManager sdm, + int sessionTimeout, + Logger logger) + { + this(sdm, sessionTimeout, -1L, -1L, logger); + } + + public QuorumPeerManager(ServiceDiscoveryManager sdm, + int sessionTimeout, + long discoverWait, + long connectWait, + Logger logger) + { + if (sdm == null) { + throw new NullPointerException("null sdm"); + } + this.sdm = sdm; + ServiceDiscoveryListener cacheListener = new CacheListener(logger); + + // Discover all QuorumPeerServices that have the join the federation + // the given sdm is configured to discover (by groups and/or locs) + + Class[] quorumServiceType = new Class[] {QuorumPeerService.class}; + ServiceTemplate quorumServiceTmpl = + new ServiceTemplate(null, quorumServiceType, null); + try { + this.quorumServiceCache = sdm.createLookupCache(quorumServiceTmpl, + null,//filter + cacheListener); + } catch(RemoteException e) { + logger.warn(e.getMessage(), e); + } + + this.sessionTimeout = sessionTimeout; + this.discoverWait = discoverWait; + this.connectWait = connectWait; + this.logger = (logger == null ? 
+ LogUtil.getLog4jLogger((this.getClass()).getName()) : + logger); + } + + // Wrapped methods from org.apache.zookeper.ZooKeeper client class + + public void addAuthInfo(String scheme, byte[] auth) throws IOException { + checkTerminated(); + getClient().addAuthInfo(scheme, auth); + } + + public void close() { + if (terminated) return; + if ( (zkClient != null) && (zkClient.getState().isAlive()) ) { + try { + zkClient.close(); + } catch(InterruptedException e) {//swallow + } + } + terminated = true; + connectString = null; + } + + public String create(String path, + byte[] data, + List<ACL> acl, + CreateMode createMode) + throws IOException, KeeperException, InterruptedException + { + checkTerminated(); + return getClient().create(path, data, acl, createMode); + } + + public void create(String path, + byte[] data, + List<ACL> acl, + CreateMode createMode, + AsyncCallback.StringCallback cb, + Object ctx) throws IOException + { + checkTerminated(); + getClient().create(path, data, acl, createMode, cb, ctx); + } + + public void delete(String path, int version) + throws IOException, KeeperException, InterruptedException + { + checkTerminated(); + getClient().delete(path, version); + } + + public void delete(String path, + int version, + AsyncCallback.VoidCallback cb, + Object ctx) throws IOException + { + checkTerminated(); + getClient().delete(path, version, cb, ctx); + } + + public Stat exists(String path, Watcher watcher) + throws IOException, KeeperException, InterruptedException + { + checkTerminated(); + return getClient().exists(path, watcher); + } + + public Stat exists(String path, boolean watch) + throws IOException, KeeperException, InterruptedException + { + checkTerminated(); + return getClient().exists(path, watch); + } + + public void exists(String path, + Watcher watcher, + AsyncCallback.StatCallback cb, + Object ctx) + throws IOException + { + checkTerminated(); + getClient().exists(path, watcher, cb, ctx); + } + + public void exists(String path, + boolean watch, + AsyncCallback.StatCallback cb, + Object ctx) + throws IOException + { + checkTerminated(); + getClient().exists(path, watch, cb, ctx); + } + + public byte[] getData(String path, Watcher watcher, Stat stat) + throws IOException, KeeperException, InterruptedException + { + checkTerminated(); + return getClient().getData(path, watcher, stat); + } + + public byte[] getData(String path, boolean watch, Stat stat) + throws IOException, KeeperException, InterruptedException + { + checkTerminated(); + return getClient().getData(path, watch, stat); + } + + public void getData(String path, + Watcher watcher, + AsyncCallback.DataCallback cb, + Object ctx) throws IOException + { + checkTerminated(); + getClient().getData(path, watcher, cb, ctx); + } + + public void getData(String path, + boolean watch, + AsyncCallback.DataCallback cb, + Object ctx) throws IOException + { + checkTerminated(); + getClient().getData(path, watch, cb, ctx); + } + + public Stat setData(String path, byte[] data, int version) + throws IOException, KeeperException, InterruptedException + { + checkTerminated(); + return getClient().setData(path, data, version); + } + + public void setData(String path, + byte[] data, + int version, + AsyncCallback.StatCallback cb, + Object ctx) throws IOException + { + checkTerminated(); + getClient().setData(path, data, version, cb, ctx); + } + + public List<ACL> getACL(String path, Stat stat) + throws IOException, KeeperException, InterruptedException + { + checkTerminated(); + return getClient().getACL(path, stat); + } + 
+ public void getACL(String path, + Stat stat, + AsyncCallback.ACLCallback cb, + Object ctx) throws IOException + { + checkTerminated(); + getClient().getACL(path, stat, cb, ctx); + } + + public Stat setACL(String path, List<ACL> acl, int version) + throws IOException, KeeperException, InterruptedException + { + checkTerminated(); + return getClient().setACL(path, acl, version); + } + + public void setACL(String path, + List<ACL> acl, + int version, + AsyncCallback.StatCallback cb, + Object ctx) throws IOException + { + checkTerminated(); + getClient().setACL(path, acl, version, cb, ctx); + } + + public List<String> getChildren(String path, Watcher watcher) + throws IOException, KeeperException, InterruptedException + { + checkTerminated(); + return getClient().getChildren(path, watcher); + } + + public List<String> getChildren(String path, boolean watch) + throws IOException, KeeperException, InterruptedException + { + checkTerminated(); + return getClient().getChildren(path, watch); + } + + public void getChildren(String path, + Watcher watcher, + AsyncCallback.ChildrenCallback cb, + Object ctx) throws IOException + { + checkTerminated(); + getClient().getChildren(path, watcher, cb, ctx); + } + + public void getChildren(String path, + boolean watch, + AsyncCallback.ChildrenCallback cb, + Object ctx) throws IOException + { + checkTerminated(); + getClient().getChildren(path, watch, cb, ctx); + } + + public List<String> getChildren(String path, Watcher watcher, Stat stat) + throws IOException, KeeperException, InterruptedException + { + checkTerminated(); + return getClient().getChildren(path, watcher, stat); + } + + public List<String> getChildren(String path, boolean watch, Stat stat) + throws IOException, KeeperException, InterruptedException + { + checkTerminated(); + return getClient().getChildren(path, watch, stat); + } + + public void getChildren(String path, + Watcher watcher, + AsyncCallback.Children2Callback cb, + Object ctx) throws IOException + { + checkTerminated(); + getClient().getChildren(path, watcher, cb, ctx); + } + + public void getChildren(String path, + boolean watch, + AsyncCallback.Children2Callback cb, + Object ctx) throws IOException + { + checkTerminated(); + getClient().getChildren(path, watch, cb, ctx); + } + + public void sync(String path, AsyncCallback.VoidCallback cb, Object ctx) + throws IOException + { + checkTerminated(); + getClient().sync(path, cb, ctx); + } + + public ZooKeeper.States getState() throws IOException { + checkTerminated(); + return getClient().getState(); + } + + public String toString() { + checkTerminated(); + try { + return getClient().toString(); + } catch(IOException e) { + // default value when client unavailable + return "[connectString="+connectString + +", sessionTimeout="+sessionTimeout + +", connectWait="+connectWait + +", terminated="+terminated+"]"; + } + } + + // Private methods + + private ZooKeeper getClient() throws IOException { + if ( (zkClient != null) && (zkClient.getState().isAlive()) ) { + return zkClient; + } + + // Determine if ensemble has been discovered yet + + if (connectString == null) { + if(discoverWait > 0L) {//retry for discoverWait seconds + for (long i=0L; i<discoverWait; i++) { + Util.delayMS(1L*1000L); + if (connectString != null) break; + if (logger.isDebugEnabled()) { + logger.debug("ensemble still not discovered " + +"[try #"+(i+1)+"]"); + } + } + } else if (discoverWait < 0L) {//retry with backoff + for (int i=0; i<discoverBackoff.length; i++) { + Util.delayMS(discoverBackoff[i]*1000L); + if 
(connectString != null) break; + if (logger.isDebugEnabled()) { + logger.debug("ensemble still not discovered " + +"[try #"+(i+1)+"]"); + } + } + } + if (connectString == null) {//still not discovered ==> fail + zkClient = null; + throw new IllegalStateException + ("never discovered zookeeper ensemble"); + } + } + + // Ensemble discovered and connectString constructed, construct client + + zkClient = new ZooKeeper(connectString, + sessionTimeout, + new ZookeeperEventListener(logger)); + + // Connect to ensemble + + ZooKeeper.States state = zkClient.getState(); + logger.debug("state[try #0] = "+state); + + if ( !state.equals(ZooKeeper.States.CONNECTED) ) { + boolean connected = false; + if (connectWait == 0L) {//tried once above + zkClient = null; + } else if (connectWait > 0L) {//retry until connected or timeout + for (long i=0L; i<connectWait; i++) { + Util.delayMS(1L*1000L); + state = zkClient.getState(); + if (logger.isDebugEnabled()) { + logger.debug("state[try #"+(i+1)+"] = "+state); + } + if ( state.equals(ZooKeeper.States.CONNECTED) ) { + connected = true; + break; + } + } + } else { //connectWait < 0L ==> retry with default backoff + for (int i=0; i<connectBackoff.length; i++) { + Util.delayMS(connectBackoff[i]*1000L); + state = zkClient.getState(); + if (logger.isDebugEnabled()) { + logger.debug("state[try #"+(i+1)+"] = "+state); + } + if ( state.equals(ZooKeeper.States.CONNECTED) ) { + connected = true; + break; + } + } + } + if (!connected) zkClient = null;//never connected + } + if (zkClient == null) { + throw new IllegalStateException("zookeeper ensemble unavailable"); + } + return zkClient; + } + + private void checkTerminated() { + if (terminated) { + throw new IllegalStateException("QuorumPeerManager terminated"); + } + } + + // Nested class(es) + + private class CacheListener implements ServiceDiscoveryListener { + private Logger logger; + CacheListener(Logger logger) { + this.logger = logger; + } + public void serviceAdded(ServiceDiscoveryEvent event) { + ServiceItem item = event.getPostEventServiceItem(); + + ServiceID serviceId = item.serviceID; + Object service = item.service; + Entry[] attrs = item.attributeSets; + + Class serviceType = service.getClass(); + UUID serviceUUID = ((Service)service).getServiceUUID(); + + QuorumPeerAttr quorumPeerAttr = + (QuorumPeerAttr)(EntryUtil.getEntryByType + (attrs, QuorumPeerAttr.class)); + + InetAddress peerAddr = quorumPeerAttr.address; + int clientPort = quorumPeerAttr.clientPort; + int ensembleSize = quorumPeerAttr.nQuorumPeers; + + if(logger.isDebugEnabled()) { + logger.log(Level.DEBUG, "1 of "+ensembleSize+" quorum peer(s) " + +"DISCOVERED [addr="+peerAddr+", port=" + +clientPort+"]"); + } + hostPortMap.put(serviceUUID, peerAddr+":"+clientPort); + + // Build connectString when all expected peers found + synchronized(syncObj) { + if (hostPortMap.size() == ensembleSize) { + Iterator<String> itr = (hostPortMap.values()).iterator(); + //build connectString + StringBuffer strBuf = null; + if (itr.hasNext()) { + strBuf = new StringBuffer(itr.next()); + } + while( itr.hasNext() ) { + strBuf.append(","+itr.next()); + } + connectString = strBuf.toString(); + logger.debug("connectString = "+connectString); + } + } + } + + public void serviceRemoved(ServiceDiscoveryEvent event) { + ServiceItem item = event.getPreEventServiceItem(); + + ServiceID serviceId = item.serviceID; + Object service = item.service; + Entry[] attrs = item.attributeSets; + + Class serviceType = service.getClass(); + UUID serviceUUID = 
((Service)service).getServiceUUID(); + + QuorumPeerAttr quorumPeerAttr = + (QuorumPeerAttr)(EntryUtil.getEntryByType + (attrs, QuorumPeerAttr.class)); + + InetAddress peerAddr = quorumPeerAttr.address; + int clientPort = quorumPeerAttr.clientPort; + int ensembleSize = quorumPeerAttr.nQuorumPeers; + + if(logger.isDebugEnabled()) { + logger.log(Level.DEBUG, "1 of "+ensembleSize+" quorum peer(s) " + +"DOWN [addr="+peerAddr+", port="+clientPort+"]"); + } + hostPortMap.remove(serviceUUID); + } + + public void serviceChanged(ServiceDiscoveryEvent event) { + + ServiceItem preItem = event.getPreEventServiceItem(); + ServiceItem postItem = event.getPostEventServiceItem(); + + ServiceID serviceId = postItem.serviceID; + Object service = postItem.service; + + Class serviceType = service.getClass(); + + Entry[] preAttrs = preItem.attributeSets; + Entry[] postAttrs = postItem.attributeSets; + + UUID serviceUUID = ((Service)service).getServiceUUID(); + + QuorumPeerAttr preQuorumPeerAttr = null; + QuorumPeerAttr postQuorumPeerAttr = null; + + if (preAttrs != null) { + preQuorumPeerAttr = + (QuorumPeerAttr)(EntryUtil.getEntryByType + (preAttrs, QuorumPeerAttr.class)); + } + if (postAttrs != null) { + postQuorumPeerAttr = + (QuorumPeerAttr)(EntryUtil.getEntryByType + (postAttrs, QuorumPeerAttr.class)); + } + + InetAddress prePeerAddr = null; + int preClientPort = Integer.MIN_VALUE; + int preEnsembleSize = Integer.MIN_VALUE; + if (preQuorumPeerAttr != null) { + prePeerAddr = preQuorumPeerAttr.address; + preClientPort = preQuorumPeerAttr.clientPort; + preEnsembleSize = preQuorumPeerAttr.nQuorumPeers; + } + + InetAddress postPeerAddr = null; + int postClientPort = Integer.MIN_VALUE; + int postEnsembleSize = Integer.MIN_VALUE; + if (postQuorumPeerAttr != null) { + postPeerAddr = postQuorumPeerAttr.address; + postClientPort = postQuorumPeerAttr.clientPort; + postEnsembleSize = postQuorumPeerAttr.nQuorumPeers; + } + + if ((preQuorumPeerAttr != null) && (postQuorumPeerAttr != null)) { + String logStr = "quorum peer(s) CHANGED [pre: addr=" + +prePeerAddr+", port="+preClientPort + +", ensembleSize="+preEnsembleSize + +" >>> post: addr="+postPeerAddr + +", port="+postClientPort+", ensembleSize=" + +postEnsembleSize+"]"; + if ( (prePeerAddr == null) || (postPeerAddr == null) ) { + logger.warn(logStr); + return; + } + if ( !(prePeerAddr.equals(postPeerAddr)) ) { + logger.warn(logStr); + return; + } + if (preClientPort != postClientPort) { + logger.warn(logStr); + return; + } + if (preEnsembleSize != postEnsembleSize) { + logger.warn(logStr); + return; + } + logger.debug(logStr); + } else if( (preQuorumPeerAttr == null) && + (postQuorumPeerAttr != null)) + { + logger.warn("quorum peer(s) CHANGED [attribute added >>> " + +"post: addr="+postPeerAddr+", port=" + +postClientPort+", ensembleSize=" + +postEnsembleSize+"]"); + return; + } else {// pre != null, post == null ==> removed attr + logger.warn("quorum peer(s) CHANGED [pre: addr="+prePeerAddr + +", port="+preClientPort+", ensembleSize=" + +preEnsembleSize+" >>> attribute removed]"); + return; + } + } + } + + private static class ZookeeperEventListener implements Watcher { + private Logger logger; + + public ZookeeperEventListener(Logger logger) { + this.logger = + (logger == null ? 
+ LogUtil.getLog4jLogger((this.getClass()).getName()) : + logger); + } + + public void process(WatchedEvent event) { + KeeperState eventState = event.getState(); + switch (eventState) { + case Unknown: + logger.warn + ("zookeeper event [state="+eventState + +", event="+event+"]"); + break; + case Disconnected: + logger.debug("zookeeper event [state="+eventState+"]");; + break; + case SyncConnected: + logger.debug("zookeeper event [state="+eventState+"]");; + break; + case Expired: + logger.warn("zookeeper event [state="+eventState+"]"); + break; + } + + } + } + +} Property changes on: branches/dev-btm/bigdata-jini/src/java/com/bigdata/jini/quorum/QuorumPeerManager.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Modified: branches/dev-btm/bigdata-jini/src/test/com/bigdata/quorum/QuorumPeerServiceTest.java =================================================================== --- branches/dev-btm/bigdata-jini/src/test/com/bigdata/quorum/QuorumPeerServiceTest.java 2010-11-23 19:58:22 UTC (rev 3982) +++ branches/dev-btm/bigdata-jini/src/test/com/bigdata/quorum/QuorumPeerServiceTest.java 2010-11-23 21:39:34 UTC (rev 3983) @@ -38,6 +38,7 @@ import junit.framework.TestCase; import junit.framework.TestSuite; +import com.bigdata.jini.quorum.QuorumPeerManager; import com.bigdata.service.QuorumPeerService; import com.bigdata.util.Util; import com.bigdata.util.config.NicUtil; @@ -71,7 +72,11 @@ import org.apache.log4j.Level; import org.apache.log4j.Logger; +import org.apache.zookeeper.WatchedEvent; +import org.apache.zookeeper.Watcher; import org.apache.zookeeper.ZooKeeper; +import org.apache.zookeeper.KeeperException.SessionExpiredException; +import org.apache.zookeeper.Watcher.Event.KeeperState; import java.io.BufferedReader; import java.io.File; @@ -198,7 +203,7 @@ private static String[] groupsToDiscover = new String[] {"qaQuorumGroup"}; private static LookupLocator[] locsToDiscover = new LookupLocator[0]; private static DiscoveryManagement ldm; - protected static ServiceDiscoveryManager sdm; + private static ServiceDiscoveryManager sdm; private static CacheListener cacheListener; private static LookupCache quorumCache; @@ -467,9 +472,11 @@ logger.debug("\n\n"+testName+" EXIT\n"); } -// @Test(timeout=5000) - public void testGetSessionId() throws Exception { - testName = "testGetSessionId"; + // Verifies that the ZooKeeper client can be used to connect to the + // ensemble within a given amount of time +// @Test(timeout=20000) + public void testZooKeeperConnect() throws Exception { + testName = "testZooKeeperConnect"; testPassed = false; logger.info("\n\n-- "+testName+" ENTER ----------\n"); @@ -482,22 +489,68 @@ String connectString = strBuf.toString(); logger.info("connectString = "+connectString); - int sessionTimeout = 10000;//10 seconds - ZooKeeper zkClient = - new ZooKeeper(connectString, sessionTimeout, null); + int sessionTimeout = 40*1000;//max when tickTime is 2000 + ZooKeeper zkClient = new ZooKeeper(connectString, + sessionTimeout, + new ZookeeperEventListener()); + ZooKeeper.States state = zkClient.getState(); + logger.info("state[try #0] = "+state); - long sessionId = zkClient.getSessionId(); - logger.info("sessionId = "+sessionId); - + if ( !state.equals(ZooKeeper.States.CONNECTED) ) { + int nWait = 10; + for (int i=0; i<nWait; i++) { + Util.delayMS(1L*1000L); + state = zkClient.getState(); + logger.info("state[try #"+(i+1)+"] = "+zkClient.getState()); + if ( state.equals(ZooKeeper.States.CONNECTED) ) break; 
+ } + } + if ( state.equals(ZooKeeper.States.CONNECTED) ) { + testPassed = true; + } zkClient.close(); + logger.debug("\n\n"+testName+" EXIT\n"); + } + // Verifies that the QuorumPeerManager class that wraps the ZooKeeper + // client can be used to discover and connect to the ensemble started + // by this test class. +// @Test(timeout=20000) + public void testQuorumPeerManagerConnect() throws Exception { + testName = "testQuorumPeerManagerConnect"; + testPassed = false; + logger.info("\n\n-- "+testName+" ENTER ----------\n"); + + int sessionTimeout = 40*1000;//max when tickTime is 2000 + QuorumPeerManager peerMgr = + new QuorumPeerManager(sdm, sessionTimeout, logger); + assertTrue("failed on QuorumPeerManager instantiation " + +"[null returned]", (peerMgr != null) ); + + ZooKeeper.States state = null; + try { + state = peerMgr.getState(); + } catch(IOException e) { + logger.warn("failed on QuorumPeerManager instantiation", e); + return; + } + assertTrue("getState failed [null]", (state != null) ); + logger.info("state = "+state); + + assertTrue("getState failed [not connected]", + state.equals(ZooKeeper.States.CONNECTED) ); + testPassed = true; + peerMgr.close(); logger.debug("\n\n"+testName+" EXIT\n"); } // Special test that is always the last test; to clearly distinguish the // logged output produced by the previous tests from the logged output // produced by the tearDown process. + // + // REMOVE this test when/if this test class is changed to use the + // @BeforeClass annotation. public void testLast() throws Exception { logger.info("\n\n-- BEGIN TEARDOWN ----------\n"); lastTest = true; @@ -761,8 +814,34 @@ } } - static class ServiceStarterTask implements Runnable { + private static class ZookeeperEventListener implements Watcher { + public void process(WatchedEvent event) { + KeeperState eventState = event.getState(); + switch (eventState) { + case Unknown: + logger.warn + ("zookeeper event [state="+eventState + +", event="+event+"]"); + break; + case Disconnected: + logger.info + ("zookeeper event [state="+eventState+"]");; + break; + case SyncConnected: + logger.info + ("zookeeper event [state="+eventState+"]");; + break; + case Expired: + logger.warn + ("zookeeper event [state="+eventState+"]"); + break; + } + } + } + + private static class ServiceStarterTask implements Runnable { + private String serviceStateDir; private String ensembleSizeOverride; private String clientPortOverride; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
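A hypothetical usage sketch of the QuorumPeerManager introduced in this revision, used in place of a raw ZooKeeper client. It assumes the caller already has a configured ServiceDiscoveryManager and a log4j Logger, relies only on methods shown in the diff, and the znode path is made up:

    import net.jini.lookup.ServiceDiscoveryManager;
    import org.apache.log4j.Logger;
    import org.apache.zookeeper.ZooKeeper;
    import org.apache.zookeeper.data.Stat;
    import com.bigdata.jini.quorum.QuorumPeerManager;

    class QuorumPeerManagerUsageSketch {
        static boolean znodeExists(ServiceDiscoveryManager sdm,
                                   Logger logger,
                                   String path) throws Exception {
            // The manager discovers the ensemble through the sdm's lookup cache
            // and connects lazily (with backoff) on the first wrapped call.
            QuorumPeerManager peerMgr =
                    new QuorumPeerManager(sdm, 40 * 1000, logger);
            try {
                ZooKeeper.States state = peerMgr.getState(); // forces discovery + connect
                logger.info("zookeeper state = " + state);
                Stat stat = peerMgr.exists(path, false /* no watch */);
                return stat != null;
            } finally {
                peerMgr.close(); // closes the wrapped client, marks the manager terminated
            }
        }
    }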
From: <tho...@us...> - 2010-11-23 19:58:29
|
Revision: 3982 http://bigdata.svn.sourceforge.net/bigdata/?rev=3982&view=rev Author: thompsonbry Date: 2010-11-23 19:58:22 +0000 (Tue, 23 Nov 2010) Log Message: ----------- Some more work on the extensible hashing control logic in the test suite. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/TestExtensibleHashing.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/TestExtensibleHashing.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/TestExtensibleHashing.java 2010-11-23 18:07:57 UTC (rev 3981) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/TestExtensibleHashing.java 2010-11-23 19:58:22 UTC (rev 3982) @@ -34,14 +34,12 @@ /** * Test suite for extensible hashing. * - * <br> - * * @todo Persistence capable hash table for high volume hash joins. The data * will be "rows" in a "relation" modeled using binding sets. We can use * dense encoding of these rows since they have a fixed schema (some * columns may allow nulls). There should also be a relationship to how we * encode these data for network IO. - * <p> + * * @todo Extensible hashing: * <p> * - hash(byte[] key) -> IRaba page. Use IRaba for keys/values and key @@ -308,6 +306,9 @@ */ private static class SimpleExtensibleHashMap { + // @todo static logger. +// final transient Logger log = SimpleExtensibleHashMap.class + /** * The #of int32 positions which are available in a {@link SimpleBucket} * . @@ -323,11 +324,11 @@ */ private int globalHashBits; - /** - * The size of the address space (#of buckets addressable given the #of - * {@link #globalHashBits} in use). - */ - private int addressSpaceSize; +// /** +// * The size of the address space (#of buckets addressable given the #of +// * {@link #globalHashBits} in use). +// */ +// private int addressSpaceSize; /** * The address map. You index into this map using @@ -353,10 +354,10 @@ */ private final int[] masks; - /** - * The current mask for the current {@link #globalHashBits}. - */ - private int globalMask; +// /** +// * The current mask for the current {@link #globalHashBits}. +// */ +// private int globalMask; /** * @@ -405,14 +406,14 @@ } - // save the current masking value for the current #of global bits. - globalMask = masks[globalHashBits]; +// // save the current masking value for the current #of global bits. +// globalMask = masks[globalHashBits]; /* * Now work backwards to determine the size of the address space (in * buckets). */ - addressSpaceSize = 1 << globalHashBits; + final int addressSpaceSize = 1 << globalHashBits; /* * Allocate and initialize the address space. All indices are @@ -422,7 +423,8 @@ buckets = new ArrayList<SimpleBucket>(addressSpaceSize/* initialCapacity */); - buckets.add(new SimpleBucket(1/* localHashBits */, bucketSize)); + // Note: the local bits of the first bucket is set to ZERO (0). + buckets.add(new SimpleBucket(0/* localHashBits */, bucketSize)); } @@ -432,15 +434,47 @@ /** The hash of an int key is that int. */ private int hash(final int key) { - return key; + + return key; + } - + + /** + * The index into the address table given that we use + * {@link #globalHashBits} of the given hash value. + * <p> + * Note: This is identical to maskOff(h,{@link #globalHashBits}). + */ + private int getIndexOf(final int h) { + + return maskOff(h, globalHashBits); + + } + + /** + * Mask off all but the lower <i>nbits</i> of the hash value. + * + * @param h + * The hash value. 
+ * @param nbits + * The #of bits to consider. + * @return The hash value considering only the lower <i>nbits</i>. + */ + private int maskOff(final int h, final int nbits) { + + if (nbits < 0 || nbits > 32) + throw new IllegalArgumentException(); + + return h & masks[nbits]; + + } + /** The bucket address given the hash code of a key. */ private int addrOf(final int h) { - final int maskedOffIndex = h & globalMask; + final int index = getIndexOf(h); - return addressMap[maskedOffIndex]; + return addressMap[index]; } @@ -474,7 +508,7 @@ */ public int getAddressSpaceSize() { - return addressSpaceSize; + return addressMap.length; } @@ -511,10 +545,15 @@ * @return <code>true</code> iff the key was found. */ public boolean contains(final int key) { - final int h = hash(key); - final int addr = addrOf(h); - final SimpleBucket b = getBucket(addr); - return b.contains(h,key); + + final int h = hash(key); + + final int addr = addrOf(h); + + final SimpleBucket b = getBucket(addr); + + return b.contains(h, key); + } /** @@ -591,8 +630,11 @@ * logic since detecting some of these cases requires transparency * into the bucket. */ - private void split(final int key, final SimpleBucket b) { - if (globalHashBits < b.localHashBits) { + private void split(final int key, final SimpleBucket b) { + if (log.isDebugEnabled()) + log.debug("globalBits=" + globalHashBits + ",localHashBits=" + + b.localHashBits + ",key=" + key); + if (globalHashBits < b.localHashBits) { // This condition should never arise. throw new AssertionError(); } @@ -616,19 +658,108 @@ * allowed to be larger than the target size and gets treated as * a blob). */ -// doubleAddressSpace(); - /* - * Create a new bucket and wire it into the 2nd entry for the - * hash code for that key. - */ -// final int h = hash(key); -// final int addr1 = addrOf(h); -// final int addr2 = addr + 1; -// final SimpleBucket b1 = getBucket(addr); -// if (b1.insert(h, key)) { -// return; -// } - throw new UnsupportedOperationException(); + // the size of the address space before we double it. + final int oldAddrSize = getAddressSpaceSize(); + // the hash value of the key. + final int h = hash(key); + /* + * The index into the address space for the hash key given the + * #of bits considered before we double the address space. + */ + final int oldIndex = getIndexOf(h); + // the address of the bucket to be split. + final int addrOld = addressMap[oldIndex]; + /* + * The address into the new address map of the new bucket (once + * it gets created). + * + * Note: We find that entry by adding the size of the old + * address table to the index within the table of the bucket to + * be split. + */ + final int newIndex = oldIndex + oldAddrSize; + final int addrNew = addressMap[newIndex]; + // double the address space. + doubleAddressSpace(); + /* + * Create a new bucket and wire it into the 2nd entry for the + * hash code for that key. + */ + // the original bucket. + final SimpleBucket bold = getBucket(addrOld); + bold.localHashBits++; + // the new bucket. + final SimpleBucket bnew = new SimpleBucket(bold.localHashBits, + bucketSize); + // The address for the new bucket. + final int addrBNew = buckets.size(); + // Add to the chain of buckets. + buckets.add(bnew); + // Update the address table to point to the new bucket. + addressMap[addrNew] = addrBNew; + /* + * FIXME Redistribute the keys in the old bucket between the old + * and new bucket by considering one more bit in their hash + * values. 
+ * + * Note: The move has to be handled in a manner which does not + * have side-effects which put the visitation of the keys in the + * original bucket out of whack. The code below figures out + * which keys move and which stay and copies the ones that move + * in one step. It then goes back through and deletes all keys + * which are found in the new bucket from the original bucket. + * + * @todo As a pre-condition to splitting the bucket, we need to + * verify that at least one key is not the same as the others in + * the bucket. If all keys are the same, then we should have + * followed an overflow path instead of a split path. + */ + { + // // flag for each key says whether it moves or stays. + // final boolean[] move = new boolean[bold.size]; + for (int i = 0; i < bold.size; i++) { + // a key from the original bucket. + final int k = bold.data[i]; + // the hash of the key with the #of of local bits. + final int h1 = maskOff(k, bold.localHashBits); + if (h1 == oldIndex) { + // The key does not move. + continue; + // move[i] = false; + } else if (h1 == newIndex) { + // The key will move. + // move[i] = true; + bnew.insert(h/* hash(key) */, key); + } else { + // Must be hashed to one of these two buckets!!! + throw new AssertionError(); + } + } + for (int i = 0; i < bnew.size; i++) { + // a key from the new bucket. + final int k = bnew.data[i]; + // delete the key from the old bucket. + bold.delete(h/* hash(key) */, key); + } + } + /* + * Insert the key into the expanded hash table. + */ + { + // the address of the bucket for that hash code. + final int addr = addrOf(h); + // the bucket for that address. + final SimpleBucket btmp = getBucket(addr); + if (btmp.insert(h, key)) { + // insert was successful. + return; + } + /* + * FIXME This could be a variety of special conditions which + * need to be handled. + */ + throw new UnsupportedOperationException(); + } } if (globalHashBits > b.localHashBits) { /* @@ -654,19 +785,56 @@ } } - /** - * Doubles the address space. - * - * FIXME Review the exact rule for doubling the address space. - */ - private void doubleAddressSpace() { - globalHashBits += 1; - final int[] tmp = addressMap; - addressMap = new int[tmp.length << 1]; - for (int i = 0, j = 0; i < tmp.length; i++) { - addressMap[j++] = tmp[i]; - addressMap[j++] = tmp[i]; - } + /** + * Doubles the address space. + * <p> + * This allocates a new address table and initializes it with TWO (2) + * identical copies of the current address table, one right after the + * other and increases {@link #globalHashBits} by ONE (1). + * <p> + * This operation preserves the current mapping of hash values into an + * address table when we consider one more bit in those hash values. For + * example, if we used to consider <code>3</code> bits of the hash value + * then we will now consider <code>4</code> bits. If the fourth bit of + * the hash value is ZERO (0) then it addresses into the first copy of + * the address table. If the fourth bit of the hash value is ONE (1) + * then it addresses into the second copy of the address table. Since + * the entries point to the same buckets as they did when we only + * considered <code>3</code> bits of the hash value the mapping of the + * keys onto the buckets is not changed by this operation. 
+ */ + private void doubleAddressSpace() { + + if (log.isInfoEnabled()) + log.info("Doubleing the address space: globalBits=" + + globalHashBits + ", addressSpaceSize=" + + getAddressSpaceSize()); + + final int oldLength = addressMap.length; + + // allocate a new address table which is twice a large. + final int[] tmp = new int[oldLength << 1]; + + /* + * Copy the current address table into the lower half of the new + * table. + */ + System.arraycopy(addressMap/* src */, 0/* srcPos */, tmp/* dest */, + 0/* destPos */, oldLength); + + /* + * Copy the current address table into the upper half of the new + * table. + */ + System.arraycopy(addressMap/* src */, 0/* srcPos */, tmp/* dest */, + oldLength/* destPos */, oldLength); + + // Replace the address table. + addressMap = tmp; + + // Consider one more bit in the hash value of the keys. + globalHashBits += 1; + } private void merge(final int h, final SimpleBucket b) { @@ -710,9 +878,52 @@ * Note: This is NOT thread-safe! */ public Iterator<SimpleBucket> buckets() { - return buckets.iterator(); + + return buckets.iterator(); + } - + + /** + * Return the #of entries in the hash table having the given key. + * + * @param key + * The key. + * + * @return The #of entries having that key. + */ + public int[] getEntryCount(final int key) { + throw new UnsupportedOperationException(); + } + + /** + * Return all entries in the hash table having the given key. + * + * @param key + * The key. + * + * @return The entries in the hash table having that key. + * + * @todo this should return an iterator over the tuples for the real + * implementation. + */ + public int[] getEntries(final int key) { + throw new UnsupportedOperationException(); + } + + /** + * Return an entry in the hash table having the given key. If there is + * more than one entry for that key, then any entry having that key may + * be returned. + * + * @param key + * The key. + * + * @return An entry having that key. + */ + public int getEntry(final int key) { + throw new UnsupportedOperationException(); + } + } /** @@ -720,24 +931,32 @@ */ private static class SimpleBucket { - /** The #of hash code bits which are in use by this {@link SimpleBucket}. */ - int localHashBits; + /** + * The #of hash code bits which are in use by this {@link SimpleBucket}. + * + * @todo If we need to examine this when we change the size of the + * address space then it makes more sense to have this as local + * metadata in the address table than as local data in the bucket + * (the latter would require us to visit each bucket when + * expanding the address space). + */ + private int localHashBits; /** * The #of keys stored in the bucket. The keys are stored in a dense * array. For a given {@link #size}, the only indices of the array which * have any data are [0:{@link #size}-1]. */ - int size; + private int size; /** * The user data for the bucket. */ - final int[] data; + private final int[] data; - public SimpleBucket(final int localHashBits,final int bucketSize) { + public SimpleBucket(final int localHashBits, final int bucketSize) { - if (localHashBits <= 0 || localHashBits > 32) + if (localHashBits < 0 || localHashBits > 32) throw new IllegalArgumentException(); this.localHashBits = localHashBits; @@ -849,7 +1068,29 @@ return false; } + + /** + * The #of entries in the bucket. + */ + public int getEntryCount() { + return size; + + } + + /** + * A copy of the entries. + * <p> + * Note: This method returns as an array in order to avoid autobox + * issues which arise with int32 keys and Integers. 
Visitation of tuples + * in the bucket will be handled differently in a full implementation. + */ + public int[] getEntries() { + + return data.clone(); + + } + } /** @@ -962,26 +1203,167 @@ // still not split. assertEquals("bucketCount", 1, map.getBucketCount()); - // force a split. + // force a split (83 == 0b10000011) map.insert(83); assertEquals("bucketCount", 2, map.getBucketCount()); } - - /** - * Unit test with the following configuration and insert / event sequence: - * <ul> - * <li>bucket size := 4k</li> - * <li></li> - * <li></li> - * <li></li> - * </ul> - * <pre> - * </pre> - */ + + /** + * Unit test based on example in + * "External Memory Algorithms and Data Structures" by Vitter, page 239. + * <p> + * The assignment of keys to buckets in this example is based on the low + * bits of the key. Initially, the global depth is <code>3</code> so only + * the lower three bits are considered. + * + * <pre> + * 0 0 + * 1 1 + * 2 10 + * 3 11 + * 4 100 + * 5 101 + * 6 110 + * 7 111 + * </pre> + * + * To setup the example, we insert the sequence {4, 23, 18, 10, 44, 32, 9} + * into an extensible hashing algorithm using buckets with a capacity of + * <code>3</code>. The keys and the values stored in the buckets are int32 + * values. Inserting this sequence of keys should yield a hash table with a + * global depth <em>d</em> of <code>8</code> having 8 addresses and four + * buckets arranged as follows. + * + * <pre> + * [000] -> (A) [k=2] {4, 44, 32} + * [001] -> (C) [k=1] {23, 9} + * [010] -> (B) [k=3] {18} + * [011] -> (C) + * [100] -> (A) + * [101] -> (C) + * [110] -> (D) [k=3] {10} + * [111] -> (C) + * </pre> + * + * Where <em>k</em> is the local depth of a given bucket and the buckets are + * labeled A, B, C, ... based on the order in which they are written on the + * page in the example (which presumably is the order in which those buckets + * were created, but I have not yet verified this). + * <p> + * Next, key <code>76</code> is inserted. Considering only the d=3 bits of + * its hash code, this key would be inserted the address having bits + * <code>100</code>, which is mapped onto bucket (A). This will cause bucket + * (A) to be split into (A) and (D). The local depth of (A) is increased by + * one to <code>k=3</code>. The local depth of (D) is the same as (A). The + * hash table looks as follows after this split. + * + * <pre> + * [000] -> (A) [k=3] {32} + * [001] -> (C) [k=1] {23, 9} + * [010] -> (B) [k=3] {18} + * [011] -> (C) + * [100] -> (E) [k=4] {4, 44, 76} + * [101] -> (C) + * [110] -> (D) [k=3] {10} + * [111] -> (C) + * </pre> + * + * Finally, key <code>20</code> is inserted, again into the block addressed + * by address table entry <code>100</code>. This causes block (E) to be + * split into two blocks (E) and (F) having local bits <code>k=4</code>. The + * address table is doubled and the #of global bits is increased to + * <code>d=4</code>. The hash table after this expansion looks as follows: + * + * <pre> + * [0000] -> (A) [k=3] {32} + * [0001] -> (C) [k=1] {23, 9} + * [0010] -> (B) [k=3] {18} + * [0011] -> (C) + * [0100] -> (E) [k=4] {4, 20} + * [0101] -> (C) + * [0110] -> (D) [k=3] {10} + * [0111] -> (C) + * ---- extension ---- + * [1000] -> (A) + * [1001] -> (C) + * [1010] -> (B) + * [1011] -> (C) + * [1100] -> (F) [k=4] {44, 76} + * [1101] -> (C) + * [1110] -> (D) + * [1111] -> (C) + * </pre> + * + * When the address space is extended, the original address table entries + * remain at their given offsets into the new table. 
The new address table + * entries are initialized from the same entry which would have been + * addressed if we considered one less bit. For example, any hash code + * ending in <code>000</code> used to index into the first entry in the + * address table. After the address table is split, one more bit will be + * considered in the hash code of the key. Therefore, the same key will + * either be in the address table entry <code>0000</code> or the entry + * <code>1000</code>. To keep everything consistent, the address table entry + * for <code>1000</code> is therefore initialized from the address table + * entry for <code>0000</code>. In practice, all you are doing is writing a + * second copy of the original address table entries starting immediately + * after the original address table entries. + * <p> + * Personally, I find this representation much easier to interpret. You can + * see that the address table was simply duplicated and (E) was split into + * two buckets. One remains (E) and continues to be addressed from the 1st + * half of the address table. The other is (F) and is the only change in the + * 2nd half of the address table. + */ public void test_simple() { - - } - + final int bucketSize = 3; + + final SimpleExtensibleHashMap map = new SimpleExtensibleHashMap( + 1/* initialCapacity */, bucketSize); + + /* + * Insert the initial key sequence. + * + * FIXME Verify post-condition in detail for each bucket since the next + * steps require the problem to be setup correctly. + */ + final int[] keys0 = new int[]{4, 23, 18, 10, 44, 32, 9}; + for(int key : keys0) { + assertFalse(map.contains(key)); + map.insert(key); + assertTrue(map.contains(key)); + } + + assertEquals("globalHashBits", 3, map.getGlobalHashBits()); + assertEquals("addressSpace", 8, map.getAddressSpaceSize()); + assertEquals("bucketCount", 3, map.getBucketCount()); + + /* + * Insert key 76. This splits (A), which is the bucket addressed by + * [000]. This does not cause the address space to double since [100] + * was also a reference to (A). This increments the local depth of (A) + * to [3] and also creates a new bucket (E), having the same local depth + * as (A) [3], and then sets a reference to that bucket at index [100] + * of the address table. The keys in (A) are redistributed between (A) + * and (E) by considering the [k=3] bit hash, where [k] is the new local + * depth of (A) and (E). This gets the hash table into a consistent + * state. The new key (76) is then inserted into the hash table and + * winds up in (E). + * + * @todo verify the contents of each bucket in detail. + */ + map.insert(76); + + // unchanged. + assertEquals("globalHashBits", 3, map.getGlobalHashBits()); + assertEquals("addressSpace", 8, map.getAddressSpaceSize()); + + // one more bucket. + assertEquals("bucketCount", 4, map.getBucketCount()); + + fail("write test"); + + } + } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
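The split path in r3982 above is easier to follow in isolation. Below is a minimal, self-contained sketch of the same technique under the conventions of the test class: low-order-bit addressing into a table of 2^globalHashBits entries, doubling of that table when a bucket's local depth has caught up with the global depth, and redistribution of the bucket's keys on one additional hash bit. The class and member names (ExtensibleHashSketch, Bucket, and so on) are invented for illustration; this is a sketch of the technique, not the committed SimpleExtensibleHashMap.

import java.util.ArrayList;
import java.util.List;

public class ExtensibleHashSketch {

    private static final class Bucket {
        int localHashBits;   // #of low-order hash bits this bucket distinguishes
        int size;            // #of keys currently stored in the dense array
        final int[] data;    // dense key array, capacity == bucketSize
        Bucket(final int localHashBits, final int capacity) {
            this.localHashBits = localHashBits;
            this.data = new int[capacity];
        }
        boolean insert(final int key) { // false => the caller must split this bucket
            if (size == data.length)
                return false;
            data[size++] = key;
            return true;
        }
    }

    private final int bucketSize;
    private int globalHashBits = 1;
    private int[] addressMap = { 0, 0 };                 // masked hash -> bucket id
    private final List<Bucket> buckets = new ArrayList<Bucket>();

    public ExtensibleHashSketch(final int bucketSize) {
        this.bucketSize = bucketSize;
        buckets.add(new Bucket(0/* localHashBits */, bucketSize));
    }

    private static int hash(final int key) { return key; } // as in the test suite

    private static int maskOff(final int h, final int nbits) {
        return nbits >= 32 ? h : h & ((1 << nbits) - 1);    // keep only the low nbits
    }

    public boolean contains(final int key) {
        final Bucket b = buckets.get(addressMap[maskOff(hash(key), globalHashBits)]);
        for (int i = 0; i < b.size; i++)
            if (b.data[i] == key)
                return true;
        return false;
    }

    public void insert(final int key) {
        final int bucketId = addressMap[maskOff(hash(key), globalHashBits)];
        if (buckets.get(bucketId).insert(key))
            return;
        split(bucketId);
        insert(key); // retry against the updated table (may split again)
    }

    /** Doubles the address table: two identical copies, one more global bit. */
    private void doubleAddressSpace() {
        final int[] tmp = new int[addressMap.length << 1];
        System.arraycopy(addressMap, 0, tmp, 0, addressMap.length);
        System.arraycopy(addressMap, 0, tmp, addressMap.length, addressMap.length);
        addressMap = tmp;
        globalHashBits++;
    }

    private void split(final int oldId) {
        final Bucket bold = buckets.get(oldId);
        if (bold.localHashBits == globalHashBits)
            doubleAddressSpace();            // case 1: the table is out of addresses
        final int bit = bold.localHashBits;  // the newly considered hash bit
        bold.localHashBits++;
        final Bucket bnew = new Bucket(bold.localHashBits, bucketSize);
        final int newId = buckets.size();
        buckets.add(bnew);
        // Relabel the half of the old bucket's address entries whose new bit is set.
        for (int i = 0; i < addressMap.length; i++)
            if (addressMap[i] == oldId && ((i >>> bit) & 1) == 1)
                addressMap[i] = newId;
        // Redistribute the keys on the newly considered bit, compacting in place.
        int kept = 0;
        for (int i = 0; i < bold.size; i++) {
            final int k = bold.data[i];
            if (((hash(k) >>> bit) & 1) == 1)
                bnew.insert(k);              // cannot overflow: bnew starts empty
            else
                bold.data[kept++] = k;
        }
        bold.size = kept;
        // Caveat: if every key shares the same low hash bits, the retry in insert()
        // keeps splitting; a real implementation needs overflow buckets here.
    }
}

One design point the diff itself raises as a @todo: the local depth is kept inside each bucket here, as in the test class, but keeping it as metadata in the address table instead would avoid having to consult every bucket when the address space is doubled.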
From: <tho...@us...> - 2010-11-23 18:08:03

Revision: 3981 http://bigdata.svn.sourceforge.net/bigdata/?rev=3981&view=rev Author: thompsonbry Date: 2010-11-23 18:07:57 +0000 (Tue, 23 Nov 2010) Log Message: ----------- Removed problematic @Override tags Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2010-11-23 16:30:18 UTC (rev 3980) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2010-11-23 18:07:57 UTC (rev 3981) @@ -2650,7 +2650,7 @@ private static final transient Comparator<Vertex> INSTANCE = new BOpIdComparator(); - @Override +// @Override public int compare(final Vertex o1, final Vertex o2) { final int id1 = o1.pred.getId(); final int id2 = o2.pred.getId(); @@ -2674,7 +2674,7 @@ public static final transient Comparator<Edge> INSTANCE = new EstimatedEdgeCardinalityComparator(); - @Override +// @Override public int compare(final Edge o1, final Edge o2) { if (o1.sample == null && o2.sample == null) { // Neither edge is weighted. This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
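The @Override removals in r3981 above are presumably a source-level issue rather than a behavioral change: under a 1.5 source level, javac rejects @Override on a method that only implements an interface method (Comparator.compare in this case), while Java 6 and later accept it. A minimal sketch of the pattern the revision leaves behind, with the annotation commented out so it can be restored once the older source level is dropped, is shown below; the comparator and its element type are invented for illustration and are not the JoinGraph classes.

import java.util.Comparator;

/**
 * Illustrative only: the element type and comparator name are invented,
 * not the classes touched by r3981.
 */
class ElementIdComparator implements Comparator<Integer> {

//  @Override // accepted from Java 6 on, rejected by javac -source 1.5
    public int compare(final Integer o1, final Integer o2) {
        return o1.compareTo(o2);
    }

}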
From: <tho...@us...> - 2010-11-23 16:30:25
Revision: 3980 http://bigdata.svn.sourceforge.net/bigdata/?rev=3980&view=rev Author: thompsonbry Date: 2010-11-23 16:30:18 +0000 (Tue, 23 Nov 2010) Log Message: ----------- javadoc edits on join graph and extensible hashing Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/TestExtensibleHashing.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2010-11-23 15:22:27 UTC (rev 3979) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2010-11-23 16:30:18 UTC (rev 3980) @@ -131,11 +131,38 @@ * query optimizer SHOULD pay attention to these things and exploit their * conditional selectivity for the query plan.] * - * @todo When there are optional join graphs, are we going to handle that by - * materializing a sample (or all) of the joins feeding that join graph - * and then apply the runtime optimizer to the optional join graph, - * getting out a sample to feed onto any downstream join graph? + * @todo Handle optional join graphs by first applying the runtime optimizer to + * the main join graph and obtaining a sample for the selected join path. + * That sample will then be feed into the the optional join graph in order + * to optimize the join order within the optional join graph (a join order + * which is selective in the optional join graph is better since it will + * result in faster rejections of intermediate results and hence do less + * work). + * <p> + * This is very much related to accepting a collection of non-empty + * binding sets when running the join graph. However, optional join graph + * should be presented in combination with the original join graph and the + * starting paths must be constrained to have the selected join path for + * the original join graph as a prefix. With this setup, the original join + * graph has been locked in to a specific join path and the sampling of + * edges and vertices for the optional join graph can proceed normally. + * <p> + * True optionals will always be appended as part of the "tail plan" for + * any join graph and can not be optimized as each optional join must run + * regardless (as long as the intermediate solution survives the + * non-optional joins). * + * @todo There are two cases where a join graph must be optimized against a + * specific set of inputs. In one case, it is a sample (this is how + * optimization of an optional join group proceeds per above). In the + * other case, the set of inputs is fixed and is provided instead of a + * single empty binding set as the starting condition. This second case is + * actually a bit more complicated since we can not use a random sample of + * vertices unless the do not share any variables with the initial binding + * sets. When there is a shared variable, we need to do a cutoff join of + * the edge with the initial binding sets. When there is not a shared + * variable, we can sample the vertex and then do a cutoff join. 
+ * * @todo When we run into a cardinality estimation underflow (the expected * cardinality goes to zero) we could double the sample size for just * those join paths which hit a zero estimated cardinality and re-run them Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/TestExtensibleHashing.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/TestExtensibleHashing.java 2010-11-23 15:22:27 UTC (rev 3979) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/TestExtensibleHashing.java 2010-11-23 16:30:18 UTC (rev 3980) @@ -35,176 +35,125 @@ * Test suite for extensible hashing. * * <br> - * (***) Persistence capable hash table for high volume hash joins. * - * The data will be "rows" in a "relation" modeled using binding sets. We can - * use dense encoding of these rows since they have a fixed schema (some columns - * may allow nulls). There should also be a relationship to how we encode these - * data for network IO. + * @todo Persistence capable hash table for high volume hash joins. The data + * will be "rows" in a "relation" modeled using binding sets. We can use + * dense encoding of these rows since they have a fixed schema (some + * columns may allow nulls). There should also be a relationship to how we + * encode these data for network IO. + * <p> + * @todo Extensible hashing: + * <p> + * - hash(byte[] key) -> IRaba page. Use IRaba for keys/values and key + * search. + * <p> + * - Split if overflows the bucket size (alternative is some versioning + * where the computed hash value indexes into a logical address which is + * then translated to an IRawStore address - does the RWStore help us out + * here?) + * <p> + * - Ring buffer to wire in hot nodes (but expect random touches). + * <p> + * - initially, no history (no versioning). just replace the record when + * it is evicted from the ring buffer. + * <p> + * What follows is a summary of an extensible hashing design for bigdata. + * This covers most aspects of the hash map design, but does not drill + * deeply into the question of scale-out hash maps. The immediate goal is + * to develop a hash map which can be used for a variety of tasks, + * primarily pertaining to analytic query as described above. + * <p> + * Extensible hashing is one form of dynamic hashing in which buckets are + * split or coalesced as necessary and in which the reorganization is + * performed on one bucket at a time. + * <p> + * Given a hash function h generating, e.g., int32 values where b is the + * #of bits in the hash code. At any point, we use 0 LTE i LTE b bits of + * that hash code as an index into a table of bucket addresses. The value + * of i will change as the #of buckets changes based on the scale of the + * data to be addressed. + * <p> + * Given a key K, the bucket address table is indexed with i bits of the + * hash code, h(K). The value at that index is the address of the hash + * bucket. However, several consecutive entries in the hash table may + * point to the same hash bucket (for example, the hash index may be + * created with i=4, which would give 16 index values but only one initial + * bucket). The bucket address table entries which map onto the same hash + * bucket will have a common bit length, which may be LTE [i]. This bit + * length is not stored in the bucket address table, but each bucket knows + * its bit length. 
Given a global bit length of [i] and a bucket bit + * length of [j], there will be 2^(i-j) bucket address table entries which + * point to the same bucket. + * <p> + * Hash table versioning can be easily implemented by: (a) a checkpoint + * record with the address of the bucket address table (which could be + * broken into a two level table comprised of 4k pages in order to make + * small updates faster); and (b) a store level policy such that we do not + * overwrite the modified records directly (though they may be recycled). + * This will give us the same consistent read behind behavior as the + * B+Tree. + * <p> + * The IIndex interface will need to be partitioned appropriately such + * that the IRangeScan interface is not part of the hash table indices (an + * isBTree() and isHashMap() method might be added). + * <p> + * While the same read-through views for shards should work with hash maps + * as work with B+Tree indices, a different scheme may be necessary to + * locate those shards and we might need to use int64 hash codes in + * scale-out or increase the page size (at least for the read-only hash + * segment files, which would also need a batch build operation). The + * AccessPath will also need to be updated to be aware of classes which do + * not support key-range scans, but only whole relation scans. + * <p> + * Locking on hash tables without versioning should be much simpler than + * locking on B+Trees since there is no hierarchy and more operations can + * proceed without blocking in parallel. + * <p> + * We can represent tuples (key,value pairs) in an IRaba data structure + * and reuse parts of the B+Tree infrastructure relating to compression of + * IRaba, key search, etc. In fact, we might use to lazy reordering notion + * from Monet DB cracking to only sort the keys in a bucket when it is + * persisted. This is also a good opportunity to tackling splitting the + * bucket if it overflows the target record size, e.g., 4k. We could throw + * out an exception if the sorted, serialized, and optionally compressed + * record exceeds the target record size and then split the bucket. All of + * this seems reasonable and we might be able to then back port those + * concepts into the B+Tree. + * <p> + * We need to estimate the #of tuples which will fit within the bucket. We + * can do this based on: (a) the byte length of the keys and values (key + * compression is not going to help out much for a hash index since the + * keys will be evenly distributed even if they are ordered within a + * bucket); (b) the known per tuple overhead and per bucket overhead; (c) + * an estimate of the compression ratio for raba encoding and record + * compression. This estimate could be used to proactively split a bucket + * before it is evicted. This is most critical before anything is evicted + * as we would otherwise have a single very large bucket. So, let's make + * this simple and split the bucket if the sum of the key + val bytes + * exceeds 120% of the target record size (4k, 8k, etc). The target page + * size can be a property of the hash index. [Note: There is an implicit + * limit on the size of a tuple with this approach. The alternative is to + * fix the #of tuples in the bucket and allow buckets to be of whatever + * size they are for the specific data in that bucket.] * - * https://sourceforge.net/apps/trac/bigdata/ticket/203 + * @todo RWStore integration notes: + * <p> + * - RWStore with "temporary" quality. Creates the backing file lazily on + * eviction from the write service. 
+ * <p> + * - RWStore with "RAM" only? (Can not exceed the #of allocated buffers or + * can, but then it might force paging out to swap?) + * <p> + * - RWStore with "RAM" mostly. Converts to disk backed if uses all those + * buffers. Possibly just give the WriteCacheService a bunch of write + * cache buffers (10-100) and have it evict to disk *lazily* rather than + * eagerly (when the #of free buffers is down to 20%). + * <p> + * - RWStore with memory mapped file? As I recall, the problem is that we + * can not guarantee extension or close of the file under Java. But some + * people seem to make this work... * - * - * Extendable hash table: - * - * - hash(byte[] key) -> IRaba page. Use IRaba for keys/values and key search. - * - * - Split if overflows the bucket size (alternative is some versioning where - * the computed hash value indexes into a logical address which is then - * translated to an IRawStore address - does the RWStore help us out here?) - * - * - ring buffer to wire in hot nodes (but expect random touches). - * - * - initially, no history (no versioning). just replace the record when it is - * evicted from the ring buffer. - * - * What follows is a summary of an extendable hash map design for bigdata. This - * covers most aspects of the hash map design, but does not drill deeply into - * the question of scale-out hash maps. The immediate goal is to develop a hash - * map which can be used for a variety of tasks, primarily pertaining to - * analytic query as described above. - * - * Extendable hashing is one form of dynamic hashing in which buckets are split - * or coalesced as necessary and in which the reorganization is performed on one - * bucket at a time. - * - * Given a hash function h generating, e.g., int32 values where b is the #of - * bits in the hash code. At any point, we use 0 LTE i LTE b bits of that hash - * code as an index into a table of bucket addresses. The value of i will change - * as the #of buckets changes based on the scale of the data to be addressed. - * - * Given a key K, the bucket address table is indexed with i bits of the hash - * code, h(K). The value at that index is the address of the hash bucket. - * However, several consecutive entries in the hash table may point to the same - * hash bucket (for example, the hash index may be created with i=4, which would - * give 16 index values but only one initial bucket). The bucket address table - * entries which map onto the same hash bucket will have a common bit length, - * which may be LTE [i]. This bit length is not stored in the bucket address - * table, but each bucket knows its bit length. Given a global bit length of [i] - * and a bucket bit length of [j], there will be 2^(i-j) bucket address table - * entries which point to the same bucket. - * - * Lookup: Compute h(K) and right shift (w/o sign extension) by i bits. Use this - * to index into the bucket address table. The address in the table is the - * bucket address and may be used to directly read the bucket. - * - * Insert: Per lookup. On overflow, we need to split the bucket moving the - * existing records (and the new record) into new buckets. How this proceeds - * depends on whether the hash #of bits used in the bucket is equal to the #of - * bits used to index into the bucket address table. There are two cases: - * - * Split case 1: If i (global bits of the hash which are in use) == j (bucket - * bits of the hash which are in use), then the bucket address table is out of - * space and needs to be resized. Let i := i+1. 
This doubles the size of the - * bucket address table. Each original entry becomes two entries in the new - * table. For the specific bucket which is to be split, a new bucket is - * allocated and the 2nd bucket address table for that entry is set to the - * address of the new bucket. The tuples are then assigned to the original - * bucket and the new bucket by considering the additional bit of the hash code. - * Assuming that all keys are distinct, then one split will always be sufficient - * unless all tuples in the original bucket have the same hash code when their - * i+1 th bit is considered. In this case, we resort to an "overflow" bucket - * (alternatively, the bucket is allowed to be larger than the target size and - * gets treated as a blob). - * - * Split case 2: If i is GT j, then there will be at least two entries in the - * bucket address table which point to the same bucket. One of those entries is - * relabeled. Both the original bucket and the new bucket have their #of bits - * incremented by one, but the #of global bits in use does not change. Of the - * entries in the bucket address table which used to point to the original - * bucket, the 1st half are left alone and the 2nd half are updated to point to - * the new bucket. (Note that the #of entries depends on the global #of hash - * bits in use and the bucket local #of hash bits in use and will be 2 if there - * is a difference of one between those values but can be more than 2 and will - * always be an even number). The entries in the original bucket are rehashed - * and assigned based on the new #of hash bits to be considered to either the - * original bucket or the new bucket. The record is then inserted based on the - * new #of hash bits to be considered. If it still does not fit, then either - * handle by case (1) or case (2) as appropriate. - * - * Note that records which are in themselves larger than the bucket size must - * eventually be handled by: (A) using an overflow record; (B) allowing the - * bucket to become larger than the target page size (using a larger allocation - * slot or becoming a blob); or (C) recording the tuple as a raw record and - * maintaining only the full hash code of the tuple and its raw record address - * in the bucket (this would allow us to automatically promote long literals out - * of the hash bucket and a similar approach might be used for a B+Tree leaf, - * except that a long key will still cause a problem [also, this implies that - * deleting a bucket or leaf on the unisolated index of the RWStore might - * require a scan of the IRaba to identify blob references which must also be - * deleted, so it makes sense to track those as part of the bucket/leaf - * metadata). - * - * Delete: Buckets may be removed no later than when they become empty and doing - * this is a local operation with costs similar to splitting a bucket. Likewise, - * it is clearly possible to coalesce buckets which underflow before they become - * empty by scanning the 2^(i-j) buckets indexed from the entries in the bucket - * address table using i bits from h(K). [I need to research handling deletes a - * little more, including under what conditions it is cost effective to reduce - * the size of the bucket address table itself.] 
- * - * Hash table versioning can be easily implemented by: (a) a checkpoint record - * with the address of the bucket address table (which could be broken into a - * two level table comprised of 4k pages in order to make small updates faster); - * and (b) a store level policy such that we do not overwrite the modified - * records directly (though they may be recycled). This will give us the same - * consistent read behind behavior as the B+Tree. - * - * The IIndex interface will need to be partitioned appropriately such that the - * IRangeScan interface is not part of the hash table indices (an isBTree() and - * isHashMap() method might be added). - * - * While the same read-through views for shards should work with hash maps as - * work with B+Tree indices, a different scheme may be necessary to locate those - * shards and we might need to use int64 hash codes in scale-out or increase the - * page size (at least for the read-only hash segment files, which would also - * need a batch build operation). The AccessPath will also need to be updated to - * be aware of classes which do not support key-range scans, but only whole - * relation scans. - * - * Locking on hash tables without versioning should be much simpler than locking - * on B+Trees since there is no hierarchy and more operations can proceed - * without blocking in parallel. - * - * We can represent tuples (key,value pairs) in an IRaba data structure and - * reuse parts of the B+Tree infrastructure relating to compression of IRaba, - * key search, etc. In fact, we might use to lazy reordering notion from Monet - * DB cracking to only sort the keys in a bucket when it is persisted. This is - * also a good opportunity to tackling splitting the bucket if it overflows the - * target record size, e.g., 4k. We could throw out an exception if the sorted, - * serialized, and optionally compressed record exceeds the target record size - * and then split the bucket. All of this seems reasonable and we might be able - * to then back port those concepts into the B+Tree. - * - * We need to estimate the #of tuples which will fit within the bucket. We can - * do this based on: (a) the byte length of the keys and values (key compression - * is not going to help out much for a hash index since the keys will be evenly - * distributed even if they are ordered within a bucket); (b) the known per - * tuple overhead and per bucket overhead; (c) an estimate of the compression - * ratio for raba encoding and record compression. This estimate could be used - * to proactively split a bucket before it is evicted. This is most critical - * before anything is evicted as we would otherwise have a single very large - * bucket. So, let's make this simple and split the bucket if the sum of the key - * + val bytes exceeds 120% of the target record size (4k, 8k, etc). The target - * page size can be a property of the hash index. [Note: There is an implicit - * limit on the size of a tuple with this approach. The alternative is to fix - * the #of tuples in the bucket and allow buckets to be of whatever size they - * are for the specific data in that bucket.] - * - * - RWStore with "temporary" quality. Creates the backing file lazily on - * eviction from the write service. - * - * - RWStore with "RAM" only? (Can not exceed the #of allocated buffers or can, - * but then it might force paging out to swap?) - * - * - RWStore with "RAM" mostly. Converts to disk backed if uses all those - * buffers. 
Possibly just give the WriteCacheService a bunch of write cache - * buffers (10-100) and have it evict to disk *lazily* rather than eagerly (when - * the #of free buffers is down to 20%). - * - * - RWStore with memory mapped file? As I recall, the problem is that we can - * not guarantee extension or close of the file under Java. But some people seem - * to make this work... + * @see https://sourceforge.net/apps/trac/bigdata/ticket/203 */ public class TestExtensibleHashing extends TestCase2 { @@ -547,13 +496,18 @@ return bucketSize; } - + /** * Return <code>true</code> iff the hash table contains the key. + * <p> + * Lookup: Compute h(K) and right shift (w/o sign extension) by i bits. + * Use this to index into the bucket address table. The address in the + * table is the bucket address and may be used to directly read the + * bucket. * * @param key * The key. - * + * * @return <code>true</code> iff the key was found. */ public boolean contains(final int key) { @@ -565,7 +519,12 @@ /** * Insert the key into the hash table. Duplicates are allowed. + * <p> + * Insert: Per lookup. On overflow, we need to split the bucket moving + * the existing records (and the new record) into new buckets. * + * @see #split(int, int, SimpleBucket) + * * @param key * The key. * @@ -577,12 +536,155 @@ final int h = hash(key); final int addr = addrOf(h); final SimpleBucket b = getBucket(addr); - b.insert(h,key); + if (b.insert(h, key)) { + return; + } + // split the bucket and insert the record (recursive?) + split(key, b); } /** + * Split the bucket, adjusting the address map iff necessary. How this + * proceeds depends on whether the hash #of bits used in the bucket is + * equal to the #of bits used to index into the bucket address table. + * There are two cases: + * <p> + * Case 1: If {@link #globalHashBits} EQ the + * {@link SimpleBucket#localHashBits}, then the bucket address table is + * out of space and needs to be resized. + * <p> + * Case 2: If {@link #globalHashBits} is GT + * {@link SimpleBucket#localHashBits}, then there will be at least two + * entries in the bucket address table which point to the same bucket. + * One of those entries is relabeled. The record is then inserted based + * on the new #of hash bits to be considered. If it still does not fit, + * then either handle by case (1) or case (2) as appropriate. + * <p> + * Note that records which are in themselves larger than the bucket size + * must eventually be handled by: (A) using an overflow record; (B) + * allowing the bucket to become larger than the target page size (using + * a larger allocation slot or becoming a blob); or (C) recording the + * tuple as a raw record and maintaining only the full hash code of the + * tuple and its raw record address in the bucket (this would allow us + * to automatically promote long literals out of the hash bucket and a + * similar approach might be used for a B+Tree leaf, except that a long + * key will still cause a problem [also, this implies that deleting a + * bucket or leaf on the unisolated index of the RWStore might require a + * scan of the IRaba to identify blob references which must also be + * deleted, so it makes sense to track those as part of the bucket/leaf + * metadata). + * + * @param h + * The key which triggered the split. + * @param b + * The bucket lacking sufficient room for the key which + * triggered the split. + * + * @todo caller will need an exclusive lock if this is to be thread + * safe. 
+ * + * @todo Overflow buckets (or oversize buckets) are required when all + * hash bits considered by the local bucket are the same, when all + * keys in the local bucket are the same, and when the record to + * be inserted is larger than the bucket. In order to handle these + * cases we may need to more closely integrate the insert/split + * logic since detecting some of these cases requires transparency + * into the bucket. + */ + private void split(final int key, final SimpleBucket b) { + if (globalHashBits < b.localHashBits) { + // This condition should never arise. + throw new AssertionError(); + } + if (globalHashBits == b.localHashBits) { + /* + * The address table is out of space and needs to be resized. + * + * Let {@link #globalHashBits} := {@link #globalHashBits} + 1. + * This doubles the size of the bucket address table. Each + * original entry becomes two entries in the new table. For the + * specific bucket which is to be split, a new bucket is + * allocated and the 2nd bucket address table for that entry is + * set to the address of the new bucket. The tuples are then + * assigned to the original bucket and the new bucket by + * considering the additional bit of the hash code. Assuming + * that all keys are distinct, then one split will always be + * sufficient unless all tuples in the original bucket have the + * same hash code when their (i+1)th bit is considered (this can + * also occur if duplicate keys are allow). In this case, we + * resort to an "overflow" bucket (alternatively, the bucket is + * allowed to be larger than the target size and gets treated as + * a blob). + */ +// doubleAddressSpace(); + /* + * Create a new bucket and wire it into the 2nd entry for the + * hash code for that key. + */ +// final int h = hash(key); +// final int addr1 = addrOf(h); +// final int addr2 = addr + 1; +// final SimpleBucket b1 = getBucket(addr); +// if (b1.insert(h, key)) { +// return; +// } + throw new UnsupportedOperationException(); + } + if (globalHashBits > b.localHashBits) { + /* + * There will be at least two entries in the address table which + * point to this bucket. One of those entries is relabeled. Both + * the original bucket and the new bucket have their {@link + * SimpleBucket#localHashBits} incremented by one, but the + * {@link #globalHashBits}. Of the entries in the bucket address + * table which used to point to the original bucket, the 1st + * half are left alone and the 2nd half are updated to point to + * the new bucket. (Note that the #of entries depends on the + * global #of hash bits in use and the bucket local #of hash + * bits in use and will be 2 if there is a difference of one + * between those values but can be more than 2 and will always + * be an even number). The entries in the original bucket are + * rehashed and assigned based on the new #of hash bits to be + * considered to either the original bucket or the new bucket. + * The record is then inserted based on the new #of hash bits to + * be considered. If it still does not fit, then either handle + * by case (1) or case (2) as appropriate. + */ + throw new UnsupportedOperationException(); + } + } + + /** + * Doubles the address space. + * + * FIXME Review the exact rule for doubling the address space. 
+ */ + private void doubleAddressSpace() { + globalHashBits += 1; + final int[] tmp = addressMap; + addressMap = new int[tmp.length << 1]; + for (int i = 0, j = 0; i < tmp.length; i++) { + addressMap[j++] = tmp[i]; + addressMap[j++] = tmp[i]; + } + } + + private void merge(final int h, final SimpleBucket b) { + throw new UnsupportedOperationException(); + } + + /** * Delete the key from the hash table (in the case of duplicates, a * random entry having that key is deleted). + * <p> + * Delete: Buckets may be removed no later than when they become empty + * and doing this is a local operation with costs similar to splitting a + * bucket. Likewise, it is clearly possible to coalesce buckets which + * underflow before they become empty by scanning the 2^(i-j) buckets + * indexed from the entries in the bucket address table using i bits + * from h(K). [I need to research handling deletes a little more, + * including under what conditions it is cost effective to reduce the + * size of the bucket address table itself.] * * @param key * The key. @@ -590,6 +692,10 @@ * @return <code>true</code> iff a tuple having that key was deleted. * * @todo return the deleted tuple. + * + * @todo merge buckets when they underflow/become empty? (but note that + * we do not delete anything from the hash map for a hash join, + * just insert, insert, insert). */ public boolean delete(final int key) { final int h = hash(key); @@ -676,8 +782,10 @@ * The hash code of the key. * @param key * The key. + * + * @return <code>false</code> iff the bucket must be split. */ - public void insert(final int h, final int key) { + public boolean insert(final int h, final int key) { if (size == data.length) { /* @@ -693,11 +801,13 @@ * manage the split. If the bucket handles splits, then we need * to pass in the table reference. */ - throw new UnsupportedOperationException(); + return false; } data[size++] = key; + return true; + } /** This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
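One rule from the javadoc consolidated in r3980 above is worth making concrete: given a global bit length i and a bucket bit length j, exactly 2^(i-j) bucket address table entries point to that bucket, namely every index whose low j bits equal the bucket's bit pattern p. The helper below is a hypothetical illustration, not part of the committed test class; with i=3, j=1, p=1 it enumerates 001, 011, 101 and 111, which matches bucket (C) in the worked example quoted in r3982 earlier on this page.

public class AddressTableMath {

    /**
     * Enumerate the address-table indices that share one bucket: global
     * depth i, bucket local depth j, and low-j-bit pattern p (0 <= p < 2^j).
     */
    static int[] entriesForBucket(final int i, final int j, final int p) {
        final int n = 1 << (i - j);     // 2^(i-j) entries share the bucket
        final int[] out = new int[n];
        for (int k = 0; k < n; k++)
            out[k] = (k << j) | p;      // vary the high (i-j) bits, keep the low j bits
        return out;
    }

    public static void main(final String[] args) {
        final int i = 3, j = 1, p = 1;  // e.g. bucket (C) in the worked example
        for (final int idx : entriesForBucket(i, j, p))
            System.out.println(
                    String.format("%" + i + "s", Integer.toBinaryString(idx))
                            .replace(' ', '0'));
    }

}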
From: <tho...@us...> - 2010-11-23 15:22:37
Revision: 3979 http://bigdata.svn.sourceforge.net/bigdata/?rev=3979&view=rev Author: thompsonbry Date: 2010-11-23 15:22:27 +0000 (Tue, 23 Nov 2010) Log Message: ----------- Merge CHANGE_SET_BRANCH to trunk [r3608:HEAD]. Note: There is a problem in TestChangeSets when run with TestBigdataSailWithQuads. The test needs to be modified in order to not run the TM test variants when in quads mode. https://sourceforge.net/apps/trac/bigdata/ticket/166 has been amended to note this issue which will be fixed in the trunk. Modified Paths: -------------- trunk/bigdata/src/java/com/bigdata/btree/proc/AbstractKeyArrayIndexProcedure.java trunk/bigdata/src/test/com/bigdata/resources/AbstractResourceManagerTestCase.java trunk/bigdata-rdf/src/java/com/bigdata/rdf/inf/SPOAssertionBuffer.java trunk/bigdata-rdf/src/java/com/bigdata/rdf/inf/SPORetractionBuffer.java trunk/bigdata-rdf/src/java/com/bigdata/rdf/inf/TruthMaintenance.java trunk/bigdata-rdf/src/java/com/bigdata/rdf/model/BigdataStatementImpl.java trunk/bigdata-rdf/src/java/com/bigdata/rdf/rio/StatementBuffer.java trunk/bigdata-rdf/src/java/com/bigdata/rdf/spo/ISPO.java trunk/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPO.java trunk/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOIndexRemover.java trunk/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOIndexWriteProc.java trunk/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOIndexWriter.java trunk/bigdata-rdf/src/java/com/bigdata/rdf/spo/StatementWriter.java trunk/bigdata-rdf/src/java/com/bigdata/rdf/store/AbstractTripleStore.java trunk/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSail.java trunk/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSailRepositoryConnection.java trunk/bigdata-sails/src/java/com/bigdata/rdf/sail/bench/NanoSparqlServer.java trunk/bigdata-sails/src/test/com/bigdata/rdf/sail/ProxyBigdataSailTestCase.java Added Paths: ----------- trunk/bigdata-rdf/src/java/com/bigdata/rdf/changesets/ trunk/bigdata-rdf/src/java/com/bigdata/rdf/changesets/ChangeRecord.java trunk/bigdata-rdf/src/java/com/bigdata/rdf/changesets/IChangeLog.java trunk/bigdata-rdf/src/java/com/bigdata/rdf/changesets/IChangeRecord.java trunk/bigdata-rdf/src/java/com/bigdata/rdf/changesets/InMemChangeLog.java trunk/bigdata-rdf/src/java/com/bigdata/rdf/changesets/StatementWriter.java trunk/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOIndexMutation.java trunk/bigdata-sails/src/java/com/bigdata/rdf/sail/changesets/ trunk/bigdata-sails/src/test/com/bigdata/rdf/sail/TestChangeSets.java Removed Paths: ------------- trunk/bigdata-rdf/src/java/com/bigdata/rdf/changesets/ChangeRecord.java trunk/bigdata-rdf/src/java/com/bigdata/rdf/changesets/IChangeLog.java trunk/bigdata-rdf/src/java/com/bigdata/rdf/changesets/IChangeRecord.java trunk/bigdata-rdf/src/java/com/bigdata/rdf/changesets/InMemChangeLog.java trunk/bigdata-rdf/src/java/com/bigdata/rdf/changesets/StatementWriter.java Modified: trunk/bigdata/src/java/com/bigdata/btree/proc/AbstractKeyArrayIndexProcedure.java =================================================================== --- trunk/bigdata/src/java/com/bigdata/btree/proc/AbstractKeyArrayIndexProcedure.java 2010-11-23 14:27:09 UTC (rev 3978) +++ trunk/bigdata/src/java/com/bigdata/btree/proc/AbstractKeyArrayIndexProcedure.java 2010-11-23 15:22:27 UTC (rev 3979) @@ -39,6 +39,7 @@ import java.io.ObjectInput; import java.io.ObjectOutput; import java.io.OutputStream; +import java.util.Arrays; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; @@ -795,18 +796,34 @@ 
IResultHandler<ResultBitBuffer, ResultBitBuffer> { private final boolean[] results; + + /** + * I added this so I could encode information about tuple modification + * that takes more than one boolean to encode. For example, SPOs can + * be: INSERTED, REMOVED, UPDATED, NO_OP (2 bits). + */ + private final int multiplier; + private final AtomicInteger onCount = new AtomicInteger(); public ResultBitBufferHandler(final int nkeys) { + + this(nkeys, 1); + + } + + public ResultBitBufferHandler(final int nkeys, final int multiplier) { - results = new boolean[nkeys]; + results = new boolean[nkeys*multiplier]; + this.multiplier = multiplier; } public void aggregate(final ResultBitBuffer result, final Split split) { - System.arraycopy(result.getResult(), 0, results, split.fromIndex, - split.ntuples); + System.arraycopy(result.getResult(), 0, results, + split.fromIndex*multiplier, + split.ntuples*multiplier); onCount.addAndGet(result.getOnCount()); Modified: trunk/bigdata/src/test/com/bigdata/resources/AbstractResourceManagerTestCase.java =================================================================== --- trunk/bigdata/src/test/com/bigdata/resources/AbstractResourceManagerTestCase.java 2010-11-23 14:27:09 UTC (rev 3978) +++ trunk/bigdata/src/test/com/bigdata/resources/AbstractResourceManagerTestCase.java 2010-11-23 15:22:27 UTC (rev 3979) @@ -144,21 +144,21 @@ final private UUID dataServiceUUID = UUID.randomUUID(); - @Override +// @Override public IBigdataFederation getFederation() { return fed; } - @Override +// @Override public DataService getDataService() { throw new UnsupportedOperationException(); } - @Override +// @Override public UUID getDataServiceUUID() { return dataServiceUUID; Deleted: trunk/bigdata-rdf/src/java/com/bigdata/rdf/changesets/ChangeRecord.java =================================================================== --- branches/CHANGE_SET_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/changesets/ChangeRecord.java 2010-11-23 14:27:09 UTC (rev 3978) +++ trunk/bigdata-rdf/src/java/com/bigdata/rdf/changesets/ChangeRecord.java 2010-11-23 15:22:27 UTC (rev 3979) @@ -1,98 +0,0 @@ -package com.bigdata.rdf.changesets; - -import java.util.Comparator; -import com.bigdata.rdf.spo.ISPO; -import com.bigdata.rdf.spo.SPOComparator; - -public class ChangeRecord implements IChangeRecord { - - private final ISPO stmt; - - private final ChangeAction action; - -// private final StatementEnum oldType; - - public ChangeRecord(final ISPO stmt, final ChangeAction action) { - -// this(stmt, action, null); -// -// } -// -// public ChangeRecord(final BigdataStatement stmt, final ChangeAction action, -// final StatementEnum oldType) { -// - this.stmt = stmt; - this.action = action; -// this.oldType = oldType; - - } - - public ChangeAction getChangeAction() { - - return action; - - } - -// public StatementEnum getOldStatementType() { -// -// return oldType; -// -// } - - public ISPO getStatement() { - - return stmt; - - } - - @Override - public boolean equals(Object o) { - - if (o == this) - return true; - - if (o == null || o instanceof IChangeRecord == false) - return false; - - final IChangeRecord rec = (IChangeRecord) o; - - final ISPO stmt2 = rec.getStatement(); - - // statements are equal - if (stmt == stmt2 || - (stmt != null && stmt2 != null && stmt.equals(stmt2))) { - - // actions are equal - return action == rec.getChangeAction(); - - } - - return false; - - } - - public String toString() { - - StringBuilder sb = new StringBuilder(); - - sb.append(action).append(": ").append(stmt); - - return 
sb.toString(); - - } - - public static final Comparator<IChangeRecord> COMPARATOR = - new Comparator<IChangeRecord>() { - - public int compare(final IChangeRecord r1, final IChangeRecord r2) { - - final ISPO spo1 = r1.getStatement(); - final ISPO spo2 = r2.getStatement(); - - return SPOComparator.INSTANCE.compare(spo1, spo2); - - } - - }; - -} Copied: trunk/bigdata-rdf/src/java/com/bigdata/rdf/changesets/ChangeRecord.java (from rev 3978, branches/CHANGE_SET_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/changesets/ChangeRecord.java) =================================================================== --- trunk/bigdata-rdf/src/java/com/bigdata/rdf/changesets/ChangeRecord.java (rev 0) +++ trunk/bigdata-rdf/src/java/com/bigdata/rdf/changesets/ChangeRecord.java 2010-11-23 15:22:27 UTC (rev 3979) @@ -0,0 +1,98 @@ +package com.bigdata.rdf.changesets; + +import java.util.Comparator; +import com.bigdata.rdf.spo.ISPO; +import com.bigdata.rdf.spo.SPOComparator; + +public class ChangeRecord implements IChangeRecord { + + private final ISPO stmt; + + private final ChangeAction action; + +// private final StatementEnum oldType; + + public ChangeRecord(final ISPO stmt, final ChangeAction action) { + +// this(stmt, action, null); +// +// } +// +// public ChangeRecord(final BigdataStatement stmt, final ChangeAction action, +// final StatementEnum oldType) { +// + this.stmt = stmt; + this.action = action; +// this.oldType = oldType; + + } + + public ChangeAction getChangeAction() { + + return action; + + } + +// public StatementEnum getOldStatementType() { +// +// return oldType; +// +// } + + public ISPO getStatement() { + + return stmt; + + } + + @Override + public boolean equals(Object o) { + + if (o == this) + return true; + + if (o == null || o instanceof IChangeRecord == false) + return false; + + final IChangeRecord rec = (IChangeRecord) o; + + final ISPO stmt2 = rec.getStatement(); + + // statements are equal + if (stmt == stmt2 || + (stmt != null && stmt2 != null && stmt.equals(stmt2))) { + + // actions are equal + return action == rec.getChangeAction(); + + } + + return false; + + } + + public String toString() { + + StringBuilder sb = new StringBuilder(); + + sb.append(action).append(": ").append(stmt); + + return sb.toString(); + + } + + public static final Comparator<IChangeRecord> COMPARATOR = + new Comparator<IChangeRecord>() { + + public int compare(final IChangeRecord r1, final IChangeRecord r2) { + + final ISPO spo1 = r1.getStatement(); + final ISPO spo2 = r2.getStatement(); + + return SPOComparator.INSTANCE.compare(spo1, spo2); + + } + + }; + +} Deleted: trunk/bigdata-rdf/src/java/com/bigdata/rdf/changesets/IChangeLog.java =================================================================== --- branches/CHANGE_SET_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/changesets/IChangeLog.java 2010-11-23 14:27:09 UTC (rev 3978) +++ trunk/bigdata-rdf/src/java/com/bigdata/rdf/changesets/IChangeLog.java 2010-11-23 15:22:27 UTC (rev 3979) @@ -1,38 +0,0 @@ -package com.bigdata.rdf.changesets; - -/** - * Provides detailed information on changes made to statements in the database. - * Change records are generated for any statements that are used in - * addStatement() or removeStatements() operations on the SAIL connection, as - * well as any inferred statements that are added or removed as a result of - * truth maintenance when the database has inference enabled. Change records - * will be sent to an instance of this class via the - * {@link #changeEvent(IChangeRecord)} method. 
These events will - * occur on an ongoing basis as statements are added to or removed from the - * indices. It is the change log's responsibility to collect change records. - * When the transaction is actually committed (or aborted), the change log will - * receive notification via {@link #transactionCommited()} or - * {@link #transactionAborted()}. - */ -public interface IChangeLog { - - /** - * Occurs when a statement add or remove is flushed to the indices (but - * not yet committed). - * - * @param record - * the {@link IChangeRecord} - */ - void changeEvent(final IChangeRecord record); - - /** - * Occurs when the current SAIL transaction is committed. - */ - void transactionCommited(); - - /** - * Occurs if the current SAIL transaction is aborted. - */ - void transactionAborted(); - -} Copied: trunk/bigdata-rdf/src/java/com/bigdata/rdf/changesets/IChangeLog.java (from rev 3978, branches/CHANGE_SET_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/changesets/IChangeLog.java) =================================================================== --- trunk/bigdata-rdf/src/java/com/bigdata/rdf/changesets/IChangeLog.java (rev 0) +++ trunk/bigdata-rdf/src/java/com/bigdata/rdf/changesets/IChangeLog.java 2010-11-23 15:22:27 UTC (rev 3979) @@ -0,0 +1,38 @@ +package com.bigdata.rdf.changesets; + +/** + * Provides detailed information on changes made to statements in the database. + * Change records are generated for any statements that are used in + * addStatement() or removeStatements() operations on the SAIL connection, as + * well as any inferred statements that are added or removed as a result of + * truth maintenance when the database has inference enabled. Change records + * will be sent to an instance of this class via the + * {@link #changeEvent(IChangeRecord)} method. These events will + * occur on an ongoing basis as statements are added to or removed from the + * indices. It is the change log's responsibility to collect change records. + * When the transaction is actually committed (or aborted), the change log will + * receive notification via {@link #transactionCommited()} or + * {@link #transactionAborted()}. + */ +public interface IChangeLog { + + /** + * Occurs when a statement add or remove is flushed to the indices (but + * not yet committed). + * + * @param record + * the {@link IChangeRecord} + */ + void changeEvent(final IChangeRecord record); + + /** + * Occurs when the current SAIL transaction is committed. + */ + void transactionCommited(); + + /** + * Occurs if the current SAIL transaction is aborted. + */ + void transactionAborted(); + +} Deleted: trunk/bigdata-rdf/src/java/com/bigdata/rdf/changesets/IChangeRecord.java =================================================================== --- branches/CHANGE_SET_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/changesets/IChangeRecord.java 2010-11-23 14:27:09 UTC (rev 3978) +++ trunk/bigdata-rdf/src/java/com/bigdata/rdf/changesets/IChangeRecord.java 2010-11-23 15:22:27 UTC (rev 3979) @@ -1,120 +0,0 @@ -package com.bigdata.rdf.changesets; - -import com.bigdata.rdf.model.BigdataStatement; -import com.bigdata.rdf.model.StatementEnum; -import com.bigdata.rdf.spo.ISPO; - -/** - * Provides detailed information on changes made to statements in the database. - * Change records are generated for any statements that are used in - * addStatement() or removeStatements() operations on the SAIL connection, as - * well as any inferred statements that are added or removed as a result of - * truth maintenance when the database has inference enabled. 
- * <p> - * See {@link IChangeLog}. - */ -public interface IChangeRecord { - - /** - * Attempting to add or remove statements can have a number of different - * effects. This enum captures the different actions that can take place as - * a result of trying to add or remove a statement from the database. - */ - public enum ChangeAction { - - /** - * The focus statement was not in the database before and will be - * in the database after the commit. This can be the result of either - * explicit addStatement() operations on the SAIL connection, or from - * new inferences being generated via truth maintenance when the - * database has inference enabled. If the focus statement has a - * statement type of explicit then it was added via an addStatement() - * operation. If the focus statement has a statement type of inferred - * then it was added via truth maintenance. - */ - INSERTED, - - /** - * The focus statement was in the database before and will not - * be in the database after the commit. When the database has inference - * and truth maintenance enabled, the statement that is the focus of - * this change record was either an explicit statement that was the - * subject of a removeStatements() operation on the connection, or it - * was an inferred statement that was removed as a result of truth - * maintenance. Either way, the statement is no longer provable as an - * inference using other statements still in the database after the - * commit. If it were still provable, the explicit statement would have - * had its type changed to inferred, and the inferred statement would - * have remained untouched by truth maintenance. If an inferred - * statement was the subject of a removeStatement() operation on the - * connection it would have resulted in a no-op, since inferences can - * only be removed via truth maintenance. - */ - REMOVED, - - /** - * This change action can only occur when inference and truth - * maintenance are enabled on the database. Sometimes an attempt at - * statement addition or removal via an addStatement() or - * removeStatements() operation on the connection will result in a type - * change rather than an actual assertion or deletion. When in - * inference mode, statements can have one of three statement types: - * explicit, inferred, or axiom (see {@link StatementEnum}). There are - * several reasons why a statement will change type rather than be - * asserted or deleted: - * <p> - * <ul> - * <li> A statement is asserted, but already exists in the database as - * an inference or an axiom. The existing statement will have its type - * changed from inference or axiom to explicit. </li> - * <li> An explicit statement is retracted, but is still provable by - * other means. It will have its type changed from explicit to - * inference. </li> - * <li> An explicit statement is retracted, but is one of the axioms - * needed for inference. It will have its type changed from explicit to - * axiom. </li> - * </ul> - */ - UPDATED, - -// /** -// * This change action can occur for one of two reasons: -// * <p> -// * <ul> -// * <li> A statement is asserted, but already exists in the database as -// * an explicit statement. </li> -// * <li> An inferred statement or an axiom is retracted. Only explicit -// * statements can be retracted via removeStatements() operations. </li> -// * </ul> -// */ -// NO_OP - - } - - /** - * Return the ISPO that is the focus of this change record. 
- * - * @return - * the {@link ISPO} - */ - ISPO getStatement(); - - /** - * Return the change action for this change record. - * - * @return - * the {@link ChangeAction} - */ - ChangeAction getChangeAction(); - -// /** -// * If the change action is {@link ChangeAction#TYPE_CHANGE}, this method -// * will return the old statement type of the focus statement. The -// * new statement type is available on the focus statement itself. -// * -// * @return -// * the old statement type of the focus statement -// */ -// StatementEnum getOldStatementType(); - -} Copied: trunk/bigdata-rdf/src/java/com/bigdata/rdf/changesets/IChangeRecord.java (from rev 3978, branches/CHANGE_SET_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/changesets/IChangeRecord.java) =================================================================== --- trunk/bigdata-rdf/src/java/com/bigdata/rdf/changesets/IChangeRecord.java (rev 0) +++ trunk/bigdata-rdf/src/java/com/bigdata/rdf/changesets/IChangeRecord.java 2010-11-23 15:22:27 UTC (rev 3979) @@ -0,0 +1,120 @@ +package com.bigdata.rdf.changesets; + +import com.bigdata.rdf.model.BigdataStatement; +import com.bigdata.rdf.model.StatementEnum; +import com.bigdata.rdf.spo.ISPO; + +/** + * Provides detailed information on changes made to statements in the database. + * Change records are generated for any statements that are used in + * addStatement() or removeStatements() operations on the SAIL connection, as + * well as any inferred statements that are added or removed as a result of + * truth maintenance when the database has inference enabled. + * <p> + * See {@link IChangeLog}. + */ +public interface IChangeRecord { + + /** + * Attempting to add or remove statements can have a number of different + * effects. This enum captures the different actions that can take place as + * a result of trying to add or remove a statement from the database. + */ + public enum ChangeAction { + + /** + * The focus statement was not in the database before and will be + * in the database after the commit. This can be the result of either + * explicit addStatement() operations on the SAIL connection, or from + * new inferences being generated via truth maintenance when the + * database has inference enabled. If the focus statement has a + * statement type of explicit then it was added via an addStatement() + * operation. If the focus statement has a statement type of inferred + * then it was added via truth maintenance. + */ + INSERTED, + + /** + * The focus statement was in the database before and will not + * be in the database after the commit. When the database has inference + * and truth maintenance enabled, the statement that is the focus of + * this change record was either an explicit statement that was the + * subject of a removeStatements() operation on the connection, or it + * was an inferred statement that was removed as a result of truth + * maintenance. Either way, the statement is no longer provable as an + * inference using other statements still in the database after the + * commit. If it were still provable, the explicit statement would have + * had its type changed to inferred, and the inferred statement would + * have remained untouched by truth maintenance. If an inferred + * statement was the subject of a removeStatement() operation on the + * connection it would have resulted in a no-op, since inferences can + * only be removed via truth maintenance. + */ + REMOVED, + + /** + * This change action can only occur when inference and truth + * maintenance are enabled on the database. 
Sometimes an attempt at + * statement addition or removal via an addStatement() or + * removeStatements() operation on the connection will result in a type + * change rather than an actual assertion or deletion. When in + * inference mode, statements can have one of three statement types: + * explicit, inferred, or axiom (see {@link StatementEnum}). There are + * several reasons why a statement will change type rather than be + * asserted or deleted: + * <p> + * <ul> + * <li> A statement is asserted, but already exists in the database as + * an inference or an axiom. The existing statement will have its type + * changed from inference or axiom to explicit. </li> + * <li> An explicit statement is retracted, but is still provable by + * other means. It will have its type changed from explicit to + * inference. </li> + * <li> An explicit statement is retracted, but is one of the axioms + * needed for inference. It will have its type changed from explicit to + * axiom. </li> + * </ul> + */ + UPDATED, + +// /** +// * This change action can occur for one of two reasons: +// * <p> +// * <ul> +// * <li> A statement is asserted, but already exists in the database as +// * an explicit statement. </li> +// * <li> An inferred statement or an axiom is retracted. Only explicit +// * statements can be retracted via removeStatements() operations. </li> +// * </ul> +// */ +// NO_OP + + } + + /** + * Return the ISPO that is the focus of this change record. + * + * @return + * the {@link ISPO} + */ + ISPO getStatement(); + + /** + * Return the change action for this change record. + * + * @return + * the {@link ChangeAction} + */ + ChangeAction getChangeAction(); + +// /** +// * If the change action is {@link ChangeAction#TYPE_CHANGE}, this method +// * will return the old statement type of the focus statement. The +// * new statement type is available on the focus statement itself. +// * +// * @return +// * the old statement type of the focus statement +// */ +// StatementEnum getOldStatementType(); + +} Deleted: trunk/bigdata-rdf/src/java/com/bigdata/rdf/changesets/InMemChangeLog.java =================================================================== --- branches/CHANGE_SET_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/changesets/InMemChangeLog.java 2010-11-23 14:27:09 UTC (rev 3978) +++ trunk/bigdata-rdf/src/java/com/bigdata/rdf/changesets/InMemChangeLog.java 2010-11-23 15:22:27 UTC (rev 3979) @@ -1,163 +0,0 @@ -package com.bigdata.rdf.changesets; - -import java.util.Collection; -import java.util.HashMap; -import java.util.LinkedList; -import java.util.Map; -import org.apache.log4j.Logger; -import com.bigdata.rdf.model.BigdataStatement; -import com.bigdata.rdf.spo.ISPO; -import com.bigdata.rdf.store.AbstractTripleStore; -import com.bigdata.rdf.store.BigdataStatementIterator; -import com.bigdata.striterator.ChunkedArrayIterator; - -/** - * This is a very simple implementation of a change log. NOTE: This is not - * a particularly great implementation. First of all it ends up storing - * two copies of the change set. Secondly it needs to be smarter about - * concurrency, or maybe we can be smart about it when we do the - * implementation on the other side (the SAIL connection can just write - * change events to a buffer and then the buffer can be drained by - * another thread that doesn't block the actual read/write operations, - * although then we need to be careful not to issue the committed() - * notification before the buffer is drained). 
- * - * @author mike - * - */ -public class InMemChangeLog implements IChangeLog { - - protected static final Logger log = Logger.getLogger(InMemChangeLog.class); - - /** - * Running tally of new changes since the last commit notification. - */ - private final Map<ISPO,IChangeRecord> changeSet = - new HashMap<ISPO, IChangeRecord>(); - - /** - * Keep a record of the change set as of the last commit. - */ - private final Map<ISPO,IChangeRecord> committed = - new HashMap<ISPO, IChangeRecord>(); - - /** - * See {@link IChangeLog#changeEvent(IChangeRecord)}. - */ - public synchronized void changeEvent(final IChangeRecord record) { - - if (log.isInfoEnabled()) - log.info(record); - - changeSet.put(record.getStatement(), record); - - } - - /** - * See {@link IChangeLog#transactionCommited()}. - */ - public synchronized void transactionCommited() { - - if (log.isInfoEnabled()) - log.info("transaction committed"); - - committed.clear(); - - committed.putAll(changeSet); - - changeSet.clear(); - - } - - /** - * See {@link IChangeLog#transactionAborted()}. - */ - public synchronized void transactionAborted() { - - if (log.isInfoEnabled()) - log.info("transaction aborted"); - - changeSet.clear(); - - } - - /** - * Return the change set as of the last commmit point. - * - * @return - * a collection of {@link IChangeRecord}s as of the last commit - * point - */ - public Collection<IChangeRecord> getLastCommit() { - - return committed.values(); - - } - - /** - * Return the change set as of the last commmit point, using the supplied - * database to resolve ISPOs to BigdataStatements. - * - * @return - * a collection of {@link IChangeRecord}s as of the last commit - * point - */ - public Collection<IChangeRecord> getLastCommit(final AbstractTripleStore db) { - - return resolve(db, committed.values()); - - } - - /** - * Use the supplied database to turn a set of ISPO change records into - * BigdataStatement change records. BigdataStatements also implement - * ISPO, the difference being that BigdataStatements also contain - * materialized RDF terms for the 3 (or 4) positions, in addition to just - * the internal identifiers (IVs) for those terms. 
- * - * @param db - * the database containing the lexicon needed to materialize - * the BigdataStatement objects - * @param unresolved - * the ISPO change records that came from IChangeLog notification - * events - * @return - * the fully resolves BigdataStatement change records - */ - private Collection<IChangeRecord> resolve(final AbstractTripleStore db, - final Collection<IChangeRecord> unresolved) { - - final Collection<IChangeRecord> resolved = - new LinkedList<IChangeRecord>(); - - // collect up the ISPOs out of the unresolved change records - final ISPO[] spos = new ISPO[unresolved.size()]; - int i = 0; - for (IChangeRecord rec : unresolved) { - spos[i++] = rec.getStatement(); - } - - // use the database to resolve them into BigdataStatements - final BigdataStatementIterator it = - db.asStatementIterator( - new ChunkedArrayIterator<ISPO>(i, spos, null/* keyOrder */)); - - /* - * the BigdataStatementIterator will produce BigdataStatement objects - * in the same order as the original ISPO array - */ - for (IChangeRecord rec : unresolved) { - - final BigdataStatement stmt = it.next(); - - resolved.add(new ChangeRecord(stmt, rec.getChangeAction())); - - } - - return resolved; - - } - - - -} Copied: trunk/bigdata-rdf/src/java/com/bigdata/rdf/changesets/InMemChangeLog.java (from rev 3978, branches/CHANGE_SET_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/changesets/InMemChangeLog.java) =================================================================== --- trunk/bigdata-rdf/src/java/com/bigdata/rdf/changesets/InMemChangeLog.java (rev 0) +++ trunk/bigdata-rdf/src/java/com/bigdata/rdf/changesets/InMemChangeLog.java 2010-11-23 15:22:27 UTC (rev 3979) @@ -0,0 +1,163 @@ +package com.bigdata.rdf.changesets; + +import java.util.Collection; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.Map; +import org.apache.log4j.Logger; +import com.bigdata.rdf.model.BigdataStatement; +import com.bigdata.rdf.spo.ISPO; +import com.bigdata.rdf.store.AbstractTripleStore; +import com.bigdata.rdf.store.BigdataStatementIterator; +import com.bigdata.striterator.ChunkedArrayIterator; + +/** + * This is a very simple implementation of a change log. NOTE: This is not + * a particularly great implementation. First of all it ends up storing + * two copies of the change set. Secondly it needs to be smarter about + * concurrency, or maybe we can be smart about it when we do the + * implementation on the other side (the SAIL connection can just write + * change events to a buffer and then the buffer can be drained by + * another thread that doesn't block the actual read/write operations, + * although then we need to be careful not to issue the committed() + * notification before the buffer is drained). + * + * @author mike + * + */ +public class InMemChangeLog implements IChangeLog { + + protected static final Logger log = Logger.getLogger(InMemChangeLog.class); + + /** + * Running tally of new changes since the last commit notification. + */ + private final Map<ISPO,IChangeRecord> changeSet = + new HashMap<ISPO, IChangeRecord>(); + + /** + * Keep a record of the change set as of the last commit. + */ + private final Map<ISPO,IChangeRecord> committed = + new HashMap<ISPO, IChangeRecord>(); + + /** + * See {@link IChangeLog#changeEvent(IChangeRecord)}. + */ + public synchronized void changeEvent(final IChangeRecord record) { + + if (log.isInfoEnabled()) + log.info(record); + + changeSet.put(record.getStatement(), record); + + } + + /** + * See {@link IChangeLog#transactionCommited()}. 
+ */ + public synchronized void transactionCommited() { + + if (log.isInfoEnabled()) + log.info("transaction committed"); + + committed.clear(); + + committed.putAll(changeSet); + + changeSet.clear(); + + } + + /** + * See {@link IChangeLog#transactionAborted()}. + */ + public synchronized void transactionAborted() { + + if (log.isInfoEnabled()) + log.info("transaction aborted"); + + changeSet.clear(); + + } + + /** + * Return the change set as of the last commmit point. + * + * @return + * a collection of {@link IChangeRecord}s as of the last commit + * point + */ + public Collection<IChangeRecord> getLastCommit() { + + return committed.values(); + + } + + /** + * Return the change set as of the last commmit point, using the supplied + * database to resolve ISPOs to BigdataStatements. + * + * @return + * a collection of {@link IChangeRecord}s as of the last commit + * point + */ + public Collection<IChangeRecord> getLastCommit(final AbstractTripleStore db) { + + return resolve(db, committed.values()); + + } + + /** + * Use the supplied database to turn a set of ISPO change records into + * BigdataStatement change records. BigdataStatements also implement + * ISPO, the difference being that BigdataStatements also contain + * materialized RDF terms for the 3 (or 4) positions, in addition to just + * the internal identifiers (IVs) for those terms. + * + * @param db + * the database containing the lexicon needed to materialize + * the BigdataStatement objects + * @param unresolved + * the ISPO change records that came from IChangeLog notification + * events + * @return + * the fully resolves BigdataStatement change records + */ + private Collection<IChangeRecord> resolve(final AbstractTripleStore db, + final Collection<IChangeRecord> unresolved) { + + final Collection<IChangeRecord> resolved = + new LinkedList<IChangeRecord>(); + + // collect up the ISPOs out of the unresolved change records + final ISPO[] spos = new ISPO[unresolved.size()]; + int i = 0; + for (IChangeRecord rec : unresolved) { + spos[i++] = rec.getStatement(); + } + + // use the database to resolve them into BigdataStatements + final BigdataStatementIterator it = + db.asStatementIterator( + new ChunkedArrayIterator<ISPO>(i, spos, null/* keyOrder */)); + + /* + * the BigdataStatementIterator will produce BigdataStatement objects + * in the same order as the original ISPO array + */ + for (IChangeRecord rec : unresolved) { + + final BigdataStatement stmt = it.next(); + + resolved.add(new ChangeRecord(stmt, rec.getChangeAction())); + + } + + return resolved; + + } + + + +} Deleted: trunk/bigdata-rdf/src/java/com/bigdata/rdf/changesets/StatementWriter.java =================================================================== --- branches/CHANGE_SET_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/changesets/StatementWriter.java 2010-11-23 14:27:09 UTC (rev 3978) +++ trunk/bigdata-rdf/src/java/com/bigdata/rdf/changesets/StatementWriter.java 2010-11-23 15:22:27 UTC (rev 3979) @@ -1,208 +0,0 @@ -package com.bigdata.rdf.changesets; - -import java.util.Iterator; -import java.util.Map; -import org.apache.log4j.Logger; -import com.bigdata.rdf.changesets.IChangeRecord.ChangeAction; -import com.bigdata.rdf.internal.IV; -import com.bigdata.rdf.model.BigdataBNode; -import com.bigdata.rdf.spo.ISPO; -import com.bigdata.rdf.spo.SPO; -import com.bigdata.rdf.spo.ISPO.ModifiedEnum; -import com.bigdata.rdf.store.AbstractTripleStore; -import com.bigdata.relation.accesspath.IElementFilter; -import com.bigdata.striterator.ChunkedArrayIterator; -import 
com.bigdata.striterator.IChunkedOrderedIterator; - -public class StatementWriter { - - protected static final Logger log = Logger.getLogger(StatementWriter.class); - - public static long addStatements(final AbstractTripleStore database, - final AbstractTripleStore statementStore, - final boolean copyOnly, - final IElementFilter<ISPO> filter, - final IChunkedOrderedIterator<ISPO> itr, - final IChangeLog changeLog) { - - long n = 0; - - if (itr.hasNext()) { - -// final BigdataStatementIteratorImpl itr2 = -// new BigdataStatementIteratorImpl(database, bnodes, itr) -// .start(database.getExecutorService()); -// -// final BigdataStatement[] stmts = -// new BigdataStatement[database.getChunkCapacity()]; - final SPO[] stmts = new SPO[database.getChunkCapacity()]; - - int i = 0; - while ((i = nextChunk(itr, stmts)) > 0) { - n += addStatements(database, statementStore, copyOnly, filter, - stmts, i, changeLog); - } - - } - - return n; - - } - - private static long addStatements(final AbstractTripleStore database, - final AbstractTripleStore statementStore, - final boolean copyOnly, - final IElementFilter<ISPO> filter, - final ISPO[] stmts, - final int numStmts, - final IChangeLog changeLog) { - -// final SPO[] tmp = allocateSPOs(stmts, numStmts); - - final long n = database.addStatements(statementStore, copyOnly, - new ChunkedArrayIterator<ISPO>(numStmts, stmts, - null/* keyOrder */), filter); - - // Copy the state of the isModified() flag and notify changeLog - for (int i = 0; i < numStmts; i++) { - - if (stmts[i].isModified()) { - -// stmts[i].setModified(true); - - if (changeLog != null) { - - switch(stmts[i].getModified()) { - case INSERTED: - changeLog.changeEvent(new ChangeRecord(stmts[i], ChangeAction.INSERTED)); - break; - case UPDATED: - changeLog.changeEvent(new ChangeRecord(stmts[i], ChangeAction.UPDATED)); - break; - case REMOVED: - throw new AssertionError(); - default: - break; - } - - } - - } - - } - - return n; - - } - - public static long removeStatements(final AbstractTripleStore database, - final IChunkedOrderedIterator<ISPO> itr, - final boolean computeClosureForStatementIdentifiers, - final IChangeLog changeLog) { - - long n = 0; - - if (itr.hasNext()) { - -// final BigdataStatementIteratorImpl itr2 = -// new BigdataStatementIteratorImpl(database, bnodes, itr) -// .start(database.getExecutorService()); -// -// final BigdataStatement[] stmts = -// new BigdataStatement[database.getChunkCapacity()]; - final SPO[] stmts = new SPO[database.getChunkCapacity()]; - - int i = 0; - while ((i = nextChunk(itr, stmts)) > 0) { - n += removeStatements(database, stmts, i, - computeClosureForStatementIdentifiers, changeLog); - } - - } - - return n; - - } - - private static long removeStatements(final AbstractTripleStore database, - final ISPO[] stmts, - final int numStmts, - final boolean computeClosureForStatementIdentifiers, - final IChangeLog changeLog) { - - final long n = database.removeStatements( - new ChunkedArrayIterator<ISPO>(numStmts, stmts, - null/* keyOrder */), - computeClosureForStatementIdentifiers); - - // Copy the state of the isModified() flag and notify changeLog - for (int i = 0; i < numStmts; i++) { - - if (stmts[i].isModified()) { - - // just to be safe - stmts[i].setModified(ModifiedEnum.REMOVED); - - changeLog.changeEvent( - new ChangeRecord(stmts[i], ChangeAction.REMOVED)); - - } - - } - - return n; - - } - - private static int nextChunk(final Iterator<ISPO> itr, - final ISPO[] stmts) { - - assert stmts != null && stmts.length > 0; - - int i = 0; - while (itr.hasNext()) 
{ - stmts[i++] = itr.next(); - if (i == stmts.length) { - // stmts[] is full - return i; - } - } - - /* - * stmts[] is empty (i = 0) or partially - * full (i > 0 && i < stmts.length) - */ - return i; - - } - -// private static SPO[] allocateSPOs(final BigdataStatement[] stmts, -// final int numStmts) { -// -// final SPO[] tmp = new SPO[numStmts]; -// -// for (int i = 0; i < tmp.length; i++) { -// -// final BigdataStatement stmt = stmts[i]; -// -// final SPO spo = new SPO(stmt); -// -// if (log.isDebugEnabled()) -// log.debug("writing: " + stmt.toString() + " (" + spo + ")"); -// -// if(!spo.isFullyBound()) { -// -// throw new AssertionError("Not fully bound? : " + spo); -// -// } -// -// tmp[i] = spo; -// -// } -// -// return tmp; -// -// -// } - -} Copied: trunk/bigdata-rdf/src/java/com/bigdata/rdf/changesets/StatementWriter.java (from rev 3978, branches/CHANGE_SET_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/changesets/StatementWriter.java) =================================================================== --- trunk/bigdata-rdf/src/java/com/bigdata/rdf/changesets/StatementWriter.java (rev 0) +++ trunk/bigdata-rdf/src/java/com/bigdata/rdf/changesets/StatementWriter.java 2010-11-23 15:22:27 UTC (rev 3979) @@ -0,0 +1,208 @@ +package com.bigdata.rdf.changesets; + +import java.util.Iterator; +import java.util.Map; +import org.apache.log4j.Logger; +import com.bigdata.rdf.changesets.IChangeRecord.ChangeAction; +import com.bigdata.rdf.internal.IV; +import com.bigdata.rdf.model.BigdataBNode; +import com.bigdata.rdf.spo.ISPO; +import com.bigdata.rdf.spo.SPO; +import com.bigdata.rdf.spo.ISPO.ModifiedEnum; +import com.bigdata.rdf.store.AbstractTripleStore; +import com.bigdata.relation.accesspath.IElementFilter; +import com.bigdata.striterator.ChunkedArrayIterator; +import com.bigdata.striterator.IChunkedOrderedIterator; + +public class StatementWriter { + + protected static final Logger log = Logger.getLogger(StatementWriter.class); + + public static long addStatements(final AbstractTripleStore database, + final AbstractTripleStore statementStore, + final boolean copyOnly, + final IElementFilter<ISPO> filter, + final IChunkedOrderedIterator<ISPO> itr, + final IChangeLog changeLog) { + + long n = 0; + + if (itr.hasNext()) { + +// final BigdataStatementIteratorImpl itr2 = +// new BigdataStatementIteratorImpl(database, bnodes, itr) +// .start(database.getExecutorService()); +// +// final BigdataStatement[] stmts = +// new BigdataStatement[database.getChunkCapacity()]; + final SPO[] stmts = new SPO[database.getChunkCapacity()]; + + int i = 0; + while ((i = nextChunk(itr, stmts)) > 0) { + n += addStatements(database, statementStore, copyOnly, filter, + stmts, i, changeLog); + } + + } + + return n; + + } + + private static long addStatements(final AbstractTripleStore database, + final AbstractTripleStore statementStore, + final boolean copyOnly, + final IElementFilter<ISPO> filter, + final ISPO[] stmts, + final int numStmts, + final IChangeLog changeLog) { + +// final SPO[] tmp = allocateSPOs(stmts, numStmts); + + final long n = database.addStatements(statementStore, copyOnly, + new ChunkedArrayIterator<ISPO>(numStmts, stmts, + null/* keyOrder */), filter); + + // Copy the state of the isModified() flag and notify changeLog + for (int i = 0; i < numStmts; i++) { + + if (stmts[i].isModified()) { + +// stmts[i].setModified(true); + + if (changeLog != null) { + + switch(stmts[i].getModified()) { + case INSERTED: + changeLog.changeEvent(new ChangeRecord(stmts[i], ChangeAction.INSERTED)); + break; + case 
UPDATED: + changeLog.changeEvent(new ChangeRecord(stmts[i], ChangeAction.UPDATED)); + break; + case REMOVED: + throw new AssertionError(); + default: + break; + } + + } + + } + + } + + return n; + + } + + public static long removeStatements(final AbstractTripleStore database, + final IChunkedOrderedIterator<ISPO> itr, + final boolean computeClosureForStatementIdentifiers, + final IChangeLog changeLog) { + + long n = 0; + + if (itr.hasNext()) { + +// final BigdataStatementIteratorImpl itr2 = +// new BigdataStatementIteratorImpl(database, bnodes, itr) +// .start(database.getExecutorService()); +// +// final BigdataStatement[] stmts = +// new BigdataStatement[database.getChunkCapacity()]; + final SPO[] stmts = new SPO[database.getChunkCapacity()]; + + int i = 0; + while ((i = nextChunk(itr, stmts)) > 0) { + n += removeStatements(database, stmts, i, + computeClosureForStatementIdentifiers, changeLog); + } + + } + + return n; + + } + + private static long removeStatements(final AbstractTripleStore database, + final ISPO[] stmts, + final int numStmts, + final boolean computeClosureForStatementIdentifiers, + final IChangeLog changeLog) { + + final long n = database.removeStatements( + new ChunkedArrayIterator<ISPO>(numStmts, stmts, + null/* keyOrder */), + computeClosureForStatementIdentifiers); + + // Copy the state of the isModified() flag and notify changeLog + for (int i = 0; i < numStmts; i++) { + + if (stmts[i].isModified()) { + + // just to be safe + stmts[i].setModified(ModifiedEnum.REMOVED); + + changeLog.changeEvent( + new ChangeRecord(stmts[i], ChangeAction.REMOVED)); + + } + + } + + return n; + + } + + private static int nextChunk(final Iterator<ISPO> itr, + final ISPO[] stmts) { + + assert stmts != null && stmts.length > 0; + + int i = 0; + while (itr.hasNext()) { + stmts[i++] = itr.next(); + if (i == stmts.length) { + // stmts[] is full + return i; + } + } + + /* + * stmts[] is empty (i = 0) or partially + * full (i > 0 && i < stmts.length) + */ + return i; + + } + +// private static SPO[] allocateSPOs(final BigdataStatement[] stmts, +// final int numStmts) { +// +// final SPO[] tmp = new SPO[numStmts]; +// +// for (int i = 0; i < tmp.length; i++) { +// +// final BigdataStatement stmt = stmts[i]; +// +// final SPO spo = new SPO(stmt); +// +// if (log.isDebugEnabled()) +// log.debug("writing: " + stmt.toString() + " (" + spo + ")"); +// +// if(!spo.isFullyBound()) { +// +// throw new AssertionError("Not fully bound? : " + spo); +// +// } +// +// tmp[i] = spo; +// +// } +// +// return tmp; +// +// +// } + +} Modified: trunk/bigdata-rdf/src/java/com/bigdata/rdf/inf/SPOAssertionBuffer.java =================================================================== --- trunk/bigdata-rdf/src/java/com/bigdata/rdf/inf/SPOAssertionBuffer.java 2010-11-23 14:27:09 UTC (rev 3978) +++ trunk/bigdata-rdf/src/java/com/bigdata/rdf/inf/SPOAssertionBuffer.java 2010-11-23 15:22:27 UTC (rev 3979) @@ -29,11 +29,15 @@ import java.util.ArrayList; import java.util.List; +import java.util.Map; import java.util.concurrent.Callable; import java.util.concurrent.ExecutionException; import java.util.concurrent.Future; import java.util.concurrent.atomic.AtomicLong; +import com.bigdata.rdf.changesets.IChangeLog; +import com.bigdata.rdf.internal.IV; +import com.bigdata.rdf.model.BigdataBNode; import com.bigdata.rdf.spo.ISPO; import com.bigdata.rdf.spo.ISPOAssertionBuffer; import com.bigdata.rdf.spo.JustificationWriter; @@ -101,8 +105,13 @@ * {@link Justification}s for entailments. 
*/ protected final boolean justify; - + /** + * Used for change set notification (optional). + */ + protected final IChangeLog changeLog; + + /** * Create a buffer. * * @param focusStore @@ -126,6 +135,38 @@ AbstractTripleStore db, IElementFilter<ISPO> filter, int capacity, boolean justified) { + this(focusStore, db, filter, capacity, justified, + null/* changeLog */); + + } + + /** + * Create a buffer. + * + * @param focusStore + * The focusStore on which the entailments computed by closure + * will be written (required). This is either the database or a + * temporary focusStore used during incremental TM. + * @param db + * The database in which the terms are defined (required). + * @param filter + * Option filter. When present statements matched by the filter + * are NOT retained by the {@link SPOAssertionBuffer} and will + * NOT be added to the <i>focusStore</i>. + * @param capacity + * The maximum {@link SPO}s that the buffer can hold before it + * is {@link #flush()}ed. + * @param justified + * true iff the Truth Maintenance strategy requires that we + * focusStore {@link Justification}s for entailments. + * @param changeLog + * optional change log for change notification + */ + public SPOAssertionBuffer(AbstractTripleStore focusStore, + AbstractTripleStore db, IElementFilter<ISPO> filter, int capacity, + boolean justified, final IChangeLog changeLog + ) { + super(db, filter, capacity); if (focusStore == null) @@ -142,6 +183,8 @@ justifications = justified ? new Justification[capacity] : null; + this.changeLog = changeLog; + } /** @@ -180,12 +223,26 @@ if (numJustifications == 0) { - // batch insert statements into the focusStore. - n = db.addStatements( + if (changeLog == null) { + + // batch insert statements into the focusStore. + n = db.addStatements( focusStore, true/* copyOnly */, new ChunkedArrayIterator<ISPO>(numStmts, stmts, null/*keyOrder*/), null/*filter*/); + + } else { + + n = com.bigdata.rdf.changesets.StatementWriter.addStatements( + db, + focusStore, + true/* copyOnly */, + null/* filter */, + new ChunkedArrayIterator<ISPO>(numStmts, stmts, null/*keyOrder*/), + changeLog); + + } } else { @@ -209,7 +266,8 @@ // task will write SPOs on the statement indices. tasks.add(new StatementWriter(getTermDatabase(), focusStore, false/* copyOnly */, new ChunkedArrayIterator<ISPO>( - numStmts, stmts, null/*keyOrder*/), nwritten)); + numStmts, stmts, null/*keyOrder*/), nwritten, + changeLog)); // task will write justifications on the justifications index. final AtomicLong nwrittenj = new AtomicLong(); Modified: trunk/bigdata-rdf/src/java/com/bigdata/rdf/inf/SPORetractionBuffer.java =================================================================== --- trunk/bigdata-rdf/src/java/com/bigdata/rdf/inf/SPORetractionBuffer.java 2010-11-23 14:27:09 UTC (rev 3978) +++ trunk/bigdata-rdf/src/java/com/bigdata/rdf/inf/SPORetractionBuffer.java 2010-11-23 15:22:27 UTC (rev 3979) @@ -27,6 +27,11 @@ package com.bigdata.rdf.inf; +import java.util.Map; +import com.bigdata.rdf.changesets.IChangeLog; +import com.bigdata.rdf.changesets.StatementWriter; +import com.bigdata.rdf.internal.IV; +import com.bigdata.rdf.model.BigdataBNode; import com.bigdata.rdf.spo.ISPO; import com.bigdata.rdf.spo.SPO; import com.bigdata.rdf.store.AbstractTripleStore; @@ -49,6 +54,11 @@ private final AbstractTripleStore store; private final boolean computeClosureForStatementIdentifiers; + + /** + * Optional change log for change notification. 
+ */ + protected final IChangeLog changeLog; /** * @param store @@ -63,6 +73,27 @@ public SPORetractionBuffer(AbstractTripleStore store, int capacity, boolean computeClosureForStatementIdentifiers) { + this(store, capacity, computeClosureForStatementIdentifiers, + null/* changeLog */); + + } + + /** + * @param store + * The database from which the statement will be removed when the + * buffer is {@link #flush()}ed. + * @param capacity + * The capacity of the retraction buffer. + * @param computeClosureForStatementIdentifiers + * See + * {@link AbstractTripleStore#removeStatements(com.bigdata.rdf.spo.ISPOIterator, boolean)} + * @param changeLog + * optional change log for change notification + */ + public SPORetractionBuffer(AbstractTripleStore store, int capacity, + boolean computeClosureForStatementIdentifiers, + final IChangeLog changeLog) { + super(store, null/*filter*/, capacity); if (store == null) @@ -72,14 +103,31 @@ this.computeClosureForStatementIdentifiers = computeClosureForStatementIdentifiers; + this.changeLog = changeLog; + } public int flush() { if (isEmpty()) return 0; - long n = store.removeStatements(new ChunkedArrayIterator<ISPO>(numStmts,stmts, + final long n; + + if (changeLog == null) { + + n = store.removeStatements(new ChunkedArrayIterator<ISPO>(numStmts,stmts, null/*keyOrder*/), computeClosureForStatementIdentifiers); + + } else { + + n = StatementWriter.removeStatements( + store, + new ChunkedArrayIterator<ISPO>( + numStmts,stmts,null/*keyOrder*/), + computeClosureForStatementIdentifiers, + changeLog); + ... [truncated message content] |
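The IChangeLog / IChangeRecord contract introduced above is small enough that a complete client-side listener fits in a few lines. The sketch below is illustrative only: the class name is invented, and the step of attaching the listener to a SAIL connection is not shown in this excerpt. It tallies change events per ChangeAction, publishes the tallies when the transaction commits, and discards them on abort, mirroring the contract described in the IChangeLog javadoc.

import java.util.EnumMap;
import java.util.Map;

import com.bigdata.rdf.changesets.IChangeLog;
import com.bigdata.rdf.changesets.IChangeRecord;
import com.bigdata.rdf.changesets.IChangeRecord.ChangeAction;

/**
 * Minimal change-set listener: counts INSERTED/REMOVED/UPDATED events for the
 * in-flight transaction and reports them once the transaction commits.
 */
public class CountingChangeLog implements IChangeLog {

    /** Tallies for the current (not yet committed) transaction. */
    private final Map<ChangeAction, Long> tallies =
            new EnumMap<ChangeAction, Long>(ChangeAction.class);

    public synchronized void changeEvent(final IChangeRecord record) {
        // One event per statement flushed to the indices (not yet committed).
        final ChangeAction action = record.getChangeAction();
        final Long n = tallies.get(action);
        tallies.put(action, n == null ? 1L : n + 1L);
    }

    public synchronized void transactionCommited() {
        // The reported changes are now durable: publish and reset the tallies.
        System.out.println("commit: " + tallies);
        tallies.clear();
    }

    public synchronized void transactionAborted() {
        // None of the reported changes took effect: discard the tallies.
        tallies.clear();
    }
}

As in the InMemChangeLog above, the interface implementations deliberately carry no @Override tag; the r3978 change in the next message comments such tags out precisely because their legality depends on the compiler version.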
From: <tho...@us...> - 2010-11-23 14:27:17
Revision: 3978 http://bigdata.svn.sourceforge.net/bigdata/?rev=3978&view=rev Author: thompsonbry Date: 2010-11-23 14:27:09 +0000 (Tue, 23 Nov 2010) Log Message: ----------- Commented out some @Override tags which can cause a problem depending on the Java version. Modified Paths: -------------- branches/CHANGE_SET_BRANCH/bigdata/src/test/com/bigdata/resources/AbstractResourceManagerTestCase.java branches/CHANGE_SET_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/bench/NanoSparqlServer.java Modified: branches/CHANGE_SET_BRANCH/bigdata/src/test/com/bigdata/resources/AbstractResourceManagerTestCase.java =================================================================== --- branches/CHANGE_SET_BRANCH/bigdata/src/test/com/bigdata/resources/AbstractResourceManagerTestCase.java 2010-11-23 14:17:27 UTC (rev 3977) +++ branches/CHANGE_SET_BRANCH/bigdata/src/test/com/bigdata/resources/AbstractResourceManagerTestCase.java 2010-11-23 14:27:09 UTC (rev 3978) @@ -144,21 +144,21 @@ final private UUID dataServiceUUID = UUID.randomUUID(); - @Override +// @Override public IBigdataFederation getFederation() { return fed; } - @Override +// @Override public DataService getDataService() { throw new UnsupportedOperationException(); } - @Override +// @Override public UUID getDataServiceUUID() { return dataServiceUUID; Modified: branches/CHANGE_SET_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/bench/NanoSparqlServer.java =================================================================== --- branches/CHANGE_SET_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/bench/NanoSparqlServer.java 2010-11-23 14:17:27 UTC (rev 3977) +++ branches/CHANGE_SET_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/bench/NanoSparqlServer.java 2010-11-23 14:27:09 UTC (rev 3978) @@ -697,7 +697,7 @@ * Comparator puts the entries into descending order by the query * execution time (longest running queries are first). */ - @Override +// @Override public int compare(Long o1, Long o2) { if(o1.longValue()<o2.longValue()) return 1; if(o1.longValue()>o2.longValue()) return -1; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
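For readers wondering which Java-version problem the r3978 log message refers to: a compiler run with -source 1.5 rejects @Override on a method that implements an interface method rather than overriding a concrete superclass method, while 1.6 and later accept it. A minimal illustration, with invented types:

interface Counter {
    int next();
}

class SimpleCounter implements Counter {

    private int n;

//  @Override // accepted from Java 6 onward; a compile error under a 1.5
//            // compiler, which only allows the tag on true superclass overrides
    public int next() {
        return ++n;
    }
}

Commenting the tag out, as the commit does for getFederation(), getDataService(), getDataServiceUUID() and compare(), keeps the sources compilable under both toolchains without changing behavior.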
From: <tho...@us...> - 2010-11-23 14:17:35
Revision: 3977 http://bigdata.svn.sourceforge.net/bigdata/?rev=3977&view=rev Author: thompsonbry Date: 2010-11-23 14:17:27 +0000 (Tue, 23 Nov 2010) Log Message: ----------- Merge from TRUNK to BRANCH (r3608:r3976). merge -r3608:HEAD https://bigdata.svn.sourceforge.net/svnroot/bigdata/trunk /root/workspace/bigdata-change-set-branch {{{ --- Merging r3608 through r3976 into /root/workspace/bigdata-change-set-branch U /root/workspace/bigdata-change-set-branch/bigdata-perf/README.txt U /root/workspace/bigdata-change-set-branch/build.xml A /root/workspace/bigdata-change-set-branch/bigdata-compatibility A /root/workspace/bigdata-change-set-branch/bigdata-compatibility/src A /root/workspace/bigdata-change-set-branch/bigdata-compatibility/src/test A /root/workspace/bigdata-change-set-branch/bigdata-compatibility/src/test/com A /root/workspace/bigdata-change-set-branch/bigdata-compatibility/src/test/com/bigdata A /root/workspace/bigdata-change-set-branch/bigdata-compatibility/src/test/com/bigdata/journal A /root/workspace/bigdata-change-set-branch/bigdata-compatibility/src/test/com/bigdata/journal/TestBinaryCompatibility.java U /root/workspace/bigdata-change-set-branch/bigdata-sails/src/test/com/bigdata/rdf/sail/TestSids.java A /root/workspace/bigdata-change-set-branch/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPORelation.java U /root/workspace/bigdata-change-set-branch/bigdata-rdf/src/java/com/bigdata/rdf/vocab/BaseVocabulary.java Merge complete. ===== File Statistics: ===== Added: 7 Updated: 4 }}} This change set incorporates the following: - BaseVocabulary: change to use the as generated serialVersionUID from the last release. - TestSids: javadoc. - bigdata-perf/README.txt: note concerning the use of the ant bundleJar target. - build.xml: removed the jini dependencies from the Sesame WAR deployment target "install-sesame-server". - bigdata-compatibility: some initial steps towards https://sourceforge.net/apps/trac/bigdata/ticket/171 (binary compatibility test suite). 
Modified Paths: -------------- branches/CHANGE_SET_BRANCH/bigdata-perf/README.txt branches/CHANGE_SET_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/vocab/BaseVocabulary.java branches/CHANGE_SET_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestSids.java branches/CHANGE_SET_BRANCH/build.xml Added Paths: ----------- branches/CHANGE_SET_BRANCH/bigdata-compatibility/ branches/CHANGE_SET_BRANCH/bigdata-compatibility/src/ branches/CHANGE_SET_BRANCH/bigdata-compatibility/src/test/ branches/CHANGE_SET_BRANCH/bigdata-compatibility/src/test/com/ branches/CHANGE_SET_BRANCH/bigdata-compatibility/src/test/com/bigdata/ branches/CHANGE_SET_BRANCH/bigdata-compatibility/src/test/com/bigdata/journal/ branches/CHANGE_SET_BRANCH/bigdata-compatibility/src/test/com/bigdata/journal/TestBinaryCompatibility.java Removed Paths: ------------- branches/CHANGE_SET_BRANCH/bigdata-compatibility/src/ branches/CHANGE_SET_BRANCH/bigdata-compatibility/src/test/ branches/CHANGE_SET_BRANCH/bigdata-compatibility/src/test/com/ branches/CHANGE_SET_BRANCH/bigdata-compatibility/src/test/com/bigdata/ branches/CHANGE_SET_BRANCH/bigdata-compatibility/src/test/com/bigdata/journal/ branches/CHANGE_SET_BRANCH/bigdata-compatibility/src/test/com/bigdata/journal/TestBinaryCompatibility.java Deleted: branches/CHANGE_SET_BRANCH/bigdata-compatibility/src/test/com/bigdata/journal/TestBinaryCompatibility.java =================================================================== --- trunk/bigdata-compatibility/src/test/com/bigdata/journal/TestBinaryCompatibility.java 2010-11-22 21:12:22 UTC (rev 3976) +++ branches/CHANGE_SET_BRANCH/bigdata-compatibility/src/test/com/bigdata/journal/TestBinaryCompatibility.java 2010-11-23 14:17:27 UTC (rev 3977) @@ -1,276 +0,0 @@ -/* - -Copyright (C) SYSTAP, LLC 2006-2008. All rights reserved. - -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -*/ -/* - * Created on Nov 19, 2010 - */ -package com.bigdata.journal; - -import java.io.File; -import java.io.IOException; -import java.util.Properties; -import java.util.UUID; - -import junit.framework.TestCase2; - -import com.bigdata.Banner; -import com.bigdata.btree.IIndex; -import com.bigdata.btree.IndexMetadata; - -/** - * Test suite for binary compatibility, portability, and forward compatibility - * or automated migration of persistent stores and persistence or serialization - * capable objects across different bigdata releases. The tests in this suite - * rely on artifacts which are archived within SVN. - * - * @todo create w/ small extent and truncate (RW store does not support - * truncate). - * - * @todo test binary migration and forward compatibility. - * - * @todo stubs to create and organize artifacts,etc. - * - * @todo data driven test suite? - * - * @todo create artifact for each release, name the artifacts systematically, - * e.g., test.release.(RW|WORM).jnl or test.release.seg. 
Collect a list of - * the created artifacts and run each test against each of the versions of - * the artifact. - * - * @todo Force artifact file name case for file system compatibility? - * - * @todo test journal (WORM and RW), btree, index segment, row store, persistent - * data structures (checkpoints, index metadata, tuple serializers, etc.), - * RDF layer, RMI message formats, etc. - * - * @todo Specific tests for - * <p> - * Name2Addr and DefaultKeyBuilderFactory portability problem. See - * https://sourceforge.net/apps/trac/bigdata/ticket/193 - * <p> - * WORM global row store resolution problem introduced in the - * JOURNAL_HA_BRANCH. See - * https://sourceforge.net/apps/trac/bigdata/ticket/171#comment:5 - * <p> - * Sparse row store JDK encoding problem: - * https://sourceforge.net/apps/trac/bigdata/ticket/107 - */ -public class TestBinaryCompatibility extends TestCase2 { - - /** - * - */ - public TestBinaryCompatibility() { - } - - /** - * @param name - */ - public TestBinaryCompatibility(String name) { - super(name); - } - - /** - * @todo munge the release version into a name that is compatibility with - * the file system ("." to "_"). Store artifacts at each release? At - * each release in which an incompatibility is introduced? At each - * release in which a persistence capable data structure or change is - * introduced? - */ - static protected final File artifactDir = new File( - "bigdata-compatibility/src/resources/artifacts"); - - protected static class Version { - private final String version; - private final String revision; - public Version(String version,String revision) { - this.version = version; - this.revision = revision; - } - - /** - * The bigdata version number associated with the release. This is in - * the form <code>xx.yy.zz</code> - */ - public String getVersion() { - return version; - } - - /** - * The SVN repository revision associated with the release. This is in - * the form <code>####</code>. - */ - public String getRevision() { - return revision; - } - } - - /** - * Known release versions. - */ - protected static Version V_0_83_2 = new Version("0.83.2", "3349"); - - /** - * Tested Versions. - */ - protected Version[] versions = new Version[] { - V_0_83_2 - }; - - protected void setUp() throws Exception { - - Banner.banner(); - - super.setUp(); - - if (!artifactDir.exists()) { - - if (!artifactDir.mkdirs()) { - - throw new IOException("Could not create: " + artifactDir); - - } - - } - - for (Version version : versions) { - - final File versionDir = new File(artifactDir, version.getVersion()); - - if (!versionDir.exists()) { - - if (!versionDir.mkdirs()) { - - throw new IOException("Could not create: " + versionDir); - - } - - } - - } - - } - - protected void tearDown() throws Exception { - - super.tearDown(); - - } - - /** - * @throws Throwable - * - * @todo Each 'test' should run an instance of a class which knows how to - * create the appropriate artifacts and how to test them. 
- */ - public void test_WORM_compatibility_with_JOURNAL_HA_BRANCH() - throws Throwable { - - final Version version = V_0_83_2; - - final File versionDir = new File(artifactDir, version.getVersion()); - - final File artifactFile = new File(versionDir, getName() - + BufferMode.DiskWORM + Journal.Options.JNL); - - if (!artifactFile.exists()) { - - createArtifact(artifactFile); - - } - - verifyArtifact(artifactFile); - - } - - protected void createArtifact(final File artifactFile) throws Throwable { - - if (log.isInfoEnabled()) - log.info("Creating: " + artifactFile); - - final Properties properties = new Properties(); - - properties.setProperty(Journal.Options.FILE, artifactFile.toString()); - - properties.setProperty(Journal.Options.INITIAL_EXTENT, "" - + Journal.Options.minimumInitialExtent); - - final Journal journal = new Journal(properties); - - try { - - final IndexMetadata md = new IndexMetadata(UUID.randomUUID()); - - final IIndex ndx = journal.registerIndex("kb.spo.SPO", md); - - ndx.insert(1,1); - - journal.commit(); - - // reduce to minimum footprint. - journal.truncate(); - - } catch (Throwable t) { - - journal.destroy(); - - throw new RuntimeException(t); - - } finally { - - if (journal.isOpen()) - journal.close(); - - } - - } - - protected void verifyArtifact(final File artifactFile) throws Throwable { - - if (log.isInfoEnabled()) - log.info("Verifying: " + artifactFile); - - final Properties properties = new Properties(); - - properties.setProperty(Journal.Options.FILE, artifactFile.toString()); - - final Journal journal = new Journal(properties); - - try { - - final IIndex ndx = journal.getIndex("kb.spo.SPO"); - - assertNotNull(ndx); - - assertEquals(1,ndx.lookup(1)); - - } finally { - - journal.close(); - - } - - } - -} Copied: branches/CHANGE_SET_BRANCH/bigdata-compatibility/src/test/com/bigdata/journal/TestBinaryCompatibility.java (from rev 3976, trunk/bigdata-compatibility/src/test/com/bigdata/journal/TestBinaryCompatibility.java) =================================================================== --- branches/CHANGE_SET_BRANCH/bigdata-compatibility/src/test/com/bigdata/journal/TestBinaryCompatibility.java (rev 0) +++ branches/CHANGE_SET_BRANCH/bigdata-compatibility/src/test/com/bigdata/journal/TestBinaryCompatibility.java 2010-11-23 14:17:27 UTC (rev 3977) @@ -0,0 +1,276 @@ +/* + +Copyright (C) SYSTAP, LLC 2006-2008. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +*/ +/* + * Created on Nov 19, 2010 + */ +package com.bigdata.journal; + +import java.io.File; +import java.io.IOException; +import java.util.Properties; +import java.util.UUID; + +import junit.framework.TestCase2; + +import com.bigdata.Banner; +import com.bigdata.btree.IIndex; +import com.bigdata.btree.IndexMetadata; + +/** + * Test suite for binary compatibility, portability, and forward compatibility + * or automated migration of persistent stores and persistence or serialization + * capable objects across different bigdata releases. The tests in this suite + * rely on artifacts which are archived within SVN. + * + * @todo create w/ small extent and truncate (RW store does not support + * truncate). + * + * @todo test binary migration and forward compatibility. + * + * @todo stubs to create and organize artifacts,etc. + * + * @todo data driven test suite? + * + * @todo create artifact for each release, name the artifacts systematically, + * e.g., test.release.(RW|WORM).jnl or test.release.seg. Collect a list of + * the created artifacts and run each test against each of the versions of + * the artifact. + * + * @todo Force artifact file name case for file system compatibility? + * + * @todo test journal (WORM and RW), btree, index segment, row store, persistent + * data structures (checkpoints, index metadata, tuple serializers, etc.), + * RDF layer, RMI message formats, etc. + * + * @todo Specific tests for + * <p> + * Name2Addr and DefaultKeyBuilderFactory portability problem. See + * https://sourceforge.net/apps/trac/bigdata/ticket/193 + * <p> + * WORM global row store resolution problem introduced in the + * JOURNAL_HA_BRANCH. See + * https://sourceforge.net/apps/trac/bigdata/ticket/171#comment:5 + * <p> + * Sparse row store JDK encoding problem: + * https://sourceforge.net/apps/trac/bigdata/ticket/107 + */ +public class TestBinaryCompatibility extends TestCase2 { + + /** + * + */ + public TestBinaryCompatibility() { + } + + /** + * @param name + */ + public TestBinaryCompatibility(String name) { + super(name); + } + + /** + * @todo munge the release version into a name that is compatibility with + * the file system ("." to "_"). Store artifacts at each release? At + * each release in which an incompatibility is introduced? At each + * release in which a persistence capable data structure or change is + * introduced? + */ + static protected final File artifactDir = new File( + "bigdata-compatibility/src/resources/artifacts"); + + protected static class Version { + private final String version; + private final String revision; + public Version(String version,String revision) { + this.version = version; + this.revision = revision; + } + + /** + * The bigdata version number associated with the release. This is in + * the form <code>xx.yy.zz</code> + */ + public String getVersion() { + return version; + } + + /** + * The SVN repository revision associated with the release. This is in + * the form <code>####</code>. + */ + public String getRevision() { + return revision; + } + } + + /** + * Known release versions. + */ + protected static Version V_0_83_2 = new Version("0.83.2", "3349"); + + /** + * Tested Versions. 
+ */ + protected Version[] versions = new Version[] { + V_0_83_2 + }; + + protected void setUp() throws Exception { + + Banner.banner(); + + super.setUp(); + + if (!artifactDir.exists()) { + + if (!artifactDir.mkdirs()) { + + throw new IOException("Could not create: " + artifactDir); + + } + + } + + for (Version version : versions) { + + final File versionDir = new File(artifactDir, version.getVersion()); + + if (!versionDir.exists()) { + + if (!versionDir.mkdirs()) { + + throw new IOException("Could not create: " + versionDir); + + } + + } + + } + + } + + protected void tearDown() throws Exception { + + super.tearDown(); + + } + + /** + * @throws Throwable + * + * @todo Each 'test' should run an instance of a class which knows how to + * create the appropriate artifacts and how to test them. + */ + public void test_WORM_compatibility_with_JOURNAL_HA_BRANCH() + throws Throwable { + + final Version version = V_0_83_2; + + final File versionDir = new File(artifactDir, version.getVersion()); + + final File artifactFile = new File(versionDir, getName() + + BufferMode.DiskWORM + Journal.Options.JNL); + + if (!artifactFile.exists()) { + + createArtifact(artifactFile); + + } + + verifyArtifact(artifactFile); + + } + + protected void createArtifact(final File artifactFile) throws Throwable { + + if (log.isInfoEnabled()) + log.info("Creating: " + artifactFile); + + final Properties properties = new Properties(); + + properties.setProperty(Journal.Options.FILE, artifactFile.toString()); + + properties.setProperty(Journal.Options.INITIAL_EXTENT, "" + + Journal.Options.minimumInitialExtent); + + final Journal journal = new Journal(properties); + + try { + + final IndexMetadata md = new IndexMetadata(UUID.randomUUID()); + + final IIndex ndx = journal.registerIndex("kb.spo.SPO", md); + + ndx.insert(1,1); + + journal.commit(); + + // reduce to minimum footprint. + journal.truncate(); + + } catch (Throwable t) { + + journal.destroy(); + + throw new RuntimeException(t); + + } finally { + + if (journal.isOpen()) + journal.close(); + + } + + } + + protected void verifyArtifact(final File artifactFile) throws Throwable { + + if (log.isInfoEnabled()) + log.info("Verifying: " + artifactFile); + + final Properties properties = new Properties(); + + properties.setProperty(Journal.Options.FILE, artifactFile.toString()); + + final Journal journal = new Journal(properties); + + try { + + final IIndex ndx = journal.getIndex("kb.spo.SPO"); + + assertNotNull(ndx); + + assertEquals(1,ndx.lookup(1)); + + } finally { + + journal.close(); + + } + + } + +} Modified: branches/CHANGE_SET_BRANCH/bigdata-perf/README.txt =================================================================== --- branches/CHANGE_SET_BRANCH/bigdata-perf/README.txt 2010-11-22 21:12:22 UTC (rev 3976) +++ branches/CHANGE_SET_BRANCH/bigdata-perf/README.txt 2010-11-23 14:17:27 UTC (rev 3977) @@ -1,2 +1,6 @@ This module contains drivers for a variety of data sets and benchmarks used as -part of a performance test suite. \ No newline at end of file +part of a performance test suite. + +Note: You must run "ant bundleJar" in the top-level directory first. This will +build the bigdata code base and bundle together the various dependencies so they +will be available for the ant scripts in this module. 
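The TestBinaryCompatibility harness copied in above leaves its data-driven ambition as a set of @todo notes. One way the existing pieces could be looped over is sketched here; the test method name and the loop are assumptions rather than committed code, and the sketch sticks to the DiskWORM mode exercised by the committed test while reusing its createArtifact/verifyArtifact helpers and the versions array.

    /**
     * Sketch of the data-driven variant suggested by the @todo notes: run the
     * create-if-missing / verify cycle against every known release artifact.
     */
    public void test_WORM_compatibility_all_versions() throws Throwable {

        for (Version version : versions) {

            final File versionDir = new File(artifactDir, version.getVersion());

            // Same naming scheme as the committed single-version test.
            final File artifactFile = new File(versionDir, getName()
                    + BufferMode.DiskWORM + Journal.Options.JNL);

            if (!artifactFile.exists()) {

                // Writes a minimal WORM journal containing one index entry.
                createArtifact(artifactFile);

            }

            // Re-opens the journal and verifies the index entry is readable.
            verifyArtifact(artifactFile);

        }

    }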
Modified: branches/CHANGE_SET_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/vocab/BaseVocabulary.java =================================================================== --- branches/CHANGE_SET_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/vocab/BaseVocabulary.java 2010-11-22 21:12:22 UTC (rev 3976) +++ branches/CHANGE_SET_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/vocab/BaseVocabulary.java 2010-11-23 14:17:27 UTC (rev 3977) @@ -65,6 +65,11 @@ final static public Logger log = Logger.getLogger(BaseVocabulary.class); /** + * The serialVersionUID as reported by the trunk on Oct 6, 2010. + */ + private static final long serialVersionUID = 1560142397515291331L; + + /** * The database that is the authority for the defined terms and term * identifiers. This will be <code>null</code> when the de-serialization * ctor is used. Modified: branches/CHANGE_SET_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestSids.java =================================================================== --- branches/CHANGE_SET_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestSids.java 2010-11-22 21:12:22 UTC (rev 3976) +++ branches/CHANGE_SET_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestSids.java 2010-11-23 14:17:27 UTC (rev 3977) @@ -44,6 +44,8 @@ import com.bigdata.rdf.vocab.NoVocabulary; /** + * Test case for reverse lookup from SID to statement. + * * @author <a href="mailto:mrp...@us...">Mike Personick</a> * @version $Id$ */ Modified: branches/CHANGE_SET_BRANCH/build.xml =================================================================== --- branches/CHANGE_SET_BRANCH/build.xml 2010-11-22 21:12:22 UTC (rev 3976) +++ branches/CHANGE_SET_BRANCH/build.xml 2010-11-23 14:17:27 UTC (rev 3977) @@ -1992,10 +1992,12 @@ <fileset dir="${bigdata.dir}/bigdata/lib"> <include name="**/*.jar" /> </fileset> +<!-- Jini should not be required for the Sesame WAR. <fileset dir="${bigdata.dir}/bigdata-jini/lib/jini/lib"> <include name="jini-core.jar" /> <include name="jini-ext.jar" /> </fileset> + --> </copy> <!-- copy resources to Workbench webapp. --> This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
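The BaseVocabulary hunk above pins the serialVersionUID that existing stores were written with, so that later structural edits to the class cannot silently change the generated UID and break deserialization of previously persisted vocabularies. When such a constant needs to be recovered for an older release, something along these lines can print it (illustrative only; it assumes the older release's jar is on the classpath and that the class in question is Serializable, since ObjectStreamClass.lookup() returns null otherwise):

import java.io.ObjectStreamClass;

public class PrintSerialVersionUID {

    public static void main(final String[] args) throws Exception {

        // ObjectStreamClass reports the UID that will actually be used:
        // the declared serialVersionUID if the class pins one, otherwise
        // the value computed from the class shape.
        final Class<?> cls = Class
                .forName("com.bigdata.rdf.vocab.BaseVocabulary");

        System.out.println(ObjectStreamClass.lookup(cls)
                .getSerialVersionUID());

    }

}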
From: <tho...@us...> - 2010-11-22 22:05:18
Revision: 3974 http://bigdata.svn.sourceforge.net/bigdata/?rev=3974&view=rev Author: thompsonbry Date: 2010-11-22 21:08:56 +0000 (Mon, 22 Nov 2010) Log Message: ----------- Added a sumRangeCounts counter to the join stats. Modified the runtime optimizer to use the sum of the range counts considered by the cutoff join when the cardinality estimate is recognized as a lower bound. Added test case for "bar" data set for the runtime optimizer. This query and data set ran into the lower bound estimate problem. The change in this commit fixed the query. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryLog.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunState.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBarData.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2010-11-22 19:22:05 UTC (rev 3973) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2010-11-22 21:08:56 UTC (rev 3974) @@ -142,6 +142,15 @@ * within the round. This would imply that we keep per join path limits. * The vertex and edge samples are already aware of the limit at which * they were last sampled so this should not cause any problems there. + * <p> + * A related option would be to deepen the samples only when we are in + * danger of cardinality estimation underflow. E.g., a per-path limit. + * Resampling vertices may only make sense when we increase the limit + * since otherwise we may find a different correlation with the new sample + * but the comparison of paths using one sample base with paths using a + * different sample base in a different round does not carry forward the + * cardinality estimates from the prior round (unless we do something like + * a weighted moving average). * * @todo When comparing choices among join paths having fully bound tails where * the estimated cardinality has also gone to zero, we should prefer to @@ -152,7 +161,7 @@ * those which reach the 1-var vertex. [In order to support this, we would * need a means to indicate that a fully bound access path should use an * index specified by the query optimizer rather than the primary index - * for the relation. In addition, this suggests that we should keep bloom + * for the relation. In addition, this suggests that we should keep bloom * filters for more than just the SPO(C) index in scale-out.] * * @todo Examine behavior when we do not have perfect covering indices. This @@ -187,6 +196,15 @@ String LIMIT = JoinGraph.class.getName() + ".limit"; int DEFAULT_LIMIT = 100; + + /** + * The <i>nedges</i> edges of the join graph having the lowest + * cardinality will be used to generate the initial join paths (default + * {@value #DEFAULT_NEDGES}). This must be a positive integer. 
+ */ + String NEDGES = JoinGraph.class.getName() + ".nedges"; + + int DEFAULT_NEDGES = 2; } /** @@ -207,6 +225,15 @@ } + /** + * @see Annotations#NEDGES + */ + public int getNEdges() { + + return getProperty(Annotations.NEDGES, Annotations.DEFAULT_NEDGES); + + } + public JoinGraph(final NV... anns) { this(BOpBase.NOARGS, NV.asMap(anns)); @@ -542,7 +569,7 @@ * there is an error in the query such that the join will not select * anything. This is not 100%, merely indicative. */ - public final int outputCount; + public final long outputCount; /** * The ratio of the #of input samples consumed to the #of output samples @@ -592,7 +619,9 @@ final int sourceSampleLimit,// final int limit,// final int inputCount, // - final int outputCount,// + final long outputCount,// + final double f, + final long estimatedCardinality, final IBindingSet[] sample) { if (sample == null) @@ -609,10 +638,10 @@ this.outputCount = outputCount; - f = outputCount == 0 ? 0 : (outputCount / (double) inputCount); - - estimatedCardinality = (long) (rangeCount * f); - + this.f = f; + + this.estimatedCardinality = estimatedCardinality; + if (sourceSampleExact && outputCount < limit) { /* * Note: If the entire source vertex is being fed into the @@ -1037,20 +1066,55 @@ if (log.isTraceEnabled()) log.trace(joinStats.toString()); - + /* * TODO Improve comments here. See if it is possible to isolate a * common base class which would simplify the setup of the cutoff * join and the computation of the sample stats. */ + // #of solutions in. + final int nin = (int) joinStats.inputSolutions.get(); + + // #of solutions out. + long nout = joinStats.outputSolutions.get(); + + // cumulative range count of the sampled access paths. + final long sumRangeCount = joinStats.accessPathRangeCount.get(); + + if (nin == 1 && nout == limit) { + /* + * We are getting [limit] solutions out for one solution in. In + * this case, (nout/nin) is a lower bound for the estimated + * cardinality of the edge. In fact, this condition suggests + * that the upper bound is a must better estimate of the + * cardinality of this join. Therefore, we replace [nout] with + * the sum of the range counts for the as-bound predicates + * considered by the cutoff join. + * + * For example, consider a join feeding a rangeCount of 16 into + * a rangeCount of 175000. With a limit of 100, we estimated the + * cardinality at 1600L (lower bound). In fact, the cardinality + * is 16*175000. This falsely low estimate can cause solutions + * which are really better to be dropped. + */ + nout = sumRangeCount; + + } + + final double f = nout == 0 ? 0 : (nout / (double) nin); + + final long estimatedCardinality = (long) (sourceSampleRangeCount * f); + final EdgeSample edgeSample = new EdgeSample( sourceSampleRangeCount, // sourceSampleExact, // @todo redundant with sourceSampleLimit sourceSampleLimit, // limit, // - (int) joinStats.inputSolutions.get(),// - (int) joinStats.outputSolutions.get(), // + nin,// + nout, // + f, // + estimatedCardinality, // result.toArray(new IBindingSet[result.size()])); if (log.isDebugEnabled()) @@ -1719,19 +1783,25 @@ * @param limit * The limit for sampling a vertex and the initial limit for * cutoff join evaluation. + * @param nedges + * The edges in the join graph are sorted in order of + * increasing cardinality and up to <i>nedges</i> of the + * edges having the lowest cardinality are used to form the + * initial set of join paths. 
For each edge selected to form + * a join path, the starting vertex will be the vertex of + * that edge having the lower cardinality. * * @throws Exception */ public Path runtimeOptimizer(final QueryEngine queryEngine, - final int limit) throws Exception { + final int limit, final int nedges) throws Exception { // Setup the join graph. - Path[] paths = round0(queryEngine, limit, 2/* nedges */); + Path[] paths = round0(queryEngine, limit, nedges); /* - * The input paths for the first round have two vertices (one edge - * is two vertices). Each round adds one more vertex, so we have - * three vertices by the end of round 1. We are done once we have + * The initial paths all have one edge, and hence two vertices. Each + * round adds one more vertex to each path. We are done once we have * generated paths which include all vertices. * * This occurs at round := nvertices - 1 @@ -1796,6 +1866,11 @@ * The maximum #of edges to choose. Those having the smallest * expected cardinality will be chosen. * + * @return An initial set of paths starting from any most <i>nedges</i>. + * For each of the <i>nedges</i> lowest cardinality edges, the + * starting vertex will be the vertex with the lowest + * cardinality for that edge. + * * @throws Exception */ public Path[] round0(final QueryEngine queryEngine, final int limit, @@ -2489,6 +2564,8 @@ private final JGraph g; private int limit; + + private int nedges; JoinGraphTask(final BOpContext<IBindingSet> context) { @@ -2499,9 +2576,14 @@ limit = getLimit(); + nedges = getNEdges(); + if (limit <= 0) throw new IllegalArgumentException(); + if (nedges <= 0) + throw new IllegalArgumentException(); + final IPredicate[] v = getVertices(); g = new JGraph(v); @@ -2515,7 +2597,7 @@ // Find the best join path. final Path p = g.runtimeOptimizer(context.getRunningQuery() - .getQueryEngine(), limit); + .getQueryEngine(), limit, nedges); // Factory avoids reuse of bopIds assigned to the predicates. final BOpIdFactory idFactory = new BOpIdFactory(); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryLog.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryLog.java 2010-11-22 19:22:05 UTC (rev 3973) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryLog.java 2010-11-22 21:08:56 UTC (rev 3974) @@ -158,6 +158,7 @@ sb.append("\tjoinRatio"); // expansion rate multipler in the solution count. sb.append("\taccessPathDups"); sb.append("\taccessPathCount"); + sb.append("\taccessPathRangeCount"); sb.append("\taccessPathChunksIn"); sb.append("\taccessPathUnitsIn"); // dynamics based on elapsed wall clock time. 
@@ -337,6 +338,8 @@ sb.append('\t'); sb.append(stats.accessPathCount.get()); sb.append('\t'); + sb.append(stats.accessPathRangeCount.get()); + sb.append('\t'); sb.append(stats.accessPathChunksIn.get()); sb.append('\t'); sb.append(stats.accessPathUnitsIn.get()); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunState.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunState.java 2010-11-22 19:22:05 UTC (rev 3973) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunState.java 2010-11-22 21:08:56 UTC (rev 3974) @@ -778,6 +778,7 @@ sb.append("\tunitsOut"); sb.append("\taccessPathDups"); sb.append("\taccessPathCount"); + sb.append("\taccessPathRangeCount"); sb.append("\taccessPathChunksIn"); sb.append("\taccessPathUnitsIn"); //{chunksIn=1,unitsIn=100,chunksOut=4,unitsOut=313,accessPathDups=0,accessPathCount=100,chunkCount=100,elementCount=313} @@ -929,6 +930,8 @@ sb.append('\t'); sb.append(t.accessPathCount.get()); sb.append('\t'); + sb.append(t.accessPathRangeCount.get()); + sb.append('\t'); sb.append(t.accessPathChunksIn.get()); sb.append('\t'); sb.append(t.accessPathUnitsIn.get()); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java 2010-11-22 19:22:05 UTC (rev 3973) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java 2010-11-22 21:08:56 UTC (rev 3974) @@ -41,6 +41,7 @@ import java.util.concurrent.FutureTask; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.RejectedExecutionException; +import java.util.concurrent.Semaphore; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicBoolean; @@ -114,7 +115,7 @@ final private AtomicLong deadline = new AtomicLong(Long.MAX_VALUE); /** - * The timestamp(ms) when the query begins to execute. + * The timestamp (ms) when the query begins to execute. */ final private AtomicLong startTime = new AtomicLong(System .currentTimeMillis()); @@ -171,10 +172,91 @@ } /** - * The maximum number of operator tasks which may be concurrently executor + * The maximum number of operator tasks which may be concurrently executed * for a given (bopId,shardId). + * + * @see QueryEngineTestAnnotations#MAX_CONCURRENT_TASKS_PER_OPERATOR_AND_SHARD */ final private int maxConcurrentTasksPerOperatorAndShard; + +// /** +// * The maximum #of concurrent tasks for this query across all operators and +// * shards. +// * +// * Note: This is not a safe option and MUST be removed. It is possible for +// * N-1 tasks to backup with the Nth task not running due to concurrent +// * execution of some of the N-t tasks. +// */ +// final private int maxConcurrentTasks = 10; + + /* + * FIXME Explore the use of this semaphore to limit the maximum #of messages + * further. (Note that placing a limit on messages would allow us to buffer + * potentially many chunks. That could be solved by making LocalChunkMessage + * transparent in terms of the #of chunks or _binding_sets_ which it is + * carrying, but let's take this one step at a time). + * + * The first issue is ensuring that the query continue to make progress when + * a semaphore with a limited #of permits is introduced. 
This is because the + * ChunkFutureTask only attempts to schedule the next task for a given + * (bopId,shardId) but we could have failed to accept outstanding work for + * any of a number of operator/shard combinations. Likewise, the QueryEngine + * tells the RunningQuery to schedule work each time a message is dropped + * onto the QueryEngine, but the signal to execute more work is lost if the + * permits were not available immediately. + * + * One possibility would be to have a delayed retry. Another would be to + * have ChunkTaskFuture try to run *any* messages, not just messages for the + * same (bopId,shardId). + * + * Also, when scheduling work, there needs to be some bias towards the + * downstream operators in the query plan in order to ensure that they get a + * chance to clear work from upstream operators. This suggests that we might + * carry an order[] and use it to scan the work queue -- or make the work + * queue a priority heap using the order[] to place a primary sort over the + * bopIds in terms of the evaluation order and letting the shardIds fall in + * increasing shard order so we have a total order for the priority heap (a + * total order may also require a tie breaker, but I think that the priority + * heap allows ties). + * + * This concept of memory overhead and permits would be associated with the + * workload waiting on a given node for processing. (In scale-out, we do not + * care how much data is moving in the cluster, only how much data is + * challenging an individual machine). + * + * This emphasize again why we need to get the data off of the Java heap. + * + * The same concept should apply for chained buffers. Maybe one way to do + * this is to allocate a fixed budget to each query for the Java heap and + * the C heap and then the query blocks or goes to disk. + */ +// /** +// * The maximum number of binding sets which may be outstanding before a task +// * which is producing binding sets will block. This value may be used to +// * limit the memory demand of a query in which some operators produce +// * binding sets faster than other operators can consume them. +// * +// * @todo This could be generalized to consider the Java heap separately from +// * the native heap as we get into the use of native ByteBuffers to +// * buffer intermediate results. +// * +// * @todo This is expressed in terms of messages and not {@link IBindingSet}s +// * because the {@link LocalChunkMessage} does not self-report the #of +// * {@link IBindingSet}s (or chunks). +// */ +// final private int maxOutstandingMessageCount = 100; +// +// /** +// * A counting semaphore used to limit the #of outstanding binding set chunks +// * which may be buffered before a producer will block when trying to emit +// * another chunk. +// * +// * @see HandleChunkBuffer#outputChunk(IBindingSet[]) +// * @see #scheduleNext(BSBundle) +// * +// * @see #maxOutstandingMessageCount +// */ +// final private Semaphore outstandingMessageSemaphore = new Semaphore(maxOutstandingMessageCount); /** * A collection of (bopId,partitionId) keys mapped onto a collection of @@ -471,6 +553,8 @@ this.bopIndex = BOpUtility.getIndex(query); + +// this.maxConcurrentTasksPerOperatorAndShard = 300; this.maxConcurrentTasksPerOperatorAndShard = query .getProperty( QueryEngineTestAnnotations.MAX_CONCURRENT_TASKS_PER_OPERATOR_AND_SHARD, @@ -1203,6 +1287,33 @@ return false; } } +// if (runState.getTotalRunningCount() > maxConcurrentTasks) { +// // Too many already running. 
+// return false; +// } +// { +// /* +// * Verify that we can acquire sufficient permits to do some +// * work. +// */ +// final BlockingQueue<IChunkMessage<IBindingSet>> queue = operatorQueues +// .get(bundle); +// if (queue == null || queue.isEmpty()) { +// // No work. +// return false; +// } +// // The queue could be increased, but this will be its minimum size. +// final int minQueueSize = queue.size(); +// if(!outstandingMessageSemaphore.tryAcquire(minQueueSize)) { +// // Not enough permits. +// System.err.println("Permits: required=" + minQueueSize +// + ", available=" +// + outstandingMessageSemaphore.availablePermits() +// + ", bundle=" + bundle); +// return false; +// } +// +// } // Remove the work queue for that (bopId,partitionId). final BlockingQueue<IChunkMessage<IBindingSet>> queue = operatorQueues .remove(bundle); @@ -1210,7 +1321,7 @@ // no work return false; } - // Drain the work queue. + // Drain the work queue for that (bopId,partitionId). final List<IChunkMessage<IBindingSet>> messages = new LinkedList<IChunkMessage<IBindingSet>>(); queue.drainTo(messages); final int nmessages = messages.size(); @@ -1218,9 +1329,11 @@ * Combine the messages into a single source to be consumed by a * task. */ + int nchunks = 1; final IMultiSourceAsynchronousIterator<IBindingSet[]> source = new MultiSourceSequentialAsynchronousIterator<IBindingSet[]>(messages.remove(0).getChunkAccessor().iterator()); for (IChunkMessage<IBindingSet> msg : messages) { source.add(msg.getChunkAccessor().iterator()); + nchunks++; } /* * Create task to consume that source. @@ -1852,13 +1965,23 @@ */ private void outputChunk(final IBindingSet[] e) { - stats.unitsOut.add(((Object[]) e).length); + final int chunkSize = e.length; + + stats.unitsOut.add(chunkSize); stats.chunksOut.increment(); - sinkMessagesOut.addAndGet(q.getChunkHandler().handleChunk(q, bopId, - sinkId, e)); + final int messagesOut = q.getChunkHandler().handleChunk(q, bopId, + sinkId, e); + sinkMessagesOut.addAndGet(messagesOut); + +// try { +// q.outstandingMessageSemaphore.acquire(); +// } catch (InterruptedException e1) { +// throw new RuntimeException(e1); +// } + } /** Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java 2010-11-22 19:22:05 UTC (rev 3973) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java 2010-11-22 21:08:56 UTC (rev 3974) @@ -218,6 +218,12 @@ public final CAT accessPathCount = new CAT(); /** + * The running sum of the range counts of the accepted as-bound access + * paths. + */ + public final CAT accessPathRangeCount = new CAT(); + + /** * The #of input solutions consumed (not just accepted). 
* <p> * This counter is highly correlated with {@link BOpStats#unitsIn} but @@ -333,6 +339,8 @@ accessPathCount.add(t.accessPathCount.get()); + accessPathRangeCount.add(t.accessPathRangeCount.get()); + accessPathChunksIn.add(t.accessPathChunksIn.get()); accessPathUnitsIn.add(t.accessPathUnitsIn.get()); @@ -358,6 +366,7 @@ protected void toString(final StringBuilder sb) { sb.append(",accessPathDups=" + accessPathDups.get()); sb.append(",accessPathCount=" + accessPathCount.get()); + sb.append(",accessPathRangeCount=" + accessPathRangeCount.get()); sb.append(",accessPathChunksIn=" + accessPathChunksIn.get()); sb.append(",accessPathUnitsIn=" + accessPathUnitsIn.get()); sb.append(",inputSolutions=" + inputSolutions.get()); @@ -1562,6 +1571,10 @@ stats.accessPathCount.increment(); + // the range count of the as-bound access path (should be cached). + stats.accessPathRangeCount.add(accessPath + .rangeCount(false/* exact */)); + if (accessPath.getPredicate() instanceof IStarJoin<?>) { handleStarJoin(); Added: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBarData.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBarData.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBarData.java 2010-11-22 21:08:56 UTC (rev 3974) @@ -0,0 +1,608 @@ +package com.bigdata.bop.rdf.joinGraph; + +import java.io.File; +import java.util.Arrays; +import java.util.Properties; + +import junit.framework.TestCase2; + +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.openrdf.rio.RDFFormat; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.BOpContextBase; +import com.bigdata.bop.BOpIdFactory; +import com.bigdata.bop.Constant; +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IPredicate; +import com.bigdata.bop.IVariable; +import com.bigdata.bop.NV; +import com.bigdata.bop.PipelineOp; +import com.bigdata.bop.Var; +import com.bigdata.bop.IPredicate.Annotations; +import com.bigdata.bop.controller.JoinGraph; +import com.bigdata.bop.controller.JoinGraph.JGraph; +import com.bigdata.bop.controller.JoinGraph.Path; +import com.bigdata.bop.engine.BOpStats; +import com.bigdata.bop.engine.QueryEngine; +import com.bigdata.bop.engine.QueryLog; +import com.bigdata.bop.engine.RunningQuery; +import com.bigdata.bop.fed.QueryEngineFactory; +import com.bigdata.journal.ITx; +import com.bigdata.journal.Journal; +import com.bigdata.rdf.model.BigdataLiteral; +import com.bigdata.rdf.model.BigdataURI; +import com.bigdata.rdf.model.BigdataValue; +import com.bigdata.rdf.model.BigdataValueFactory; +import com.bigdata.rdf.spo.SPOPredicate; +import com.bigdata.rdf.store.AbstractTripleStore; +import com.bigdata.rdf.store.DataLoader; +import com.bigdata.rdf.store.LocalTripleStore; +import com.bigdata.rdf.store.DataLoader.ClosureEnum; +import com.bigdata.relation.accesspath.IAsynchronousIterator; +import com.bigdata.relation.rule.IRule; +import com.bigdata.relation.rule.Rule; +import com.bigdata.relation.rule.eval.DefaultEvaluationPlan2; +import com.bigdata.relation.rule.eval.IRangeCountFactory; + +/** + * Unit tests for runtime query optimization using {@link JoinGraph} and the + * "bar data" test set. + * <p> + * Note: When running large queries, be sure to provide a sufficient heap, set + * the -server flag, etc. 
+ * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id: TestJoinGraph.java 3918 2010-11-08 21:31:17Z thompsonbry $ + */ +public class TestJoinGraphOnBarData extends TestCase2 { + + /** + * + */ + public TestJoinGraphOnBarData() { + } + + /** + * @param name + */ + public TestJoinGraphOnBarData(String name) { + super(name); + } + + @Override + public Properties getProperties() { + + final Properties p = new Properties(super.getProperties()); + +// p.setProperty(Journal.Options.BUFFER_MODE, BufferMode.Transient +// .toString()); + + p.setProperty(AbstractTripleStore.Options.QUADS_MODE, "true"); + + /* + * Don't compute closure in the data loader since it does TM, not + * database at once closure. + */ + p.setProperty(DataLoader.Options.CLOSURE, ClosureEnum.None.toString()); + + return p; + + } + + private Journal jnl; + + private AbstractTripleStore database; + + /** The initial sampling limit. */ + private final int limit = 100; + + /** The #of edges considered for the initial paths. */ + private final int nedges = 2; + + private QueryEngine queryEngine; + + private String namespace; + + /** + * When true, do a warm up run of the plan generated by the static query + * optimizer. + */ + private final boolean warmUp = false; + + /** + * The #of times to run each query. Use N GT ONE (1) if you want to converge + * onto the hot query performance. + */ + private final int ntrials = 1; + + /** + * When <code>true</code> runs the dynamic query optimizer and then evaluates + * the generated query plan. + */ + private final boolean runRuntimeQueryOptimizer = true; + + /** + * When <code>true</code> runs the static query optimizer and then evaluates + * the generated query plan. + */ + private final boolean runStaticQueryOptimizer = true; + + /** + * Loads LUBM U1 into a triple store. + */ + protected void setUp() throws Exception { + +// QueryLog.logTableHeader(); + + super.setUp(); + +// System.err.println(UUID.randomUUID().toString()); +// System.exit(0); + + final Properties properties = getProperties(); + + final File file; + { + /* + * Use a specific file generated by some external process. + */ + file = new File("/data/bardata/bigdata-bardata.WORM.jnl"); + namespace = "bardata"; + } + + properties.setProperty(Journal.Options.FILE, file.toString()); + +// properties.setProperty(Journal.Options.BUFFER_MODE,BufferMode.DiskRW.toString()); + +// file.delete(); + + if (!file.exists()) { + + jnl = new Journal(properties); + + final AbstractTripleStore tripleStore = new LocalTripleStore(jnl, + namespace, ITx.UNISOLATED, properties); + + // Create the KB instance. + tripleStore.create(); + + tripleStore.getDataLoader().loadFiles( + new File("/root/Desktop/Downloads/barData/barData.trig"), + null/* baseURI */, RDFFormat.TRIG, null/* defaultGraph */, + null/* filter */); + + // Truncate the journal (trim its size). + jnl.truncate(); + + // Commit the journal. + jnl.commit(); + + // Close the journal. + jnl.close(); + + } + + // Open the test resource. 
+ jnl = new Journal(properties); + + queryEngine = QueryEngineFactory + .getQueryController(jnl/* indexManager */); + + database = (AbstractTripleStore) jnl.getResourceLocator().locate( + namespace, jnl.getLastCommitTime()); + + if (database == null) + throw new RuntimeException("Not found: " + namespace); + + } + + protected void tearDown() throws Exception { + + if (database != null) { + database = null; + } + + if (queryEngine != null) { + queryEngine.shutdownNow(); + queryEngine = null; + } + + if(jnl != null) { + jnl.close(); + jnl = null; + } + + super.tearDown(); + + } + + /** + * Sample query for the synthetic data set. The query is arranged in a known + * good order. + * <p> + * Note: The runtime optimizer estimate of the cardinality of the edge [5 4] + * in this query is a lower bound, which makes this an interesting test + * case. The runtime optimizer detects this lower bound and replaces [nout] + * with the sum of the range count of the as-bound predicates for the join, + * which leads to an efficient query plan. + * + * <pre> + * SELECT ?f (COUNT(?d) AS ?total) WHERE { + * ?a <http://test/bar#beverageType> "Beer" . + * ?value <http://test/bar#orderItems> ?a. + * ?value <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://test/bar#Order> . + * ?a <http://test/bar#beverageType> ?d. + * ?value <http://test/bar#employee> ?b. + * ?b <http://test/bar#employeeNum> ?f. + * } GROUP BY ?f + * </pre> + * + * Note: Mike suggests that it is easier to read the query like this: + * + * <pre> + * SELECT ?employeeNum (COUNT(?type) AS ?total) + * WHERE { + * ?order <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> + * <http://test/bar#Order> . + * ?order <http://test/bar#orderItems> ?item . + * ?item <http://test/bar#beverageType> "Beer" . + * ?item <http://test/bar#beverageType> ?type . + * + * ?order <http://test/bar#employee> ?employee . + * + * ?employee <http://test/bar#employeeNum> ?employeeNum . + * } GROUP BY ?employeeNum + * </pre> + * + * @throws Exception + */ + public void test_query() throws Exception { + + /* + * Resolve terms against the lexicon. + */ + final BigdataValueFactory valueFactory = database.getLexiconRelation() + .getValueFactory(); + + final BigdataURI rdfType = valueFactory + .createURI("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"); + + final BigdataLiteral beer = valueFactory.createLiteral("Beer"); + + final BigdataURI beverageType = valueFactory + .createURI("http://test/bar#beverageType"); + + final BigdataURI orderItems = valueFactory + .createURI("http://test/bar#orderItems"); + + final BigdataURI Order = valueFactory + .createURI("http://test/bar#Order"); + + final BigdataURI employee = valueFactory + .createURI("http://test/bar#employee"); + + final BigdataURI employeeNum = valueFactory + .createURI("http://test/bar#employeeNum"); + + final BigdataValue[] terms = new BigdataValue[] { rdfType, beer, + beverageType, orderItems, Order, employee, employeeNum }; + + // resolve terms. 
+ database.getLexiconRelation() + .addTerms(terms, terms.length, true/* readOnly */); + + { + for (BigdataValue tmp : terms) { + System.out.println(tmp + " : " + tmp.getIV()); + if (tmp.getIV() == null) + throw new RuntimeException("Not defined: " + tmp); + } + } + + final IPredicate[] preds; + final IPredicate p0, p1, p2, p3, p4, p5; + { +// a, value, d, b, f + final IVariable<?> a = Var.var("a"); + final IVariable<?> value = Var.var("value"); + final IVariable<?> d = Var.var("d"); + final IVariable<?> b = Var.var("b"); + final IVariable<?> f = Var.var("f"); + + final IVariable<?> g0 = Var.var("g0"); + final IVariable<?> g1 = Var.var("g1"); + final IVariable<?> g2 = Var.var("g2"); + final IVariable<?> g3 = Var.var("g3"); + final IVariable<?> g4 = Var.var("g4"); + final IVariable<?> g5 = Var.var("g5"); + + + // The name space for the SPO relation. + final String[] spoRelation = new String[] { namespace + ".spo" }; + + // The name space for the Lexicon relation. + final String[] lexRelation = new String[] { namespace + ".lex" }; + + final long timestamp = jnl.getLastCommitTime(); + + int nextId = 0; + +// ?a <http://test/bar#beverageType> "Beer" . + p0 = new SPOPredicate(new BOp[] { a, + new Constant(beverageType.getIV()), + new Constant(beer.getIV()), g0 },// + new NV(BOp.Annotations.BOP_ID, nextId++),// + new NV(Annotations.TIMESTAMP, timestamp),// + new NV(IPredicate.Annotations.RELATION_NAME, spoRelation)// + ); + + // ?value <http://test/bar#orderItems> ?a. + p1 = new SPOPredicate(new BOp[] { value, + new Constant(orderItems.getIV()), a, g1 },// + new NV(BOp.Annotations.BOP_ID, nextId++),// + new NV(Annotations.TIMESTAMP, timestamp),// + new NV(IPredicate.Annotations.RELATION_NAME, spoRelation)// + ); + +// ?value <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://test/bar#Order> . + p2 = new SPOPredicate(new BOp[] { value, + new Constant(rdfType.getIV()), + new Constant(Order.getIV()), g2 },// + new NV(BOp.Annotations.BOP_ID, nextId++),// + new NV(Annotations.TIMESTAMP, timestamp),// + new NV(IPredicate.Annotations.RELATION_NAME, spoRelation)// + ); + +// ?a <http://test/bar#beverageType> ?d. + p3 = new SPOPredicate(new BOp[] { a, + new Constant(beverageType.getIV()), d, g3 },// + new NV(BOp.Annotations.BOP_ID, nextId++),// + new NV(Annotations.TIMESTAMP, timestamp),// + new NV(IPredicate.Annotations.RELATION_NAME, spoRelation)// + ); + +// ?value <http://test/bar#employee> ?b. + p4 = new SPOPredicate(new BOp[] { value, + new Constant(employee.getIV()), b, g4 },// + new NV(BOp.Annotations.BOP_ID, nextId++),// + new NV(Annotations.TIMESTAMP, timestamp),// + new NV(IPredicate.Annotations.RELATION_NAME, spoRelation)// + ); + +// ?b <http://test/bar#employeeNum> ?f. + p5 = new SPOPredicate(new BOp[] { b, + new Constant(employeeNum.getIV()), f, g5 },// + new NV(BOp.Annotations.BOP_ID, nextId++),// + new NV(Annotations.TIMESTAMP, timestamp),// + new NV(IPredicate.Annotations.RELATION_NAME, spoRelation)// + ); + + // the vertices of the join graph (the predicates). + preds = new IPredicate[] { p0, p1, p2, p3, p4, p5 }; + + } + + doTest(preds); + + } // LUBM_Q9 + + /** + * + * @param preds + * @throws Exception + * + * @todo To actually test anything this needs to compare the results (or at + * least the #of result). We could also test for known good join + * orders as generated by the runtime optimizer, but that requires a + * known data set (e.g., U1 or U50) and non-random sampling. + * + * @todo This is currently providing a "hot run" comparison by a series of + * trials. 
This means that the IO costs are effectively being wiped + * away, assuming that the file system cache is larger than the data + * set. The other way to compare performance is a cold cache / cold + * JVM run using the known solutions produced by the runtime versus + * static query optimizers. + */ + private void doTest(final IPredicate[] preds) throws Exception { + + if (warmUp) + runQuery("Warmup", queryEngine, runStaticQueryOptimizer(preds)); + + /* + * Run the runtime query optimizer once (its cost is not counted + * thereafter). + */ + final IPredicate[] runtimePredOrder = runRuntimeQueryOptimizer(preds); + + long totalRuntimeTime = 0; + long totalStaticTime = 0; + + for (int i = 0; i < ntrials; i++) { + + final String RUNTIME = getName() + " : runtime["+i+"] :"; + + final String STATIC = getName() + " : static ["+i+"] :"; + + final String GIVEN = getName() + " : given ["+i+"] :"; + + if (true/* originalOrder */) { + + runQuery(GIVEN, queryEngine, preds); + + } + + if (runStaticQueryOptimizer) { + + totalStaticTime += runQuery(STATIC, queryEngine, + runStaticQueryOptimizer(preds)); + + } + + if (runRuntimeQueryOptimizer) { + + /* + * Run the runtime query optimizer each time (its overhead is + * factored into the running comparison of the two query + * optimizers). + */ +// final IPredicate[] runtimePredOrder = runRuntimeQueryOptimizer(new JGraph( +// preds)); + + // Evaluate the query using the selected join order. + totalRuntimeTime += runQuery(RUNTIME, queryEngine, + runtimePredOrder); + + } + + } + + if(runStaticQueryOptimizer&&runRuntimeQueryOptimizer) { + System.err.println(getName() + " : Total times" + // + ": static=" + totalStaticTime + // + ", runtime=" + totalRuntimeTime + // + ", delta(static-runtime)=" + (totalStaticTime - totalRuntimeTime)); + } + + } + + /** + * Apply the runtime query optimizer. + * <p> + * Note: This temporarily raises the {@link QueryLog} log level during + * sampling to make the log files cleaner (this can not be done for a + * deployed system since the logger level is global and there are concurrent + * query mixes). + * + * @return The predicates in order as recommended by the runtime query + * optimizer. + * + * @throws Exception + */ + private IPredicate[] runRuntimeQueryOptimizer(final IPredicate[] preds) throws Exception { + + final Logger tmp = Logger.getLogger(QueryLog.class); + final Level oldLevel = tmp.getEffectiveLevel(); + tmp.setLevel(Level.WARN); + + try { + + final JGraph g = new JGraph(preds); + + final Path p = g.runtimeOptimizer(queryEngine, limit, nedges); + +// System.err.println(getName() + " : runtime optimizer join order " +// + Arrays.toString(Path.getVertexIds(p.edges))); + + return p.getPredicates(); + + } finally { + + tmp.setLevel(oldLevel); + + } + + } + + /** + * Apply the static query optimizer. + * + * @return The predicates in order as recommended by the static query + * optimizer. + */ + private IPredicate[] runStaticQueryOptimizer(final IPredicate[] preds) { + + final BOpContextBase context = new BOpContextBase(queryEngine); + + final IRule rule = new Rule("tmp", null/* head */, preds, null/* constraints */); + + final DefaultEvaluationPlan2 plan = new DefaultEvaluationPlan2( + new IRangeCountFactory() { + + public long rangeCount(final IPredicate pred) { + return context.getRelation(pred).getAccessPath(pred) + .rangeCount(false); + } + + }, rule); + + // evaluation plan order. 
+ final int[] order = plan.getOrder(); + + final int[] ids = new int[order.length]; + + final IPredicate[] out = new IPredicate[order.length]; + + for (int i = 0; i < order.length; i++) { + + out[i] = preds[order[i]]; + + ids[i] = out[i].getId(); + + } + +// System.err.println(getName() + " : static optimizer join order " +// + Arrays.toString(ids)); + + return out; + + } + + /** + * Run a query joining a set of {@link IPredicate}s in the given join order. + * + * @return The elapsed query time (ms). + */ + private static long runQuery(final String msg, + final QueryEngine queryEngine, final IPredicate[] predOrder) + throws Exception { + + final BOpIdFactory idFactory = new BOpIdFactory(); + + final int[] ids = new int[predOrder.length]; + + for(int i=0; i<ids.length; i++) { + + final IPredicate<?> p = predOrder[i]; + + idFactory.reserve(p.getId()); + + ids[i] = p.getId(); + + } + + final PipelineOp queryOp = JoinGraph.getQuery(idFactory, predOrder); + + // submit query to runtime optimizer. + final RunningQuery q = queryEngine.eval(queryOp); + + // drain the query results. + long nout = 0; + long nchunks = 0; + final IAsynchronousIterator<IBindingSet[]> itr = q.iterator(); + try { + while (itr.hasNext()) { + final IBindingSet[] chunk = itr.next(); + nout += chunk.length; + nchunks++; + } + } finally { + itr.close(); + } + + // check the Future for the query. + q.get(); + + // show the results. + final BOpStats stats = q.getStats().get(queryOp.getId()); + + System.err.println(msg + " : ids=" + Arrays.toString(ids) + + ", elapsed=" + q.getElapsed() + ", nout=" + nout + + ", nchunks=" + nchunks + ", stats=" + stats); + + return q.getElapsed(); + + } + +} Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java 2010-11-22 19:22:05 UTC (rev 3973) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java 2010-11-22 21:08:56 UTC (rev 3974) @@ -75,7 +75,12 @@ * FIXME There is now an option to converge onto the hot query * performance. Add an option to drop the file system cache and to * reopen the journal in order to converge on the cold query - * performance for the selected join orderings. + * performance for the selected join orderings. (Or, either devise a + * benchmark which can be used assess the relative performance with + * disk IO or use the LUBM benchmark at a data scale which would force + * queries to touch the disk (this actually requires a very high data + * scale for LUBM since the complex queries are not parameterized and + * tend to fully cache the relevant data on their first presentation.) * * FIXME Looks like U1000 Q2 runs into GC OH problems with both the * static and runtime query optimizers. Track down why. Note that Q2 @@ -167,6 +172,9 @@ /** The initial sampling limit. */ private final int limit = 100; + /** The #of edges considered for the initial paths. */ + private final int nedges = 2; + private QueryEngine queryEngine; private String namespace; @@ -184,10 +192,16 @@ private static final UUID resourceId = UUID.fromString("bb93d970-0cc4-48ca-ba9b-123412683b3d"); /** + * When true, do a warm up run of the plan generated by the static query + * optimizer. + */ + private final boolean warmUp = false; + + /** * The #of times to run each query. 
Use N GT ONE (1) if you want to converge * onto the hot query performance. */ - private final int ntrials = 5; + private final int ntrials = 1; /** * When <code>true</code> runs the dynamic query optimizer and then evaluates @@ -206,6 +220,8 @@ */ protected void setUp() throws Exception { +// QueryLog.logTableHeader(); + super.setUp(); // System.err.println(UUID.randomUUID().toString()); @@ -228,7 +244,7 @@ /* * Use a specific file generated by some external process. */ - final int nuniv = 50; + final int nuniv = 1000; file = new File("/data/lubm/U" + nuniv + "/bigdata-lubm.WORM.jnl"); namespace = "LUBM_U" + nuniv; } @@ -803,14 +819,14 @@ */ private void doTest(final IPredicate[] preds) throws Exception { - runQuery("Warmup", queryEngine, runStaticQueryOptimizer(preds)); + if (warmUp) + runQuery("Warmup", queryEngine, runStaticQueryOptimizer(preds)); /* * Run the runtime query optimizer once (its cost is not counted * thereafter). */ - final IPredicate[] runtimePredOrder = runRuntimeQueryOptimizer(new JGraph( - preds)); + final IPredicate[] runtimePredOrder = runRuntimeQueryOptimizer(preds); long totalRuntimeTime = 0; long totalStaticTime = 0; @@ -868,7 +884,7 @@ * * @throws Exception */ - private IPredicate[] runRuntimeQueryOptimizer(final JGraph g) throws Exception { + private IPredicate[] runRuntimeQueryOptimizer(final IPredicate[] preds) throws Exception { final Logger tmp = Logger.getLogger(QueryLog.class); final Level oldLevel = tmp.getEffectiveLevel(); @@ -876,7 +892,9 @@ try { - final Path p = g.runtimeOptimizer(queryEngine, limit); + final JGraph g = new JGraph(preds); + + final Path p = g.runtimeOptimizer(queryEngine, limit, nedges); // System.err.println(getName() + " : runtime optimizer join order " // + Arrays.toString(Path.getVertexIds(p.edges)));
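The heart of r3974 above is its treatment of cardinality-estimate underflow in the cutoff join: when a single input solution already produces the full sampling limit of outputs, nout/nin is only a lower bound, so the optimizer substitutes the sum of the as-bound range counts recorded by the new accessPathRangeCount counter. Below is a compact sketch of that estimation rule, using the 16-into-175,000 example from the inline comment; the class and method names are illustrative stand-ins, not the actual JoinGraph/EdgeSample API.

/**
 * Illustrative sketch (not the bigdata JoinGraph code) of the cutoff-join
 * cardinality estimate, including the lower-bound correction from r3974.
 */
public class EdgeCardinalitySketch {

    /**
     * @param sourceRangeCount range count of the source vertex.
     * @param sumRangeCount    running sum of the range counts of the as-bound
     *                         access paths visited by the cutoff join.
     * @param limit            the sampling (cutoff) limit.
     * @param nin              #of source solutions consumed.
     * @param nout             #of solutions produced before the cutoff.
     */
    static long estimateCardinality(final long sourceRangeCount,
            final long sumRangeCount, final int limit, final int nin,
            long nout) {

        if (nin == 1 && nout == limit) {
            /*
             * The join was cut off after producing [limit] solutions from a
             * single input solution, so nout/nin is only a lower bound.
             * Substitute the sum of the as-bound range counts instead.
             */
            nout = sumRangeCount;
        }

        final double f = nout == 0 ? 0 : (nout / (double) nin);

        return (long) (sourceRangeCount * f);
    }

    public static void main(final String[] args) {

        // Example from the commit comment: a range count of 16 feeding a
        // range count of 175,000 with limit = 100. Without the correction the
        // estimate bottoms out at 16 * (100/1) = 1,600; with it the estimate
        // becomes 16 * 175,000 = 2,800,000.
        System.out.println(estimateCardinality(16L, 175000L, 100, 1, 100L));

    }

}

The real EdgeSample additionally handles the case where the source sample was exact and the output count stayed under the limit (the sourceSampleExact branch in the diff), which this sketch omits.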
From: <tho...@us...> - 2010-11-22 22:05:11
Revision: 3975 http://bigdata.svn.sourceforge.net/bigdata/?rev=3975&view=rev Author: thompsonbry Date: 2010-11-22 21:11:08 +0000 (Mon, 22 Nov 2010) Log Message: ----------- Commented out a test case which was a NOP. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/store/TestTripleStore.java Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/store/TestTripleStore.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/store/TestTripleStore.java 2010-11-22 21:08:56 UTC (rev 3974) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/store/TestTripleStore.java 2010-11-22 21:11:08 UTC (rev 3975) @@ -95,18 +95,18 @@ super(name); } - /** - * Verify that {@link AbstractTripleStore#isLiteral(long)} and friends all - * reported <code>false</code> for {@link IRawTripleStore#NULL}. - */ - public void test_bitFlagsReportFalseForNULL() { - -// assertFalse(VTE.isStatement(TermId.NULL)); -// assertFalse(VTE.isLiteral(TermId.NULL)); -// assertFalse(VTE.isURI(TermId.NULL)); -// assertFalse(VTE.isBNode(TermId.NULL)); - - } +// /** +// * Verify that {@link AbstractTripleStore#isLiteral(long)} and friends all +// * reported <code>false</code> for {@link IRawTripleStore#NULL}. +// */ +// public void test_bitFlagsReportFalseForNULL() { +// +//// assertFalse(VTE.isStatement(TermId.NULL)); +//// assertFalse(VTE.isLiteral(TermId.NULL)); +//// assertFalse(VTE.isURI(TermId.NULL)); +//// assertFalse(VTE.isBNode(TermId.NULL)); +// +// } /** * Test helper verifies that the term is not in the lexicon, adds the term