From: <dm...@us...> - 2010-09-28 10:04:30
Revision: 3648 http://bigdata.svn.sourceforge.net/bigdata/?rev=3648&view=rev Author: dmacgbr Date: 2010-09-28 10:04:24 +0000 (Tue, 28 Sep 2010) Log Message: ----------- Modified operators and corresponding tests for the 'not yet bound' issue. (See Trac #179) Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/EQ.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/EQConstant.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/INBinarySearch.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/INHashMap.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/constraint/TestEQ.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/constraint/TestEQConstant.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/constraint/TestINConstraint.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/EQ.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/EQ.java 2010-09-28 09:48:02 UTC (rev 3647) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/EQ.java 2010-09-28 10:04:24 UTC (rev 3648) @@ -73,13 +73,13 @@ final IConstant<?> x = s.get((IVariable<?>) get(0)/* x */); if (x == null) - return false; // not bound. + return true; // not yet bound. // get binding for "y". final IConstant<?> y = s.get((IVariable<?>) get(1)/* y */); if (y == null) - return false; // not bound. + return true; // not yet bound. return x.equals(y); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/EQConstant.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/EQConstant.java 2010-09-28 09:48:02 UTC (rev 3647) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/EQConstant.java 2010-09-28 10:04:24 UTC (rev 3648) @@ -78,7 +78,7 @@ final IConstant<?> asBound = bset.get(var); if (asBound == null) - return false; // not bound. + return true; // not yet bound. final IConstant<?> cnst = (IConstant<?>) get(1); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/INBinarySearch.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/INBinarySearch.java 2010-09-28 09:48:02 UTC (rev 3647) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/INBinarySearch.java 2010-09-28 10:04:24 UTC (rev 3648) @@ -181,8 +181,8 @@ if (x == null) { - // not bound - return false; + // not yet bound : @todo should this reject an unbound variable? + return true; } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/INHashMap.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/INHashMap.java 2010-09-28 09:48:02 UTC (rev 3647) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/INHashMap.java 2010-09-28 10:04:24 UTC (rev 3648) @@ -159,8 +159,8 @@ if (x == null) { - // not bound. - return false; + // not yet bound. 
+ return true; } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/constraint/TestEQ.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/constraint/TestEQ.java 2010-09-28 09:48:02 UTC (rev 3647) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/constraint/TestEQ.java 2010-09-28 10:04:24 UTC (rev 3648) @@ -114,7 +114,7 @@ } - public void test_eval_correct_unblound() { + public void test_eval_correct_unbound() { final EQ op = new EQ(Var.var("x"), Var.var("y")); @@ -122,7 +122,7 @@ new IVariable[] { Var.var("x") }, // new IConstant[] { new Constant<String>("1") }); - assertFalse(op.accept(bs1)); + assertTrue(op.accept(bs1)); } } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/constraint/TestEQConstant.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/constraint/TestEQConstant.java 2010-09-28 09:48:02 UTC (rev 3647) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/constraint/TestEQConstant.java 2010-09-28 10:04:24 UTC (rev 3648) @@ -91,6 +91,6 @@ assertTrue ( op.accept ( eq ) ) ; assertFalse ( op.accept ( ne1 ) ) ; assertFalse ( op.accept ( ne2 ) ) ; - assertFalse ( op.accept ( nb ) ) ; + assertTrue ( op.accept ( nb ) ) ; } } \ No newline at end of file Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/constraint/TestINConstraint.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/constraint/TestINConstraint.java 2010-09-28 09:48:02 UTC (rev 3647) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/constraint/TestINConstraint.java 2010-09-28 10:04:24 UTC (rev 3648) @@ -125,7 +125,7 @@ assertTrue ( op.accept ( in ) ) ; assertFalse ( op.accept ( notin ) ) ; - assertFalse ( op.accept ( nb ) ) ; + assertTrue ( op.accept ( nb ) ) ; } protected abstract INConstraint newINConstraint ( IVariable<?> var, IConstant<?> vals [] ) ; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
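The pattern applied across these constraint operators (EQ, EQConstant, INBinarySearch, INHashMap) is uniform: an unbound variable no longer causes the constraint to reject the binding set, since the variable may still become bound later in the pipeline; the test is only enforced once all of the constraint's variables are bound. A minimal illustration of the revised semantics, assembled from the operators and tests above (classes are the com.bigdata.bop types shown in the diffs; the literal "1" is just a stand-in value):

    // EQ(x,y) evaluated while only x is bound.
    final EQ op = new EQ(Var.var("x"), Var.var("y"));

    final IBindingSet bs = new ArrayBindingSet(
            new IVariable[] { Var.var("x") },
            new IConstant[] { new Constant<String>("1") });

    // Before r3648 this returned false and the solution was dropped;
    // it now returns true, deferring the equality test until y is bound.
    assert op.accept(bs);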
From: <tho...@us...> - 2010-10-06 13:12:09
Revision: 3739 http://bigdata.svn.sourceforge.net/bigdata/?rev=3739&view=rev Author: thompsonbry Date: 2010-10-06 13:12:02 +0000 (Wed, 06 Oct 2010) Log Message: ----------- Fixed a bug in FederatedQueryEngine where it was applying this.clientProxy in newRunningQuery(...) rather than the argument passed to that method. This was causing errors when the query crossed to another query engine (a peer) since the peer was attempting to communicate with itself as the query controller rather than the query engine against which the query was originally submitted. Modified both QueryEngine and FederatedQueryEngine to use private services for running queries and for materializing chunks. This makes it easier to identify the threads in a JVM which are specific to the query engine control logic. Fixed problems in Haltable where isError() or isCancelled could report false (they were being invoked without a lock held which made the update of the termination condition non-atomic with respect to those tests). Modified Haltable to not use a static CANCELLED exception object since that was providing a stack trace for when the class was loaded rather than when Haltable#cancel(mayInterruptIfRunning) was invoked. It now has a private 'cancelled' field. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunState.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/StartOpMessage.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/FederatedQueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/util/concurrent/Haltable.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java 2010-10-06 13:02:06 UTC (rev 3738) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java 2010-10-06 13:12:02 UTC (rev 3739) @@ -32,6 +32,8 @@ import java.util.UUID; import java.util.concurrent.BlockingQueue; import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.FutureTask; import java.util.concurrent.LinkedBlockingQueue; @@ -51,6 +53,7 @@ import com.bigdata.resources.IndexManager; import com.bigdata.service.IBigdataFederation; import com.bigdata.service.IDataService; +import com.bigdata.util.concurrent.DaemonThreadFactory; /** * A class managing execution of concurrent queries against a local @@ -343,7 +346,12 @@ if (engineFuture.compareAndSet(null/* expect */, ft)) { - localIndexManager.getExecutorService().execute(ft); + engineService.set(Executors + .newSingleThreadExecutor(new DaemonThreadFactory( + QueryEngine.class + ".engineService"))); + + engineService.get().execute(ft); +// localIndexManager.getExecutorService().execute(ft); } else { @@ -354,11 +362,16 @@ } /** - * The {@link Future} for the query engine. + * The service on which we run the query engine. This is started by {@link #init()}. 
*/ - private final AtomicReference<FutureTask<Void>> engineFuture = new AtomicReference<FutureTask<Void>>(); + private final AtomicReference<ExecutorService> engineService = new AtomicReference<ExecutorService>(); /** + * The {@link Future} for the query engine. This is set by {@link #init()}. + */ + private final AtomicReference<FutureTask<Void>> engineFuture = new AtomicReference<FutureTask<Void>>(); + + /** * Volatile flag is set for normal termination. When set, no new queries * will be accepted but existing queries will run to completion. */ @@ -524,9 +537,20 @@ // stop the query engine. final Future<?> f = engineFuture.get(); - if (f != null) + if (f != null) { f.cancel(true/* mayInterruptIfRunning */); + } + + // stop the service on which we ran the query engine. + final ExecutorService s = engineService.get(); + if (s != null) { + s.shutdownNow(); + } + // clear references. + engineFuture.set(null); + engineService.set(null); + } /** @@ -549,6 +573,12 @@ if (f != null) f.cancel(true/* mayInterruptIfRunning */); + // stop the service on which we ran the query engine. + final ExecutorService s = engineService.get(); + if (s != null) { + s.shutdownNow(); + } + // halt any running queries. for(RunningQuery q : runningQueries.values()) { @@ -556,6 +586,10 @@ } + // clear references. + engineFuture.set(null); + engineService.set(null); + } /** Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunState.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunState.java 2010-10-06 13:02:06 UTC (rev 3738) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunState.java 2010-10-06 13:12:02 UTC (rev 3739) @@ -398,7 +398,7 @@ messagesConsumed(msg.bopId, msg.nmessages); if (TableLog.tableLog.isInfoEnabled()) { - TableLog.tableLog.info(getTableRow("startOp", msg.serviceId, + TableLog.tableLog.info(getTableRow("startOp", msg.runningOnServiceId, msg.bopId, msg.partitionId, msg.nmessages/* fanIn */, null/* cause */, null/* stats */)); } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java 2010-10-06 13:02:06 UTC (rev 3738) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java 2010-10-06 13:12:02 UTC (rev 3739) @@ -204,7 +204,7 @@ public void setDeadline(final long deadline) { if (!controller) - throw new UnsupportedOperationException(); + throw new UnsupportedOperationException(ERR_NOT_CONTROLLER); if (deadline <= 0) throw new IllegalArgumentException(); @@ -291,8 +291,10 @@ /** * Return the current statistics for the query and <code>null</code> unless - * this is the query controller. For {@link PipelineOp} operator - * which is evaluated there will be a single entry in this map. + * this is the query controller. There will be a single entry in the map for + * each distinct {@link PipelineOp}. The map entries are inserted when we + * first begin to run an instance of that operator on some + * {@link IChunkMessage}. */ public Map<Integer/* bopId */, BOpStats> getStats() { @@ -511,7 +513,9 @@ try { // verify still running. - future.halted(); + if (future.isDone()) { + throw new RuntimeException("Query is done", future.getCause()); + } // add chunk to be consumed. 
chunksIn.add(msg); @@ -535,7 +539,7 @@ void startQuery(final IChunkMessage<IBindingSet> msg) { if (!controller) - throw new UnsupportedOperationException(); + throw new UnsupportedOperationException(ERR_NOT_CONTROLLER); if (msg == null) throw new IllegalArgumentException(); @@ -576,7 +580,7 @@ public void startOp(final StartOpMessage msg) { if (!controller) - throw new UnsupportedOperationException(); + throw new UnsupportedOperationException(ERR_NOT_CONTROLLER); if (msg == null) throw new IllegalArgumentException(); @@ -618,7 +622,7 @@ public void haltOp(final HaltOpMessage msg) { if (!controller) - throw new UnsupportedOperationException(); + throw new UnsupportedOperationException(ERR_NOT_CONTROLLER); if (msg == null) throw new IllegalArgumentException(); @@ -1083,7 +1087,7 @@ public IAsynchronousIterator<IBindingSet[]> iterator() { if (!controller) - throw new UnsupportedOperationException(); + throw new UnsupportedOperationException(ERR_NOT_CONTROLLER); if (queryIterator == null) throw new UnsupportedOperationException(); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/StartOpMessage.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/StartOpMessage.java 2010-10-06 13:02:06 UTC (rev 3738) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/StartOpMessage.java 2010-10-06 13:12:02 UTC (rev 3739) @@ -33,7 +33,7 @@ final public int partitionId; /** The node on which the operator will execute. */ - final public UUID serviceId; + final public UUID runningOnServiceId; /** * The #of {@link IChunkMessage} accepted as the input for the operator. @@ -45,7 +45,8 @@ final public int nmessages; public StartOpMessage(final UUID queryId, final int opId, - final int partitionId, final UUID serviceId, final int nmessages) { + final int partitionId, final UUID runningOnServiceId, + final int nmessages) { if (queryId == null) throw new IllegalArgumentException(); @@ -59,7 +60,7 @@ this.partitionId = partitionId; - this.serviceId = serviceId; + this.runningOnServiceId = runningOnServiceId; this.nmessages = nmessages; @@ -67,7 +68,7 @@ public String toString() { return getClass().getName() + "{queryId=" + queryId + ",bopId=" + bopId - + ",partitionId=" + partitionId + ",serviceId=" + serviceId + + ",partitionId=" + partitionId + ",serviceId=" + runningOnServiceId + ",nchunks=" + nmessages + "}"; } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/FederatedQueryEngine.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/FederatedQueryEngine.java 2010-10-06 13:02:06 UTC (rev 3738) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/FederatedQueryEngine.java 2010-10-06 13:12:02 UTC (rev 3739) @@ -33,6 +33,8 @@ import java.util.UUID; import java.util.concurrent.BlockingQueue; import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.FutureTask; import java.util.concurrent.LinkedBlockingQueue; @@ -57,6 +59,7 @@ import com.bigdata.service.ResourceService; import com.bigdata.service.jini.JiniFederation; import com.bigdata.util.InnerCause; +import com.bigdata.util.concurrent.DaemonThreadFactory; /** * An {@link IBigdataFederation} aware {@link QueryEngine}. 
@@ -95,14 +98,21 @@ * The proxy for this query engine when used as a query controller. */ private final IQueryClient clientProxy; - + /** * A queue of {@link IChunkMessage}s which needs to have their data * materialized so an operator can consume those data on this node. + * This queue is drained by the {@link MaterializeChunksTask}. */ final private BlockingQueue<IChunkMessage<?>> chunkMaterializationQueue = new LinkedBlockingQueue<IChunkMessage<?>>(); /** + * The service on which we run {@link MaterializeChunksTask}. This is + * started by {@link #init()}. + */ + private final AtomicReference<ExecutorService> materializeChunksService = new AtomicReference<ExecutorService>(); + + /** * The {@link Future} for the task draining the {@link #chunkMaterializationQueue}. */ private final AtomicReference<FutureTask<Void>> materializeChunksFuture = new AtomicReference<FutureTask<Void>>(); @@ -270,8 +280,14 @@ new MaterializeChunksTask(), (Void) null); if (materializeChunksFuture.compareAndSet(null/* expect */, ft)) { - - getIndexManager().getExecutorService().execute(ft); + + materializeChunksService.set(Executors + .newSingleThreadExecutor(new DaemonThreadFactory( + FederatedQueryEngine.class + + ".materializeChunksService"))); + +// getIndexManager().getExecutorService().execute(ft); + materializeChunksService.get().execute(ft); } else { @@ -288,12 +304,23 @@ */ @Override protected void didShutdown() { - + // stop materializing chunks. final Future<?> f = materializeChunksFuture.get(); - if (f != null) + if (f != null) { f.cancel(true/* mayInterruptIfRunning */); + } + // stop the service on which we ran the MaterializeChunksTask. + final ExecutorService s = materializeChunksService.get(); + if (s != null) { + s.shutdownNow(); + } + + // Clear the references. + materializeChunksFuture.set(null); + materializeChunksService.set(null); + } /** @@ -516,8 +543,8 @@ final boolean controller, final IQueryClient clientProxy, final PipelineOp query) { - return new FederatedRunningQuery(this, queryId, controller, - this.clientProxy, query); + return new FederatedRunningQuery(this/*queryEngine*/, queryId, controller, + clientProxy, query); } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/util/concurrent/Haltable.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/util/concurrent/Haltable.java 2010-10-06 13:02:06 UTC (rev 3738) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/util/concurrent/Haltable.java 2010-10-06 13:12:02 UTC (rev 3739) @@ -69,11 +69,11 @@ private final transient static Logger log = Logger .getLogger(Haltable.class); - /** - * Exception used to indicate a {@link #cancel(boolean) cancelled} - * computation. - */ - private static Throwable CANCELLED = new InterruptedException("CANCELLED"); +// /** +// * Exception used to indicate a {@link #cancel(boolean) cancelled} +// * computation. +// */ +// private static final Throwable CANCELLED = new InterruptedException("CANCELLED"); /** * Lock guarding the {@link #halted} condition and the various non-volatile, @@ -87,20 +87,20 @@ final private Condition halted = lock.newCondition(); /** - * The result of the computation. + * The result of the computation. This is guarded by the {@link #lock} . */ private V result = null; /** * The first cause as set by {@link #halt(Throwable)}. 
*/ - private Throwable firstCause = null; + private volatile Throwable firstCause = null; /** * Flag is set <code>true</code> if the process was halted by a * {@link Throwable} not included in the set of normal termination causes. */ - private boolean error = false; + private volatile boolean error = false; /** * Set to <code>true</code> iff the process should halt. @@ -108,6 +108,12 @@ private volatile boolean halt = false; /** + * Set to <code>true</code> iff the process was {@link #cancel(boolean) + * cancelled}. + */ + private volatile boolean cancelled = false; + + /** * Halt (normal termination). */ final public void halt(final V v) { @@ -173,34 +179,56 @@ } /** - * Return unless processing has been halted. - * <p> - * This method may be used to detect asynchronous termination of the - * process. It will throw out the wrapper first cause if the process is - * halted. The method should be invoked from within the execution of the - * process itself so that it may notice asynchronous termination. + * Return unless processing has been halted. The method should be invoked + * from within the execution of the process itself so that it may notice + * asynchronous termination. It will throw out the wrapper first cause if + * the process is halted. The method is <code>protected</code> since the + * semantics are those of testing for unexpected termination of the process + * from within the process. External processes should use {@link #isDone()}. * * @throws RuntimeException * wrapping the {@link #firstCause} iff processing has been * halted. */ - final public void halted() { + final protected void halted() { if (halt) { - if (firstCause == null) + if (firstCause == null) { + /* + * Note: this is an error since there is an expectation by the + * process when it invokes halted() that the process is still + * running (since it invoked halted() it must be running). Since + * it is running, + */ throw new RuntimeException(); + + } throw new RuntimeException(firstCause); } } final public boolean cancel(final boolean mayInterruptIfRunning) { + lock.lock(); + try { + + final Throwable t = new InterruptedException(); - halt(CANCELLED); + halt(t); - // return true if this was the firstCause. - return (firstCause == CANCELLED); + if (firstCause == t) { + // iff this was the firstCause. + cancelled = true; + return true; + } + return false; + + } finally { + + lock.unlock(); + + } } final public V get() throws InterruptedException, ExecutionException { @@ -209,8 +237,11 @@ while (!halt) { halted.await(); } - if (firstCause == CANCELLED) - throw new CancellationException(); + if(cancelled) { + final CancellationException t = new CancellationException(); + t.initCause(firstCause); + throw t; + } if (error) throw new ExecutionException(firstCause); return result; @@ -260,13 +291,25 @@ */ final public boolean isError() { - return halt && error; + // Note: lock required for atomic visibility for [halt AND error]. + lock.lock(); + try { + return halt && error; + } finally { + lock.unlock(); + } } public boolean isCancelled() { - return halt && firstCause == CANCELLED; + // Note: lock required for atomic visibility for [halt AND cancelled]. + lock.lock(); + try { + return halt && cancelled; + } finally { + lock.unlock(); + } } @@ -333,8 +376,8 @@ * termination. 
*/ protected boolean isNormalTerminationCause(final Throwable cause) { - if (CANCELLED == cause) - return true; +// if (InnerCause.isInnerCause(cause, CancelledException.class)) +// return true; if (InnerCause.isInnerCause(cause, InterruptedException.class)) return true; if (InnerCause.isInnerCause(cause, CancellationException.class)) Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java 2010-10-06 13:02:06 UTC (rev 3738) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java 2010-10-06 13:12:02 UTC (rev 3739) @@ -906,7 +906,7 @@ { // validate the stats map. assertNotNull(statsMap); - assertEquals(3, statsMap.size()); + assertEquals("statsMap.size()", 3, statsMap.size()); if (log.isInfoEnabled()) log.info(statsMap.toString()); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
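Two of the Haltable fixes above deserve unpacking. First, a static CANCELLED exception captures its stack trace exactly once, at class-load time, so every cancelled computation reported the same meaningless trace; allocating a fresh InterruptedException per cancel() call restores a trace that points at the actual cancellation site. Second, halt and the error/cancelled flags are distinct fields, so reading them without the lock is not atomic: isCancelled() could observe halt == true before a concurrent cancel() had set cancelled. A distilled sketch of the corrected pattern (the class name and the simplified halt() body are stand-ins; the real Haltable does considerably more):

    import java.util.concurrent.locks.ReentrantLock;

    class CancelSketch {

        private final ReentrantLock lock = new ReentrantLock();
        private volatile boolean halt = false;
        private volatile boolean cancelled = false;
        private volatile Throwable firstCause = null;

        // Simplified: retain only the first cause of termination.
        private void halt(final Throwable t) {
            lock.lock();
            try {
                if (!halt) {
                    halt = true;
                    firstCause = t;
                }
            } finally {
                lock.unlock();
            }
        }

        public boolean cancel(final boolean mayInterruptIfRunning) {
            lock.lock();
            try {
                // Fresh exception: the stack trace reflects *this* request.
                final Throwable t = new InterruptedException();
                halt(t);
                if (firstCause == t) {
                    cancelled = true; // this call won the race to terminate.
                    return true;
                }
                return false;
            } finally {
                lock.unlock();
            }
        }

        public boolean isCancelled() {
            // The lock gives atomic visibility of [halt AND cancelled].
            lock.lock();
            try {
                return halt && cancelled;
            } finally {
                lock.unlock();
            }
        }

    }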
From: <tho...@us...> - 2010-10-06 14:44:49
Revision: 3740 http://bigdata.svn.sourceforge.net/bigdata/?rev=3740&view=rev Author: thompsonbry Date: 2010-10-06 14:44:41 +0000 (Wed, 06 Oct 2010) Log Message: ----------- Added support for PipelineOp.Annotations#SINK_REF (to override the default sink in support of routing around a union operator). Modified RunningQuery to permit the default sink and the optional sink to target the same operator. Modified the FederatedQueryEngine to use a thread pool to materialize chunks. Added unit tests for optional joins for the query engine and federated query engine. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bset/Union.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/FederatedQueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestFederatedQueryEngine.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bset/Union.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bset/Union.java 2010-10-06 13:12:02 UTC (rev 3739) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bset/Union.java 2010-10-06 14:44:41 UTC (rev 3740) @@ -28,17 +28,11 @@ package com.bigdata.bop.bset; import java.util.Map; -import java.util.concurrent.Callable; import java.util.concurrent.FutureTask; -import com.bigdata.bop.BOp; import com.bigdata.bop.BOpContext; +import com.bigdata.bop.IBindingSet; import com.bigdata.bop.PipelineOp; -import com.bigdata.bop.IBindingSet; -import com.bigdata.bop.join.PipelineJoin; -import com.bigdata.rdf.rules.TMUtility; -import com.bigdata.relation.RelationFusedView; -import com.bigdata.util.concurrent.Haltable; /** * UNION(ops)[maxParallel(default all)] @@ -49,19 +43,30 @@ * and may be executed independently. By default, the subqueries are run with * unlimited parallelism. * <p> - * UNION is useful when independent queries are evaluated and their outputs are - * merged. Outputs from the UNION operator flow to the parent operator and will - * be mapped across shards or nodes as appropriate for the parent. UNION runs on - * the query controller. In order to avoid routing intermediate results through - * the controller, the {@link PipelineOp.Annotations#SINK_REF} of each - * child operand should be overridden to specify the parent of the UNION - * operator. + * Note: UNION runs on the query controller. The + * {@link PipelineOp.Annotations#SINK_REF} of each child operand should be + * overridden to specify the parent of the UNION operator, thereby routing + * around the UNION operator itself. If you fail to do this, then the + * intermediate results of the subqueries will be routed through the UNION + * operator on the query controller. * <p> - * UNION can not be used when the intermediate results must be routed into the - * subqueries. However, a {@link Tee} pattern may help in such cases. For - * example, a {@link Tee} may be used to create a union of pipeline joins for - * two access paths during truth maintenance. + * UNION can not be used when intermediate results from other computations must + * be routed into subqueries. However, a {@link Tee} pattern may help in such + * cases. 
For example, a {@link Tee} may be used to create a union of pipeline + * joins for two access paths during truth maintenance. + * <p> + * For example: * + * <pre> + * UNION([a,b,c],{}) + * </pre> + * + * Will run the subqueries <i>a</i>, <i>b</i>, and <i>c</i> in parallel. Each + * subquery will be initialized with a single empty {@link IBindingSet}. The + * output of those subqueries will be routed to the UNION operator (their + * parent) unless the subqueries explicitly override this behavior using + * {@link PipelineOp.Annotations#SINK_REF}. + * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ */ Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java 2010-10-06 13:12:02 UTC (rev 3739) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java 2010-10-06 14:44:41 UTC (rev 3740) @@ -393,6 +393,12 @@ } + protected void execute(final Runnable r) { + + localIndexManager.getExecutorService().execute(r); + + } + /** * Runnable submits chunks available for evaluation against running queries. * @@ -438,7 +444,7 @@ if (log.isDebugEnabled()) log.debug("Running chunk: " + chunk); // execute task. - localIndexManager.getExecutorService().execute(ft); + execute(ft); } catch (RejectedExecutionException ex) { // shutdown of the pool (should be an unbounded // pool). Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java 2010-10-06 13:12:02 UTC (rev 3739) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java 2010-10-06 14:44:41 UTC (rev 3740) @@ -873,8 +873,7 @@ * target it with a message. (The sink will be null iff there is no * parent for this operator.) */ - sinkId = p == null ? null : (Integer) p - .getRequiredProperty(PipelineOp.Annotations.BOP_ID); + sinkId = getEffectiveDefaultSink(bop, p); // altSink (null when not specified). altSinkId = (Integer) op @@ -889,12 +888,12 @@ + bop); } - if (sinkId != null && altSinkId != null - && sinkId.intValue() == altSinkId.intValue()) { - throw new RuntimeException( - "The primary and alternative sink may not be the same operator: " - + bop); - } +// if (sinkId != null && altSinkId != null +// && sinkId.intValue() == altSinkId.intValue()) { +// throw new RuntimeException( +// "The primary and alternative sink may not be the same operator: " +// + bop); +// } /* * Setup the BOpStats object. For some operators, e.g., SliceOp, @@ -932,6 +931,38 @@ } /** + * Return the effective default sink. + * + * @param bop + * The operator. + * @param p + * The parent of that operator, if any. + */ + private Integer getEffectiveDefaultSink(final BOp bop, final BOp p) { + + if (bop == null) + throw new IllegalArgumentException(); + + Integer sink; + + // Explictly specified sink? + sink = (Integer) bop.getProperty(PipelineOp.Annotations.SINK_REF); + + if (sink == null) { + if (p == null) { + // No parent, so no sink. + return null; + } + // The parent is the sink. + sink = (Integer) p + .getRequiredProperty(PipelineOp.Annotations.BOP_ID); + } + + return sink; + + } + + /** * Evaluate the {@link IChunkMessage}. 
*/ public void run() { Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/FederatedQueryEngine.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/FederatedQueryEngine.java 2010-10-06 13:12:02 UTC (rev 3739) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/FederatedQueryEngine.java 2010-10-06 14:44:41 UTC (rev 3740) @@ -31,13 +31,10 @@ import java.nio.ByteBuffer; import java.rmi.RemoteException; import java.util.UUID; -import java.util.concurrent.BlockingQueue; import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.Executor; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; -import java.util.concurrent.Future; -import java.util.concurrent.FutureTask; -import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.RejectedExecutionException; import java.util.concurrent.atomic.AtomicReference; @@ -99,23 +96,23 @@ */ private final IQueryClient clientProxy; - /** - * A queue of {@link IChunkMessage}s which needs to have their data - * materialized so an operator can consume those data on this node. - * This queue is drained by the {@link MaterializeChunksTask}. - */ - final private BlockingQueue<IChunkMessage<?>> chunkMaterializationQueue = new LinkedBlockingQueue<IChunkMessage<?>>(); +// /** +// * A queue of {@link IChunkMessage}s which needs to have their data +// * materialized so an operator can consume those data on this node. +// * This queue is drained by the {@link MaterializeChunksTask}. +// */ +// final private BlockingQueue<IChunkMessage<?>> chunkMaterializationQueue = new LinkedBlockingQueue<IChunkMessage<?>>(); /** - * The service on which we run {@link MaterializeChunksTask}. This is + * The service used to accept {@link IChunkMessage} for evaluation. This is * started by {@link #init()}. */ - private final AtomicReference<ExecutorService> materializeChunksService = new AtomicReference<ExecutorService>(); + private final AtomicReference<ExecutorService> acceptTaskService = new AtomicReference<ExecutorService>(); - /** - * The {@link Future} for the task draining the {@link #chunkMaterializationQueue}. - */ - private final AtomicReference<FutureTask<Void>> materializeChunksFuture = new AtomicReference<FutureTask<Void>>(); +// /** +// * The {@link Future} for the task draining the {@link #chunkMaterializationQueue}. +// */ +// private final AtomicReference<FutureTask<Void>> acceptMessageTaskFuture = new AtomicReference<FutureTask<Void>>(); @Override public UUID getServiceUUID() { @@ -229,17 +226,16 @@ /* * The proxy for this query engine when used as a query controller. * + * Note: DGC is relied on to clean up the exported proxy when the + * query engine dies. * - * Should the data services expose their query engine in this - * manner? - * - * @todo We need to unexport the proxy as well when the service is - * shutdown. This should follow the same pattern as DataService -> - * DataServer. E.g., a QueryEngineServer class. + * @todo There should be an explicit "QueryEngineServer" which is + * used as the front end for SPARQL queries. It should have an + * explicitly configured Exporter for its proxy. 
*/ this.clientProxy = (IQueryClient) ((JiniFederation<?>) fed) - .getProxy(this, false/* enableDGC */); + .getProxy(this, true/* enableDGC */); } else { @@ -275,26 +271,11 @@ public void init() { super.init(); - - final FutureTask<Void> ft = new FutureTask<Void>( - new MaterializeChunksTask(), (Void) null); - - if (materializeChunksFuture.compareAndSet(null/* expect */, ft)) { - materializeChunksService.set(Executors - .newSingleThreadExecutor(new DaemonThreadFactory( - FederatedQueryEngine.class - + ".materializeChunksService"))); + acceptTaskService.set(Executors + .newCachedThreadPool(new DaemonThreadFactory( + FederatedQueryEngine.class + ".acceptService"))); -// getIndexManager().getExecutorService().execute(ft); - materializeChunksService.get().execute(ft); - - } else { - - throw new IllegalStateException("Already running"); - - } - } /** @@ -305,21 +286,14 @@ @Override protected void didShutdown() { - // stop materializing chunks. - final Future<?> f = materializeChunksFuture.get(); - if (f != null) { - f.cancel(true/* mayInterruptIfRunning */); - } - - // stop the service on which we ran the MaterializeChunksTask. - final ExecutorService s = materializeChunksService.get(); + // stop the service which is accepting messages. + final ExecutorService s = acceptTaskService.get(); if (s != null) { s.shutdownNow(); } // Clear the references. - materializeChunksFuture.set(null); - materializeChunksService.set(null); + acceptTaskService.set(null); } @@ -331,63 +305,50 @@ @Override public void shutdownNow() { - // stop materializing chunks. - final Future<?> f = materializeChunksFuture.get(); - if (f != null) - f.cancel(true/* mayInterruptIfRunning */); + // stop the service which is accepting messages. + final ExecutorService s = acceptTaskService.get(); + if (s != null) { + s.shutdownNow(); + } + // Clear the references. + acceptTaskService.set(null); + super.shutdownNow(); } /** - * Runnable materializes chunks and makes them available for further - * processing. + * Materialize an {@link IChunkMessage} for processing and place it on the + * queue of accepted messages. * - * @todo multiple threads for materializing chunks, not just one. can - * be multiple {@link MaterializeChunksTask}s running. + * @author <a href="mailto:tho...@us...">Bryan + * Thompson</a> */ - private class MaterializeChunksTask implements Runnable { + private class MaterializeMessageTask implements Runnable { + private final IChunkMessage<?> msg; + + public MaterializeMessageTask(final IChunkMessage<?> msg) { + this.msg = msg; + } + public void run() { - if(log.isInfoEnabled()) - log.info("running: " + this); - while (true) { - try { - final IChunkMessage<?> msg = chunkMaterializationQueue.take(); - if(log.isDebugEnabled()) - log.debug("msg=" + msg); - try { - if(!accept(msg)) { - if(log.isDebugEnabled()) - log.debug("dropping: " + msg); - continue; - } - if(log.isDebugEnabled()) - log.debug("accepted: " + msg); - /* - * @todo The type warning here is because the rest of - * the API does not know what to do with messages for - * chunks other than IBindingSet[], e.g., IElement[], - * etc. 
- */ - FederatedQueryEngine.this - .acceptChunk((IChunkMessage) msg); - } catch(Throwable t) { - if(InnerCause.isInnerCause(t, InterruptedException.class)) { - log.warn("Interrupted."); - return; - } - throw new RuntimeException(t); - } - } catch (InterruptedException e) { + try { + if (!accept(msg)) { + if (log.isDebugEnabled()) + log.debug("dropping: " + msg); + return; + } + if (log.isDebugEnabled()) + log.debug("accepted: " + msg); + FederatedQueryEngine.this.acceptChunk((IChunkMessage) msg); + } catch (Throwable t) { + if (InnerCause.isInnerCause(t, InterruptedException.class)) { log.warn("Interrupted."); return; - } catch (Throwable ex) { - // log and continue - log.error(ex, ex); - continue; } + throw new RuntimeException(t); } } @@ -410,59 +371,30 @@ if (q == null) { - /* - * This code path handles the message the first time a chunk is - * observed on a node for a query. Since we do not broadcast the - * query to all nodes, the node has to resolve the query from the - * query controller. - * - * @todo Track recently terminated queries and do not recreate them. - */ - // true iff this is the query controller final boolean isController = getServiceUUID().equals( msg.getQueryController().getServiceUUID()); - - if(isController) { + + if (isController) { /* * @todo This would indicate that the query had been * concurrently terminated and cleared from the set of - * runningQueries and that we were not retaining metadata about - * queries which had been terminated. + * runningQueries and that we were not retaining metadata + * about queries which had been terminated. */ throw new AssertionError( "Query not running on controller: thisService=" + getServiceUUID() + ", msg=" + msg); } - /* - * Request the query from the query controller (RMI). - * - * @todo RMI is too expensive. Apply a memoizer pattern to avoid - * race conditions. - */ - final PipelineOp query = msg.getQueryController() - .getQuery(msg.getQueryId()); - - q = newRunningQuery(FederatedQueryEngine.this, queryId, - false/* controller */, msg.getQueryController(), query); - - final RunningQuery tmp = runningQueries.putIfAbsent(queryId, q); + // Get the query declaration from the query controller. + q = getDeclaredQuery(queryId); - if(tmp != null) { - - // another thread won this race. - q = (FederatedRunningQuery) tmp; - - } - } -// if(q == null) -// throw new RuntimeException(ERR_QUERY_NOT_RUNNING + queryId); - if (!q.isCancelled() && !msg.isMaterialized()) { + // materialize the chunk for this message. msg.materialize(q); } @@ -470,9 +402,44 @@ return !q.isCancelled(); } - - } // MaterializeChunksTask + /** + * This code path handles the message the first time a chunk is observed + * on a node for a query. Since we do not broadcast the query to all + * nodes, the node has to resolve the query from the query controller. + * + * @throws RemoteException + * + * @todo Track recently terminated queries and do not recreate them. + */ + private FederatedRunningQuery getDeclaredQuery(final UUID queryId) + throws RemoteException { + + /* + * Request the query from the query controller (RMI). + */ + final PipelineOp query = msg.getQueryController().getQuery( + msg.getQueryId()); + + FederatedRunningQuery q = newRunningQuery( + FederatedQueryEngine.this, queryId, false/* controller */, + msg.getQueryController(), query); + + final RunningQuery tmp = runningQueries.putIfAbsent(queryId, q); + + if (tmp != null) { + + // another thread won this race. 
+ q = (FederatedRunningQuery) tmp; + + } + + return q; + + } + + } + public void declareQuery(final IQueryDecl queryDecl) { final UUID queryId = queryDecl.getQueryId(); @@ -489,11 +456,22 @@ if (msg == null) throw new IllegalArgumentException(); + if(log.isDebugEnabled()) + log.debug("msg=" + msg); + assertRunning(); - // queue up message to be materialized or otherwise handled later. - chunkMaterializationQueue.add(msg); + /* + * Schedule task to materialized or otherwise handle the message. + */ + + final Executor s = acceptTaskService.get(); + if (s == null) + throw new RuntimeException("Not running"); + + s.execute(new MaterializeMessageTask(msg)); + } /** Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java 2010-10-06 13:12:02 UTC (rev 3739) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java 2010-10-06 14:44:41 UTC (rev 3740) @@ -44,9 +44,7 @@ import com.bigdata.bop.ArrayBindingSet; import com.bigdata.bop.BOp; -import com.bigdata.bop.BOpContext; import com.bigdata.bop.BOpEvaluationContext; -import com.bigdata.bop.PipelineOp; import com.bigdata.bop.Constant; import com.bigdata.bop.HashBindingSet; import com.bigdata.bop.IBindingSet; @@ -55,12 +53,14 @@ import com.bigdata.bop.IVariable; import com.bigdata.bop.IVariableOrConstant; import com.bigdata.bop.NV; +import com.bigdata.bop.PipelineOp; import com.bigdata.bop.Var; import com.bigdata.bop.ap.E; import com.bigdata.bop.ap.Predicate; import com.bigdata.bop.ap.R; import com.bigdata.bop.bset.ConditionalRoutingOp; import com.bigdata.bop.bset.StartOp; +import com.bigdata.bop.constraint.EQ; import com.bigdata.bop.constraint.EQConstant; import com.bigdata.bop.fed.TestFederatedQueryEngine; import com.bigdata.bop.join.PipelineJoin; @@ -1143,7 +1143,6 @@ BOpEvaluationContext.CONTROLLER),// })); - // @todo the KEY_ORDER should be bound before evaluation. final Predicate<?> pred1Op = new Predicate<E>(new IVariableOrConstant[] { Var.var("x"), Var.var("y") }, NV .asMap(new NV[] {// @@ -1411,16 +1410,212 @@ return nsuccess; } - + /** - * @todo Write unit tests for optional joins, including where an alternative - * sink is specified in the {@link BOpContext} and is used when the - * join fails. + * Unit test for optional join. Two joins are used and target a + * {@link SliceOp}. The 2nd join is marked as optional. Intermediate results + * which do not succeed on the optional join are forwarded to the + * {@link SliceOp} which is the target specified by the + * {@link PipelineOp.Annotations#ALT_SINK_REF}. + * + * @todo Write unit test for optional join groups. Here the goal is to + * verify that intermediate results may skip more than one join. This + * was a problem for the old query evaluation approach since binding + * sets had to cascade through the query one join at a time. However, + * the new query engine design should handle this case. 
*/ - public void test_query_join2_optionals() { + public void test_query_join2_optionals() throws Exception { - fail("write test"); + final int startId = 1; + final int joinId1 = 2; + final int predId1 = 3; + final int joinId2 = 4; + final int predId2 = 5; + final int sliceId = 6; + + final IVariable<?> x = Var.var("x"); + final IVariable<?> y = Var.var("y"); + final IVariable<?> z = Var.var("z"); + + final PipelineOp startOp = new StartOp(new BOp[] {}, + NV.asMap(new NV[] {// + new NV(Predicate.Annotations.BOP_ID, startId),// + new NV(SliceOp.Annotations.EVALUATION_CONTEXT, + BOpEvaluationContext.CONTROLLER),// + })); + + final Predicate<?> pred1Op = new Predicate<E>( + new IVariableOrConstant[] { x, y }, NV + .asMap(new NV[] {// + new NV(Predicate.Annotations.RELATION_NAME, + new String[] { namespace }),// + new NV(Predicate.Annotations.BOP_ID, predId1),// + new NV(Predicate.Annotations.TIMESTAMP, ITx.READ_COMMITTED),// + })); + + final Predicate<?> pred2Op = new Predicate<E>( + new IVariableOrConstant[] { y, z }, NV + .asMap(new NV[] {// + new NV(Predicate.Annotations.RELATION_NAME, + new String[] { namespace }),// + new NV(Predicate.Annotations.BOP_ID, predId2),// + new NV(Predicate.Annotations.TIMESTAMP, ITx.READ_COMMITTED),// + })); + + final PipelineOp join1Op = new PipelineJoin<E>(// + startOp, pred1Op,// + NV.asMap(new NV[] {// + new NV(Predicate.Annotations.BOP_ID, joinId1),// + })); + final PipelineOp join2Op = new PipelineJoin<E>(// + join1Op, pred2Op,// + NV.asMap(new NV[] {// + new NV(Predicate.Annotations.BOP_ID, joinId2),// + // constraint x == z + new NV(PipelineJoin.Annotations.CONSTRAINTS,new IConstraint[]{ + new EQ(x,z) + }), + // join is optional. + new NV(PipelineJoin.Annotations.OPTIONAL,true),// + // optional target is the same as the default target. + new NV(PipelineOp.Annotations.ALT_SINK_REF,sliceId),// + })); + + final PipelineOp sliceOp = new SliceOp(// + new BOp[]{join2Op}, + NV.asMap(new NV[] {// + new NV(BOp.Annotations.BOP_ID, sliceId),// + new NV(BOp.Annotations.EVALUATION_CONTEXT, + BOpEvaluationContext.CONTROLLER),// + })); + + final PipelineOp query = sliceOp; + + // start the query. + final UUID queryId = UUID.randomUUID(); + final IChunkMessage<IBindingSet> initialChunkMessage; + { + + final IBindingSet initialBindings = new HashBindingSet(); + +// initialBindings.set(Var.var("x"), new Constant<String>("Mary")); + + initialChunkMessage = new LocalChunkMessage<IBindingSet>(queryEngine, + queryId, startId,// + -1, // partitionId + newBindingSetIterator(initialBindings)); + } + final RunningQuery runningQuery = queryEngine.eval(queryId, query, + initialChunkMessage); + + // verify solutions. + { + + // the expected solutions. + final IBindingSet[] expected = new IBindingSet[] {// + // two solutions where the 2nd join succeeds. + new ArrayBindingSet(// + new IVariable[] { x, y, z },// + new IConstant[] { new Constant<String>("Leon"), + new Constant<String>("Paul"), + new Constant<String>("Leon") }// + ), + new ArrayBindingSet(// + new IVariable[] { x, y, z },// + new IConstant[] { new Constant<String>("Paul"), + new Constant<String>("Leon"), + new Constant<String>("Paul") }// + ), + // plus anything we read from the first access path which did not join. 
+ new ArrayBindingSet(// + new IVariable[] { Var.var("x"), Var.var("y") },// + new IConstant[] { new Constant<String>("John"), + new Constant<String>("Mary") }// + ), + new ArrayBindingSet(// + new IVariable[] { Var.var("x"), Var.var("y") },// + new IConstant[] { new Constant<String>("Mary"), + new Constant<String>("Paul") }// + ) + }; + + assertSameSolutionsAnyOrder(expected, + new Dechunkerator<IBindingSet>(runningQuery.iterator())); + +// new E("John", "Mary"),// [0] +// new E("Leon", "Paul"),// [1] +// new E("Mary", "Paul"),// [2] +// new E("Paul", "Leon"),// [3] + } + + // Wait until the query is done. + runningQuery.get(); + final Map<Integer, BOpStats> statsMap = runningQuery.getStats(); + { + // validate the stats map. + assertNotNull(statsMap); + assertEquals(4, statsMap.size()); + if (log.isInfoEnabled()) + log.info(statsMap.toString()); + } + + // validate the stats for the start operator. + { + final BOpStats stats = statsMap.get(startId); + assertNotNull(stats); + if (log.isInfoEnabled()) + log.info("start: " + stats.toString()); + + // verify query solution stats details. + assertEquals(1L, stats.chunksIn.get()); + assertEquals(1L, stats.unitsIn.get()); + assertEquals(1L, stats.unitsOut.get()); + assertEquals(1L, stats.chunksOut.get()); + } + + // validate the stats for the 1st join operator. + { + final BOpStats stats = statsMap.get(joinId1); + assertNotNull(stats); + if (log.isInfoEnabled()) + log.info("join1: " + stats.toString()); + + // verify query solution stats details. + assertEquals(1L, stats.chunksIn.get()); + assertEquals(1L, stats.unitsIn.get()); + assertEquals(4L, stats.unitsOut.get()); + assertEquals(1L, stats.chunksOut.get()); + } + + // validate the stats for the 2nd join operator. + { + final BOpStats stats = statsMap.get(joinId2); + assertNotNull(stats); + if (log.isInfoEnabled()) + log.info("join2: " + stats.toString()); + + // verify query solution stats details. +// assertEquals(1L, stats.chunksIn.get()); + assertEquals(4L, stats.unitsIn.get()); + assertEquals(4L, stats.unitsOut.get()); +// assertEquals(1L, stats.chunksOut.get()); + } + + // Validate stats for the sliceOp. + { + final BOpStats stats = statsMap.get(sliceId); + assertNotNull(stats); + if (log.isInfoEnabled()) + log.info("slice: " + stats.toString()); + + // verify query solution stats details. 
+// assertEquals(2L, stats.chunksIn.get()); + assertEquals(4L, stats.unitsIn.get()); + assertEquals(4L, stats.unitsOut.get()); +// assertEquals(1L, stats.chunksOut.get()); + } + } /** Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestFederatedQueryEngine.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestFederatedQueryEngine.java 2010-10-06 13:12:02 UTC (rev 3739) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestFederatedQueryEngine.java 2010-10-06 14:44:41 UTC (rev 3740) @@ -35,7 +35,6 @@ import com.bigdata.bop.ArrayBindingSet; import com.bigdata.bop.BOp; -import com.bigdata.bop.BOpContext; import com.bigdata.bop.BOpEvaluationContext; import com.bigdata.bop.Constant; import com.bigdata.bop.HashBindingSet; @@ -51,6 +50,7 @@ import com.bigdata.bop.ap.Predicate; import com.bigdata.bop.ap.R; import com.bigdata.bop.bset.StartOp; +import com.bigdata.bop.constraint.EQ; import com.bigdata.bop.constraint.EQConstant; import com.bigdata.bop.engine.BOpStats; import com.bigdata.bop.engine.IChunkMessage; @@ -1014,7 +1014,6 @@ new String[] { namespace }),// new NV(Predicate.Annotations.BOP_ID, predId1),// new NV(Predicate.Annotations.TIMESTAMP, ITx.READ_COMMITTED),// -// new NV(Predicate.Annotations.KEY_ORDER,R.primaryKeyOrder),// })); final Predicate<?> pred2Op = new Predicate<E>(new IVariableOrConstant[] { @@ -1024,7 +1023,6 @@ new String[] { namespace }),// new NV(Predicate.Annotations.BOP_ID, predId2),// new NV(Predicate.Annotations.TIMESTAMP, ITx.READ_COMMITTED),// -// new NV(Predicate.Annotations.KEY_ORDER,R.primaryKeyOrder),// })); final PipelineOp join1Op = new PipelineJoin<E>(// @@ -1172,14 +1170,229 @@ } /** - * @todo Write unit tests for optional joins, including where an alternative - * sink is specified in the {@link BOpContext} and is used when the - * join fails. - * */ - public void test_query_join2_optionals() { + * Unit test for optional join. Two joins are used and target a + * {@link SliceOp}. The 2nd join is marked as optional. Intermediate results + * which do not succeed on the optional join are forwarded to the + * {@link SliceOp} which is the target specified by the + * {@link PipelineOp.Annotations#ALT_SINK_REF}. + * + * @todo Write unit test for optional join groups. Here the goal is to + * verify that intermediate results may skip more than one join. This + * was a problem for the old query evaluation approach since binding + * sets had to cascade through the query one join at a time. However, + * the new query engine design should handle this case. 
+ */ + public void test_query_join2_optionals() throws Exception { - fail("write test"); + final int startId = 1; + final int joinId1 = 2; + final int predId1 = 3; + final int joinId2 = 4; + final int predId2 = 5; + final int sliceId = 6; + + final IVariable<?> x = Var.var("x"); + final IVariable<?> y = Var.var("y"); + final IVariable<?> z = Var.var("z"); + + final PipelineOp startOp = new StartOp(new BOp[] {}, + NV.asMap(new NV[] {// + new NV(Predicate.Annotations.BOP_ID, startId),// + new NV(SliceOp.Annotations.EVALUATION_CONTEXT, + BOpEvaluationContext.CONTROLLER),// + })); + + final Predicate<?> pred1Op = new Predicate<E>( + new IVariableOrConstant[] { x, y }, NV + .asMap(new NV[] {// + new NV(Predicate.Annotations.RELATION_NAME, + new String[] { namespace }),// + new NV(Predicate.Annotations.BOP_ID, predId1),// + new NV(Predicate.Annotations.TIMESTAMP, ITx.READ_COMMITTED),// + })); + + final Predicate<?> pred2Op = new Predicate<E>( + new IVariableOrConstant[] { y, z }, NV + .asMap(new NV[] {// + new NV(Predicate.Annotations.RELATION_NAME, + new String[] { namespace }),// + new NV(Predicate.Annotations.BOP_ID, predId2),// + new NV(Predicate.Annotations.TIMESTAMP, ITx.READ_COMMITTED),// + })); + + final PipelineOp join1Op = new PipelineJoin<E>(// + startOp, pred1Op,// + NV.asMap(new NV[] {// + new NV(Predicate.Annotations.BOP_ID, joinId1),// + // Note: shard-partitioned joins! + new NV( Predicate.Annotations.EVALUATION_CONTEXT, + BOpEvaluationContext.SHARDED),// + })); + final PipelineOp join2Op = new PipelineJoin<E>(// + join1Op, pred2Op,// + NV.asMap(new NV[] {// + new NV(Predicate.Annotations.BOP_ID, joinId2),// + // Note: shard-partitioned joins! + new NV( Predicate.Annotations.EVALUATION_CONTEXT, + BOpEvaluationContext.SHARDED),// + // constraint x == z + new NV(PipelineJoin.Annotations.CONSTRAINTS,new IConstraint[]{ + new EQ(x,z) + }), + // join is optional. + new NV(PipelineJoin.Annotations.OPTIONAL,true),// + // optional target is the same as the default target. + new NV(PipelineOp.Annotations.ALT_SINK_REF,sliceId),// + })); + + final PipelineOp sliceOp = new SliceOp(// + new BOp[]{join2Op}, + NV.asMap(new NV[] {// + new NV(BOp.Annotations.BOP_ID, sliceId),// + new NV(BOp.Annotations.EVALUATION_CONTEXT, + BOpEvaluationContext.CONTROLLER),// + })); + + final PipelineOp query = sliceOp; + + // start the query. + final UUID queryId = UUID.randomUUID(); + final IChunkMessage<IBindingSet> initialChunkMessage; + { + + final IBindingSet initialBindings = new HashBindingSet(); + +// initialBindings.set(Var.var("x"), new Constant<String>("Mary")); + + initialChunkMessage = new LocalChunkMessage<IBindingSet>(queryEngine, + queryId, startId,// + -1, // partitionId + newBindingSetIterator(initialBindings)); + } + final RunningQuery runningQuery = queryEngine.eval(queryId, query, + initialChunkMessage); + + // verify solutions. + { + + // the expected solutions. + final IBindingSet[] expected = new IBindingSet[] {// + // solutions where the 2nd join succeeds. 
+ new ArrayBindingSet(// + new IVariable[] { x, y, z },// + new IConstant[] { new Constant<String>("John"), + new Constant<String>("Mary"), + new Constant<String>("John") }// + ), + new ArrayBindingSet(// + new IVariable[] { x, y, z },// + new IConstant[] { new Constant<String>("Mary"), + new Constant<String>("John"), + new Constant<String>("Mary") }// + ), + new ArrayBindingSet(// + new IVariable[] { x, y, z },// + new IConstant[] { new Constant<String>("Leon"), + new Constant<String>("Paul"), + new Constant<String>("Leon") }// + ), + new ArrayBindingSet(// + new IVariable[] { x, y, z },// + new IConstant[] { new Constant<String>("Paul"), + new Constant<String>("Leon"), + new Constant<String>("Paul") }// + ), + /* + * Plus anything we read from the first access path which + * did not pass the 2nd join. + */ + new ArrayBindingSet(// + new IVariable[] { Var.var("x"), Var.var("y") },// + new IConstant[] { new Constant<String>("Mary"), + new Constant<String>("Paul") }// + ), + }; + + TestQueryEngine.assertSameSolutionsAnyOrder(expected, + new Dechunkerator<IBindingSet>(runningQuery.iterator())); + +// // partition0 +// new E("John", "Mary"),// +// new E("Leon", "Paul"),// +// // partition1 +// new E("Mary", "John"),// +// new E("Mary", "Paul"),// +// new E("Paul", "Leon"),// + } + + // Wait until the query is done. + runningQuery.get(); + final Map<Integer, BOpStats> statsMap = runningQuery.getStats(); + { + // validate the stats map. + assertNotNull(statsMap); + assertEquals(4, statsMap.size()); + if (log.isInfoEnabled()) + log.info(statsMap.toString()); + } + + // validate the stats for the start operator. + { + final BOpStats stats = statsMap.get(startId); + assertNotNull(stats); + if (log.isInfoEnabled()) + log.info("start: " + stats.toString()); + + // verify query solution stats details. + assertEquals(1L, stats.chunksIn.get()); + assertEquals(1L, stats.unitsIn.get()); + assertEquals(1L, stats.unitsOut.get()); + assertEquals(1L, stats.chunksOut.get()); + } + + // validate the stats for the 1st join operator. + { + final BOpStats stats = statsMap.get(joinId1); + assertNotNull(stats); + if (log.isInfoEnabled()) + log.info("join1: " + stats.toString()); + + // verify query solution stats details. + assertEquals(2L, stats.chunksIn.get()); + assertEquals(2L, stats.unitsIn.get()); + assertEquals(5L, stats.unitsOut.get()); + assertEquals(2L, stats.chunksOut.get()); + } + + // validate the stats for the 2nd join operator. + { + final BOpStats stats = statsMap.get(joinId2); + assertNotNull(stats); + if (log.isInfoEnabled()) + log.info("join2: " + stats.toString()); + + // verify query solution stats details. +// assertEquals(1L, stats.chunksIn.get()); + assertEquals(5L, stats.unitsIn.get()); + assertEquals(5L, stats.unitsOut.get()); +// assertEquals(1L, stats.chunksOut.get()); + } + + // Validate stats for the sliceOp. + { + final BOpStats stats = statsMap.get(sliceId); + assertNotNull(stats); + if (log.isInfoEnabled()) + log.info("slice: " + stats.toString()); + + // verify query solution stats details. +// assertEquals(2L, stats.chunksIn.get()); + assertEquals(5L, stats.unitsIn.get()); + assertEquals(5L, stats.unitsOut.get()); +// assertEquals(1L, stats.chunksOut.get()); + } + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
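The optional-join tests above show ALT_SINK_REF in action; the complementary SINK_REF case described in the Union javadoc is wired analogously. A sketch (all bop ids here are invented for illustration; startOp and pred follow the construction style used in the tests):

    final int unionId = 10, parentId = 11, subqueryId = 12;

    // Each subquery names the UNION's *parent* as its default sink, so its
    // intermediate results bypass the UNION operator on the controller.
    final PipelineOp subquery = new PipelineJoin<E>(startOp, pred,
            NV.asMap(new NV[] {
                    new NV(BOp.Annotations.BOP_ID, subqueryId),
                    new NV(PipelineOp.Annotations.SINK_REF, parentId),
            }));

Per the new getEffectiveDefaultSink() logic in RunningQuery, an explicit SINK_REF always wins; only when it is absent does the sink default to the operator's parent.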
From: <tho...@us...> - 2010-10-08 14:56:37
|
Revision: 3755 http://bigdata.svn.sourceforge.net/bigdata/?rev=3755&view=rev Author: thompsonbry Date: 2010-10-08 14:56:30 +0000 (Fri, 08 Oct 2010) Log Message: ----------- Modified the default for REMOTE_ACCESS_PATH to be "true" and updated the Javadoc on IPredicate.Options.REMOTE_ACCESS_PATH. Updated the TestFederatedQueryEngine test suite to explicitly specify local (partitioned) access paths. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestFederatedQueryEngine.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java 2010-10-08 14:40:48 UTC (rev 3754) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java 2010-10-08 14:56:30 UTC (rev 3755) @@ -48,6 +48,7 @@ import com.bigdata.relation.rule.IAccessPathExpander; import com.bigdata.relation.rule.eval.IEvaluationPlan; import com.bigdata.relation.rule.eval.pipeline.JoinMasterTask; +import com.bigdata.service.ndx.IClientIndex; import com.bigdata.striterator.IKeyOrder; import cutthecrap.utils.striterators.FilterBase; @@ -194,36 +195,46 @@ int DEFAULT_PARTITION_ID = -1; /** - * Boolean option determines whether the predicate will use a local - * access path or a remote access path (default + * Boolean option determines whether the predicate will use a data + * service local access path (partitioned index view) or a remote access + * path (global index view) (default * {@value #DEFAULT_REMOTE_ACCESS_PATH}). * <p> - * <em>Local access paths</em> are much more efficient and should be - * used for most purposes. However, it is not possible to impose certain - * kinds of filters on a sharded or hash partitioned operations across - * local access paths. In particular, a DISTINCT filter can not be - * imposed using sharded or hash partitioned. + * Note: "Remote" has the semantics that the access path has a total + * view of the index. In scale-out this is achieved using RMI and an + * {@link IClientIndex}. "Local" has the semantics that the access path + * has a partitioned view of the index. In scale-out, this corresponds + * to a shard. In standalone, there is no difference between "local" and + * "remote" index views since the indices are not partitioned. * <p> - * When the access path is local, the parent operator must be annotated - * to use a {@link BOpEvaluationContext#SHARDED shard wise} or - * {@link BOpEvaluationContext#HASHED node-wise} mapping of the binding - * sets. + * Local access paths (in scale-out) are much more efficient and should + * be used for most purposes. However, it is not possible to impose + * certain kinds of filters on a partitioned index. For example, a + * DISTINCT filter requires a global index view. * <p> - * <em>Remote access paths</em> use a scale-out index view. This view - * makes the scale-out index appear as if it were monolithic rather than - * sharded or hash partitioned. The monolithic view of a scale-out index - * can be used to impose a DISTINCT filter since all tuples will flow - * back to the caller. + * When the access path is local (aka partitioned), the parent operator + * must be annotated to use a {@link BOpEvaluationContext#SHARDED shard + * wise} or {@link BOpEvaluationContext#HASHED node-wise} mapping of the + * binding sets. 
* <p> - * When the access path is remote, the parent operator should use + * Remote access paths (in scale-out) use a scale-out index view. This + * view makes the scale-out index appear as if it were monolithic rather + * than partitioned. The monolithic view of a scale-out index can be + * used to impose a DISTINCT filter since all tuples will flow back to + * the caller. + * <p> + * When the access path is remote the parent operator should use * {@link BOpEvaluationContext#ANY} to prevent the binding sets from - * being moved around when the access path is remote. + * being moved around when the access path is remote. Note that the + * {@link BOpEvaluationContext} is basically ignored for standalone + * since there is only one place for the operator to run - on the query + * controller. * * @see BOpEvaluationContext */ String REMOTE_ACCESS_PATH = "remoteAccessPath"; - boolean DEFAULT_REMOTE_ACCESS_PATH = false; + boolean DEFAULT_REMOTE_ACCESS_PATH = true; /** * If the estimated rangeCount for an {@link AccessPath#iterator()} is Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestFederatedQueryEngine.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestFederatedQueryEngine.java 2010-10-08 14:40:48 UTC (rev 3754) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestFederatedQueryEngine.java 2010-10-08 14:56:30 UTC (rev 3755) @@ -540,8 +540,8 @@ new NV(Predicate.Annotations.BOP_ID, predId),// new NV(Predicate.Annotations.TIMESTAMP, ITx.READ_COMMITTED),// -// new NV(Predicate.Annotations.KEY_ORDER, -// R.primaryKeyOrder),// + // Note: local access path! + new NV( Predicate.Annotations.REMOTE_ACCESS_PATH,false), })); final PipelineJoin<E> joinOp = new PipelineJoin<E>(startOp/* left */, @@ -708,8 +708,8 @@ new NV(Predicate.Annotations.BOP_ID, predId),// new NV(Predicate.Annotations.TIMESTAMP, ITx.READ_COMMITTED),// -// new NV(Predicate.Annotations.KEY_ORDER, -// R.primaryKeyOrder),// + // Note: local access path! + new NV( Predicate.Annotations.REMOTE_ACCESS_PATH,false), })); final PipelineJoin<E> joinOp = new PipelineJoin<E>(startOp/* left */, @@ -858,8 +858,8 @@ .asMap(new NV[] {// new NV(Predicate.Annotations.RELATION_NAME, new String[] { namespace }),// -// new NV(Predicate.Annotations.KEY_ORDER, -// R.primaryKeyOrder),// + // Note: local access path! + new NV( Predicate.Annotations.REMOTE_ACCESS_PATH,false), new NV(Predicate.Annotations.BOP_ID, predId),// new NV(Predicate.Annotations.TIMESTAMP, ITx.READ_COMMITTED),// @@ -1013,6 +1013,8 @@ new NV(Predicate.Annotations.RELATION_NAME, new String[] { namespace }),// new NV(Predicate.Annotations.BOP_ID, predId1),// + // Note: local access path! + new NV( Predicate.Annotations.REMOTE_ACCESS_PATH,false), new NV(Predicate.Annotations.TIMESTAMP, ITx.READ_COMMITTED),// })); @@ -1022,6 +1024,8 @@ new NV(Predicate.Annotations.RELATION_NAME, new String[] { namespace }),// new NV(Predicate.Annotations.BOP_ID, predId2),// + // Note: local access path! + new NV( Predicate.Annotations.REMOTE_ACCESS_PATH,false), new NV(Predicate.Annotations.TIMESTAMP, ITx.READ_COMMITTED),// })); @@ -1207,6 +1211,8 @@ .asMap(new NV[] {// new NV(Predicate.Annotations.RELATION_NAME, new String[] { namespace }),// + // Note: local access path! 
+ new NV( Predicate.Annotations.REMOTE_ACCESS_PATH,false), new NV(Predicate.Annotations.BOP_ID, predId1),// new NV(Predicate.Annotations.TIMESTAMP, ITx.READ_COMMITTED),// })); @@ -1216,6 +1222,8 @@ .asMap(new NV[] {// new NV(Predicate.Annotations.RELATION_NAME, new String[] { namespace }),// + // Note: local access path! + new NV( Predicate.Annotations.REMOTE_ACCESS_PATH,false), new NV(Predicate.Annotations.BOP_ID, predId2),// new NV(Predicate.Annotations.TIMESTAMP, ITx.READ_COMMITTED),// })); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
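To make the new default concrete: after this change a scale-out query that wants shard-local access paths must opt out explicitly, and the join reading that predicate must then be mapped across shards. The following condensed sketch is assembled from the test fragments above; fixture variables such as namespace, the predId/joinId constants, startOp, and the bigdata imports are assumed from the TestFederatedQueryEngine class and are not shown.

    final Predicate<?> pred = new Predicate<E>(
            new IVariableOrConstant[] { Var.var("x"), Var.var("y") },
            NV.asMap(new NV[] {
                    new NV(Predicate.Annotations.RELATION_NAME,
                            new String[] { namespace }),
                    new NV(Predicate.Annotations.BOP_ID, predId),
                    new NV(Predicate.Annotations.TIMESTAMP, ITx.READ_COMMITTED),
                    // Opt out of the new default: shard-local access path.
                    new NV(Predicate.Annotations.REMOTE_ACCESS_PATH, false),
            }));

    final PipelineOp joinOp = new PipelineJoin<E>(startOp, pred,
            NV.asMap(new NV[] {
                    new NV(Predicate.Annotations.BOP_ID, joinId),
                    // Required with a local access path: map binding sets over shards.
                    new NV(Predicate.Annotations.EVALUATION_CONTEXT,
                            BOpEvaluationContext.SHARDED),
            }));

The two annotations travel together: a predicate that opts out of the remote (global) index view must have its join mapped with SHARDED (or HASHED) evaluation, exactly as the Javadoc above requires, while the remote default pairs with ANY.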
From: <tho...@us...> - 2010-10-20 18:30:33
|
Revision: 3830 http://bigdata.svn.sourceforge.net/bigdata/?rev=3830&view=rev Author: thompsonbry Date: 2010-10-20 18:30:25 +0000 (Wed, 20 Oct 2010) Log Message: ----------- Added a MultiplexBlockingBuffer. This is a factory pattern which may be used to share the same backing BlockingBuffer among many producers. Each producer receives a skin for the backing buffer. The backing buffer is only closed once each producer closes their skin. Added an IMultiSourceAsynchronousIterator interface for an IAsynchronousIterator which can consume multiple sources. There is one implementation in this commit, which allows the producer to attach another source. This is used to assign a chunk to a task which is already running. There is another version which handles multiple concurrent producers, but it's implementation is not yet finished. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/relation/accesspath/TestAll.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/IMultiSourceAsynchronousIterator.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/MultiSourceSequentialAsynchronousIterator.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/MultiplexBlockingBuffer.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/relation/accesspath/TestMultiSourceSequentialAsynchronousIterator.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/relation/accesspath/TestMultiplexBlockingBuffer.java Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/IMultiSourceAsynchronousIterator.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/IMultiSourceAsynchronousIterator.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/IMultiSourceAsynchronousIterator.java 2010-10-20 18:30:25 UTC (rev 3830) @@ -0,0 +1,53 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Oct 19, 2010 + */ + +package com.bigdata.relation.accesspath; + +/** + * An interface which permits new sources to be attached dynamically. The + * decision to accept a new source via {@link #add(IAsynchronousIterator)} or to + * {@link IMultiSourceAsynchronousIterator#close()} the iterator must be atomic. + * In particular, it is illegal for a source to be accepted after the iterator + * has been closed. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public interface IMultiSourceAsynchronousIterator<E> extends + IAsynchronousIterator<E> { + + /** + * Add a source. 
If the iterator already reports that it is closed then the + * new source can not be added and this method will return false. + * + * @param src + * The source. + * @return <code>true</code> iff the source could be added. + */ + boolean add(IAsynchronousIterator<E> src); + +} Property changes on: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/IMultiSourceAsynchronousIterator.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/MultiSourceSequentialAsynchronousIterator.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/MultiSourceSequentialAsynchronousIterator.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/MultiSourceSequentialAsynchronousIterator.java 2010-10-20 18:30:25 UTC (rev 3830) @@ -0,0 +1,186 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Oct 19, 2010 + */ + +package com.bigdata.relation.accesspath; + +import java.util.NoSuchElementException; +import java.util.Queue; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.locks.ReentrantLock; + + +/** + * Class allows new sources to be attached dynamically. If the existing sources + * are drained then the iterator will {@link #close()} itself so that new + * sources can no longer be attached. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public class MultiSourceSequentialAsynchronousIterator<E> implements + IMultiSourceAsynchronousIterator<E> { + + private final ReentrantLock lock = new ReentrantLock(); + + private final Queue<IAsynchronousIterator<E>> sources = new LinkedBlockingQueue<IAsynchronousIterator<E>>(); + + /** + * The current inner iterator. When <code>null</code> the outer iterator has + * been closed and will not deliver any more results and will not accept any + * new sources. + * <p> + * Note: This can be asynchronously closed if the application invokes + * {@link #close()}. Methods which test on this can not assume that it will + * be non-<code>null</code> the next time they check unless they are holding + * the {@link #lock}. Methods which do not obtain the lock can offer a + * weaker atomicity by copying the reference to a local variable and then + * testing that variable. 
+ */ + private volatile IAsynchronousIterator<E> current; + + public MultiSourceSequentialAsynchronousIterator(final IAsynchronousIterator<E> src) { + current = src; + } + + public void close() { + lock.lock(); + try { + current = null; + sources.clear(); + } finally { + lock.unlock(); + } + } + + public boolean add(final IAsynchronousIterator<E> src) { + if (src == null) + throw new IllegalArgumentException(); + lock.lock(); + try { + if (current == null) + return false; + sources.add(src); + return true; + } finally { + lock.unlock(); + } + } + + /** + * If the current source is not exhausted, then return it immediately. + * Otherwise, return the next source which is not exhausted. If no such + * sources are available, then {@link #close()} the iterator. The decision + * to accept another source or to close the iterator is made atomic by the + * use of the {@link #lock} in this method and in {@link #close()}. + * + * @return The next source -or- <code>null</code> if there are no sources + * available. + */ + private IAsynchronousIterator<E> nextSource() { + final IAsynchronousIterator<E> tmp = current; + if (tmp == null) + return null; + if (!tmp.isExhausted()) + return current; // Note: MAY be asynchronously cleared! + // current is known to be exhausted (or asynchronously closed). + lock.lock(); + try { + // remove the head of the queue (non-blocking) + while ((current = sources.poll()) != null) { + if (!current.isExhausted()) + return current; + } + // no more sources with data, close while holding lock. + close(); + return null; + } finally { + lock.unlock(); + } + } + + public boolean hasNext() { + while (true) { + final IAsynchronousIterator<E> tmp = nextSource(); + if (tmp == null) + return false; + if (tmp.hasNext()) + return true; + } + } + + public E next() { + while (true) { + final IAsynchronousIterator<E> tmp = nextSource(); + if (tmp == null) + throw new NoSuchElementException(); + if (tmp.hasNext()) + return tmp.next(); + } + } + + public void remove() { + throw new UnsupportedOperationException(); + } + + public boolean isExhausted() { + return nextSource() == null; + } + + public boolean hasNext(final long timeout, final TimeUnit unit) + throws InterruptedException { + final long begin = System.nanoTime(); + final long nanos = unit.toNanos(timeout); + long remaining = nanos; + while (remaining > 0) { + final IAsynchronousIterator<E> tmp = nextSource(); + if (tmp == null) + return false; + if (tmp.hasNext(remaining, TimeUnit.NANOSECONDS)) + return true; + remaining = nanos - (System.nanoTime() - begin); + } + // timeout.
+ return false; + } + + public E next(final long timeout, final TimeUnit unit) + throws InterruptedException { + final long begin = System.nanoTime(); + final long nanos = unit.toNanos(timeout); + long remaining = nanos; + while (true) { + final IAsynchronousIterator<E> tmp = nextSource(); + if (tmp == null) + return null; + if (tmp.hasNext(remaining, TimeUnit.NANOSECONDS)) + return tmp.next(); + remaining = nanos - (System.nanoTime() - begin); + } + } + +} Property changes on: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/MultiSourceSequentialAsynchronousIterator.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/MultiplexBlockingBuffer.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/MultiplexBlockingBuffer.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/MultiplexBlockingBuffer.java 2010-10-20 18:30:25 UTC (rev 3830) @@ -0,0 +1,212 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Oct 7, 2010 + */ + +package com.bigdata.relation.accesspath; + +import java.util.Iterator; + import java.util.LinkedHashSet; +import java.util.concurrent.Future; +import java.util.concurrent.locks.ReentrantLock; + + +/** + * A factory for skins which may be used to multiplex writes against a + * {@link BlockingBuffer}. Each skin writes through to the backing + * {@link BlockingBuffer} but may be closed independently of the backing + * {@link BlockingBuffer}. This allows multiple producers to share a single + * {@link BlockingBuffer} as long as they use a subset of the + * {@link IBlockingBuffer} API (they can not set the {@link Future} on the + * objects returned by this factory or obtain its + * {@link IBlockingBuffer#iterator()}). + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + * + * @todo Does this need to close automatically when the last open inner buffer + * is closed or should it be closed explicitly and close all inner buffers + * when it is closed? + */ +public class MultiplexBlockingBuffer<E> { + + /** The delegate. */ + private final IBlockingBuffer<E> b; + + /** Lock guarding internal state. */ + private final ReentrantLock lock = new ReentrantLock(); + + /** The set of open buffers which have not yet been closed. */ + private final LinkedHashSet<IBlockingBuffer<E>> set = new LinkedHashSet<IBlockingBuffer<E>>(); + + /** The #of currently open buffers.
*/ + private int counter = 0; + + public MultiplexBlockingBuffer(final IBlockingBuffer<E> b) { + if (b == null) + throw new IllegalArgumentException(); + this.b = b; + } + + public boolean isOpen() { + return b.isOpen(); + } + + public IBlockingBuffer<E> newInstance() { + lock.lock(); + try { + if (!isOpen()) // no new skins once the backing buffer is closed. + throw new BufferClosedException(); + final IBlockingBuffer<E> n = new InnerBlockingBuffer(); + if (!set.add(n)) + throw new AssertionError(); + counter++; + return n; + } finally { + lock.unlock(); + } + } + + public void flushAndCloseAll() { + lock.lock(); + try { + final Iterator<IBlockingBuffer<E>> itr = set.iterator(); + while(itr.hasNext()) { + final IBlockingBuffer<E> n = itr.next(); + n.close(); + } + assert counter == 0; + b.flush(); + b.close(); + } finally { + lock.unlock(); + } + } + + /** + * The {@link IBlockingBuffer} reference provided to the constructor. + */ + public IBlockingBuffer<E> getBackingBuffer() { + return b; + } + + /** + * Inner "skin" writes through to the backing buffer shared by all skins. + * <p> + * Note: This inner class does not support several of the + * {@link IBlockingBuffer} methods whose semantics are likely to cause + * problems when interpreted in the light of a skin over a shared buffer. + * The only way these methods could be given clear semantics is if the skin + * were actually a full {@link BlockingBuffer} which was coupled to the + * shared buffer. However, that involves double buffering and double copying + * and I do not think that this is worth it. + */ + private class InnerBlockingBuffer implements IBlockingBuffer<E> { + + public InnerBlockingBuffer() { + } + + private boolean innerBufferOpen = true; + + public IAsynchronousIterator<E> iterator() { + throw new UnsupportedOperationException(); + } + + public void setFuture(Future future) { + throw new UnsupportedOperationException(); + } + + public void abort(final Throwable cause) { + lock.lock(); + try { + if (!innerBufferOpen) + throw new BufferClosedException(); + b.abort(cause); + } finally { + lock.unlock(); + } + } + + public void close() { + lock.lock(); + try { + if (!innerBufferOpen) + return; + innerBufferOpen = false; + if (!set.remove(this)) + throw new AssertionError(); + counter--; + if (counter == 0) { + /* + * Note: We flush the backing buffer before we close it in + * case it has anything buffered. This covers the normal case, + * in which the caller has already invoked flush() on + * this skin, and should not cause any harm otherwise. + */ + b.flush(); + b.close(); + } + } finally { + lock.unlock(); + } + } + + public Future getFuture() { + return b.getFuture(); + } + + public boolean isOpen() { + return innerBufferOpen && b.isOpen(); + } + + public long flush() { + /* + * Nothing to flush. The target is flushed when the outer class is + * closed.
+ */ + return 0; + } + + public void add(E e) { + if (!innerBufferOpen) + throw new BufferClosedException(); + b.add(e); + } + + public boolean isEmpty() { + return b.isEmpty(); + } + + public void reset() { + throw new UnsupportedOperationException(); + } + + public int size() { + return b.size(); + } + + } + +} Property changes on: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/MultiplexBlockingBuffer.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/relation/accesspath/TestAll.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/relation/accesspath/TestAll.java 2010-10-20 18:27:22 UTC (rev 3829) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/relation/accesspath/TestAll.java 2010-10-20 18:30:25 UTC (rev 3830) @@ -77,11 +77,17 @@ suite.addTestSuite(TestBlockingBufferWithChunks.class); suite.addTestSuite(TestBlockingBufferWithChunksDeque.class); - + suite.addTestSuite(TestUnsynchronizedArrayBuffer.class); suite.addTestSuite(TestUnsynchronizedUnboundedChunkBuffer.class); + suite.addTestSuite(TestMultiplexBlockingBuffer.class); + + suite.addTestSuite(TestMultiSourceSequentialAsynchronousIterator.class); + + //suite.addTestSuite(TestMultiSourceParallelAsynchronousIterator.class); + return suite; } Added: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/relation/accesspath/TestMultiSourceSequentialAsynchronousIterator.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/relation/accesspath/TestMultiSourceSequentialAsynchronousIterator.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/relation/accesspath/TestMultiSourceSequentialAsynchronousIterator.java 2010-10-20 18:30:25 UTC (rev 3830) @@ -0,0 +1,171 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Oct 6, 2010 + */ + +package com.bigdata.relation.accesspath; + +import java.util.concurrent.TimeUnit; + +import com.bigdata.relation.accesspath.IAsynchronousIterator; +import com.bigdata.relation.accesspath.ThickAsynchronousIterator; + +import junit.framework.TestCase2; + +/** + * Test suite for the {@link MultiSourceSequentialAsynchronousIterator}. 
+ * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public class TestMultiSourceSequentialAsynchronousIterator extends TestCase2 { + + public TestMultiSourceSequentialAsynchronousIterator() { + + } + + public TestMultiSourceSequentialAsynchronousIterator(String name) { + super(name); + } + + private final IAsynchronousIterator<String> emptyIterator() { + return new ThickAsynchronousIterator<String>(new String[]{}); + } + + public void test1() throws InterruptedException { + + // empty iterator. + final MultiSourceSequentialAsynchronousIterator<String> itr = new MultiSourceSequentialAsynchronousIterator<String>( + emptyIterator()); + +// // nothing available yet. +// assertFalse(itr.hasNext(1, TimeUnit.MILLISECONDS)); +// assertNull(itr.next(1, TimeUnit.MILLISECONDS)); + + // add an empty chunk. + assertTrue(itr.add(new ThickAsynchronousIterator<String>( + new String[] {}))); + +// // still nothing available yet. +// assertFalse(itr.hasNext(1, TimeUnit.MILLISECONDS)); +// assertNull(itr.next(1, TimeUnit.MILLISECONDS)); + + // add a non-empty chunk. + assertTrue(itr.add(new ThickAsynchronousIterator<String>( + new String[] { "a" }))); + + // reports data available and visits data. + assertTrue(itr.hasNext(1, TimeUnit.MILLISECONDS)); + assertEquals("a", itr.next(1, TimeUnit.MILLISECONDS)); + + // add a non-empty chunk. + assertTrue(itr.add(new ThickAsynchronousIterator<String>( + new String[] { "b" }))); + + // reports data available and visits data. + assertTrue(itr.hasNext()); + assertEquals("b", itr.next()); + + // close the iterator. + itr.close(); + + // iterator reports nothing available. + assertFalse(itr.hasNext()); + assertFalse(itr.hasNext(1, TimeUnit.MILLISECONDS)); + assertNull(itr.next(1, TimeUnit.MILLISECONDS)); + + // can not add more sources. + assertFalse(itr.add(new ThickAsynchronousIterator<String>( + new String[] { "b" }))); + + } + + public void test2() throws InterruptedException { + + // empty iterator. + final MultiSourceSequentialAsynchronousIterator<String> itr = new MultiSourceSequentialAsynchronousIterator<String>( + emptyIterator()); + + // add a non-empty chunk. + assertTrue(itr.add(new ThickAsynchronousIterator<String>( + new String[] { "a" }))); + + // add a non-empty chunk. + assertTrue(itr.add(new ThickAsynchronousIterator<String>( + new String[] { "b" }))); + + // reports data available and visits data. + assertTrue(itr.hasNext()); + assertEquals("a", itr.next()); + assertTrue(itr.hasNext()); + assertEquals("b", itr.next()); + + // another read on the iterator causes it to be closed. + assertFalse(itr.hasNext()); + + // can not add more sources. + assertFalse(itr.add(new ThickAsynchronousIterator<String>( + new String[] { "b" }))); + + } + + /** + * Verify that the iterator notices if it is asynchronously closed. + * + * @throws InterruptedException + */ + public void test3() throws InterruptedException { + + // empty iterator. + final MultiSourceSequentialAsynchronousIterator<String> itr = new MultiSourceSequentialAsynchronousIterator<String>( + emptyIterator()); + + new Thread() { + + public void run() { + try { + log.info("Will wait on iterator."); + if (itr.hasNext(2000, TimeUnit.MILLISECONDS)) + fail("Iterator should not visit anything."); + } catch (Throwable t) { + log.error(t, t); + } + } + + }.start(); + + log.info("Sleeping..."); + Thread.sleep(500/*milliseconds.*/); + + log.info("Will close iterator."); + itr.close(); + + // can not add more sources. 
+ assertFalse(itr.add(new ThickAsynchronousIterator<String>( + new String[] { "b" }))); + + } + +} Property changes on: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/relation/accesspath/TestMultiSourceSequentialAsynchronousIterator.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Added: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/relation/accesspath/TestMultiplexBlockingBuffer.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/relation/accesspath/TestMultiplexBlockingBuffer.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/relation/accesspath/TestMultiplexBlockingBuffer.java 2010-10-20 18:30:25 UTC (rev 3830) @@ -0,0 +1,127 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Oct 8, 2010 + */ + +package com.bigdata.relation.accesspath; + +import junit.framework.TestCase2; + +import com.bigdata.relation.accesspath.BlockingBuffer; +import com.bigdata.relation.accesspath.BufferClosedException; +import com.bigdata.relation.accesspath.IBlockingBuffer; + +/** + * Test suite for {@link MultiplexBlockingBuffer}. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public class TestMultiplexBlockingBuffer extends TestCase2 { + + /** + * + */ + public TestMultiplexBlockingBuffer() { + + } + + /** + * @param name + */ + public TestMultiplexBlockingBuffer(String name) { + super(name); + + } + + public void test_multiplex() { + + final IBlockingBuffer<String> buffer = new BlockingBuffer<String>(); + + final MultiplexBlockingBuffer<String> multiplex = new MultiplexBlockingBuffer<String>(buffer); + + // buffer is open and empty. + assertTrue(buffer.isOpen()); + assertTrue(buffer.isEmpty()); + + // multiplex is open. + assertTrue(multiplex.isOpen()); + + final IBlockingBuffer<String> skin1 = multiplex.newInstance(); + + final IBlockingBuffer<String> skin2 = multiplex.newInstance(); + + // buffer is open and empty. + assertTrue(buffer.isOpen()); + assertTrue(buffer.isEmpty()); + + // multiplex is open. + assertTrue(multiplex.isOpen()); + + skin1.add("a"); + skin1.flush(); + skin1.close(); + try { + skin1.add("a2"); + fail("Expecting: " + BufferClosedException.class); + } catch (BufferClosedException ex) { + if (log.isInfoEnabled()) + log.info("Ignoring expected exception: " + ex); + } + + // buffer is open but no longer empty. + assertTrue(buffer.isOpen()); + assertFalse(buffer.isEmpty()); + + // multiplex is open. + assertTrue(multiplex.isOpen()); + + skin2.add("b"); + skin2.add("c"); + skin2.flush(); + + // buffer is open but not empty. 
+ assertTrue(buffer.isOpen()); + assertFalse(buffer.isEmpty()); + + // multiplex is open. + assertTrue(multiplex.isOpen()); + + // close the last open skin. + skin2.close(); + + // buffer is closed but not empty. + assertFalse(buffer.isOpen()); + assertFalse(buffer.isEmpty()); + + // multiplex closed. + assertFalse(multiplex.isOpen()); + + // verify the data. + assertSameIterator(new String[]{"a","b","c"}, buffer.iterator()); + + } + +} Property changes on: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/relation/accesspath/TestMultiplexBlockingBuffer.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
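A minimal single-threaded sketch of the intended usage, following TestMultiplexBlockingBuffer above (in real use each skin would be handed to its own producer task; the imports from com.bigdata.relation.accesspath and java.util are assumed):

    final IBlockingBuffer<String> buffer = new BlockingBuffer<String>();

    final MultiplexBlockingBuffer<String> mux =
            new MultiplexBlockingBuffer<String>(buffer);

    // Each producer gets its own skin on the shared buffer.
    final IBlockingBuffer<String> skinA = mux.newInstance();
    final IBlockingBuffer<String> skinB = mux.newInstance();

    skinA.add("a");
    skinA.flush();
    skinA.close(); // backing buffer stays open: skinB is still open.

    skinB.add("b");
    skinB.flush();
    skinB.close(); // last skin closed: backing buffer is flushed and closed.

    // The single consumer drains the shared buffer.
    final Iterator<String> itr = buffer.iterator();
    while (itr.hasNext())
        System.out.println(itr.next());

The point of the pattern is the close() protocol: the backing buffer is flushed and closed only when the last open skin is closed, so the consumer's iterator terminates exactly once all producers are done.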
From: <tho...@us...> - 2010-10-22 19:45:42
|
Revision: 3838 http://bigdata.svn.sourceforge.net/bigdata/?rev=3838&view=rev Author: thompsonbry Date: 2010-10-22 19:45:33 +0000 (Fri, 22 Oct 2010) Log Message: ----------- Reorganized RunningQuery in order to have (a) operator tasks generate chunks incrementally (this avoids problems with deadlock when the operator writes onto a bounded queue and reduces the latency required to produce each solution); and (b) have operator tasks drain their work queue in order to get better efficiency when the producer is leading. These changes should benefit scale-out as well as scale-up. Scale-up will also benefit from chaining the operators together (rather than passing around IChunkMessages) but I have not yet made that optimization. I have tested this change set against: - TestBigdataSailWithQuads - LUBM U10 - BSBM 1M No obvious lock contention was visible with BSBM 1M. No obvious hotspots were revealed by a sampling profiler. I am going to test at larger scale on a workstation next so I can compare performance to the trunk baseline. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngineTestAnnotations.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunState.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/FederatedRunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/NIOChunkMessage.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/ThickChunkMessage.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/BlockingBuffer.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/bset/TestConditionalRoutingOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/bset/TestCopyBindingSets.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestNIOChunkMessage.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestThickChunkMessage.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/join/TestPipelineJoin.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/solutions/TestDistinctBindingSets.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/solutions/TestMemorySortOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/solutions/TestSliceOp.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/BlockingBufferWithStats.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/IChunkHandler.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/StandaloneChunkHandler.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/FederationChunkHandler.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java 2010-10-22 17:53:21 UTC (rev 3837) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java 2010-10-22 19:45:33 UTC (rev 3838) @@ -37,8 +37,6 @@ import com.bigdata.relation.accesspath.IAccessPath; import
com.bigdata.relation.accesspath.IAsynchronousIterator; import com.bigdata.relation.accesspath.IBlockingBuffer; -import com.bigdata.relation.accesspath.IMultiSourceAsynchronousIterator; -import com.bigdata.relation.accesspath.MultiSourceSequentialAsynchronousIterator; import com.bigdata.service.IBigdataFederation; /** @@ -57,7 +55,8 @@ private final BOpStats stats; - private final IMultiSourceAsynchronousIterator<E[]> source; +// private final IMultiSourceAsynchronousIterator<E[]> source; + private final IAsynchronousIterator<E[]> source; private final IBlockingBuffer<E[]> sink; @@ -98,25 +97,25 @@ return source; } - /** - * Attach another source. The decision to attach the source is mutex with - * respect to the decision that the source reported by {@link #getSource()} - * is exhausted. - * - * @param source - * The source. - * - * @return <code>true</code> iff the source was attached. - */ - public boolean addSource(IAsynchronousIterator<E[]> source) { +// /** +// * Attach another source. The decision to attach the source is mutex with +// * respect to the decision that the source reported by {@link #getSource()} +// * is exhausted. +// * +// * @param source +// * The source. +// * +// * @return <code>true</code> iff the source was attached. +// */ +// public boolean addSource(IAsynchronousIterator<E[]> source) { +// +// if (source == null) +// throw new IllegalArgumentException(); +// +// return this.source.add(source); +// +// } - if (source == null) - throw new IllegalArgumentException(); - - return this.source.add(source); - - } - /** * Where to write the output of the operator. * @@ -202,7 +201,8 @@ throw new IllegalArgumentException(); this.partitionId = partitionId; this.stats = stats; - this.source = new MultiSourceSequentialAsynchronousIterator<E[]>(source); + this.source = source; +// this.source = new MultiSourceSequentialAsynchronousIterator<E[]>(source); this.sink = sink; this.sink2 = sink2; // may be null } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java 2010-10-22 17:53:21 UTC (rev 3837) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java 2010-10-22 19:45:33 UTC (rev 3838) @@ -161,31 +161,31 @@ } - /** - * Instantiate a buffer suitable as a sink for this operator. The buffer - * will be provisioned based on the operator annotations. - * <p> - * Note: if the operation swallows binding sets from the pipeline (such as - * operators which write on the database) then the operator MAY return an - * immutable empty buffer. - * - * @param stats - * The statistics on this object will automatically be updated as - * elements and chunks are output onto the returned buffer. - * - * @return The buffer. - */ - public IBlockingBuffer<IBindingSet[]> newBuffer(final BOpStats stats) { +// /** +// * Instantiate a buffer suitable as a sink for this operator. The buffer +// * will be provisioned based on the operator annotations. +// * <p> +// * Note: if the operation swallows binding sets from the pipeline (such as +// * operators which write on the database) then the operator MAY return an +// * immutable empty buffer. +// * +// * @param stats +// * The statistics on this object will automatically be updated as +// * elements and chunks are output onto the returned buffer. +// * +// * @return The buffer. 
+// */ +// public IBlockingBuffer<IBindingSet[]> newBuffer(final BOpStats stats) { +// +// if (stats == null) +// throw new IllegalArgumentException(); +// +// return new BlockingBufferWithStats<IBindingSet[]>( +// getChunkOfChunksCapacity(), getChunkCapacity(), +// getChunkTimeout(), Annotations.chunkTimeoutUnit, stats); +// +// } - if (stats == null) - throw new IllegalArgumentException(); - - return new BlockingBufferWithStats<IBindingSet[]>( - getChunkOfChunksCapacity(), getChunkCapacity(), - getChunkTimeout(), Annotations.chunkTimeoutUnit, stats); - - } - /** * Return a {@link FutureTask} which computes the operator against the * evaluation context. The caller is responsible for executing the @@ -205,77 +205,4 @@ */ abstract public FutureTask<Void> eval(BOpContext<IBindingSet> context); - private static class BlockingBufferWithStats<E> extends BlockingBuffer<E> { - - private final BOpStats stats; - - /** - * @param chunkOfChunksCapacity - * @param chunkCapacity - * @param chunkTimeout - * @param chunkTimeoutUnit - * @param stats - */ - public BlockingBufferWithStats(int chunkOfChunksCapacity, - int chunkCapacity, long chunkTimeout, - TimeUnit chunkTimeoutUnit, final BOpStats stats) { - - super(chunkOfChunksCapacity, chunkCapacity, chunkTimeout, - chunkTimeoutUnit); - - this.stats = stats; - - } - - /** - * Overridden to track {@link BOpStats#unitsOut} and - * {@link BOpStats#chunksOut}. - * <p> - * Note: {@link BOpStats#chunksOut} will report the #of chunks added to - * this buffer. However, the buffer MAY combine chunks either on add() - * or when drained by the iterator so the actual #of chunks read back - * from the iterator MAY differ. - * <p> - * {@inheritDoc} - */ - @Override - public boolean add(final E e, final long timeout, final TimeUnit unit) - throws InterruptedException { - - final boolean ret = super.add(e, timeout, unit); - - if (e.getClass().getComponentType() != null) { - - stats.unitsOut.add(((Object[]) e).length); - - } else { - - stats.unitsOut.increment(); - - } - - stats.chunksOut.increment(); - - return ret; - - } - - /** - * You can uncomment a line in this method to see who is closing the - * buffer. - * <p> - * {@inheritDoc} - */ - @Override - public void close() { - -// if (isOpen()) -// log.error(toString(), new RuntimeException("STACK TRACE")); - - super.close(); - - } - - } - } Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/BlockingBufferWithStats.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/BlockingBufferWithStats.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/BlockingBufferWithStats.java 2010-10-22 19:45:33 UTC (rev 3838) @@ -0,0 +1,90 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Oct 22, 2010 + */ + +package com.bigdata.bop.engine; + +import java.util.concurrent.TimeUnit; + +import com.bigdata.bop.BufferAnnotations; +import com.bigdata.bop.PipelineOp; +import com.bigdata.relation.accesspath.BlockingBuffer; + +/** + * Extended to use the {@link BufferAnnotations} to provision the + * {@link BlockingBuffer} and to track the {@link BOpStats} as chunks are added + * to the buffer. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public class BlockingBufferWithStats<E> extends BlockingBuffer<E> { + + private final BOpStats stats; + + public BlockingBufferWithStats(final PipelineOp op, final BOpStats stats) { + + super(op.getChunkOfChunksCapacity(), op.getChunkCapacity(), op + .getChunkTimeout(), BufferAnnotations.chunkTimeoutUnit); + + this.stats = stats; + + } + + /** + * Overridden to track {@link BOpStats#unitsOut} and + * {@link BOpStats#chunksOut}. + * <p> + * Note: {@link BOpStats#chunksOut} will report the #of chunks added to this + * buffer. However, the buffer MAY combine chunks either on add() or when + * drained by the iterator so the actual #of chunks read back from the + * iterator MAY differ. + * <p> + * {@inheritDoc} + */ + @Override + public boolean add(final E e, final long timeout, final TimeUnit unit) + throws InterruptedException { + + final boolean ret = super.add(e, timeout, unit); + + if (e.getClass().getComponentType() != null) { + + stats.unitsOut.add(((Object[]) e).length); + + } else { + + stats.unitsOut.increment(); + + } + + stats.chunksOut.increment(); + + return ret; + + } + +} Property changes on: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/BlockingBufferWithStats.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/IChunkHandler.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/IChunkHandler.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/IChunkHandler.java 2010-10-22 19:45:33 UTC (rev 3838) @@ -0,0 +1,76 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Oct 22, 2010 + */ + +package com.bigdata.bop.engine; + +import com.bigdata.bop.IBindingSet; + +/** + * Interface dispatches an {@link IBindingSet}[] chunk generated by a running + * operator task. Each task may produce zero or more such chunks. 
The chunks may + * be combined together by the caller in order to have "chunkier" processing by + * this interface. The interface is responsible for generating the appropriate + * {@link IChunkMessage}(s) for each {@link IBindingSet}[] chunk. In standalone + * there is a one-to-one relationship between input chunks and output messages. + * In scale-out, we map each {@link IBindingSet} over the shard(s) for the next + * operator, which is a many-to-one mapping. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public interface IChunkHandler { + + /** + * Take an {@link IBindingSet}[] chunk generated by some pass over an + * operator and make it available to the target operator. How this is done + * depends on whether the query is running against a standalone database or + * the scale-out database. + * <p> + * Note: The return value is used as part of the termination criteria for + * the query which depends on (a) the #of running operator tasks and (b) the + * #of {@link IChunkMessage}s generated (available) and consumed. The return + * value of this method increases the #of {@link IChunkMessage} available to + * the query. + * + * @param query + * The query. + * @param bopId + * The operator which wrote on the sink. + * @param sinkId + * The identifier of the target operator. + * @param chunk + * The intermediate results to be passed to that target operator. + * + * @return The #of {@link IChunkMessage} sent. This will always be ONE (1) + * for scale-up. For scale-out, there will be at least one + * {@link IChunkMessage} per index partition over which the + * intermediate results were mapped. + */ + int handleChunk(RunningQuery query, int bopId, int sinkId, + IBindingSet[] chunk); + +} Property changes on: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/IChunkHandler.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngineTestAnnotations.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngineTestAnnotations.java 2010-10-22 17:53:21 UTC (rev 3837) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngineTestAnnotations.java 2010-10-22 19:45:33 UTC (rev 3838) @@ -54,13 +54,15 @@ * {@link RunState} termination conditions linked to having multiple * {@link IChunkMessage}s. * <p> - * Note: Just controlling the - * {@link PipelineOp.Annotations#CHUNK_CAPACITY} and - * {@link PipelineOp.Annotations#CHUNK_OF_CHUNKS_CAPACITY} is not - * enough to force the {@link QueryEngine} to run the an operator once per - * source chunk. The {@link QueryEngine} normally combines chunks together. - * You MUST also specify this annotation in order for the query engine to - * send multiple {@link IChunkMessage} rather than just one. + * Note: Just controlling the {@link PipelineOp.Annotations#CHUNK_CAPACITY} + * and {@link PipelineOp.Annotations#CHUNK_OF_CHUNKS_CAPACITY} is not enough + * to force the {@link QueryEngine} to run the an operator once per source + * chunk. The {@link QueryEngine} normally combines chunks together. You + * MUST also specify this annotation in order for the query engine to send + * multiple {@link IChunkMessage} rather than just one. + * + * @deprecated Support for this is no longer present. 
It was lost when the + * {@link StandaloneChunkHandler} was written. */ String ONE_MESSAGE_PER_CHUNK = QueryEngineTestAnnotations.class.getName() + ".oneMessagePerChunk"; Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunState.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunState.java 2010-10-22 17:53:21 UTC (rev 3837) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunState.java 2010-10-22 19:45:33 UTC (rev 3838) @@ -185,7 +185,7 @@ * readily exposed as {@link Map} object. If we were to expose the map, it * would have to be via a get(key) style interface. */ - /* private */final Map<Integer/* bopId */, AtomicLong/* availableChunkCount */> availableMap = new ConcurrentHashMap<Integer, AtomicLong>(); + /* private */final Map<Integer/* bopId */, AtomicLong/* availableChunkCount */> availableMap = new LinkedHashMap<Integer, AtomicLong>(); /** * A collection reporting on the #of instances of a given {@link BOp} which @@ -412,71 +412,71 @@ } - /** - * Update the {@link RunState} to indicate that the data in the - * {@link IChunkMessage} was attached to an already running task for the - * target operator. - * - * @param msg - * @param runningOnServiceId - * @return <code>true</code> if this is the first time we will evaluate the - * op. - * - * @throws IllegalArgumentException - * if the argument is <code>null</code>. - * @throws TimeoutException - * if the deadline for the query has passed. - */ - synchronized - public void addSource(final IChunkMessage<?> msg, - final UUID runningOnServiceId) throws TimeoutException { - - if (msg == null) - throw new IllegalArgumentException(); - - if (allDone.get()) - throw new IllegalStateException(ERR_QUERY_HALTED); - - if (deadline < System.currentTimeMillis()) - throw new TimeoutException(ERR_DEADLINE); - - nsteps.incrementAndGet(); - - final int bopId = msg.getBOpId(); - final int nmessages = 1; - - if (runningMap.get(bopId) == null) { - /* - * Note: There is a race condition in RunningQuery such that it is - * possible to add a 2nd source to an operator task before the task - * has begun to execute. Since the task calls startOp() once it - * begins to execute, this means that addSource() can be ordered - * before startOp() for the same task. This code block explicitly - * allows this condition and sets a 0L in the runningMap for the - * [bopId]. - */ - AtomicLong n = runningMap.get(bopId); - if (n == null) - runningMap.put(bopId, n = new AtomicLong()); -// throw new AssertionError(ERR_OP_NOT_STARTED + " msg=" + msg -// + ", this=" + this); - } - - messagesConsumed(bopId, nmessages); - - if (TableLog.tableLog.isInfoEnabled()) { - TableLog.tableLog.info(getTableRow("addSrc", runningOnServiceId, - bopId, msg.getPartitionId(), nmessages/* fanIn */, - null/* cause */, null/* stats */)); - } - - if (log.isInfoEnabled()) - log.info("startOp: " + toString() + " : bop=" + bopId); - - if (log.isTraceEnabled()) - log.trace(msg.toString()); - - } +// /** +// * Update the {@link RunState} to indicate that the data in the +// * {@link IChunkMessage} was attached to an already running task for the +// * target operator. +// * +// * @param msg +// * @param runningOnServiceId +// * @return <code>true</code> if this is the first time we will evaluate the +// * op. +// * +// * @throws IllegalArgumentException +// * if the argument is <code>null</code>. 
+// * @throws TimeoutException +// * if the deadline for the query has passed. +// */ +// synchronized +// public void addSource(final IChunkMessage<?> msg, +// final UUID runningOnServiceId) throws TimeoutException { +// +// if (msg == null) +// throw new IllegalArgumentException(); +// +// if (allDone.get()) +// throw new IllegalStateException(ERR_QUERY_HALTED); +// +// if (deadline < System.currentTimeMillis()) +// throw new TimeoutException(ERR_DEADLINE); +// +// nsteps.incrementAndGet(); +// +// final int bopId = msg.getBOpId(); +// final int nmessages = 1; +// +// if (runningMap.get(bopId) == null) { +// /* +// * Note: There is a race condition in RunningQuery such that it is +// * possible to add a 2nd source to an operator task before the task +// * has begun to execute. Since the task calls startOp() once it +// * begins to execute, this means that addSource() can be ordered +// * before startOp() for the same task. This code block explicitly +// * allows this condition and sets a 0L in the runningMap for the +// * [bopId]. +// */ +// AtomicLong n = runningMap.get(bopId); +// if (n == null) +// runningMap.put(bopId, n = new AtomicLong()); +//// throw new AssertionError(ERR_OP_NOT_STARTED + " msg=" + msg +//// + ", this=" + this); +// } +// +// messagesConsumed(bopId, nmessages); +// +// if (TableLog.tableLog.isInfoEnabled()) { +// TableLog.tableLog.info(getTableRow("addSrc", runningOnServiceId, +// bopId, msg.getPartitionId(), nmessages/* fanIn */, +// null/* cause */, null/* stats */)); +// } +// +// if (log.isInfoEnabled()) +// log.info("startOp: " + toString() + " : bop=" + bopId); +// +// if (log.isTraceEnabled()) +// log.trace(msg.toString()); +// +// } /** * Update the {@link RunState} to reflect the post-condition of the Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java 2010-10-22 17:53:21 UTC (rev 3837) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java 2010-10-22 19:45:33 UTC (rev 3838) @@ -28,6 +28,9 @@ package com.bigdata.bop.engine; import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.LinkedList; +import java.util.List; import java.util.Map; import java.util.UUID; import java.util.concurrent.BlockingQueue; @@ -36,7 +39,7 @@ import java.util.concurrent.ExecutionException; import java.util.concurrent.Future; import java.util.concurrent.FutureTask; -import java.util.concurrent.LinkedBlockingDeque; +import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.RejectedExecutionException; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; @@ -55,13 +58,12 @@ import com.bigdata.bop.NoSuchBOpException; import com.bigdata.bop.PipelineOp; import com.bigdata.bop.solutions.SliceOp; -import com.bigdata.io.DirectBufferPoolAllocator.IAllocationContext; import com.bigdata.journal.IIndexManager; import com.bigdata.journal.ITx; -import com.bigdata.relation.accesspath.BlockingBuffer; import com.bigdata.relation.accesspath.IAsynchronousIterator; import com.bigdata.relation.accesspath.IBlockingBuffer; -import com.bigdata.relation.accesspath.MultiplexBlockingBuffer; +import com.bigdata.relation.accesspath.IMultiSourceAsynchronousIterator; +import com.bigdata.relation.accesspath.MultiSourceSequentialAsynchronousIterator; import com.bigdata.service.IBigdataFederation; import 
com.bigdata.striterator.ICloseableIterator; import com.bigdata.util.concurrent.Haltable; @@ -85,6 +87,12 @@ * controller is attempted on some other {@link IQueryPeer}. */ static protected final String ERR_NOT_CONTROLLER = "Operator only permitted on the query controller"; + + /** + * Error message used when a request is made after the query has stopped + * executing. + */ + static protected final String ERR_QUERY_DONE = "Query is no longer running"; /** * The class executing the query on this node. @@ -141,67 +149,66 @@ * A collection of (bopId,partitionId) keys mapped onto a collection of * operator task evaluation contexts for currently executing operators for * this query. - * - * @todo Futures are not being cleared from this collection as operators - * complete. This should be done systematically in order to ensure - * that any allocations associated with an operator task execution are - * released in a timely manner for long-running operators. (In fact, - * the {@link IAllocationContext} should take care of most of the - * issues here but we could still wind up with a lot of entries in - * this map in scale-out where there can be up to one per bop per - * shard in a given query.) */ private final ConcurrentHashMap<BSBundle, ChunkFutureTask> operatorFutures; /** + * A map of unbounded work queues for each (bopId,partitionId). Empty queues + * are removed from the map. + * <p> + * The map is guarded by the {@link #lock}. + */ + private final Map<BSBundle, BlockingQueue<IChunkMessage<IBindingSet>>> operatorQueues; + + /** * The runtime statistics for each {@link BOp} in the query and * <code>null</code> unless this is the query controller. */ final private ConcurrentHashMap<Integer/* bopId */, BOpStats> statsMap; - /** - * When running in stand alone, we can chain together the operators and have - * much higher throughput. Each operator has an {@link BlockingBuffer} which - * is essentially its input queue. The operator will drain its input queue - * using {@link BlockingBuffer#iterator()}. - * <p> - * Each operator closes its {@link IBlockingBuffer} sink(s) once its own - * source has been closed and it has finished processing that source. Since - * multiple producers can target the same operator, we need a means to - * ensure that the source for the target operator is not closed until each - * producer which targets that operator has closed its corresponding sink. - * <p> - * In order to support this many-to-one producer/consumer pattern, we wrap - * the input queue (a {@link BlockingBuffer}) for each operator having - * multiple sources with a {@link MultiplexBlockingBuffer}. This class gives - * each producer their own view on the underlying {@link BlockingBuffer}. - * The underlying {@link BlockingBuffer} will not be closed until all - * source(s) have closed their view of that buffer. This collection keeps - * track of the {@link MultiplexBlockingBuffer} wrapping the - * {@link BlockingBuffer} which is the input queue for each operator. - * <p> - * The input queues themselves are {@link BlockingBuffer} objects. Those - * objects are available from this map using - * {@link MultiplexBlockingBuffer#getBackingBuffer()}. These buffers are - * pre-allocated by {@link #populateInputBufferMap(BOp)}. - * {@link #startTasks(BOp)} is responsible for starting the operator tasks - * in a "back-to-front" order. 
{@link #startQuery(IChunkMessage)} kicks off - * the query and invokes {@link #startTasks(BOp)} to chain the input queues - * and output queues together (when so chained, the output queues are skins - * over the input queues obtained from {@link MultiplexBlockingBuffer}). - * - * FIXME The inputBufferMap will let us construct consumer producer chains - * where the consumer _waits_ for all producer(s) which target the consumer - * to close the sink associated with that consumer. Unlike when attaching an - * {@link IChunkMessage} to an already running operator, the consumer will - * NOT terminate (due to lack up input) until each running producer - * terminating that consumer terminates. This will improve concurrency, - * result in fewer task instances, and have better throughput than attaching - * a chunk to an already running task. However, in scale-out we will have - * tasks running on different nodes so we can not always chain together the - * producer and consumer in this tightly integrated manner. - */ - final private ConcurrentHashMap<Integer/*operator*/, MultiplexBlockingBuffer<IBindingSet[]>/*inputQueue*/> inputBufferMap; +// /** +// * When running in stand alone, we can chain together the operators and have +// * much higher throughput. Each operator has an {@link BlockingBuffer} which +// * is essentially its input queue. The operator will drain its input queue +// * using {@link BlockingBuffer#iterator()}. +// * <p> +// * Each operator closes its {@link IBlockingBuffer} sink(s) once its own +// * source has been closed and it has finished processing that source. Since +// * multiple producers can target the same operator, we need a means to +// * ensure that the source for the target operator is not closed until each +// * producer which targets that operator has closed its corresponding sink. +// * <p> +// * In order to support this many-to-one producer/consumer pattern, we wrap +// * the input queue (a {@link BlockingBuffer}) for each operator having +// * multiple sources with a {@link MultiplexBlockingBuffer}. This class gives +// * each producer their own view on the underlying {@link BlockingBuffer}. +// * The underlying {@link BlockingBuffer} will not be closed until all +// * source(s) have closed their view of that buffer. This collection keeps +// * track of the {@link MultiplexBlockingBuffer} wrapping the +// * {@link BlockingBuffer} which is the input queue for each operator. +// * <p> +// * The input queues themselves are {@link BlockingBuffer} objects. Those +// * objects are available from this map using +// * {@link MultiplexBlockingBuffer#getBackingBuffer()}. These buffers are +// * pre-allocated by {@link #populateInputBufferMap(BOp)}. +// * {@link #startTasks(BOp)} is responsible for starting the operator tasks +// * in a "back-to-front" order. {@link #startQuery(IChunkMessage)} kicks off +// * the query and invokes {@link #startTasks(BOp)} to chain the input queues +// * and output queues together (when so chained, the output queues are skins +// * over the input queues obtained from {@link MultiplexBlockingBuffer}). +// * +// * FIXME The inputBufferMap will let us construct consumer producer chains +// * where the consumer _waits_ for all producer(s) which target the consumer +// * to close the sink associated with that consumer. Unlike when attaching an +// * {@link IChunkMessage} to an already running operator, the consumer will +// * NOT terminate (due to lack up input) until each running producer +// * terminating that consumer terminates. 
This will improve concurrency, +// * result in fewer task instances, and have better throughput than attaching +// * a chunk to an already running task. However, in scale-out we will have +// * tasks running on different nodes so we can not always chain together the +// * producer and consumer in this tightly integrated manner. +// */ +// final private ConcurrentHashMap<Integer/*operator*/, MultiplexBlockingBuffer<IBindingSet[]>/*inputQueue*/> inputBufferMap; /** * The buffer used for the overall output of the query pipeline. @@ -244,14 +251,14 @@ */ final AtomicBoolean didQueryTearDown = new AtomicBoolean(false); - /** - * The chunks available for immediate processing (they must have been - * materialized). - * <p> - * Note: This is package private so it will be visible to the - * {@link QueryEngine}. - */ - final/* private */BlockingQueue<IChunkMessage<IBindingSet>> chunksIn = new LinkedBlockingDeque<IChunkMessage<IBindingSet>>(); +// /** +// * The chunks available for immediate processing (they must have been +// * materialized). +// * <p> +// * Note: This is package private so it will be visible to the +// * {@link QueryEngine}. +// */ +// final/* private */BlockingQueue<IChunkMessage<IBindingSet>> chunksIn = new LinkedBlockingDeque<IChunkMessage<IBindingSet>>(); /** * Set the query deadline. The query will be cancelled when the deadline is @@ -368,6 +375,21 @@ } /** + * Lookup and return the {@link BOp} with that identifier using an index. + * + * @param bopId + * The identifier. + * + * @return The {@link BOp} -or- <code>null</code> if no {@link BOp} was + * found in the query with for that identifier. + */ + public BOp getBOp(final int bopId) { + + return bopIndex.get(bopId); + + } + + /** * @param queryEngine * The {@link QueryEngine} on which the query is running. In * scale-out, a query is typically instantiated on many @@ -430,6 +452,8 @@ this.operatorFutures = new ConcurrentHashMap<BSBundle, ChunkFutureTask>(); + this.operatorQueues = new LinkedHashMap<BSBundle, BlockingQueue<IChunkMessage<IBindingSet>>>(); + /* * Setup the BOpStats object for each pipeline operator in the query. */ @@ -445,7 +469,8 @@ final BOpStats queryStats = statsMap.get(query.getId()); - queryBuffer = query.newBuffer(queryStats); + queryBuffer = new BlockingBufferWithStats<IBindingSet[]>(query, + queryStats); queryIterator = new QueryResultIterator<IBindingSet[]>(this, queryBuffer.iterator()); @@ -467,31 +492,31 @@ } - if(!queryEngine.isScaleOut()) { - /* - * Since the query engine is using the stand alone database mode we - * will now setup the input queues for each operator. Those queues - * will be used by each operator which targets a given operator. - * Each operator will start once and will run until all of its - * source(s) are closed. - * - * This allocates the buffers in a top-down manner (this is the - * reverse of the pipeline evaluation order). Allocation halts at if - * we reach an operator without children (e.g., StartOp) or an - * operator which is a CONTROLLER (Union). (If allocation does not - * halt at those boundaries then we can allocate buffers which will - * not be used. On the one hand, the StartOp receives a message - * containing the chunk to be evaluated. On the other hand, the - * buffers are not shared between the parent and a subquery so - * allocation within the subquery is wasted. This is also true for - * the [statsMap].) 
- */ - inputBufferMap = null; -// inputBufferMap = new ConcurrentHashMap<Integer, MultiplexBlockingBuffer<IBindingSet[]>>(); -// populateInputBufferMap(query); - } else { - inputBufferMap = null; - } +// if(!queryEngine.isScaleOut()) { +// /* +// * Since the query engine is using the stand alone database mode we +// * will now setup the input queues for each operator. Those queues +// * will be used by each operator which targets a given operator. +// * Each operator will start once and will run until all of its +// * source(s) are closed. +// * +// * This allocates the buffers in a top-down manner (this is the +// * reverse of the pipeline evaluation order). Allocation halts at if +// * we reach an operator without children (e.g., StartOp) or an +// * operator which is a CONTROLLER (Union). (If allocation does not +// * halt at those boundaries then we can allocate buffers which will +// * not be used. On the one hand, the StartOp receives a message +// * containing the chunk to be evaluated. On the other hand, the +// * buffers are not shared between the parent and a subquery so +// * allocation within the subquery is wasted. This is also true for +// * the [statsMap].) +// */ +// inputBufferMap = null; +//// inputBufferMap = new ConcurrentHashMap<Integer, MultiplexBlockingBuffer<IBindingSet[]>>(); +//// populateInputBufferMap(query); +// } else { +// inputBufferMap = null; +// } } @@ -562,140 +587,97 @@ // // } - /** - * Take a chunk generated by some pass over an operator and make it - * available to the target operator. How this is done depends on whether the - * query is running against a standalone database or the scale-out database. - * <p> - * Note: The return value is used as part of the termination criteria for - * the query. - * <p> - * The default implementation supports a standalone database. The generated - * chunk is left on the Java heap and handed off synchronously using - * {@link QueryEngine#acceptChunk(IChunkMessage)}. That method will queue - * the chunk for asynchronous processing. - * - * @param bop - * The operator which wrote on the sink. - * @param sinkId - * The identifier of the target operator. - * @param sink - * The intermediate results to be passed to that target operator. - * - * @return The #of {@link IChunkMessage} sent. This will always be ONE (1) - * for scale-up. For scale-out, there will be at least one - * {@link IChunkMessage} per index partition over which the - * intermediate results were mapped. - */ - protected <E> int handleOutputChunk(final BOp bop, final int sinkId, - final IBlockingBuffer<IBindingSet[]> sink) { +// /** +// * Take a chunk generated by some pass over an operator and make it +// * available to the target operator. How this is done depends on whether the +// * query is running against a standalone database or the scale-out database. +// * <p> +// * Note: The return value is used as part of the termination criteria for +// * the query. +// * <p> +// * The default implementation supports a standalone database. The generated +// * chunk is left on the Java heap and handed off synchronously using +// * {@link QueryEngine#acceptChunk(IChunkMessage)}. That method will queue +// * the chunk for asynchronous processing. +// * +// * @param bop +// * The operator which wrote on the sink. +// * @param sinkId +// * The identifier of the target operator. +// * @param sink +// * The intermediate results to be passed to that target operator. +// * +// * @return The #of {@link IChunkMessage} sent. 
This will always be ONE (1) +// * for scale-up. For scale-out, there will be at least one +// * {@link IChunkMessage} per index partition over which the +// * intermediate results were mapped. +// */ +// protected <E> int handleOutputChunk(final BOp bop, final int sinkId, +// final IBlockingBuffer<IBindingSet[]> sink) { +// +// if (bop == null) +// throw new IllegalArgumentException(); +// +// if (sink == null) +// throw new IllegalArgumentException(); +// +// if (inputBufferMap != null && inputBufferMap.get(sinkId) != null) { +// /* +// * FIXME The sink is just a wrapper for the input buffer so we do +// * not need to do anything to propagate the data from one operator +// * to the next. +// */ +// return 0; +// } +// +// /* +// * Note: The partitionId will always be -1 in scale-up. +// */ +// final int partitionId = -1; +// +// final boolean oneMessagePerChunk = bop.getProperty( +// QueryEngineTestAnnotations.ONE_MESSAGE_PER_CHUNK, +// QueryEngineTestAnnotations.DEFAULT_ONE_MESSAGE_PER_CHUNK); +// +// if (oneMessagePerChunk) { +// +// final IAsynchronousIterator<IBindingSet[]> itr = sink.iterator(); +// +// int nchunks = 0; +// +// while (itr.hasNext()) { +// +// final IBlockingBuffer<IBindingSet[]> tmp = new BlockingBuffer<IBindingSet[]>( +// 1); +// +// tmp.add(itr.next()); +// +// tmp.close(); +// +// final LocalChunkMessage<IBindingSet> chunk = new LocalChunkMessage<IBindingSet>( +// clientProxy, queryId, sinkId, partitionId, tmp +// .iterator()); +// +// queryEngine.acceptChunk(chunk); +// +// nchunks++; +// +// } +// +// return nchunks; +// +// } +// +// final LocalChunkMessage<IBindingSet> chunk = new LocalChunkMessage<IBindingSet>( +// clientProxy, queryId, sinkId, partitionId, sink.iterator()); +// +// queryEngine.acceptChunk(chunk); +// +// return 1; +// +// } - if (bop == null) - throw new IllegalArgumentException(); - - if (sink == null) - throw new IllegalArgumentException(); - - if (inputBufferMap != null && inputBufferMap.get(sinkId) != null) { - /* - * FIXME The sink is just a wrapper for the input buffer so we do - * not need to do anything to propagate the data from one operator - * to the next. - */ - return 0; - } - - /* - * Note: The partitionId will always be -1 in scale-up. - */ - final int partitionId = -1; - - final boolean oneMessagePerChunk = bop.getProperty( - QueryEngineTestAnnotations.ONE_MESSAGE_PER_CHUNK, - QueryEngineTestAnnotations.DEFAULT_ONE_MESSAGE_PER_CHUNK); - - if (oneMessagePerChunk) { - - final IAsynchronousIterator<IBindingSet[]> itr = sink.iterator(); - - int nchunks = 0; - - while (itr.hasNext()) { - - final IBlockingBuffer<IBindingSet[]> tmp = new BlockingBuffer<IBindingSet[]>( - 1); - - tmp.add(itr.next()); - - tmp.close(); - - final LocalChunkMessage<IBindingSet> chunk = new LocalChunkMessage<IBindingSet>( - clientProxy, queryId, sinkId, partitionId, tmp - .iterator()); - - queryEngine.acceptChunk(chunk); - - nchunks++; - - } - - return nchunks; - - } - - final LocalChunkMessage<IBindingSet> chunk = new LocalChunkMessage<IBindingSet>( - clientProxy, queryId, sinkId, partitionId, sink.iterator()); - - queryEngine.acceptChunk(chunk); - - return 1; - - } - /** - * Make a chunk of binding sets available for consumption by the query. - * <p> - * Note: this is invoked by {@link QueryEngine#acceptChunk(IChunkMessage)} - * - * @param msg - * The chunk. - * - * @todo Does this method really need the {@link #lock}? I doubt it since - * {@link #chunksIn} is thread-safe. 
- */ - protected void acceptChunk(final IChunkMessage<IBindingSet> msg) { - - if (msg == null) - throw new IllegalArgumentException(); - - if (!msg.isMaterialized()) - throw new IllegalStateException(); - - lock.lock(); - - try { - - // verify still running. - if (future.isDone()) { - throw new RuntimeException("Query is done", future.getCause()); - } - - // add chunk to be consumed. - chunksIn.add(msg); - - if (log.isDebugEnabled()) - log - .debug("chunksIn.size()=" + chunksIn.size() + ", msg=" - + msg); - } finally { - - lock.unlock(); - - } - - } - - /** * Invoked once by the query controller with the initial * {@link IChunkMessage} which gets the query moving. */ @@ -949,155 +931,254 @@ } +// /** +// * Consume zero or more chunks in the input queue for this query. The +// * chunk(s) will either be assigned to an already running task for the +// * target operator or they will be assigned to new tasks. +// * +// * FIXME Drain the input queue, assigning any chunk waiting to a task. If +// * the task is already running, then add the chunk to that task. Otherwise +// * start a new task. +// */ +// protected void consumeChunk() { +// final IChunkMessage<IBindingSet> msg = chunksIn.poll(); +// if (msg == null) +// return; +// try { +// if (!msg.isMaterialized()) +// throw new IllegalStateException(); +// if (log.isTraceEnabled()) +// log.trace("Accepted chunk: " + msg); +// final BSBundle bundle = new BSBundle(msg.getBOpId(), msg +// .getPartitionId()); +//// /* +//// * Look for instance of this task which is already running. +//// */ +//// final ChunkFutureTask chunkFutureTask = operatorFutures.get(bundle); +//// if (!queryEngine.isScaleOut() && chunkFutureTask != null) { +//// /* +//// * Attempt to atomically attach the message as another src. +//// */ +//// if (chunkFutureTask.chunkTask.context.addSource(msg +//// .getChunkAccessor().iterator())) { +//// /* +//// * @todo I've commented this out for now. I am not convinced +//// * that we need to update the RunState when accepting +//// * another message into a running task. This would only +//// * matter if haltOp() reported the #of consumed messages, +//// * but RunState.haltOp() just decrements the #of available +//// * messages by one which balances startOp(). Just because we +//// * attach more messages dynamically does not mean that we +//// * need to report that back to the query controller as long +//// * as haltOp() balances startOp(). +//// */ +////// lock.lock(); +////// try { +////// /* +////// * message was added to a running task. +////// * +////// * FIXME This needs to be an RMI in scale-out back to +////// * the query controller so it can update the #of +////// * messages which are being consumed by this task. +////// * However, doing RMI here will add latency into the +////// * thread submitting tasks for evaluation and the +////// * coordination overhead of addSource() in scale-out may +////// * be too high. However, if we do not combine sources in +////// * scale-out then we may have too much overhead in terms +////// * of the #of running tasks with few tuples per task. +////// * Another approach is the remote async iterator with +////// * multiple sources (parallel multi source iterator). +////// * +////// * FIXME This code path is NOT being taken in scale-out +////// * right now since it would not get the message to the +////// * query controller. We will need to add addSource() to +////// * IQueryClient parallel to startOp() and haltOp() for +////// * this to work. 
+////// */ +////// runState.addSource(msg, queryEngine.getServiceUUID()); +////// return; +////// } finally { +////// lock.unlock(); +////// } +//// } +//// } +// // wrap runnable. +// final ChunkFutureTask ft = new ChunkFutureTask(new ChunkTask(msg)); +// /* +// * FIXME Rather than queue up a bunch of operator tasks for the same +// * (bopId,partitionId), this blocks until the current operator task +// * is done and then submits the new one. This prevents us from +// * allocating 100s of threads for complex queries and prevents us +// * from losing track of the Futures of those tasks. However, since +// * this is happening in the caller's thread the QueryEngine is not +// * making any progress while we are blocked. A pattern which hooks +// * the Future and then submits the next task (such as the +// * LatchedExecutor) would fix this. This might have to be one +// * LatchedExecutor per pipeline operator. +// */ +// FutureTask<Void> existing = operatorFutures.putIfAbsent(bundle, ft); +// if (existing != null) { +// existing.get(); +// if (!operatorFutures.remove(bundle, existing)) +// throw new AssertionError(); +// if (operatorFutures.put(bundle, ft) != null) +// throw new AssertionError(); +// } +//// // add to list of active futures for this query. +//// if (operatorFutures.put(bundle, ft) != null) { +//// /* +//// * Note: This can cause the FutureTask to be accessible (above) +//// * before startOp() has been called for that ChunkTask (the +//// * latter occurs when the chunk task actually runs.) This a race +//// * condition has been resolved in RunState by allowing +//// * addSource() even when there is no registered task running for +//// * that [bopId]. +//// * +//// * FIXME This indicates that we have more than one future for +//// * the same (bopId,shardId). When this is true we are losing +//// * track of Futures with the consequence that we can not +//// * properly cancel them. Instead of losing track like this, we +//// * should be targeting the running operator instance with the +//// * new chunk. This needs to be done atomically, e.g., using the +//// * [lock]. +//// * +//// * Even if we only have one task per operator in standalone and +//// * we attach chunks to an already running task in scale-out, +//// * there is still the possibility in scale-out that a task may +//// * have closed its source but still be running, in which case we +//// * would lose the Future for the already running task when we +//// * start a new task for the new chunk for the target operator. +//// */ +//// // throw new AssertionError(); +//// } +// // submit task for execution (asynchronous). +// queryEngine.execute(ft); +// } catch (Throwable ex) { +// // halt query. +// throw new RuntimeException(halt(ex)); +// } +// } + /** - * Consume zero or more chunks in the input queue for this query. The - * chunk(s) will either be assigned to an already running task for the - * target operator or they will be assigned to new tasks. + * Make a chunk of binding sets available for consumption by the query. + * <p> + * Note: this is invoked by {@link QueryEngine#acceptChunk(IChunkMessage)} * - * FIXME Drain the input queue, assigning any chunk waiting to a task. If - * the task is already running, then add the chunk to that task. Otherwise - * start a new task. + * @param msg + * The chunk. 
*/ - protected void consumeChunk() { - final IChunkMessage<IBindingSet> msg = chunksIn.poll(); + protected void acceptChunk(final IChunkMessage<IBindingSet> msg) { + if (msg == null) - return; + throw new IllegalArgumentException(); + + if (!msg.isMaterialized()) + throw new IllegalStateException(); + + final BSBundle bundle = new BSBundle(msg.getBOpId(), msg + .getPartitionId()); + + lock.lock(); + try { - if (!msg.isMaterialized()) - throw new IllegalStateException(); - if (log.isTraceEnabled()) - log.trace("Accepted chunk: " + msg); - final BSBundle bundle = new BSBundle(msg.getBOpId(), msg - .getPartitionId()); + + // verify still running. + if (future.isDone()) + throw new RuntimeException(ERR_QUERY_DONE, future.getCause()); + + BlockingQueue<IChunkMessage<IBindingSet>> queue = operatorQueues + .get(bundle); + + if (queue == null) { + + queue = new LinkedBlockingQueue<IChunkMessage<IBindingSet>>(/* unbounded */); + + operatorQueues.put(bundle, queue); + + } + + queue.add(msg); + + } finally { + + lock.unlock(); + + } + + } + + /** + * Examines the input queue for each (bopId,partitionId). If there is work + * available and no task is currently running, then drain the work queue and + * submit a task to consume that work. + */ + protected void consumeChunk() { + lock.lock(); + try { + for(BSBundle bundle : operatorQueues.keySet()) { + scheduleNext(bundle); + } + } finally { + lock.unlock(); + } + } + + /** + * Examine the input queue for the (bopId,partitionId). If there is work + * available and no task is currently running, then drain the work queue and + * submit a task to consume that work. + * + * @param bundle + * The (bopId,partitionId). + * + * @return <code>true</code> if a new task was started. + */ + private boolean scheduleNext(final BSBundle bundle) { + if (bundle == null) + throw new IllegalArgumentException(); + lock.lock(); + try { + // Make sure the query is still running. + future.halted(); + // Is there a Future for this (bopId,partitionId)? + final ChunkFutureTask cft = operatorFutures.get(bundle); + if (cft != null && !cft.isDone()) { + // already running. + return false; + } + // Remove the work queue for that (bopId,partitionId). + final BlockingQueue<IChunkMessage<IBindingSet>> queue = operatorQueues + .remove(bundle); + if (queue == null || queue.isEmpty()) { + // no work + return false; + } + // Drain the work queue. + final List<IChunkMessage<IBindingSet>> messages = new LinkedList<IChunkMessage<IBindingSet>>(); + queue.drainTo(messages); + final int nmessages = messages.size(); /* - * Look for instance of this task which is already running. + * Combine the messages into a single source to be consumed by a + * task. */ - final ChunkFutureTask chunkFutureTask = operatorFutures.get(bundle); - if (!queryEngine.isScaleOut() && chunkFutureTask != null) { - /* - * Attempt to atomically attach the message as another src. - */ - if (chunkFutureTask.chunkTask.context.addSource(msg - ... [truncated message content] |
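The rewrite above replaces the single shared chunksIn queue with one unbounded work queue per (bopId,partitionId) bundle, guarded by the query's lock: acceptChunk() only enqueues, and scheduleNext() starts at most one task per bundle, draining the whole queue into that task. Below is a minimal standalone Java sketch of this drain-and-schedule pattern; all names (WorkScheduler, BatchConsumer, accept, scheduleNext) are hypothetical illustrations, not the bigdata RunningQuery API, which tracks ChunkFutureTask futures rather than a set of running keys.

import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.Set;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.locks.ReentrantLock;

/**
 * Sketch of the drain-and-schedule pattern: one unbounded work queue
 * per operator key, at most one running task per key, and each new
 * task consumes the entire queue. Hypothetical standalone code, not
 * the bigdata API.
 */
final class WorkScheduler<K, M> {

    /** Consumes a drained batch of messages for a key. */
    interface BatchConsumer<K, M> {
        void consume(K key, List<M> batch);
    }

    /** Guards {@link #queues} and {@link #running}. */
    private final ReentrantLock lock = new ReentrantLock();

    /** Pending messages per key (empty queues are removed). */
    private final Map<K, Queue<M>> queues = new HashMap<K, Queue<M>>();

    /** Keys which currently have a running task. */
    private final Set<K> running = new HashSet<K>();

    private final ExecutorService executor = Executors.newCachedThreadPool();

    private final BatchConsumer<K, M> consumer;

    WorkScheduler(final BatchConsumer<K, M> consumer) {
        this.consumer = consumer;
    }

    /** Enqueue a message, then try to schedule a task (cf. acceptChunk()). */
    void accept(final K key, final M msg) {
        lock.lock();
        try {
            Queue<M> q = queues.get(key);
            if (q == null)
                queues.put(key, q = new LinkedList<M>());
            q.add(msg);
        } finally {
            lock.unlock();
        }
        scheduleNext(key);
    }

    /**
     * If no task is running for the key and work is queued, drain the
     * queue and submit one task for the batch (cf. scheduleNext()).
     */
    private void scheduleNext(final K key) {
        final List<M> batch;
        lock.lock();
        try {
            if (running.contains(key))
                return; // already running; rescheduled when it completes.
            final Queue<M> q = queues.remove(key);
            if (q == null || q.isEmpty())
                return; // no work.
            batch = new LinkedList<M>(q);
            running.add(key);
        } finally {
            lock.unlock();
        }
        executor.submit(new Runnable() {
            public void run() {
                try {
                    consumer.consume(key, batch);
                } finally {
                    lock.lock();
                    try {
                        running.remove(key);
                    } finally {
                        lock.unlock();
                    }
                    // Pick up anything queued while we were running.
                    scheduleNext(key);
                }
            }
        });
    }
}

As in the committed code, the check for an already-running task, the drain of the queue, and the update of the running set all happen while holding the lock, so a message can never be stranded in a queue with no task scheduled to consume it.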
From: <tho...@us...> - 2010-10-22 19:58:51
Revision: 3840 http://bigdata.svn.sourceforge.net/bigdata/?rev=3840&view=rev Author: thompsonbry Date: 2010-10-22 19:58:41 +0000 (Fri, 22 Oct 2010) Log Message: ----------- Added support for sampling from a local access path. I still need to add support for sampling in scale-out. This is in service of adaptive query optimization. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/AccessPath.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestAll.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleIndex.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestSampleIndex.java Removed Paths: ------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/AbstractSampleIndex.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleLocalBTree.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleLocalShard.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestSampleLocalBTree.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestSampleLocalShard.java Deleted: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/AbstractSampleIndex.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/AbstractSampleIndex.java 2010-10-22 19:47:04 UTC (rev 3839) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/AbstractSampleIndex.java 2010-10-22 19:58:41 UTC (rev 3840) @@ -1,125 +0,0 @@ -/** - -Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. - -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ -/* - * Created on Aug 16, 2010 - */ - -package com.bigdata.bop.ap; - -import com.bigdata.bop.AbstractAccessPathOp; -import com.bigdata.bop.BOp; -import com.bigdata.bop.IBindingSet; -import com.bigdata.bop.IPredicate; -import com.bigdata.bop.NV; -import com.bigdata.btree.IIndex; -import com.bigdata.relation.accesspath.IAccessPath; - -/** - * Abstract base class for sampling operator for an {@link IIndex}. - * - * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id$ - * @param <E> - * The generic type of the elements materialized from that index. - * - * @todo Implement sample operator. E.g., sampleRange(fromKey,toKey,limit). This - * could be on {@link IIndex} or on {@link IAccessPath}. For a shard view, - * it must proportionally select from among the ordered components of the - * view. For a hash table it would be sample(limit) since range based - * operations are not efficient. 
- * <p> - * This should accept an index, not a predicate (for RDF we determine the - * index an analysis of the bound and unbound arguments on the predicate - * and always have a good index, but this is not true in the general - * case). When the index is remote, it should be executed at the remote - * index. - * - * @todo This needs to operation on element chunks, not {@link IBindingSet} - * chunks. It also may not require pipelining. - */ -abstract public class AbstractSampleIndex<E> extends AbstractAccessPathOp<E> { - - /** - * - */ - private static final long serialVersionUID = 1L; - - /** - * Known annotations. - */ - public interface Annotations extends BOp.Annotations { - /** - * The sample limit. - */ - String LIMIT = "limit"; - } - - protected AbstractSampleIndex(final IPredicate<E> pred, final int limit) { - - super(new BOp[] { pred }, NV.asMap(new NV[] {// - new NV(Annotations.LIMIT, Integer.valueOf(limit)) // - })); - - if (pred == null) - throw new IllegalArgumentException(); - - if (limit <= 0) - throw new IllegalArgumentException(); - - switch (getEvaluationContext()) { - case HASHED: - case SHARDED: - break; - default: - throw new UnsupportedOperationException( - Annotations.EVALUATION_CONTEXT + "=" - + getEvaluationContext()); - } - - } - - @SuppressWarnings("unchecked") - public IPredicate<E> pred() { - - return (IPredicate<E>) get(0); - - } - - public int limit() { - - return (Integer) getRequiredProperty(Annotations.LIMIT); - - } - -// /** -// * This is a shard wise operator. -// */ -// @Override -// public BOpEvaluationContext getEvaluationContext() { -// -// return BOpEvaluationContext.SHARDED; -// -// } - -} Copied: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleIndex.java (from rev 3756, branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/AbstractSampleIndex.java) =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleIndex.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleIndex.java 2010-10-22 19:58:41 UTC (rev 3840) @@ -0,0 +1,451 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Aug 16, 2010 + */ + +package com.bigdata.bop.ap; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.Map; +import java.util.concurrent.Callable; + +import com.bigdata.bop.AbstractAccessPathOp; +import com.bigdata.bop.BOp; +import com.bigdata.bop.BOpContextBase; +import com.bigdata.bop.IPredicate; +import com.bigdata.btree.AbstractBTree; +import com.bigdata.btree.ILeafCursor; +import com.bigdata.btree.ILinearList; +import com.bigdata.btree.IRangeQuery; +import com.bigdata.btree.ITuple; +import com.bigdata.btree.ITupleCursor; +import com.bigdata.btree.filter.Advancer; +import com.bigdata.btree.view.FusedView; +import com.bigdata.relation.IRelation; +import com.bigdata.relation.accesspath.AccessPath; +import com.bigdata.relation.accesspath.IAccessPath; +import com.bigdata.relation.rule.IAccessPathExpander; +import com.bigdata.striterator.IKeyOrder; + +import cutthecrap.utils.striterators.IFilter; + +/** + * Sampling operator for the {@link IAccessPath} implied by an + * {@link IPredicate}. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id: AbstractSampleIndex.java 3672 2010-09-28 23:39:42Z thompsonbry + * $ + * @param <E> + * The generic type of the elements materialized from that index. + * + * @todo This is a basic operator which is designed to support adaptive query + * optimization. However, there are a lot of possible semantics for + * sampling, including: uniform distribution, randomly distribution, tuple + * at a time versus clustered (sampling with leaves), adaptive sampling + * until the sample reflects some statistical property of the underlying + * population, etc. + */ +public class SampleIndex<E> extends AbstractAccessPathOp<E> { + + /** + * + */ + private static final long serialVersionUID = 1L; + + /** + * Known annotations. + */ + public interface Annotations extends BOp.Annotations { + + /** + * The sample limit (default {@value #DEFAULT_LIMIT}). + */ + String LIMIT = "limit"; + + int DEFAULT_LIMIT = 100; + + /** + * The {@link IPredicate} describing the access path to be sampled + * (required). + */ + String PREDICATE = SampleIndex.class.getName() + ".predicate"; + + } + + public SampleIndex(SampleIndex<E> op) { + + super(op); + + } + + public SampleIndex(BOp[] args, Map<String, Object> annotations) { + + super(args, annotations); + + } + + public int limit() { + + return getProperty(Annotations.LIMIT, Annotations.DEFAULT_LIMIT); + + } + + @SuppressWarnings("unchecked") + public IPredicate<E> getPredicate() { + + return (IPredicate<E>) getRequiredProperty(Annotations.PREDICATE); + + } + + /** + * Return a sample from the access path associated with the + * {@link Annotations#PREDICATE}. + */ + public E[] eval(final BOpContextBase context) { + + try { + return new SampleTask(context).call(); + } catch (Exception e) { + throw new RuntimeException(e); + } + + } + + /** + * Sample an {@link IAccessPath}. 
+ * + * FIXME This needs to handle each of the following conditions: + * <p> + * Timestamp {read-historical, read-committed, read-write tx, unisolated}<br> + * Index view {standalone, partitioned,global view of partitioned}<br> + * + * @todo The general approach uses the {@link ILinearList} interface to take + * evenly distributed or randomly distributed samples from the + * underlying index. This is done using an {@link IFilter} which is + * evaluated local to the index. This works whether or not the access + * path is using a partitioned view of the index. + * <p> + * When sampling an index shard the {@link ILinearList} API is not + * defined for the {@link FusedView}. Since this sampling operator + * exists for the purposes of estimating the cardinality of an access + * path, we can dispense with the fused view and collect a number of + * samples from each component of that view which is proportional to + * the range count of the view divided by the range count of the + * component index. This may cause tuples which have since been + * deleted to become visible, but this should not cause problems when + * estimating the cardinality of a join path as long as we always + * report the actual tuples from the fused view in the case where the + * desired sample size is LTE the estimated range count of the access + * path. + * + * @todo Better performance could be realized by accepting all tuples in a + * leaf. This requires a sensitivity to the leaf boundaries which + * might be obtained with an {@link ITupleCursor} extension interface + * for local indices or with the {@link ILeafCursor} interface if that + * can be exposed from a sufficiently low level {@link ITupleCursor} + * implementation. However, when they are further constraints layered + * onto the access path by the {@link IPredicate} it may be that such + * clustered (leaf at once) sampling is not practical. + * + * @todo When sampling a global view of a partitioned index, we should focus + * the sample on a subset of the index partitions in order to + * "cluster" the effort. This can of course introduce bias. However, + * if there are a lot of index partitions then the sample will of + * necessity be very small in proportion to the data volume and the + * opportunity for bias will be correspondingly large. + * + * @todo If there is an {@link IAccessPathExpander} then + */ + private class SampleTask implements Callable<E[]> { + + private final BOpContextBase context; + + SampleTask(final BOpContextBase context) { + + this.context = context; + + } + + /** Return a sample from the access path. */ + public E[] call() throws Exception { + + return sample(limit(), getPredicate()).getSample(); + + } + + /** + * Return a sample from the access path. + * + * @param limit + * @return + */ + public AccessPathSample<E> sample(final int limit, + IPredicate<E> predicate) { + + final IRelation<E> relation = context.getRelation(predicate); + + // @todo assumes raw AP. + final AccessPath<E> accessPath = (AccessPath<E>) context + .getAccessPath(relation, predicate); + + final long rangeCount = accessPath.rangeCount(false/* exact */); + + if (limit > rangeCount) { + + /* + * The sample will contain everything in the access path. + */ + return new AccessPathSample<E>(limit, accessPath); + + } + + /* + * Add the CURSOR and PARALLEL flags to the predicate. + * + * @todo turn off REVERSE if specified. 
+ */ + final int flags = predicate.getProperty( + IPredicate.Annotations.FLAGS, + IPredicate.Annotations.DEFAULT_FLAGS) + | IRangeQuery.CURSOR + | IRangeQuery.PARALLEL; + + predicate = (IPredicate<E>) predicate.setProperty( + IPredicate.Annotations.FLAGS, flags); + + /* + * Add advancer to collect sample. + */ + predicate = ((Predicate<E>) predicate) + .addIndexLocalFilter(new SampleAdvancer<E>(//rangeCount, + limit, accessPath.getFromKey(), accessPath + .getToKey())); + + return new AccessPathSample<E>(limit, context.getAccessPath( + relation, predicate)); + + } + + } + + /** + * An advancer pattern which is designed to take evenly distributed samples + * from an index. The caller specifies the #of tuples to be skipped after + * each tuple visited. That number should be computed based on the estimated + * range count of the index and the desired sample size. This can fail to + * gather the desired number of sample if additional filters are applied + * which further restrict the elements selected by the predicate. However, + * it will still faithfully represent the expected cardinality of the + * sampled access path. + * + * @author tho...@us... + * + * @param <E> + * The generic type of the elements visited by that access path. + */ + private static class SampleAdvancer<E> extends Advancer<E> { + + private static final long serialVersionUID = 1L; + + /** The desired total limit on the sample. */ + private final int limit; + + private final byte[] /*fromKey,*/ toKey; + + /* + * Transient data. This gets initialized when we visit the first tuple. + */ + + /** The #of tuples to be skipped after every tuple visited. */ + private transient int skipCount; + /** The #of tuples accepted so far. */ + private transient int nread = 0; + /** The inclusive lower bound of the first tuple actually visited. */ + private transient int fromIndex; + /** The exclusive upper bound of the last tuple which could be visited. */ + private transient int toIndex; + + /** + * + * @param limit + * The #of samples to visit. + */ + public SampleAdvancer(final int limit, final byte[] fromKey, + final byte[] toKey) { + + this.limit = limit; + this.toKey = toKey; + } + + /** + * @todo This is taking evenly spaced samples. It is much more efficient + * to take clusters of samples when you can accept the bias. + * Taking a clustered sample really requires knowing where the + * leaf boundaries are in the index, e.g., using + * {@link ILeafCursor}. + */ + @Override + protected void advance(final ITuple<E> tuple) { + + final AbstractBTree ndx = (AbstractBTree) src.getIndex(); + + final int currentIndex = ndx.indexOf(tuple.getKey()); + + if (nread == 0) { + + // inclusive lower bound. + fromIndex = currentIndex; + + // exclusive upper bound. + toIndex = toKey == null ? ndx.getEntryCount() : ndx + .indexOf(toKey); + + final int rangeCount = (toIndex - fromIndex); + + skipCount = Math.max(1, rangeCount / limit); + + // minus one since src.next() already consumed one tuple. + skipCount -= 1; + +// System.err.println("limit=" + limit + ", rangeCount=" +// + rangeCount + ", skipCount=" + skipCount); + + } + + nread++; + + if (skipCount > 0) { + + /* + * If the skip count is positive, then skip over N tuples. + */ + + final int nextIndex = Math.min(ndx.getEntryCount() - 1, + currentIndex + skipCount); + + src.seek(ndx.keyAt(nextIndex)); + + } + + } + + } // class SampleAdvancer + + /** + * A sample from an access path. + * + * @param <E> + * The generic type of the elements visited by that access + * path. 
+ * + * @author tho...@us... + */ + public static class AccessPathSample<E> implements Serializable { + + private static final long serialVersionUID = 1L; + + private final IPredicate<E> pred; + private final IKeyOrder<E> keyOrder; + private final int limit; + private final E[] sample; + + /** + * Constructor populates the sample using the caller's + * {@link IAccessPath#iterator()}. The caller is responsible for setting + * up the {@link IAccessPath} such that it provides an efficient sample + * of the access path with the appropriate constraints. + * + * @param limit + * @param accessPath + */ + private AccessPathSample(final int limit, + final IAccessPath<E> accessPath) { + + if (limit <= 0) + throw new IllegalArgumentException(); + + if (accessPath == null) + throw new IllegalArgumentException(); + + this.pred = accessPath.getPredicate(); + + this.keyOrder = accessPath.getKeyOrder(); + + this.limit = limit; + + // drain the access path iterator. + final ArrayList<E> tmp = new ArrayList<E>(limit); + + int nsamples = 0; + + final Iterator<E> src = accessPath.iterator(0L/* offset */, limit, + limit/* capacity */); + + while (src.hasNext() && nsamples < limit) { + + tmp.add(src.next()); + + nsamples++; + + } + + // convert to an array of the appropriate type. + sample = tmp.toArray((E[]) java.lang.reflect.Array.newInstance( + tmp.get(0).getClass(), tmp.size())); + + } + + public IPredicate<E> getPredicate() { + return pred; + } + + public boolean isEmpty() { + return sample != null; + } + + public int sampleSize() { + return sample == null ? 0 : sample.length; + } + + public int limit() { + return limit; + } + + /** + * The sample. + * + * @return The sample -or- <code>null</code> if the sample was + * empty. + */ + public E[] getSample() { + return sample; + } + + } // AccessPathSample + +} Deleted: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleLocalBTree.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleLocalBTree.java 2010-10-22 19:47:04 UTC (rev 3839) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleLocalBTree.java 2010-10-22 19:58:41 UTC (rev 3840) @@ -1,95 +0,0 @@ -package com.bigdata.bop.ap; - -import java.util.concurrent.Callable; -import java.util.concurrent.FutureTask; - -import com.bigdata.bop.BOpContext; -import com.bigdata.bop.IPredicate; -import com.bigdata.btree.AbstractBTree; -import com.bigdata.relation.accesspath.IBlockingBuffer; - -/** - * Sampling operator for an {@link AbstractBTree}. - * - * @author <a href="mailto:tho...@us...">Bryan - * Thompson</a> - */ -public class SampleLocalBTree<E> extends AbstractSampleIndex<E> { - - /** - * - */ - private static final long serialVersionUID = 1L; - - public SampleLocalBTree(final IPredicate<E> pred, final int limit) { - - super(pred, limit); - - } - - public FutureTask<Void> eval(final BOpContext<E> context) { - - if (context.getPartitionId() != -1) { - // Must not be specific to a shard. - throw new UnsupportedOperationException(); - } - - return new FutureTask<Void>(new LocalBTreeSampleTask(context)); - - } - - /** - * Sample an {@link AbstractBTree}. 
- */ - private class LocalBTreeSampleTask implements - Callable<Void> { - - private final BOpContext<E> context; - - private final IBlockingBuffer<E[]> sink; - - LocalBTreeSampleTask(final BOpContext<E> context) { - - this.context = context; - - this.sink = context.getSink(); - - } - - public Void call() throws Exception { - - /* - * FIXME Decide how we are going to resolve the appropriate index - * for the predicate. This could go through - * IJoinNexus.getTailRelationView() and - * IJoinNexus.getTailAccessPath(). Those are just going through the - * locator. Review how the actual access path is selected versus the - * IKeyOrder specified on the IPredicate. If the IKeyOrder of - * interest is on the IPredicate, then why not just use that? - */ - -// final IPredicate<E> pred = pred(); -// -// final String relationName = pred.getOnlyRelationName(); -// -// final IRelation<E> rel = (IRelation<E>) joinNexus.getIndexManager() -// .getResourceLocator().locate(relationName, -// joinNexus.getReadTimestamp()); -// -// final IAccessPath<E> accessPath = rel.getAccessPath(pred); - - /* - * FIXME Sample N randomly chosen indices or evenly selected? - * - * Note: If there are only 100 leaves and we sample evenly, that - * could result in reading all the leaves. However, when the - * B+Tree is large we will only touch a few leaves even with - * uniform sampling. - */ - throw new UnsupportedOperationException(); - - } - - } // class LocalBTreeSampleTask - -} Deleted: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleLocalShard.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleLocalShard.java 2010-10-22 19:47:04 UTC (rev 3839) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleLocalShard.java 2010-10-22 19:58:41 UTC (rev 3840) @@ -1,87 +0,0 @@ -package com.bigdata.bop.ap; - -import java.util.concurrent.Callable; -import java.util.concurrent.Future; -import java.util.concurrent.FutureTask; - -import com.bigdata.bop.BOpContext; -import com.bigdata.bop.IPredicate; -import com.bigdata.btree.AbstractBTree; -import com.bigdata.relation.IRelation; -import com.bigdata.relation.accesspath.IAccessPath; -import com.bigdata.relation.accesspath.IBlockingBuffer; - -/** - * Sampling operator for a shard view. - * - * @author <a href="mailto:tho...@us...">Bryan - * Thompson</a> - */ -public class SampleLocalShard<E> extends AbstractSampleIndex<E> { - - /** - * - */ - private static final long serialVersionUID = 1L; - - public SampleLocalShard(final IPredicate<E> pred, final int limit) { - - super(pred,limit); - - } - - /* - * Note: This is done at evaluation time, local to the data. - */ - public FutureTask<Void> eval(final BOpContext<E> context) { - - if (context.getPartitionId() == -1) { - // Must be specific to a shard. - throw new UnsupportedOperationException(); - } - - return new FutureTask<Void>(new LocalShardSampleTask(context)); - - } - - /** - * Sample an {@link AbstractBTree}. 
- */ - private class LocalShardSampleTask implements Callable<Void> { - - private final BOpContext<E> context; - private final IBlockingBuffer<E[]> sink; - - LocalShardSampleTask(final BOpContext<E> context) { - - this.context = context; - - this.sink = context.getSink(); - - } - - public Void call() throws Exception { - - final IPredicate<E> pred = pred(); - - final IRelation<E> view = context.getRelation(pred); - - final IAccessPath<E> accessPath = view.getAccessPath(pred); - - /* - * FIXME Sample N tuples based on a uniform offset distribution, - * discarding duplicates or tuples which are deleted in their - * most recent revision. - * - * Note: If there are only 100 leaves and we sample evenly, that - * could result in reading all the leaves. However, when the - * B+Tree is large we will only touch a few leaves even with - * uniform sampling. - */ - throw new UnsupportedOperationException(); - - } - - } // class LocalShardSampleTask - -} Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/AccessPath.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/AccessPath.java 2010-10-22 19:47:04 UTC (rev 3839) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/AccessPath.java 2010-10-22 19:58:41 UTC (rev 3840) @@ -1583,11 +1583,15 @@ if (partitionCount == 0) { - /* - * SWAG in case zero partition count is reported (I am not sure that - * this code path is possible). - */ - return new ScanCostReport(0L/* rangeCount */, partitionCount, 100/* millis */); +// /* +// * SWAG in case zero partition count is reported (I am not sure that +// * this code path is possible). +// */ +// return new ScanCostReport(0L/* rangeCount */, partitionCount, 100/* millis */); + /* + * Should never be "zero" partition count. + */ + throw new AssertionError(); } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestAll.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestAll.java 2010-10-22 19:47:04 UTC (rev 3839) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestAll.java 2010-10-22 19:58:41 UTC (rev 3840) @@ -24,8 +24,6 @@ package com.bigdata.bop.ap; -import com.bigdata.bop.ap.filter.TestDistinctFilter; - import junit.framework.Test; import junit.framework.TestCase; import junit.framework.TestSuite; @@ -72,12 +70,9 @@ /* * Sampling an access path. */ - - // test sampling from an AbstractBTree. - suite.addTestSuite(TestSampleLocalBTree.class); - // test sampling from an FusedView. - suite.addTestSuite(TestSampleLocalBTree.class); + // test sampling form an index. + suite.addTestSuite(TestSampleIndex.class); return suite; Copied: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestSampleIndex.java (from rev 3756, branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestSampleLocalBTree.java) =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestSampleIndex.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestSampleIndex.java 2010-10-22 19:58:41 UTC (rev 3840) @@ -0,0 +1,234 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... 
+ +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Aug 19, 2010 + */ + +package com.bigdata.bop.ap; + +import java.text.NumberFormat; +import java.util.Arrays; +import java.util.Properties; +import java.util.Random; + +import junit.framework.TestCase2; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.BOpContextBase; +import com.bigdata.bop.IPredicate; +import com.bigdata.bop.IVariable; +import com.bigdata.bop.NV; +import com.bigdata.bop.Var; +import com.bigdata.journal.BufferMode; +import com.bigdata.journal.ITx; +import com.bigdata.journal.Journal; +import com.bigdata.striterator.ChunkedArrayIterator; + +/** + * Test suite for {@link SampleIndex}. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id: TestSampleLocalBTree.java 3665 2010-09-28 16:53:22Z thompsonbry + * $ + * + * FIXME Just like {@link TestPredicateAccessPath}, this test suite + * needs to cover all of the combinations of global views of + * partitioned and unpartitioned indices. + */ +public class TestSampleIndex extends TestCase2 { + + /** + * + */ + public TestSampleIndex() { + } + + /** + * @param name + */ + public TestSampleIndex(String name) { + super(name); + } + + @Override + public Properties getProperties() { + + final Properties p = new Properties(super.getProperties()); + + p.setProperty(Journal.Options.BUFFER_MODE, BufferMode.Transient + .toString()); + + return p; + + } + + static private final String namespace = "ns"; + + Journal jnl; + + R rel; + + public void setUp() throws Exception { + + jnl = new Journal(getProperties()); + + } + + /** + * Create and populate relation in the {@link #namespace}. + * + * @return The #of distinct entries. + */ + private int loadData(final int scale) { + + final String[] names = new String[] { "John", "Mary", "Saul", "Paul", + "Leon", "Jane", "Mike", "Mark", "Jill", "Jake", "Alex", "Lucy" }; + + final Random rnd = new Random(); + + // #of distinct instances of each name. + final int populationSize = Math.max(10, (int) Math.ceil(scale / 10.)); + + // #of trailing zeros for each name. + final int nzeros = 1 + (int) Math.ceil(Math.log10(populationSize)); + +// System.out.println("scale=" + scale + ", populationSize=" +// + populationSize + ", nzeros=" + nzeros); + + final NumberFormat fmt = NumberFormat.getIntegerInstance(); + fmt.setMinimumIntegerDigits(nzeros); + fmt.setMaximumIntegerDigits(nzeros); + fmt.setGroupingUsed(false); + + // create the relation. + final R rel = new R(jnl, namespace, ITx.UNISOLATED, new Properties()); + rel.create(); + + // data to insert. 
+ final E[] a = new E[scale]; + + for (int i = 0; i < scale; i++) { + + final String n1 = names[rnd.nextInt(names.length)] + + fmt.format(rnd.nextInt(populationSize)); + + final String n2 = names[rnd.nextInt(names.length)] + + fmt.format(rnd.nextInt(populationSize)); + +// System.err.println("i=" + i + ", n1=" + n1 + ", n2=" + n2); + + a[i] = new E(n1, n2); + + } + + // sort before insert for efficiency. + Arrays.sort(a,R.primaryKeyOrder.getComparator()); + + // insert data (the records are not pre-sorted). + final long ninserts = rel.insert(new ChunkedArrayIterator<E>(a.length, a, null/* keyOrder */)); + + // Do commit since not scale-out. + jnl.commit(); + + // should exist as of the last commit point. + this.rel = (R) jnl.getResourceLocator().locate(namespace, + ITx.READ_COMMITTED); + + assertNotNull(rel); + + return (int) ninserts; + + } + + public void tearDown() throws Exception { + + if (jnl != null) { + jnl.destroy(); + jnl = null; + } + + // clear reference. + rel = null; + + } + + /** + * Unit test verifies some aspects of a sample taken from a local index + * (primarily that the sample respects the limit). + */ + public void test_something() { + + final int scale = 10000; + + final int nrecords = loadData(scale); + + final IVariable<?> x = Var.var("x"); + + final IVariable<?> y = Var.var("y"); + + final IPredicate<E> predicate = new Predicate<E>(new BOp[] { x, y }, + new NV(IPredicate.Annotations.RELATION_NAME, + new String[] { namespace }),// + new NV(IPredicate.Annotations.TIMESTAMP, ITx.READ_COMMITTED)// + ); + + final BOpContextBase context = new BOpContextBase(null/* fed */, jnl/* indexManager */); + + final int[] limits = new int[] { // + 1, 9, 19, 100, 217, 900,// + nrecords, + nrecords + 1 + }; + + for (int limit : limits) { + + final SampleIndex<E> sampleOp = new SampleIndex<E>( + new BOp[0], + NV + .asMap( + // + new NV(SampleIndex.Annotations.PREDICATE, + predicate),// + new NV(SampleIndex.Annotations.LIMIT, limit)// + )); + + final E[] a = sampleOp.eval(context); + +// System.err.println("limit=" + limit + ", nrecords=" + nrecords +// + ", nsamples=" + a.length); +// +// for (int i = 0; i < a.length && i < 10; i++) { +// System.err.println("a[" + i + "]=" + a[i]); +// } + + final int nexpected = Math.min(nrecords, limit); + + assertEquals("#samples (limit=" + limit + ", nrecords=" + nrecords + + ", nexpected=" + nexpected + ")", nexpected, a.length); + + } + + } + +} Deleted: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestSampleLocalBTree.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestSampleLocalBTree.java 2010-10-22 19:47:04 UTC (rev 3839) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestSampleLocalBTree.java 2010-10-22 19:58:41 UTC (rev 3840) @@ -1,59 +0,0 @@ -/** - -Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. - -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ -/* - * Created on Aug 19, 2010 - */ - -package com.bigdata.bop.ap; - -import com.bigdata.bop.ap.SampleLocalBTree; - -import junit.framework.TestCase2; - -/** - * Test suite for {@link SampleLocalBTree}. - * - * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id$ - */ -public class TestSampleLocalBTree extends TestCase2 { - - /** - * - */ - public TestSampleLocalBTree() { - } - - /** - * @param name - */ - public TestSampleLocalBTree(String name) { - super(name); - } - - public void test_something() { - fail("write tests"); - } - -} Deleted: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestSampleLocalShard.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestSampleLocalShard.java 2010-10-22 19:47:04 UTC (rev 3839) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestSampleLocalShard.java 2010-10-22 19:58:41 UTC (rev 3840) @@ -1,59 +0,0 @@ -/** - -Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. - -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ -/* - * Created on Aug 19, 2010 - */ - -package com.bigdata.bop.ap; - -import com.bigdata.bop.ap.SampleLocalShard; - -import junit.framework.TestCase2; - -/** - * Test suite for {@link SampleLocalShard}. - * - * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id$ - */ -public class TestSampleLocalShard extends TestCase2 { - - /** - * - */ - public TestSampleLocalShard() { - } - - /** - * @param name - */ - public TestSampleLocalShard(String name) { - super(name); - } - - public void test_something() { - fail("write tests"); - } - -} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
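In outline, the invariant exercised by test_something() is simply that a sample can never be larger than the relation being sampled. A minimal sketch of that contract, assuming the operator API shown in the patch above (SampleIndex with its PREDICATE and LIMIT annotations and eval(BOpContextBase)); the relation and context setup are elided:

    // Sample at most [limit] tuples from the index underlying [predicate].
    final SampleIndex<E> sampleOp = new SampleIndex<E>(new BOp[0], NV.asMap(
            new NV(SampleIndex.Annotations.PREDICATE, predicate),
            new NV(SampleIndex.Annotations.LIMIT, limit)));

    final E[] sample = sampleOp.eval(context);

    // The sample respects the limit but is capped by the relation size.
    assertEquals(Math.min(nrecords, limit), sample.length);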
From: <tho...@us...> - 2010-11-02 10:42:00
|
Revision: 3861 http://bigdata.svn.sourceforge.net/bigdata/?rev=3861&view=rev Author: thompsonbry Date: 2010-11-02 10:41:53 +0000 (Tue, 02 Nov 2010) Log Message: ----------- RunningQuery - Modified to not log an InterruptedException for an operator task @ ERROR since this is the normal behavior of a SliceOp when its LIMIT is satisfied. TestQueryEngine - Modified a unit test which uses SliceOp to limit the #of results visited to not check the statistics on the join operator used in the query. The statistics of the join operator are not reliably updated because there is a race condition between the SliceOp, which interrupts the join task, and the join task's normal completion and post-processing. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java 2010-11-02 10:18:52 UTC (rev 3860) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java 2010-11-02 10:41:53 UTC (rev 3861) @@ -67,6 +67,7 @@ import com.bigdata.relation.accesspath.MultiSourceSequentialAsynchronousIterator; import com.bigdata.service.IBigdataFederation; import com.bigdata.striterator.ICloseableIterator; +import com.bigdata.util.InnerCause; import com.bigdata.util.concurrent.Haltable; import com.bigdata.util.concurrent.Memoizer; @@ -1340,9 +1341,16 @@ } catch (Throwable ex1) { - // Log an error. - log.error("queryId=" + queryId + ", bopId=" + t.bopId - + ", bop=" + t.bop, ex1); + /* + * Note: SliceOp will cause other operators to be interrupted + * during normal evaluation so it is not useful to log an + * InterruptedException @ ERROR. + */ + if (!InnerCause.isInnerCause(ex1, InterruptedException.class)) { + // Log an error. + log.error("queryId=" + queryId + ", bopId=" + t.bopId + + ", bop=" + t.bop, ex1); + } /* * Mark the query as halted on this node regardless of whether Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java 2010-11-02 10:18:52 UTC (rev 3860) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java 2010-11-02 10:41:53 UTC (rev 3861) @@ -925,20 +925,28 @@ assertEquals(1L, stats.chunksOut.get()); } - // validate the stats for the join operator. - { - final BOpStats stats = statsMap.get(joinId); - assertNotNull(stats); - if (log.isInfoEnabled()) - log.info("join : " + stats.toString()); + /* + * Note: SliceOp can cause the Join operator to be interrupted. If this + * occurs, the BOpStats for the join are not reported and aggregated + * reliably (there is a race between the completion of the join and its + * interrupt by the slice). Since this unit test has a slice which will + * interrupt the running query, we can not test the stats on the join + * reliably for this unit test. + */ +// // validate the stats for the join operator. +// { +// final BOpStats stats = statsMap.get(joinId); +// assertNotNull(stats); +// if (log.isInfoEnabled()) +// log.info("join : " + stats.toString()); +// +// // verify query solution stats details. 
+// assertEquals(1L, stats.chunksIn.get()); +// assertEquals(1L, stats.unitsIn.get()); +// assertEquals(4L, stats.unitsOut.get()); +// assertEquals(1L, stats.chunksOut.get()); +// } - // verify query solution stats details. - assertEquals(1L, stats.chunksIn.get()); - assertEquals(1L, stats.unitsIn.get()); - assertEquals(4L, stats.unitsOut.get()); - assertEquals(1L, stats.chunksOut.get()); - } - // validate the stats for the slice operator. { final BOpStats stats = statsMap.get(sliceId); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
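The fix above hinges on asking whether an InterruptedException appears anywhere in the cause chain of the error thrown by the operator task. A minimal sketch of that test in plain Java (the actual com.bigdata.util.InnerCause implementation may differ in detail):

    /**
     * Return true iff an instance of the given class is found in the
     * cause chain of the given throwable.
     */
    static boolean isInnerCause(final Throwable t,
            final Class<? extends Throwable> cls) {
        for (Throwable cur = t; cur != null; cur = cur.getCause()) {
            if (cls.isInstance(cur)) {
                // E.g., the InterruptedException raised when a SliceOp
                // cancels upstream operators once its LIMIT is satisfied.
                return true;
            }
        }
        return false;
    }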
From: <tho...@us...> - 2010-11-02 12:43:23
|
Revision: 3865 http://bigdata.svn.sourceforge.net/bigdata/?rev=3865&view=rev Author: thompsonbry Date: 2010-11-02 12:43:16 +0000 (Tue, 02 Nov 2010) Log Message: ----------- https://sourceforge.net/apps/trac/bigdata/ticket/187 I've updated the javadoc for ITransactionService#newTx(long) to indicate that the given timestamp may lie in the future. I've updated AbstractTransactionService? to hand back nextTimestamp() for a read-only transaction request when the given timestamp is in the future. I've updated TestTransactionService? to test this behavior for read-only and read-write tx (nothing had to be changed to support the latter). I've updated test_newTx_readOnly_timestampInFuture to request a timestamp which is known to be in the future and to verify that a read-only tx was assigned. These edits are self-consistent, but the tx semantics really ought to be reviewed in more depth, e.g., as part of https://sourceforge.net/apps/trac/bigdata/ticket/145 or when adding full-distributed read-write tx support to the database. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/journal/ITransactionService.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/service/AbstractTransactionService.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/journal/TestTransactionService.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/journal/ITransactionService.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/journal/ITransactionService.java 2010-11-02 12:42:22 UTC (rev 3864) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/journal/ITransactionService.java 2010-11-02 12:43:16 UTC (rev 3865) @@ -152,9 +152,6 @@ * @return The unique transaction identifier. * * @throws IllegalStateException - * if the requested timestamp is greater than - * {@link #getLastCommitTime()}. - * @throws IllegalStateException * if the requested timestamp is for a commit point that is no * longer preserved by the database (the resources for that * commit point have been released). @@ -164,6 +161,9 @@ * @todo specialize exception for a timestamp that is no longer preserved * and for one that is in the future? */ +// * @throws IllegalStateException +// * if the requested timestamp is greater than +// * {@link #getLastCommitTime()}. public long newTx(long timestamp) throws IOException; /** Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/service/AbstractTransactionService.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/service/AbstractTransactionService.java 2010-11-02 12:42:22 UTC (rev 3864) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/service/AbstractTransactionService.java 2010-11-02 12:43:16 UTC (rev 3865) @@ -625,6 +625,8 @@ private volatile long lastTimestamp; /** + * {@inheritDoc} + * <p> * Note: There is an upper bound of one read-write transaction that may be * created per millisecond (the resolution of {@link #nextTimestamp()}) and * requests for new read-write transactions contend with other requests for @@ -1234,21 +1236,21 @@ final long lastCommitTime = getLastCommitTime(); -// if (timestamp > lastCommitTime) { - if (timestamp > lastTimestamp) { - - /* - * You can't request a historical read for a timestamp which has not - * yet been issued by this service! 
- */ +// if (timestamp > lastTimestamp) { +// +// /* +// * You can't request a historical read for a timestamp which has not +// * yet been issued by this service! +// */ +// +// throw new IllegalStateException( +// "Timestamp is in the future: timestamp=" + timestamp +// + ", lastCommitTime=" + lastCommitTime +// + ", lastTimestamp=" + lastTimestamp); +// +// } else + if (timestamp == lastCommitTime) { - throw new IllegalStateException( - "Timestamp is in the future: timestamp=" + timestamp - + ", lastCommitTime=" + lastCommitTime - + ", lastTimestamp=" + lastTimestamp); - - } else if (timestamp == lastCommitTime) { - /* * Special case. We just return the next timestamp. * @@ -1325,12 +1327,23 @@ if (commitTime == -1L) { /* - * @todo I believe that this can only arise when there are no commit - * points in the log. + * There are no commit points in the log. + * + * Note: Just return the next timestamp. It is guaranteed to be GT + * the desired commit time (which does not exist) and LT the next + * commit point. */ - throw new RuntimeException( - "No data for that commit time: timestamp=" + timestamp); + return nextTimestamp(); + +// /* +// * Note: I believe that this can only arise when there are no commit +// * points in the log. The thrown exception is per the top-level api +// * for ITransactionService#newTx(long). +// */ +// throw new IllegalStateException( +// "No data for that commit time: timestamp=" + timestamp); + } /* Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/journal/TestTransactionService.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/journal/TestTransactionService.java 2010-11-02 12:42:22 UTC (rev 3864) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/journal/TestTransactionService.java 2010-11-02 12:43:16 UTC (rev 3865) @@ -270,8 +270,15 @@ @Override public long nextTimestamp() { - super.nextTimestamp () ; - return super.nextTimestamp () ; + // skip at least one millisecond. + super.nextTimestamp(); + + /* + * Invoke the behavior on the base class, which has a side-effect on + * the private [lastTimestamp] method. + */ + return super.nextTimestamp(); + } } @@ -889,33 +896,96 @@ } } + + /** + * Verify the behavior of the {@link AbstractTransactionService} when there + * are no commit points and a read-only transaction is requested. Since + * there are no commit points, the transaction service will return the next + * timestamp. That value will be GT the requested timestamp and LT any + * commit point (all commit points are in the future). + */ + public void test_newTx_nothingCommitted_readOnlyTx() { + final MockTransactionService service = newFixture(); + + try { + + /* + * Note: The commit time log is empty. + */ + final long timestamp = service.nextTimestamp(); + + /* + * Request a read-only view which is in the past based on the + * transaction server's clock. However, there are no commit points + * which cover that timestamp since there are no commit points in + * the database. + */ + service.newTx(timestamp - 1); + + } finally { + + service.destroy(); + + } + + } + /** - * Verify that you can not create a read-only transaction using a timestamp - * that is in the future. + * Verify the behavior of the {@link AbstractTransactionService} when there + * are no commit points and a read-write transaction is requested. You can + * always obtain a read-write transaction, even when there are no commit + * points on the database. 
*/ - public void test_newTx_readOnly_timestampInFuture() { + public void test_newTx_nothingCommitted_readWriteTx() { final MockTransactionService service = newFixture(); try { /* - * Note: The commit time log is empty so anything is in the future. + * Note: The commit time log is empty. */ + service.newTx(ITx.UNISOLATED); + + } finally { - try { - /** - * FIXME Modified to be compatible with changes made to AbstractTransactionService, revision 3804. - * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/187">Trac 187</a> - */ -// service.newTx(10); - service.newTx(service.nextTimestamp () + 10); - fail("Expecting: "+IllegalStateException.class); - } catch(IllegalStateException ex) { - log.info("Ignoring expected exception: "+ex); - } + service.destroy(); + + } + + } + + /** + * Verify that you can create a read-only transaction using a timestamp that + * is in the future. A commit point is generated and a read-only tx is + * requested which is beyond that commit point. The returned tx will be + * assigned using nextTimestamp() which is guaranteed to be less than the + * next commit point on the database (which in this case would be the first + * commit point as well). + */ + public void test_newTx_readOnly_timestampInFuture() { + + final MockTransactionService service = newFixture(); + + try { + + // request a timestamp. + final long timestamp1 = service.nextTimestamp(); + // make that timestamp a valid commit time. + service.notifyCommit(timestamp1); + +// try { + // request a timestamp in the future. + final long tx = service.newTx(timestamp1 * 2); + System.err.println("ts="+timestamp1); + System.err.println("tx="+tx); +// fail("Expecting: "+IllegalStateException.class); +// } catch(IllegalStateException ex) { +// log.info("Ignoring expected exception: "+ex); +// } + } finally { service.destroy(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
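Taken together, the revised behavior for a read-only request can be paraphrased as follows. This is a sketch of the logic described in the log message, not the literal AbstractTransactionService code; locateCommitPoint() is a hypothetical stand-in for the existing lookup against the commit time log:

    long newTx(final long timestamp) {
        if (timestamp >= getLastCommitTime()) {
            /*
             * The requested timestamp is the most recent commit time or
             * lies in the future: no longer an error. Hand back
             * nextTimestamp(), which is GT the request and LT the next
             * commit point on the database.
             */
            return nextTimestamp();
        }
        // Otherwise read against the commit point covering the timestamp.
        return locateCommitPoint(timestamp);
    }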
From: <tho...@us...> - 2010-11-03 11:52:15
|
Revision: 3874 http://bigdata.svn.sourceforge.net/bigdata/?rev=3874&view=rev Author: thompsonbry Date: 2010-11-03 11:52:07 +0000 (Wed, 03 Nov 2010) Log Message: ----------- Backing out the introduction of a weak reference into the WriteExecutorService in order to track down some odd errors which this appears to have introduced. This is per [1]. [1] https://sourceforge.net/apps/trac/bigdata/ticket/196 Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/journal/WriteExecutorService.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/journal/TestJournalShutdown.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/journal/WriteExecutorService.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/journal/WriteExecutorService.java 2010-11-02 19:00:31 UTC (rev 3873) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/journal/WriteExecutorService.java 2010-11-03 11:52:07 UTC (rev 3874) @@ -221,7 +221,12 @@ private static class MyLockManager<R extends Comparable<R>> extends NonBlockingLockManagerWithNewDesign<R> { -// private final WriteExecutorService service; + /* + * FIXME restored hard reference since introducing just a weak reference + * here appears to be causing some odd behaviors. Track these behaviors + * down and sort this all out. + */ + private final WriteExecutorService service; private final WeakReference<WriteExecutorService> serviceRef; public MyLockManager(final int capacity, final int maxLockTries, @@ -230,7 +235,7 @@ super(capacity, maxLockTries, predeclareLocks); -// this.service = service; + this.service = service; this.serviceRef = new WeakReference<WriteExecutorService>(service); } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/journal/TestJournalShutdown.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/journal/TestJournalShutdown.java 2010-11-02 19:00:31 UTC (rev 3873) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/journal/TestJournalShutdown.java 2010-11-03 11:52:07 UTC (rev 3874) @@ -116,7 +116,8 @@ if (nalive.get() == ncreated.get()) { - fail("No journals were finalized."); + fail("Created " + ncreated + + " journals. No journals were finalized."); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
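The failure mode prompting the backout is easy to state in miniature: if the lock manager holds only a WeakReference, the referent can be cleared while work is still being dispatched. A hypothetical sketch (the names below are illustrative, not the bigdata classes):

    import java.lang.ref.WeakReference;

    class LockManagerSketch {
        interface Service { void runReadyTasks(); }

        private final Service service; // hard reference: pins the service (restored above)
        private final WeakReference<Service> serviceRef;

        LockManagerSketch(final Service service) {
            this.service = service;
            this.serviceRef = new WeakReference<Service>(service);
        }

        void onLockGranted() {
            final Service s = serviceRef.get();
            if (s == null) {
                // Reachable only if the service was finalized while still
                // in use -- the "odd behaviors" cited in ticket #196.
                return;
            }
            s.runReadyTasks();
        }
    }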
From: <tho...@us...> - 2010-11-23 16:30:25
|
Revision: 3980 http://bigdata.svn.sourceforge.net/bigdata/?rev=3980&view=rev Author: thompsonbry Date: 2010-11-23 16:30:18 +0000 (Tue, 23 Nov 2010) Log Message: ----------- javadoc edits on join graph and extensible hashing Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/TestExtensibleHashing.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2010-11-23 15:22:27 UTC (rev 3979) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2010-11-23 16:30:18 UTC (rev 3980) @@ -131,11 +131,38 @@ * query optimizer SHOULD pay attention to these things and exploit their * conditional selectivity for the query plan.] * - * @todo When there are optional join graphs, are we going to handle that by - * materializing a sample (or all) of the joins feeding that join graph - * and then apply the runtime optimizer to the optional join graph, - * getting out a sample to feed onto any downstream join graph? + * @todo Handle optional join graphs by first applying the runtime optimizer to + * the main join graph and obtaining a sample for the selected join path. + * That sample will then be fed into the optional join graph in order + * to optimize the join order within the optional join graph (a join order + * which is selective in the optional join graph is better since it will + * result in faster rejections of intermediate results and hence do less + * work). + * <p> + * This is very much related to accepting a collection of non-empty + * binding sets when running the join graph. However, the optional join + * graph should be presented in combination with the original join graph and the + * starting paths must be constrained to have the selected join path for + * the original join graph as a prefix. With this setup, the original join + * graph has been locked into a specific join path and the sampling of + * edges and vertices for the optional join graph can proceed normally. + * <p> + * True optionals will always be appended as part of the "tail plan" for + * any join graph and can not be optimized as each optional join must run + * regardless (as long as the intermediate solution survives the + * non-optional joins). * + * @todo There are two cases where a join graph must be optimized against a + * specific set of inputs. In one case, it is a sample (this is how + * optimization of an optional join group proceeds per above). In the + * other case, the set of inputs is fixed and is provided instead of a + * single empty binding set as the starting condition. This second case is + * actually a bit more complicated since we can not use a random sample of + * vertices unless they do not share any variables with the initial binding + * sets. When there is a shared variable, we need to do a cutoff join of + * the edge with the initial binding sets. When there is not a shared + * variable, we can sample the vertex and then do a cutoff join.
+ * * @todo When we run into a cardinality estimation underflow (the expected * cardinality goes to zero) we could double the sample size for just * those join paths which hit a zero estimated cardinality and re-run them Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/TestExtensibleHashing.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/TestExtensibleHashing.java 2010-11-23 15:22:27 UTC (rev 3979) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/TestExtensibleHashing.java 2010-11-23 16:30:18 UTC (rev 3980) @@ -35,176 +35,125 @@ * Test suite for extensible hashing. * * <br> - * (***) Persistence capable hash table for high volume hash joins. * - * The data will be "rows" in a "relation" modeled using binding sets. We can - * use dense encoding of these rows since they have a fixed schema (some columns - * may allow nulls). There should also be a relationship to how we encode these - * data for network IO. + * @todo Persistence capable hash table for high volume hash joins. The data + * will be "rows" in a "relation" modeled using binding sets. We can use + * dense encoding of these rows since they have a fixed schema (some + * columns may allow nulls). There should also be a relationship to how we + * encode these data for network IO. + * <p> + * @todo Extensible hashing: + * <p> + * - hash(byte[] key) -> IRaba page. Use IRaba for keys/values and key + * search. + * <p> + * - Split if overflows the bucket size (alternative is some versioning + * where the computed hash value indexes into a logical address which is + * then translated to an IRawStore address - does the RWStore help us out + * here?) + * <p> + * - Ring buffer to wire in hot nodes (but expect random touches). + * <p> + * - initially, no history (no versioning). just replace the record when + * it is evicted from the ring buffer. + * <p> + * What follows is a summary of an extensible hashing design for bigdata. + * This covers most aspects of the hash map design, but does not drill + * deeply into the question of scale-out hash maps. The immediate goal is + * to develop a hash map which can be used for a variety of tasks, + * primarily pertaining to analytic query as described above. + * <p> + * Extensible hashing is one form of dynamic hashing in which buckets are + * split or coalesced as necessary and in which the reorganization is + * performed on one bucket at a time. + * <p> + * Given a hash function h generating, e.g., int32 values where b is the + * #of bits in the hash code. At any point, we use 0 LTE i LTE b bits of + * that hash code as an index into a table of bucket addresses. The value + * of i will change as the #of buckets changes based on the scale of the + * data to be addressed. + * <p> + * Given a key K, the bucket address table is indexed with i bits of the + * hash code, h(K). The value at that index is the address of the hash + * bucket. However, several consecutive entries in the hash table may + * point to the same hash bucket (for example, the hash index may be + * created with i=4, which would give 16 index values but only one initial + * bucket). The bucket address table entries which map onto the same hash + * bucket will have a common bit length, which may be LTE [i]. This bit + * length is not stored in the bucket address table, but each bucket knows + * its bit length. 
Given a global bit length of [i] and a bucket bit + * length of [j], there will be 2^(i-j) bucket address table entries which + * point to the same bucket. + * <p> + * Hash table versioning can be easily implemented by: (a) a checkpoint + * record with the address of the bucket address table (which could be + * broken into a two level table comprised of 4k pages in order to make + * small updates faster); and (b) a store level policy such that we do not + * overwrite the modified records directly (though they may be recycled). + * This will give us the same consistent read behind behavior as the + * B+Tree. + * <p> + * The IIndex interface will need to be partitioned appropriately such + * that the IRangeScan interface is not part of the hash table indices (an + * isBTree() and isHashMap() method might be added). + * <p> + * While the same read-through views for shards should work with hash maps + * as work with B+Tree indices, a different scheme may be necessary to + * locate those shards and we might need to use int64 hash codes in + * scale-out or increase the page size (at least for the read-only hash + * segment files, which would also need a batch build operation). The + * AccessPath will also need to be updated to be aware of classes which do + * not support key-range scans, but only whole relation scans. + * <p> + * Locking on hash tables without versioning should be much simpler than + * locking on B+Trees since there is no hierarchy and more operations can + * proceed without blocking in parallel. + * <p> + * We can represent tuples (key,value pairs) in an IRaba data structure + * and reuse parts of the B+Tree infrastructure relating to compression of + * IRaba, key search, etc. In fact, we might use to lazy reordering notion + * from Monet DB cracking to only sort the keys in a bucket when it is + * persisted. This is also a good opportunity to tackling splitting the + * bucket if it overflows the target record size, e.g., 4k. We could throw + * out an exception if the sorted, serialized, and optionally compressed + * record exceeds the target record size and then split the bucket. All of + * this seems reasonable and we might be able to then back port those + * concepts into the B+Tree. + * <p> + * We need to estimate the #of tuples which will fit within the bucket. We + * can do this based on: (a) the byte length of the keys and values (key + * compression is not going to help out much for a hash index since the + * keys will be evenly distributed even if they are ordered within a + * bucket); (b) the known per tuple overhead and per bucket overhead; (c) + * an estimate of the compression ratio for raba encoding and record + * compression. This estimate could be used to proactively split a bucket + * before it is evicted. This is most critical before anything is evicted + * as we would otherwise have a single very large bucket. So, let's make + * this simple and split the bucket if the sum of the key + val bytes + * exceeds 120% of the target record size (4k, 8k, etc). The target page + * size can be a property of the hash index. [Note: There is an implicit + * limit on the size of a tuple with this approach. The alternative is to + * fix the #of tuples in the bucket and allow buckets to be of whatever + * size they are for the specific data in that bucket.] * - * https://sourceforge.net/apps/trac/bigdata/ticket/203 + * @todo RWStore integration notes: + * <p> + * - RWStore with "temporary" quality. Creates the backing file lazily on + * eviction from the write service. 
+ * <p> + * - RWStore with "RAM" only? (Can not exceed the #of allocated buffers or + * can, but then it might force paging out to swap?) + * <p> + * - RWStore with "RAM" mostly. Converts to disk backed if uses all those + * buffers. Possibly just give the WriteCacheService a bunch of write + * cache buffers (10-100) and have it evict to disk *lazily* rather than + * eagerly (when the #of free buffers is down to 20%). + * <p> + * - RWStore with memory mapped file? As I recall, the problem is that we + * can not guarantee extension or close of the file under Java. But some + * people seem to make this work... * - * - * Extendable hash table: - * - * - hash(byte[] key) -> IRaba page. Use IRaba for keys/values and key search. - * - * - Split if overflows the bucket size (alternative is some versioning where - * the computed hash value indexes into a logical address which is then - * translated to an IRawStore address - does the RWStore help us out here?) - * - * - ring buffer to wire in hot nodes (but expect random touches). - * - * - initially, no history (no versioning). just replace the record when it is - * evicted from the ring buffer. - * - * What follows is a summary of an extendable hash map design for bigdata. This - * covers most aspects of the hash map design, but does not drill deeply into - * the question of scale-out hash maps. The immediate goal is to develop a hash - * map which can be used for a variety of tasks, primarily pertaining to - * analytic query as described above. - * - * Extendable hashing is one form of dynamic hashing in which buckets are split - * or coalesced as necessary and in which the reorganization is performed on one - * bucket at a time. - * - * Given a hash function h generating, e.g., int32 values where b is the #of - * bits in the hash code. At any point, we use 0 LTE i LTE b bits of that hash - * code as an index into a table of bucket addresses. The value of i will change - * as the #of buckets changes based on the scale of the data to be addressed. - * - * Given a key K, the bucket address table is indexed with i bits of the hash - * code, h(K). The value at that index is the address of the hash bucket. - * However, several consecutive entries in the hash table may point to the same - * hash bucket (for example, the hash index may be created with i=4, which would - * give 16 index values but only one initial bucket). The bucket address table - * entries which map onto the same hash bucket will have a common bit length, - * which may be LTE [i]. This bit length is not stored in the bucket address - * table, but each bucket knows its bit length. Given a global bit length of [i] - * and a bucket bit length of [j], there will be 2^(i-j) bucket address table - * entries which point to the same bucket. - * - * Lookup: Compute h(K) and right shift (w/o sign extension) by i bits. Use this - * to index into the bucket address table. The address in the table is the - * bucket address and may be used to directly read the bucket. - * - * Insert: Per lookup. On overflow, we need to split the bucket moving the - * existing records (and the new record) into new buckets. How this proceeds - * depends on whether the hash #of bits used in the bucket is equal to the #of - * bits used to index into the bucket address table. There are two cases: - * - * Split case 1: If i (global bits of the hash which are in use) == j (bucket - * bits of the hash which are in use), then the bucket address table is out of - * space and needs to be resized. Let i := i+1. 
This doubles the size of the - * bucket address table. Each original entry becomes two entries in the new - * table. For the specific bucket which is to be split, a new bucket is - * allocated and the 2nd bucket address table for that entry is set to the - * address of the new bucket. The tuples are then assigned to the original - * bucket and the new bucket by considering the additional bit of the hash code. - * Assuming that all keys are distinct, then one split will always be sufficient - * unless all tuples in the original bucket have the same hash code when their - * i+1 th bit is considered. In this case, we resort to an "overflow" bucket - * (alternatively, the bucket is allowed to be larger than the target size and - * gets treated as a blob). - * - * Split case 2: If i is GT j, then there will be at least two entries in the - * bucket address table which point to the same bucket. One of those entries is - * relabeled. Both the original bucket and the new bucket have their #of bits - * incremented by one, but the #of global bits in use does not change. Of the - * entries in the bucket address table which used to point to the original - * bucket, the 1st half are left alone and the 2nd half are updated to point to - * the new bucket. (Note that the #of entries depends on the global #of hash - * bits in use and the bucket local #of hash bits in use and will be 2 if there - * is a difference of one between those values but can be more than 2 and will - * always be an even number). The entries in the original bucket are rehashed - * and assigned based on the new #of hash bits to be considered to either the - * original bucket or the new bucket. The record is then inserted based on the - * new #of hash bits to be considered. If it still does not fit, then either - * handle by case (1) or case (2) as appropriate. - * - * Note that records which are in themselves larger than the bucket size must - * eventually be handled by: (A) using an overflow record; (B) allowing the - * bucket to become larger than the target page size (using a larger allocation - * slot or becoming a blob); or (C) recording the tuple as a raw record and - * maintaining only the full hash code of the tuple and its raw record address - * in the bucket (this would allow us to automatically promote long literals out - * of the hash bucket and a similar approach might be used for a B+Tree leaf, - * except that a long key will still cause a problem [also, this implies that - * deleting a bucket or leaf on the unisolated index of the RWStore might - * require a scan of the IRaba to identify blob references which must also be - * deleted, so it makes sense to track those as part of the bucket/leaf - * metadata). - * - * Delete: Buckets may be removed no later than when they become empty and doing - * this is a local operation with costs similar to splitting a bucket. Likewise, - * it is clearly possible to coalesce buckets which underflow before they become - * empty by scanning the 2^(i-j) buckets indexed from the entries in the bucket - * address table using i bits from h(K). [I need to research handling deletes a - * little more, including under what conditions it is cost effective to reduce - * the size of the bucket address table itself.] 
- * - * Hash table versioning can be easily implemented by: (a) a checkpoint record - * with the address of the bucket address table (which could be broken into a - * two level table comprised of 4k pages in order to make small updates faster); - * and (b) a store level policy such that we do not overwrite the modified - * records directly (though they may be recycled). This will give us the same - * consistent read behind behavior as the B+Tree. - * - * The IIndex interface will need to be partitioned appropriately such that the - * IRangeScan interface is not part of the hash table indices (an isBTree() and - * isHashMap() method might be added). - * - * While the same read-through views for shards should work with hash maps as - * work with B+Tree indices, a different scheme may be necessary to locate those - * shards and we might need to use int64 hash codes in scale-out or increase the - * page size (at least for the read-only hash segment files, which would also - * need a batch build operation). The AccessPath will also need to be updated to - * be aware of classes which do not support key-range scans, but only whole - * relation scans. - * - * Locking on hash tables without versioning should be much simpler than locking - * on B+Trees since there is no hierarchy and more operations can proceed - * without blocking in parallel. - * - * We can represent tuples (key,value pairs) in an IRaba data structure and - * reuse parts of the B+Tree infrastructure relating to compression of IRaba, - * key search, etc. In fact, we might use to lazy reordering notion from Monet - * DB cracking to only sort the keys in a bucket when it is persisted. This is - * also a good opportunity to tackling splitting the bucket if it overflows the - * target record size, e.g., 4k. We could throw out an exception if the sorted, - * serialized, and optionally compressed record exceeds the target record size - * and then split the bucket. All of this seems reasonable and we might be able - * to then back port those concepts into the B+Tree. - * - * We need to estimate the #of tuples which will fit within the bucket. We can - * do this based on: (a) the byte length of the keys and values (key compression - * is not going to help out much for a hash index since the keys will be evenly - * distributed even if they are ordered within a bucket); (b) the known per - * tuple overhead and per bucket overhead; (c) an estimate of the compression - * ratio for raba encoding and record compression. This estimate could be used - * to proactively split a bucket before it is evicted. This is most critical - * before anything is evicted as we would otherwise have a single very large - * bucket. So, let's make this simple and split the bucket if the sum of the key - * + val bytes exceeds 120% of the target record size (4k, 8k, etc). The target - * page size can be a property of the hash index. [Note: There is an implicit - * limit on the size of a tuple with this approach. The alternative is to fix - * the #of tuples in the bucket and allow buckets to be of whatever size they - * are for the specific data in that bucket.] - * - * - RWStore with "temporary" quality. Creates the backing file lazily on - * eviction from the write service. - * - * - RWStore with "RAM" only? (Can not exceed the #of allocated buffers or can, - * but then it might force paging out to swap?) - * - * - RWStore with "RAM" mostly. Converts to disk backed if uses all those - * buffers. 
Possibly just give the WriteCacheService a bunch of write cache - * buffers (10-100) and have it evict to disk *lazily* rather than eagerly (when - * the #of free buffers is down to 20%). - * - * - RWStore with memory mapped file? As I recall, the problem is that we can - * not guarantee extension or close of the file under Java. But some people seem - * to make this work... + * @see https://sourceforge.net/apps/trac/bigdata/ticket/203 */ public class TestExtensibleHashing extends TestCase2 { @@ -547,13 +496,18 @@ return bucketSize; } - + /** * Return <code>true</code> iff the hash table contains the key. + * <p> + * Lookup: Compute h(K) and right shift (w/o sign extension) by i bits. + * Use this to index into the bucket address table. The address in the + * table is the bucket address and may be used to directly read the + * bucket. * * @param key * The key. - * + * * @return <code>true</code> iff the key was found. */ public boolean contains(final int key) { @@ -565,7 +519,12 @@ /** * Insert the key into the hash table. Duplicates are allowed. + * <p> + * Insert: Per lookup. On overflow, we need to split the bucket moving + * the existing records (and the new record) into new buckets. * + * @see #split(int, int, SimpleBucket) + * * @param key * The key. * @@ -577,12 +536,155 @@ final int h = hash(key); final int addr = addrOf(h); final SimpleBucket b = getBucket(addr); - b.insert(h,key); + if (b.insert(h, key)) { + return; + } + // split the bucket and insert the record (recursive?) + split(key, b); } /** + * Split the bucket, adjusting the address map iff necessary. How this + * proceeds depends on whether the hash #of bits used in the bucket is + * equal to the #of bits used to index into the bucket address table. + * There are two cases: + * <p> + * Case 1: If {@link #globalHashBits} EQ the + * {@link SimpleBucket#localHashBits}, then the bucket address table is + * out of space and needs to be resized. + * <p> + * Case 2: If {@link #globalHashBits} is GT + * {@link SimpleBucket#localHashBits}, then there will be at least two + * entries in the bucket address table which point to the same bucket. + * One of those entries is relabeled. The record is then inserted based + * on the new #of hash bits to be considered. If it still does not fit, + * then either handle by case (1) or case (2) as appropriate. + * <p> + * Note that records which are in themselves larger than the bucket size + * must eventually be handled by: (A) using an overflow record; (B) + * allowing the bucket to become larger than the target page size (using + * a larger allocation slot or becoming a blob); or (C) recording the + * tuple as a raw record and maintaining only the full hash code of the + * tuple and its raw record address in the bucket (this would allow us + * to automatically promote long literals out of the hash bucket and a + * similar approach might be used for a B+Tree leaf, except that a long + * key will still cause a problem [also, this implies that deleting a + * bucket or leaf on the unisolated index of the RWStore might require a + * scan of the IRaba to identify blob references which must also be + * deleted, so it makes sense to track those as part of the bucket/leaf + * metadata). + * + * @param h + * The key which triggered the split. + * @param b + * The bucket lacking sufficient room for the key which + * triggered the split. + * + * @todo caller will need an exclusive lock if this is to be thread + * safe. 
+ * + * @todo Overflow buckets (or oversize buckets) are required when all + * hash bits considered by the local bucket are the same, when all + * keys in the local bucket are the same, and when the record to + * be inserted is larger than the bucket. In order to handle these + * cases we may need to more closely integrate the insert/split + * logic since detecting some of these cases requires transparency + * into the bucket. + */ + private void split(final int key, final SimpleBucket b) { + if (globalHashBits < b.localHashBits) { + // This condition should never arise. + throw new AssertionError(); + } + if (globalHashBits == b.localHashBits) { + /* + * The address table is out of space and needs to be resized. + * + * Let {@link #globalHashBits} := {@link #globalHashBits} + 1. + * This doubles the size of the bucket address table. Each + * original entry becomes two entries in the new table. For the + * specific bucket which is to be split, a new bucket is + * allocated and the 2nd bucket address table for that entry is + * set to the address of the new bucket. The tuples are then + * assigned to the original bucket and the new bucket by + * considering the additional bit of the hash code. Assuming + * that all keys are distinct, then one split will always be + * sufficient unless all tuples in the original bucket have the + * same hash code when their (i+1)th bit is considered (this can + * also occur if duplicate keys are allow). In this case, we + * resort to an "overflow" bucket (alternatively, the bucket is + * allowed to be larger than the target size and gets treated as + * a blob). + */ +// doubleAddressSpace(); + /* + * Create a new bucket and wire it into the 2nd entry for the + * hash code for that key. + */ +// final int h = hash(key); +// final int addr1 = addrOf(h); +// final int addr2 = addr + 1; +// final SimpleBucket b1 = getBucket(addr); +// if (b1.insert(h, key)) { +// return; +// } + throw new UnsupportedOperationException(); + } + if (globalHashBits > b.localHashBits) { + /* + * There will be at least two entries in the address table which + * point to this bucket. One of those entries is relabeled. Both + * the original bucket and the new bucket have their {@link + * SimpleBucket#localHashBits} incremented by one, but the + * {@link #globalHashBits}. Of the entries in the bucket address + * table which used to point to the original bucket, the 1st + * half are left alone and the 2nd half are updated to point to + * the new bucket. (Note that the #of entries depends on the + * global #of hash bits in use and the bucket local #of hash + * bits in use and will be 2 if there is a difference of one + * between those values but can be more than 2 and will always + * be an even number). The entries in the original bucket are + * rehashed and assigned based on the new #of hash bits to be + * considered to either the original bucket or the new bucket. + * The record is then inserted based on the new #of hash bits to + * be considered. If it still does not fit, then either handle + * by case (1) or case (2) as appropriate. + */ + throw new UnsupportedOperationException(); + } + } + + /** + * Doubles the address space. + * + * FIXME Review the exact rule for doubling the address space. 
+ */ + private void doubleAddressSpace() { + globalHashBits += 1; + final int[] tmp = addressMap; + addressMap = new int[tmp.length << 1]; + for (int i = 0, j = 0; i < tmp.length; i++) { + addressMap[j++] = tmp[i]; + addressMap[j++] = tmp[i]; + } + } + + private void merge(final int h, final SimpleBucket b) { + throw new UnsupportedOperationException(); + } + + /** * Delete the key from the hash table (in the case of duplicates, a * random entry having that key is deleted). + * <p> + * Delete: Buckets may be removed no later than when they become empty + * and doing this is a local operation with costs similar to splitting a + * bucket. Likewise, it is clearly possible to coalesce buckets which + * underflow before they become empty by scanning the 2^(i-j) buckets + * indexed from the entries in the bucket address table using i bits + * from h(K). [I need to research handling deletes a little more, + * including under what conditions it is cost effective to reduce the + * size of the bucket address table itself.] * * @param key * The key. @@ -590,6 +692,10 @@ * @return <code>true</code> iff a tuple having that key was deleted. * * @todo return the deleted tuple. + * + * @todo merge buckets when they underflow/become empty? (but note that + * we do not delete anything from the hash map for a hash join, + * just insert, insert, insert). */ public boolean delete(final int key) { final int h = hash(key); @@ -676,8 +782,10 @@ * The hash code of the key. * @param key * The key. + * + * @return <code>false</code> iff the bucket must be split. */ - public void insert(final int h, final int key) { + public boolean insert(final int h, final int key) { if (size == data.length) { /* @@ -693,11 +801,13 @@ * manage the split. If the bucket handles splits, then we need * to pass in the table reference. */ - throw new UnsupportedOperationException(); + return false; } data[size++] = key; + return true; + } /** This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
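To make the addressing rule concrete: with int32 hash codes and MSB-based indexing (the convention consistent with the pairwise entry copy in doubleAddressSpace() above), a lookup touches exactly one bucket address table entry, and a bucket with local depth j is shared by 2^(i-j) consecutive entries. A minimal sketch under those assumptions; the field names are hypothetical:

    /** Return the address of the bucket for the given hash code. */
    int addrOf(final int h) {
        if (globalHashBits == 0)
            return addressMap[0]; // single bucket; h >>> 32 would be a no-op in Java.
        // Keep the top [globalHashBits] bits, without sign extension.
        final int index = h >>> (32 - globalHashBits);
        return addressMap[index];
    }

    /** #of address table entries pointing at a bucket of local depth j. */
    int sharedEntries(final int localHashBits) {
        return 1 << (globalHashBits - localHashBits); // 2^(i-j)
    }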
From: <tho...@us...> - 2010-12-03 18:48:11
|
Revision: 3991 http://bigdata.svn.sourceforge.net/bigdata/?rev=3991&view=rev Author: thompsonbry Date: 2010-12-03 18:48:02 +0000 (Fri, 03 Dec 2010) Log Message: ----------- Modified the Checkpoint record to support HTree as well as BTree. This change introduces a new version for the checkpoint record and is backwards compatible. Modified the DefaultLeafCoder to support hash buckets with the optional inclusion of 32-bit hash codes into the record. This does not change the binary layout of the leaf when hash values are not included and is therefore backward compatible. Added unit tests for the DefaultLeafCoder when used to store the data for an HTree bucket. Moved the HTree classes out of test. They are not ready for use, but the modification to support the hash bucket data page require that the various interfaces are declared in the src/java rather than the src/test code paths. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/Checkpoint.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/data/AbstractReadOnlyNodeData.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/data/DefaultLeafCoder.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/btree/data/AbstractLeafDataRecordTestCase.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/btree/data/AbstractNodeOrLeafDataRecordTestCase.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/btree/data/MockLeafData.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/AbstractHashPage.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HTableMetadata.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashBucket.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashDirectory.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashFunction.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashTree.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/data/ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/data/IBucketData.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/data/IDirectoryData.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/TestExtensibleHashing.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/MockBucketData.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestBucketDataRecord_Simple_Simple.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestLeafDataRecord_CanonicalHuffman_CanonicalHuffman.java Removed Paths: ------------- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/AbstractHashPage.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/ExtensibleHashMap.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/HTableCheckpoint.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/HTableMetadata.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/HashBucket.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/HashDirectory.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/HashFunction.java 
branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/TestExtensibleHashing.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/Checkpoint.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/Checkpoint.java 2010-12-01 21:43:35 UTC (rev 3990) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/Checkpoint.java 2010-12-03 18:48:02 UTC (rev 3991) @@ -34,17 +34,59 @@ // persistent and immutable. private long addrMetadata; - private long addrRoot; - private int height; - private int nnodes; - private int nleaves; - private int nentries; + private long addrRoot; // of root node/leaf for BTree; rootDir for HTree. + private int height; // height for BTree; globalDepth for HTree. + private int nnodes; // #of directories for HTree + private int nleaves; // #of buckets for HTree. + private int nentries; // #of tuples in the index. private long counter; - /** Note: added in {@link #VERSION1} and presumed 0L in earlier versions. */ private long addrBloomFilter; - /** + /** + * Added in {@link #VERSION1}. This is a short field allowing for 65536 + * different possible index types. + */ + private IndexTypeEnum indexType; + + /** + * Type safe enumeration of index types. + */ + public static enum IndexTypeEnum { + + /** BTree. */ + BTree((short)0), + + /** Extendable hash tree. */ + HTree((short)1); + + private IndexTypeEnum(final short code) { + + this.code = code; + + } + + private final short code; + + public short getCode() { + + return code; + + } + + static public IndexTypeEnum valueOf(final short code) { + switch (code) { + case 0: + return BTree; + case 1: + return HTree; + default: + throw new IllegalArgumentException("code=" + code); + } + } + } + + /** * The address used to read this {@link Checkpoint} record from the * store. * <p> @@ -99,20 +141,45 @@ return addrBloomFilter; } - - /** - * The height of the tree - ZERO(0) means just a root leaf. Values - * greater than zero give the #of levels of abstract nodes. There is - * always one layer of leaves which is not included in this value. - */ + + /** + * The height of a B+Tree. ZERO(0) means just a root leaf. Values greater + * than zero give the #of levels of abstract nodes. There is always one + * layer of leaves which is not included in this value. + * + * @return The global depth and ZERO (0) unless the checkpoint record is for + * an {@link IndexTypeEnum#BTree} + */ public final int getHeight() { - return height; + switch (indexType) { + case BTree: + return height; + default: + throw new UnsupportedOperationException(); + } } + /** + * The global depth of the root directory (HTree only). + * + * @return The global depth and ZERO (0) unless the checkpoint record is for + * an {@link IndexTypeEnum#HTree} + */ + public final int getGlobalDepth() { + + switch (indexType) { + case HTree: + return height; + default: + throw new UnsupportedOperationException(); + } + + } + /** - * The #of non-leaf nodes. + * The #of non-leaf nodes (B+Tree) or directories (HTree). */ public final int getNodeCount() { @@ -121,7 +188,7 @@ } /** - * The #of leaves. + * The #of leaves (B+Tree) or hash buckets (HTree). */ public final int getLeafCount() { @@ -130,7 +197,7 @@ } /** - * The #of index entries. + * The #of index entries (aka tuple count). 
*/ public final int getEntryCount() { @@ -155,7 +222,10 @@ public final String toString() { return "Checkpoint" + // - "{height=" + height + // + "{indexType=" + indexType + // + (indexType == IndexTypeEnum.BTree ? ",height=" + height + : (indexType == IndexTypeEnum.HTree ? ",globalDepth=" + + height : "")) + ",nnodes=" + nnodes + // ",nleaves=" + nleaves + // ",nentries=" + nentries + // @@ -195,7 +265,9 @@ 0, // nnodes 0, // nleaves 0, // nentries - 0L // counter + 0L, // counter + IndexTypeEnum.BTree // indexType + ); } @@ -223,7 +295,8 @@ 0, // nnodes 0, // nleaves 0, // nentries - oldCheckpoint.counter + oldCheckpoint.counter,// + IndexTypeEnum.BTree// ); } @@ -276,15 +349,19 @@ btree.nnodes,// btree.nleaves,// btree.nentries,// - btree.counter.get()// + btree.counter.get(),// + IndexTypeEnum.BTree// ); } - private Checkpoint(final long addrMetadata, final long addrRoot, - final long addrBloomFilter, final int height, final int nnodes, - final int nleaves, final int nentries, final long counter) { + private Checkpoint(final long addrMetadata, final long addrRoot, + final long addrBloomFilter, final int height, final int nnodes, + final int nleaves, final int nentries, final long counter, + final IndexTypeEnum indexType) { + assert indexType != null; + /* * Note: The constraint on [addrMetadata] is relaxed in order to permit * a transient BTree (no backing store). @@ -313,6 +390,8 @@ this.counter = counter; + this.indexType = indexType; + } /** @@ -327,11 +406,17 @@ * {@link Checkpoint} record. */ private static transient final int VERSION0 = 0x0; + + /** + * Adds the {@link #indexType} field and the {@link #globalDepth} field, + * which is present only for {@link IndexTypeEnum#HTree}. + */ + private static transient final int VERSION1 = 0x1; /** * The current version. */ - private static transient final int VERSION = VERSION0; + private static transient final int VERSION = VERSION1; /** * Write the {@link Checkpoint} record on the store, setting @@ -386,8 +471,13 @@ final int version = in.readInt(); - if (version != VERSION0) - throw new IOException("Unknown version: " + version); + switch (version) { + case VERSION0: + case VERSION1: + break; + default: + throw new IOException("Unknown version: " + version); + } this.addrMetadata = in.readLong(); @@ -405,7 +495,19 @@ this.counter = in.readLong(); - in.readLong(); // unused. + switch (version) { + case VERSION0: + in.readLong(); // unused + indexType = IndexTypeEnum.BTree; + break; + case VERSION1: + this.indexType = IndexTypeEnum.valueOf(in.readShort()); + in.readShort();// ignored. + in.readInt();// ignored. + break; + default: + throw new AssertionError(); + } in.readLong(); // unused. @@ -431,10 +533,20 @@ out.writeLong(counter); - out.writeLong(0L/*unused*/); + /* + * 8 bytes follow. + */ - out.writeLong(0L/*unused*/); - - } + out.writeShort(indexType.getCode()); + out.writeShort(0/* unused */); + out.writeInt(0/* unused */); + /* + * 8 bytes follow. 
+ */ + + out.writeLong(0L/* unused */); + + } + } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/data/AbstractReadOnlyNodeData.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/data/AbstractReadOnlyNodeData.java 2010-12-01 21:43:35 UTC (rev 3990) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/data/AbstractReadOnlyNodeData.java 2010-12-03 18:48:02 UTC (rev 3991) @@ -129,6 +129,14 @@ */ protected static final short DELTA_VERSION_TIMESTAMPS = 1 << 2; + /** + * Bit flag indicating that the int32 hash of the key should be stored in + * the leaf data record. The function used to compute hash code will be + * known to the owning data structure. This is primarily intended for use + * with hash trees. + */ + protected static final short FLAG_HASH_KEYS = 1 << 3; + /** * The size of the field in the data record which encodes whether the data * record represents a B+Tree {@link #NODE}, a {@link #LEAF}, or a Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/data/DefaultLeafCoder.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/data/DefaultLeafCoder.java 2010-12-01 21:43:35 UTC (rev 3990) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/data/DefaultLeafCoder.java 2010-12-03 18:48:02 UTC (rev 3991) @@ -43,6 +43,7 @@ import com.bigdata.btree.raba.IRaba; import com.bigdata.btree.raba.codec.ICodedRaba; import com.bigdata.btree.raba.codec.IRabaCoder; +import com.bigdata.htree.data.IBucketData; import com.bigdata.io.AbstractFixedByteArrayBuffer; import com.bigdata.io.DataOutputBuffer; @@ -53,7 +54,7 @@ * @version $Id$ */ public class DefaultLeafCoder implements IAbstractNodeDataCoder<ILeafData>, - Externalizable { + Externalizable { /** * @@ -189,12 +190,16 @@ short flags = 0; final boolean hasDeleteMarkers = leaf.hasDeleteMarkers(); final boolean hasVersionTimestamps = leaf.hasVersionTimestamps(); + final boolean hasHashKeys = leaf instanceof IBucketData; // @todo add hasHashKeys() method? if (hasDeleteMarkers) { flags |= AbstractReadOnlyNodeData.FLAG_DELETE_MARKERS; } if (hasVersionTimestamps) { flags |= AbstractReadOnlyNodeData.FLAG_VERSION_TIMESTAMPS; } + if(hasHashKeys) { + flags |= AbstractReadOnlyNodeData.FLAG_HASH_KEYS; + } buf.putShort(flags); @@ -341,6 +346,88 @@ } + // hash codes of the keys (MSB prefix plus LSB coded). +// final int O_hashKeys; + if (hasHashKeys) { + + // The bit length of the hash values. + final int hashBitLength = 32;//((IBucketData)leaf).getHashBitLength(); + + // The bit length of the shared MSB prefix. + final int lengthMSB = ((IBucketData)leaf).getLengthMSB(); + + // The bit length of the LSB which differ for each hash value. + final int lengthLSB = hashBitLength - lengthMSB; + +// buf.putShort((short) hashBitLength); + + buf.putShort((short) lengthMSB); + +// O_hashKeys = buf.pos(); + + if (nkeys > 0) { + + final int byteLength = BytesUtil + .bitFlagByteLength((lengthMSB + (nkeys * lengthLSB)) * 8/* nbits */); + + final byte[] a = new byte[byteLength]; + + final OutputBitStream obs = new OutputBitStream(a); + + try { + + // The hash of the first key. + int h = ((IBucketData) leaf).getHash(0/* index */); + + // Drop off the LSB bits, leaving the MSB bits in the LSB position. + h = h >>> lengthLSB; + +// // Reverse bits to since obs writes the LSB of the int. +// h = Integer.reverse(h); + + // The MSB prefix. 
+ obs.writeInt(h, lengthMSB/* MSB bits */); + + // The LSB of the hash of each key. + for (int i = 0; i < nkeys; i++) { + + // The hash of this key. + h = ((IBucketData)leaf).getHash(i); + + // Drop off the MSB bits. + h = h >>> lengthMSB; + +// // Reverse bits since obs writes the LSB of the int. +// h = Integer.reverse(h); + + // The LSB. + obs.writeInt(h, lengthLSB); + + } + + // copy onto the buffer. + buf.put(a); + + } catch (IOException e) { + throw new RuntimeException(e); + // Note: close is not necessary if flushed and backed by + // byte[]. + // } finally { + // try { + // obs.close(); + // } catch (IOException e) { + // log.error(e); + // } + } + + } + +// } else { +// +// O_hashKeys = -1; + + } + // Slice containing the coded leaf. final AbstractFixedByteArrayBuffer slice = buf.slice(// O_origin, buf.pos() - O_origin); @@ -373,7 +460,7 @@ * @version $Id$ */ static private class ReadOnlyLeafData extends AbstractReadOnlyNodeData<ILeafData> - implements ILeafData { + implements ILeafData, IBucketData { /** The backing buffer. */ private final AbstractFixedByteArrayBuffer b; @@ -407,6 +494,25 @@ */ private final int versionTimestampBits; + /** + * Offset of the int32 hash values in the buffer encoding hash value of + * the tuple keys -or- <code>-1</code> if the leaf does not report those + * data. + */ + private final int O_hashKeys; + + /** + * The #of bits used to code the hash keys -or- ZERO (0) if they are not + * present. (The length of the MSB hash prefix is 32-lengthLSB.) + */ + private final int lengthLSB; + + /** + * The MSB hash prefix shared by all hash codes on this page -or- ZERO + * (0) if hash codes are not present in the page. + */ + private final int hashMSB; + public final AbstractFixedByteArrayBuffer data() { return b; @@ -469,6 +575,7 @@ pos += SIZEOF_FLAGS; final boolean hasVersionTimestamps = ((flags & FLAG_VERSION_TIMESTAMPS) != 0); final boolean hasDeleteMarkers = ((flags & FLAG_DELETE_MARKERS) != 0); + final boolean hasHashKeys = ((flags & FLAG_HASH_KEYS) != 0); this.nkeys = buf.getInt(pos); pos += SIZEOF_NKEYS; @@ -523,6 +630,49 @@ } + if(hasHashKeys) { + + final int lengthMSB = buf.getShort(pos); + pos += 2; + + lengthLSB = 32 /* hashBitLength */- lengthMSB; + + /* + * The byte offset to the start of the bit coded hash keys. The + * first bit coded value is the MSB prefix. You need to skip + * over that when indexing into the LSB array. + */ + O_hashKeys = pos; + + final int byteLength = BytesUtil + .bitFlagByteLength((lengthMSB + (nkeys * lengthLSB)) * 8/* nbits */); + + if (nkeys > 0) { + + final InputBitStream ibs = buf.slice(pos, byteLength) + .getInputBitStream(); + + try { + hashMSB = ibs.readInt(lengthMSB); + } catch (IOException ex) { + // Note: should not be thrown. + throw new RuntimeException(ex); + } + + } else { + + hashMSB = 0; + + } + + } else { + + O_hashKeys = -1; + lengthLSB = 0; + hashMSB = 0; + + } + // save reference to buffer this.b = buf; @@ -584,6 +734,7 @@ pos += SIZEOF_FLAGS; final boolean hasVersionTimestamps = ((flags & FLAG_VERSION_TIMESTAMPS) != 0); final boolean hasDeleteMarkers = ((flags & FLAG_DELETE_MARKERS) != 0); + final boolean hasHashKeys = ((flags & FLAG_HASH_KEYS) != 0); this.nkeys = buf.getInt(pos); pos += SIZEOF_NKEYS; @@ -638,6 +789,49 @@ } + if(hasHashKeys) { + + final int lengthMSB = buf.getShort(pos); + pos += 2; + + lengthLSB = 32 /* hashBitLength */- lengthMSB; + + /* + * The byte offset to the start of the bit coded hash keys. The + * first bit coded value is the MSB prefix. 
You need to skip + * over that when indexing into the LSB array. + */ + O_hashKeys = pos; + + final int byteLength = BytesUtil + .bitFlagByteLength((lengthMSB + (nkeys * lengthLSB)) * 8/* nbits */); + + if (nkeys > 0) { + + final InputBitStream ibs = buf.slice(pos, byteLength) + .getInputBitStream(); + + try { + hashMSB = ibs.readInt(lengthMSB); + } catch (IOException ex) { + // Note: should not be thrown. + throw new RuntimeException(ex); + } + + } else { + + hashMSB = 0; + + } + + } else { + + O_hashKeys = -1; + lengthLSB = 0; + hashMSB = 0; + + } + // save reference to buffer this.b = buf; @@ -709,6 +903,12 @@ } + final public boolean hasHashKeys() { + + return (flags & FLAG_HASH_KEYS) != 0; + + } + public long getMinimumVersionTimestamp() { if (!hasVersionTimestamps()) @@ -770,7 +970,55 @@ return b.getBit((O_deleteMarkers << 3) + index); } + + final public int getLengthMSB() { + + + if (!hasHashKeys()) + throw new UnsupportedOperationException(); + + final int lengthMSB = 32/* hashBitLength */- lengthLSB; + + return lengthMSB; + + } + final public int getHash(final int index) { + + if (index < 0 || index >= nkeys) + throw new IllegalArgumentException(); + + if (!hasHashKeys()) + throw new UnsupportedOperationException(); + + final int lengthMSB = 32/* hashBitLength */- lengthLSB; + + final int byteLength = BytesUtil.bitFlagByteLength(lengthMSB + + nkeys * lengthMSB/* nbits */); + + final InputBitStream ibs = b.slice(O_hashKeys, byteLength) + .getInputBitStream(); + + try { + + final long position = lengthMSB + index * lengthLSB; + + ibs.position(position); + + int h = ibs.readInt(lengthLSB); + + h |= hashMSB; + + return h; + + } catch(IOException ex) { + + throw new RuntimeException(ex); + + } + + } + final public IRaba getKeys() { return keys; @@ -942,6 +1190,26 @@ } + if (leaf instanceof IBucketData) { + + final IBucketData d = (IBucketData)leaf; + + sb.append(",\nhashCodes={lengthMSB=" + d.getLengthMSB() + + ",tuples=["); + + for (int i = 0; i < nkeys; i++) { + + if (i > 0) + sb.append(", "); + + sb.append(d.getHash(i)); + + } + + sb.append("]"); + + } + return sb; } Copied: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/AbstractHashPage.java (from rev 3990, branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/AbstractHashPage.java) =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/AbstractHashPage.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/AbstractHashPage.java 2010-12-03 18:48:02 UTC (rev 3991) @@ -0,0 +1,100 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Dec 1, 2010 + */ +package com.bigdata.htree; + +import java.lang.ref.Reference; + +import org.apache.log4j.Logger; + +import com.bigdata.btree.PO; + +/** + * Abstract class for both directory and data pages for a hash index. + */ +abstract public class AbstractHashPage <T extends AbstractHashPage +/* + * DO-NOT-USE-GENERIC-HERE. The compiler will fail under Linux (JDK 1.6.0_14, + * _16). + */ +> extends PO //implements IAbstractNode, IAbstractNodeData +{ + + private final static transient Logger log = Logger + .getLogger(AbstractHashPage.class); + + /** + * Transient back reference to the index to which this directory belongs. + */ + protected transient HashTree htbl; + + /** + * <p> + * A {@link Reference} to this page. This is created when the page is + * created and effectively provides a canonical {@link Reference} object for + * any given page. + * </p> + * + * @todo Do we need back references for recursive directories? + */ + transient protected final Reference<? extends AbstractHashPage<T>> self; + + /** + * Disallowed. + */ + private AbstractHashPage() { + + throw new UnsupportedOperationException(); + + } + + protected AbstractHashPage(final HashTree htbl, final boolean dirty) { + + if(htbl == null) + throw new IllegalArgumentException(); + + this.htbl = htbl; + + // reference to self: reused to link parents and children. + this.self = htbl.newRef(this); + + if (!dirty) { + + /* + * Nodes default to being dirty, so we explicitly mark this as + * clean. This is ONLY done for the de-serialization constructors. + */ + + setDirty(false); + + } + +// @todo Add to the hard reference queue. +// btree.touch(this); + + } + +} Copied: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HTableMetadata.java (from rev 3990, branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/HTableMetadata.java) =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HTableMetadata.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HTableMetadata.java 2010-12-03 18:48:02 UTC (rev 3991) @@ -0,0 +1,106 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Dec 1, 2010 + */ +package com.bigdata.htree; + +import java.io.Externalizable; +import java.io.IOException; +import java.io.ObjectInput; +import java.io.ObjectOutput; +import java.util.UUID; + +/** + * Configuration options. + * + * @todo Reconcile with IndexMetadata. 
+ */ +public class HTableMetadata implements Externalizable { + + /** + * The unique identifier for the index. + */ + private UUID uuid; + + /** + * Function used to generate hash values from keys. + */ + private HashFunction hashFunction; + + private Object directoryCoder; + + private Object bucketCoder; + + /** + * Function decides whether to split a page, link an overflow page, or + * expand the size of a page. + */ + // private SplitFunction splitFunction; + + /** + * De-serialization constructor. + */ + public HTableMetadata() { + + } + + /** + * Anonymous hash index. + * + * @param uuid + * The unique index identifier. + */ + public HTableMetadata(final UUID uuid) { + + this(null/* name */, uuid); + + } + + /** + * Named hash index + * + * @param name + * The index name. + * @param uuid + * The unique index identifier. + */ + public HTableMetadata(final String name, final UUID uuid) { + + } + + @Override + public void readExternal(ObjectInput in) throws IOException, + ClassNotFoundException { + // TODO Auto-generated method stub + throw new UnsupportedOperationException(); + } + + @Override + public void writeExternal(ObjectOutput out) throws IOException { + // TODO Auto-generated method stub + throw new UnsupportedOperationException(); + } + +} Copied: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashBucket.java (from rev 3990, branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/HashBucket.java) =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashBucket.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashBucket.java 2010-12-03 18:48:02 UTC (rev 3991) @@ -0,0 +1,689 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Nov 29, 2010 + */ +package com.bigdata.htree; + +import java.util.Iterator; + +import org.apache.log4j.Logger; + +import com.bigdata.btree.IOverflowHandler; +import com.bigdata.btree.IndexSegment; +import com.bigdata.btree.data.IAbstractNodeDataCoder; +import com.bigdata.btree.data.ILeafData; +import com.bigdata.btree.raba.IRaba; +import com.bigdata.htree.data.IBucketData; +import com.bigdata.htree.data.IDirectoryData; +import com.bigdata.io.AbstractFixedByteArrayBuffer; +import com.bigdata.rawstore.IRawStore; + +/** + * A (very) simple hash bucket. The bucket holds N int32 keys. + * + * @todo The hash of the key should be part of the ITuple interface so it can be + * passed along based on the application level encoding of the key. + * + * @todo Support out-of-line representations of the key and/or value for a tuple + * when they are large. The definition of "large" can be a configuration + * value for the index metadata. 
For example, 1/4 of the target page size + * or (1k assuming a target page size of 4k). It should also be possible + * to specify that the value is always out of line (this corresponds to + * the common practice in a relational database of indexing into a + * persistent heap rather than the perfect indices with their inline data + * which we use for RDF statements). + * <p> + * The easiest way to do this is to treat the key and value separately and + * write them as raw records onto the backing store if they exceed the + * configured threshold. For the B+Tree, we can not readily move the key + * out of line since we need it for search, but it is easy to do this for + * the HTree. (For now, I suggest that we live with the constraint that + * the key can not be moved out of line for the B+Tree.) For both index + * structures, it is easy to move the value out of line. The tuple + * metadata will stay inline regardless. + * <p> + * In order to resolve out of line keys and/or values the + * {@link ILeafData} will need access to the {@link IRawStore} reference. + * This may require an API change to {@link IRaba} and/or + * {@link IAbstractNodeDataCoder} (the latter also needs to be modified to + * work with {@link IDirectoryData} records) in order to made the + * {@link IRawStore} reference available when the record is serialized + * and/or deserialized. + * <p> + * When the tuple is deleted, the raw record reference for its key and/or + * value must also be deleted. + * <p> + * During a bulk index build, the raw record must be copied to the target + * index store, e.g., an {@link IndexSegment} using an + * {@link IOverflowHandler}. + */ +public class HashBucket extends AbstractHashPage<HashBucket>// +// implements IBucketData// +{ + + private final transient static Logger log = Logger + .getLogger(HashBucket.class); + + /** + * The #of hash code bits which are in use by this {@link HashBucket}. + * <p> + * Note: There are <code>2^(globalBits-localBits)</code> dictionary entries + * which address a given page. Initially, globalBits := 1 and localBits := + * 0. For these values, we have <code>2^(1-0) == 2</code> references to the + * initial page of the hash table. + * + * @todo If we need to examine this when we change the size of the address + * space then it makes more sense to have this as local metadata in + * the address table than as local data in the bucket (the latter + * would require us to visit each bucket when expanding the address + * space). This only needs to be 4 bits to express values in [0:31]. + * + * @todo When overflow buckets are chained together, does each bucket have + * {@link #localHashBits}? If they do, then we need to make sure that + * all buckets in the chain are updated. If {@link #localHashBits} is + * only marked on the first bucket in the chain then we need to + * correctly ignore it on overflow buckets. + * + * @todo adjusting this dirties the bucket (unless the #of local bits its + * stored in the address table entry, but that increases the in-memory + * burden of the address table). + */ + private int localHashBits; + + /** + * The #of keys stored in the bucket. The keys are stored in a dense array. + * For a given {@link #size}, the only indices of the array which have any + * data are [0:{@link #size}-1]. + */ + int size; + + /** + * The user data for the bucket. + * + * @todo IRaba keys plus IRaba vals. 
+ */ + final int[] data; + + protected void setLocalHashBits(final int localHashBits) { + + this.localHashBits = localHashBits; + + } + + public int getLocalHashBits() { + return localHashBits; + } + + /** + * Human friendly representation. + */ + public String toString() { + final StringBuilder sb = new StringBuilder(); + sb.append(super.toString()); + sb.append("{localHashBits=" + getLocalHashBits()); + sb.append(",size=" + size); + sb.append(",values={"); + for (int i = 0; i < size; i++) { + if (i > 0) + sb.append(','); + sb.append(Integer.toString(data[i])); + } + sb.append("}}"); + return sb.toString(); + } + + /** + * Create a new mutable bucket. + * + * @param htbl + * @param localHashBits + * @param bucketSize + */ + public HashBucket(final HashTree htbl, + final int localHashBits, final int bucketSize) { + + super(htbl, true/* dirty */); + + if (localHashBits < 0 || localHashBits > 32) + throw new IllegalArgumentException(); + + if (bucketSize <= 0) + throw new IllegalArgumentException(); + + this.localHashBits = localHashBits; + + this.data = new int[bucketSize]; + + // one more bucket. + htbl.nbuckets++; + + } + + /** + * Return <code>true</code> if the bucket contains the key. + * + * @param h + * The hash code of the key. + * @param key + * The key. + * + * @return <code>true</code> if the key was found in the bucket. + * + * @todo passing in the hash code here makes sense when the bucket stores + * the hash values, e.g., if we always do that or if we have an out of + * bucket reference to a raw record because the tuple did not fit in + * the bucket. + */ + public boolean contains(final int h, final int key) { + + for (int i = 0; i < size; i++) { + + if (data[i] == key) + return true; + + } + + return false; + + } + + /** + * Type safe enumeration reports on the various outcomes when attempting to + * insert a tuple into a page. + * + * @todo The problem with this enumeration (or with using a return code per + * the following javadoc) is that page splits are going to be deferred + * until the page is evicted unless there is an aspect of the split + * function which decides based on the #of tuples on the page. If a + * the split function reports that the page is over capacity when it + * is evicted, then we need to decide whether to split the page, chain + * an overflow page, or use a larger capacity page. + * <p> + * What we should do is scan the page if an insert would fail (or if + * the serialization of the page would fail) and determine what local + * depth we would need to successfully split the page (e.g., no more + * than 70% of the items would be in any prefix at a given depth). + * That can be used to guide the decision to use overflow pages or + * expand the directory. + * <p> + * What are some fast techniques for counting the #of bits which we + * need to make the necessary distinctions in the bucket? Should we + * build a trie over the hash codes? + */ + private static enum InsertEnum { + /** + * The tuple was inserted successfully into this page. + * + * @todo This could be reported as ZERO (0), which is an indication that + * NO expansions where required to insert the tuple into the page. + */ + OK, + /** + * The insert failed because the page is full. Further, the tuple has + * the same key value as all other tuples on the page. Therefore, either + * the insert must be directed into an overflow page or the page size + * must be allowed to increase. 
+ * + * @todo This could be reported as {@link Integer#MAX_VALUE}, which is + * an indication that infinite expansions will not make it + * possible to insert the key into this page (e.g., an overflow + * page is required). [Alternatively, this could report the + * necessary page size if we allow the page size to expand.] + */ + KEYS_ARE_IDENTICAL, + /** + * The insert failed because the page is full. Further, the hash + * associated with the tuple is the same as the hash for all other keys + * on the page. In this case, the insert operation will eventually + * succeed if the address space is expanded (one or more times). + * + * @todo This could be reported as the #of bits which are in common for + * the keys in this page. That could be used to determine how many + * expansions would be required before the key could be inserted. + * [If KEYS_ARE_IDENTICAL is handled by reporting the necessary + * page size, then this could report the #of hash bits which are + * identical using a negative integer (flipping the sign).] + */ + HASH_IS_IDENTICAL; + } + + /** + * Insert the key into the bucket (duplicates are allowed). It is an error + * if the bucket is full. + * + * @param h + * The hash code of the key. + * @param key + * The key. + * + * @return <code>false</code> iff the bucket must be split. + * + * @todo The caller needs to be careful that [h] is the full hash code for + * the key. Normally this is not a problem, but we sometimes wind up + * with masked off hash codes, especially during splits and merges, + * and those must not be passed in here. + */ + public void insert(final int h, final int key) { + + if (size == data.length) { + + /* + * The bucket must be split, potentially recursively. + * + * Note: Splits need to be triggered based on information which is + * only available to the bucket when it considers the insert of a + * specific tuple, including whether the tuple is promoted to a raw + * record reference, whether the bucket has deleted tuples which can + * be compacted, etc. + * + * @todo I need to figure out where the control logic goes to manage + * the split. If the bucket handles splits, then we need to pass in + * the table reference. + */ + + // split the bucket and insert the record (recursive?) + split(key, this); + + /* + * Insert the key into the expanded hash table (this will insert + * into either the old or the new bucket, depending on the hash code + * for the key). + * + * FIXME There are a variety of special conditions which need to be + * handled by insert(), especially all keys have the same value or + * the same int32 hash code or the tuple is too large for the + * bucket. Those conditions all need to be handled before requested + * a split. Since insert() has to handle all of this, it is also + * responsible for re-attempting the key insertion after the split. + * + * The next step is to handle cases where splitting the bucket once + * does not result in a bucket with sufficient space for the new + * key. There are actually two cases here: (1) the hash codes of the + * keys are distinct, so if we double the address space enough times + * the insert will succeed; (2) the hash codes of the keys are + * identical, so no amount of expansion of the address space will + * permit the insert to succeed and an overflow page must be used. + * For (1) we can also chose to use an overflow page in order to + * prevent run away expansion of the address space. + * + * This class needs to be converted to use persistence and to use an + * IRaba for keys/values. 
For the sake of the unit tests, it needs + * to be parameterized for the overflow versus expand decision and + * the IRaba for the keys needs to be defined such that we have a + * guaranteed split when there are three integer keys (or a split + * function could be used to make this decision based on more + * general criteria). [Could also use a pure- append binary raba w/ + * compacting if the raba is full and there are deleted tuples.] + */ + if (log.isDebugEnabled()) + log.debug("retrying insert: key=" + key); + + /* + * @todo This can recurse until the address space reaches the + * maximum possible address space and then throw an exception. The + * code should be modified to use a decision function for growing + * the page, chaining an overflow page, or splitting the page (when + * it would cause the address space to be doubled). + */ + htbl.insert(key); + +// { +// // the hash value of the key. +// final int h = htbl.hash(key); +// // the address of the bucket for that hash code. +// final int addr = htbl.getRoot().addrOf(h); +// // the bucket for that address. +// final SimpleBucket btmp = htbl.getBucketAtStoreAddr(addr); +// if (btmp.insert(h, key)) { +// // insert was successful. +// return; +// } +// /* +// */ +// +// log +// .fatal("Split of bucket did not map space available for new key: key=" +// + key + ", table=" + htbl.dump()); +// +// throw new UnsupportedOperationException(); +// +// } + + return; + + } + + data[size++] = key; + + // one more entry in the index. + htbl.nentries++; + + } + + /** + * Delete a tuple having the specified key. If there is more than one such + * tuple, then a random tuple having the key is deleted. + * + * @param h + * The hash code of the key. + * @param key + * The key. + * + * @todo return the delete tuple. + */ + public boolean delete(final int h, final int key) { + + for (int i = 0; i < size; i++) { + + if (data[i] == key) { + + // #of tuples remaining beyond this point. + final int length = size - i - 1; + + if (length > 0) { + + // Keep the array dense by copying down by one. + System.arraycopy(data, i + 1/* srcPos */, data/* dest */, + i/* destPos */, length); + + } + + size--; + + // one less entry in the index. + htbl.nentries--; + + return true; + + } + + } + + return false; + + } + + /** + * The #of entries in the bucket. + */ + public int getEntryCount() { + + return size; + + } + + /** + * Visit the entries in any order. + */ + public Iterator<Integer/* key */> getEntries() { + + return new EntryIterator(); + + } + + /** + * Visits the entries in the page. + */ + private class EntryIterator implements Iterator<Integer> { + + private int current = 0; + + private EntryIterator() { + + } + + @Override + public boolean hasNext() { + return current < size; + } + + @Override + public Integer next() { + return data[current++]; + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + + } + + @Override + public void delete() throws IllegalStateException { + // TODO Auto-generated method stub + throw new UnsupportedOperationException(); + } + + /** + * Split the bucket, adjusting the address map iff necessary. How this + * proceeds depends on whether the hash #of bits used in the bucket is equal + * to the #of bits used to index into the bucket address table. There are + * two cases: + * <p> + * Case 1: If {@link #globalHashBits} EQ the + * {@link HashBucket#localHashBits}, then the bucket address table is out + * of space and needs to be resized. 
+ * <p> + * Case 2: If {@link #globalHashBits} is GT + * {@link HashBucket#localHashBits}, then there will be at least two + * entries in the bucket address table which point to the same bucket. One + * of those entries is relabeled. The record is then inserted based on the + * new #of hash bits to be considered. If it still does not fit, then either + * handle by case (1) or case (2) as appropriate. + * <p> + * Note that records which are in themselves larger than the bucket size + * must eventually be handled by: (A) using an overflow record; (B) allowing + * the bucket to become larger than the target page size (using a larger + * allocation slot or becoming a blob); or (C) recording the tuple as a raw + * record and maintaining only the full hash code of the tuple and its raw + * record address in the bucket (this would allow us to automatically + * promote long literals out of the hash bucket and a similar approach might + * be used for a B+Tree leaf, except that a long key will still cause a + * problem [also, this implies that deleting a bucket or leaf on the + * unisolated index of the RWStore might require a scan of the IRaba to + * identify blob references which must also be deleted, so it makes sense to + * track those as part of the bucket/leaf metadata). + * + * @param h + * The key which triggered the split. + * @param bold + * The bucket lacking sufficient room for the key which triggered + * the split. + * + * @todo caller will need an exclusive lock if this is to be thread safe. + * + * @todo Overflow buckets (or oversize buckets) are required when all hash + * bits considered by the local bucket are the same, when all keys in + * the local bucket are the same, and when the record to be inserted + * is larger than the bucket. In order to handle these cases we may + * need to more closely integrate the insert/split logic since + * detecting some of these cases requires transparency into the + * bucket. + * + * FIXME The caller could decide to switch to a larger page size or + * chain overflow pages together in order to increase storage + * utilization or handle buckets having large populations of identical + * keys (or keys with the same int32 hash code). [This decision must + * be made before we decide to split.] + * + * FIXME The caller should handle the promotion of large tuples to raw + * records when they are inserted, so we do not need to handle that + * here either. + */ + private void split(final int key, final HashBucket bold) { + + final int globalHashBits = htbl.getGlobalHashBits(); + + if (log.isDebugEnabled()) + log.debug("globalBits=" + globalHashBits + ",localHashBits=" + + bold.getLocalHashBits() + ",key=" + key); + + if (globalHashBits < bold.getLocalHashBits()) { + // This condition should never arise. + throw new AssertionError(); + } + + if (globalHashBits == bold.getLocalHashBits()) { + /* + * The address table is out of space and needs to be resized. + */ + htbl.getRoot().doubleAddressSpaceAndSplitBucket(key, bold); + // fall through + } + + if (globalHashBits > bold.getLocalHashBits()) { + /* + * Split the bucket. + */ + htbl.getRoot().splitBucket(key, bold); + // fall through. 
+ } + + } + + /* + * IBucketData + */ + + public int getHash(int index) { + // TODO Auto-generated method stub + return 0; + } + + public int getLengthMSB() { + // TODO Auto-generated method stub + return 0; + } + + /* + * IAbstractNodeData + */ + + public boolean hasVersionTimestamps() { + // TODO Auto-generated method stub + return false; + } + + public AbstractFixedByteArrayBuffer data() { + // TODO Auto-generated method stub + return null; + } + + public int getKeyCount() { + // TODO Auto-generated method stub + return 0; + } + + public IRaba getKeys() { + // TODO Auto-generated method stub + return null; + } + + public long getMaximumVersionTimestamp() { + // TODO Auto-generated method stub + return 0; + } + + public long getMinimumVersionTimestamp() { + // TODO Auto-generated method stub + return 0; + } + + public int getSpannedTupleCount() { + // TODO Auto-generated method stub + return 0; + } + + public boolean isCoded() { + // TODO Auto-generated method stub + return false; + } + + final public boolean isLeaf() { + + return true; + + } + + /** + * The result depends on the implementation. The {@link HashBucket} will be + * mutable when it is first created and is made immutable when it is + * persisted. If there is a mutation operation, the backing + * {@link IBucketData} is automatically converted into a mutable instance. + */ + final public boolean isReadOnly() { + +// return data.isReadOnly(); + // TODO Auto-generated method stub + return false; + + } + + /* + * ILeafData + */ + + public boolean getDeleteMarker(int index) { + // TODO Auto-generated method stub + return false; + } + + public long getNextAddr() { + // TODO Auto-generated method stub + return 0; + } + + public long getPriorAddr() { + // TODO Auto-generated method stub + return 0; + } + + public int getValueCount() { + // TODO Auto-generated method stub + return 0; + } + + public IRaba getValues() { + // TODO Auto-generated method stub + return null; + } + + public long getVersionTimestamp(int index) { + // TODO Auto-generated method stub + return 0; + } + + public boolean hasDeleteMarkers() { + // TODO Auto-generated method stub + return false; + } + + public boolean isDoubleLinked() { + // TODO Auto-generated method stub + return false; + } + +} Copied: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashDirectory.java (from rev 3990, branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/HashDirectory.java) =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashDirectory.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashDirectory.java 2010-12-03 18:48:02 UTC (rev 3991) @@ -0,0 +1,989 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Dec 1, 2010 + */ +package com.bigdata.htree; + +import java.lang.ref.Reference; +import java.util.Formatter; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.LinkedHashSet; +import java.util.Map; +import java.util.NoSuchElementException; +import java.util.Set; + +import org.apache.log4j.Logger; + +import com.bigdata.rawstore.IRawStore; +import com.bigdata.util.concurrent.Memoizer; + +/** + * A simple (flat) directory for an extensible hashing. + */ +public class HashDirectory extends AbstractHashPage<HashDirectory> { + + private final transient static Logger log = Logger + .getLogger(HashDirectory.class); + +/* FIXME We need a data record (interface and implementations) for the directory + * page. The data record for a bucket is more similar to a B+Tree leaf than is + * the data record for a directory to a B+Tree node. + */ +// /** +// * The data record. {@link MutableNodeData} is used for all mutation +// * operations. {@link ReadOnlyNodeData} is used when the {@link Node} is +// * made persistent. A read-only data record is automatically converted into +// * a {@link MutableNodeData} record when a mutation operation is requested. +// * <p> +// * Note: This is package private in order to expose it to {@link Leaf}. +// * +// * @todo consider volatile and private for {@link Node#data} and +// * {@link Leaf#data} with accessors and settors at package private +// * where necessary. +// */ +// INodeData data; + + /** + * The #of hash code bits which are in use by the {@link #addressMap}. Each + * hash bucket also as a local #of hash bits. Given <code>i</code> is the + * #of global hash bits and <code>j</code> is the number of hash bits in + * some bucket, there will be <code>2^(i-j)</code> addresses which point to + * the same bucket. + */ + private int globalHashBits; + + /** + * The maximum number of directories entries which are permitted. + * + * @todo This can be ignored for a hash tree (recursive directories). In + * that case we are concerned with when to split the directory because + * the page is full rather than an absolute maximum on the address + * space size. + */ + private final int maximumCapacity; + + /** + * The address map. You index into this map using {@link #globalHashBits} + * out of the hash code for a probe key. The values are storage addresses + * for the backing {@link IRawStore}. The address will be {@link #NULL} if + * the corresponding child is dirty, in which case {@link #childRefs} will + * always have a {@link Reference} to the dirty child. This pattern is used + * in combination with either strong references or weak references and a + * ring buffer to manage the incremental eviction of dirty pages. + * + * @todo make this into a private IDirectoryData record. + * <p> + * It seems likely that we want to also record the local depth for + * each child in the IDataDirectory record and a flag indicating + * whether the child is a bucket or a directory page. + */ + private long[] addressMap; + + /** + * <p> + * Weak references to child pages (may be directories or buckets). The + * capacity of this array depends on the #of global bits for the directory. + * </p> + * <p> + * Note: This should not be marked as volatile. 
Volatile does not make the + * elements of the array volatile, only the array reference itself. The + * field would be final except that we clear the reference when stealing the + * array or deleting the node. + * </p> + * + * @todo document why package private (AbstractBTree.loadChild uses this but + * maybe that method could be moved to Node). + */ + private transient/* volatile */Reference<AbstractHashPage<?>>[] childRefs; + + public String toString() { + + return super.toString(); + + } + + /** + * Dumps the buckets in the directory along with metadata about the + * directory. + * + * @param sb + * Where to write the dump. + */ + protected void dump(final StringBuilder sb) { + + // used to remember the visited pages by their addresses (when non-NULL) + final Set<Long/* addrs */> visitedAddrs = new LinkedHashSet<Long>(); + + // used to remember the visited pages when they are transient. + final Map<AbstractHashPage/* children */, Integer/* label */> visitedChildren = new LinkedHashMap<AbstractHashPage, Integer>(); + + // used to format the address table. + final Formatter f = new Formatter(sb); + + // scan through the address table. + for (int index = 0; index < addressMap.length; index++) { + + boolean visited = false; + + long addr = addressMap[index]; + + if (addr != NULL && !visitedAddrs.add(addr)) { + + visited = true; + + } + + HashBucket b = (HashBucket) (childRefs[index]).get(); + + if (b != null && visitedChildren.containsKey(b)) { + + visited = true; + + } else { + + visitedChildren.put(b, index); + + } + + if(b == null) { + + // materialize the bucket. + b = getBucketFromEntryIndex(index); + + addr = b.getIdentity(); + + } + + /* + * The label will be either the storage address followed by "P" (for + * Persistent) or the index of the directory entry followed by "T" + * (for Transient). + */ + final String label = addr == 0L ? (visitedChildren.get(b) + "T") + : (addr + "P"); + + f.format("\n%2d [%" + globalHashBits + "s] => (%8s)", index, + Integer.toBinaryString(HashTree.maskOff(index, + globalHashBits)), label); + + if (!visited) { + + /* + * Show the bucket details the first time we visit it. + */ + + // The #of local hash bits for the target page. + final int localHashBits = b.getLocalHashBits(); + + // The #of entries in this directory for that target page. + final int nrefs = HashTree.pow2(globalHashBits + - localHashBits); + + sb.append(" [k=" + b.getLocalHashBits() + ", n=" + nrefs + + "] {"); + + final Iterator<Integer> eitr = b.getEntries(); + + boolean first = true; + + while(eitr.hasNext()) { + + if (!first) + sb.append(", "); + + sb.append(eitr.next()/*.getObject()*/); + + first = false; + + } + + sb.append("}"); + + } + + } + + sb.append('\n'); + + } + + /** + * Create a new mutable directory page. + * + * @param htbl + * @param initialCapacity + * The initial capacity is the #of buckets which may be stored in + * the hash table before it must be resized. It is expressed in + * buckets and not tuples because there is not (in general) a + * fixed relationship between the size of a bucket and the #of + * tuples which can be stored in that bucket. This will be + * rounded up to the nearest power of two. + * @param maximumCapacity + * @param bucketSize + * + * @todo both maximumCapacity and bucketSize will go away. The maximum + * capacity will be replaced by a decision function for splitting the + * directory page. The bucketSize will be replaced by a decision + * function for splitting, overflowing, or growing the bucket page. 
+ */ + @SuppressWarnings("unchecked") + protected HashDirectory(final HashTree htbl, + final int initialCapacity, final int maximumCapacity, + final int bucketSize) { + + super(htbl, true /* dirty */); + + if (initialCapacity <= 0) + throw new IllegalArgumentException(); + + if (maximumCapacity < initialCapacity) + throw new IllegalArgumentException(); + + this.maximumCapacity = maximumCapacity; + + /* + * Setup the hash table given the ini... [truncated message content] |
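For reference, the addressing invariants that HashDirectory relies on reduce to simple power-of-two arithmetic. The sketch below is not code from this change set: pow2 and maskOff are illustrative stand-ins for the corresponding HashTree helpers, and the bit widths and key value are arbitrary example data.

    /**
     * Minimal sketch of extensible-hashing directory addressing, assuming an
     * int32 hash whose low-order bits index the address table.
     */
    public class ExtensibleHashSketch {

        /** 2^n for small non-negative n. */
        static int pow2(final int n) {
            return 1 << n;
        }

        /** Mask off all but the low [nbits] bits of the hash value. */
        static int maskOff(final int h, final int nbits) {
            return h & (pow2(nbits) - 1);
        }

        public static void main(final String[] args) {

            final int globalBits = 3; // #of bits used by the directory.
            final int localBits = 1;  // #of bits used by some bucket.

            // Invariant: 2^(globalBits - localBits) directory entries point
            // at the same bucket.
            final int nrefs = pow2(globalBits - localBits);
            System.out.println("entries addressing the bucket: " + nrefs); // 4

            // A probe key is routed using the low globalBits of its hash.
            final int h = 45; // binary 101101
            System.out.println("directory index: " + maskOff(h, globalBits)); // 5

            /*
             * On a split: if localBits < globalBits, half of the 2^(g-l)
             * entries are relabeled to point at a new bucket and localBits is
             * incremented; if localBits == globalBits, the address table must
             * first double (globalBits + 1) before the bucket can split.
             */
        }
    }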
From: <tho...@us...> - 2010-12-03 22:26:48
|
Revision: 3993 http://bigdata.svn.sourceforge.net/bigdata/?rev=3993&view=rev Author: thompsonbry Date: 2010-12-03 22:26:40 +0000 (Fri, 03 Dec 2010) Log Message: ----------- Continued work on the HTree, especially on the interface, persistent data record, and mutable data record for the hash bucket. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/data/DefaultLeafCoder.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashBucket.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashDirectory.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashTree.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/data/IBucketData.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/btree/AbstractBTreeTestCase.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/btree/data/AbstractLeafDataRecordTestCase.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/MockBucketData.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestBucketDataRecord_Simple_Simple.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/IHashTuple.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/MutableBucketData.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/AbstractHashBucketDataRecordTestCase.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestBucketDataRecord_CanonicalHuffman_CanonicalHuffman.java Removed Paths: ------------- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestLeafDataRecord_CanonicalHuffman_CanonicalHuffman.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/data/DefaultLeafCoder.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/data/DefaultLeafCoder.java 2010-12-03 22:25:13 UTC (rev 3992) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/data/DefaultLeafCoder.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -35,6 +35,8 @@ import java.io.IOException; import java.io.ObjectInput; import java.io.ObjectOutput; +import java.util.Iterator; +import java.util.NoSuchElementException; import org.apache.log4j.Logger; @@ -368,7 +370,7 @@ if (nkeys > 0) { final int byteLength = BytesUtil - .bitFlagByteLength((lengthMSB + (nkeys * lengthLSB)) * 8/* nbits */); + .bitFlagByteLength((lengthMSB + (nkeys * lengthLSB))/* nbits */); final byte[] a = new byte[byteLength]; @@ -443,22 +445,45 @@ return encodeLive(leaf, buf).data(); } - - /** - * A read-only view of the data for a B+Tree leaf based on a compact record - * format. While some fields are cached, for the most part the various data - * fields, including the keys and values, are accessed in place in the data - * record in order to minimize the memory footprint of the leaf. The keys and - * values are coded using a caller specified {@link IRabaCoder}. The specific - * coding scheme is specified by the {@link IndexMetadata} for the B+Tree - * instance and is not stored within the leaf data record. - * <p> - * Note: The leading byte of the record format codes for a leaf, a double-linked - * leaf or a node in a manner which is compatible with {@link ReadOnlyNodeData}. 
- * - * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id$ - */ + + /** + * A read-only view of the data for a B+Tree leaf based on a compact record + * format. While some fields are cached, for the most part the various data + * fields, including the keys and values, are accessed in place in the data + * record in order to minimize the memory footprint of the leaf. The keys + * and values are coded using a caller specified {@link IRabaCoder}. The + * specific coding scheme is specified by the {@link IndexMetadata} for the + * B+Tree instance and is not stored within the leaf data record. The use of + * prefix coding for keys is a good general choices, but should not be used + * in combination with a hash tree unless an order preserving hashing + * function is being used. + * <p> + * Note: The leading byte of the record format codes for a leaf, a + * double-linked leaf or a node in a manner which is compatible with + * {@link ReadOnlyNodeData}. + * <p> + * The {@link DefaultLeafCoder} automatically maintains hash values for keys + * for an {@link IBucketData} record. The hash values of the keys in the + * bucket will have a shared prefix (the MSB hash prefix) which corresponds + * to the globalDepth of the path through the hash tree leading to this + * bucket less the localDepth of this bucket. It is therefore possible to + * store only the LSB bits of the hash values in the page and reconstruct + * the hash values using the MSB bits from the path through the hash tree. + * In order to be able to reconstruct the full hash code key based solely on + * local information, the MSB bits can be written out once and the LSB bits + * can be written out once per tuple. Testing the hash value of a key may + * then be done considering only the LSB bits of the hash value. This + * storage scheme also has the advantage that the hash value is not + * restricted to an int32 and is therefore compatible with the use of + * cryptographic hash functions. (If hash values are stored in a B+Tree leaf + * they will not shared this prefix property and can not be compressed in + * this manner). + * + * @author <a href="mailto:tho...@us...">Bryan + * Thompson</a> + * @version $Id: DefaultLeafCoder.java 3991 2010-12-03 18:48:02Z thompsonbry + * $ + */ static private class ReadOnlyLeafData extends AbstractReadOnlyNodeData<ILeafData> implements ILeafData, IBucketData { @@ -645,7 +670,7 @@ O_hashKeys = pos; final int byteLength = BytesUtil - .bitFlagByteLength((lengthMSB + (nkeys * lengthLSB)) * 8/* nbits */); + .bitFlagByteLength((lengthMSB + (nkeys * lengthLSB))/* nbits */); if (nkeys > 0) { @@ -804,7 +829,7 @@ O_hashKeys = pos; final int byteLength = BytesUtil - .bitFlagByteLength((lengthMSB + (nkeys * lengthLSB)) * 8/* nbits */); + .bitFlagByteLength((lengthMSB + (nkeys * lengthLSB))/* nbits */); if (nkeys > 0) { @@ -994,7 +1019,7 @@ final int lengthMSB = 32/* hashBitLength */- lengthLSB; final int byteLength = BytesUtil.bitFlagByteLength(lengthMSB - + nkeys * lengthMSB/* nbits */); + + (nkeys * lengthLSB)/* nbits */); final InputBitStream ibs = b.slice(O_hashKeys, byteLength) .getInputBitStream(); @@ -1018,8 +1043,102 @@ } } - - final public IRaba getKeys() { + + public Iterator<Integer> hashIterator(final int h) { + + return new HashMatchIterator(h); + + } + + /** + * Visits the index of each bucket entry having a matching hash code. + * + * @todo a trie over the hash entries would provide much faster search. 
+ */ + private class HashMatchIterator implements Iterator<Integer> { + + private final int h; + private final int lengthMSB; + private final InputBitStream ibs; + private int currentIndex = 0; + private Integer nextResult = null; + + private HashMatchIterator(final int h) { + + this.h = h; + + lengthMSB = 32/* hashBitLength */- lengthLSB; + + final int byteLength = BytesUtil.bitFlagByteLength(lengthMSB + + (nkeys * lengthLSB)/* nbits */); + + ibs = b.slice(O_hashKeys, byteLength) + .getInputBitStream(); + + } + + public boolean hasNext() { + + final int n = getKeyCount(); + + while (nextResult == null && currentIndex < n) { + + final int index = currentIndex++; + + int h1; + try { + + // We do not need to re-position the ibs. +// final long position = lengthMSB + currentIndex +// * lengthLSB; +// ibs.position(position); + + h1 = ibs.readInt(lengthLSB); + + h1 |= hashMSB; + + } catch (IOException ex) { + + throw new RuntimeException(ex); + + } + + if (h1 == h) { + + nextResult = Integer.valueOf(index); + + break; + + } + + } + + return nextResult != null; + + } + + public Integer next() { + + if (!hasNext()) + throw new NoSuchElementException(); + + final Integer tmp = nextResult; + + nextResult = null; + + return tmp; + + } + + public void remove() { + + throw new UnsupportedOperationException(); + + } + + } + + final public IRaba getKeys() { return keys; Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashBucket.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashBucket.java 2010-12-03 22:25:13 UTC (rev 3992) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashBucket.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -32,6 +32,7 @@ import com.bigdata.btree.IOverflowHandler; import com.bigdata.btree.IndexSegment; +import com.bigdata.btree.data.DefaultLeafCoder; import com.bigdata.btree.data.IAbstractNodeDataCoder; import com.bigdata.btree.data.ILeafData; import com.bigdata.btree.raba.IRaba; @@ -46,6 +47,11 @@ * @todo The hash of the key should be part of the ITuple interface so it can be * passed along based on the application level encoding of the key. * + * @todo Consider organizing the hash values of the keys in the page using a + * trie for faster lookup. This could be done when they are serialized (in + * which case this decision disappears into the {@link DefaultLeafCoder}) + * or dynamically. + * * @todo Support out-of-line representations of the key and/or value for a tuple * when they are large. The definition of "large" can be a configuration * value for the index metadata. For example, 1/4 of the target page size @@ -124,8 +130,19 @@ * * @todo IRaba keys plus IRaba vals. */ - final int[] data; + final int[] entries; + /** + * The data record. A mutable is used for all mutation operations. The data + * record is replaced by a read-only record used when the hash bucket is + * made persistent. A read-only data record is automatically converted into + * a mutable record when a mutation operation is requested. + * <p> + * Note: This is package private in order to expose it to + * {@link HashDirectory}. 
+ */ + private IBucketData data; + protected void setLocalHashBits(final int localHashBits) { this.localHashBits = localHashBits; @@ -133,7 +150,9 @@ } public int getLocalHashBits() { + return localHashBits; + } /** @@ -148,7 +167,7 @@ for (int i = 0; i < size; i++) { if (i > 0) sb.append(','); - sb.append(Integer.toString(data[i])); + sb.append(Integer.toString(entries[i])); } sb.append("}}"); return sb.toString(); @@ -174,7 +193,7 @@ this.localHashBits = localHashBits; - this.data = new int[bucketSize]; + this.entries = new int[bucketSize]; // one more bucket. htbl.nbuckets++; @@ -200,7 +219,7 @@ for (int i = 0; i < size; i++) { - if (data[i] == key) + if (entries[i] == key) return true; } @@ -287,7 +306,7 @@ */ public void insert(final int h, final int key) { - if (size == data.length) { + if (size == entries.length) { /* * The bucket must be split, potentially recursively. @@ -375,7 +394,7 @@ } - data[size++] = key; + entries[size++] = key; // one more entry in the index. htbl.nentries++; @@ -397,7 +416,7 @@ for (int i = 0; i < size; i++) { - if (data[i] == key) { + if (entries[i] == key) { // #of tuples remaining beyond this point. final int length = size - i - 1; @@ -405,7 +424,7 @@ if (length > 0) { // Keep the array dense by copying down by one. - System.arraycopy(data, i + 1/* srcPos */, data/* dest */, + System.arraycopy(entries, i + 1/* srcPos */, entries/* dest */, i/* destPos */, length); } @@ -461,7 +480,7 @@ @Override public Integer next() { - return data[current++]; + return entries[current++]; } @Override @@ -568,14 +587,12 @@ * IBucketData */ - public int getHash(int index) { - // TODO Auto-generated method stub - return 0; + public int getHash(final int index) { + return data.getHash(index); } public int getLengthMSB() { - // TODO Auto-generated method stub - return 0; + return data.getLengthMSB(); } /* @@ -583,49 +600,39 @@ */ public boolean hasVersionTimestamps() { - // TODO Auto-generated method stub - return false; + return data.hasVersionTimestamps(); } public AbstractFixedByteArrayBuffer data() { - // TODO Auto-generated method stub - return null; + return data.data(); } public int getKeyCount() { - // TODO Auto-generated method stub - return 0; + return data.getKeyCount(); } public IRaba getKeys() { - // TODO Auto-generated method stub - return null; + return data.getKeys(); } public long getMaximumVersionTimestamp() { - // TODO Auto-generated method stub - return 0; + return data.getMaximumVersionTimestamp(); } public long getMinimumVersionTimestamp() { - // TODO Auto-generated method stub - return 0; + return data.getMinimumVersionTimestamp(); } public int getSpannedTupleCount() { - // TODO Auto-generated method stub - return 0; + return data.getKeyCount(); } public boolean isCoded() { - // TODO Auto-generated method stub - return false; + return data.isCoded(); } final public boolean isLeaf() { - return true; - } /** @@ -635,55 +642,43 @@ * {@link IBucketData} is automatically converted into a mutable instance. 
*/ final public boolean isReadOnly() { - -// return data.isReadOnly(); - // TODO Auto-generated method stub - return false; - + return data.isReadOnly(); } /* * ILeafData */ - public boolean getDeleteMarker(int index) { - // TODO Auto-generated method stub - return false; + public boolean getDeleteMarker(final int index) { + return data.getDeleteMarker(index); } public long getNextAddr() { - // TODO Auto-generated method stub - return 0; + return data.getNextAddr(); } public long getPriorAddr() { - // TODO Auto-generated method stub - return 0; + return data.getPriorAddr(); } public int getValueCount() { - // TODO Auto-generated method stub - return 0; + return data.getValueCount(); } public IRaba getValues() { - // TODO Auto-generated method stub - return null; + return data.getValues(); } - public long getVersionTimestamp(int index) { - // TODO Auto-generated method stub - return 0; + public long getVersionTimestamp(final int index) { + return data.getVersionTimestamp(index); } public boolean hasDeleteMarkers() { - // TODO Auto-generated method stub - return false; + return data.hasDeleteMarkers(); } public boolean isDoubleLinked() { - // TODO Auto-generated method stub - return false; + return data.isDoubleLinked(); } } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashDirectory.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashDirectory.java 2010-12-03 22:25:13 UTC (rev 3992) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashDirectory.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -775,7 +775,7 @@ // Adjust the #of local bits to be considered. bold.setLocalHashBits(bold.getLocalHashBits() + 1); // The new bucket. - bnew = new HashBucket(htbl, bold.getLocalHashBits(), bold.data.length/* bucketSize */); + bnew = new HashBucket(htbl, bold.getLocalHashBits(), bold.entries.length/* bucketSize */); // // The address for the new bucket. // final int addrBNew = htbl.buckets.size(); // Add to the chain of buckets. @@ -862,7 +862,7 @@ { // the new bucket. bnew = new HashBucket(htbl, bold.getLocalHashBits() + 1, - bold.data.length/* bucketSize */); + bold.entries.length/* bucketSize */); // // Add to the chain of buckets. // htbl.buckets.add(bnew); } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashTree.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashTree.java 2010-12-03 22:25:13 UTC (rev 3992) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashTree.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -528,8 +528,7 @@ * have access to the store or ITuple will have to have indirection * support. */ - public HashTree(final int initialCapacity, - final int bucketSize) { + public HashTree(final int initialCapacity, final int bucketSize) { // @todo pass in the store reference per AbstractBTree. this.store = new SimpleMemoryRawStore(); Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/IHashTuple.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/IHashTuple.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/IHashTuple.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -0,0 +1,32 @@ +package com.bigdata.htree; + +import com.bigdata.btree.ITuple; + +/** + * Extended interface to support hash buckets. 
+ * + * @author tho...@us... + * + * @param <E> + * + * @todo The reason for having this on ITuple is to make it practical for the + * hash code to be defined in terms of application specific data types + * rather than the unsigned byte[] key (but the latter could of course be + * decoded by the hash function before computing the hash of the + * application data type, except for things like Unicode keys). + * <p> + * This should probably be lifted onto {@link ITuple} and + * {@link #getKeyHash()} should be declared to throw an + * {@link UnsupportedOperationException} if the hash code of the key is + * not being stored. + */ +public interface IHashTuple<E> extends ITuple<E> { + +// int getHashBitLength(); + + /** + * The int32 hash value of the key. + */ + int getKeyHash(); + +} Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/MutableBucketData.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/MutableBucketData.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/MutableBucketData.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -0,0 +1,617 @@ +/* + +Copyright (C) SYSTAP, LLC 2006-2008. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Aug 25, 2009 + */ + +package com.bigdata.htree; + +import java.util.Iterator; + +import cern.colt.map.OpenIntIntHashMap; + +import com.bigdata.btree.ITuple; +import com.bigdata.btree.MutableLeafData; +import com.bigdata.btree.raba.IRaba; +import com.bigdata.htree.data.IBucketData; +import com.bigdata.io.AbstractFixedByteArrayBuffer; +import com.bigdata.io.ByteArrayBuffer; +import com.bigdata.io.IDataRecord; +import com.bigdata.rawstore.Bytes; + +/** + * Implementation maintains Java objects corresponding to the persistent data + * and defines methods for a variety of mutations on the {@link IBucketData} + * record which operate by direct manipulation of the Java objects. + * <p> + * Note: package private fields are used so that they may be directly accessed + * by the {@link HashBucket} class. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id: MutableLeafData.java 2265 2009-10-26 12:51:06Z thompsonbry $ + * + * @todo Consider mutable implementation based on a compacting record ala GOM. + * This is especially attractive for the hash tree. The implementation + * would have to be wholly different from the {@link MutableLeafData} + * class. Instead of managing the {@link IRaba} for the keys and values + * separately, each {@link ITuple} would be appended into a byte[] (or + * {@link IDataRecord}). There would be a budget for that backing buffer + * which is the maximum in-memory size of the bucket. 
An index would + * provide random access into the buffer for only those entries which are + * "live" and a counter is maintained of the #of entries in the buffer which + * are no longer in use. When the buffer capacity is reached, the buffer + * is compacted by copying all of the entries accessible from the index + * into a new buffer and the old buffer is released. + * <p> + * Records which are too large for the buffer should be moved out of line. + * <p> + * This can be used in combination with a dynamically maintained trie for + * fast hash lookup, or we could just scan the entries. + * <p> + * Lexicon key search can scan the entries using the index. Scanning can + * have a side-effect in which the position of the entry offsets in the + * index is swapped if the keys are out of order. This would give us + * MonetDB style "cracking". The keys would have to be put into full order + * no later than when the record is made persistent. + * <p> + * Even though mutation is not thread safe, compacting the data record + * must not cause the assignment of indices to tuples to change when the + * caller is iterating through the entries by index. + * + * @todo When the record is serialized, do we need to allow a decision function + * to examine the record and decide whether it must be split? Since we do + * not have a fixed target for the page size, but only a budget, and since + * compression of keys, values, metadata, and the encoded record can all + * be applied, it seems that the decision function should be in terms of + * the buffer budget and a maximum #of entries (e.g., B+Tree branching + * factor or an equivalent hash bucket threshold). + */ +public class MutableBucketData implements IBucketData { + + private IDataRecord buf; + + private /*@todo final*/ OpenIntIntHashMap index; + + /** + * Constructor used when converting a persistent data record into a mutable + * one. + * + * @param data + */ + public MutableBucketData(final IBucketData data) { + + } + + /** + * + * @param bufferSize + * The initial size of the backing byte[]. + * @param branchingFactor + * The maximum #of tuples which may be stored in the data record. + * + * @todo The buffer must be permitted to grow until it is sufficient to + * encode approximately one page worth of tuples. + * <p> + * The target is typically on the order of the size of a page, e.g., + * 4k. Since the data record will be encoded and possibly compressed + * before it is written onto the store, this can be larger than the + * target. + * <p> + * In order to avoid problems where the objects are much smaller than + * expected, we should allow the backing buffer to grow or we should + * fit a model which estimates the size of the resulting page based on + * the size of the buffer and then grow the buffer until we can + * satisfy the target page size.
+ */ + public MutableBucketData(final int bufferSize, final int branchingFactor) { + + final int initialBufferSize = Math + .min(Bytes.kilobyte32 * 4, bufferSize); + + buf = new ByteArrayBuffer(initialBufferSize); + + index = new OpenIntIntHashMap(branchingFactor/* initialCapacity */); + + } + + @Override + public int getHash(int index) { + // TODO Auto-generated method stub + return 0; + } + + @Override + public int getKeyCount() { + // TODO Auto-generated method stub + return 0; + } + + @Override + public int getLengthMSB() { + // TODO Auto-generated method stub + return 0; + } + + @Override + public Iterator<Integer> hashIterator(int h) { + // TODO Auto-generated method stub + return null; + } + + @Override + public boolean getDeleteMarker(int index) { + // TODO Auto-generated method stub + return false; + } + + @Override + public long getNextAddr() { + // TODO Auto-generated method stub + return 0; + } + + @Override + public long getPriorAddr() { + // TODO Auto-generated method stub + return 0; + } + + @Override + public int getValueCount() { + // TODO Auto-generated method stub + return 0; + } + + @Override + public IRaba getValues() { + // TODO Auto-generated method stub + return null; + } + + @Override + public long getVersionTimestamp(int index) { + // TODO Auto-generated method stub + return 0; + } + + @Override + public boolean hasDeleteMarkers() { + // TODO Auto-generated method stub + return false; + } + + @Override + public boolean hasVersionTimestamps() { + // TODO Auto-generated method stub + return false; + } + + @Override + public boolean isDoubleLinked() { + // TODO Auto-generated method stub + return false; + } + + @Override + public AbstractFixedByteArrayBuffer data() { + // TODO Auto-generated method stub + return null; + } + + @Override + public IRaba getKeys() { + // TODO Auto-generated method stub + return null; + } + + @Override + public long getMaximumVersionTimestamp() { + // TODO Auto-generated method stub + return 0; + } + + @Override + public long getMinimumVersionTimestamp() { + // TODO Auto-generated method stub + return 0; + } + + @Override + public int getSpannedTupleCount() { + // TODO Auto-generated method stub + return 0; + } + + @Override + public boolean isCoded() { + // TODO Auto-generated method stub + return false; + } + + @Override + public boolean isLeaf() { + // TODO Auto-generated method stub + return false; + } + + @Override + public boolean isReadOnly() { + // TODO Auto-generated method stub + return false; + } + +// /** +// * The keys for the entries in the bucket. Unlike a B+Tree, the keys are NOT +// * maintained in a sorted order. Search proceeds by scanning for matching +// * hash codes and filtering for matching keys. +// */ +// final MutableKeyBuffer keys; +// +// /** +// * The values for the entries in the bucket. There is one value per key. The +// * value MAY be null. +// */ +// final MutableValueBuffer vals; +// +// /** +// * The deletion markers IFF isolation is supported by the {@link HTree}. +// */ +// final boolean[] deleteMarkers; +// +// /** +// * The version timestamps IFF isolation is supported by the {@link HTree}. +// */ +// final long[] versionTimestamps; +// +// /** +// * The minimum version timestamp. +// * +// * @todo these fields add 16 bytes to each {@link MutableBucketData} object +// * even when we do not use them. It would be better to use a subclass +// * or tack them onto the end of the {@link #versionTimestamps} array. 
+// */ +// long minimumVersionTimestamp; +// long maximumVersionTimestamp; +// +// /** +// * Create an empty data record with internal arrays dimensioned for the +// * specified branching factor. +// * +// * @param branchingFactor +// * The maximum #of entries in the hash bucket before it will +// * overflow or be split. Since the goal is to manage the size +// * of the bucket on the disk and since we do not known the size +// * of the bucket's data record until it is being evicted, this +// * value places an upper bound after which the bucket will be +// * @param hasVersionTimestamps +// * <code>true</code> iff version timestamps will be maintained. +// * @param hasDeleteMarkers +// * <code>true</code> iff delete markers will be maintained. +// */ +// public MutableBucketData(final int branchingFactor, +// final boolean hasVersionTimestamps, final boolean hasDeleteMarkers) { +// +// keys = new MutableKeyBuffer(branchingFactor + 1); +// +// vals = new MutableValueBuffer(branchingFactor + 1); +// +// versionTimestamps = (hasVersionTimestamps ? new long[branchingFactor + 1] +// : null); +// +// // init per API specification. +// minimumVersionTimestamp = Long.MAX_VALUE; +// maximumVersionTimestamp = Long.MIN_VALUE; +// +// deleteMarkers = (hasDeleteMarkers ? new boolean[branchingFactor + 1] +// : null); +// +// } +// +// /** +// * Copy ctor. +// * +// * @param branchingFactor +// * The branching factor for the owning B+Tree. +// * @param src +// * The source leaf. +// */ +// public MutableBucketData(final int branchingFactor, final ILeafData src) { +// +// keys = new MutableKeyBuffer(branchingFactor + 1, src.getKeys()); +// +// vals = new MutableValueBuffer(branchingFactor + 1, src.getValues()); +// +// versionTimestamps = (src.hasVersionTimestamps() ? new long[branchingFactor + 1] +// : null); +// +// deleteMarkers = (src.hasDeleteMarkers() ? new boolean[branchingFactor + 1] +// : null); +// +// final int nkeys = keys.size(); +// +// if (versionTimestamps != null) { +// +// for (int i = 0; i < nkeys; i++) { +// +// versionTimestamps[i] = src.getVersionTimestamp(i); +// +// } +// +// minimumVersionTimestamp = src.getMinimumVersionTimestamp(); +// +// maximumVersionTimestamp = src.getMaximumVersionTimestamp(); +// +// } else { +// +// minimumVersionTimestamp = Long.MAX_VALUE; +// +// maximumVersionTimestamp = Long.MIN_VALUE; +// +// +// } +// +// if (deleteMarkers != null) { +// +// for (int i = 0; i < nkeys; i++) { +// +// deleteMarkers[i] = src.getDeleteMarker(i); +// +// } +// +// } +// +// } +// +// /** +// * Ctor based on just "data" -- used by unit tests. +// * +// * @param keys +// * A representation of the defined keys in the leaf. +// * @param values +// * An array containing the values found in the leaf. +// * @param versionTimestamps +// * An array of the version timestamps (iff the version metadata +// * is being maintained). +// * @param deleteMarkers +// * An array of the delete markers (iff the version metadata is +// * being maintained). 
+// */ +// public MutableBucketData(final MutableKeyBuffer keys, +// final MutableValueBuffer values, final long[] versionTimestamps, +// final boolean[] deleteMarkers) { +// +// assert keys != null; +// assert values != null; +// assert keys.capacity() == values.capacity(); +// if (versionTimestamps != null) { +// assert versionTimestamps.length == keys.capacity(); +// } +// if (deleteMarkers != null) { +// assert deleteMarkers.length == keys.capacity(); +// } +// +// this.keys = keys; +// this.vals = values; +// this.versionTimestamps = versionTimestamps; +// this.deleteMarkers = deleteMarkers; +// +// if (versionTimestamps != null) +// recalcMinMaxVersionTimestamp(); +// +// } +// +// /** +// * Range check a tuple index. +// * +// * @param index +// * The index of a tuple in [0:nkeys]. +// * @return <code>true</code> +// * +// * @throws IndexOutOfBoundsException +// * if the index is not in the legal range. +// */ +// final protected boolean rangeCheckTupleIndex(final int index) { +// +// if (index < 0 || index > getKeys().size()) +// throw new IndexOutOfBoundsException(); +// +// return true; +// +// } +// +// /** +// * No - this is a mutable data record. +// */ +// final public boolean isReadOnly() { +// +// return false; +// +// } +// +// /** +// * No. +// */ +// final public boolean isCoded() { +// +// return false; +// +// } +// +// final public AbstractFixedByteArrayBuffer data() { +// +// throw new UnsupportedOperationException(); +// +// } +// +// public final long getVersionTimestamp(final int index) { +// +// if (versionTimestamps == null) +// throw new UnsupportedOperationException(); +// +// assert rangeCheckTupleIndex(index); +// +// return versionTimestamps[index]; +// +// } +// +// final public long getMinimumVersionTimestamp() { +// +// if (versionTimestamps == null) +// throw new UnsupportedOperationException(); +// +// return minimumVersionTimestamp; +// +// } +// +// final public long getMaximumVersionTimestamp() { +// +// if (versionTimestamps == null) +// throw new UnsupportedOperationException(); +// +// return maximumVersionTimestamp; +// +// } +// +// public final boolean getDeleteMarker(final int index) { +// +// if (deleteMarkers == null) +// throw new UnsupportedOperationException(); +// +// assert rangeCheckTupleIndex(index); +// +// return deleteMarkers[index]; +// +// } +// +// final public IRaba getValues() { +// +// return vals; +// +// } +// +// final public IRaba getKeys() { +// +// return keys; +// +// } +// +// /** +// * Always returns <code>true</code>. +// */ +// final public boolean isLeaf() { +// +// return true; +// +// } +// +// /** +// * For a leaf the #of entries is always the #of keys. +// */ +// final public int getSpannedTupleCount() { +// +// return getKeys().size(); +// +// } +// +// final public int getValueCount() { +// +// return vals.size(); +// +// } +// +// final public boolean hasDeleteMarkers() { +// +// return deleteMarkers != null; +// +// } +// +// final public boolean hasVersionTimestamps() { +// +// return versionTimestamps != null; +// +// } +// +// final public int getKeyCount() { +// +// return keys.size(); +// +// } +// +// /** +// * No - this class does not support double-linked leaves (only the +// * {@link IndexSegment} actually uses double-linked leaves). 
+// */ +// final public boolean isDoubleLinked() { +// +// return false; +// +// } +// +// final public long getNextAddr() { +// +// throw new UnsupportedOperationException(); +// +// } +// +// final public long getPriorAddr() { +// +// throw new UnsupportedOperationException(); +// +// } +// +// /** +// * Recalculate the min/max version timestamp on the leaf. The caller is +// * responsible for propagating the new min/max to the ancestors of the leaf. +// * +// * @throws UnsupportedOperationException +// * if the leaf is not maintaining per-tuple version timestamps. +// */ +// void recalcMinMaxVersionTimestamp() { +// +// // must be maintaining version timestamps. +// if (versionTimestamps == null) +// throw new UnsupportedOperationException(); +// +// final int nkeys = keys.nkeys; +// +// long min = Long.MAX_VALUE; +// long max = Long.MIN_VALUE; +// +// for (int i = 0; i < nkeys; i++) { +// +// final long t = versionTimestamps[i]; +// +// if (t < min) +// min = t; +// +// if (t > max) +// max = t; +// +// } +// +// minimumVersionTimestamp = min; +// maximumVersionTimestamp = max; +// +// } + +} Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/data/IBucketData.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/data/IBucketData.java 2010-12-03 22:25:13 UTC (rev 3992) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/data/IBucketData.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -26,40 +26,23 @@ */ package com.bigdata.htree.data; -import com.bigdata.btree.IOverflowHandler; -import com.bigdata.btree.IndexSegment; -import com.bigdata.btree.data.IAbstractNodeDataCoder; +import java.util.Iterator; + import com.bigdata.btree.data.ILeafData; -import com.bigdata.btree.raba.IRaba; -import com.bigdata.rawstore.IRawStore; /** - * Interface for the data record of a hash bucket. + * Interface for the data record of a hash bucket. The hash bucket extends the + * B+Tree leaf data record interface. A hash bucket page may be shared by + * multiple directory entries (this is one of the principal tenets of + * extendible hashing). However, the bucket is just a bucket to each such + * directory entry. There is no sense of offset addressing into the shared + * bucket. * <p> - * The hash bucket extends the B+Tree leaf node page. However, hash buckets must - * have the HASH_KEYS flag enabled and SHOULD NOT use prefix compression unless - * (a) an order preserving hash function is used; and (b) the tuples are in key - * order within the page. - * <p> - * The hash values of the keys in the bucket will have a shared prefix (when - * using an MSB hash prefix) which corresponds to the globalDepth of the path - * through the hash tree leading to this bucket less the localDepth of this - * bucket. It is therefore possible (in principle) to store only the LSB bits of - * the hash values in the page and reconstruct the hash values using the MSB - * bits from the path through the hash tree. In order to be able to reconstruct - * the full hash code key based solely on local information, the MSB bits can be - * written out once and the LSB bits can be written out once per tuple. Testing - * the hash value of a key may then be done considering only the LSB bits of the - * hash value. This storage scheme also has the advantage that the hash value is - * not restricted to an int32 and is therefore compatible with the use of - * cryptographic hash functions.
(If hash values are stored in a B+Tree leaf - * they will not shared this prefix property and can not be compressed in this - * manner). - * <p> * The {@link ILeafData#getPriorAddr()} and {@link ILeafData#getNextAddr()} * fields of the {@link ILeafData} record are reserved by the hash tree to * encode the search order for range queries when used in combination with an * order preserving hash function. + * <p> * * @author tho...@us... */ @@ -72,10 +55,11 @@ // */ // int getLocalDepth(); -// /** -// * The total bit length of the hash values of the keys. -// */ -// int getHashBitLength(); + /** + * Return the #of entries in the hash bucket (all keys, not just the + * distinct keys). + */ + int getKeyCount(); /** * The length (in bits) of the MSB prefix shared by the hash values of the @@ -91,6 +75,29 @@ * @return The hash value of that key. */ int getHash(int index); + + /** + * Return an {@link Iterator} which visits the index of each entry in the + * hash bucket having the given hash code. + * + * @param h + * The hash code. + * + * @todo Note: There is a design tradeoff between autoboxing of the + * <code>int</code> index and allowing the {@link IBucketData} class + * to encapsulate the iterator pattern together with any setup which + * can be done once per scan for a given hash code. For example, using + * a BitInputStream. The iterator allows us to amortize the cost of + * that setup, but we pay for the autoboxing of the index values. + * However, autobox primitives tend to be quite cheap as they are + * rapidly reclaimed by GC. + * <p> + * It is possible to implement an extension interface which returns + * the [int] index without autoboxing. If this method signature is + * modified to return that interface then the implementation can avoid + * autoboxing. + */ + Iterator<Integer> hashIterator(int h); // /** // * The storage address of the last overflow page in the overflow chain. Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/btree/AbstractBTreeTestCase.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/btree/AbstractBTreeTestCase.java 2010-12-03 22:25:13 UTC (rev 3992) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/btree/AbstractBTreeTestCase.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -32,6 +32,8 @@ import java.io.IOException; import java.util.Arrays; import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; import java.util.Map; import java.util.Random; import java.util.TreeMap; @@ -53,6 +55,7 @@ import com.bigdata.btree.raba.IRaba; import com.bigdata.btree.raba.codec.RandomKeysGenerator; import com.bigdata.cache.HardReferenceQueue; +import com.bigdata.htree.data.IBucketData; import com.bigdata.io.SerializerUtil; import com.bigdata.rawstore.Bytes; import com.bigdata.rawstore.IRawStore; @@ -479,6 +482,12 @@ } + if(n1 instanceof IBucketData) { + + assertSameHashCodes((IBucketData) n1, (IBucketData) n2); + + } + assertSameRaba(n1.getValues(), n2.getValues()); } @@ -668,6 +677,68 @@ } + /** + * Verifies details for the {@link IBucketData} interface. + * + * @param b1 + * A hash bucket. + * @param b2 + * Another hash bucket. + */ + static public void assertSameHashCodes(final IBucketData b1, final IBucketData b2) { + + // The key and value counts must be the same. 
+ final int n = b1.getKeyCount(); + assertEquals("keyCount", n, b2.getKeyCount()); + assertEquals("valueCount", n, b1.getValueCount()); + assertEquals("valueCount", n, b2.getValueCount()); + + assertEquals("lengthMSB", b1.getLengthMSB(), b2.getLengthMSB()); + + /* + * Verify that the same hash codes are reported at each index position. + */ + for (int i = 0; i < n; i++) { + + final int h1 = b1.getHash(i); + + final int h2 = b2.getHash(i); + + if (h1 != h2) { + + assertEquals("getHash(" + i + ")", h1, h2); + + } + + } + + /* + * Now verify that the same hash matches are reported for each + * visited hash code. + */ + for (int i = 0; i < n; i++) { + + final int h1 = b1.getHash(i); + + final List<Integer> indices = new LinkedList<Integer>(); + + final Iterator<Integer> eitr = b1.hashIterator(h1); + + while (eitr.hasNext()) { + + indices.add(eitr.next()); + + } + + final Integer[] hashCodes = indices.toArray(new Integer[indices + .size()]); + + assertSameIterator("hashCodes", hashCodes, b2.hashIterator(h1)); + + } + + } + /** * Special purpose helper used to vet {@link Node#childAddr}. * @@ -677,7 +748,7 @@ * @param node * The node. */ - public void assertChildKeys(final long[] childAddr, final Node node) { + static public void assertChildKeys(final long[] childAddr, final Node node) { final int nChildAddr = childAddr.length; @@ -720,7 +791,7 @@ * @param node * The node. */ - public void assertKeys(final byte[][] keys, final AbstractNode<?> node) { + static public void assertKeys(final byte[][] keys, final AbstractNode<?> node) { // // verify the capacity of the keys[] on the node. // assertEquals("keys[] capacity", (node.maxKeys + 1) * stride, @@ -763,7 +834,7 @@ * @param node * The node. */ - public void assertEntryCounts(final int[] expected, final INodeData node) { + static public void assertEntryCounts(final int[] expected, final INodeData node) { final int len = expected.length; Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/btree/data/AbstractLeafDataRecordTestCase.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/btree/data/AbstractLeafDataRecordTestCase.java 2010-12-03 22:25:13 UTC (rev 3992) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/btree/data/AbstractLeafDataRecordTestCase.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -27,8 +27,6 @@ package com.bigdata.btree.data; - -import com.bigdata.btree.raba.IRaba; import com.bigdata.btree.raba.ReadOnlyKeysRaba; import com.bigdata.btree.raba.ReadOnlyValuesRaba; import com.bigdata.io.DataOutputBuffer; Added: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/TestAll.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/TestAll.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/TestAll.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -0,0 +1,68 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +package com.bigdata.htree; + +import junit.framework.Test; +import junit.framework.TestCase; +import junit.framework.TestSuite; + +/** + * Aggregates test suites into increasing dependency order. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public class TestAll extends TestCase { + + /** + * + */ + public TestAll() { + } + + /** + * @param arg0 + */ + public TestAll(String arg0) { + super(arg0); + } + + /** + * Returns a test that will run each of the implementation specific test + * suites in turn. + */ + public static Test suite() + { + + final TestSuite suite = new TestSuite("HTree"); + + suite.addTest(com.bigdata.htree.data.TestAll.suite()); + + suite.addTestSuite(TestExtensibleHashing.class); + + return suite; + + } + +} Added: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/AbstractHashBucketDataRecordTestCase.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/AbstractHashBucketDataRecordTestCase.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/AbstractHashBucketDataRecordTestCase.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -0,0 +1,49 @@ +package com.bigdata.htree.data; + +import com.bigdata.btree.data.AbstractLeafDataRecordTestCase; +import com.bigdata.btree.data.ILeafData; +import com.bigdata.btree.raba.IRaba; + +/** + * Abstract class for tests of {@link IBucketData} implementations. + */ +abstract public class AbstractHashBucketDataRecordTestCase extends + AbstractLeafDataRecordTestCase { + + public AbstractHashBucketDataRecordTestCase() { + + super(); + + } + + public AbstractHashBucketDataRecordTestCase(String name) { + + super(name); + + } + + protected ILeafData mockLeafFactory(final IRaba keys, final IRaba vals, + final boolean[] deleteMarkers, final long[] versionTimestamps) { + + /* + * Note: This computes the MSB prefix and the hash codes using the + * standard Java semantics for the hash of a byte[]. In practice, the + * hash value is normally computed from the key using an application + * specified hash function. 
+ */ + final int lengthMSB = 0; + + final int[] hashCodes = new int[keys.size()]; + + for (int i = 0; i < hashCodes.length; i++) { + + hashCodes[i] = keys.get(i).hashCode(); + + } + + return new MockBucketData(keys, vals, deleteMarkers, versionTimestamps, + lengthMSB, hashCodes); + + } + +} Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/MockBucketData.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/MockBucketData.java 2010-12-03 22:25:13 UTC (rev 3992) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/MockBucketData.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -1,5 +1,10 @@ package com.bigdata.htree.data; +import it.unimi.dsi.fastutil.Hash; + +import java.util.Iterator; +import java.util.NoSuchElementException; + import com.bigdata.btree.data.MockLeafData; import com.bigdata.btree.raba.IRaba; @@ -72,4 +77,57 @@ } + public Iterator<Integer> hashIterator(final int h) { + + return new HashMatchIterator(h); + + } + + /** + * Visits the index of each bucket entry having a matching hash code. + */ + private class HashMatchIterator implements Iterator<Integer> { + + private final int h; + private int currentIndex = 0; + private Integer nextResult = null; + + private HashMatchIterator(final int h) { + this.h = h; + } + + public boolean hasNext() { + final int n = getKeyCount(); + while (nextResult == null && currentIndex < n) { + final int index = currentIndex++; + final int h1 = getHash(index); + if (h1 == h) { + nextResult = Integer.valueOf(index); + break; + } + } + return nextResult != null; + } + + public Integer next() { + + if (!hasNext()) + throw new NoSuchElementException(); + + final Integer tmp = nextResult; + + nextResult = null; + + return tmp; + + } + + public void remove() { + + throw new UnsupportedOperationException(); + + } + + } + } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestAll.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestAll.java 2010-12-03 22:25:13 UTC (rev 3992) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestAll.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -67,6 +67,8 @@ /* * Test w/ all key and value coders suitable for leaves. * + * @todo test the mutable bucket data record + * * @todo test w/ linked-leaf (order preserving hash functions). * * @todo test w/ out-of-line tuples (when too large for the page). Copied: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestBucketDataRecord_CanonicalHuffman_CanonicalHuffman.java (from rev 3991, branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestLeafDataRecord_CanonicalHuffman_CanonicalHuffman.java) =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestBucketDataRecord_CanonicalHuffman_CanonicalHuffman.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestBucketDataRecord_CanonicalHuffman_CanonicalHuffman.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -0,0 +1,68 @@ +/* + +Copyright (C) SYSTAP, LLC 2006-2008. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... 
+ +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Aug 5, 2009 + */ + +package com.bigdata.htree.data; + +import com.bigdata.btree.data.DefaultLeafCoder; +import com.bigdata.btree.data.ILeafData; +import com.bigdata.btree.raba.codec.CanonicalHuffmanRabaCoder; + +/** + * Test suite for the HTree {@link ILeafData} records (accessing coded data in + * place). + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public class TestBucketDataRecord_CanonicalHuffman_CanonicalHuffman extends + AbstractHashBucketDataRecordTestCase { + + /** + * + */ + public TestBucketDataRecord_CanonicalHuffman_CanonicalHuffman() { + } + + /** + * @param name + */ + public TestBucketDataRecord_CanonicalHuffman_CanonicalHuffman(String name) { + super(name); + } + + protected void setUp() throws Exception { + + super.setUp(); + + coder = new DefaultLeafCoder(// + CanonicalHuffmanRabaCoder.INSTANCE,// keys + CanonicalHuffmanRabaCoder.INSTANCE // vals + ); + + } + +} Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestBucketDataRecord_Simple_Simple.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestBucketDataRecord_Simple_Simple.java 2010-12-03 22:25:13 UTC (rev 3992) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestBucketDataRecord_Simple_Simple.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -29,8 +29,6 @@ import com.bigdata.btree.data.AbstractLeafDataRecordTestCase; import com.bigdata.btree.data.DefaultLeafCoder; -import com.bigdata.btree.data.ILeafData; -import com.bigdata.btree.raba.IRaba; import com.bigdata.btree.raba.codec.SimpleRabaCoder; /** @@ -65,28 +63,4 @@ } - protected ILeafData mockLeafFactory(final IRaba keys, final IRaba vals, - final boolean[] deleteMarkers, final long[] versionTimestamps) { - - /* - * Note: This computes the MSB prefix and the hash codes using the - * standard Java semantics for the hash of a byte[]. In practice, the - * hash value is normally computed from the key using an application - * specified hash function. 
- */ - final int lengthMSB = 0; - - final int[] hashCodes = new int[keys.size()]; - - for (int i = 0; i < hashCodes.length; i++) { - - hashCodes[i] = keys.get(i).hashCode(); - - } - - return new MockBucketData(keys, vals, deleteMarkers, versionTimestamps, - lengthMSB, hashCodes); - - } - } Deleted: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestLeafDataRecord_CanonicalHuffman_CanonicalHuffman.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestLeafDataRecord_CanonicalHuffman_CanonicalHuffman.java 2010-12-03 22:25:13 UTC (rev 3992) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestLeafDataRecord_CanonicalHuffman_CanonicalHuffman.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -1,93 +0,0 @@ -/* - -Copyright (C) SYSTAP, LLC 2006-2008. All rights reserved. - -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ -/* - * Created on Aug 5, 2009 - */ - -package com.bigdata.htree.data; - -import com.bigdata.btree.data.AbstractLeafDataRecordTestCase; -import com.bigdata.btree.data.DefaultLeafCoder; -import com.bigdata.btree.data.ILeafData; -import com.bigdata.btree.raba.IRaba; -import com.bigdata.btree.raba.codec.CanonicalHuffmanRabaCoder; - -/** - * Test suite for the HTree {@link ILeafData} records (accessing coded data in - * place). - * - * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id$ - */ -public class TestLeafDataRecord_CanonicalHuffman_CanonicalHuffman extends AbstractLeafDataRecordTestCase { - - /** - * - */ - public TestLeafDataRecord_CanonicalHuffman_CanonicalHuffman() { - } - - /** - * @param name - */ - public TestLeafDataRecord_CanonicalHuffman_CanonicalHuffman(String name) { - super(name); - } - - protected void setUp() throws Exception { - - super.setUp(); - - coder = new DefaultLeafCoder(// - CanonicalHuffmanRabaCoder.INSTANCE,// keys - CanonicalHuffmanRabaCoder.INSTANCE // vals - ); - - } - - protected ILeafData mockLeafFactory(final IRaba keys, final IRaba vals, - final boolean[] deleteMarkers, final long[] versionTimestamps) { - - /* - * Note: This computes the MSB prefix and the hash codes using the - * standard Java semantics for the hash of a byte[]. In practice, the - * hash value is normally computed from the key using an application - * specified hash function. - */ - final int lengthMSB = 0; - - final int[] hashCodes = new int[keys.size()]; - - for (int i = 0; i < hashCodes.length; i++) { - - hashCodes[i] = keys.get(i).hashCode(); - - } - - return new MockBucketData(keys, vals, deleteMarkers, versionTimestamps, - lengthMSB, hashCodes); - - } - -} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
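The HashMatchIterator in the patch above relies on the MSB/LSB hash prefix scheme: every hash value in a given bucket shares the same MSB prefix, so the coded record stores that prefix once and only the lengthLSB low-order bits per tuple, and a probe reconstructs each full hash value by OR-ing the shared prefix back in. The following is a minimal standalone sketch of that reconstruct-and-scan step (hypothetical class and variable names; plain int arrays stand in for the InputBitStream over the coded page):

    import java.util.ArrayList;
    import java.util.List;

    public class HashPrefixSketch {

        public static void main(final String[] args) {

            final int lengthLSB = 8; // #of low-order bits stored per tuple.

            // The shared MSB prefix for this bucket (low lengthLSB bits are zero).
            final int hashMSB = 0xabcd12 << lengthLSB;

            // The per-tuple LSB bits, in index order, as they would be read
            // from the bit stream.
            final int[] lsb = { 0x01, 0x2f, 0x01, 0x7e };

            // The full hash value being probed.
            final int h = hashMSB | 0x01;

            final List<Integer> matches = new ArrayList<Integer>();

            for (int index = 0; index < lsb.length; index++) {

                int h1 = lsb[index]; // readInt(lengthLSB) in the coded record.

                h1 |= hashMSB; // reconstruct the full hash value.

                if (h1 == h)
                    matches.add(index);

            }

            System.out.println("matches at indices: " + matches); // [0, 2]

        }

    }

Because the LSB bits for successive tuples are laid out contiguously after the one-time MSB prefix, a sequential scan never needs to re-position the bit stream, which is why the position(...) call is commented out in the iterator above.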
From: <tho...@us...> - 2010-12-12 22:22:09
|
Revision: 4004 http://bigdata.svn.sourceforge.net/bigdata/?rev=4004&view=rev Author: thompsonbry Date: 2010-12-12 22:22:01 +0000 (Sun, 12 Dec 2010) Log Message: ----------- Added the concept of stackable symbol tables to IBindingSet (push(),pop()) to support optional join groups. Added a new ListBindingSet implementation which should be more efficient than the HashBindingSet, which is our current mutable implementation of choice. Added Serialization tests to the IBindingSet implementations. Modified DistinctBindingSetOp, which used a constructor which is no longer public. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IBindingSet.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bindingSet/ArrayBindingSet.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bindingSet/EmptyBindingSet.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bindingSet/HashBindingSet.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/DistinctBindingSetOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/bindingSet/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/bindingSet/TestArrayBindingSet.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/bindingSet/TestIBindingSet.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bindingSet/ListBindingSet.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/bindingSet/TestListBindingSet.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IBindingSet.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IBindingSet.java 2010-12-11 16:04:20 UTC (rev 4003) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IBindingSet.java 2010-12-12 22:22:01 UTC (rev 4004) @@ -103,14 +103,22 @@ * The #of bound variables. */ public int size(); - - /** - * Visits the bindings. - */ + + /** + * Visits the bindings. + * + * @todo The unit tests verify that the implementations do not permit + * mutation using the iterator, but that is not actually specified by + * the API as forbidden. + */ public Iterator<Map.Entry<IVariable,IConstant>> iterator(); /** * Visits the bound variables. + * + * @todo The unit tests verify that the implementations do not permit + * mutation using the iterator, but that is not actually specified by + * the API as forbidden. */ public Iterator<IVariable> vars(); @@ -118,13 +126,17 @@ * Return a shallow copy of the binding set. */ public IBindingSet clone(); - - /** - * Return a shallow copy of the binding set, eliminating unnecessary - * variables. - */ + + /** + * Return a shallow copy of the binding set, eliminating unnecessary + * variables. + * + * @param variablesToKeep + * When non-<code>null</code>, only the listed variables are + * retained. + */ public IBindingSet copy(IVariable[] variablesToKeep); - + /** * True iff the variables and their bound values are the same * for the two binding sets. @@ -134,15 +146,49 @@ */ public boolean equals(Object o); - /** - * The hash code of a binding is defined as the bit-wise XOR of the hash - * codes of the {@link IConstant}s for its bound variables. Unbound - * variables are ignored when computing the hash code. Binding sets are - * unordered collections, therefore the calculated hash code intentionally - * does not dependent on the order in which the bindings are iterated over. 
- The hash code reflects the current state of the bindings and must be - * recomputed if the bindings are changed. - */ + /** + * The hash code of a binding is defined as the bit-wise XOR of the hash + * codes of the {@link IConstant}s for its bound variables. Unbound + * variables are ignored when computing the hash code. Binding sets are + * unordered collections, therefore the calculated hash code intentionally + * does not depend on the order in which the bindings are visited. The hash + * code reflects the current state of the bindings and must be recomputed if + * the bindings are changed. + */ public int hashCode(); - + + /** + * Make a copy of the current symbol table (aka current variable bindings) + * and push it onto the stack. Variable bindings will be made against + * the current symbol table. The symbol table stack is propagated by + * {@link #clone()} and {@link #copy(IVariable[])}. Symbol tables may be + * used to propagate conditional bindings through a data flow until a + * decision point is reached, at which point they may be either discarded or + * committed. This mechanism may be used to support SPARQL style optional + * join groups. + * + * @throws UnsupportedOperationException + * if the {@link IBindingSet} is not mutable. + * + * @see #pop(boolean) + */ + public void push(); + + /** + * Pop the current symbol table off of the stack. + * + * @param save + * When <code>true</code>, the bindings on the current symbol + * table are copied to the parent symbol table before the current + * symbol table is popped off of the stack. If <code>false</code> + * , any bindings associated with that symbol table are + * discarded. + * + * @throws IllegalStateException + * if there is no nested symbol table. + * + * @see #push() + */ + public void pop(boolean save); + } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bindingSet/ArrayBindingSet.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bindingSet/ArrayBindingSet.java 2010-12-11 16:04:20 UTC (rev 4003) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bindingSet/ArrayBindingSet.java 2010-12-12 22:22:01 UTC (rev 4004) @@ -28,14 +28,14 @@ package com.bigdata.bop.bindingSet; +import java.io.Serializable; import java.util.Arrays; import java.util.Collections; import java.util.Iterator; import java.util.Map; +import java.util.Stack; import java.util.Map.Entry; -import org.apache.log4j.Logger; - import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IConstant; import com.bigdata.bop.IVariable; @@ -54,36 +54,389 @@ private static final long serialVersionUID = -6468905602211956490L; - private static final Logger log = Logger.getLogger(ArrayBindingSet.class); +// private static final Logger log = Logger.getLogger(ArrayBindingSet.class); /** - * A dense array of the bound variables. + * A symbol table implemented by two correlated arrays. */ - private final IVariable[] vars; + private static class ST implements Serializable { + + /** + * + */ + private static final long serialVersionUID = 1L; + + /** + * A dense array of the bound variables. + */ + private final IVariable[] vars; + + /** + * A dense array of the values bound to the variables (correlated with + * {@link #vars}). + */ + private final IConstant[] vals; + + /** + * The #of entries in the arrays which have defined values.
+ */ + private int nbound = 0; + + private ST(final int nbound,final IVariable[] vars, final IConstant[] vals) { + this.nbound = nbound; + this.vars = vars; + this.vals = vals; + } + + public IConstant get(final IVariable var) { + + if (var == null) + throw new IllegalArgumentException(); + + for (int i = 0; i < nbound; i++) { + + if (vars[i] == var) { + + return vals[i]; + + } + + } + + return null; + + } + + void set(final IVariable var, final IConstant val) { + + if (var == null) + throw new IllegalArgumentException(); + + if (val == null) + throw new IllegalArgumentException(); + + for (int i = 0; i < nbound; i++) { + + if (vars[i] == var) { + + vals[i] = val; + + return; + + } + + } + + vars[nbound] = var; + + vals[nbound] = val; + + nbound++; + + } + + void clearAll() { + + for (int i = nbound - 1; nbound > 0; i--, nbound--) { + + vars[i] = null; + + vals[i] = null; + + } + + assert nbound == 0; + + } + + /** + * Since the array is dense (no gaps), {@link #clear(IVariable)} + * requires that we copy down any remaining elements in the array by one + * position. + * + * @return <code>true</code> if the data structure was modified by the + * operation. + */ + boolean clear(final IVariable var) { + + if (var == null) + throw new IllegalArgumentException(); + + for (int i = 0; i < nbound; i++) { + + if (vars[i] == var) { + + final int nremaining = nbound-(i+1); + + if (nremaining >= 0) { + + // Copy down to close up the gap! + System.arraycopy(vars, i+1, vars, i, nremaining); + + System.arraycopy(vals, i+1, vals, i, nremaining); + + } else { + + // Just clear the reference. + + vars[i] = null; + + vals[i] = null; + + } + + nbound--; + + return true; + + } + + } + + return false; + + } + + } + + /** + * The stack of symbol tables. Each symbol table is a mapping from an + * {@link IVariable} onto its non-<code>null</code> bound {@link IConstant}. + * The stack is initialized with an empty symbol table. Symbol tables may be + * pushed onto the stack or popped off of the stack, but the stack MAY NOT + * become empty. + */ + private final Stack<ST> stack; + + /** + * Return the symbol table on the top of the stack. + */ + private ST current() { + + return stack.peek(); + + } + + public void push() { + + // The current symbol table. + final ST cur = current(); + + // Create a new symbol table. + final ST tmp = new ST(cur.nbound, cur.vars.clone(), cur.vals.clone()); + + // Push the new symbol table onto the stack. + stack.push(tmp); + + } + + public void pop(final boolean save) { + + if (stack.size() < 2) { + /* + * The stack may never become empty. Therefore there must be at + * least two symbol tables on the stack for a pop() request. + */ + throw new IllegalArgumentException(); + } + + // Pop the symbol table off of the top of the stack. + final ST old = stack.pop(); + + if (save) { + + // discard the current symbol table. + stack.pop(); + + // replacing it with the symbol table which we popped off the stack. + stack.push(old); + + } else { + + // clear the hash code. + hash = 0; + + } + + } + /** - * A dense array of the values bound to the variables (correlated with - * {@link #vars}). - */ - private final IConstant[] vals; + * Copy constructor (used by clone, copy). + * + * @param src + * The source to be copied. + * @param variablesToKeep + * The variables to be retained for the symbol table on the top + * of the stack (optional). 
+ */ + protected ArrayBindingSet(final ArrayBindingSet src, + final IVariable[] variablesToKeep) { - private int nbound = 0; + stack = new Stack<ST>(); - /** - * Copy constructor. - */ - protected ArrayBindingSet(final ArrayBindingSet bindingSet) { + final int stackSize = src.stack.size(); + + int depth = 1; + + for (ST srcLst : src.stack) { + + /* + * Copy the source bindings. + * + * Note: If a restriction exists on the variables to be copied, then + * it is applied onto the the top level of the stack. If the symbol + * table is saved when it is pop()'d, then the modified bindings + * will replace the parent symbol table on the stack. + */ + final ST tmp = copy(srcLst, + depth == stackSize ? variablesToKeep : null); + + // Push onto the stack. + stack.push(tmp); + + } + + } + + /** + * Return a copy of the source list. + * + * @param src + * The source list. + * @param variablesToKeep + * When non-<code>null</code>, only the bindings for the + * variables listed in this array will copied. + * + * @return The copy. + */ + private ST copy(final ST src, final IVariable[] variablesToKeep) { + + if (variablesToKeep == null) { + + return new ST(src.nbound, src.vars, src.vals); + + } + + final ST dst = new ST(0/* nbound */, new IVariable[src.vars.length], + new IConstant[src.vals.length]); + + // bitflag for the old binding set + final boolean[] keep = new boolean[src.nbound]; - if (bindingSet == null) - throw new IllegalArgumentException(); + // for each var in the old binding set, see if we need to keep it + for (int i = 0; i < src.nbound; i++) { + + final IVariable v = src.vars[i]; + + keep[i] = false; + for (IVariable k : variablesToKeep) { + if (v == k) { + keep[i] = true; + break; + } + } + + } - nbound = bindingSet.nbound; + // fill in the new binding set based on the keep bitflag + for (int i = 0; i < src.nbound; i++) { + if (keep[i]) { + dst.vars[dst.nbound] = src.vars[i]; + dst.vals[dst.nbound] = src.vals[i]; + dst.nbound++; + } + } - vars = bindingSet.vars.clone(); +// final Iterator<E> itr = src.iterator(); +// +// while (itr.hasNext()) { +// +// final E e = itr.next(); +// +// boolean keep = true; +// +// if (variablesToKeep != null) { +// +// keep = false; +// +// for (IVariable<?> x : variablesToKeep) { +// +// if (x == e.var) { +// +// keep = true; +// +// break; +// +// } +// +// } +// +// } +// +// if (keep) +// dst.add(new E(e.var, e.val)); +// +// } - vals = bindingSet.vals.clone(); + return dst; + + } + +// public ArrayBindingSet XXcopy(final IVariable[] variablesToKeep) { +// +// // bitflag for the old binding set +// final boolean[] keep = new boolean[nbound]; +// +// // for each var in the old binding set, see if we need to keep it +// for (int i = 0; i < nbound; i++) { +// +// final IVariable v = vars[i]; +// +// keep[i] = false; +// for (IVariable k : variablesToKeep) { +// if (v == k) { +// keep[i] = true; +// break; +// } +// } +// +// } +// +// // allocate the new vars +// final IVariable[] newVars = new IVariable[vars.length]; +// +// // allocate the new vals +// final IConstant[] newVals = new IConstant[vals.length]; +// +// // fill in the new binding set based on the keep bitflag +// int newbound = 0; +// for (int i = 0; i < nbound; i++) { +// if (keep[i]) { +// newVars[newbound] = vars[i]; +// newVals[newbound] = vals[i]; +// newbound++; +// } +// } +// +// ArrayBindingSet bs = new ArrayBindingSet(newVars, newVals); +// bs.nbound = newbound; +// +// return bs; +// +// } + + public ArrayBindingSet clone() { + + return new ArrayBindingSet(this, null/* 
variablesToKeep */); } - + + public ArrayBindingSet copy(final IVariable[] variablesToKeep) { + + return new ArrayBindingSet(this, variablesToKeep); + + } + /** * Initialized with the given bindings (assumes for efficiency that all * elements of bound arrays are non-<code>null</code> and that no @@ -105,21 +458,9 @@ if(vars.length != vals.length) throw new IllegalArgumentException(); - // for (int i = 0; i < vars.length; i++) { - // - // if (vars[i] == null) - // throw new IllegalArgumentException(); - // - // if (vals[i] == null) - // throw new IllegalArgumentException(); - // - // } - - this.vars = vars; - - this.vals = vals; + stack = new Stack<ST>(); - this.nbound = vars.length; + stack.push(new ST(vars.length, vars, vals)); } @@ -134,22 +475,32 @@ */ public ArrayBindingSet(final int capacity) { - if (capacity < 0) - throw new IllegalArgumentException(); + if (capacity < 0) + throw new IllegalArgumentException(); - vars = new IVariable[capacity]; + stack = new Stack<ST>(); - vals = new IConstant[capacity]; + stack.push(new ST(0/* nbound */, new IVariable[capacity], + new IConstant[capacity])); } - public Iterator<IVariable> vars() { + /** + * {@inheritDoc} + * <p> + * Iterator does not support either removal or concurrent modification of + * the binding set. + */ + public Iterator<IVariable> vars() { - return Collections.unmodifiableList(Arrays.asList(vars)).iterator(); - + return Collections.unmodifiableList(Arrays.asList(current().vars)) + .iterator(); + } /** + * {@inheritDoc} + * <p> * Iterator does not support either removal or concurrent modification of * the binding set. */ @@ -163,9 +514,11 @@ private int i = 0; + private ST cur = current(); + public boolean hasNext() { - return i < nbound; + return i < cur.nbound; } @@ -178,13 +531,13 @@ public IVariable getKey() { - return vars[index]; + return cur.vars[index]; } public IConstant getValue() { - return vals[index]; + return cur.vals[index]; } @@ -193,9 +546,9 @@ if (value == null) throw new IllegalArgumentException(); - final IConstant t = vals[index]; + final IConstant t = cur.vals[index]; - vals[index] = value; + cur.vals[index] = value; return t; @@ -215,89 +568,34 @@ public int size() { - return nbound; + return current().nbound; } public void clearAll() { - for (int i = nbound - 1; nbound > 0; i--, nbound--) { - - vars[i] = null; - - vals[i] = null; - - } - + current().clearAll(); + // clear the hash code. hash = 0; - assert nbound == 0; - } - /** - * Since the array is dense (no gaps), {@link #clear(IVariable)} requires - * that we copy down any remaining elements in the array by one position. - */ - public void clear(final IVariable var) { + public void clear(final IVariable var) { - if (var == null) - throw new IllegalArgumentException(); + if (current().clear(var)) { - for (int i = 0; i < nbound; i++) { + // clear the hash code. + hash = 0; - if (vars[i] == var) { + } - final int nremaining = nbound-(i+1); - - if (nremaining >= 0) { - - // Copy down to close up the gap! - System.arraycopy(vars, i+1, vars, i, nremaining); + } - System.arraycopy(vals, i+1, vals, i, nremaining); - - } else { - - // Just clear the reference. - - vars[i] = null; - - vals[i] = null; - - } - - // clear the hash code. 
- hash = 0; - - nbound--; - - break; - - } - - } - - } - public IConstant get(final IVariable var) { - if (var == null) - throw new IllegalArgumentException(); - - for (int i = 0; i < nbound; i++) { - - if (vars[i] == var) { + return current().get(var); - return vals[i]; - - } - - } - - return null; - } public boolean isBound(final IVariable var) { @@ -308,122 +606,40 @@ public void set(final IVariable var, final IConstant val) { - if (var == null) - throw new IllegalArgumentException(); + current().set(var, val); - if (val == null) - throw new IllegalArgumentException(); - - if (log.isTraceEnabled()) { - - log.trace("var=" + var + ", val=" + val + ", nbound=" + nbound - + ", capacity=" + vars.length); - - } - - for (int i = 0; i < nbound; i++) { - - if (vars[i] == var) { - - vals[i] = val; - - // clear the hash code. - hash = 0; - - return; - - } - - } - - vars[nbound] = var; - - vals[nbound] = val; - // clear the hash code. hash = 0; - nbound++; - } public String toString() { - final StringBuilder sb = new StringBuilder(); - - sb.append("{"); + final ST cur = current(); + + final StringBuilder sb = new StringBuilder(); - for(int i=0; i<nbound; i++) { - - if(i>0) sb.append(", "); - - sb.append(vars[i]); - - sb.append("="); - - sb.append(vals[i]); - - } - - sb.append("}"); - - return sb.toString(); - - } - - public ArrayBindingSet clone() { + sb.append("{"); - return new ArrayBindingSet(this); - - } + for (int i = 0; i < cur.nbound; i++) { - /** - * Return a shallow copy of the binding set, eliminating unecessary - * variables. - */ - public ArrayBindingSet copy(final IVariable[] variablesToKeep) { + if (i > 0) + sb.append(", "); - // bitflag for the old binding set - final boolean[] keep = new boolean[nbound]; - - // for each var in the old binding set, see if we need to keep it - for (int i = 0; i < nbound; i++) { - - final IVariable v = vars[i]; + sb.append(cur.vars[i]); - keep[i] = false; - for (IVariable k : variablesToKeep) { - if (v == k) { - keep[i] = true; - break; - } - } - + sb.append("="); + + sb.append(cur.vals[i]); + } - // allocate the new vars - final IVariable[] newVars = new IVariable[vars.length]; + sb.append("}"); - // allocate the new vals - final IConstant[] newVals = new IConstant[vals.length]; + return sb.toString(); - // fill in the new binding set based on the keep bitflag - int newbound = 0; - for (int i = 0; i < nbound; i++) { - if (keep[i]) { - newVars[newbound] = vars[i]; - newVals[newbound] = vals[i]; - newbound++; - } - } - - ArrayBindingSet bs = new ArrayBindingSet(newVars, newVals); - bs.nbound = newbound; - - return bs; - } - + public boolean equals(final Object t) { if (this == t) @@ -433,14 +649,16 @@ return false; final IBindingSet o = (IBindingSet)t; - - if (nbound != o.size()) + + final ST cur = current(); + + if (cur.nbound != o.size()) return false; - for(int i=0; i<nbound; i++) { + for(int i=0; i<cur.nbound; i++) { - IConstant<?> o_val = o.get ( vars [ i ] ) ; - if ( null == o_val || !vals[i].equals( o_val )) + final IConstant<?> o_val = o.get ( cur.vars [ i ] ) ; + if ( null == o_val || !cur.vals[i].equals( o_val )) return false; } @@ -455,12 +673,14 @@ int result = 0; - for (int i = 0; i < nbound; i++) { + final ST cur = current(); + + for (int i = 0; i < cur.nbound; i++) { - if (vals[i] == null) + if (cur.vals[i] == null) continue; - result ^= vals[i].hashCode(); + result ^= cur.vals[i].hashCode(); } @@ -471,5 +691,5 @@ } private int hash; - + } Modified: 
branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bindingSet/EmptyBindingSet.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bindingSet/EmptyBindingSet.java 2010-12-11 16:04:20 UTC (rev 4003) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bindingSet/EmptyBindingSet.java 2010-12-12 22:22:01 UTC (rev 4004) @@ -44,6 +44,8 @@ * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ + * + * @todo test suite? */ final public class EmptyBindingSet implements IBindingSet, Serializable { @@ -158,5 +160,13 @@ return EmptyIterator.DEFAULT; } + + public void push() { + throw new IllegalStateException(); + } + public void pop(boolean save) { + throw new UnsupportedOperationException(); + } + } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bindingSet/HashBindingSet.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bindingSet/HashBindingSet.java 2010-12-11 16:04:20 UTC (rev 4003) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bindingSet/HashBindingSet.java 2010-12-12 22:22:01 UTC (rev 4004) @@ -29,82 +29,242 @@ package com.bigdata.bop.bindingSet; import java.util.Collections; -import java.util.HashMap; import java.util.Iterator; import java.util.LinkedHashMap; -import java.util.LinkedList; import java.util.Map; +import java.util.Stack; import java.util.Map.Entry; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IConstant; import com.bigdata.bop.IVariable; -import com.bigdata.bop.Var; /** - * {@link IBindingSet} backed by a {@link HashMap}. + * {@link IBindingSet} backed by a {@link LinkedHashMap}. + * <p> + * Note: A {@link LinkedHashMap} provides a fast iterator, which we use a bunch. + * However, {@link IBindingSet}s are inherently unordered collections of + * bindings so the order preservation aspect of the {@link LinkedHashMap} is not + * relied upon. * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ - * - * @todo Since {@link Var}s allow reference testing, a faster implementation - * could be written based on a {@link LinkedList}. Just scan the list - * until the entry is found with the desired {@link Var} reference and - * then return it. */ public class HashBindingSet implements IBindingSet { private static final long serialVersionUID = -2989802566387532422L; - /** - * Note: A {@link LinkedHashMap} provides a fast iterator, which we use a - * bunch. - */ - private LinkedHashMap<IVariable, IConstant> map; +// /** +// * Note: A {@link LinkedHashMap} provides a fast iterator, which we use a +// * bunch. +// */ +// private final LinkedHashMap<IVariable, IConstant> map; + /** + * The stack of symbol tables. Each symbol table is a mapping from an + * {@link IVariable} onto its non-<code>null</code> bound {@link IConstant}. + * The stack is initialized with an empty symbol table. Symbol tables may be + * pushed onto the stack or popped off of the stack, but the stack MAY NOT + * become empty. + */ + private final Stack<LinkedHashMap<IVariable, IConstant>> stack; + + /** + * Return the symbol table on the top of the stack. + */ + private LinkedHashMap<IVariable, IConstant> current() { + + return stack.peek(); + + } + + public void push() { + + // The current symbol table. + final LinkedHashMap<IVariable, IConstant> cur = current(); + + // Create a new symbol table. 
+ final LinkedHashMap<IVariable, IConstant> tmp = new LinkedHashMap<IVariable, IConstant>( + cur.size()); + + // Push the new symbol table onto the stack. + stack.push(tmp); + + /* + * Make a copy of each entry in the symbol table which was on the top of + * the stack when we entered this method, inserting the entries into the + * new symbol table as we go. This avoids side effects of mutation on + * the nested symbol tables and also ensures that we do not need to read + * through to the nested symbol tables when answering a query about the + * current symbol table. The only down side of this is that naive + * serialization is that much less compact. + */ + for (Map.Entry<IVariable, IConstant> e : cur.entrySet()) { + + tmp.put(e.getKey(), e.getValue()); + + } + + } + + public void pop(final boolean save) { + + if (stack.size() < 2) { + /* + * The stack may never become empty. Therefore there must be at + * least two symbol tables on the stack for a pop() request. + */ + throw new IllegalArgumentException(); + } + + // Pop the symbol table off of the top of the stack. + final LinkedHashMap<IVariable,IConstant> old = stack.pop(); + + if (save) { + + // discard the current symbol table. + stack.pop(); + + // replacing it with the symbol table which we popped off the stack. + stack.push(old); + + } else { + + // clear the hash code. + hash = 0; + + } + + } + /** * New empty binding set. */ public HashBindingSet() { + + stack = new Stack<LinkedHashMap<IVariable, IConstant>>(); + + stack.push(new LinkedHashMap<IVariable, IConstant>()); - map = new LinkedHashMap<IVariable, IConstant>(); - } /** - * Copy constructor. + * Copy constructor (used by clone, copy). * * @param src */ - protected HashBindingSet(final HashBindingSet src) { + protected HashBindingSet(final HashBindingSet src, final IVariable[] variablesToKeep) { - map = new LinkedHashMap<IVariable, IConstant>(src.map); - - } + stack = new Stack<LinkedHashMap<IVariable,IConstant>>(); + final int stackSize = src.stack.size(); + + int depth = 1; + + for (LinkedHashMap<IVariable, IConstant> srcLst : src.stack) { + + /* + * Copy the source bindings. + * + * Note: If a restriction exists on the variables to be copied, then + * it is applied onto the the top level of the stack. If the symbol + * table is saved when it is pop()'d, then the modified bindings + * will replace the parent symbol table on the stack. + */ + final LinkedHashMap<IVariable,IConstant> tmp = copy(srcLst, + depth == stackSize ? variablesToKeep : null); + + // Push onto the stack. + stack.push(tmp); + + } + + } + + /** + * Return a copy of the source list. + * + * @param src + * The source list. + * @param variablesToKeep + * When non-<code>null</code>, only the bindings for the + * variables listed in this array will copied. + * + * @return The copy. + */ + private LinkedHashMap<IVariable, IConstant> copy( + final LinkedHashMap<IVariable, IConstant> src, + final IVariable[] variablesToKeep) { + + final LinkedHashMap<IVariable, IConstant> dst = new LinkedHashMap<IVariable, IConstant>( + variablesToKeep != null ? 
variablesToKeep.length : src.size()); + + final Iterator<Map.Entry<IVariable, IConstant>> itr = src.entrySet() + .iterator(); + + while (itr.hasNext()) { + + final Map.Entry<IVariable, IConstant> e = itr.next(); + + boolean keep = true; + + if (variablesToKeep != null) { + + keep = false; + + for (IVariable<?> x : variablesToKeep) { + + if (x == e.getKey()) { + + keep = true; + + break; + + } + + } + + } + + if (keep) + dst.put(e.getKey(), e.getValue()); + + } + + return dst; + + } + /** - * Copy constructor. + * Package private constructor used by the unit tests. * * @param src */ - public HashBindingSet(final IBindingSet src) { + HashBindingSet(final IBindingSet src) { - map = new LinkedHashMap<IVariable, IConstant>(src.size()); - + this(); + final Iterator<Map.Entry<IVariable, IConstant>> itr = src.iterator(); while (itr.hasNext()) { final Map.Entry<IVariable, IConstant> e = itr.next(); - map.put(e.getKey(), e.getValue()); + set(e.getKey(), e.getValue()); } } - public HashBindingSet(final IVariable[] vars, final IConstant[] vals) { + /** + * Package private constructor used by the unit tests. + * @param vars + * @param vals + */ + HashBindingSet(final IVariable[] vars, final IConstant[] vals) { + this(); + if (vars == null) throw new IllegalArgumentException(); @@ -114,22 +274,32 @@ if (vars.length != vals.length) throw new IllegalArgumentException(); - map = new LinkedHashMap<IVariable, IConstant>(vars.length); - for (int i = 0; i < vars.length; i++) { - map.put(vars[i], vals[i]); + set(vars[i], vals[i]); } } + public HashBindingSet clone() { + + return new HashBindingSet(this, null /* variablesToKeep */); + + } + + public HashBindingSet copy(final IVariable[] variablesToKeep) { + + return new HashBindingSet(this/* src */, variablesToKeep); + + } + public boolean isBound(final IVariable var) { if (var == null) throw new IllegalArgumentException(); - return map.containsKey(var); + return current().containsKey(var); } @@ -138,7 +308,7 @@ if (var == null) throw new IllegalArgumentException(); - return map.get(var); + return current().get(var); } @@ -150,7 +320,7 @@ if (val == null) throw new IllegalArgumentException(); - map.put(var,val); + current().put(var,val); // clear the hash code. hash = 0; @@ -162,7 +332,7 @@ if (var == null) throw new IllegalArgumentException(); - map.remove(var); + current().remove(var); // clear the hash code. hash = 0; @@ -171,7 +341,7 @@ public void clearAll() { - map.clear(); + current().clear(); // clear the hash code. hash = 0; @@ -186,7 +356,7 @@ int i = 0; - final Iterator<Map.Entry<IVariable, IConstant>> itr = map.entrySet() + final Iterator<Map.Entry<IVariable, IConstant>> itr = current().entrySet() .iterator(); while (itr.hasNext()) { @@ -217,52 +387,22 @@ */ public Iterator<Entry<IVariable, IConstant>> iterator() { - return Collections.unmodifiableMap(map).entrySet().iterator(); + return Collections.unmodifiableMap(current()).entrySet().iterator(); } public Iterator<IVariable> vars() { - return Collections.unmodifiableSet(map.keySet()).iterator(); + return Collections.unmodifiableSet(current().keySet()).iterator(); } public int size() { - return map.size(); + return current().size(); } - public HashBindingSet clone() { - - return new HashBindingSet( this ); - - } - - /** - * Return a shallow copy of the binding set, eliminating unecessary - * variables. 
- */ - public HashBindingSet copy(final IVariable[] variablesToKeep) { - - final HashBindingSet bs = new HashBindingSet(); - - for (IVariable<?> var : variablesToKeep) { - - final IConstant<?> val = map.get(var); - - if (val != null) { - - bs.map.put(var, val); - - } - - } - - return bs; - - } - public boolean equals(final Object t) { if (this == t) @@ -276,7 +416,7 @@ if (size() != o.size()) return false; - final Iterator<Map.Entry<IVariable,IConstant>> itr = map.entrySet().iterator(); + final Iterator<Map.Entry<IVariable,IConstant>> itr = current().entrySet().iterator(); while(itr.hasNext()) { @@ -288,7 +428,7 @@ // if (!o.isBound(vars[i])) // return false; - IConstant<?> o_val = o.get ( var ) ; + final IConstant<?> o_val = o.get ( var ) ; if (null == o_val || !val.equals(o_val)) return false; @@ -304,7 +444,7 @@ int result = 0; - for(IConstant<?> c : map.values()) { + for(IConstant<?> c : current().values()) { if (c == null) continue; Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bindingSet/ListBindingSet.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bindingSet/ListBindingSet.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bindingSet/ListBindingSet.java 2010-12-12 22:22:01 UTC (rev 4004) @@ -0,0 +1,527 @@ +package com.bigdata.bop.bindingSet; + +import java.io.Serializable; +import java.util.Collections; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Stack; + +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IConstant; +import com.bigdata.bop.IVariable; +import com.bigdata.bop.Var; + +import cutthecrap.utils.striterators.Resolver; +import cutthecrap.utils.striterators.Striterator; + +/** + * <p>An {@link IBindingSet} based on a {@link LinkedList}. Since {@link Var}s may + * be compared using <code>==</code> this should be faster than a hash map for + * most operations unless the binding set has a large number of entries. + * </p><p> + * Note: {@link #push()} and {@link #pop(boolean)} are implemented by making a + * copy of the current symbol table with distinct {@link Map.Entry} objects. If + * the symbol table is saved when it is {@link #pop(boolean) popped), then it + * simply replaces the pre-existing symbol table which was uncovered when it + * was popped off of the stack. This design has several advantages, including: + * <ul> + * <li>Methods such as {@link #get(IVariable)}, {@link #set(IVariable, IConstant)}, + * and {@link #size()} can be written solely in terms of the current symbol table.</li> + * <li>{@link #clear(IVariable)} removes the {@link Map.Entry} from the + * current symbol table rather than introducing <code>null</code> values or + * delete markers.</li> + * </ul> + * </p> + * The only down side to this approach is that the serialized representation of + * the {@link IBindingSet} is more complex. However, java default serialization + * will do a good job by providing back references for the object graph. + * + * @version $Id: HashBindingSet.java 3836 2010-10-22 11:59:15Z thompsonbry $ + */ +public class ListBindingSet implements IBindingSet { + + private static final long serialVersionUID = 1L; + + /** + * A (var,val) entry. 
+ */ + private static class E implements Map.Entry<IVariable<?>, IConstant<?>>, Serializable { + + /** + * + */ + private static final long serialVersionUID = 1L; + + private final IVariable<?> var; + + private IConstant<?> val; + + E(final IVariable<?> var, final IConstant<?> val) { + this.var = var; + this.val = val; + } + + public IVariable<?> getKey() { + return var; + } + + public IConstant<?> getValue() { + return val; + } + + public IConstant<?> setValue(final IConstant<?> value) { + if (value == null) { + // Null bindings are not permitted. + throw new IllegalArgumentException(); + } + final IConstant<?> tmp = this.val; + this.val = value; + return tmp; + } + }; + + /** + * The stack of symbol tables. Each symbol table is a mapping from an + * {@link IVariable} onto its non-<code>null</code> bound {@link IConstant}. + * The stack is initialized with an empty symbol table. Symbol tables may be + * pushed onto the stack or popped off of the stack, but the stack MAY NOT + * become empty. + */ + private final Stack<List<E>> stack; + + /** + * Return the symbol table on the top of the stack. + */ + private List<E> current() { + + return stack.peek(); + + } + + public void push() { + + // The current symbol table. + final List<E> cur = current(); + + // Create a new symbol table. + final List<E> tmp = new LinkedList<E>(); + + // Push the new symbol table onto the stack. + stack.push(tmp); + + /* + * Make a copy of each entry in the symbol table which was on the top of + * the stack when we entered this method, inserting the entries into the + * new symbol table as we go. This avoids side effects of mutation on + * the nested symbol tables and also ensures that we do not need to read + * through to the nested symbol tables when answering a query about the + * current symbol table. The only down side of this is that naive + * serialization is that much less compact. + */ + for (E e : cur) { + + tmp.add(new E(e.var, e.val)); + + } + + } + + public void pop(final boolean save) { + + if (stack.size() < 2) { + /* + * The stack may never become empty. Therefore there must be at + * least two symbol tables on the stack for a pop() request. + */ + throw new IllegalArgumentException(); + } + + // Pop the symbol table off of the top of the stack. + final List<E> old = stack.pop(); + + if (save) { + + // discard the current symbol table. + stack.pop(); + + // replacing it with the symbol table which we popped off the stack. + stack.push(old); + + } else { + + // clear the hash code. + hash = 0; + + } + + } + + /** + * Create an empty binding set. + */ + public ListBindingSet() { + + stack = new Stack<List<E>>(); + + stack.push(new LinkedList<E>()); + + } + + /** + * Package private constructor used by the unit tests. + * @param vars + * @param vals + */ + ListBindingSet(final IVariable[] vars, final IConstant[] vals) { + + this(); + + if (vars == null) + throw new IllegalArgumentException(); + + if (vals == null) + throw new IllegalArgumentException(); + + if (vars.length != vals.length) + throw new IllegalArgumentException(); + + for (int i = 0; i < vars.length; i++) { + + set(vars[i], vals[i]); + + } + + } + + /** + * Copy constructor (used by clone, copy). + * + * @param src + * The source to be copied. + * @param variablesToKeep + * The variables to be retained for the symbol table on the top + * of the stack (optional). 
+ */ + protected ListBindingSet(final ListBindingSet src, + final IVariable[] variablesToKeep) { + + stack = new Stack<List<E>>(); + + final int stackSize = src.stack.size(); + + int depth = 1; + + for (List<E> srcLst : src.stack) { + + /* + * Copy the source bindings. + * + * Note: If a restriction exists on the variables to be copied, then + * it is applied onto the the top level of the stack. If the symbol + * table is saved when it is pop()'d, then the modified bindings + * will replace the parent symbol table on the stack. + */ + final List<E> tmp = copy(srcLst, + depth == stackSize ? variablesToKeep : null); + + // Push onto the stack. + stack.push(tmp); + + } + + } + + /** + * Return a copy of the source list. The copy will use new {@link E}s to + * represent the bindings so changes to the copy will not effect the source. + * + * @param src + * The source list. + * @param variablesToKeep + * When non-<code>null</code>, only the bindings for the + * variables listed in this array will copied. + * + * @return The copy. + */ + private List<E> copy(final List<E> src, final IVariable[] variablesToKeep) { + + final List<E> dst = new LinkedList<E>(); + + final Iterator<E> itr = src.iterator(); + + while (itr.hasNext()) { + + final E e = itr.next(); + + boolean keep = true; + + if (variablesToKeep != null) { + + keep = false; + + for (IVariable<?> x : variablesToKeep) { + + if (x == e.var) { + + keep = true; + + break; + + } + + } + + } + + if (keep) + dst.add(new E(e.var, e.val)); + + } + + return dst; + + } + + public ListBindingSet clone() { + + return new ListBindingSet(this, null /* variablesToKeep */); + + } + + public IBindingSet copy(final IVariable[] variablesToKeep) { + + return new ListBindingSet(this/*src*/, variablesToKeep); + + } + + public void clear(final IVariable var) { + + if (var == null) + throw new IllegalArgumentException(); + + final List<E> cur = current(); + + for(E e : cur) { + + if(e.var == var) { + + cur.remove(e); + + // clear the hash code. + hash = 0; + + return; + + } + + } + + } + + public void clearAll() { + + current().clear(); + + // clear the hash code. + hash = 0; + + } + + public IConstant get(final IVariable var) { + + if (var == null) + throw new IllegalArgumentException(); + + final List<E> cur = current(); + + for(E e : cur) { + + if(e.var == var) { + + return e.val; + + } + + } + + return null; + + } + + public boolean isBound(IVariable var) { + + if (var == null) + throw new IllegalArgumentException(); + + final List<E> cur = current(); + + for(E e : cur) { + + if(e.var == var) { + + return true; + + } + + } + + return false; + + } + + @SuppressWarnings("unchecked") + public Iterator<Map.Entry<IVariable, IConstant>> iterator() { + + return (Iterator<Map.Entry<IVariable, IConstant>>) ((List) Collections + .unmodifiableList(current())).iterator(); + + } + + public void set(final IVariable var, final IConstant val) { + + if (var == null) + throw new IllegalArgumentException(); + + if (val == null) + throw new IllegalArgumentException(); + + final List<E> cur = current(); + + for (E e : cur) { + + if (e.var == var) { + + e.val = val; + + // clear the hash code. + hash = 0; + + return; + + } + + } + + cur.add(new E(var, val)); + + // clear the hash code. 
+ hash = 0; + + } + + public int size() { + + return current().size(); + + } + + @SuppressWarnings("unchecked") + public Iterator<IVariable> vars() { + return (Iterator<IVariable>) new Striterator(Collections + .unmodifiableList(current()).iterator()) + .addFilter(new Resolver() { + private static final long serialVersionUID = 1L; + + @Override + protected Object resolve(Object obj) { + return ((E) obj).var; + } + }); + } + + public String toString() { + + final StringBuilder sb = new StringBuilder(); + + sb.append("{ "); + + int i = 0; + + final Iterator<E> itr = current().iterator(); + + while (itr.hasNext()) { + + if (i > 0) + sb.append(", "); + + final E entry = itr.next(); + + sb.append(entry.getKey()); + + sb.append("="); + + sb.append(entry.getValue()); + + i++; + + } + + sb.append(" }"); + + return sb.toString(); + + } + + public boolean equals(final Object t) { + + if (this == t) + return true; + + if(!(t instanceof IBindingSet)) + return false; + + final IBindingSet o = (IBindingSet) t; + + if (size() != o.size()) + return false; + + final Iterator<E> itr = current().iterator(); + + while(itr.hasNext()) { + + final E entry = itr.next(); + + final IVariable<?> var = entry.getKey(); + + final IConstant<?> val = entry.getValue(); + +// if (!o.isBound(vars[i])) +// return false; + final IConstant<?> o_val = o.get ( var ) ; + if (null == o_val || !val.equals(o_val)) + return false; + + } + + return true; + + } + + public int hashCode() { + + if (hash == 0) { + + int result = 0; + + final List<E> cur = current(); + + for(E e : cur) { + + if (e.val == null) + continue; + + result ^= e.val.hashCode(); + + } + + hash = result; + + } + return hash; + + } + private int hash; + +} Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/DistinctBindingSetOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/DistinctBindingSetOp.java 2010-12-11 16:04:20 UTC (rev 4003) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/DistinctBindingSetOp.java 2010-12-12 22:22:01 UTC (rev 4004) @@ -250,8 +250,16 @@ // System.err.println("accepted: " // + Arrays.toString(vals)); - accepted.add(new HashBindingSet(vars, vals)); + final HashBindingSet tmp = new HashBindingSet(); + + for (int i = 0; i < vars.length; i++) { + tmp.set(vars[i], vals[i]); + + } + + accepted.add(tmp); + naccepted++; } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/bindingSet/TestAll.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/bindingSet/TestAll.java 2010-12-11 16:04:20 UTC (rev 4003) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/bindingSet/TestAll.java 2010-12-12 22:22:01 UTC (rev 4004) @@ -60,9 +60,12 @@ final TestSuite suite = new TestSuite("binding sets"); + // @todo test EmptyBindingSet + // test binding set impls. 
suite.addTestSuite(TestArrayBindingSet.class); suite.addTestSuite(TestHashBindingSet.class); + suite.addTestSuite(TestListBindingSet.class); return suite; Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/bindingSet/TestArrayBindingSet.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/bindingSet/TestArrayBindingSet.java 2010-12-11 16:04:20 UTC (rev 4003) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/bindingSet/TestArrayBindingSet.java 2010-12-12 22:22:01 UTC (rev 4004) @@ -59,24 +59,24 @@ */ public TestArrayBindingSet ( String name ) { super ( name ) ; } - /** - * Unit test for {@link ArrayBindingSet#ArrayBindingSet(ArrayBindingSet)} - */ - public void testConstructorArrayBindingSet () - { - try { assertTrue ( null != new ArrayBindingSet ( null ) ) ; fail ( "IllegalArgumentException expected, copy from was null" ) ; } - catch ( IllegalArgumentException e ) {} +// /** +// * Unit test for {@link ArrayBindingSet#ArrayBindingSet(ArrayBindingSet)} +// */ +// public void testConstructorArrayBindingSet () +// { +// try { assertTrue ( null != new ArrayBindingSet ( null ) ) ; fail ( "IllegalArgumentException expected, copy from was null" ) ; } +// catch ( IllegalArgumentException e ) {} +// +// Var<?> var1 = Var.var ( "a" ) ; +// Var<?> var2 = Var.var ( "b" ) ; +// Constant<Integer> val1 = new Constant<Integer> ( 1 ) ; +// Constant<Integer> val2 = new Constant<Integer> ( 2 ) ; +// IVariable<?> vars [] = new IVariable [] { var1, var2 } ; +// IConstant<?> vals [] = new IConstant [] { val1, val2 } ; +// +// assertEqual ( new ArrayBindingSet ( new ArrayBindingSet ( vars, vals ) ), vars, vals ) ; +// } - Var<?> var1 = Var.var ( "a" ) ; - Var<?> var2 = Var.var ( "b" ) ; - Constant<Integer> val1 = new Constant<Integer> ( 1 ) ; - Constant<Integer> val2 = new Constant<Integer> ( 2 ) ; - IVariable<?> vars [] = new IVariable [] { var1, var2 } ; - IConstant<?> vals [] = new IConstant [] { val1, val2 } ; - - assertEqual ( new ArrayBindingSet ( new ArrayBindingSet ( vars, vals ) ), vars, vals ) ; - } - /** * Unit test for {@link ArrayBindingSet#ArrayBindingSet(IVariable[],IConstant[])} */ Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/bindingSet/TestIBindingSet.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/bindingSet/TestIBindingSet.java 2010-12-11 16:04:20 UTC (rev 4003) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/bindingSet/TestIBindingSet.java 2010-12-12 22:22:01 UTC (rev 4004) @@ -31,23 +31,29 @@ import java.util.Iterator; import java.util.Map; +import junit.framework.TestCase2; + import com.bigdata.bop.Constant; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IConstant; import com.bigdata.bop.IVariable; import com.bigdata.bop.Var; +import com.bigdata.io.SerializerUtil; -import junit.framework.TestCase2; - /** * Unit tests for {@link IBindingSet}. - * + * <p> * Note: - * a) these tests assume that the values held for a given key are not cloned, - * i.e. comparison is done by '==' and not '.equals' - * b) keys with the same 'name' are a unique object. + * <ul> + * <li>a) these tests assume that the values held for a given key are not + * cloned, i.e. 
comparison is done by '==' and not '.equals' (this is true + * except for the Serializatoin tests, where the {@link Var} references will be + * preserved but the {@link IConstant}s will be distinct).</li> + * <li>b) keys with the same 'name' are a unique object.</li> + * </ul> * * @author <a href="mailto:dm...@us...">David MacMillan</a> + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ */ public abstract class TestIBindingSet extends TestCase2 { @@ -259,7 +265,13 @@ IBindingSet bs = newBindingSet ( new IVariable [] { var1, var2, var3, var4, var5 } , new IConstant [] { val1, val2, val3, val4, val5 } ) ; - + + assertEqual( + bs.copy(null/* variablesToKeep */), // + new IVariable[] { var1, var2, var3, var4, var5 }, + new IConstant[] { val1, val2, val3, val4, val5 }// + ); + IBindingSet bs2 = bs.copy ( new IVariable [] { var1, var3, var5 } ) ; assertTrue ( 3 == bs2.size () ) ; @@ -321,13 +333,141 @@ assertTrue ( "expected equal: same bindings after mutation", bs1.hashCode () == bs4.hashCode () ) ; } + /* + * push()/pop() tests. + * + * Note: In addition to testing push() and pop(save:boolean), we have to + * test that copy() and clone() operate correctly in the presence of nested + * symbol tables, and that the visitation patterns for the bindings operate + * correctly when there are nested symbol tables. For example, if there "y" + * is bound at level zero, a push() is executed, and then "x" is bound at + * level one. The visitation pattern must visit both "x" and "y". + */ + + public void test_nestedSymbolTables() { + + final Var<?> var1 = Var.var ( "a" ) ; + final Var<?> var2 = Var.var ( "b" ) ; + final Constant<Integer> val1 = new Constant<Integer> ( 1 ) ; + final Constant<Integer> val2 = new Constant<Integer> ( 2 ) ; + + final IBindingSet bs1 = newBindingSet(2/* size */); + + bs1.set(var1,val1); + + /* + * push a symbol table onto the stack + */ + bs1.push(); + + bs1.set(var2, val2); + + bs1.pop(false/* save */); + + // verify the modified bindings were discarded. + assertEqual(bs1, new IVariable[] { var1 }, new IConstant[] { val1 }); + + /* + * push a symbol table onto the stack + */ + bs1.push(); + + bs1.set(var2, val2); + + bs1.pop(true/* save */); + + // verify the modified bindings were saved. + assertEqual(bs1, new IVariable[] { var1, var2 }, new IConstant[] { + val1, val2 }); + } + + public void test_serialization() { + + final Var<?> var1 = Var.var ( "a" ) ; + final Var<?> var2 = Var.var ( "b" ) ; + final Constant<Integer> val1 = new Constant<Integer> ( 1 ) ; + final Constant<Integer> val2 = new Constant<Integer> ( 2 ) ; + + final IBindingSet bs1 = newBindingSet(2/* size */); + + bs1.set(var1, val1); + + bs1.set(var2, val2); + + assertEqual(bs1, new IVariable[] { var1, var2 }, new IConstant[] { + val1, val2 }); + + final IBindingSet bs2 = (IBindingSet) SerializerUtil + .deserialize(SerializerUtil.serialize(bs1)); + + assertEquals(bs1, bs1); + + } + + /* + * Hooks for testing specific implementations. + */ + protected abstract IBindingSet newBindingSet ( IVariable<?> vars [], IConstant<?> vals [] ) ; protected abstract IBindingSet newBindingSet ( int size ) ; + /** + * Compare actual and expected, where the latter is expressed using + * (vars,vals). + * <p> + * Note: This does not follow the junit pattern for asserts, which puts the + * expected data first. 
+ * + * @param actual + * @param vars + * @param vals + */ protected void assertEqual ( IBindingSet actual, IVariable<?> vars [], IConstant<?> vals [] ) { assertTrue ( "wrong size", actual.size () == vars.length ) ; for ( int i = 0; i < vars.length; i++ ) assertTrue ( "wrong value", vals [ i ] == actual.get ( vars [ i ] ) ) ; } -} \ No newline at end of file + + protected void assertEquals(IBindingSet expected, IBindingSet actual) { + + // expected variables in some order. + final Iterator<IVariable> evars = expected.vars(); + + // actual variables in some order (the order MAY be different). + final Iterator<IVariable> avars = actual.vars(); + + while(evars.hasNext()) { + + // Some variable for which we expect a binding. + f... [truncated message content] |
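To illustrate the push()/pop(save:boolean) contract introduced above, here is a minimal usage sketch against the IBindingSet API as committed in rev 4004. It assumes the ListBindingSet, Var and Constant classes from this branch and mirrors test_nestedSymbolTables; it is a sketch only, not part of the patch.

import com.bigdata.bop.Constant;
import com.bigdata.bop.IBindingSet;
import com.bigdata.bop.Var;
import com.bigdata.bop.bindingSet.ListBindingSet;

public class PushPopSketch {

    public static void main(final String[] args) {

        final IBindingSet bset = new ListBindingSet();

        // Bind against the top-level symbol table.
        bset.set(Var.var("a"), new Constant<Integer>(1));

        // Enter a conditional group: push a new symbol table onto the stack.
        bset.push();
        bset.set(Var.var("b"), new Constant<Integer>(2));

        // The group failed: pop(save:=false) discards the bindings made
        // within the group.
        bset.pop(false/* save */);
        System.out.println(bset); // { a=1 }

        // Try again; this time the group succeeds.
        bset.push();
        bset.set(Var.var("b"), new Constant<Integer>(2));

        // pop(save:=true) replaces the uncovered parent symbol table with
        // the one popped off the stack, making the bindings permanent.
        bset.pop(true/* save */);
        System.out.println(bset); // { a=1, b=2 }
    }
}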
From: <tho...@us...> - 2010-12-18 14:31:30
|
Revision: 4013 http://bigdata.svn.sourceforge.net/bigdata/?rev=4013&view=rev Author: thompsonbry Date: 2010-12-18 14:31:23 +0000 (Sat, 18 Dec 2010) Log Message: ----------- Some work on optional join group support, primarily declaring the new annotations. I need to apply the annotations to the unit tests and modify the QueryEngine logic for setting up the altSink to handle the push/pop(bool:save) as indicated by the annotations. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngineOptionalJoins.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/join/TestPipelineJoin.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java 2010-12-17 15:05:06 UTC (rev 4012) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java 2010-12-18 14:31:23 UTC (rev 4013) @@ -30,12 +30,9 @@ import java.util.Map; import java.util.concurrent.Callable; import java.util.concurrent.FutureTask; -import java.util.concurrent.TimeUnit; import com.bigdata.bop.engine.BOpStats; import com.bigdata.bop.engine.QueryEngine; -import com.bigdata.relation.accesspath.BlockingBuffer; -import com.bigdata.relation.accesspath.IBlockingBuffer; /** * Abstract base class for pipeline operators where the data moving along the @@ -85,6 +82,48 @@ boolean DEFAULT_SHARED_STATE = false; + /** + * Annotation used to mark the set of non-optional joins which may be + * input to either the static or runtime query optimizer. Joins within a + * join graph may be freely reordered by the query optimizer in order to + * minimize the amount of work required to compute the solutions. + * <p> + * Note: Optional joins MAY NOT appear within the a join graph. Optional + * joins SHOULD be evaluated as part of the "tail plan" following the + * join graph, but before operations such as SORT, DISTINCT, etc. + * + * @todo We should be able to automatically apply the static or runtime + * query optimizers to an operator tree using this annotation to + * identify the join graphs. + */ + String JOIN_GRAPH = PipelineOp.class.getName() + ".joinGraph"; + + /** + * Annotation marks a high level join group, which may include optional + * joins. Join groups are marked in order to decide the re-entry point + * in the query plan when a join within an optional join group fails. + * Also, the top-level join group is not marked -- only nested join + * groups are marked. This is used by the decision rule to handle do + * {@link IBindingSet#push()} when entering a + * <p> + * This is different from a {@link #JOIN_GRAPH} primarily in that the + * latter may not include optional joins. + */ + String JOIN_GROUP = PipelineOp.class.getName() + ".joinGroup"; + + /** + * Annotation is used to designate the target when a join within an + * optional join group fails. The value of this annotation must be the + * {@link #JOIN_GROUP} identifier corresponding to the next join group + * in the query plan. The target join group identifier is specified + * (rather than the bopId of the target join) since the joins in the + * target join group may be reordered by the query optimizer. 
The entry + * point for solutions redirected to the {@link #ALT_SINK_GROUP} is + * therefore the first operator in the target {@link #JOIN_GROUP}. This + * decouples the routing decisions from the join ordering decisions. + */ + String ALT_SINK_GROUP = PipelineOp.class.getName() + ".altSinkGroup"; + } /** Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestAll.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestAll.java 2010-12-17 15:05:06 UTC (rev 4012) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestAll.java 2010-12-18 14:31:23 UTC (rev 4013) @@ -75,6 +75,9 @@ // stress test for SliceOp. suite.addTestSuite(TestQueryEngine_Slice.class); + // test suite for optional join groups. + suite.addTestSuite(TestQueryEngineOptionalJoins.class); + // @todo test suite for query evaluation (DISTINCT, ORDER BY, GROUP BY). // suite.addTestSuite(TestQueryEngine2.class); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngineOptionalJoins.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngineOptionalJoins.java 2010-12-17 15:05:06 UTC (rev 4012) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngineOptionalJoins.java 2010-12-18 14:31:23 UTC (rev 4013) @@ -28,17 +28,9 @@ package com.bigdata.bop.engine; import java.util.Iterator; -import java.util.LinkedList; -import java.util.List; import java.util.Map; import java.util.Properties; import java.util.UUID; -import java.util.concurrent.CancellationException; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.Executor; -import java.util.concurrent.FutureTask; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicInteger; import junit.framework.TestCase2; @@ -62,14 +54,10 @@ import com.bigdata.bop.bindingSet.HashBindingSet; import com.bigdata.bop.bset.ConditionalRoutingOp; import com.bigdata.bop.bset.StartOp; -import com.bigdata.bop.constraint.EQ; import com.bigdata.bop.constraint.EQConstant; import com.bigdata.bop.constraint.NEConstant; -import com.bigdata.bop.fed.TestFederatedQueryEngine; import com.bigdata.bop.join.PipelineJoin; import com.bigdata.bop.solutions.SliceOp; -import com.bigdata.bop.solutions.SliceOp.SliceStats; -import com.bigdata.io.DirectBufferPoolAllocator.IAllocationContext; import com.bigdata.journal.BufferMode; import com.bigdata.journal.ITx; import com.bigdata.journal.Journal; @@ -78,16 +66,16 @@ import com.bigdata.striterator.ChunkedArrayIterator; import com.bigdata.striterator.Dechunkerator; import com.bigdata.striterator.ICloseableIterator; -import com.bigdata.util.InnerCause; -import com.bigdata.util.concurrent.LatchedExecutor; -import com.ibm.icu.impl.ByteBuffer; /** - * Test suite for the {@link QueryEngine} against a local database instance. - * <p> - * Note: The {@link BOp}s are unit tested separately. This test suite is focused - * on interactions when {@link BOp}s are chained together in a query, such as a - * sequence of pipeline joins, a slice applied to a query, etc. + * Test suite for handling of optional join groups during query evaluation + * against a local database instance. 
Optional join groups are handled using + * {@link IBindingSet#push()} when entering the join group and + * {@link IBindingSet#pop(boolean)} when exiting the join group. If the join + * group was successful for a given binding set, then <code>save:=true</code> is + * specified for {@link IBindingSet#pop(boolean)} and the applied bindings will + * be visible to the downstream consumer. Otherwise the bindings applied during + * the join group are simply discarded. * * <pre> * -Dlog4j.configuration=bigdata/src/resources/logging/log4j.properties @@ -95,10 +83,6 @@ * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id: TestQueryEngine.java 3950 2010-11-17 02:14:08Z thompsonbry $ - * - * @see TestFederatedQueryEngine - * - * @todo write a unit and stress tests for deadlines. */ public class TestQueryEngineOptionalJoins extends TestCase2 { @@ -270,14 +254,14 @@ */ public void test_query_join2_optionals() throws Exception { - final int startId = 1; - final int joinId1 = 2; - final int predId1 = 3; - final int joinId2 = 4; - final int predId2 = 5; - final int joinId3 = 6; - final int predId3 = 7; - final int sliceId = 8; + final int startId = 1; // + final int joinId1 = 2; // : base join group. + final int predId1 = 3; // (a b) + final int joinId2 = 4; // : joinGroup1 + final int predId2 = 5; // (b c) + final int joinId3 = 6; // : joinGroup1 + final int predId3 = 7; // (c d) + final int sliceId = 8; // final IVariable<?> a = Var.var("a"); final IVariable<?> b = Var.var("b"); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/join/TestPipelineJoin.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/join/TestPipelineJoin.java 2010-12-17 15:05:06 UTC (rev 4012) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/join/TestPipelineJoin.java 2010-12-18 14:31:23 UTC (rev 4013) @@ -535,16 +535,16 @@ } - /** - * Unit tests for optional joins. For an optional join, an alternative sink - * may be specified in the {@link BOpContext}. When specified, it is used if - * the join fails (if not specified, the binding sets which do not join are - * forwarded to the primary sink). Binding sets which join go to the primary - * sink regardless. - * - * @throws ExecutionException - * @throws InterruptedException - */ + /** + * Unit tests for optional joins. For an optional join, an alternative sink + * may be specified for the join. When specified, it is used if the join + * fails (if not specified, the binding sets which do not join are forwarded + * to the primary sink). Binding sets which join go to the primary sink + * regardless. + * + * @throws ExecutionException + * @throws InterruptedException + */ public void test_optionalJoin() throws InterruptedException, ExecutionException { final Var<?> x = Var.var("x"); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2010-12-21 23:07:15
|
Revision: 4039 http://bigdata.svn.sourceforge.net/bigdata/?rev=4039&view=rev Author: thompsonbry Date: 2010-12-21 23:07:08 +0000 (Tue, 21 Dec 2010) Log Message: ----------- Modified the QueryEngine (in RunningQuery) to support push/pop of binding sets when transitioning into or out of a conditional join group. I've updated the first of the unit tests developed by MikeP to show how to annotate the query plan in order to trigger the conditional binding mechanisms. It is clear that the conditional binding is working (solutions where the 3rd join fail discard the results from the 2nd join), but the query plan is overgenerating. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestBOpUtility.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngineOptionalJoins.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BadConditionalGroupIdTypeException.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java 2010-12-21 22:21:54 UTC (rev 4038) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java 2010-12-21 23:07:08 UTC (rev 4039) @@ -131,6 +131,7 @@ * join group. * * @see PipelineOp.Annotations#ALT_SINK_REF + * @see PipelineOp.Annotations#ALT_SINK_GROUP */ public final IBlockingBuffer<E[]> getSink2() { return sink2; Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java 2010-12-21 22:21:54 UTC (rev 4038) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java 2010-12-21 23:07:08 UTC (rev 4039) @@ -426,6 +426,57 @@ } /** + * Lookup the first operator in the specified conditional binding group and + * return its bopId. + * + * @param query + * The query plan. + * @param groupId + * The identifier for the desired conditional binding group. + * + * @return The bopId of the first operator in that conditional binding group + * -or- <code>null</code> if the specified conditional binding group + * does not exist in the query plan. + * + * @throws IllegalArgumentException + * if either argument is <code>null</code>. 
+ * + * @see PipelineOp.Annotations#CONDITIONAL_GROUP + * @see PipelineOp.Annotations#ALT_SINK_GROUP + */ + static public Integer getFirstBOpIdForConditionalGroup(final BOp query, + final Integer groupId) { + if (query == null) + throw new IllegalArgumentException(); + if (groupId == null) + throw new IllegalArgumentException(); + final Iterator<BOp> itr = postOrderIterator(query); + while (itr.hasNext()) { + final BOp t = itr.next(); + final Object x = t.getProperty(PipelineOp.Annotations.CONDITIONAL_GROUP); + if (x != null) { + if (!(x instanceof Integer)) { + throw new BadConditionalGroupIdTypeException( + "Must be Integer, not: " + x.getClass() + ": " + + PipelineOp.Annotations.CONDITIONAL_GROUP); + } + final Integer id = (Integer) t + .getProperty(PipelineOp.Annotations.CONDITIONAL_GROUP); + if(id.equals(groupId)) { + /* + * Return the BOpId associated with the first operator in + * the pre-order traversal of the query plan which has the + * specified groupId. + */ + return t.getId(); + } + } + } + // No such groupId in the query plan. + return null; + } + + /** * Return the parent of the operator in the operator tree (this does not * search the annotations, just the children). * <p> Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BadConditionalGroupIdTypeException.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BadConditionalGroupIdTypeException.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BadConditionalGroupIdTypeException.java 2010-12-21 23:07:08 UTC (rev 4039) @@ -0,0 +1,22 @@ +package com.bigdata.bop; + +/** + * Exception thrown when a {@link PipelineOp.Annotations#CONDITIONAL_GROUP} is + * not an {@link Integer}. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id: BadBOpIdTypeException.java 3466 2010-08-27 14:28:04Z + * thompsonbry $ + */ +public class BadConditionalGroupIdTypeException extends RuntimeException { + + /** + * @param msg + */ + public BadConditionalGroupIdTypeException(String msg) { + super(msg); + } + + private static final long serialVersionUID = 1L; + +} Property changes on: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BadConditionalGroupIdTypeException.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java 2010-12-21 22:21:54 UTC (rev 4038) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java 2010-12-21 23:07:08 UTC (rev 4039) @@ -33,10 +33,18 @@ import com.bigdata.bop.engine.BOpStats; import com.bigdata.bop.engine.QueryEngine; +import com.bigdata.bop.solutions.SliceOp; /** * Abstract base class for pipeline operators where the data moving along the * pipeline is chunks of {@link IBindingSet}s. + * <p> + * The top-level of a query plan is composed of a required + * {@link Annotations#JOIN_GRAPH}s followed by a mixture of optional joins and + * {@link Annotations#CONDITIONAL_GROUP}s. A + * {@link Annotations#CONDITIONAL_GROUP} will have at least one required join + * (in a {@link Annotations#JOIN_GRAPH}) followed by zero or more optional + * joins. 
* * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ @@ -61,6 +69,8 @@ * The value of the annotation is the {@link BOp.Annotations#BOP_ID} of * the ancestor in the operator tree which serves as the alternative * sink for binding sets (default is no alternative sink). + * + * @see #ALT_SINK_GROUP */ String ALT_SINK_REF = PipelineOp.class.getName() + ".altSinkRef"; @@ -82,46 +92,73 @@ boolean DEFAULT_SHARED_STATE = false; - /** - * Annotation used to mark the set of non-optional joins which may be - * input to either the static or runtime query optimizer. Joins within a - * join graph may be freely reordered by the query optimizer in order to - * minimize the amount of work required to compute the solutions. - * <p> - * Note: Optional joins MAY NOT appear within the a join graph. Optional - * joins SHOULD be evaluated as part of the "tail plan" following the - * join graph, but before operations such as SORT, DISTINCT, etc. - * - * @todo We should be able to automatically apply the static or runtime - * query optimizers to an operator tree using this annotation to - * identify the join graphs. - */ + /** + * Annotation used to mark a set of (non-optional) joins which may be + * freely reordered by the query optimizer in order to minimize the + * amount of work required to compute the solutions. + * <p> + * Note: Optional joins MAY NOT appear within a join graph. Optional + * joins SHOULD be evaluated as part of the "tail plan" following the + * join graph, but before operations such as SORT, DISTINCT, etc. When + * the query plan includes {@link #CONDITIONAL_GROUP}s, those groups + * include a leading {@link #JOIN_GRAPH} (required joins) followed by + * zero or more optional joins. + */ String JOIN_GRAPH = PipelineOp.class.getName() + ".joinGraph"; - /** - * Annotation marks a high level join group, which may include optional - * joins. Join groups are marked in order to decide the re-entry point - * in the query plan when a join within an optional join group fails. - * Also, the top-level join group is not marked -- only nested join - * groups are marked. This is used by the decision rule to handle do - * {@link IBindingSet#push()} when entering a - * <p> - * This is different from a {@link #JOIN_GRAPH} primarily in that the - * latter may not include optional joins. - */ - String JOIN_GROUP = PipelineOp.class.getName() + ".joinGroup"; + /** + * Annotation used to mark a set of operators belonging to a conditional + * binding group. Bindings within with the group will be discarded if + * any required operator in the group fails. For example, if a binding + * set exits via the alternative sink for a required join then any + * conditional bindings within the group will be discarded. + * <p> + * Together with {@link #ALT_SINK_GROUP}, the {@link #CONDITIONAL_GROUP} + * annotation provides the information necessary in order to decide the + * re-entry point in the query plan when a join within an conditional + * binding group fails. + * <p> + * The {@link #CONDITIONAL_GROUP} annotation controls the + * {@link IBindingSet#push()} and {@link IBindingSet#pop(boolean)} of + * individual solutions as they propagate through the pipeline. When a + * pipeline starts, the {@link IBindingSet} stack contains only the top + * level symbol table (i.e., name/value bindings). When an intermediate + * solution enters a {@link PipelineOp} marked as belonging to a + * {@link #CONDITIONAL_GROUP}, a new symbol table is + * {@link IBindingSet#push() pushed} onto the stack for that solution. 
+ * If the solution leaves the optional join group via the default sink, + * then the symbol table is "saved" when it is + * {@link IBindingSet#pop(boolean) popped} off of the stack. If the + * solution leaves the join group via the alternative sink, then the + * symbol table is discarded when it is {@link IBindingSet#pop(boolean) + * popped} off of the stack. This provides for conditional binding of + * variables within the operators of the group. + * <p> + * The value of the {@link #CONDITIONAL_GROUP} is an {@link Integer} + * which uniquely identifies the group within the query. + */ + String CONDITIONAL_GROUP = PipelineOp.class.getName() + ".conditionalGroup"; - /** - * Annotation is used to designate the target when a join within an - * optional join group fails. The value of this annotation must be the - * {@link #JOIN_GROUP} identifier corresponding to the next join group - * in the query plan. The target join group identifier is specified - * (rather than the bopId of the target join) since the joins in the - * target join group may be reordered by the query optimizer. The entry - * point for solutions redirected to the {@link #ALT_SINK_GROUP} is - * therefore the first operator in the target {@link #JOIN_GROUP}. This - * decouples the routing decisions from the join ordering decisions. - */ + /** + * Annotation used to designate the target when a required operator + * within an {@link #CONDITIONAL_GROUP} fails. The value of this + * annotation must be the {@link #CONDITIONAL_GROUP} identifier + * corresponding to the next conditional binding group in the query + * plan. If there is no such group, then the {@link #ALT_SINK_REF} + * should be used instead to specify the target operator in the + * pipeline, e.g., a {@link SliceOp}. + * <p> + * The target {@link #CONDITIONAL_GROUP} is specified (rather than the + * bopId of the target join) since the non-optional joins in the target + * {@link #CONDITIONAL_GROUP} be reordered by the query optimizer. The + * entry point for solutions redirected to the {@link #ALT_SINK_GROUP} + * is therefore the first operator in the target + * {@link #CONDITIONAL_GROUP}. This decouples the routing decisions from + * the join ordering decisions. + * + * @see #CONDITIONAL_GROUP + * @see #ALT_SINK_REF + */ String ALT_SINK_GROUP = PipelineOp.class.getName() + ".altSinkGroup"; } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java 2010-12-21 22:21:54 UTC (rev 4038) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java 2010-12-21 23:07:08 UTC (rev 4039) @@ -41,7 +41,6 @@ import java.util.concurrent.FutureTask; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.RejectedExecutionException; -import java.util.concurrent.Semaphore; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicBoolean; @@ -1568,8 +1567,9 @@ /** * The index of the operator which is the alternative sink for outputs * generated by this evaluation. This is <code>null</code> unless the - * operator explicitly specifies an alternative sink using - * {@link PipelineOp.Annotations#ALT_SINK_REF}. + * operator explicitly specifies an alternative sink using either + * {@link PipelineOp.Annotations#ALT_SINK_REF} or + * {@link PipelineOp.Annotations#ALT_SINK_GROUP}. 
*/ private final Integer altSinkId; @@ -1674,9 +1674,30 @@ */ sinkId = BOpUtility.getEffectiveDefaultSink(bop, p); - // altSink (null when not specified). - altSinkId = (Integer) op - .getProperty(PipelineOp.Annotations.ALT_SINK_REF); + { + // altSink (null when not specified). + final Integer altSinkId = (Integer) op + .getProperty(PipelineOp.Annotations.ALT_SINK_REF); + final Integer altSinkGroup = (Integer) op + .getProperty(PipelineOp.Annotations.ALT_SINK_GROUP); + if (altSinkId != null && altSinkGroup != null) + throw new RuntimeException( + "Annotations are mutually exclusive: " + + PipelineOp.Annotations.ALT_SINK_REF + + " and " + + PipelineOp.Annotations.ALT_SINK_GROUP); + if (altSinkGroup != null) { + /* + * Lookup the first pipeline op in the conditional binding + * group and use its bopId as the altSinkId. + */ + this.altSinkId = BOpUtility.getFirstBOpIdForConditionalGroup( + query, altSinkGroup); + } else { + // MAY be null. + this.altSinkId = altSinkId; + } + } if (altSinkId != null && !bopIndex.containsKey(altSinkId)) throw new NoSuchBOpException(altSinkId); @@ -1714,13 +1735,39 @@ } assert stats != null; - sink = (p == null ? queryBuffer : newBuffer(op, sinkId, - sinkMessagesOut, stats)); + // The groupId (if any) for this operator. + final Integer fromGroupId = (Integer) op + .getProperty(PipelineOp.Annotations.CONDITIONAL_GROUP); - altSink = altSinkId == null ? null - : altSinkId.equals(sinkId) ? sink : newBuffer(op, - altSinkId, altSinkMessagesOut, stats); + if (p == null) { + sink = queryBuffer; + } else { + final BOp targetOp = bopIndex.get(sinkId); + final Integer toGroupId = (Integer) targetOp + .getProperty(PipelineOp.Annotations.CONDITIONAL_GROUP); + sink = newBuffer(op, sinkId, new SinkTransitionMetadata( + fromGroupId, toGroupId, true/* isSink */), + sinkMessagesOut, stats); + } + if (altSinkId == null) { + altSink = null; + // } else if(altSinkId.equals(sinkId)){ + /* + * @todo Note: The optimization when altSink:=sink is now only + * possible when the groupId is not changing during the + * transition. + */ + // altSink = sink; + } else { + final BOp targetOp = bopIndex.get(altSinkId); + final Integer toGroupId = (Integer) targetOp + .getProperty(PipelineOp.Annotations.CONDITIONAL_GROUP); + altSink = newBuffer(op, altSinkId, new SinkTransitionMetadata( + fromGroupId, toGroupId, false/* isSink */), + altSinkMessagesOut, stats); + } + // context : @todo pass in IChunkMessage or IChunkAccessor context = new BOpContext<IBindingSet>(RunningQuery.this, partitionId, stats, src, sink, altSink); @@ -1748,7 +1795,9 @@ * target that sink. */ private IBlockingBuffer<IBindingSet[]> newBuffer(final PipelineOp op, - final int sinkId, final AtomicInteger sinkMessagesOut, final BOpStats stats) { + final int sinkId, + final SinkTransitionMetadata sinkTransitionMetadata, + final AtomicInteger sinkMessagesOut, final BOpStats stats) { // final MultiplexBlockingBuffer<IBindingSet[]> factory = inputBufferMap == null ? null // : inputBufferMap.get(sinkId); @@ -1774,7 +1823,8 @@ // BufferAnnotations.chunkTimeoutUnit); return new HandleChunkBuffer(RunningQuery.this, bopId, sinkId, op - .getChunkCapacity(), sinkMessagesOut, stats); + .getChunkCapacity(), sinkTransitionMetadata, sinkMessagesOut, + stats); } @@ -1814,7 +1864,115 @@ } // call() } // class ChunkTask + + /** + * In order to setup the push/pop of the sink and altSink we need to specify + * certain metadata about the source groupId, the target groupId, and + * whether the transition is via the sink or the altSink. 
The groupId for + * the source and target operators MAY be null, in which case the operator + * is understood to be outside of any conditional binding group. + * <p> + * The action to be taken when the binding set is written to the sink or the + * altSink is determined by a simple decision matrix. + * + * <pre> + * | toGroup + * fromGroup + null + newGroup + sameGroup + * null | NOP | Push | n/a + * group | Pop | Pop+Push | NOP + * </pre> + * + * The value of the [boolean:save] flag for pop is decided based on whether + * the transition is via the default sink (save:=true) or the altSink + * (save:=false). + * + * @see PipelineOp.Annotations#CONDITIONAL_GROUP + */ + private static class SinkTransitionMetadata { + + private final Integer fromGroupId; + private final Integer toGroupId; + private final boolean isSink; + + public String toString() { + + return getClass().getSimpleName() + "{from=" + fromGroupId + ",to=" + + toGroupId + ",isSink=" + isSink + "}"; + + } + + public SinkTransitionMetadata(final Integer fromGroupId, + final Integer toGroupId, final boolean isSink) { + + this.fromGroupId = fromGroupId; + + this.toGroupId = toGroupId; + + this.isSink = isSink; + + } + + /** + * Apply the appropriate action(s) to the binding set. + * + * @param bset + * The binding set. + */ + public void handleBindingSet(final IBindingSet bset) { + if (fromGroupId == null) { + if (toGroupId == null) + return; + // Transition from no group to some group. + bset.push(); + return; + } else { + if (toGroupId == null) + // Transition from a group to no group. + bset.pop(isSink/* save */); + else if (toGroupId.equals(fromGroupId)) { + // NOP (transition to the same group) + } else { + // Transition to a different group. + bset.pop(isSink/* save */); + bset.push(); + } + } + } + + } +// /** +// * Type safe enumeration for after action on a generated binding set used to +// * manage exit from a conditional binding group via the defaultSink and the +// * altSink. +// * +// * @author <a href="mailto:tho...@us...">Bryan +// * Thompson</a> +// */ +// static private enum AfterActionEnum { +// /** +// * NOP +// */ +// None, +// /** +// * Use {@link IBindingSet#pop(boolean)} to discard the symbol table on +// * the top of the stack. +// */ +// Discard, +// /** +// * Use {@link IBindingSet#pop(boolean)} to save the symbol table on the +// * top of the stack. +// */ +// Save, +// /** +// * Use {@link IBindingSet#push()} to push a symbol table on the top of +// * the stack. Bindings made against that symbol table will be +// * conditional until they are either {@link #Discard discarded} or +// * {@link #Save saved}. 
+// */ +// Push; +// } + /** * Class traps {@link #add(IBindingSet[])} to handle the IBindingSet[] * chunks as they are generated by the running operator task, invoking @@ -1841,6 +1999,8 @@ */ private final int chunkCapacity; + private final SinkTransitionMetadata sinkTransitionMetadata; + private final AtomicInteger sinkMessagesOut; private final BOpStats stats; @@ -1869,11 +2029,13 @@ */ public HandleChunkBuffer(final RunningQuery q, final int bopId, final int sinkId, final int chunkCapacity, + final SinkTransitionMetadata sinkTransitionMetadata, final AtomicInteger sinkMessagesOut, final BOpStats stats) { this.q = q; this.bopId = bopId; this.sinkId = sinkId; this.chunkCapacity = chunkCapacity; + this.sinkTransitionMetadata = sinkTransitionMetadata; this.sinkMessagesOut = sinkMessagesOut; this.stats = stats; } @@ -1892,6 +2054,10 @@ if(!open) throw new BufferClosedException(); + for (IBindingSet bset : e) { + sinkTransitionMetadata.handleBindingSet(bset); + } + // if (chunkCapacity != 0 && e.length < (chunkCapacity >> 1)) { // /* // * The caller's array is significantly smaller than the target Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java 2010-12-21 22:21:54 UTC (rev 4038) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java 2010-12-21 23:07:08 UTC (rev 4039) @@ -137,7 +137,8 @@ /** * Marks the join as "optional" in the SPARQL sense. Binding sets which * fail the join will be routed to the alternative sink as specified by - * {@link PipelineOp.Annotations#ALT_SINK_REF}. + * either {@link PipelineOp.Annotations#ALT_SINK_REF} or + * {@link PipelineOp.Annotations#ALT_SINK_GROUP}. * * @see #DEFAULT_OPTIONAL */ Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestBOpUtility.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestBOpUtility.java 2010-12-21 22:21:54 UTC (rev 4038) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestBOpUtility.java 2010-12-21 23:07:08 UTC (rev 4039) @@ -31,6 +31,14 @@ import java.util.Map; import java.util.concurrent.FutureTask; +import com.bigdata.bop.IPredicate.Annotations; +import com.bigdata.bop.ap.E; +import com.bigdata.bop.ap.Predicate; +import com.bigdata.bop.bset.StartOp; +import com.bigdata.bop.join.PipelineJoin; +import com.bigdata.bop.solutions.SliceOp; +import com.bigdata.journal.ITx; + import junit.framework.TestCase2; /** @@ -579,6 +587,120 @@ } /** + * A conditional join group: + * + * <pre> + * (a b) + * optional { + * (b c) + * (c d) + * } + * </pre> + * + * where the groupId for the optional join group is ONE (1). The test should + * locate the first {@link PipelineJoin} in that join group, which is the + * one reading on the <code>(b c)</code> access path. + */ + public void test_getFirstBOpIdForConditionalGroup() { + + final String namespace = "kb"; + + final int startId = 1; // + final int joinId1 = 2; // : base join group. 
+ final int predId1 = 3; // (a b) + final int joinId2 = 4; // : joinGroup1 + final int predId2 = 5; // (b c) + final int joinId3 = 6; // : joinGroup1 + final int predId3 = 7; // (c d) + final int sliceId = 8; // + + final IVariable<?> a = Var.var("a"); + final IVariable<?> b = Var.var("b"); + final IVariable<?> c = Var.var("c"); + final IVariable<?> d = Var.var("d"); + + final Integer joinGroup1 = Integer.valueOf(1); + + final PipelineOp startOp = new StartOp(new BOp[] {}, + NV.asMap(new NV[] {// + new NV(Predicate.Annotations.BOP_ID, startId),// + new NV(SliceOp.Annotations.EVALUATION_CONTEXT, + BOpEvaluationContext.CONTROLLER),// + })); + + final Predicate<?> pred1Op = new Predicate<E>( + new IVariableOrConstant[] { a, b }, NV + .asMap(new NV[] {// + new NV(Predicate.Annotations.RELATION_NAME, + new String[] { namespace }),// + new NV(Predicate.Annotations.BOP_ID, predId1),// + new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// + })); + + final Predicate<?> pred2Op = new Predicate<E>( + new IVariableOrConstant[] { b, c }, NV + .asMap(new NV[] {// + new NV(Predicate.Annotations.RELATION_NAME, + new String[] { namespace }),// + new NV(Predicate.Annotations.BOP_ID, predId2),// + new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// + })); + + final Predicate<?> pred3Op = new Predicate<E>( + new IVariableOrConstant[] { c, d }, NV + .asMap(new NV[] {// + new NV(Predicate.Annotations.RELATION_NAME, + new String[] { namespace }),// + new NV(Predicate.Annotations.BOP_ID, predId3),// + new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// + })); + + final PipelineOp join1Op = new PipelineJoin<E>(// + new BOp[]{startOp},// + new NV(Predicate.Annotations.BOP_ID, joinId1),// + new NV(PipelineJoin.Annotations.PREDICATE,pred1Op)); + + final PipelineOp join2Op = new PipelineJoin<E>(// + new BOp[] { join1Op },// + new NV(Predicate.Annotations.BOP_ID, joinId2),// + new NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// + new NV(PipelineJoin.Annotations.PREDICATE, pred2Op),// + // join is optional. + new NV(PipelineJoin.Annotations.OPTIONAL, true),// + // optional target is the same as the default target. + new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId)); + + final PipelineOp join3Op = new PipelineJoin<E>(// + new BOp[] { join2Op },// + new NV(Predicate.Annotations.BOP_ID, joinId3),// + new NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// + new NV(PipelineJoin.Annotations.PREDICATE, pred3Op),// + // join is optional. + new NV(PipelineJoin.Annotations.OPTIONAL, true),// + // optional target is the same as the default target. + new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId)); + + final PipelineOp sliceOp = new SliceOp(// + new BOp[]{join3Op}, + NV.asMap(new NV[] {// + new NV(BOp.Annotations.BOP_ID, sliceId),// + new NV(BOp.Annotations.EVALUATION_CONTEXT, + BOpEvaluationContext.CONTROLLER),// + })); + + final PipelineOp query = sliceOp; + + // verify found. + assertEquals(Integer.valueOf(joinId2), BOpUtility + .getFirstBOpIdForConditionalGroup(query, joinGroup1)); + + // verify not-found. + assertEquals(null, BOpUtility.getFirstBOpIdForConditionalGroup(query, + Integer.valueOf(2)/* groupId */)); + + } + + /** * Unit test for {@link BOpUtility#getParent(BOp, BOp)}. 
*/ public void test_getParent() { Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngineOptionalJoins.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngineOptionalJoins.java 2010-12-21 22:21:54 UTC (rev 4038) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngineOptionalJoins.java 2010-12-21 23:07:08 UTC (rev 4039) @@ -28,6 +28,8 @@ package com.bigdata.bop.engine; import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; import java.util.Map; import java.util.Properties; import java.util.UUID; @@ -218,39 +220,46 @@ /** * Unit test for optional join group. Three joins are used and target a - * {@link SliceOp}. The 2nd and 3rd joins are an optional join group. - * Intermediate results which do not succeed on the optional join are + * {@link SliceOp}. The 2nd and 3rd joins are an optional join group. + * Intermediate results which do not succeed on the optional join are * forwarded to the {@link SliceOp} which is the target specified by the * {@link PipelineOp.Annotations#ALT_SINK_REF}. * * The optional join group takes the form: + * + * <pre> * (a b) * optional { * (b c) * (c d) * } - * - * The (a b) tail will match everything in the knowledge base. The join - * group takes us two hops out from ?b. There should be four solutions - * that succeed the optional join group: + * </pre> * + * The (a b) tail will match everything in the knowledge base. The join + * group takes us two hops out from ?b. There should be four solutions that + * succeed the optional join group: + * + * <pre> * (paul mary brad fred) * (paul mary brad leon) * (john mary brad fred) * (john mary brad leon) + * </pre> * * and five more that don't succeed the optional join group: * + * <pre> * (paul brad) * * (john brad) * * (mary brad) * * (brad fred) * (brad leon) + * </pre> * - * In this cases marked with a *, ?c will become temporarily bound to fred - * and leon (since brad knows fred and leon), but the (c d) tail will fail - * since fred and leon don't know anyone else. At this point, the ?c binding - * must be removed from the solution. + * In this cases marked with a <code>*</code>, ?c will become temporarily + * bound to fred and leon (since brad knows fred and leon), but the (c d) + * tail will fail since fred and leon don't know anyone else. At this point, + * the ?c binding must be removed from the solution. */ public void test_query_join2_optionals() throws Exception { @@ -267,6 +276,8 @@ final IVariable<?> b = Var.var("b"); final IVariable<?> c = Var.var("c"); final IVariable<?> d = Var.var("d"); + + final Object joinGroup1 = Integer.valueOf(1); final PipelineOp startOp = new StartOp(new BOp[] {}, NV.asMap(new NV[] {// @@ -309,7 +320,8 @@ final PipelineOp join2Op = new PipelineJoin<E>(// new BOp[] { join1Op },// - new NV(Predicate.Annotations.BOP_ID, joinId2),// + new NV(Predicate.Annotations.BOP_ID, joinId2),// + new NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// new NV(PipelineJoin.Annotations.PREDICATE, pred2Op),// // join is optional. new NV(PipelineJoin.Annotations.OPTIONAL, true),// @@ -319,6 +331,7 @@ final PipelineOp join3Op = new PipelineJoin<E>(// new BOp[] { join2Op },// new NV(Predicate.Annotations.BOP_ID, joinId3),// + new NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// new NV(PipelineJoin.Annotations.PREDICATE, pred3Op),// // join is optional. 
new NV(PipelineJoin.Annotations.OPTIONAL, true),// @@ -415,6 +428,11 @@ ) }; + /* + * junit.framework.AssertionFailedError: Iterator will deliver too + * many objects: reminder(3)=[{ a=John, b=Brad }, { a=Mary, b=Brad + * }, { a=Paul, b=Brad }]. + */ assertSameSolutionsAnyOrder(expected, new Dechunkerator<IBindingSet>(runningQuery.iterator())); @@ -434,45 +452,54 @@ } /** - * Unit test for optional join group with a filter. Three joins are used - * and target a {@link SliceOp}. The 2nd and 3rd joins are an optional join - * group. Intermediate results which do not succeed on the optional join are + * Unit test for optional join group with a filter. Three joins are used and + * target a {@link SliceOp}. The 2nd and 3rd joins are an optional join + * group. Intermediate results which do not succeed on the optional join are * forwarded to the {@link SliceOp} which is the target specified by the - * {@link PipelineOp.Annotations#ALT_SINK_REF}. The optional join group + * {@link PipelineOp.Annotations#ALT_SINK_REF}. The optional join group * contains a filter. + * <p> + * The optional join group takes the form: * - * The optional join group takes the form: + * <pre> * (a b) * optional { * (b c) * (c d) * filter(d != Leon) * } - * - * The (a b) tail will match everything in the knowledge base. The join - * group takes us two hops out from ?b. There should be two solutions - * that succeed the optional join group: + * </pre> * + * The (a b) tail will match everything in the knowledge base. The join + * group takes us two hops out from ?b. There should be two solutions that + * succeed the optional join group: + * + * <pre> * (paul mary brad fred) * (john mary brad fred) + * </pre> * * and five more that don't succeed the optional join group: * + * <pre> * (paul brad) * * (john brad) * * (mary brad) * * (brad fred) * (brad leon) + * </pre> * - * In this cases marked with a *, ?c will become temporarily bound to fred - * and leon (since brad knows fred and leon), but the (c d) tail will fail - * since fred and leon don't know anyone else. At this point, the ?c binding - * must be removed from the solution. - * + * In this cases marked with a <code>*</code>, ?c will become temporarily + * bound to fred and leon (since brad knows fred and leon), but the (c d) + * tail will fail since fred and leon don't know anyone else. At this point, + * the ?c binding must be removed from the solution. + * <p> * The filter (d != Leon) will prune the two solutions: * + * <pre> * (paul mary brad leon) * (john mary brad leon) + * </pre> * * since ?d is bound to Leon in those cases. */ @@ -647,43 +674,50 @@ } /** - * Unit test for optional join group with a filter on a variable outside - * the optional join group. Three joins are used and target a - * {@link SliceOp}. The 2nd and 3rd joins are an optional join - * group. Intermediate results which do not succeed on the optional join are - * forwarded to the {@link SliceOp} which is the target specified by the - * {@link PipelineOp.Annotations#ALT_SINK_REF}. The optional join group + * Unit test for optional join group with a filter on a variable outside the + * optional join group. Three joins are used and target a {@link SliceOp}. + * The 2nd and 3rd joins are an optional join group. Intermediate results + * which do not succeed on the optional join are forwarded to the + * {@link SliceOp} which is the target specified by the + * {@link PipelineOp.Annotations#ALT_SINK_REF}. 
The optional join group * contains a filter that uses a variable outside the optional join group. + * <P> + * The query takes the form: * - * The query takes the form: + * <pre> * (a b) * optional { * (b c) * (c d) * filter(a != Paul) * } - * - * The (a b) tail will match everything in the knowledge base. The join - * group takes us two hops out from ?b. There should be two solutions - * that succeed the optional join group: + * </pre> * + * The (a b) tail will match everything in the knowledge base. The join + * group takes us two hops out from ?b. There should be two solutions that + * succeed the optional join group: + * + * <pre> * (john mary brad fred) * (john mary brad leon) + * </pre> * * and six more that don't succeed the optional join group: - * + * + * <pre> * (paul mary) * * (paul brad) * * (john brad) * (mary brad) * (brad fred) * (brad leon) + * </pre> * - * In this cases marked with a *, ?a is bound to Paul even though there is - * a filter that specifically prohibits a = Paul. This is because the filter - * is inside the optional join group, which means that solutions can still - * include a = Paul, but the optional join group should not run in that - * case. + * In this cases marked with a <code>*</code>, ?a is bound to Paul even + * though there is a filter that specifically prohibits a = Paul. This is + * because the filter is inside the optional join group, which means that + * solutions can still include a = Paul, but the optional join group should + * not run in that case. */ public void test_query_optionals_filter2() throws Exception { @@ -1006,8 +1040,15 @@ if (actual.hasNext()) { - fail("Iterator will deliver too many objects."); + final List<T> remainder = new LinkedList<T>(); + + while(actual.hasNext()) { + remainder.add(actual.next()); + } + fail("Iterator will deliver too many objects: reminder(" + + remainder.size() + ")=" + remainder); + } } finally { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2011-01-03 14:41:44
|
Revision: 4047 http://bigdata.svn.sourceforge.net/bigdata/?rev=4047&view=rev Author: thompsonbry Date: 2011-01-03 14:41:36 +0000 (Mon, 03 Jan 2011) Log Message: ----------- Added an OptionalJoinGroup operator. It issues a subquery for each binding set presented to the operator. If the subquery produces any solutions, then they are copied to the default sink. Otherwise the binding set presented to the operator is copied to the default sink. This provides optional semantics for the group. Modified the optional join group test suite to use the OptionalJoinGroup operator. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/AbstractSubqueryOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngineOptionalJoins.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/OptionalJoinGroup.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestOptionalJoinGroup.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java 2011-01-02 22:49:27 UTC (rev 4046) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java 2011-01-03 14:41:36 UTC (rev 4047) @@ -136,6 +136,12 @@ * <p> * The value of the {@link #CONDITIONAL_GROUP} is an {@link Integer} * which uniquely identifies the group within the query. + * + * @deprecated The binding set stack push/pop mechanisms are not + * sufficient to support optional join groups. This + * annotation will be removed unless it proves valuable for + * marking the elements of a join group, in which case the + * javadoc needs to be updated. */ String CONDITIONAL_GROUP = PipelineOp.class.getName() + ".conditionalGroup"; @@ -158,6 +164,10 @@ * * @see #CONDITIONAL_GROUP * @see #ALT_SINK_REF + * + * @deprecated The binding set stack push/pop mechanisms are not + * sufficient to support optional join groups. This + * annotation will be removed. */ String ALT_SINK_GROUP = PipelineOp.class.getName() + ".altSinkGroup"; Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/AbstractSubqueryOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/AbstractSubqueryOp.java 2011-01-02 22:49:27 UTC (rev 4046) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/AbstractSubqueryOp.java 2011-01-03 14:41:36 UTC (rev 4047) @@ -55,8 +55,9 @@ * executed independently. By default, the subqueries are run with unlimited * parallelism. * <p> - * Note: This operator must on the query controller. The - * {@link PipelineOp.Annotations#SINK_REF} of each child operand should be + * Note: This operator must execute on the query controller. + * <p> + * The {@link PipelineOp.Annotations#SINK_REF} of each child operand should be * overridden to specify the parent of the this operator. If you fail to do * this, then the intermediate results of the subqueries will be routed to this * operator, which DOES NOT pass them on. 
This may cause unnecessary network @@ -193,10 +194,10 @@ this.latch = new CountDownLatch(controllerOp.arity()); /* - * Create FutureTasks for each subquery. The futures are submitted - * to the Executor yet. That happens in call(). By deferring the - * evaluation until call() we gain the ability to cancel all - * subqueries if any subquery fails. + * Create FutureTasks for each subquery. The futures are not + * submitted to the Executor yet. That happens in call(). By + * deferring the evaluation until call() we gain the ability to + * cancel all subqueries if any subquery fails. */ for (BOp op : controllerOp.args()) { Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/OptionalJoinGroup.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/OptionalJoinGroup.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/OptionalJoinGroup.java 2011-01-03 14:41:36 UTC (rev 4047) @@ -0,0 +1,418 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Aug 18, 2010 + */ + +package com.bigdata.bop.controller; + +import java.util.Map; +import java.util.UUID; +import java.util.concurrent.Callable; +import java.util.concurrent.Executor; +import java.util.concurrent.FutureTask; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.BOpContext; +import com.bigdata.bop.BOpUtility; +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.NV; +import com.bigdata.bop.PipelineOp; +import com.bigdata.bop.engine.IRunningQuery; +import com.bigdata.bop.engine.LocalChunkMessage; +import com.bigdata.bop.engine.QueryEngine; +import com.bigdata.relation.accesspath.IAsynchronousIterator; +import com.bigdata.relation.accesspath.ThickAsynchronousIterator; +import com.bigdata.util.concurrent.LatchedExecutor; + +/** + * For each binding set presented, this operator executes a subquery. Any + * solutions produced by the subquery are copied to the default sink. If no + * solutions are produced, then the original binding set is copied to the + * default sink (optional join semantics). Each subquery is run as a separate + * query but is linked to the parent query in the operator is being evaluated. + * + * FIXME Is this true?: "This operator must on the query controller." For an + * optional join group in scale-out, we need to concentrate the solutions back + * to the controller if this is true. If it is not a requirement, then we can + * just issue the subquery from ANY node. + * + * FIXME Parallel evaluation of subqueries is not implemented. What is the + * appropriate parallelism for this operator? More parallelism should reduce + * latency but could increase the memory burden. 
Review this decision once we + * have the RWStore operating as a binding set buffer on the Java process heap. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public class OptionalJoinGroup extends PipelineOp { + + /** + * + */ + private static final long serialVersionUID = 1L; + + public interface Annotations extends PipelineOp.Annotations { + + /** + * The subquery to be evaluated for each binding sets presented to the + * {@link OptionalJoinGroup} (required). This should be a + * {@link PipelineOp}. + */ + String SUBQUERY = OptionalJoinGroup.class.getName() + ".subquery"; + + /** + * The maximum parallelism with which the subqueries will be evaluated + * (default {@value #DEFAULT_MAX_PARALLEL}). + */ + String MAX_PARALLEL = OptionalJoinGroup.class.getName() + + ".maxParallel"; + + int DEFAULT_MAX_PARALLEL = 1; + + } + + /** + * @see Annotations#MAX_PARALLEL + */ + public int getMaxParallel() { + return getProperty(Annotations.MAX_PARALLEL, + Annotations.DEFAULT_MAX_PARALLEL); + } + + /** + * Deep copy constructor. + */ + public OptionalJoinGroup(final OptionalJoinGroup op) { + super(op); + } + + /** + * Shallow copy constructor. + * + * @param args + * @param annotations + */ + public OptionalJoinGroup(final BOp[] args, + final Map<String, Object> annotations) { + + super(args, annotations); + +// if (!getEvaluationContext().equals(BOpEvaluationContext.CONTROLLER)) +// throw new IllegalArgumentException(Annotations.EVALUATION_CONTEXT +// + "=" + getEvaluationContext()); + + getRequiredProperty(Annotations.SUBQUERY); + + if (!getProperty(Annotations.CONTROLLER, Annotations.DEFAULT_CONTROLLER)) + throw new IllegalArgumentException(Annotations.CONTROLLER); + +// // The id of this operator (if any). +// final Integer thisId = (Integer)getProperty(Annotations.BOP_ID); +// +// for(BOp op : args) { +// +// final Integer sinkId = (Integer) op +// .getRequiredProperty(Annotations.SINK_REF); +// +// if(sinkId.equals(thisId)) +// throw new RuntimeException("Operand may not target ") +// +// } + + } + + public OptionalJoinGroup(final BOp[] args, NV... annotations) { + + this(args, NV.asMap(annotations)); + + } + + public FutureTask<Void> eval(final BOpContext<IBindingSet> context) { + + return new FutureTask<Void>(new ControllerTask(this, context)); + + } + + /** + * Evaluates the arguments of the operator as subqueries. The arguments are + * evaluated in order. An {@link Executor} with limited parallelism to + * evaluate the arguments. If the controller operator is interrupted, then + * the subqueries are cancelled. If a subquery fails, then all subqueries + * are cancelled. 
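+ * <p>
+ * Limited parallelism is obtained by wrapping the index manager's
+ * executor service in a {@link LatchedExecutor}, a sketch of the
+ * pattern used by the constructor below ([nparallel] is the value of
+ * {@link Annotations#MAX_PARALLEL}):
+ * <pre>
+ * final Executor executor = new LatchedExecutor(
+ *         context.getIndexManager().getExecutorService(), nparallel);
+ * executor.execute(ft); // admits at most [nparallel] tasks at once.
+ * ft.get();             // wait for the outcome.
+ * </pre>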
+ */ + private static class ControllerTask implements Callable<Void> { + + private final OptionalJoinGroup controllerOp; + private final BOpContext<IBindingSet> context; +// private final List<FutureTask<IRunningQuery>> tasks = new LinkedList<FutureTask<IRunningQuery>>(); +// private final CountDownLatch latch; + private final int nparallel; + private final PipelineOp subquery; + private final Executor executor; + + public ControllerTask(final OptionalJoinGroup controllerOp, final BOpContext<IBindingSet> context) { + + if (controllerOp == null) + throw new IllegalArgumentException(); + + if (context == null) + throw new IllegalArgumentException(); + + this.controllerOp = controllerOp; + + this.context = context; + + this.nparallel = controllerOp.getProperty(Annotations.MAX_PARALLEL, + Annotations.DEFAULT_MAX_PARALLEL); + + this.subquery = (PipelineOp) controllerOp + .getRequiredProperty(Annotations.SUBQUERY); + + this.executor = new LatchedExecutor(context.getIndexManager() + .getExecutorService(), nparallel); + +// this.latch = new CountDownLatch(controllerOp.arity()); + +// /* +// * Create FutureTasks for each subquery. The futures are submitted +// * to the Executor yet. That happens in call(). By deferring the +// * evaluation until call() we gain the ability to cancel all +// * subqueries if any subquery fails. +// */ +// for (BOp op : controllerOp.args()) { +// +// /* +// * Task runs subquery and cancels all subqueries in [tasks] if +// * it fails. +// */ +// tasks.add(new FutureTask<IRunningQuery>(new SubqueryTask(op, +// context)) { +// /* +// * Hook future to count down the latch when the task is +// * done. +// */ +// public void run() { +// try { +// super.run(); +// } finally { +// latch.countDown(); +// } +// } +// }); +// +// } + + } + + /** + * Evaluate the subquery. + * + * @todo Support limited parallelism for each binding set read from the + * source. We will need to keep track of the running subqueries in + * order to wait on them before returning from this method and in + * order to cancel them if something goes wrong. + */ + public Void call() throws Exception { + + try { + + final IAsynchronousIterator<IBindingSet[]> sitr = context + .getSource(); + + // @todo test for interrupt/halted query? + while(sitr.hasNext()) { + + final IBindingSet[] chunk = sitr.next(); + + for(IBindingSet bset : chunk) { + + final FutureTask<IRunningQuery> ft = new FutureTask<IRunningQuery>( + new SubqueryTask(bset, subquery, context)); + + // run the subquery. + executor.execute(ft); + + // wait for the outcome. + ft.get(); + + } + + } + +// /* +// * Run subqueries with limited parallelism. +// */ +// for (FutureTask<IRunningQuery> ft : tasks) { +// executor.execute(ft); +// } +// +// /* +// * Wait for all subqueries to complete. +// */ +// latch.await(); +// +// /* +// * Get the futures, throwing out any errors. +// */ +// for (FutureTask<IRunningQuery> ft : tasks) +// ft.get(); + + // Now that we know the subqueries ran Ok, flush the sink. + context.getSink().flush(); + + // Done. + return null; + + } finally { + +// // Cancel any tasks which are still running. +// for (FutureTask<IRunningQuery> ft : tasks) +// ft.cancel(true/* mayInterruptIfRunning */); + + context.getSource().close(); + + context.getSink().close(); + + if (context.getSink2() != null) + context.getSink2().close(); + + } + + } + + /** + * Run a subquery. 
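+ * <p>
+ * Any solutions produced by the subquery are copied to the parent's
+ * default sink; if there are none, the source binding set itself is
+ * emitted, which provides the optional join semantics. A sketch of
+ * that logic as implemented in {@link #call()} below:
+ * <pre>
+ * final long ncopied = BOpUtility.copy(subquerySolutionItr,
+ *         parentContext.getSink(), null, null, null); // sink2, constraints, stats.
+ * if (ncopied == 0L) // no solutions: emit the original binding set.
+ *     parentContext.getSink().add(new IBindingSet[] { bset });
+ * </pre>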
+ * + * @author <a href="mailto:tho...@us...">Bryan + * Thompson</a> + */ + private class SubqueryTask implements Callable<IRunningQuery> { + + /** + * The evaluation context for the parent query. + */ + private final BOpContext<IBindingSet> parentContext; + + /** + * The source binding set. This will be copied to the output if + * there are no solutions for the subquery (optional join + * semantics). + */ + private final IBindingSet bset; + + /** + * The root operator for the subquery. + */ + private final BOp subQueryOp; + + public SubqueryTask(final IBindingSet bset, final BOp subQuery, + final BOpContext<IBindingSet> parentContext) { + + this.bset = bset; + + this.subQueryOp = subQuery; + + this.parentContext = parentContext; + + } + + public IRunningQuery call() throws Exception { + + IAsynchronousIterator<IBindingSet[]> subquerySolutionItr = null; + try { + + final QueryEngine queryEngine = parentContext.getRunningQuery() + .getQueryEngine(); + +// final IRunningQuery runningQuery = queryEngine +// .eval(subQueryOp); + + final BOp startOp = BOpUtility.getPipelineStart(subQueryOp); + + final int startId = startOp.getId(); + + final UUID queryId = UUID.randomUUID(); + + // execute the subquery, passing in the source binding set. + final IRunningQuery runningQuery = queryEngine + .eval( + queryId, + (PipelineOp) subQueryOp, + new LocalChunkMessage<IBindingSet>( + queryEngine, + queryId, + startId, + -1 /* partitionId */, + new ThickAsynchronousIterator<IBindingSet[]>( + new IBindingSet[][] { new IBindingSet[] { bset } }))); + + // Iterator visiting the subquery solutions. + subquerySolutionItr = runningQuery.iterator(); + + // Copy solutions from the subquery to the query. + final long ncopied = BOpUtility.copy(subquerySolutionItr, + parentContext.getSink(), null/* sink2 */, + null/* constraints */, null/* stats */); + + // wait for the subquery. + runningQuery.get(); + + if (ncopied == 0L) { + + /* + * Since there were no solutions for the subquery, copy + * the original binding set to the default sink. + */ + parentContext.getSink().add(new IBindingSet[]{bset}); + + } + + // done. + return runningQuery; + + } catch (Throwable t) { + + /* + * If a subquery fails, then propagate the error to the + * parent and rethrow the first cause error out of the + * subquery. + */ + throw new RuntimeException(ControllerTask.this.context + .getRunningQuery().halt(t)); + + } finally { + + if (subquerySolutionItr != null) + subquerySolutionItr.close(); + + } + + } + + } // SubqueryTask + + } // ControllerTask + +} Property changes on: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/OptionalJoinGroup.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestAll.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestAll.java 2011-01-02 22:49:27 UTC (rev 4046) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestAll.java 2011-01-03 14:41:36 UTC (rev 4047) @@ -77,6 +77,8 @@ // test STEPS // suite.addTestSuite(TestUnion.class); + suite.addTestSuite(TestOptionalJoinGroup.class); + // @todo test STAR (transitive closure). 
// suite.addTestSuite(TestStar.class); Added: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestOptionalJoinGroup.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestOptionalJoinGroup.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestOptionalJoinGroup.java 2011-01-03 14:41:36 UTC (rev 4047) @@ -0,0 +1,1138 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Aug 23, 2010 + */ + +package com.bigdata.bop.controller; + +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.UUID; +import java.util.concurrent.atomic.AtomicInteger; + +import junit.framework.TestCase2; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.BOpEvaluationContext; +import com.bigdata.bop.Constant; +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IConstant; +import com.bigdata.bop.IConstraint; +import com.bigdata.bop.IVariable; +import com.bigdata.bop.IVariableOrConstant; +import com.bigdata.bop.NV; +import com.bigdata.bop.PipelineOp; +import com.bigdata.bop.Var; +import com.bigdata.bop.IPredicate.Annotations; +import com.bigdata.bop.ap.E; +import com.bigdata.bop.ap.Predicate; +import com.bigdata.bop.ap.R; +import com.bigdata.bop.bindingSet.ArrayBindingSet; +import com.bigdata.bop.bindingSet.HashBindingSet; +import com.bigdata.bop.bset.ConditionalRoutingOp; +import com.bigdata.bop.bset.StartOp; +import com.bigdata.bop.constraint.NEConstant; +import com.bigdata.bop.engine.BOpStats; +import com.bigdata.bop.engine.IChunkMessage; +import com.bigdata.bop.engine.IRunningQuery; +import com.bigdata.bop.engine.LocalChunkMessage; +import com.bigdata.bop.engine.QueryEngine; +import com.bigdata.bop.join.PipelineJoin; +import com.bigdata.bop.solutions.SliceOp; +import com.bigdata.journal.BufferMode; +import com.bigdata.journal.ITx; +import com.bigdata.journal.Journal; +import com.bigdata.relation.accesspath.IAsynchronousIterator; +import com.bigdata.relation.accesspath.ThickAsynchronousIterator; +import com.bigdata.striterator.ChunkedArrayIterator; +import com.bigdata.striterator.Dechunkerator; +import com.bigdata.striterator.ICloseableIterator; + +/** + * Test suite for handling of optional join groups during query evaluation + * against a local database instance. Optional join groups are handled using + * {@link IBindingSet#push()} when entering the join group and + * {@link IBindingSet#pop(boolean)} when exiting the join group. 
If the join + * group was successful for a given binding set, then <code>save:=true</code> is + * specified for {@link IBindingSet#pop(boolean)} and the applied bindings will + * be visible to the downstream consumer. Otherwise the bindings applied during + * the join group are simply discarded. + * + * <pre> + * -Dlog4j.configuration=bigdata/src/resources/logging/log4j.properties + * </pre> + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public class TestOptionalJoinGroup extends TestCase2 { + + /** + * + */ + public TestOptionalJoinGroup() { + } + + /** + * @param name + */ + public TestOptionalJoinGroup(String name) { + super(name); + } + + @Override + public Properties getProperties() { + + final Properties p = new Properties(super.getProperties()); + + p.setProperty(Journal.Options.BUFFER_MODE, BufferMode.Transient + .toString()); + + return p; + + } + + static private final String namespace = "ns"; + private Journal jnl; + private QueryEngine queryEngine; + + public void setUp() throws Exception { + + jnl = new Journal(getProperties()); + + loadData(jnl); + + queryEngine = new QueryEngine(jnl); + + queryEngine.init(); + + } + + /** + * Create and populate relation in the {@link #namespace}. + */ + private void loadData(final Journal store) { + + // create the relation. + final R rel = new R(store, namespace, ITx.UNISOLATED, new Properties()); + rel.create(); + + // data to insert (in key order for convenience). + final E[] a = {// + new E("Paul", "Mary"),// [0] + new E("Paul", "Brad"),// [1] + + new E("John", "Mary"),// [2] + new E("John", "Brad"),// [3] + + new E("Mary", "Brad"),// [4] + + new E("Brad", "Fred"),// [5] + new E("Brad", "Leon"),// [6] + }; + + // insert data (the records are not pre-sorted). + rel.insert(new ChunkedArrayIterator<E>(a.length, a, null/* keyOrder */)); + + // Do commit since not scale-out. + store.commit(); + + } + + public void tearDown() throws Exception { + + if (queryEngine != null) { + queryEngine.shutdownNow(); + queryEngine = null; + } + + if (jnl != null) { + jnl.destroy(); + jnl = null; + } + + } + + /** + * Return an {@link IAsynchronousIterator} that will read a single, + * empty {@link IBindingSet}. + * + * @param bindingSet + * the binding set. + */ + protected ThickAsynchronousIterator<IBindingSet[]> newBindingSetIterator( + final IBindingSet bindingSet) { + + return new ThickAsynchronousIterator<IBindingSet[]>( + new IBindingSet[][] { new IBindingSet[] { bindingSet } }); + + } + + /** + * Return an {@link IAsynchronousIterator} that will read a single, chunk + * containing all of the specified {@link IBindingSet}s. + * + * @param bindingSets + * the binding sets. + */ + protected ThickAsynchronousIterator<IBindingSet[]> newBindingSetIterator( + final IBindingSet[] bindingSets) { + + return new ThickAsynchronousIterator<IBindingSet[]>( + new IBindingSet[][] { bindingSets }); + + } + + /** + * Return an {@link IAsynchronousIterator} that will read a single, chunk + * containing all of the specified {@link IBindingSet}s. + * + * @param bindingSetChunks + * the chunks of binding sets. + */ + protected ThickAsynchronousIterator<IBindingSet[]> newBindingSetIterator( + final IBindingSet[][] bindingSetChunks) { + + return new ThickAsynchronousIterator<IBindingSet[]>(bindingSetChunks); + + } + + /** + * Unit test for optional join group. Three joins are used and target a + * {@link SliceOp}. The 2nd and 3rd joins are embedded in an + * {@link OptionalJoinGroup}. 
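+ * The group is supplied to the controller operator as a subquery
+ * annotation, e.g. (a sketch; the full construction appears in the
+ * test body below):
+ * <pre>
+ * new OptionalJoinGroup(new BOp[] { join1Op },
+ *         new NV(Predicate.Annotations.BOP_ID, joinGroup1),
+ *         new NV(OptionalJoinGroup.Annotations.SUBQUERY, subQuery),
+ *         new NV(BOp.Annotations.CONTROLLER, true));
+ * </pre>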
+ * <P> + * The optional join group takes the form: + * + * <pre> + * (a b) + * optional { + * (b c) + * (c d) + * } + * </pre> + * + * The (a b) tail will match everything in the knowledge base. The join + * group takes us two hops out from ?b. There should be four solutions that + * succeed the optional join group: + * + * <pre> + * (paul mary brad fred) + * (paul mary brad leon) + * (john mary brad fred) + * (john mary brad leon) + * </pre> + * + * and five more that don't succeed the optional join group: + * + * <pre> + * (paul brad) * + * (john brad) * + * (mary brad) * + * (brad fred) + * (brad leon) + * </pre> + * + * In this cases marked with a <code>*</code>, ?c will become temporarily + * bound to fred and leon (since brad knows fred and leon), but the (c d) + * tail will fail since fred and leon don't know anyone else. At this point, + * the ?c binding must be removed from the solution. + */ + public void test_query_join2_optionals() throws Exception { + + // main query + final int startId = 1; // + final int joinId1 = 2; // : base join group. + final int predId1 = 3; // (a b) + final int joinGroup1 = 9; + final int sliceId = 8; // + + // subquery + final int joinId2 = 4; // : joinGroup1 + final int predId2 = 5; // (b c) + final int joinId3 = 6; // : joinGroup1 + final int predId3 = 7; // (c d) + + final IVariable<?> a = Var.var("a"); + final IVariable<?> b = Var.var("b"); + final IVariable<?> c = Var.var("c"); + final IVariable<?> d = Var.var("d"); + + final PipelineOp startOp = new StartOp(new BOp[] {}, + NV.asMap(new NV[] {// + new NV(Predicate.Annotations.BOP_ID, startId),// + new NV(SliceOp.Annotations.EVALUATION_CONTEXT, + BOpEvaluationContext.CONTROLLER),// + })); + + final Predicate<?> pred1Op = new Predicate<E>( + new IVariableOrConstant[] { a, b }, NV + .asMap(new NV[] {// + new NV(Predicate.Annotations.RELATION_NAME, + new String[] { namespace }),// + new NV(Predicate.Annotations.BOP_ID, predId1),// + new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// + })); + + final Predicate<?> pred2Op = new Predicate<E>( + new IVariableOrConstant[] { b, c }, NV + .asMap(new NV[] {// + new NV(Predicate.Annotations.RELATION_NAME, + new String[] { namespace }),// + new NV(Predicate.Annotations.BOP_ID, predId2),// + new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// + })); + + final Predicate<?> pred3Op = new Predicate<E>( + new IVariableOrConstant[] { c, d }, NV + .asMap(new NV[] {// + new NV(Predicate.Annotations.RELATION_NAME, + new String[] { namespace }),// + new NV(Predicate.Annotations.BOP_ID, predId3),// + new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// + })); + + final PipelineOp join1Op = new PipelineJoin<E>(// + new BOp[]{startOp},// + new NV(Predicate.Annotations.BOP_ID, joinId1),// + new NV(PipelineJoin.Annotations.PREDICATE,pred1Op)); + + final PipelineOp subQuery; + { + final PipelineOp join2Op = new PipelineJoin<E>(// + new BOp[] { /*join1Op*/ },// + new NV(Predicate.Annotations.BOP_ID, joinId2),// +// new NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// + new NV(PipelineJoin.Annotations.PREDICATE, pred2Op)// +// // join is optional. +// new NV(PipelineJoin.Annotations.OPTIONAL, true),// +// // optional target is the same as the default target. 
+// new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId) + ); + + final PipelineOp join3Op = new PipelineJoin<E>(// + new BOp[] { join2Op },// + new NV(Predicate.Annotations.BOP_ID, joinId3),// +// new NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// + new NV(PipelineJoin.Annotations.PREDICATE, pred3Op)// +// // join is optional. +// new NV(PipelineJoin.Annotations.OPTIONAL, true),// +// // optional target is the same as the default target. +// new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId) + ); + subQuery = join3Op; + } + + final PipelineOp joinGroup1Op = new OptionalJoinGroup(new BOp[]{join1Op}, + new NV(Predicate.Annotations.BOP_ID, joinGroup1),// +// new NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// + new NV(OptionalJoinGroup.Annotations.SUBQUERY, subQuery),// + new NV(BOp.Annotations.CONTROLLER,true)// +// new NV(BOp.Annotations.EVALUATION_CONTEXT, +// BOpEvaluationContext.CONTROLLER)// +// // join is optional. +// new NV(PipelineJoin.Annotations.OPTIONAL, true),// +// // optional target is the same as the default target. +// new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId) + ); + + final PipelineOp sliceOp = new SliceOp(// + new BOp[]{joinGroup1Op}, + NV.asMap(new NV[] {// + new NV(BOp.Annotations.BOP_ID, sliceId),// + new NV(BOp.Annotations.EVALUATION_CONTEXT, + BOpEvaluationContext.CONTROLLER),// + })); + + final PipelineOp query = sliceOp; + + // start the query. + final UUID queryId = UUID.randomUUID(); + final IChunkMessage<IBindingSet> initialChunkMessage; + { + + final IBindingSet initialBindings = new HashBindingSet(); + +// initialBindings.set(Var.var("x"), new Constant<String>("Mary")); + + initialChunkMessage = new LocalChunkMessage<IBindingSet>(queryEngine, + queryId, startId,// + -1, // partitionId + newBindingSetIterator(initialBindings)); + } + final IRunningQuery runningQuery = queryEngine.eval(queryId, query, + initialChunkMessage); + + // verify solutions. + { + + // the expected solutions. + final IBindingSet[] expected = new IBindingSet[] {// + // four solutions where the optional join succeeds. 
+ new ArrayBindingSet(// + new IVariable[] { a, b, c, d },// + new IConstant[] { new Constant<String>("Paul"), + new Constant<String>("Mary"), + new Constant<String>("Brad"), + new Constant<String>("Fred") }// + ), + new ArrayBindingSet(// + new IVariable[] { a, b, c, d },// + new IConstant[] { new Constant<String>("Paul"), + new Constant<String>("Mary"), + new Constant<String>("Brad"), + new Constant<String>("Leon") }// + ), + new ArrayBindingSet(// + new IVariable[] { a, b, c, d },// + new IConstant[] { new Constant<String>("John"), + new Constant<String>("Mary"), + new Constant<String>("Brad"), + new Constant<String>("Fred") }// + ), + new ArrayBindingSet(// + new IVariable[] { a, b, c, d },// + new IConstant[] { new Constant<String>("John"), + new Constant<String>("Mary"), + new Constant<String>("Brad"), + new Constant<String>("Leon") }// + ), + // plus anything we read from the first access path which did not + // pass the optional join + new ArrayBindingSet(// + new IVariable[] { a, b },// + new IConstant[] { new Constant<String>("Paul"), + new Constant<String>("Brad") }// + ), + new ArrayBindingSet(// + new IVariable[] { a, b },// + new IConstant[] { new Constant<String>("John"), + new Constant<String>("Brad") }// + ), + new ArrayBindingSet(// + new IVariable[] { a, b },// + new IConstant[] { new Constant<String>("Mary"), + new Constant<String>("Brad") }// + ), + new ArrayBindingSet(// + new IVariable[] { a, b },// + new IConstant[] { new Constant<String>("Brad"), + new Constant<String>("Fred") }// + ), + new ArrayBindingSet(// + new IVariable[] { a, b },// + new IConstant[] { new Constant<String>("Brad"), + new Constant<String>("Leon") }// + ) + }; + + /* + * junit.framework.AssertionFailedError: Iterator will deliver too + * many objects: reminder(3)=[{ a=John, b=Brad }, { a=Mary, b=Brad + * }, { a=Paul, b=Brad }]. + */ + assertSameSolutionsAnyOrder(expected, + new Dechunkerator<IBindingSet>(runningQuery.iterator())); + + } + + // Wait until the query is done. + runningQuery.get(); + final Map<Integer, BOpStats> statsMap = runningQuery.getStats(); + { + // validate the stats map. + assertNotNull(statsMap); + assertEquals(4, statsMap.size()); + if (log.isInfoEnabled()) + log.info(statsMap.toString()); + } + + } + + /** + * Unit test for optional join group with a filter. Three joins are used and + * target a {@link SliceOp}. The 2nd and 3rd joins are embedded in an + * optional join group. The optional join group contains a filter. + * <p> + * The optional join group takes the form: + * + * <pre> + * (a b) + * optional { + * (b c) + * (c d) + * filter(d != Leon) + * } + * </pre> + * + * The (a b) tail will match everything in the knowledge base. The join + * group takes us two hops out from ?b. There should be two solutions that + * succeed the optional join group: + * + * <pre> + * (paul mary brad fred) + * (john mary brad fred) + * </pre> + * + * and five more that don't succeed the optional join group: + * + * <pre> + * (paul brad) * + * (john brad) * + * (mary brad) * + * (brad fred) + * (brad leon) + * </pre> + * + * In the cases marked with a <code>*</code>, ?c will become temporarily + * bound to fred and leon (since brad knows fred and leon), but the (c d) + * tail will fail since fred and leon don't know anyone else. At this point, + * the ?c binding must be removed from the solution. + * <p> + * The filter (d != Leon) will prune the two solutions: + * + * <pre> + * (paul mary brad leon) + * (john mary brad leon) + * </pre> + * + * since ?d is bound to Leon in those cases. 
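+ * <p>
+ * The filter is attached to the last join of the subquery as a
+ * constraint, e.g. (a sketch; the same construction appears in the
+ * test body below):
+ * <pre>
+ * new NV(PipelineJoin.Annotations.CONSTRAINTS, new IConstraint[] {
+ *         new NEConstant(d, new Constant<String>("Leon")) })
+ * </pre>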
+ */ + public void test_query_optionals_filter() throws Exception { + + // main query + final int startId = 1; + final int joinId1 = 2; // + final int predId1 = 3; // (a,b) + final int joinGroup1 = 9; + final int sliceId = 8; + + // subquery + final int joinId2 = 4; // : group1 + final int predId2 = 5; // (b,c) + final int joinId3 = 6; // : group1 + final int predId3 = 7; // (c,d) + + + final IVariable<?> a = Var.var("a"); + final IVariable<?> b = Var.var("b"); + final IVariable<?> c = Var.var("c"); + final IVariable<?> d = Var.var("d"); + + final PipelineOp startOp = new StartOp(new BOp[] {}, + NV.asMap(new NV[] {// + new NV(Predicate.Annotations.BOP_ID, startId),// + new NV(SliceOp.Annotations.EVALUATION_CONTEXT, + BOpEvaluationContext.CONTROLLER),// + })); + + final Predicate<?> pred1Op = new Predicate<E>( + new IVariableOrConstant[] { a, b }, NV + .asMap(new NV[] {// + new NV(Predicate.Annotations.RELATION_NAME, + new String[] { namespace }),// + new NV(Predicate.Annotations.BOP_ID, predId1),// + new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// + })); + + final Predicate<?> pred2Op = new Predicate<E>( + new IVariableOrConstant[] { b, c }, NV + .asMap(new NV[] {// + new NV(Predicate.Annotations.RELATION_NAME, + new String[] { namespace }),// + new NV(Predicate.Annotations.BOP_ID, predId2),// + new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// + })); + + final Predicate<?> pred3Op = new Predicate<E>( + new IVariableOrConstant[] { c, d }, NV + .asMap(new NV[] {// + new NV(Predicate.Annotations.RELATION_NAME, + new String[] { namespace }),// + new NV(Predicate.Annotations.BOP_ID, predId3),// + new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// + })); + + final PipelineOp join1Op = new PipelineJoin<E>(// + new BOp[]{startOp},// + new NV(Predicate.Annotations.BOP_ID, joinId1),// + new NV(PipelineJoin.Annotations.PREDICATE,pred1Op)); + + final PipelineOp subQuery; + { + final PipelineOp join2Op = new PipelineJoin<E>(// + new BOp[] { /*join1Op*/ },// + new NV(Predicate.Annotations.BOP_ID, joinId2),// +// new NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// + new NV(PipelineJoin.Annotations.PREDICATE, pred2Op)// +// // join is optional. +// new NV(PipelineJoin.Annotations.OPTIONAL, true),// +// // optional target is the same as the default target. +// new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId) + ); + + final PipelineOp join3Op = new PipelineJoin<E>(// + new BOp[] { join2Op },// + new NV(Predicate.Annotations.BOP_ID, joinId3),// +// new NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// + new NV(PipelineJoin.Annotations.PREDICATE, pred3Op),// + // constraint d != Leon + new NV(PipelineJoin.Annotations.CONSTRAINTS, + new IConstraint[] { new NEConstant(d, new Constant<String>("Leon")) }) +// // join is optional. +// new NV(PipelineJoin.Annotations.OPTIONAL, true),// +// // optional target is the same as the default target. +// new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId) + ); + + subQuery = join3Op; + } + + final PipelineOp joinGroup1Op = new OptionalJoinGroup(new BOp[]{join1Op}, + new NV(Predicate.Annotations.BOP_ID, joinGroup1),// +// new NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// + new NV(OptionalJoinGroup.Annotations.SUBQUERY, subQuery),// + new NV(BOp.Annotations.CONTROLLER,true)// +// new NV(BOp.Annotations.EVALUATION_CONTEXT, +// BOpEvaluationContext.CONTROLLER)// +// // join is optional. +// new NV(PipelineJoin.Annotations.OPTIONAL, true),// +// // optional target is the same as the default target. 
+// new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId) + ); + + final PipelineOp sliceOp = new SliceOp(// + new BOp[]{joinGroup1Op}, + NV.asMap(new NV[] {// + new NV(BOp.Annotations.BOP_ID, sliceId),// + new NV(BOp.Annotations.EVALUATION_CONTEXT, + BOpEvaluationContext.CONTROLLER),// + })); + + final PipelineOp query = sliceOp; + + // start the query. + final UUID queryId = UUID.randomUUID(); + final IChunkMessage<IBindingSet> initialChunkMessage; + { + + final IBindingSet initialBindings = new HashBindingSet(); + +// initialBindings.set(Var.var("x"), new Constant<String>("Mary")); + + initialChunkMessage = new LocalChunkMessage<IBindingSet>(queryEngine, + queryId, startId,// + -1, // partitionId + newBindingSetIterator(initialBindings)); + } + final IRunningQuery runningQuery = queryEngine.eval(queryId, query, + initialChunkMessage); + + // verify solutions. + { + + // the expected solutions. + final IBindingSet[] expected = new IBindingSet[] {// + // two solutions where the optional join succeeds. + new ArrayBindingSet(// + new IVariable[] { a, b, c, d },// + new IConstant[] { new Constant<String>("Paul"), + new Constant<String>("Mary"), + new Constant<String>("Brad"), + new Constant<String>("Fred") }// + ), + new ArrayBindingSet(// + new IVariable[] { a, b, c, d },// + new IConstant[] { new Constant<String>("John"), + new Constant<String>("Mary"), + new Constant<String>("Brad"), + new Constant<String>("Fred") }// + ), + // plus anything we read from the first access path which did not + // pass the optional join + new ArrayBindingSet(// + new IVariable[] { a, b },// + new IConstant[] { new Constant<String>("Paul"), + new Constant<String>("Brad") }// + ), + new ArrayBindingSet(// + new IVariable[] { a, b },// + new IConstant[] { new Constant<String>("John"), + new Constant<String>("Brad") }// + ), + new ArrayBindingSet(// + new IVariable[] { a, b },// + new IConstant[] { new Constant<String>("Mary"), + new Constant<String>("Brad") }// + ), + new ArrayBindingSet(// + new IVariable[] { a, b },// + new IConstant[] { new Constant<String>("Brad"), + new Constant<String>("Fred") }// + ), + new ArrayBindingSet(// + new IVariable[] { a, b },// + new IConstant[] { new Constant<String>("Brad"), + new Constant<String>("Leon") }// + ) + }; + + assertSameSolutionsAnyOrder(expected, + new Dechunkerator<IBindingSet>(runningQuery.iterator())); + + } + + // Wait until the query is done. + runningQuery.get(); + final Map<Integer, BOpStats> statsMap = runningQuery.getStats(); + { + // validate the stats map. + assertNotNull(statsMap); + assertEquals(4, statsMap.size()); + if (log.isInfoEnabled()) + log.info(statsMap.toString()); + } + + } + + /** + * Unit test for optional join group with a filter on a variable outside the + * optional join group. Three joins are used and target a {@link SliceOp}. + * The 2nd and 3rd joins are in embedded an {@link OptionalJoinGroup}. The + * optional join group contains a filter that uses a variable outside the + * optional join group. + * <P> + * The query takes the form: + * + * <pre> + * (a b) + * optional { + * (b c) + * (c d) + * filter(a != Paul) + * } + * </pre> + * + * The (a b) tail will match everything in the knowledge base. The join + * group takes us two hops out from ?b. 
There should be two solutions that + * succeed the optional join group: + * + * <pre> + * (john mary brad fred) + * (john mary brad leon) + * </pre> + * + * and six more that don't succeed the optional join group: + * + * <pre> + * (paul mary) * + * (paul brad) * + * (john brad) + * (mary brad) + * (brad fred) + * (brad leon) + * </pre> + * + * In the cases marked with a <code>*</code>, ?a is bound to Paul even + * though there is a filter that specifically prohibits a = Paul. This is + * because the filter is inside the optional join group, which means that + * solutions can still include a = Paul, but the optional join group should + * not run in that case. + */ + public void test_query_optionals_filter2() throws Exception { + + // main query + final int startId = 1; + final int joinId1 = 2; + final int predId1 = 3; // (a,b) + final int condId = 4; // (a != Paul) + final int joinGroup1 = 10; + final int sliceId = 9; + + // subquery (iff condition is satisfied) + final int joinId2 = 5; // : group1 + final int predId2 = 6; // (b,c) + final int joinId3 = 7; // : group1 + final int predId3 = 8; // (c,d) + + final IVariable<?> a = Var.var("a"); + final IVariable<?> b = Var.var("b"); + final IVariable<?> c = Var.var("c"); + final IVariable<?> d = Var.var("d"); + +// final Integer joinGroup1 = Integer.valueOf(1); + + /* + * Not quite sure how to write this one. I think it probably goes + * something like this: + * + * 1. startOp + * 2. join1Op(a b) + * 3. conditionalRoutingOp( if a = Paul then goto sliceOp ) + * 4. join2Op(b c) + * 5. join3Op(c d) + * 6. sliceOp + */ + + final PipelineOp startOp = new StartOp(new BOp[] {}, + NV.asMap(new NV[] {// + new NV(Predicate.Annotations.BOP_ID, startId),// + new NV(SliceOp.Annotations.EVALUATION_CONTEXT, + BOpEvaluationContext.CONTROLLER),// + })); + + final Predicate<?> pred1Op = new Predicate<E>( + new IVariableOrConstant[] { a, b }, NV + .asMap(new NV[] {// + new NV(Predicate.Annotations.RELATION_NAME, + new String[] { namespace }),// + new NV(Predicate.Annotations.BOP_ID, predId1),// + new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// + })); + + final Predicate<?> pred2Op = new Predicate<E>( + new IVariableOrConstant[] { b, c }, NV + .asMap(new NV[] {// + new NV(Predicate.Annotations.RELATION_NAME, + new String[] { namespace }),// + new NV(Predicate.Annotations.BOP_ID, predId2),// + new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// + })); + + final Predicate<?> pred3Op = new Predicate<E>( + new IVariableOrConstant[] { c, d }, NV + .asMap(new NV[] {// + new NV(Predicate.Annotations.RELATION_NAME, + new String[] { namespace }),// + new NV(Predicate.Annotations.BOP_ID, predId3),// + new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// + })); + + final PipelineOp join1Op = new PipelineJoin<E>(// + new BOp[]{startOp},// + new NV(Predicate.Annotations.BOP_ID, joinId1),// + new NV(PipelineJoin.Annotations.PREDICATE,pred1Op)); + + final IConstraint condition = new NEConstant(a, new Constant<String>("Paul")); + + final ConditionalRoutingOp condOp = new ConditionalRoutingOp(new BOp[]{join1Op}, + NV.asMap(new NV[]{// + new NV(BOp.Annotations.BOP_ID,condId), + new NV(PipelineOp.Annotations.SINK_REF, joinGroup1), // a != Paul + new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId), // a == Paul + new NV(ConditionalRoutingOp.Annotations.CONDITION, condition), + })); + + final PipelineOp subQuery; + { + final PipelineOp join2Op = new PipelineJoin<E>(// + new BOp[] { /*condOp*/ },// + new NV(Predicate.Annotations.BOP_ID, joinId2),// +// new 
NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// + new NV(PipelineJoin.Annotations.PREDICATE, pred2Op)// +// // join is optional. +// new NV(PipelineJoin.Annotations.OPTIONAL, true),// +// // optional target is the same as the default target. +// new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId) + ); + + final PipelineOp join3Op = new PipelineJoin<E>(// + new BOp[] { join2Op },// + new NV(Predicate.Annotations.BOP_ID, joinId3),// +// new NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// + new NV(PipelineJoin.Annotations.PREDICATE, pred3Op)// +// // join is optional. +// new NV(PipelineJoin.Annotations.OPTIONAL, true),// +// // optional target is the same as the default target. +// new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId) + ); + subQuery = join3Op; + } + + final PipelineOp joinGroup1Op = new OptionalJoinGroup(new BOp[]{condOp}, + new NV(Predicate.Annotations.BOP_ID, joinGroup1),// +// new NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// + new NV(OptionalJoinGroup.Annotations.SUBQUERY, subQuery),// + new NV(BOp.Annotations.CONTROLLER,true)// +// new NV(BOp.Annotations.EVALUATION_CONTEXT, +// BOpEvaluationContext.CONTROLLER)// +// // join is optional. +// new NV(PipelineJoin.Annotations.OPTIONAL, true),// +// // optional target is the same as the default target. +// new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId) + ); + + final PipelineOp sliceOp = new SliceOp(// + new BOp[]{joinGroup1Op}, + NV.asMap(new NV[] {// + new NV(BOp.Annotations.BOP_ID, sliceId),// + new NV(BOp.Annotations.EVALUATION_CONTEXT, + BOpEvaluationContext.CONTROLLER),// + })); + + final PipelineOp query = sliceOp; + + // start the query. + final UUID queryId = UUID.randomUUID(); + final IChunkMessage<IBindingSet> initialChunkMessage; + { + + final IBindingSet initialBindings = new HashBindingSet(); + +// initialBindings.set(Var.var("x"), new Constant<String>("Mary")); + + initialChunkMessage = new LocalChunkMessage<IBindingSet>(queryEngine, + queryId, startId,// + -1, // partitionId + newBindingSetIterator(initialBindings)); + } + final IRunningQuery runningQuery = queryEngine.eval(queryId, query, + initialChunkMessage); + + // verify solutions. + { + + // the expected solutions. + final IBindingSet[] expected = new IBindingSet[] {// + // two solutions where the optional join succeeds. 
+ new ArrayBindingSet(// + new IVariable[] { a, b, c, d },// + new IConstant[] { new Constant<String>("John"), + new Constant<String>("Mary"), + new Constant<String>("Brad"), + new Constant<String>("Leon") }// + ), + new ArrayBindingSet(// + new IVariable[] { a, b, c, d },// + new IConstant[] { new Constant<String>("John"), + new Constant<String>("Mary"), + new Constant<String>("Brad"), + new Constant<String>("Fred") }// + ), + // plus anything we read from the first access path which did not + // pass the optional join + new ArrayBindingSet(// + new IVariable[] { a, b },// + new IConstant[] { new Constant<String>("Paul"), + new Constant<String>("Mary") }// + ), + new ArrayBindingSet(// + new IVariable[] { a, b },// + new IConstant[] { new Constant<String>("Paul"), + new Constant<String>("Brad") }// + ), + new ArrayBindingSet(// + new IVariable[] { a, b },// + new IConstant[] { new Constant<String>("John"), + new Constant<String>("Brad") }// + ), + new ArrayBindingSet(// + new IVariable[] { a, b },// + new IConstant[] { new Constant<String>("Mary"), + new Constant<String>("Brad") }// + ), + new ArrayBindingSet(// + new IVariable[] { a, b },// + new IConstant[] { new Constant<String>("Brad"), + new Constant<String>("Fred") }// + ), + new ArrayBindingSet(// + new IVariable[] { a, b },// + new IConstant[] { new Constant<String>("Brad"), + new Constant<String>("Leon") }// + ) + }; + + assertSameSolutionsAnyOrder(expected, + new Dechunkerator<IBindingSet>(runningQuery.iterator())); + + } + + // Wait until the query is done. + runningQuery.get(); + final Map<Integer, BOpStats> statsMap = runningQuery.getStats(); + { + // validate the stats map. + assertNotNull(statsMap); + assertEquals(5, statsMap.size()); + if (log.isInfoEnabled()) + log.info(statsMap.toString()); + } + + } + + /** + * Verify the expected solutions. + * + * @param expected + * @param itr + */ + static public void assertSameSolutions(final IBindingSet[] expected, + final IAsynchronousIterator<IBindingSet[]> itr) { + try { + int n = 0; + while (itr.hasNext()) { + final IBindingSet[... [truncated message content] |
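An aside on the structure above: the filter-outside-the-group case is easier to follow once the routing is spelled out in ordinary Java. The sketch below captures only the semantics that test_query_optionals_filter2 wires together with ConditionalRoutingOp and OptionalJoinGroup -- it is not the bigdata operator model, and every name in it is an illustrative assumption: solutions that satisfy the condition enter the optional subquery, everything else is routed straight to the default sink, and a subquery that produces nothing lets the input solution through unchanged (optional semantics).

    import java.util.ArrayList;
    import java.util.List;
    import java.util.function.Function;
    import java.util.function.Predicate;

    final class ConditionalRoutingSketch {

        /**
         * Route each source solution either through the optional group (when
         * the condition holds) or directly to the sink (when it does not).
         */
        static <B> List<B> run(final List<B> source,
                final Predicate<B> condition, // e.g., a != Paul
                final Function<B, List<B>> optionalGroup) {
            final List<B> sink = new ArrayList<B>();
            for (final B bset : source) {
                if (!condition.test(bset)) {
                    // alt sink: condition failed, bypass the group entirely.
                    sink.add(bset);
                    continue;
                }
                final List<B> out = optionalGroup.apply(bset);
                if (out.isEmpty()) {
                    // optional semantics: no match, the input survives as-is.
                    sink.add(bset);
                } else {
                    // group succeeded: its solutions replace the input.
                    sink.addAll(out);
                }
            }
            return sink;
        }
    }

This is why (Paul, Mary) and (Paul, Brad) appear in the expected solutions with only {a, b} bound: the condition a != Paul routes those solutions around the group, so they never acquire ?c and ?d bindings.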
From: <tho...@us...> - 2011-01-03 18:25:42
|
Revision: 4048 http://bigdata.svn.sourceforge.net/bigdata/?rev=4048&view=rev Author: thompsonbry Date: 2011-01-03 18:25:34 +0000 (Mon, 03 Jan 2011) Log Message: ----------- Added try/finally to ensure that the subquery is cancelled if the query is interrupted. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/OptionalJoinGroup.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestAll.java Removed Paths: ------------- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngineOptionalJoins.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/OptionalJoinGroup.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/OptionalJoinGroup.java 2011-01-03 14:41:36 UTC (rev 4047) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/OptionalJoinGroup.java 2011-01-03 18:25:34 UTC (rev 4048) @@ -51,13 +51,8 @@ * solutions produced by the subquery are copied to the default sink. If no * solutions are produced, then the original binding set is copied to the * default sink (optional join semantics). Each subquery is run as a separate - * query but is linked to the parent query in the operator is being evaluated. + * query but will be cancelled if the parent query is cancelled. * - * FIXME Is this true?: "This operator must on the query controller." For an - * optional join group in scale-out, we need to concentrate the solutions back - * to the controller if this is true. If it is not a requirement, then we can - * just issue the subquery from ANY node. - * * FIXME Parallel evaluation of subqueries is not implemented. What is the * appropriate parallelism for this operator? More parallelism should reduce * latency but could increase the memory burden. Review this decision once we @@ -241,22 +236,33 @@ final IAsynchronousIterator<IBindingSet[]> sitr = context .getSource(); - // @todo test for interrupt/halted query? while(sitr.hasNext()) { final IBindingSet[] chunk = sitr.next(); for(IBindingSet bset : chunk) { - final FutureTask<IRunningQuery> ft = new FutureTask<IRunningQuery>( + FutureTask<IRunningQuery> ft = new FutureTask<IRunningQuery>( new SubqueryTask(bset, subquery, context)); // run the subquery. executor.execute(ft); + + try { + + // wait for the outcome. + ft.get(); + + } finally { + + /* + * Ensure that the inner task is cancelled if the + * outer task is interrupted. + */ + ft.cancel(true/* mayInterruptIfRunning */); + + } - // wait for the outcome. - ft.get(); - } } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestAll.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestAll.java 2011-01-03 14:41:36 UTC (rev 4047) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestAll.java 2011-01-03 18:25:34 UTC (rev 4048) @@ -28,8 +28,6 @@ import junit.framework.TestCase; import junit.framework.TestSuite; -import com.bigdata.bop.controller.JoinGraph; -import com.bigdata.relation.rule.IRule; import com.bigdata.relation.rule.eval.DefaultEvaluationPlan2; /** @@ -60,11 +58,9 @@ * Returns a test that will run each of the implementation specific test * suites in turn. 
* - * @todo Test the static optimization approach based on - * {@link DefaultEvaluationPlan2}, which will have to be reworked to - * remove its dependencies on the {@link IRule} model. - * - * @todo Test runtime optimization based on {@link JoinGraph}s. + * @todo If the static optimization is reworked as a controller operator + * based on {@link DefaultEvaluationPlan2}, then add a test suite for + * that operator here. */ public static Test suite() { Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestAll.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestAll.java 2011-01-03 14:41:36 UTC (rev 4047) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestAll.java 2011-01-03 18:25:34 UTC (rev 4048) @@ -75,9 +75,6 @@ // stress test for SliceOp. suite.addTestSuite(TestQueryEngine_Slice.class); - // test suite for optional join groups. - suite.addTestSuite(TestQueryEngineOptionalJoins.class); - // @todo test suite for query evaluation (DISTINCT, ORDER BY, GROUP BY). // suite.addTestSuite(TestQueryEngine2.class); Deleted: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngineOptionalJoins.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngineOptionalJoins.java 2011-01-03 14:41:36 UTC (rev 4047) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngineOptionalJoins.java 2011-01-03 18:25:34 UTC (rev 4048) @@ -1,1077 +0,0 @@ -/** - -Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. - -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ -/* - * Created on Aug 23, 2010 - */ - -package com.bigdata.bop.engine; - -import java.util.Iterator; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.Properties; -import java.util.UUID; -import java.util.concurrent.atomic.AtomicInteger; - -import junit.framework.TestCase2; - -import com.bigdata.bop.BOp; -import com.bigdata.bop.BOpEvaluationContext; -import com.bigdata.bop.Constant; -import com.bigdata.bop.IBindingSet; -import com.bigdata.bop.IConstant; -import com.bigdata.bop.IConstraint; -import com.bigdata.bop.IVariable; -import com.bigdata.bop.IVariableOrConstant; -import com.bigdata.bop.NV; -import com.bigdata.bop.PipelineOp; -import com.bigdata.bop.Var; -import com.bigdata.bop.IPredicate.Annotations; -import com.bigdata.bop.ap.E; -import com.bigdata.bop.ap.Predicate; -import com.bigdata.bop.ap.R; -import com.bigdata.bop.bindingSet.ArrayBindingSet; -import com.bigdata.bop.bindingSet.HashBindingSet; -import com.bigdata.bop.bset.ConditionalRoutingOp; -import com.bigdata.bop.bset.StartOp; -import com.bigdata.bop.constraint.EQConstant; -import com.bigdata.bop.constraint.NEConstant; -import com.bigdata.bop.join.PipelineJoin; -import com.bigdata.bop.solutions.SliceOp; -import com.bigdata.journal.BufferMode; -import com.bigdata.journal.ITx; -import com.bigdata.journal.Journal; -import com.bigdata.relation.accesspath.IAsynchronousIterator; -import com.bigdata.relation.accesspath.ThickAsynchronousIterator; -import com.bigdata.striterator.ChunkedArrayIterator; -import com.bigdata.striterator.Dechunkerator; -import com.bigdata.striterator.ICloseableIterator; - -/** - * Test suite for handling of optional join groups during query evaluation - * against a local database instance. Optional join groups are handled using - * {@link IBindingSet#push()} when entering the join group and - * {@link IBindingSet#pop(boolean)} when exiting the join group. If the join - * group was successful for a given binding set, then <code>save:=true</code> is - * specified for {@link IBindingSet#pop(boolean)} and the applied bindings will - * be visible to the downstream consumer. Otherwise the bindings applied during - * the join group are simply discarded. - * - * <pre> - * -Dlog4j.configuration=bigdata/src/resources/logging/log4j.properties - * </pre> - * - * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id: TestQueryEngine.java 3950 2010-11-17 02:14:08Z thompsonbry $ - * - * @deprecated This test suite has been moved to the com.bigdata.bop.engine - * package and should be removed from this package. 
- */ -public class TestQueryEngineOptionalJoins extends TestCase2 { - - /** - * - */ - public TestQueryEngineOptionalJoins() { - } - - /** - * @param name - */ - public TestQueryEngineOptionalJoins(String name) { - super(name); - } - - @Override - public Properties getProperties() { - - final Properties p = new Properties(super.getProperties()); - - p.setProperty(Journal.Options.BUFFER_MODE, BufferMode.Transient - .toString()); - - return p; - - } - - static private final String namespace = "ns"; - Journal jnl; - QueryEngine queryEngine; - - public void setUp() throws Exception { - - jnl = new Journal(getProperties()); - - loadData(jnl); - - queryEngine = new QueryEngine(jnl); - - queryEngine.init(); - - } - - /** - * Create and populate relation in the {@link #namespace}. - */ - private void loadData(final Journal store) { - - // create the relation. - final R rel = new R(store, namespace, ITx.UNISOLATED, new Properties()); - rel.create(); - - // data to insert (in key order for convenience). - final E[] a = {// - new E("Paul", "Mary"),// [0] - new E("Paul", "Brad"),// [1] - - new E("John", "Mary"),// [2] - new E("John", "Brad"),// [3] - - new E("Mary", "Brad"),// [4] - - new E("Brad", "Fred"),// [5] - new E("Brad", "Leon"),// [6] - }; - - // insert data (the records are not pre-sorted). - rel.insert(new ChunkedArrayIterator<E>(a.length, a, null/* keyOrder */)); - - // Do commit since not scale-out. - store.commit(); - - } - - public void tearDown() throws Exception { - - if (queryEngine != null) { - queryEngine.shutdownNow(); - queryEngine = null; - } - - if (jnl != null) { - jnl.destroy(); - jnl = null; - } - - } - - /** - * Return an {@link IAsynchronousIterator} that will read a single, - * empty {@link IBindingSet}. - * - * @param bindingSet - * the binding set. - */ - protected ThickAsynchronousIterator<IBindingSet[]> newBindingSetIterator( - final IBindingSet bindingSet) { - - return new ThickAsynchronousIterator<IBindingSet[]>( - new IBindingSet[][] { new IBindingSet[] { bindingSet } }); - - } - - /** - * Return an {@link IAsynchronousIterator} that will read a single, chunk - * containing all of the specified {@link IBindingSet}s. - * - * @param bindingSets - * the binding sets. - */ - protected ThickAsynchronousIterator<IBindingSet[]> newBindingSetIterator( - final IBindingSet[] bindingSets) { - - return new ThickAsynchronousIterator<IBindingSet[]>( - new IBindingSet[][] { bindingSets }); - - } - - /** - * Return an {@link IAsynchronousIterator} that will read a single, chunk - * containing all of the specified {@link IBindingSet}s. - * - * @param bindingSetChunks - * the chunks of binding sets. - */ - protected ThickAsynchronousIterator<IBindingSet[]> newBindingSetIterator( - final IBindingSet[][] bindingSetChunks) { - - return new ThickAsynchronousIterator<IBindingSet[]>(bindingSetChunks); - - } - - /** - * Unit test for optional join group. Three joins are used and target a - * {@link SliceOp}. The 2nd and 3rd joins are an optional join group. - * Intermediate results which do not succeed on the optional join are - * forwarded to the {@link SliceOp} which is the target specified by the - * {@link PipelineOp.Annotations#ALT_SINK_REF}. - * - * The optional join group takes the form: - * - * <pre> - * (a b) - * optional { - * (b c) - * (c d) - * } - * </pre> - * - * The (a b) tail will match everything in the knowledge base. The join - * group takes us two hops out from ?b. 
There should be four solutions that - * succeed the optional join group: - * - * <pre> - * (paul mary brad fred) - * (paul mary brad leon) - * (john mary brad fred) - * (john mary brad leon) - * </pre> - * - * and five more that don't succeed the optional join group: - * - * <pre> - * (paul brad) * - * (john brad) * - * (mary brad) * - * (brad fred) - * (brad leon) - * </pre> - * - * In this cases marked with a <code>*</code>, ?c will become temporarily - * bound to fred and leon (since brad knows fred and leon), but the (c d) - * tail will fail since fred and leon don't know anyone else. At this point, - * the ?c binding must be removed from the solution. - */ - public void test_query_join2_optionals() throws Exception { - - final int startId = 1; // - final int joinId1 = 2; // : base join group. - final int predId1 = 3; // (a b) - final int joinId2 = 4; // : joinGroup1 - final int predId2 = 5; // (b c) - final int joinId3 = 6; // : joinGroup1 - final int predId3 = 7; // (c d) - final int sliceId = 8; // - - final IVariable<?> a = Var.var("a"); - final IVariable<?> b = Var.var("b"); - final IVariable<?> c = Var.var("c"); - final IVariable<?> d = Var.var("d"); - - final Integer joinGroup1 = Integer.valueOf(1); - - final PipelineOp startOp = new StartOp(new BOp[] {}, - NV.asMap(new NV[] {// - new NV(Predicate.Annotations.BOP_ID, startId),// - new NV(SliceOp.Annotations.EVALUATION_CONTEXT, - BOpEvaluationContext.CONTROLLER),// - })); - - final Predicate<?> pred1Op = new Predicate<E>( - new IVariableOrConstant[] { a, b }, NV - .asMap(new NV[] {// - new NV(Predicate.Annotations.RELATION_NAME, - new String[] { namespace }),// - new NV(Predicate.Annotations.BOP_ID, predId1),// - new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// - })); - - final Predicate<?> pred2Op = new Predicate<E>( - new IVariableOrConstant[] { b, c }, NV - .asMap(new NV[] {// - new NV(Predicate.Annotations.RELATION_NAME, - new String[] { namespace }),// - new NV(Predicate.Annotations.BOP_ID, predId2),// - new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// - })); - - final Predicate<?> pred3Op = new Predicate<E>( - new IVariableOrConstant[] { c, d }, NV - .asMap(new NV[] {// - new NV(Predicate.Annotations.RELATION_NAME, - new String[] { namespace }),// - new NV(Predicate.Annotations.BOP_ID, predId3),// - new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// - })); - - final PipelineOp join1Op = new PipelineJoin<E>(// - new BOp[]{startOp},// - new NV(Predicate.Annotations.BOP_ID, joinId1),// - new NV(PipelineJoin.Annotations.PREDICATE,pred1Op)); - - final PipelineOp join2Op = new PipelineJoin<E>(// - new BOp[] { join1Op },// - new NV(Predicate.Annotations.BOP_ID, joinId2),// - new NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// - new NV(PipelineJoin.Annotations.PREDICATE, pred2Op),// - // join is optional. -// new NV(PipelineJoin.Annotations.OPTIONAL, true),// - // optional target is the same as the default target. - new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId)); - - final PipelineOp join3Op = new PipelineJoin<E>(// - new BOp[] { join2Op },// - new NV(Predicate.Annotations.BOP_ID, joinId3),// - new NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// - new NV(PipelineJoin.Annotations.PREDICATE, pred3Op),// - // join is optional. -// new NV(PipelineJoin.Annotations.OPTIONAL, true),// - // optional target is the same as the default target. 
- new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId)); - - final PipelineOp sliceOp = new SliceOp(// - new BOp[]{join3Op}, - NV.asMap(new NV[] {// - new NV(BOp.Annotations.BOP_ID, sliceId),// - new NV(BOp.Annotations.EVALUATION_CONTEXT, - BOpEvaluationContext.CONTROLLER),// - })); - - final PipelineOp query = sliceOp; - - // start the query. - final UUID queryId = UUID.randomUUID(); - final IChunkMessage<IBindingSet> initialChunkMessage; - { - - final IBindingSet initialBindings = new HashBindingSet(); - -// initialBindings.set(Var.var("x"), new Constant<String>("Mary")); - - initialChunkMessage = new LocalChunkMessage<IBindingSet>(queryEngine, - queryId, startId,// - -1, // partitionId - newBindingSetIterator(initialBindings)); - } - final IRunningQuery runningQuery = queryEngine.eval(queryId, query, - initialChunkMessage); - - // verify solutions. - { - - // the expected solutions. - final IBindingSet[] expected = new IBindingSet[] {// - // four solutions where the optional join succeeds. - new ArrayBindingSet(// - new IVariable[] { a, b, c, d },// - new IConstant[] { new Constant<String>("Paul"), - new Constant<String>("Mary"), - new Constant<String>("Brad"), - new Constant<String>("Fred") }// - ), - new ArrayBindingSet(// - new IVariable[] { a, b, c, d },// - new IConstant[] { new Constant<String>("Paul"), - new Constant<String>("Mary"), - new Constant<String>("Brad"), - new Constant<String>("Leon") }// - ), - new ArrayBindingSet(// - new IVariable[] { a, b, c, d },// - new IConstant[] { new Constant<String>("John"), - new Constant<String>("Mary"), - new Constant<String>("Brad"), - new Constant<String>("Fred") }// - ), - new ArrayBindingSet(// - new IVariable[] { a, b, c, d },// - new IConstant[] { new Constant<String>("John"), - new Constant<String>("Mary"), - new Constant<String>("Brad"), - new Constant<String>("Leon") }// - ), - // plus anything we read from the first access path which did not - // pass the optional join - new ArrayBindingSet(// - new IVariable[] { a, b },// - new IConstant[] { new Constant<String>("Paul"), - new Constant<String>("Brad") }// - ), - new ArrayBindingSet(// - new IVariable[] { a, b },// - new IConstant[] { new Constant<String>("John"), - new Constant<String>("Brad") }// - ), - new ArrayBindingSet(// - new IVariable[] { a, b },// - new IConstant[] { new Constant<String>("Mary"), - new Constant<String>("Brad") }// - ), - new ArrayBindingSet(// - new IVariable[] { a, b },// - new IConstant[] { new Constant<String>("Brad"), - new Constant<String>("Fred") }// - ), - new ArrayBindingSet(// - new IVariable[] { a, b },// - new IConstant[] { new Constant<String>("Brad"), - new Constant<String>("Leon") }// - ) - }; - - /* - * junit.framework.AssertionFailedError: Iterator will deliver too - * many objects: reminder(3)=[{ a=John, b=Brad }, { a=Mary, b=Brad - * }, { a=Paul, b=Brad }]. - */ - assertSameSolutionsAnyOrder(expected, - new Dechunkerator<IBindingSet>(runningQuery.iterator())); - - } - - // Wait until the query is done. - runningQuery.get(); - final Map<Integer, BOpStats> statsMap = runningQuery.getStats(); - { - // validate the stats map. - assertNotNull(statsMap); - assertEquals(5, statsMap.size()); - if (log.isInfoEnabled()) - log.info(statsMap.toString()); - } - - } - - /** - * Unit test for optional join group with a filter. Three joins are used and - * target a {@link SliceOp}. The 2nd and 3rd joins are an optional join - * group. 
Intermediate results which do not succeed on the optional join are - * forwarded to the {@link SliceOp} which is the target specified by the - * {@link PipelineOp.Annotations#ALT_SINK_REF}. The optional join group - * contains a filter. - * <p> - * The optional join group takes the form: - * - * <pre> - * (a b) - * optional { - * (b c) - * (c d) - * filter(d != Leon) - * } - * </pre> - * - * The (a b) tail will match everything in the knowledge base. The join - * group takes us two hops out from ?b. There should be two solutions that - * succeed the optional join group: - * - * <pre> - * (paul mary brad fred) - * (john mary brad fred) - * </pre> - * - * and five more that don't succeed the optional join group: - * - * <pre> - * (paul brad) * - * (john brad) * - * (mary brad) * - * (brad fred) - * (brad leon) - * </pre> - * - * In the cases marked with a <code>*</code>, ?c will become temporarily - * bound to fred and leon (since brad knows fred and leon), but the (c d) - * tail will fail since fred and leon don't know anyone else. At this point, - * the ?c binding must be removed from the solution. - * <p> - * The filter (d != Leon) will prune the two solutions: - * - * <pre> - * (paul mary brad leon) - * (john mary brad leon) - * </pre> - * - * since ?d is bound to Leon in those cases. - */ - public void test_query_optionals_filter() throws Exception { - - final int startId = 1; - final int joinId1 = 2; // - final int predId1 = 3; // (a,b) - final int joinId2 = 4; // : group1 - final int predId2 = 5; // (b,c) - final int joinId3 = 6; // : group1 - final int predId3 = 7; // (c,d) - final int sliceId = 8; - - final IVariable<?> a = Var.var("a"); - final IVariable<?> b = Var.var("b"); - final IVariable<?> c = Var.var("c"); - final IVariable<?> d = Var.var("d"); - - final Integer joinGroup1 = Integer.valueOf(1); - - final PipelineOp startOp = new StartOp(new BOp[] {}, - NV.asMap(new NV[] {// - new NV(Predicate.Annotations.BOP_ID, startId),// - new NV(SliceOp.Annotations.EVALUATION_CONTEXT, - BOpEvaluationContext.CONTROLLER),// - })); - - final Predicate<?> pred1Op = new Predicate<E>( - new IVariableOrConstant[] { a, b }, NV - .asMap(new NV[] {// - new NV(Predicate.Annotations.RELATION_NAME, - new String[] { namespace }),// - new NV(Predicate.Annotations.BOP_ID, predId1),// - new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// - })); - - final Predicate<?> pred2Op = new Predicate<E>( - new IVariableOrConstant[] { b, c }, NV - .asMap(new NV[] {// - new NV(Predicate.Annotations.RELATION_NAME, - new String[] { namespace }),// - new NV(Predicate.Annotations.BOP_ID, predId2),// - new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// - })); - - final Predicate<?> pred3Op = new Predicate<E>( - new IVariableOrConstant[] { c, d }, NV - .asMap(new NV[] {// - new NV(Predicate.Annotations.RELATION_NAME, - new String[] { namespace }),// - new NV(Predicate.Annotations.BOP_ID, predId3),// - new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// - })); - - final PipelineOp join1Op = new PipelineJoin<E>(// - new BOp[]{startOp},// - new NV(Predicate.Annotations.BOP_ID, joinId1),// - new NV(PipelineJoin.Annotations.PREDICATE,pred1Op)); - - final PipelineOp join2Op = new PipelineJoin<E>(// - new BOp[] { join1Op },// - new NV(Predicate.Annotations.BOP_ID, joinId2),// - new NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// - new NV(PipelineJoin.Annotations.PREDICATE, pred2Op),// - // join is optional. 
-// new NV(PipelineJoin.Annotations.OPTIONAL, true),// - // optional target is the same as the default target. - new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId)); - - final PipelineOp join3Op = new PipelineJoin<E>(// - new BOp[] { join2Op },// - new NV(Predicate.Annotations.BOP_ID, joinId3),// - new NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// - new NV(PipelineJoin.Annotations.PREDICATE, pred3Op),// - // constraint d != Leon - new NV(PipelineJoin.Annotations.CONSTRAINTS, - new IConstraint[] { new NEConstant(d, new Constant<String>("Leon")) }), - // join is optional. -// new NV(PipelineJoin.Annotations.OPTIONAL, true),// - // optional target is the same as the default target. - new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId)); - - final PipelineOp sliceOp = new SliceOp(// - new BOp[]{join3Op}, - NV.asMap(new NV[] {// - new NV(BOp.Annotations.BOP_ID, sliceId),// - new NV(BOp.Annotations.EVALUATION_CONTEXT, - BOpEvaluationContext.CONTROLLER),// - })); - - final PipelineOp query = sliceOp; - - // start the query. - final UUID queryId = UUID.randomUUID(); - final IChunkMessage<IBindingSet> initialChunkMessage; - { - - final IBindingSet initialBindings = new HashBindingSet(); - -// initialBindings.set(Var.var("x"), new Constant<String>("Mary")); - - initialChunkMessage = new LocalChunkMessage<IBindingSet>(queryEngine, - queryId, startId,// - -1, // partitionId - newBindingSetIterator(initialBindings)); - } - final IRunningQuery runningQuery = queryEngine.eval(queryId, query, - initialChunkMessage); - - // verify solutions. - { - - // the expected solutions. - final IBindingSet[] expected = new IBindingSet[] {// - // two solutions where the optional join succeeds. - new ArrayBindingSet(// - new IVariable[] { a, b, c, d },// - new IConstant[] { new Constant<String>("Paul"), - new Constant<String>("Mary"), - new Constant<String>("Brad"), - new Constant<String>("Fred") }// - ), - new ArrayBindingSet(// - new IVariable[] { a, b, c, d },// - new IConstant[] { new Constant<String>("John"), - new Constant<String>("Mary"), - new Constant<String>("Brad"), - new Constant<String>("Fred") }// - ), - // plus anything we read from the first access path which did not - // pass the optional join - new ArrayBindingSet(// - new IVariable[] { a, b },// - new IConstant[] { new Constant<String>("Paul"), - new Constant<String>("Brad") }// - ), - new ArrayBindingSet(// - new IVariable[] { a, b },// - new IConstant[] { new Constant<String>("John"), - new Constant<String>("Brad") }// - ), - new ArrayBindingSet(// - new IVariable[] { a, b },// - new IConstant[] { new Constant<String>("Mary"), - new Constant<String>("Brad") }// - ), - new ArrayBindingSet(// - new IVariable[] { a, b },// - new IConstant[] { new Constant<String>("Brad"), - new Constant<String>("Fred") }// - ), - new ArrayBindingSet(// - new IVariable[] { a, b },// - new IConstant[] { new Constant<String>("Brad"), - new Constant<String>("Leon") }// - ) - }; - - assertSameSolutionsAnyOrder(expected, - new Dechunkerator<IBindingSet>(runningQuery.iterator())); - - } - - // Wait until the query is done. - runningQuery.get(); - final Map<Integer, BOpStats> statsMap = runningQuery.getStats(); - { - // validate the stats map. - assertNotNull(statsMap); - assertEquals(5, statsMap.size()); - if (log.isInfoEnabled()) - log.info(statsMap.toString()); - } - - } - - /** - * Unit test for optional join group with a filter on a variable outside the - * optional join group. Three joins are used and target a {@link SliceOp}. 
- * The 2nd and 3rd joins are an optional join group. Intermediate results - * which do not succeed on the optional join are forwarded to the - * {@link SliceOp} which is the target specified by the - * {@link PipelineOp.Annotations#ALT_SINK_REF}. The optional join group - * contains a filter that uses a variable outside the optional join group. - * <P> - * The query takes the form: - * - * <pre> - * (a b) - * optional { - * (b c) - * (c d) - * filter(a != Paul) - * } - * </pre> - * - * The (a b) tail will match everything in the knowledge base. The join - * group takes us two hops out from ?b. There should be two solutions that - * succeed the optional join group: - * - * <pre> - * (john mary brad fred) - * (john mary brad leon) - * </pre> - * - * and six more that don't succeed the optional join group: - * - * <pre> - * (paul mary) * - * (paul brad) * - * (john brad) - * (mary brad) - * (brad fred) - * (brad leon) - * </pre> - * - * In the cases marked with a <code>*</code>, ?a is bound to Paul even - * though there is a filter that specifically prohibits a = Paul. This is - * because the filter is inside the optional join group, which means that - * solutions can still include a = Paul, but the optional join group should - * not run in that case. - */ - public void test_query_optionals_filter2() throws Exception { - - final int startId = 1; - final int joinId1 = 2; - final int predId1 = 3; // (a,b) - final int condId = 4; // (a != Paul) - final int joinId2 = 5; // : group1 - final int predId2 = 6; // (b,c) - final int joinId3 = 7; // : group1 - final int predId3 = 8; // (c,d) - final int sliceId = 9; - - final IVariable<?> a = Var.var("a"); - final IVariable<?> b = Var.var("b"); - final IVariable<?> c = Var.var("c"); - final IVariable<?> d = Var.var("d"); - - final Integer joinGroup1 = Integer.valueOf(1); - - /* - * Not quite sure how to write this one. I think it probably goes - * something like this: - * - * 1. startOp - * 2. join1Op(a b) - * 3. conditionalRoutingOp( if a = Paul then goto sliceOp ) - * 4. join2Op(b c) - * 5. join3Op(c d) - * 6. 
sliceOp - */ - - final PipelineOp startOp = new StartOp(new BOp[] {}, - NV.asMap(new NV[] {// - new NV(Predicate.Annotations.BOP_ID, startId),// - new NV(SliceOp.Annotations.EVALUATION_CONTEXT, - BOpEvaluationContext.CONTROLLER),// - })); - - final Predicate<?> pred1Op = new Predicate<E>( - new IVariableOrConstant[] { a, b }, NV - .asMap(new NV[] {// - new NV(Predicate.Annotations.RELATION_NAME, - new String[] { namespace }),// - new NV(Predicate.Annotations.BOP_ID, predId1),// - new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// - })); - - final Predicate<?> pred2Op = new Predicate<E>( - new IVariableOrConstant[] { b, c }, NV - .asMap(new NV[] {// - new NV(Predicate.Annotations.RELATION_NAME, - new String[] { namespace }),// - new NV(Predicate.Annotations.BOP_ID, predId2),// - new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// - })); - - final Predicate<?> pred3Op = new Predicate<E>( - new IVariableOrConstant[] { c, d }, NV - .asMap(new NV[] {// - new NV(Predicate.Annotations.RELATION_NAME, - new String[] { namespace }),// - new NV(Predicate.Annotations.BOP_ID, predId3),// - new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// - })); - - final PipelineOp join1Op = new PipelineJoin<E>(// - new BOp[]{startOp},// - new NV(Predicate.Annotations.BOP_ID, joinId1),// - new NV(PipelineJoin.Annotations.PREDICATE,pred1Op)); - - final IConstraint condition = new EQConstant(a, new Constant<String>("Paul")); - - final ConditionalRoutingOp condOp = new ConditionalRoutingOp(new BOp[]{join1Op}, - NV.asMap(new NV[]{// - new NV(BOp.Annotations.BOP_ID,condId), - new NV(PipelineOp.Annotations.SINK_REF, sliceId), // a == Paul - new NV(PipelineOp.Annotations.ALT_SINK_REF, joinId2), // a != Paul - new NV(ConditionalRoutingOp.Annotations.CONDITION, condition), - })); - - final PipelineOp join2Op = new PipelineJoin<E>(// - new BOp[] { condOp },// - new NV(Predicate.Annotations.BOP_ID, joinId2),// - new NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// - new NV(PipelineJoin.Annotations.PREDICATE, pred2Op),// - // join is optional. -// new NV(PipelineJoin.Annotations.OPTIONAL, true),// - // optional target is the same as the default target. - new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId)); - - final PipelineOp join3Op = new PipelineJoin<E>(// - new BOp[] { join2Op },// - new NV(Predicate.Annotations.BOP_ID, joinId3),// - new NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// - new NV(PipelineJoin.Annotations.PREDICATE, pred3Op),// - // join is optional. -// new NV(PipelineJoin.Annotations.OPTIONAL, true),// - // optional target is the same as the default target. - new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId)); - - final PipelineOp sliceOp = new SliceOp(// - new BOp[]{join3Op}, - NV.asMap(new NV[] {// - new NV(BOp.Annotations.BOP_ID, sliceId),// - new NV(BOp.Annotations.EVALUATION_CONTEXT, - BOpEvaluationContext.CONTROLLER),// - })); - - final PipelineOp query = sliceOp; - - // start the query. - final UUID queryId = UUID.randomUUID(); - final IChunkMessage<IBindingSet> initialChunkMessage; - { - - final IBindingSet initialBindings = new HashBindingSet(); - -// initialBindings.set(Var.var("x"), new Constant<String>("Mary")); - - initialChunkMessage = new LocalChunkMessage<IBindingSet>(queryEngine, - queryId, startId,// - -1, // partitionId - newBindingSetIterator(initialBindings)); - } - final IRunningQuery runningQuery = queryEngine.eval(queryId, query, - initialChunkMessage); - - // verify solutions. - { - - // the expected solutions. 
- final IBindingSet[] expected = new IBindingSet[] {// - // two solutions where the optional join succeeds. - new ArrayBindingSet(// - new IVariable[] { a, b, c, d },// - new IConstant[] { new Constant<String>("John"), - new Constant<String>("Mary"), - new Constant<String>("Brad"), - new Constant<String>("Leon") }// - ), - new ArrayBindingSet(// - new IVariable[] { a, b, c, d },// - new IConstant[] { new Constant<String>("John"), - new Constant<String>("Mary"), - new Constant<String>("Brad"), - new Constant<String>("Fred") }// - ), - // plus anything we read from the first access path which did not - // pass the optional join - new ArrayBindingSet(// - new IVariable[] { a, b },// - new IConstant[] { new Constant<String>("Paul"), - new Constant<String>("Mary") }// - ), - new ArrayBindingSet(// - new IVariable[] { a, b },// - new IConstant[] { new Constant<String>("Paul"), - new Constant<String>("Brad") }// - ), - new ArrayBindingSet(// - new IVariable[] { a, b },// - new IConstant[] { new Constant<String>("John"), - new Constant<String>("Brad") }// - ), - new ArrayBindingSet(// - new IVariable[] { a, b },// - new IConstant[] { new Constant<String>("Mary"), - new Constant<String>("Brad") }// - ), - new ArrayBindingSet(// - new IVariable[] { a, b },// - new IConstant[] { new Constant<String>("Brad"), - new Constant<String>("Fred") }// - ), - new ArrayBindingSet(// - new IVariable[] { a, b },// - new IConstant[] { new Constant<String>("Brad"), - new Constant<String>("Leon") }// - ) - }; - - assertSameSolutionsAnyOrder(expected, - new Dechunkerator<IBindingSet>(runningQuery.iterator())); - - } - - // Wait until the query is done. - runningQuery.get(); - final Map<Integer, BOpStats> statsMap = runningQuery.getStats(); - { - // validate the stats map. - assertNotNull(statsMap); - assertEquals(6, statsMap.size()); - if (log.isInfoEnabled()) - log.info(statsMap.toString()); - } - - } - - /** - * Verify the expected solutions. - * - * @param expected - * @param itr - */ - static public void assertSameSolutions(final IBindingSet[] expected, - final IAsynchronousIterator<IBindingSet[]> itr) { - try { - int n = 0; - while (itr.hasNext()) { - final IBindingSet[] e = itr.next(); - if (log.isInfoEnabled()) - log.info(n + " : chunkSize=" + e.length); - for (int i = 0; i < e.length; i++) { - if (log.isInfoEnabled()) - log.info(n + " : " + e[i]); - if (n >= expected.length) { - fail("Willing to deliver too many solutions: n=" + n - + " : " + e[i]); - } - if (!expected[n].equals(e[i])) { - fail("n=" + n + ", expected=" + expected[n] - + ", actual=" + e[i]); - } - n++; - } - } - assertEquals("Wrong number of solutions", expected.length, n); - } finally { - itr.close(); - } - } - - /** - * Verifies that the iterator visits the specified objects in some arbitrary - * ordering and that the iterator is exhausted once all expected objects - * have been visited. The implementation uses a selection without - * replacement "pattern". - * <p> - * Note: If the objects being visited do not correctly implement hashCode() - * and equals() then this can fail even if the desired objects would be - * visited. When this happens, fix the implementation classes. - */ - static public <T> void assertSameSolutionsAnyOrder(final T[] expected, - final Iterator<T> actual) { - - assertSameSolutionsAnyOrder("", expected, actual); - - } - - /** - * Verifies that the iterator visits the specified objects in some arbitrary - * ordering and that the iterator is exhausted once all expected objects - * have been visited. 
The implementation uses a selection without - * replacement "pattern". - * <p> - * Note: If the objects being visited do not correctly implement hashCode() - * and equals() then this can fail even if the desired objects would be - * visited. When this happens, fix the implementation classes. - */ - static public <T> void assertSameSolutionsAnyOrder(final String msg, - final T[] expected, final Iterator<T> actual) { - - try { - - /* - * Populate a map that we will use to realize the match and - * selection without replacement logic. The map uses counters to - * handle duplicate keys. This makes it possible to write tests in - * which two or more binding sets which are "equal" appear. - */ - - final int nrange = expected.length; - - final java.util.Map<T, AtomicInteger> range = new java.util.LinkedHashMap<T, AtomicInteger>(); - - for (int j = 0; j < nrange; j++) { - - AtomicInteger count = range.get(expected[j]); - - if (count == null) { - - count = new AtomicInteger(); - - } - - range.put(expected[j], count); - - count.incrementAndGet(); - - } - - // Do selection without replacement for the objects visited by - // iterator. - - for (int j = 0; j < nrange; j++) { - - if (!actual.hasNext()) { - - fail(msg - + ": Iterator exhausted while expecting more object(s)" - + ": index=" + j); - - } - - final T actualObject = actual.next(); - - if (log.isInfoEnabled()) - log.info("visting: " + actualObject); - - AtomicInteger counter = range.get(actualObject); - - if (counter == null || counter.get() == 0) { - - fail("Object not expected" + ": index=" + j + ", object=" - + actualObject); - - } - - counter.decrementAndGet(); - - } - - if (actual.hasNext()) { - - final List<T> remainder = new LinkedList<T>(); - - while(actual.hasNext()) { - remainder.add(actual.next()); - } - - fail("Iterator will deliver too many objects: reminder(" - + remainder.size() + ")=" + remainder); - - } - - } finally { - - if (actual instanceof ICloseableIterator<?>) { - - ((ICloseableIterator<T>) actual).close(); - - } - - } - - } - -} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
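The try/finally idiom introduced in this revision is worth isolating, since the ordering is easy to get wrong: the cancel has to sit on the code path taken when get() throws (interrupt or execution failure), and cancelling a task that has already completed is a harmless no-op. A self-contained sketch of the same pattern, using only java.util.concurrent and illustrative names (no bigdata types):

    import java.util.concurrent.Callable;
    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.FutureTask;

    final class CancelOnInterruptSketch {

        /** Run the subtask on the executor and wait for its outcome. */
        static <V> V runSubtask(final ExecutorService executor,
                final Callable<V> subtask) throws Exception {
            final FutureTask<V> ft = new FutureTask<V>(subtask);
            // run the subtask.
            executor.execute(ft);
            try {
                // wait for the outcome.
                return ft.get();
            } finally {
                /*
                 * Ensure that the inner task is cancelled if the waiting
                 * thread is interrupted before the task completes.
                 */
                ft.cancel(true/* mayInterruptIfRunning */);
            }
        }
    }

When get() returns normally the task is already done and the cancel() is a no-op; the finally clause only has an effect when the wait is interrupted, which is exactly the leak this revision closes for subqueries.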
From: <tho...@us...> - 2011-01-04 00:23:48
|
Revision: 4049 http://bigdata.svn.sourceforge.net/bigdata/?rev=4049&view=rev Author: thompsonbry Date: 2011-01-04 00:23:40 +0000 (Tue, 04 Jan 2011) Log Message: ----------- Removed the push()/pop() support on IBindingSet in order to reduce the data on the heap when we have a bunch of binding sets during query processing. The push()/pop() logic did not work out for the optional join groups, so it is just overhead. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IBindingSet.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bindingSet/ArrayBindingSet.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bindingSet/EmptyBindingSet.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bindingSet/HashBindingSet.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bindingSet/ListBindingSet.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/ChunkedRunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/SinkTransitionBuffer.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/SinkTransitionMetadata.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/StandaloneChainedRunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestBOpUtility.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/bindingSet/TestIBindingSet.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestOptionalJoinGroup.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/mutation/TestInsert.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java 2011-01-03 18:25:34 UTC (rev 4048) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java 2011-01-04 00:23:40 UTC (rev 4049) @@ -425,56 +425,56 @@ return Collections.unmodifiableMap(map); } - /** - * Lookup the first operator in the specified conditional binding group and - * return its bopId. - * - * @param query - * The query plan. - * @param groupId - * The identifier for the desired conditional binding group. - * - * @return The bopId of the first operator in that conditional binding group - * -or- <code>null</code> if the specified conditional binding group - * does not exist in the query plan. - * - * @throws IllegalArgumentException - * if either argument is <code>null</code>. 
- * - * @see PipelineOp.Annotations#CONDITIONAL_GROUP - * @see PipelineOp.Annotations#ALT_SINK_GROUP - */ - static public Integer getFirstBOpIdForConditionalGroup(final BOp query, - final Integer groupId) { - if (query == null) - throw new IllegalArgumentException(); - if (groupId == null) - throw new IllegalArgumentException(); - final Iterator<BOp> itr = postOrderIterator(query); - while (itr.hasNext()) { - final BOp t = itr.next(); - final Object x = t.getProperty(PipelineOp.Annotations.CONDITIONAL_GROUP); - if (x != null) { - if (!(x instanceof Integer)) { - throw new BadConditionalGroupIdTypeException( - "Must be Integer, not: " + x.getClass() + ": " - + PipelineOp.Annotations.CONDITIONAL_GROUP); - } - final Integer id = (Integer) t - .getProperty(PipelineOp.Annotations.CONDITIONAL_GROUP); - if(id.equals(groupId)) { - /* - * Return the BOpId associated with the first operator in - * the pre-order traversal of the query plan which has the - * specified groupId. - */ - return t.getId(); - } - } - } - // No such groupId in the query plan. - return null; - } +// /** +// * Lookup the first operator in the specified conditional binding group and +// * return its bopId. +// * +// * @param query +// * The query plan. +// * @param groupId +// * The identifier for the desired conditional binding group. +// * +// * @return The bopId of the first operator in that conditional binding group +// * -or- <code>null</code> if the specified conditional binding group +// * does not exist in the query plan. +// * +// * @throws IllegalArgumentException +// * if either argument is <code>null</code>. +// * +// * @see PipelineOp.Annotations#CONDITIONAL_GROUP +// * @see PipelineOp.Annotations#ALT_SINK_GROUP +// */ +// static public Integer getFirstBOpIdForConditionalGroup(final BOp query, +// final Integer groupId) { +// if (query == null) +// throw new IllegalArgumentException(); +// if (groupId == null) +// throw new IllegalArgumentException(); +// final Iterator<BOp> itr = postOrderIterator(query); +// while (itr.hasNext()) { +// final BOp t = itr.next(); +// final Object x = t.getProperty(PipelineOp.Annotations.CONDITIONAL_GROUP); +// if (x != null) { +// if (!(x instanceof Integer)) { +// throw new BadConditionalGroupIdTypeException( +// "Must be Integer, not: " + x.getClass() + ": " +// + PipelineOp.Annotations.CONDITIONAL_GROUP); +// } +// final Integer id = (Integer) t +// .getProperty(PipelineOp.Annotations.CONDITIONAL_GROUP); +// if(id.equals(groupId)) { +// /* +// * Return the BOpId associated with the first operator in +// * the pre-order traversal of the query plan which has the +// * specified groupId. +// */ +// return t.getId(); +// } +// } +// } +// // No such groupId in the query plan. +// return null; +// } /** * Return the parent of the operator in the operator tree (this does not Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IBindingSet.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IBindingSet.java 2011-01-03 18:25:34 UTC (rev 4048) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IBindingSet.java 2011-01-04 00:23:40 UTC (rev 4049) @@ -157,38 +157,38 @@ */ public int hashCode(); - /** - * Make a copy of the current symbol table (aka current variable bindings) - * and push it onto onto the stack. Variable bindings will be made against - * the current symbol table. The symbol table stack is propagated by - * {@link #clone()} and {@link #copy(IVariable[])}. 
Symbols tables may be - * used to propagate conditional bindings through a data flow until a - * decision point is reached, at which point they may be either discarded or - * committed. This mechanism may be used to support SPARQL style optional - * join groups. - * - * @throws UnsupportedOperationException - * if the {@link IBindingSet} is not mutable. - * - * @see #pop(boolean) - */ - public void push(); +// /** +// * Make a copy of the current symbol table (aka current variable bindings) +// * and push it onto onto the stack. Variable bindings will be made against +// * the current symbol table. The symbol table stack is propagated by +// * {@link #clone()} and {@link #copy(IVariable[])}. Symbols tables may be +// * used to propagate conditional bindings through a data flow until a +// * decision point is reached, at which point they may be either discarded or +// * committed. This mechanism may be used to support SPARQL style optional +// * join groups. +// * +// * @throws UnsupportedOperationException +// * if the {@link IBindingSet} is not mutable. +// * +// * @see #pop(boolean) +// */ +// public void push(); +// +// /** +// * Pop the current symbol table off of the stack. +// * +// * @param save +// * When <code>true</code>, the bindings on the current symbol +// * table are copied to the parent symbol table before the current +// * symbol table is popped off of the stack. If <code>false</code> +// * , any bindings associated with that symbol table are +// * discarded. +// * +// * @throws IllegalStateException +// * if there is no nested symbol table. +// * +// * @see #push() +// */ +// public void pop(boolean save); - /** - * Pop the current symbol table off of the stack. - * - * @param save - * When <code>true</code>, the bindings on the current symbol - * table are copied to the parent symbol table before the current - * symbol table is popped off of the stack. If <code>false</code> - * , any bindings associated with that symbol table are - * discarded. - * - * @throws IllegalStateException - * if there is no nested symbol table. - * - * @see #push() - */ - public void pop(boolean save); - } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java 2011-01-03 18:25:34 UTC (rev 4048) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java 2011-01-04 00:23:40 UTC (rev 4049) @@ -33,7 +33,6 @@ import com.bigdata.bop.engine.BOpStats; import com.bigdata.bop.engine.QueryEngine; -import com.bigdata.bop.solutions.SliceOp; /** * Abstract base class for pipeline operators where the data moving along the @@ -106,70 +105,70 @@ */ String JOIN_GRAPH = PipelineOp.class.getName() + ".joinGraph"; - /** - * Annotation used to mark a set of operators belonging to a conditional - * binding group. Bindings within with the group will be discarded if - * any required operator in the group fails. For example, if a binding - * set exits via the alternative sink for a required join then any - * conditional bindings within the group will be discarded. - * <p> - * Together with {@link #ALT_SINK_GROUP}, the {@link #CONDITIONAL_GROUP} - * annotation provides the information necessary in order to decide the - * re-entry point in the query plan when a join within an conditional - * binding group fails. 
- * <p> - * The {@link #CONDITIONAL_GROUP} annotation controls the - * {@link IBindingSet#push()} and {@link IBindingSet#pop(boolean)} of - * individual solutions as they propagate through the pipeline. When a - * pipeline starts, the {@link IBindingSet} stack contains only the top - * level symbol table (i.e., name/value bindings). When an intermediate - * solution enters a {@link PipelineOp} marked as belonging to a - * {@link #CONDITIONAL_GROUP}, a new symbol table is - * {@link IBindingSet#push() pushed} onto the stack for that solution. - * If the solution leaves the optional join group via the default sink, - * then the symbol table is "saved" when it is - * {@link IBindingSet#pop(boolean) popped} off of the stack. If the - * solution leaves the join group via the alternative sink, then the - * symbol table is discarded when it is {@link IBindingSet#pop(boolean) - * popped} off of the stack. This provides for conditional binding of - * variables within the operators of the group. - * <p> - * The value of the {@link #CONDITIONAL_GROUP} is an {@link Integer} - * which uniquely identifies the group within the query. - * - * @deprecated The binding set stack push/pop mechanisms are not - * sufficient to support optional join groups. This - * annotation will be removed unless it proves valuable for - * marking the elements of a join group, in which case the - * javadoc needs to be updated. - */ - String CONDITIONAL_GROUP = PipelineOp.class.getName() + ".conditionalGroup"; +// /** +// * Annotation used to mark a set of operators belonging to a conditional +// * binding group. Bindings within with the group will be discarded if +// * any required operator in the group fails. For example, if a binding +// * set exits via the alternative sink for a required join then any +// * conditional bindings within the group will be discarded. +// * <p> +// * Together with {@link #ALT_SINK_GROUP}, the {@link #CONDITIONAL_GROUP} +// * annotation provides the information necessary in order to decide the +// * re-entry point in the query plan when a join within an conditional +// * binding group fails. +// * <p> +// * The {@link #CONDITIONAL_GROUP} annotation controls the +// * {@link IBindingSet#push()} and {@link IBindingSet#pop(boolean)} of +// * individual solutions as they propagate through the pipeline. When a +// * pipeline starts, the {@link IBindingSet} stack contains only the top +// * level symbol table (i.e., name/value bindings). When an intermediate +// * solution enters a {@link PipelineOp} marked as belonging to a +// * {@link #CONDITIONAL_GROUP}, a new symbol table is +// * {@link IBindingSet#push() pushed} onto the stack for that solution. +// * If the solution leaves the optional join group via the default sink, +// * then the symbol table is "saved" when it is +// * {@link IBindingSet#pop(boolean) popped} off of the stack. If the +// * solution leaves the join group via the alternative sink, then the +// * symbol table is discarded when it is {@link IBindingSet#pop(boolean) +// * popped} off of the stack. This provides for conditional binding of +// * variables within the operators of the group. +// * <p> +// * The value of the {@link #CONDITIONAL_GROUP} is an {@link Integer} +// * which uniquely identifies the group within the query. +// * +// * @deprecated The binding set stack push/pop mechanisms are not +// * sufficient to support optional join groups. 
This +// * annotation will be removed unless it proves valuable for +// * marking the elements of a join group, in which case the +// * javadoc needs to be updated. +// */ +// String CONDITIONAL_GROUP = PipelineOp.class.getName() + ".conditionalGroup"; - /** - * Annotation used to designate the target when a required operator - * within an {@link #CONDITIONAL_GROUP} fails. The value of this - * annotation must be the {@link #CONDITIONAL_GROUP} identifier - * corresponding to the next conditional binding group in the query - * plan. If there is no such group, then the {@link #ALT_SINK_REF} - * should be used instead to specify the target operator in the - * pipeline, e.g., a {@link SliceOp}. - * <p> - * The target {@link #CONDITIONAL_GROUP} is specified (rather than the - * bopId of the target join) since the non-optional joins in the target - * {@link #CONDITIONAL_GROUP} be reordered by the query optimizer. The - * entry point for solutions redirected to the {@link #ALT_SINK_GROUP} - * is therefore the first operator in the target - * {@link #CONDITIONAL_GROUP}. This decouples the routing decisions from - * the join ordering decisions. - * - * @see #CONDITIONAL_GROUP - * @see #ALT_SINK_REF - * - * @deprecated The binding set stack push/pop mechanisms are not - * sufficient to support optional join groups. This - * annotation will be removed. - */ - String ALT_SINK_GROUP = PipelineOp.class.getName() + ".altSinkGroup"; +// /** +// * Annotation used to designate the target when a required operator +// * within an {@link #CONDITIONAL_GROUP} fails. The value of this +// * annotation must be the {@link #CONDITIONAL_GROUP} identifier +// * corresponding to the next conditional binding group in the query +// * plan. If there is no such group, then the {@link #ALT_SINK_REF} +// * should be used instead to specify the target operator in the +// * pipeline, e.g., a {@link SliceOp}. +// * <p> +// * The target {@link #CONDITIONAL_GROUP} is specified (rather than the +// * bopId of the target join) since the non-optional joins in the target +// * {@link #CONDITIONAL_GROUP} be reordered by the query optimizer. The +// * entry point for solutions redirected to the {@link #ALT_SINK_GROUP} +// * is therefore the first operator in the target +// * {@link #CONDITIONAL_GROUP}. This decouples the routing decisions from +// * the join ordering decisions. +// * +// * @see #CONDITIONAL_GROUP +// * @see #ALT_SINK_REF +// * +// * @deprecated The binding set stack push/pop mechanisms are not +// * sufficient to support optional join groups. This +// * annotation will be removed. +// */ +// String ALT_SINK_GROUP = PipelineOp.class.getName() + ".altSinkGroup"; } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bindingSet/ArrayBindingSet.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bindingSet/ArrayBindingSet.java 2011-01-03 18:25:34 UTC (rev 4048) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bindingSet/ArrayBindingSet.java 2011-01-04 00:23:40 UTC (rev 4049) @@ -33,7 +33,6 @@ import java.util.Collections; import java.util.Iterator; import java.util.Map; -import java.util.Stack; import java.util.Map.Entry; import com.bigdata.bop.IBindingSet; @@ -206,60 +205,62 @@ * pushed onto the stack or popped off of the stack, but the stack MAY NOT * become empty. 
*/ - private final Stack<ST> stack; +// private final Stack<ST> stack; + private final ST current; /** * Return the symbol table on the top of the stack. */ private ST current() { - return stack.peek(); + return current; +// return stack.peek(); } - public void push() { +// public void push() { +// +// // The current symbol table. +// final ST cur = current(); +// +// // Create a new symbol table. +// final ST tmp = new ST(cur.nbound, cur.vars.clone(), cur.vals.clone()); +// +// // Push the new symbol table onto the stack. +// stack.push(tmp); +// +// } +// +// public void pop(final boolean save) { +// +// if (stack.size() < 2) { +// /* +// * The stack may never become empty. Therefore there must be at +// * least two symbol tables on the stack for a pop() request. +// */ +// throw new IllegalArgumentException(); +// } +// +// // Pop the symbol table off of the top of the stack. +// final ST old = stack.pop(); +// +// if (save) { +// +// // discard the current symbol table. +// stack.pop(); +// +// // replacing it with the symbol table which we popped off the stack. +// stack.push(old); +// +// } else { +// +// // clear the hash code. +// hash = 0; +// +// } +// +// } - // The current symbol table. - final ST cur = current(); - - // Create a new symbol table. - final ST tmp = new ST(cur.nbound, cur.vars.clone(), cur.vals.clone()); - - // Push the new symbol table onto the stack. - stack.push(tmp); - - } - - public void pop(final boolean save) { - - if (stack.size() < 2) { - /* - * The stack may never become empty. Therefore there must be at - * least two symbol tables on the stack for a pop() request. - */ - throw new IllegalArgumentException(); - } - - // Pop the symbol table off of the top of the stack. - final ST old = stack.pop(); - - if (save) { - - // discard the current symbol table. - stack.pop(); - - // replacing it with the symbol table which we popped off the stack. - stack.push(old); - - } else { - - // clear the hash code. - hash = 0; - - } - - } - /** * Copy constructor (used by clone, copy). * @@ -272,30 +273,32 @@ protected ArrayBindingSet(final ArrayBindingSet src, final IVariable[] variablesToKeep) { - stack = new Stack<ST>(); +// stack = new Stack<ST>(); +// +// final int stackSize = src.stack.size(); +// +// int depth = 1; +// +// for (ST srcLst : src.stack) { +// +// /* +// * Copy the source bindings. +// * +// * Note: If a restriction exists on the variables to be copied, then +// * it is applied onto the the top level of the stack. If the symbol +// * table is saved when it is pop()'d, then the modified bindings +// * will replace the parent symbol table on the stack. +// */ +// final ST tmp = copy(srcLst, +// depth == stackSize ? variablesToKeep : null); +// +// // Push onto the stack. +// stack.push(tmp); +// +// } - final int stackSize = src.stack.size(); - - int depth = 1; + current = copy(src.current, variablesToKeep); - for (ST srcLst : src.stack) { - - /* - * Copy the source bindings. - * - * Note: If a restriction exists on the variables to be copied, then - * it is applied onto the the top level of the stack. If the symbol - * table is saved when it is pop()'d, then the modified bindings - * will replace the parent symbol table on the stack. - */ - final ST tmp = copy(srcLst, - depth == stackSize ? variablesToKeep : null); - - // Push onto the stack. 
- stack.push(tmp); - - } - } /** @@ -458,9 +461,11 @@ if(vars.length != vals.length) throw new IllegalArgumentException(); - stack = new Stack<ST>(); +// stack = new Stack<ST>(); +// +// stack.push(new ST(vars.length, vars, vals)); - stack.push(new ST(vars.length, vars, vals)); + current = new ST(vars.length, vars, vals); } @@ -478,11 +483,14 @@ if (capacity < 0) throw new IllegalArgumentException(); - stack = new Stack<ST>(); +// stack = new Stack<ST>(); +// +// stack.push(new ST(0/* nbound */, new IVariable[capacity], +// new IConstant[capacity])); - stack.push(new ST(0/* nbound */, new IVariable[capacity], - new IConstant[capacity])); - + current = new ST(0/* nbound */, new IVariable[capacity], + new IConstant[capacity]); + } /** Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bindingSet/EmptyBindingSet.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bindingSet/EmptyBindingSet.java 2011-01-03 18:25:34 UTC (rev 4048) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bindingSet/EmptyBindingSet.java 2011-01-04 00:23:40 UTC (rev 4049) @@ -161,12 +161,12 @@ } - public void push() { - throw new IllegalStateException(); - } - - public void pop(boolean save) { - throw new UnsupportedOperationException(); - } +// public void push() { +// throw new IllegalStateException(); +// } +// +// public void pop(boolean save) { +// throw new UnsupportedOperationException(); +// } } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bindingSet/HashBindingSet.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bindingSet/HashBindingSet.java 2011-01-03 18:25:34 UTC (rev 4048) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bindingSet/HashBindingSet.java 2011-01-04 00:23:40 UTC (rev 4049) @@ -32,7 +32,6 @@ import java.util.Iterator; import java.util.LinkedHashMap; import java.util.Map; -import java.util.Stack; import java.util.Map.Entry; import com.bigdata.bop.IBindingSet; @@ -60,92 +59,98 @@ // */ // private final LinkedHashMap<IVariable, IConstant> map; - /** - * The stack of symbol tables. Each symbol table is a mapping from an - * {@link IVariable} onto its non-<code>null</code> bound {@link IConstant}. - * The stack is initialized with an empty symbol table. Symbol tables may be - * pushed onto the stack or popped off of the stack, but the stack MAY NOT - * become empty. - */ - private final Stack<LinkedHashMap<IVariable, IConstant>> stack; +// /** +// * The stack of symbol tables. Each symbol table is a mapping from an +// * {@link IVariable} onto its non-<code>null</code> bound {@link IConstant}. +// * The stack is initialized with an empty symbol table. Symbol tables may be +// * pushed onto the stack or popped off of the stack, but the stack MAY NOT +// * become empty. +// */ +// private final Stack<LinkedHashMap<IVariable, IConstant>> stack; + final private LinkedHashMap<IVariable,IConstant> current; + /** * Return the symbol table on the top of the stack. */ private LinkedHashMap<IVariable, IConstant> current() { - return stack.peek(); + return current; + +// return stack.peek(); } - public void push() { +// public void push() { +// +// // The current symbol table. +// final LinkedHashMap<IVariable, IConstant> cur = current(); +// +// // Create a new symbol table. 
+// final LinkedHashMap<IVariable, IConstant> tmp = new LinkedHashMap<IVariable, IConstant>( +// cur.size()); +// +// // Push the new symbol table onto the stack. +// stack.push(tmp); +// +// /* +// * Make a copy of each entry in the symbol table which was on the top of +// * the stack when we entered this method, inserting the entries into the +// * new symbol table as we go. This avoids side effects of mutation on +// * the nested symbol tables and also ensures that we do not need to read +// * through to the nested symbol tables when answering a query about the +// * current symbol table. The only down side of this is that naive +// * serialization is that much less compact. +// */ +// for (Map.Entry<IVariable, IConstant> e : cur.entrySet()) { +// +// tmp.put(e.getKey(), e.getValue()); +// +// } +// +// } - // The current symbol table. - final LinkedHashMap<IVariable, IConstant> cur = current(); +// public void pop(final boolean save) { +// +// if (stack.size() < 2) { +// /* +// * The stack may never become empty. Therefore there must be at +// * least two symbol tables on the stack for a pop() request. +// */ +// throw new IllegalArgumentException(); +// } +// +// // Pop the symbol table off of the top of the stack. +// final LinkedHashMap<IVariable,IConstant> old = stack.pop(); +// +// if (save) { +// +// // discard the current symbol table. +// stack.pop(); +// +// // replacing it with the symbol table which we popped off the stack. +// stack.push(old); +// +// } else { +// +// // clear the hash code. +// hash = 0; +// +// } +// +// } - // Create a new symbol table. - final LinkedHashMap<IVariable, IConstant> tmp = new LinkedHashMap<IVariable, IConstant>( - cur.size()); - - // Push the new symbol table onto the stack. - stack.push(tmp); - - /* - * Make a copy of each entry in the symbol table which was on the top of - * the stack when we entered this method, inserting the entries into the - * new symbol table as we go. This avoids side effects of mutation on - * the nested symbol tables and also ensures that we do not need to read - * through to the nested symbol tables when answering a query about the - * current symbol table. The only down side of this is that naive - * serialization is that much less compact. - */ - for (Map.Entry<IVariable, IConstant> e : cur.entrySet()) { - - tmp.put(e.getKey(), e.getValue()); - - } - - } - - public void pop(final boolean save) { - - if (stack.size() < 2) { - /* - * The stack may never become empty. Therefore there must be at - * least two symbol tables on the stack for a pop() request. - */ - throw new IllegalArgumentException(); - } - - // Pop the symbol table off of the top of the stack. - final LinkedHashMap<IVariable,IConstant> old = stack.pop(); - - if (save) { - - // discard the current symbol table. - stack.pop(); - - // replacing it with the symbol table which we popped off the stack. - stack.push(old); - - } else { - - // clear the hash code. - hash = 0; - - } - - } - /** * New empty binding set. 
*/ public HashBindingSet() { - stack = new Stack<LinkedHashMap<IVariable, IConstant>>(); - - stack.push(new LinkedHashMap<IVariable, IConstant>()); +// stack = new Stack<LinkedHashMap<IVariable, IConstant>>(); +// +// stack.push(new LinkedHashMap<IVariable, IConstant>()); + current = new LinkedHashMap<IVariable, IConstant>(); + } /** @@ -155,30 +160,32 @@ */ protected HashBindingSet(final HashBindingSet src, final IVariable[] variablesToKeep) { - stack = new Stack<LinkedHashMap<IVariable,IConstant>>(); +// stack = new Stack<LinkedHashMap<IVariable,IConstant>>(); +// +// final int stackSize = src.stack.size(); +// +// int depth = 1; +// +// for (LinkedHashMap<IVariable, IConstant> srcLst : src.stack) { +// +// /* +// * Copy the source bindings. +// * +// * Note: If a restriction exists on the variables to be copied, then +// * it is applied onto the the top level of the stack. If the symbol +// * table is saved when it is pop()'d, then the modified bindings +// * will replace the parent symbol table on the stack. +// */ +// final LinkedHashMap<IVariable,IConstant> tmp = copy(srcLst, +// depth == stackSize ? variablesToKeep : null); +// +// // Push onto the stack. +// stack.push(tmp); +// +// } - final int stackSize = src.stack.size(); + current = copy(src.current, variablesToKeep); - int depth = 1; - - for (LinkedHashMap<IVariable, IConstant> srcLst : src.stack) { - - /* - * Copy the source bindings. - * - * Note: If a restriction exists on the variables to be copied, then - * it is applied onto the the top level of the stack. If the symbol - * table is saved when it is pop()'d, then the modified bindings - * will replace the parent symbol table on the stack. - */ - final LinkedHashMap<IVariable,IConstant> tmp = copy(srcLst, - depth == stackSize ? variablesToKeep : null); - - // Push onto the stack. - stack.push(tmp); - - } - } /** Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bindingSet/ListBindingSet.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bindingSet/ListBindingSet.java 2011-01-03 18:25:34 UTC (rev 4048) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bindingSet/ListBindingSet.java 2011-01-04 00:23:40 UTC (rev 4049) @@ -6,7 +6,6 @@ import java.util.LinkedList; import java.util.List; import java.util.Map; -import java.util.Stack; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IConstant; @@ -82,90 +81,94 @@ } }; - /** - * The stack of symbol tables. Each symbol table is a mapping from an - * {@link IVariable} onto its non-<code>null</code> bound {@link IConstant}. - * The stack is initialized with an empty symbol table. Symbol tables may be - * pushed onto the stack or popped off of the stack, but the stack MAY NOT - * become empty. - */ - private final Stack<List<E>> stack; +// /** +// * The stack of symbol tables. Each symbol table is a mapping from an +// * {@link IVariable} onto its non-<code>null</code> bound {@link IConstant}. +// * The stack is initialized with an empty symbol table. Symbol tables may be +// * pushed onto the stack or popped off of the stack, but the stack MAY NOT +// * become empty. +// */ +// private final Stack<List<E>> stack; + private final List<E> current; /** * Return the symbol table on the top of the stack. 
*/ - private List<E> current() { + final private List<E> current() { - return stack.peek(); + return current; +// return stack.peek(); } - public void push() { +// public void push() { +// +// // The current symbol table. +// final List<E> cur = current(); +// +// // Create a new symbol table. +// final List<E> tmp = new LinkedList<E>(); +// +// // Push the new symbol table onto the stack. +// stack.push(tmp); +// +// /* +// * Make a copy of each entry in the symbol table which was on the top of +// * the stack when we entered this method, inserting the entries into the +// * new symbol table as we go. This avoids side effects of mutation on +// * the nested symbol tables and also ensures that we do not need to read +// * through to the nested symbol tables when answering a query about the +// * current symbol table. The only down side of this is that naive +// * serialization is that much less compact. +// */ +// for (E e : cur) { +// +// tmp.add(new E(e.var, e.val)); +// +// } +// +// } +// +// public void pop(final boolean save) { +// +// if (stack.size() < 2) { +// /* +// * The stack may never become empty. Therefore there must be at +// * least two symbol tables on the stack for a pop() request. +// */ +// throw new IllegalArgumentException(); +// } +// +// // Pop the symbol table off of the top of the stack. +// final List<E> old = stack.pop(); +// +// if (save) { +// +// // discard the current symbol table. +// stack.pop(); +// +// // replacing it with the symbol table which we popped off the stack. +// stack.push(old); +// +// } else { +// +// // clear the hash code. +// hash = 0; +// +// } +// +// } - // The current symbol table. - final List<E> cur = current(); - - // Create a new symbol table. - final List<E> tmp = new LinkedList<E>(); - - // Push the new symbol table onto the stack. - stack.push(tmp); - - /* - * Make a copy of each entry in the symbol table which was on the top of - * the stack when we entered this method, inserting the entries into the - * new symbol table as we go. This avoids side effects of mutation on - * the nested symbol tables and also ensures that we do not need to read - * through to the nested symbol tables when answering a query about the - * current symbol table. The only down side of this is that naive - * serialization is that much less compact. - */ - for (E e : cur) { - - tmp.add(new E(e.var, e.val)); - - } - - } - - public void pop(final boolean save) { - - if (stack.size() < 2) { - /* - * The stack may never become empty. Therefore there must be at - * least two symbol tables on the stack for a pop() request. - */ - throw new IllegalArgumentException(); - } - - // Pop the symbol table off of the top of the stack. - final List<E> old = stack.pop(); - - if (save) { - - // discard the current symbol table. - stack.pop(); - - // replacing it with the symbol table which we popped off the stack. - stack.push(old); - - } else { - - // clear the hash code. - hash = 0; - - } - - } - /** * Create an empty binding set. */ public ListBindingSet() { - stack = new Stack<List<E>>(); - - stack.push(new LinkedList<E>()); +// stack = new Stack<List<E>>(); +// +// stack.push(new LinkedList<E>()); + + current = new LinkedList<E>(); } @@ -207,30 +210,32 @@ protected ListBindingSet(final ListBindingSet src, final IVariable[] variablesToKeep) { - stack = new Stack<List<E>>(); +// stack = new Stack<List<E>>(); +// +// final int stackSize = src.stack.size(); +// +// int depth = 1; +// +// for (List<E> srcLst : src.stack) { +// +// /* +// * Copy the source bindings. 
+// * +// * Note: If a restriction exists on the variables to be copied, then +// * it is applied onto the the top level of the stack. If the symbol +// * table is saved when it is pop()'d, then the modified bindings +// * will replace the parent symbol table on the stack. +// */ +// final List<E> tmp = copy(srcLst, +// depth == stackSize ? variablesToKeep : null); +// +// // Push onto the stack. +// stack.push(tmp); +// +// } - final int stackSize = src.stack.size(); + current = copy(src.current, variablesToKeep); - int depth = 1; - - for (List<E> srcLst : src.stack) { - - /* - * Copy the source bindings. - * - * Note: If a restriction exists on the variables to be copied, then - * it is applied onto the the top level of the stack. If the symbol - * table is saved when it is pop()'d, then the modified bindings - * will replace the parent symbol table on the stack. - */ - final List<E> tmp = copy(srcLst, - depth == stackSize ? variablesToKeep : null); - - // Push onto the stack. - stack.push(tmp); - - } - } /** Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/ChunkedRunningQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/ChunkedRunningQuery.java 2011-01-03 18:25:34 UTC (rev 4048) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/ChunkedRunningQuery.java 2011-01-04 00:23:40 UTC (rev 4049) @@ -1070,30 +1070,33 @@ */ sinkId = BOpUtility.getEffectiveDefaultSink(bop, p); - { - // altSink (null when not specified). - final Integer altSinkId = (Integer) op - .getProperty(PipelineOp.Annotations.ALT_SINK_REF); - final Integer altSinkGroup = (Integer) op - .getProperty(PipelineOp.Annotations.ALT_SINK_GROUP); - if (altSinkId != null && altSinkGroup != null) - throw new RuntimeException( - "Annotations are mutually exclusive: " - + PipelineOp.Annotations.ALT_SINK_REF - + " and " - + PipelineOp.Annotations.ALT_SINK_GROUP); - if (altSinkGroup != null) { - /* - * Lookup the first pipeline op in the conditional binding - * group and use its bopId as the altSinkId. - */ - this.altSinkId = BOpUtility.getFirstBOpIdForConditionalGroup( - getQuery(), altSinkGroup); - } else { - // MAY be null. - this.altSinkId = altSinkId; - } - } + // altSink (null when not specified). + altSinkId = (Integer) op + .getProperty(PipelineOp.Annotations.ALT_SINK_REF); +// { +// // altSink (null when not specified). +// final Integer altSinkId = (Integer) op +// .getProperty(PipelineOp.Annotations.ALT_SINK_REF); +// final Integer altSinkGroup = (Integer) op +// .getProperty(PipelineOp.Annotations.ALT_SINK_GROUP); +// if (altSinkId != null && altSinkGroup != null) +// throw new RuntimeException( +// "Annotations are mutually exclusive: " +// + PipelineOp.Annotations.ALT_SINK_REF +// + " and " +// + PipelineOp.Annotations.ALT_SINK_GROUP); +// if (altSinkGroup != null) { +// /* +// * Lookup the first pipeline op in the conditional binding +// * group and use its bopId as the altSinkId. +// */ +// this.altSinkId = BOpUtility.getFirstBOpIdForConditionalGroup( +// getQuery(), altSinkGroup); +// } else { +// // MAY be null. +// this.altSinkId = altSinkId; +// } +// } if (altSinkId != null && !getBOpIndex().containsKey(altSinkId)) throw new NoSuchBOpException(altSinkId); @@ -1131,18 +1134,19 @@ } assert stats != null; - // The groupId (if any) for this operator. 
- final Integer fromGroupId = (Integer) op - .getProperty(PipelineOp.Annotations.CONDITIONAL_GROUP); +// // The groupId (if any) for this operator. +// final Integer fromGroupId = (Integer) op +// .getProperty(PipelineOp.Annotations.CONDITIONAL_GROUP); if (p == null) { sink = getQueryBuffer(); } else { - final BOp targetOp = getBOpIndex().get(sinkId); - final Integer toGroupId = (Integer) targetOp - .getProperty(PipelineOp.Annotations.CONDITIONAL_GROUP); - sink = newBuffer(op, sinkId, new SinkTransitionMetadata( - fromGroupId, toGroupId, true/* isSink */), +// final BOp targetOp = getBOpIndex().get(sinkId); +// final Integer toGroupId = (Integer) targetOp +// .getProperty(PipelineOp.Annotations.CONDITIONAL_GROUP); +// final SinkTransitionMetadata stm = new SinkTransitionMetadata( +// fromGroupId, toGroupId, true/* isSink */); + sink = newBuffer(op, sinkId, //null/*stm*/, sinkMessagesOut, stats); } @@ -1156,11 +1160,12 @@ */ // altSink = sink; } else { - final BOp targetOp = getBOpIndex().get(altSinkId); - final Integer toGroupId = (Integer) targetOp - .getProperty(PipelineOp.Annotations.CONDITIONAL_GROUP); - altSink = newBuffer(op, altSinkId, new SinkTransitionMetadata( - fromGroupId, toGroupId, false/* isSink */), +// final BOp targetOp = getBOpIndex().get(altSinkId); +// final Integer toGroupId = (Integer) targetOp +// .getProperty(PipelineOp.Annotations.CONDITIONAL_GROUP); +// final SinkTransitionMetadata stm = new SinkTransitionMetadata( +// fromGroupId, toGroupId, false/* isSink */); + altSink = newBuffer(op, altSinkId, //null/* stm */, altSinkMessagesOut, stats); } @@ -1192,7 +1197,7 @@ */ private IBlockingBuffer<IBindingSet[]> newBuffer(final PipelineOp op, final int sinkId, - final SinkTransitionMetadata sinkTransitionMetadata, +// final SinkTransitionMetadata sinkTransitionMetadata, final AtomicInteger sinkMessagesOut, final BOpStats stats) { // final MultiplexBlockingBuffer<IBindingSet[]> factory = inputBufferMap == null ? 
null @@ -1218,10 +1223,14 @@ // .getChunkTimeout(), // BufferAnnotations.chunkTimeoutUnit); - return new SinkTransitionBuffer(new HandleChunkBuffer( + return +// new SinkTransitionBuffer( + new HandleChunkBuffer( ChunkedRunningQuery.this, bopId, sinkId, op - .getChunkCapacity(), sinkMessagesOut, stats), - sinkTransitionMetadata); + .getChunkCapacity(), sinkMessagesOut, stats) +// , +// sinkTransitionMetadata) + ; } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java 2011-01-03 18:25:34 UTC (rev 4048) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java 2011-01-04 00:23:40 UTC (rev 4049) @@ -52,7 +52,7 @@ import com.bigdata.bop.BOpUtility; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.PipelineOp; -import com.bigdata.bop.bindingSet.HashBindingSet; +import com.bigdata.bop.bindingSet.ListBindingSet; import com.bigdata.bop.fed.QueryEngineFactory; import com.bigdata.btree.BTree; import com.bigdata.btree.IndexSegment; @@ -798,7 +798,7 @@ return eval(queryId, (PipelineOp) op, new LocalChunkMessage<IBindingSet>(this/* queryEngine */, queryId, startId, -1 /* partitionId */, - newBindingSetIterator(new HashBindingSet()))); + newBindingSetIterator(new ListBindingSet()))); } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/SinkTransitionBuffer.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/SinkTransitionBuffer.java 2011-01-03 18:25:34 UTC (rev 4048) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/SinkTransitionBuffer.java 2011-01-04 00:23:40 UTC (rev 4049) @@ -34,6 +34,8 @@ /** * Delegation pattern handles the {@link SinkTransitionMetadata}. + * + * @deprecated along with {@link SinkTransitionMetadata} */ class SinkTransitionBuffer implements IBlockingBuffer<IBindingSet[]> { Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/SinkTransitionMetadata.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/SinkTransitionMetadata.java 2011-01-03 18:25:34 UTC (rev 4048) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/SinkTransitionMetadata.java 2011-01-04 00:23:40 UTC (rev 4049) @@ -53,10 +53,10 @@ * * @todo Unit tests of this class in isolation. * - * @todo It appears that this design can not be made to satisfy SPARQL optional - * group semantics. Therefore, we may be able to drop this class, support - * for it in the {@link ChunkedRunningQuery} and support for the symbol - * table stack in {@link IBindingSet}. + * @deprecated It appears that this design can not be made to satisfy SPARQL + * optional group semantics. Therefore, we may be able to drop this + * class, support for it in the {@link ChunkedRunningQuery} and + * support for the symbol table stack in {@link IBindingSet}. */ class SinkTransitionMetadata { @@ -91,24 +91,25 @@ * The binding set. */ public void handleBindingSet(final IBindingSet bset) { - if (fromGroupId == null) { - if (toGroupId == null) - return; - // Transition from no group to some group. - bset.push(); - return; - } else { - if (toGroupId == null) - // Transition from a group to no group. 
- bset.pop(isSink/* save */); - else if (toGroupId.equals(fromGroupId)) { - // NOP (transition to the same group) - } else { - // Transition to a different group. - bset.pop(isSink/* save */); - bset.push(); - } - } +// if (fromGroupId == null) { +// if (toGroupId == null) +// return; +// // Transition from no group to some group. +// bset.push(); +// return; +// } else { +// if (toGroupId == null) +// // Transition from a group to no group. +// bset.pop(isSink/* save */); +// else if (toGroupId.equals(fromGroupId)) { +// // NOP (transition to the same group) +// } else { +// // Transition to a different group. +// bset.pop(isSink/* save */); +// bset.push(); +// } +// } + throw new UnsupportedOperationException(); } } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/StandaloneChainedRunningQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/StandaloneChainedRunningQuery.java 2011-01-03 18:25:34 UTC (rev 4048) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/StandaloneChainedRunningQuery.java 2011-01-04 00:23:40 UTC (rev 4049) @@ -622,30 +622,33 @@ */ sinkId = BOpUtility.getEffectiveDefaultSink(bop, p); - { - // altSink (null when not specified). - final Integer altSinkId = (Integer) op - .getProperty(PipelineOp.Annotations.ALT_SINK_REF); - final Integer altSinkGroup = (Integer) op - .getProperty(PipelineOp.Annotations.ALT_SINK_GROUP); - if (altSinkId != null && altSinkGroup != null) - throw new RuntimeException( - "Annotations are mutually exclusive: " - + PipelineOp.Annotations.ALT_SINK_REF - + " and " - + PipelineOp.Annotations.ALT_SINK_GROUP); - if (altSinkGroup != null) { - /* - * Lookup the first pipeline op in the conditional binding - * group and use its bopId as the altSinkId. - */ - this.altSinkId = BOpUtility.getFirstBOpIdForConditionalGroup( - getQuery(), altSinkGroup); - } else { - // MAY be null. - this.altSinkId = altSinkId; - } - } + // altSink (null when not specified). + altSinkId = (Integer) op + .getProperty(PipelineOp.Annotations.ALT_SINK_REF); +// { +// // altSink (null when not specified). +// final Integer altSinkId = (Integer) op +// .getProperty(PipelineOp.Annotations.ALT_SINK_REF); +// final Integer altSinkGroup = (Integer) op +// .getProperty(PipelineOp.Annotations.ALT_SINK_GROUP); +// if (altSinkId != null && altSinkGroup != null) +// throw new RuntimeException( +// "Annotations are mutually exclusive: " +// + PipelineOp.Annotations.ALT_SINK_REF +// + " and " +// + PipelineOp.Annotations.ALT_SINK_GROUP); +// if (altSinkGroup != null) { +// /* +// * Lookup the first pipeline op in the conditional binding +// * group and use its bopId as the altSinkId. +// */ +// this.altSinkId = BOpUtility.getFirstBOpIdForConditionalGroup( +// getQuery(), altSinkGroup); +// } else { +// // MAY be null. +// this.altSinkId = altSinkId; +// } +// } if (altSinkId != null && !getBOpIndex().containsKey(altSinkId)) throw new NoSuchBOpException(altSinkId); @@ -683,19 +686,20 @@ } assert stats != null; - // The groupId (if any) for this operator. - final Integer fromGroupId = (Integer) op - .getProperty(PipelineOp.Annotations.CONDITIONAL_GROUP); +// // The groupId (if any) for this operator. 
+// final Integer fromGroupId = (Integer) op +// .getProperty(PipelineOp.Annotations.CONDITIONAL_GROUP); if (p == null) { sink = getQueryBuffer(); } else { - final BOp targetOp = getBOpIndex().get(sinkId); - final Integer toGroupId = (Integer) targetOp - .getProperty(PipelineOp.Annotations.CONDITIONAL_GROUP); - sink = newBuffer(op, sinkId, new SinkTransitionMetadata( - fromGroupId, toGroupId, true/* isSink */), - /*sinkMessagesOut,*/ stats); +// final BOp targetOp = getBOpIndex().get(sinkId); +// final Integer toGroupId = (Integer) targetOp +// .getProperty(PipelineOp.Annotations.CONDITIONAL_GROUP); +// final SinkTransitionMetadata stm = new SinkTransitionMetadata( +// fromGroupId, toGroupId, true/* isSink */); + sink = newBuffer(op, sinkId, //null/* stm */, + /* sinkMessagesOut, */stats); } if (altSinkId == null) { @@ -708,11 +712,12 @@ */ // altSink = sink; } else { - final BOp targetOp = getBOpIndex().get(altSinkId); - final Integer toGroupId = (Integer) targetOp - .getProperty(PipelineOp.Annotations.CONDITIONAL_GROUP); - altSink = newBuffer(op, altSinkId, new SinkTransitionMetadata( - fromGroupId, toGroupId, false/* isSink */), +// final BOp targetOp = getBOpIndex().get(altSinkId); +// final Integer toGroupId = (Integer) targetOp +// .getProperty(PipelineOp.Annotations.CONDITIONAL_GROUP); +// final SinkTransitionMetadata stm = new SinkTransitionMetadata( +// fromGroupId, toGroupId, false/* isSink */); + altSink = newBuffer(op, altSinkId, //null/*stm*/, /*altSinkMessagesOut,*/ stats); } @@ -744,7 +749,7 @@ */ private IBlockingBuffer<IBindingSet[]> newBuffer(final PipelineOp op, final int sinkId, - final SinkTransitionMetadata sinkTransitionMetadata, +// final SinkTransitionMetadata sinkTransitionMetadata, /* final AtomicInteger sinkMessagesOut, */final BOpStats stats) { final MultiplexBlockingBuffer<IBindingSet[]> factory = operatorQueues @@ -760,9 +765,11 @@ * operator writing on this sink. Wrap that to handle the sink * transition metadata. */ - return new SinkTransitionBuffer( + return //new SinkTransitionBuffer( new OutputStatsBuffer<IBindingSet[]>(factory.newInstance(), - stats), sinkTransitionMetadata); + stats) + //, sinkTransitionMetadata) + ; } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestBOpUtility.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestBOpUtility.java 2011-01-03 18:25:34 UTC (rev 4048) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestBOpUtility.java 2011-01-04 00:23:40 UTC (rev 4049) @@ -586,119 +586,119 @@ } - /** - * A conditional join group: - * - * <pre> - * (a b) - * optional { - * (b c) - * (c d) - * } - * </pre> - * - * where the groupId for the optional join group is ONE (1). The test should - * locate the first {@link PipelineJoin} in that join group, which is the - * one reading on the <code>(b c)</code> access path. - */ - public void test_getFirstBOpIdForConditionalGroup() { - - final String namespace = "kb"; - - final int startId = 1; // - final int joinId1 = 2; // : base join group. 
- final int predId1 = 3; // (a b) - final int joinId2 = 4; // : joinGroup1 - final int predId2 = 5; // (b c) - final int joinId3 = 6; // : joinGroup1 - final int predId3 = 7; // (c d) - final int sliceId = 8; // - - final IVariable<?> a = Var.var("a"); - final IVariable<?> b = Var.var("b"); - final IVariable<?> c = Var.var("c"); - final IVariable<?> d = Var.var("d"); - - final Integer joinGroup1 = Integer.valueOf(1); - - final PipelineOp startOp = new StartOp(new BOp[] {}, - NV.asMap(new NV[] {// - new NV(Predicate.Annotations.BOP_ID, startId),// - new NV(SliceOp.Annotations.EVALUATION_CONTEXT, - BOpEvaluationContext.CONTROLLER),// - })); - - final Predicate<?> pred1Op = new Predicate<E>( - new IVariableOrConstant[] { a, b }, NV - .asMap(new NV[] {// - new NV(Predicate.Annotations.RELATION_NAME, - new String[] { namespace }),// - new NV(Predicate.Annotations.BOP_ID, predId1),// - new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// - })); - - final Predicate<?> pred2Op = new Predicate<E>( - new IVariableOrConstant[] { b, c }, NV - .asMap(new NV[] {// - new NV(Predicate.Annotations.RELATION_NAME, - new String[] { namespace }),// - new NV(Predicate.Annotations.BOP_ID, predId2),// - new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// - })); - - final Predicate<?> pred3Op = new Predicate<E>( - new IVariableOrConstant[] { c, d }, NV - .asMap(new NV[] {// - new NV(Predicate.Annotations.RELATION_NAME, - new String[] { namespace }),// - new NV(Predicate.Annotations.BOP_ID, predId3),// - new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// - })); - - final PipelineOp join1Op = new PipelineJoin<E>(// - new BOp[]{startOp},// - new NV(Predicate.Annotations.BOP_ID, joinId1),// - new NV(PipelineJoin.Annotations.PREDICATE,pred1Op)); - - final PipelineOp join2Op = new PipelineJoin<E>(// - new BOp[] { join1Op },// - new NV(Predicate.Annotations.BOP_ID, joinId2),// - new NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// - new NV(PipelineJoin.Annotations.PREDICATE, pred2Op),// - // join is optional. - new NV(PipelineJoin.Annotations.OPTIONAL, true),// - // optional target is the same as the default target. - new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId)); - - final PipelineOp join3Op = ... [truncated message content] |
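The substance of r4049 is easier to see without the diff noise: every IBindingSet implementation drops its Stack of symbol tables (and with it the push()/pop(boolean) contract), keeping only the one table that current() used to peek at, and the copy constructor collapses to a single copy(src.current, variablesToKeep) call. Below is a minimal sketch of the resulting shape, in period Java 6 style; String and Object stand in for IVariable and IConstant, and the class and member names are illustrative, not the bigdata API.

    import java.util.LinkedHashMap;
    import java.util.Map;

    // Toy model of the r4049 shape: ONE symbol table ("current") instead of
    // a Stack of symbol tables; push()/pop(boolean) no longer exist.
    // String/Object stand in for IVariable/IConstant -- this is a sketch,
    // not the real HashBindingSet.
    class SimpleBindingSet {

        // Before r4049: Stack<LinkedHashMap<IVariable, IConstant>> stack;
        // after: the single table that current() used to return via peek().
        private final Map<String, Object> current;

        // New empty binding set.
        SimpleBindingSet() {
            current = new LinkedHashMap<String, Object>();
        }

        // Copy constructor: copy the single table, optionally restricted to
        // the given variables (mirrors current = copy(src.current,
        // variablesToKeep) in the diff above).
        SimpleBindingSet(final SimpleBindingSet src,
                final String[] variablesToKeep) {
            current = new LinkedHashMap<String, Object>();
            if (variablesToKeep == null) {
                current.putAll(src.current);
            } else {
                for (String var : variablesToKeep) {
                    final Object val = src.current.get(var);
                    if (val != null)
                        current.put(var, val);
                }
            }
        }

        Object get(final String var) {
            return current.get(var);
        }

        void set(final String var, final Object val) {
            current.put(var, val);
        }
    }

The design point, per the deprecation notes in the diff, is that the stack only existed to support conditional binding groups, and that mechanism could not be made to satisfy SPARQL optional-group semantics; the commits that follow move that responsibility into a subquery operator instead.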
From: <tho...@us...> - 2011-01-11 01:37:24
Revision: 4072 http://bigdata.svn.sourceforge.net/bigdata/?rev=4072&view=rev Author: thompsonbry Date: 2011-01-11 01:37:17 +0000 (Tue, 11 Jan 2011) Log Message: ----------- Removed the CONTROLLER annotation for the OptionalJoinGroup operator. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/OptionalJoinGroup.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestOptionalJoinGroup.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/OptionalJoinGroup.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/OptionalJoinGroup.java 2011-01-10 22:09:06 UTC (rev 4071) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/OptionalJoinGroup.java 2011-01-11 01:37:17 UTC (rev 4072) @@ -129,8 +129,8 @@ getRequiredProperty(Annotations.SUBQUERY); - if (!getProperty(Annotations.CONTROLLER, Annotations.DEFAULT_CONTROLLER)) - throw new IllegalArgumentException(Annotations.CONTROLLER); +// if (!getProperty(Annotations.CONTROLLER, Annotations.DEFAULT_CONTROLLER)) +// throw new IllegalArgumentException(Annotations.CONTROLLER); // // The id of this operator (if any). // final Integer thisId = (Integer)getProperty(Annotations.BOP_ID); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestOptionalJoinGroup.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestOptionalJoinGroup.java 2011-01-10 22:09:06 UTC (rev 4071) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestOptionalJoinGroup.java 2011-01-11 01:37:17 UTC (rev 4072) @@ -345,8 +345,8 @@ final PipelineOp joinGroup1Op = new OptionalJoinGroup(new BOp[]{join1Op}, new NV(Predicate.Annotations.BOP_ID, joinGroup1),// // new NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// - new NV(OptionalJoinGroup.Annotations.SUBQUERY, subQuery),// - new NV(BOp.Annotations.CONTROLLER,true)// + new NV(OptionalJoinGroup.Annotations.SUBQUERY, subQuery)// +// , new NV(BOp.Annotations.CONTROLLER,true)// // new NV(BOp.Annotations.EVALUATION_CONTEXT, // BOpEvaluationContext.CONTROLLER)// // // join is optional. @@ -610,8 +610,8 @@ final PipelineOp joinGroup1Op = new OptionalJoinGroup(new BOp[]{join1Op}, new NV(Predicate.Annotations.BOP_ID, joinGroup1),// // new NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// - new NV(OptionalJoinGroup.Annotations.SUBQUERY, subQuery),// - new NV(BOp.Annotations.CONTROLLER,true)// + new NV(OptionalJoinGroup.Annotations.SUBQUERY, subQuery)// +// new NV(BOp.Annotations.CONTROLLER,true)// // new NV(BOp.Annotations.EVALUATION_CONTEXT, // BOpEvaluationContext.CONTROLLER)// // // join is optional. @@ -871,8 +871,8 @@ final PipelineOp joinGroup1Op = new OptionalJoinGroup(new BOp[]{condOp}, new NV(Predicate.Annotations.BOP_ID, joinGroup1),// // new NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// - new NV(OptionalJoinGroup.Annotations.SUBQUERY, subQuery),// - new NV(BOp.Annotations.CONTROLLER,true)// + new NV(OptionalJoinGroup.Annotations.SUBQUERY, subQuery)// +// new NV(BOp.Annotations.CONTROLLER,true)// // new NV(BOp.Annotations.EVALUATION_CONTEXT, // BOpEvaluationContext.CONTROLLER)// // // join is optional. This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
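Taken together with r4049 above, the shape of the replacement mechanism is now clear: instead of pushing and popping symbol tables at group boundaries, the optional group is evaluated as a subquery by OptionalJoinGroup (renamed SubqueryOp in the next commit). Per that operator's javadoc, each source solution spawns one subquery run; the subquery's solutions are copied to the default sink, and a source solution whose subquery produces nothing is routed through unchanged under optional semantics. A toy model of that contract follows, assuming nothing from the bigdata API (the Subquery interface and Map-based solutions are illustrative stand-ins):

    import java.util.ArrayList;
    import java.util.List;
    import java.util.Map;

    // Illustrative stand-in for a one-solution-in, many-solutions-out
    // subquery; not the bigdata operator API.
    interface Subquery {
        List<Map<String, Object>> apply(Map<String, Object> solution);
    }

    class OptionalSubqueryModel {

        static List<Map<String, Object>> evaluate(
                final List<Map<String, Object>> source,
                final Subquery subquery, final boolean optional) {

            final List<Map<String, Object>> sink =
                    new ArrayList<Map<String, Object>>();

            for (Map<String, Object> bset : source) {

                // Each source solution gets its own subquery evaluation.
                final List<Map<String, Object>> out = subquery.apply(bset);

                if (!out.isEmpty()) {
                    // Subquery solutions are copied to the default sink.
                    sink.addAll(out);
                } else if (optional) {
                    // Optional join semantics: the original solution survives.
                    sink.add(bset);
                }
                // Otherwise the source solution is discarded (required group).
            }

            return sink;
        }
    }

This is the behavior exercised by the test suite renamed in the next commit: in test_query_join2_optionals, solutions such as (paul brad) and (john brad) fail the inner (b c)/(c d) joins but still appear in the output because the group is optional.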
From: <tho...@us...> - 2011-01-14 14:26:35
Revision: 4096 http://bigdata.svn.sourceforge.net/bigdata/?rev=4096&view=rev Author: thompsonbry Date: 2011-01-14 14:26:21 +0000 (Fri, 14 Jan 2011) Log Message: ----------- javadoc update and rename of the test suite for the SubqueryOp. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/SubqueryOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestAll.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestSubqueryOp.java Removed Paths: ------------- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestOptionalJoinGroup.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/SubqueryOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/SubqueryOp.java 2011-01-13 22:13:47 UTC (rev 4095) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/SubqueryOp.java 2011-01-14 14:26:21 UTC (rev 4096) @@ -49,9 +49,10 @@ /** * For each binding set presented, this operator executes a subquery. Any * solutions produced by the subquery are copied to the default sink. If no - * solutions are produced, then the original binding set is copied to the - * default sink (optional join semantics). Each subquery is run as a separate - * query but will be cancelled if the parent query is cancelled. + * solutions are produced and {@link Annotations#OPTIONAL} is <code>true</code>, + * then the original binding set is copied to the default sink (optional join + * semantics). Each subquery is run as a separate query but will be cancelled if + * the parent query is cancelled. * * FIXME Parallel evaluation of subqueries is not implemented. What is the * appropriate parallelism for this operator? More parallelism should reduce Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestAll.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestAll.java 2011-01-13 22:13:47 UTC (rev 4095) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestAll.java 2011-01-14 14:26:21 UTC (rev 4096) @@ -73,7 +73,7 @@ // test STEPS // suite.addTestSuite(TestUnion.class); - suite.addTestSuite(TestOptionalJoinGroup.class); + suite.addTestSuite(TestSubqueryOp.class); // @todo test STAR (transitive closure). // suite.addTestSuite(TestStar.class); Deleted: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestOptionalJoinGroup.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestOptionalJoinGroup.java 2011-01-13 22:13:47 UTC (rev 4095) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestOptionalJoinGroup.java 2011-01-14 14:26:21 UTC (rev 4096) @@ -1,1132 +0,0 @@ -/** - -Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. - -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. 
- -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ -/* - * Created on Aug 23, 2010 - */ - -package com.bigdata.bop.controller; - -import java.util.Iterator; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.Properties; -import java.util.UUID; -import java.util.concurrent.atomic.AtomicInteger; - -import junit.framework.TestCase2; - -import com.bigdata.bop.BOp; -import com.bigdata.bop.BOpEvaluationContext; -import com.bigdata.bop.Constant; -import com.bigdata.bop.IBindingSet; -import com.bigdata.bop.IConstant; -import com.bigdata.bop.IConstraint; -import com.bigdata.bop.IVariable; -import com.bigdata.bop.IVariableOrConstant; -import com.bigdata.bop.NV; -import com.bigdata.bop.PipelineOp; -import com.bigdata.bop.Var; -import com.bigdata.bop.IPredicate.Annotations; -import com.bigdata.bop.ap.E; -import com.bigdata.bop.ap.Predicate; -import com.bigdata.bop.ap.R; -import com.bigdata.bop.bindingSet.ArrayBindingSet; -import com.bigdata.bop.bindingSet.HashBindingSet; -import com.bigdata.bop.bset.ConditionalRoutingOp; -import com.bigdata.bop.bset.StartOp; -import com.bigdata.bop.constraint.NEConstant; -import com.bigdata.bop.engine.BOpStats; -import com.bigdata.bop.engine.IChunkMessage; -import com.bigdata.bop.engine.IRunningQuery; -import com.bigdata.bop.engine.LocalChunkMessage; -import com.bigdata.bop.engine.QueryEngine; -import com.bigdata.bop.join.PipelineJoin; -import com.bigdata.bop.solutions.SliceOp; -import com.bigdata.journal.BufferMode; -import com.bigdata.journal.ITx; -import com.bigdata.journal.Journal; -import com.bigdata.relation.accesspath.IAsynchronousIterator; -import com.bigdata.relation.accesspath.ThickAsynchronousIterator; -import com.bigdata.striterator.ChunkedArrayIterator; -import com.bigdata.striterator.Dechunkerator; -import com.bigdata.striterator.ICloseableIterator; - -/** - * Test suite for handling of optional join groups during query evaluation - * against a local database instance. - * - * <pre> - * -Dlog4j.configuration=bigdata/src/resources/logging/log4j.properties - * </pre> - * - * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id$ - */ -public class TestOptionalJoinGroup extends TestCase2 { - - /** - * - */ - public TestOptionalJoinGroup() { - } - - /** - * @param name - */ - public TestOptionalJoinGroup(String name) { - super(name); - } - - @Override - public Properties getProperties() { - - final Properties p = new Properties(super.getProperties()); - - p.setProperty(Journal.Options.BUFFER_MODE, BufferMode.Transient - .toString()); - - return p; - - } - - static private final String namespace = "ns"; - private Journal jnl; - private QueryEngine queryEngine; - - public void setUp() throws Exception { - - jnl = new Journal(getProperties()); - - loadData(jnl); - - queryEngine = new QueryEngine(jnl); - - queryEngine.init(); - - } - - /** - * Create and populate relation in the {@link #namespace}. - */ - private void loadData(final Journal store) { - - // create the relation. 
- final R rel = new R(store, namespace, ITx.UNISOLATED, new Properties()); - rel.create(); - - // data to insert (in key order for convenience). - final E[] a = {// - new E("Paul", "Mary"),// [0] - new E("Paul", "Brad"),// [1] - - new E("John", "Mary"),// [2] - new E("John", "Brad"),// [3] - - new E("Mary", "Brad"),// [4] - - new E("Brad", "Fred"),// [5] - new E("Brad", "Leon"),// [6] - }; - - // insert data (the records are not pre-sorted). - rel.insert(new ChunkedArrayIterator<E>(a.length, a, null/* keyOrder */)); - - // Do commit since not scale-out. - store.commit(); - - } - - public void tearDown() throws Exception { - - if (queryEngine != null) { - queryEngine.shutdownNow(); - queryEngine = null; - } - - if (jnl != null) { - jnl.destroy(); - jnl = null; - } - - } - - /** - * Return an {@link IAsynchronousIterator} that will read a single, - * empty {@link IBindingSet}. - * - * @param bindingSet - * the binding set. - */ - protected ThickAsynchronousIterator<IBindingSet[]> newBindingSetIterator( - final IBindingSet bindingSet) { - - return new ThickAsynchronousIterator<IBindingSet[]>( - new IBindingSet[][] { new IBindingSet[] { bindingSet } }); - - } - - /** - * Return an {@link IAsynchronousIterator} that will read a single, chunk - * containing all of the specified {@link IBindingSet}s. - * - * @param bindingSets - * the binding sets. - */ - protected ThickAsynchronousIterator<IBindingSet[]> newBindingSetIterator( - final IBindingSet[] bindingSets) { - - return new ThickAsynchronousIterator<IBindingSet[]>( - new IBindingSet[][] { bindingSets }); - - } - - /** - * Return an {@link IAsynchronousIterator} that will read a single, chunk - * containing all of the specified {@link IBindingSet}s. - * - * @param bindingSetChunks - * the chunks of binding sets. - */ - protected ThickAsynchronousIterator<IBindingSet[]> newBindingSetIterator( - final IBindingSet[][] bindingSetChunks) { - - return new ThickAsynchronousIterator<IBindingSet[]>(bindingSetChunks); - - } - - /** - * Unit test for optional join group. Three joins are used and target a - * {@link SliceOp}. The 2nd and 3rd joins are embedded in an - * {@link SubqueryOp}. - * <P> - * The optional join group takes the form: - * - * <pre> - * (a b) - * optional { - * (b c) - * (c d) - * } - * </pre> - * - * The (a b) tail will match everything in the knowledge base. The join - * group takes us two hops out from ?b. There should be four solutions that - * succeed the optional join group: - * - * <pre> - * (paul mary brad fred) - * (paul mary brad leon) - * (john mary brad fred) - * (john mary brad leon) - * </pre> - * - * and five more that don't succeed the optional join group: - * - * <pre> - * (paul brad) * - * (john brad) * - * (mary brad) * - * (brad fred) - * (brad leon) - * </pre> - * - * In this cases marked with a <code>*</code>, ?c will become temporarily - * bound to fred and leon (since brad knows fred and leon), but the (c d) - * tail will fail since fred and leon don't know anyone else. At this point, - * the ?c binding must be removed from the solution. - */ - public void test_query_join2_optionals() throws Exception { - - // main query - final int startId = 1; // - final int joinId1 = 2; // : base join group. 
- final int predId1 = 3; // (a b) - final int joinGroup1 = 9; - final int sliceId = 8; // - - // subquery - final int joinId2 = 4; // : joinGroup1 - final int predId2 = 5; // (b c) - final int joinId3 = 6; // : joinGroup1 - final int predId3 = 7; // (c d) - - final IVariable<?> a = Var.var("a"); - final IVariable<?> b = Var.var("b"); - final IVariable<?> c = Var.var("c"); - final IVariable<?> d = Var.var("d"); - - final PipelineOp startOp = new StartOp(new BOp[] {}, - NV.asMap(new NV[] {// - new NV(Predicate.Annotations.BOP_ID, startId),// - new NV(SliceOp.Annotations.EVALUATION_CONTEXT, - BOpEvaluationContext.CONTROLLER),// - })); - - final Predicate<?> pred1Op = new Predicate<E>( - new IVariableOrConstant[] { a, b }, NV - .asMap(new NV[] {// - new NV(Predicate.Annotations.RELATION_NAME, - new String[] { namespace }),// - new NV(Predicate.Annotations.BOP_ID, predId1),// - new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// - })); - - final Predicate<?> pred2Op = new Predicate<E>( - new IVariableOrConstant[] { b, c }, NV - .asMap(new NV[] {// - new NV(Predicate.Annotations.RELATION_NAME, - new String[] { namespace }),// - new NV(Predicate.Annotations.BOP_ID, predId2),// - new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// - })); - - final Predicate<?> pred3Op = new Predicate<E>( - new IVariableOrConstant[] { c, d }, NV - .asMap(new NV[] {// - new NV(Predicate.Annotations.RELATION_NAME, - new String[] { namespace }),// - new NV(Predicate.Annotations.BOP_ID, predId3),// - new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// - })); - - final PipelineOp join1Op = new PipelineJoin<E>(// - new BOp[]{startOp},// - new NV(Predicate.Annotations.BOP_ID, joinId1),// - new NV(PipelineJoin.Annotations.PREDICATE,pred1Op)); - - final PipelineOp subQuery; - { - final PipelineOp join2Op = new PipelineJoin<E>(// - new BOp[] { /*join1Op*/ },// - new NV(Predicate.Annotations.BOP_ID, joinId2),// -// new NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// - new NV(PipelineJoin.Annotations.PREDICATE, pred2Op)// -// // join is optional. -// new NV(PipelineJoin.Annotations.OPTIONAL, true),// -// // optional target is the same as the default target. -// new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId) - ); - - final PipelineOp join3Op = new PipelineJoin<E>(// - new BOp[] { join2Op },// - new NV(Predicate.Annotations.BOP_ID, joinId3),// -// new NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// - new NV(PipelineJoin.Annotations.PREDICATE, pred3Op)// -// // join is optional. -// new NV(PipelineJoin.Annotations.OPTIONAL, true),// -// // optional target is the same as the default target. -// new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId) - ); - subQuery = join3Op; - } - - final PipelineOp joinGroup1Op = new SubqueryOp(new BOp[]{join1Op}, - new NV(Predicate.Annotations.BOP_ID, joinGroup1),// -// new NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// - new NV(SubqueryOp.Annotations.SUBQUERY, subQuery)// -// , new NV(BOp.Annotations.CONTROLLER,true)// -// new NV(BOp.Annotations.EVALUATION_CONTEXT, -// BOpEvaluationContext.CONTROLLER)// -// // join is optional. -// new NV(PipelineJoin.Annotations.OPTIONAL, true),// -// // optional target is the same as the default target. 
-// new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId) - ); - - final PipelineOp sliceOp = new SliceOp(// - new BOp[]{joinGroup1Op}, - NV.asMap(new NV[] {// - new NV(BOp.Annotations.BOP_ID, sliceId),// - new NV(BOp.Annotations.EVALUATION_CONTEXT, - BOpEvaluationContext.CONTROLLER),// - })); - - final PipelineOp query = sliceOp; - - // start the query. - final UUID queryId = UUID.randomUUID(); - final IChunkMessage<IBindingSet> initialChunkMessage; - { - - final IBindingSet initialBindings = new HashBindingSet(); - -// initialBindings.set(Var.var("x"), new Constant<String>("Mary")); - - initialChunkMessage = new LocalChunkMessage<IBindingSet>(queryEngine, - queryId, startId,// - -1, // partitionId - newBindingSetIterator(initialBindings)); - } - final IRunningQuery runningQuery = queryEngine.eval(queryId, query, - initialChunkMessage); - - // verify solutions. - { - - // the expected solutions. - final IBindingSet[] expected = new IBindingSet[] {// - // four solutions where the optional join succeeds. - new ArrayBindingSet(// - new IVariable[] { a, b, c, d },// - new IConstant[] { new Constant<String>("Paul"), - new Constant<String>("Mary"), - new Constant<String>("Brad"), - new Constant<String>("Fred") }// - ), - new ArrayBindingSet(// - new IVariable[] { a, b, c, d },// - new IConstant[] { new Constant<String>("Paul"), - new Constant<String>("Mary"), - new Constant<String>("Brad"), - new Constant<String>("Leon") }// - ), - new ArrayBindingSet(// - new IVariable[] { a, b, c, d },// - new IConstant[] { new Constant<String>("John"), - new Constant<String>("Mary"), - new Constant<String>("Brad"), - new Constant<String>("Fred") }// - ), - new ArrayBindingSet(// - new IVariable[] { a, b, c, d },// - new IConstant[] { new Constant<String>("John"), - new Constant<String>("Mary"), - new Constant<String>("Brad"), - new Constant<String>("Leon") }// - ), - // plus anything we read from the first access path which did not - // pass the optional join - new ArrayBindingSet(// - new IVariable[] { a, b },// - new IConstant[] { new Constant<String>("Paul"), - new Constant<String>("Brad") }// - ), - new ArrayBindingSet(// - new IVariable[] { a, b },// - new IConstant[] { new Constant<String>("John"), - new Constant<String>("Brad") }// - ), - new ArrayBindingSet(// - new IVariable[] { a, b },// - new IConstant[] { new Constant<String>("Mary"), - new Constant<String>("Brad") }// - ), - new ArrayBindingSet(// - new IVariable[] { a, b },// - new IConstant[] { new Constant<String>("Brad"), - new Constant<String>("Fred") }// - ), - new ArrayBindingSet(// - new IVariable[] { a, b },// - new IConstant[] { new Constant<String>("Brad"), - new Constant<String>("Leon") }// - ) - }; - - /* - * junit.framework.AssertionFailedError: Iterator will deliver too - * many objects: reminder(3)=[{ a=John, b=Brad }, { a=Mary, b=Brad - * }, { a=Paul, b=Brad }]. - */ - assertSameSolutionsAnyOrder(expected, - new Dechunkerator<IBindingSet>(runningQuery.iterator())); - - } - - // Wait until the query is done. - runningQuery.get(); - final Map<Integer, BOpStats> statsMap = runningQuery.getStats(); - { - // validate the stats map. - assertNotNull(statsMap); - assertEquals(4, statsMap.size()); - if (log.isInfoEnabled()) - log.info(statsMap.toString()); - } - - } - - /** - * Unit test for optional join group with a filter. Three joins are used and - * target a {@link SliceOp}. The 2nd and 3rd joins are embedded in an - * optional join group. The optional join group contains a filter. 
- * <p> - * The optional join group takes the form: - * - * <pre> - * (a b) - * optional { - * (b c) - * (c d) - * filter(d != Leon) - * } - * </pre> - * - * The (a b) tail will match everything in the knowledge base. The join - * group takes us two hops out from ?b. There should be two solutions that - * succeed the optional join group: - * - * <pre> - * (paul mary brad fred) - * (john mary brad fred) - * </pre> - * - * and five more that don't succeed the optional join group: - * - * <pre> - * (paul brad) * - * (john brad) * - * (mary brad) * - * (brad fred) - * (brad leon) - * </pre> - * - * In the cases marked with a <code>*</code>, ?c will become temporarily - * bound to fred and leon (since brad knows fred and leon), but the (c d) - * tail will fail since fred and leon don't know anyone else. At this point, - * the ?c binding must be removed from the solution. - * <p> - * The filter (d != Leon) will prune the two solutions: - * - * <pre> - * (paul mary brad leon) - * (john mary brad leon) - * </pre> - * - * since ?d is bound to Leon in those cases. - */ - public void test_query_optionals_filter() throws Exception { - - // main query - final int startId = 1; - final int joinId1 = 2; // - final int predId1 = 3; // (a,b) - final int joinGroup1 = 9; - final int sliceId = 8; - - // subquery - final int joinId2 = 4; // : group1 - final int predId2 = 5; // (b,c) - final int joinId3 = 6; // : group1 - final int predId3 = 7; // (c,d) - - - final IVariable<?> a = Var.var("a"); - final IVariable<?> b = Var.var("b"); - final IVariable<?> c = Var.var("c"); - final IVariable<?> d = Var.var("d"); - - final PipelineOp startOp = new StartOp(new BOp[] {}, - NV.asMap(new NV[] {// - new NV(Predicate.Annotations.BOP_ID, startId),// - new NV(SliceOp.Annotations.EVALUATION_CONTEXT, - BOpEvaluationContext.CONTROLLER),// - })); - - final Predicate<?> pred1Op = new Predicate<E>( - new IVariableOrConstant[] { a, b }, NV - .asMap(new NV[] {// - new NV(Predicate.Annotations.RELATION_NAME, - new String[] { namespace }),// - new NV(Predicate.Annotations.BOP_ID, predId1),// - new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// - })); - - final Predicate<?> pred2Op = new Predicate<E>( - new IVariableOrConstant[] { b, c }, NV - .asMap(new NV[] {// - new NV(Predicate.Annotations.RELATION_NAME, - new String[] { namespace }),// - new NV(Predicate.Annotations.BOP_ID, predId2),// - new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// - })); - - final Predicate<?> pred3Op = new Predicate<E>( - new IVariableOrConstant[] { c, d }, NV - .asMap(new NV[] {// - new NV(Predicate.Annotations.RELATION_NAME, - new String[] { namespace }),// - new NV(Predicate.Annotations.BOP_ID, predId3),// - new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// - })); - - final PipelineOp join1Op = new PipelineJoin<E>(// - new BOp[]{startOp},// - new NV(Predicate.Annotations.BOP_ID, joinId1),// - new NV(PipelineJoin.Annotations.PREDICATE,pred1Op)); - - final PipelineOp subQuery; - { - final PipelineOp join2Op = new PipelineJoin<E>(// - new BOp[] { /*join1Op*/ },// - new NV(Predicate.Annotations.BOP_ID, joinId2),// -// new NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// - new NV(PipelineJoin.Annotations.PREDICATE, pred2Op)// -// // join is optional. -// new NV(PipelineJoin.Annotations.OPTIONAL, true),// -// // optional target is the same as the default target. 
-// new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId) - ); - - final PipelineOp join3Op = new PipelineJoin<E>(// - new BOp[] { join2Op },// - new NV(Predicate.Annotations.BOP_ID, joinId3),// -// new NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// - new NV(PipelineJoin.Annotations.PREDICATE, pred3Op),// - // constraint d != Leon - new NV(PipelineJoin.Annotations.CONSTRAINTS, - new IConstraint[] { new NEConstant(d, new Constant<String>("Leon")) }) -// // join is optional. -// new NV(PipelineJoin.Annotations.OPTIONAL, true),// -// // optional target is the same as the default target. -// new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId) - ); - - subQuery = join3Op; - } - - final PipelineOp joinGroup1Op = new SubqueryOp(new BOp[]{join1Op}, - new NV(Predicate.Annotations.BOP_ID, joinGroup1),// -// new NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// - new NV(SubqueryOp.Annotations.SUBQUERY, subQuery)// -// new NV(BOp.Annotations.CONTROLLER,true)// -// new NV(BOp.Annotations.EVALUATION_CONTEXT, -// BOpEvaluationContext.CONTROLLER)// -// // join is optional. -// new NV(PipelineJoin.Annotations.OPTIONAL, true),// -// // optional target is the same as the default target. -// new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId) - ); - - final PipelineOp sliceOp = new SliceOp(// - new BOp[]{joinGroup1Op}, - NV.asMap(new NV[] {// - new NV(BOp.Annotations.BOP_ID, sliceId),// - new NV(BOp.Annotations.EVALUATION_CONTEXT, - BOpEvaluationContext.CONTROLLER),// - })); - - final PipelineOp query = sliceOp; - - // start the query. - final UUID queryId = UUID.randomUUID(); - final IChunkMessage<IBindingSet> initialChunkMessage; - { - - final IBindingSet initialBindings = new HashBindingSet(); - -// initialBindings.set(Var.var("x"), new Constant<String>("Mary")); - - initialChunkMessage = new LocalChunkMessage<IBindingSet>(queryEngine, - queryId, startId,// - -1, // partitionId - newBindingSetIterator(initialBindings)); - } - final IRunningQuery runningQuery = queryEngine.eval(queryId, query, - initialChunkMessage); - - // verify solutions. - { - - // the expected solutions. - final IBindingSet[] expected = new IBindingSet[] {// - // two solutions where the optional join succeeds. 
- new ArrayBindingSet(//
- new IVariable[] { a, b, c, d },//
- new IConstant[] { new Constant<String>("Paul"),
- new Constant<String>("Mary"),
- new Constant<String>("Brad"),
- new Constant<String>("Fred") }//
- ),
- new ArrayBindingSet(//
- new IVariable[] { a, b, c, d },//
- new IConstant[] { new Constant<String>("John"),
- new Constant<String>("Mary"),
- new Constant<String>("Brad"),
- new Constant<String>("Fred") }//
- ),
- // plus anything we read from the first access path which did not
- // pass the optional join
- new ArrayBindingSet(//
- new IVariable[] { a, b },//
- new IConstant[] { new Constant<String>("Paul"),
- new Constant<String>("Brad") }//
- ),
- new ArrayBindingSet(//
- new IVariable[] { a, b },//
- new IConstant[] { new Constant<String>("John"),
- new Constant<String>("Brad") }//
- ),
- new ArrayBindingSet(//
- new IVariable[] { a, b },//
- new IConstant[] { new Constant<String>("Mary"),
- new Constant<String>("Brad") }//
- ),
- new ArrayBindingSet(//
- new IVariable[] { a, b },//
- new IConstant[] { new Constant<String>("Brad"),
- new Constant<String>("Fred") }//
- ),
- new ArrayBindingSet(//
- new IVariable[] { a, b },//
- new IConstant[] { new Constant<String>("Brad"),
- new Constant<String>("Leon") }//
- )
- };
-
- assertSameSolutionsAnyOrder(expected,
- new Dechunkerator<IBindingSet>(runningQuery.iterator()));
-
- }
-
- // Wait until the query is done.
- runningQuery.get();
- final Map<Integer, BOpStats> statsMap = runningQuery.getStats();
- {
- // validate the stats map.
- assertNotNull(statsMap);
- assertEquals(4, statsMap.size());
- if (log.isInfoEnabled())
- log.info(statsMap.toString());
- }
-
- }
-
- /**
- * Unit test for optional join group with a filter on a variable outside the
- * optional join group. Three joins are used and target a {@link SliceOp}.
- * The 2nd and 3rd joins are embedded in a {@link SubqueryOp}. The
- * optional join group contains a filter that uses a variable outside the
- * optional join group.
- * <P>
- * The query takes the form:
- *
- * <pre>
- * (a b)
- * optional {
- * (b c)
- * (c d)
- * filter(a != Paul)
- * }
- * </pre>
- *
- * The (a b) tail will match everything in the knowledge base. The join
- * group takes us two hops out from ?b. There should be two solutions that
- * succeed the optional join group:
- *
- * <pre>
- * (john mary brad fred)
- * (john mary brad leon)
- * </pre>
- *
- * and six more that don't succeed the optional join group:
- *
- * <pre>
- * (paul mary) *
- * (paul brad) *
- * (john brad)
- * (mary brad)
- * (brad fred)
- * (brad leon)
- * </pre>
- *
- * In the cases marked with a <code>*</code>, ?a is bound to Paul even
- * though there is a filter that specifically prohibits a = Paul. This is
- * because the filter is inside the optional join group, which means that
- * solutions can still include a = Paul, but the optional join group should
- * not run in that case.
- */ - public void test_query_optionals_filter2() throws Exception { - - // main query - final int startId = 1; - final int joinId1 = 2; - final int predId1 = 3; // (a,b) - final int condId = 4; // (a != Paul) - final int joinGroup1 = 10; - final int sliceId = 9; - - // subquery (iff condition is satisfied) - final int joinId2 = 5; // : group1 - final int predId2 = 6; // (b,c) - final int joinId3 = 7; // : group1 - final int predId3 = 8; // (c,d) - - final IVariable<?> a = Var.var("a"); - final IVariable<?> b = Var.var("b"); - final IVariable<?> c = Var.var("c"); - final IVariable<?> d = Var.var("d"); - -// final Integer joinGroup1 = Integer.valueOf(1); - - /* - * Not quite sure how to write this one. I think it probably goes - * something like this: - * - * 1. startOp - * 2. join1Op(a b) - * 3. conditionalRoutingOp( if a = Paul then goto sliceOp ) - * 4. join2Op(b c) - * 5. join3Op(c d) - * 6. sliceOp - */ - - final PipelineOp startOp = new StartOp(new BOp[] {}, - NV.asMap(new NV[] {// - new NV(Predicate.Annotations.BOP_ID, startId),// - new NV(SliceOp.Annotations.EVALUATION_CONTEXT, - BOpEvaluationContext.CONTROLLER),// - })); - - final Predicate<?> pred1Op = new Predicate<E>( - new IVariableOrConstant[] { a, b }, NV - .asMap(new NV[] {// - new NV(Predicate.Annotations.RELATION_NAME, - new String[] { namespace }),// - new NV(Predicate.Annotations.BOP_ID, predId1),// - new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// - })); - - final Predicate<?> pred2Op = new Predicate<E>( - new IVariableOrConstant[] { b, c }, NV - .asMap(new NV[] {// - new NV(Predicate.Annotations.RELATION_NAME, - new String[] { namespace }),// - new NV(Predicate.Annotations.BOP_ID, predId2),// - new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// - })); - - final Predicate<?> pred3Op = new Predicate<E>( - new IVariableOrConstant[] { c, d }, NV - .asMap(new NV[] {// - new NV(Predicate.Annotations.RELATION_NAME, - new String[] { namespace }),// - new NV(Predicate.Annotations.BOP_ID, predId3),// - new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// - })); - - final PipelineOp join1Op = new PipelineJoin<E>(// - new BOp[]{startOp},// - new NV(Predicate.Annotations.BOP_ID, joinId1),// - new NV(PipelineJoin.Annotations.PREDICATE,pred1Op)); - - final IConstraint condition = new NEConstant(a, new Constant<String>("Paul")); - - final ConditionalRoutingOp condOp = new ConditionalRoutingOp(new BOp[]{join1Op}, - NV.asMap(new NV[]{// - new NV(BOp.Annotations.BOP_ID,condId), - new NV(PipelineOp.Annotations.SINK_REF, joinGroup1), // a != Paul - new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId), // a == Paul - new NV(ConditionalRoutingOp.Annotations.CONDITION, condition), - })); - - final PipelineOp subQuery; - { - final PipelineOp join2Op = new PipelineJoin<E>(// - new BOp[] { /*condOp*/ },// - new NV(Predicate.Annotations.BOP_ID, joinId2),// -// new NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// - new NV(PipelineJoin.Annotations.PREDICATE, pred2Op)// -// // join is optional. -// new NV(PipelineJoin.Annotations.OPTIONAL, true),// -// // optional target is the same as the default target. -// new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId) - ); - - final PipelineOp join3Op = new PipelineJoin<E>(// - new BOp[] { join2Op },// - new NV(Predicate.Annotations.BOP_ID, joinId3),// -// new NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// - new NV(PipelineJoin.Annotations.PREDICATE, pred3Op)// -// // join is optional. 
-// new NV(PipelineJoin.Annotations.OPTIONAL, true),// -// // optional target is the same as the default target. -// new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId) - ); - subQuery = join3Op; - } - - final PipelineOp joinGroup1Op = new SubqueryOp(new BOp[]{condOp}, - new NV(Predicate.Annotations.BOP_ID, joinGroup1),// -// new NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// - new NV(SubqueryOp.Annotations.SUBQUERY, subQuery)// -// new NV(BOp.Annotations.CONTROLLER,true)// -// new NV(BOp.Annotations.EVALUATION_CONTEXT, -// BOpEvaluationContext.CONTROLLER)// -// // join is optional. -// new NV(PipelineJoin.Annotations.OPTIONAL, true),// -// // optional target is the same as the default target. -// new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId) - ); - - final PipelineOp sliceOp = new SliceOp(// - new BOp[]{joinGroup1Op}, - NV.asMap(new NV[] {// - new NV(BOp.Annotations.BOP_ID, sliceId),// - new NV(BOp.Annotations.EVALUATION_CONTEXT, - BOpEvaluationContext.CONTROLLER),// - })); - - final PipelineOp query = sliceOp; - - // start the query. - final UUID queryId = UUID.randomUUID(); - final IChunkMessage<IBindingSet> initialChunkMessage; - { - - final IBindingSet initialBindings = new HashBindingSet(); - -// initialBindings.set(Var.var("x"), new Constant<String>("Mary")); - - initialChunkMessage = new LocalChunkMessage<IBindingSet>(queryEngine, - queryId, startId,// - -1, // partitionId - newBindingSetIterator(initialBindings)); - } - final IRunningQuery runningQuery = queryEngine.eval(queryId, query, - initialChunkMessage); - - // verify solutions. - { - - // the expected solutions. - final IBindingSet[] expected = new IBindingSet[] {// - // two solutions where the optional join succeeds. - new ArrayBindingSet(// - new IVariable[] { a, b, c, d },// - new IConstant[] { new Constant<String>("John"), - new Constant<String>("Mary"), - new Constant<String>("Brad"), - new Constant<String>("Leon") }// - ), - new ArrayBindingSet(// - new IVariable[] { a, b, c, d },// - new IConstant[] { new Constant<String>("John"), - new Constant<String>("Mary"), - new Constant<String>("Brad"), - new Constant<String>("Fred") }// - ), - // plus anything we read from the first access path which did not - // pass the optional join - new ArrayBindingSet(// - new IVariable[] { a, b },// - new IConstant[] { new Constant<String>("Paul"), - new Constant<String>("Mary") }// - ), - new ArrayBindingSet(// - new IVariable[] { a, b },// - new IConstant[] { new Constant<String>("Paul"), - new Constant<String>("Brad") }// - ), - new ArrayBindingSet(// - new IVariable[] { a, b },// - new IConstant[] { new Constant<String>("John"), - new Constant<String>("Brad") }// - ), - new ArrayBindingSet(// - new IVariable[] { a, b },// - new IConstant[] { new Constant<String>("Mary"), - new Constant<String>("Brad") }// - ), - new ArrayBindingSet(// - new IVariable[] { a, b },// - new IConstant[] { new Constant<String>("Brad"), - new Constant<String>("Fred") }// - ), - new ArrayBindingSet(// - new IVariable[] { a, b },// - new IConstant[] { new Constant<String>("Brad"), - new Constant<String>("Leon") }// - ) - }; - - assertSameSolutionsAnyOrder(expected, - new Dechunkerator<IBindingSet>(runningQuery.iterator())); - - } - - // Wait until the query is done. - runningQuery.get(); - final Map<Integer, BOpStats> statsMap = runningQuery.getStats(); - { - // validate the stats map. 
- assertNotNull(statsMap); - assertEquals(5, statsMap.size()); - if (log.isInfoEnabled()) - log.info(statsMap.toString()); - } - - } - - /** - * Verify the expected solutions. - * - * @param expected - * @param itr - */ - static public void assertSameSolutions(final IBindingSet[] expected, - final IAsynchronousIterator<IBindingSet[]> itr) { - try { - int n = 0; - while (itr.hasNext()) { - final IBindingSet[] e = itr.next(); - if (log.isInfoEnabled()) - log.info(n + " : chunkSize=" + e.length); - for (int i = 0; i < e.length; i++) { - if (log.isInfoEnabled()) - log.info(n + " : " + e[i]); - if (n >= expected.length) { - fail("Willing to deliver too many solutions: n=" + n - + " : " + e[i]); - } - if (!expected[n].equals(e[i])) { - fail("n=" + n + ", expected=" + expected[n] - + ", actual=" + e[i]); - } - n++; - } - } - assertEquals("Wrong number of solutions", expected.length, n); - } finally { - itr.close(); - } - } - - /** - * Verifies that the iterator visits the specified objects in some arbitrary - * ordering and that the iterator is exhausted once all expected objects - * have been visited. The implementation uses a selection without - * replacement "pattern". - * <p> - * Note: If the objects being visited do not correctly implement hashCode() - * and equals() then this can fail even if the desired objects would be - * visited. When this happens, fix the implementation classes. - */ - static public <T> void assertSameSolutionsAnyOrder(final T[] expected, - final Iterator<T> actual) { - - assertSameSolutionsAnyOrder("", expected, actual); - - } - - /** - * Verifies that the iterator visits the specified objects in some arbitrary - * ordering and that the iterator is exhausted once all expected objects - * have been visited. The implementation uses a selection without - * replacement "pattern". - * <p> - * Note: If the objects being visited do not correctly implement hashCode() - * and equals() then this can fail even if the desired objects would be - * visited. When this happens, fix the implementation classes. - */ - static public <T> void assertSameSolutionsAnyOrder(final String msg, - final T[] expected, final Iterator<T> actual) { - - try { - - /* - * Populate a map that we will use to realize the match and - * selection without replacement logic. The map uses counters to - * handle duplicate keys. This makes it possible to write tests in - * which two or more binding sets which are "equal" appear. - */ - - final int nrange = expected.length; - - final java.util.Map<T, AtomicInteger> range = new java.util.LinkedHashMap<T, AtomicInteger>(); - - for (int j = 0; j < nrange; j++) { - - AtomicInteger count = range.get(expected[j]); - - if (count == null) { - - count = new AtomicInteger(); - - } - - range.put(expected[j], count); - - count.incrementAndGet(); - - } - - // Do selection without replacement for the objects visited by - // iterator. 
-
- for (int j = 0; j < nrange; j++) {
-
- if (!actual.hasNext()) {
-
- fail(msg
- + ": Iterator exhausted while expecting more object(s)"
- + ": index=" + j);
-
- }
-
- final T actualObject = actual.next();
-
- if (log.isInfoEnabled())
- log.info("visiting: " + actualObject);
-
- AtomicInteger counter = range.get(actualObject);
-
- if (counter == null || counter.get() == 0) {
-
- fail("Object not expected" + ": index=" + j + ", object="
- + actualObject);
-
- }
-
- counter.decrementAndGet();
-
- }
-
- if (actual.hasNext()) {
-
- final List<T> remainder = new LinkedList<T>();
-
- while(actual.hasNext()) {
- remainder.add(actual.next());
- }
-
- fail("Iterator will deliver too many objects: remainder("
- + remainder.size() + ")=" + remainder);
-
- }
-
- } finally {
-
- if (actual instanceof ICloseableIterator<?>) {
-
- ((ICloseableIterator<T>) actual).close();
-
- }
-
- }
-
- }
-
-}

Copied: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestSubqueryOp.java (from rev 4086, branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestOptionalJoinGroup.java)
===================================================================
--- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestSubqueryOp.java (rev 0)
+++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestSubqueryOp.java 2011-01-14 14:26:21 UTC (rev 4096)
@@ -0,0 +1,1132 @@
+/**
+
+Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved.
+
+Contact:
+ SYSTAP, LLC
+ 4501 Tower Road
+ Greensboro, NC 27410
+ lic...@bi...
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Aug 23, 2010 + */ + +package com.bigdata.bop.controller; + +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.UUID; +import java.util.concurrent.atomic.AtomicInteger; + +import junit.framework.TestCase2; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.BOpEvaluationContext; +import com.bigdata.bop.Constant; +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IConstant; +import com.bigdata.bop.IConstraint; +import com.bigdata.bop.IVariable; +import com.bigdata.bop.IVariableOrConstant; +import com.bigdata.bop.NV; +import com.bigdata.bop.PipelineOp; +import com.bigdata.bop.Var; +import com.bigdata.bop.IPredicate.Annotations; +import com.bigdata.bop.ap.E; +import com.bigdata.bop.ap.Predicate; +import com.bigdata.bop.ap.R; +import com.bigdata.bop.bindingSet.ArrayBindingSet; +import com.bigdata.bop.bindingSet.HashBindingSet; +import com.bigdata.bop.bset.ConditionalRoutingOp; +import com.bigdata.bop.bset.StartOp; +import com.bigdata.bop.constraint.NEConstant; +import com.bigdata.bop.engine.BOpStats; +import com.bigdata.bop.engine.IChunkMessage; +import com.bigdata.bop.engine.IRunningQuery; +import com.bigdata.bop.engine.LocalChunkMessage; +import com.bigdata.bop.engine.QueryEngine; +import com.bigdata.bop.join.PipelineJoin; +import com.bigdata.bop.solutions.SliceOp; +import com.bigdata.journal.BufferMode; +import com.bigdata.journal.ITx; +import com.bigdata.journal.Journal; +import com.bigdata.relation.accesspath.IAsynchronousIterator; +import com.bigdata.relation.accesspath.ThickAsynchronousIterator; +import com.bigdata.striterator.ChunkedArrayIterator; +import com.bigdata.striterator.Dechunkerator; +import com.bigdata.striterator.ICloseableIterator; + +/** + * Test suite for handling of optional join groups during query evaluation + * against a local database instance. + * + * <pre> + * -Dlog4j.configuration=bigdata/src/resources/logging/log4j.properties + * </pre> + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public class TestSubqueryOp extends TestCase2 { + + /** + * + */ + public TestSubqueryOp() { + } + + /** + * @param name + */ + public TestSubqueryOp(String name) { + super(name); + } + + @Override + public Properties getProperties() { + + final Properties p = new Properties(super.getProperties()); + + p.setProperty(Journal.Options.BUFFER_MODE, BufferMode.Transient + .toString()); + + return p; + + } + + static private final String namespace = "ns"; + private Journal jnl; + private QueryEngine queryEngine; + + public void setUp() throws Exception { + + jnl = new Journal(getProperties()); + + loadData(jnl); + + queryEngine = new QueryEngine(jnl); + + queryEngine.init(); + + } + + /** + * Create and populate relation in the {@link #namespace}. + */ + private void loadData(final Journal store) { + + // create the relation. + final R rel = new R(store, namespace, ITx.UNISOLATED, new Properties()); + rel.create(); + + // data to insert (in key order for convenience). 
+ final E[] a = {//
+ new E("Paul", "Mary"),// [0]
+ new E("Paul", "Brad"),// [1]
+
+ new E("John", "Mary"),// [2]
+ new E("John", "Brad"),// [3]
+
+ new E("Mary", "Brad"),// [4]
+
+ new E("Brad", "Fred"),// [5]
+ new E("Brad", "Leon"),// [6]
+ };
+
+ // insert data (the records are not pre-sorted).
+ rel.insert(new ChunkedArrayIterator<E>(a.length, a, null/* keyOrder */));
+
+ // Do commit since not scale-out.
+ store.commit();
+
+ }
+
+ public void tearDown() throws Exception {
+
+ if (queryEngine != null) {
+ queryEngine.shutdownNow();
+ queryEngine = null;
+ }
+
+ if (jnl != null) {
+ jnl.destroy();
+ jnl = null;
+ }
+
+ }
+
+ /**
+ * Return an {@link IAsynchronousIterator} that will read a single,
+ * empty {@link IBindingSet}.
+ *
+ * @param bindingSet
+ * the binding set.
+ */
+ protected ThickAsynchronousIterator<IBindingSet[]> newBindingSetIterator(
+ final IBindingSet bindingSet) {
+
+ return new ThickAsynchronousIterator<IBindingSet[]>(
+ new IBindingSet[][] { new IBindingSet[] { bindingSet } });
+
+ }
+
+ /**
+ * Return an {@link IAsynchronousIterator} that will read a single chunk
+ * containing all of the specified {@link IBindingSet}s.
+ *
+ * @param bindingSets
+ * the binding sets.
+ */
+ protected ThickAsynchronousIterator<IBindingSet[]> newBindingSetIterator(
+ final IBindingSet[] bindingSets) {
+
+ return new ThickAsynchronousIterator<IBindingSet[]>(
+ new IBindingSet[][] { bindingSets });
+
+ }
+
+ /**
+ * Return an {@link IAsynchronousIterator} that will read a single chunk
+ * containing all of the specified {@link IBindingSet}s.
+ *
+ * @param bindingSetChunks
+ * the chunks of binding sets.
+ */
+ protected ThickAsynchronousIterator<IBindingSet[]> newBindingSetIterator(
+ final IBindingSet[][] bindingSetChunks) {
+
+ return new ThickAsynchronousIterator<IBindingSet[]>(bindingSetChunks);
+
+ }
+
+ /**
+ * Unit test for optional join group. Three joins are used and target a
+ * {@link SliceOp}. The 2nd and 3rd joins are embedded in a
+ * {@link SubqueryOp}.
+ * <P>
+ * The optional join group takes the form:
+ *
+ * <pre>
+ * (a b)
+ * optional {
+ * (b c)
+ * (c d)
+ * }
+ * </pre>
+ *
+ * The (a b) tail will match everything in the knowledge base. The join
+ * group takes us two hops out from ?b. There should be four solutions that
+ * succeed the optional join group:
+ *
+ * <pre>
+ * (paul mary brad fred)
+ * (paul mary brad leon)
+ * (john mary brad fred)
+ * (john mary brad leon)
+ * </pre>
+ *
+ * and five more that don't succeed the optional join group:
+ *
+ * <pre>
+ * (paul brad) *
+ * (john brad) *
+ * (mary brad) *
+ * (brad fred)
+ * (brad leon)
+ * </pre>
+ *
+ * In the cases marked with a <code>*</code>, ?c will become temporarily
+ * bound to fred and leon (since brad knows fred and leon), but the (c d)
+ * tail will fail since fred and leon don't know anyone else. At this point,
+ * the ?c binding must be removed from the solution.
+ */
+ public void test_query_join2_optionals() throws Exception {
+
+ // main query
+ final int startId = 1; //
+ final int joinId1 = 2; // : base join group.
+ final int predId1 = 3; // (a b) + final int joinGroup1 = 9; + final int sliceId = 8; // + + // subquery + final int joinId2 = 4; // : joinGroup1 + final int predId2 = 5; // (b c) + final int joinId3 = 6; // : joinGroup1 + final int predId3 = 7; // (c d) + + final IVariable<?> a = Var.var("a"); + final IVariable<?> b = Var.var("b"); + final IVariable<?> c = Var.var("c"); + final IVariable<?> d = Var.var("d"); + + final PipelineOp startOp = new StartOp(new BOp[] {}, + NV.asMap(new NV[] {// + new NV(Predicate.Annotations.BOP_ID, startId),// + new NV(SliceOp.Annotations.EVALUATION_CONTEXT, + BOpEvaluationContext.CONTROLLER),// + })); + + final Predicate<?> pred1Op = new Predicate<E>( + new IVariableOrConstant[] { a, b }, NV + .asMap(new NV[] {// + new NV(Predicate.Annotations.RELATION_NAME, + new String[] { namespace }),// + new NV(Predicate.Annotations.BOP_ID, predId1),// + new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// + })); + + final Predicate<?> pred2Op = new Predicate<E>( + new IVariableOrConstant[] { b, c }, NV + .asMap(new NV[] {// + new NV(Predicate.Annotations.RELATION_NAME, + new String[] { namespace }),// + new NV(Predicate.Annotations.BOP_ID, predId2),// + new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// + })); + + final Predicate<?> pred3Op = new Predicate<E>( + new IVariableOrConstant[] { c, d }, NV + .asMap(new NV[] {// + new NV(Predicate.Annotations.RELATION_NAME, + new String[] { namespace }),// + new NV(Predicate.Annotations.BOP_ID, predId3),// + new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// + })); + + final PipelineOp join1Op = new PipelineJoin<E>(// + new BOp[]{startOp},// + new NV(Predicate.Annotations.BOP_ID, joinId1),// + new NV(PipelineJoin.Annotations.PREDICATE,pred1Op)); + + final PipelineOp subQuery; + { + final PipelineOp join2Op = new PipelineJoin<E>(// + new BOp[] { /*join1Op*/ },// + new NV(Predicate.Annotations.BOP_ID, joinId2),// +// new NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// + new NV(PipelineJoin.Annotations.PREDICATE, pred2Op)// +// // join is optional. +// new NV(PipelineJoin.Annotations.OPTIONAL, true),// +// // optional target is the same as the default target. +// new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId) + ); + + final PipelineOp join3Op = new PipelineJoin<E>(// + new BOp[] { join2Op },// + new NV(Predicate.Annotations.BOP_ID, joinId3),// +// new NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// + new NV(PipelineJoin.Annotations.PREDICATE, pred3Op)// +// // join is optional. +// new NV(PipelineJoin.Annotations.OPTIONAL, true),// +// // optional target is the same as the default target. +// new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId) + ); + subQuery = join3Op; + } + + final PipelineOp joinGroup1Op = new SubqueryOp(new BOp[]{join1Op}, + new NV(Predicate.Annotations.BOP_ID, joinGroup1),// +// new NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// + new NV(SubqueryOp.Annotations.SUBQUERY, subQuery)// +// , new NV(BOp.Annotations.CONTROLLER,true)// +// new NV(BOp.Annotations.EVALUATION_CONTEXT, +// BOpEvaluationContext.CONTROLLER)// +// // join is optional. +// new NV(PipelineJoin.Annotations.OPTIONAL, true),// +// // optional target is the same as the default target. 
+// new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId) + ); + + final PipelineOp sliceOp = new SliceOp(// + new BOp[]{joinGroup1Op}, + NV.asMap(new NV[] {// + new NV(BOp.Annotations.BOP_ID, sliceId),// + new NV(BOp.Annotations.EVALUATION_CONTEXT, + BOpEvaluationContext.CONTROLLER),// + })); + + final PipelineOp query = sliceOp; + + // start the query. + final UUID queryId = UUID.randomUUID(); + final IChunkMessage<IBindingSet> initialChunkMessage; + { + + final IBindingSet initialBindings = new HashBindingSet(); + +// initialBindings.set(Var.var("x"), new Constant<String>("Mary")); + + initialChunkMessage = new LocalChunkMessage<IBindingSet>(queryEngine, + queryId, startId,// + -1, // partitionId + newBindingSetIterator(initialBindings)); + } + final IRunningQuery runningQuery = queryEngine.eval(queryId, query, + initialChunkMessage); + ... [truncated message content] |
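The tests above, together with the filter variants in the deleted TestOptionalJoinGroup earlier in this message, all encode the same contract in their expected-solution arrays: each source solution is fed into the optional join group as a subquery; when the subquery succeeds its extended solutions are emitted, and when it fails the source solution is passed through unchanged. A minimal standalone sketch of that pass-through contract follows. The Subquery interface and the generic solution type are illustrative stand-ins written for this note, not bigdata APIs.

    import java.util.ArrayList;
    import java.util.List;

    /**
     * Illustrative sketch only: the pass-through semantics of an optional
     * join group. <S> stands in for IBindingSet and Subquery stands in for
     * evaluating the embedded join group from one source solution.
     */
    class OptionalGroupSemantics {

        interface Subquery<S> {
            /** Evaluate the optional group starting from the given solution. */
            List<S> apply(S solution);
        }

        static <S> List<S> run(final List<S> source, final Subquery<S> group) {

            final List<S> out = new ArrayList<S>();

            for (final S solution : source) {

                final List<S> extended = group.apply(solution);

                if (extended.isEmpty()) {

                    // The group failed: forward the source solution unchanged,
                    // e.g. (paul brad) above, discarding any temporary bindings
                    // made inside the group (the ?c bindings in the javadoc).
                    out.add(solution);

                } else {

                    // The group succeeded: emit the extended solutions.
                    out.addAll(extended);

                }

            }

            return out;

        }

    }

The pass-through branch is what lets the two-variable solutions such as (paul brad) survive in the expected-solutions arrays even though ?c and ?d never bind for them.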
From: <tho...@us...> - 2011-01-14 14:51:34
|
Revision: 4098 http://bigdata.svn.sourceforge.net/bigdata/?rev=4098&view=rev Author: thompsonbry Date: 2011-01-14 14:51:28 +0000 (Fri, 14 Jan 2011) Log Message: ----------- Modified SubqueryOp to ignore an InterruptedException or BufferClosedException in the subquery. These exceptions are normal if the subquery employs a slice and should not cause the parent query to terminate. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/SubqueryOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/AbstractRunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine_Slice.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/SubqueryOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/SubqueryOp.java 2011-01-14 14:47:56 UTC (rev 4097) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/SubqueryOp.java 2011-01-14 14:51:28 UTC (rev 4098) @@ -42,8 +42,10 @@ import com.bigdata.bop.engine.IRunningQuery; import com.bigdata.bop.engine.LocalChunkMessage; import com.bigdata.bop.engine.QueryEngine; +import com.bigdata.relation.accesspath.BufferClosedException; import com.bigdata.relation.accesspath.IAsynchronousIterator; import com.bigdata.relation.accesspath.ThickAsynchronousIterator; +import com.bigdata.util.InnerCause; import com.bigdata.util.concurrent.LatchedExecutor; /** @@ -361,6 +363,7 @@ public IRunningQuery call() throws Exception { IAsynchronousIterator<IBindingSet[]> subquerySolutionItr = null; + IRunningQuery runningQuery = null; try { final QueryEngine queryEngine = parentContext.getRunningQuery() @@ -376,7 +379,7 @@ final UUID queryId = UUID.randomUUID(); // execute the subquery, passing in the source binding set. - final IRunningQuery runningQuery = queryEngine + runningQuery = queryEngine .eval( queryId, (PipelineOp) subQueryOp, @@ -415,13 +418,25 @@ } catch (Throwable t) { /* - * If a subquery fails, then propagate the error to the - * parent and rethrow the first cause error out of the - * subquery. + * Note: SliceOp will cause other operators to be + * interrupted during normal evaluation but we do not want + * to terminate the parent query when this occurs. */ - throw new RuntimeException(ControllerTask.this.context - .getRunningQuery().halt(t)); + if (!InnerCause.isInnerCause(t, InterruptedException.class) + && !InnerCause.isInnerCause(t, BufferClosedException.class)) { + /* + * If a subquery fails, then propagate the error to the + * parent and rethrow the first cause error out of the + * subquery. 
+ */ + throw new RuntimeException(ControllerTask.this.context + .getRunningQuery().halt(t)); + + } + + return runningQuery; + } finally { if (subquerySolutionItr != null) Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/AbstractRunningQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/AbstractRunningQuery.java 2011-01-14 14:47:56 UTC (rev 4097) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/AbstractRunningQuery.java 2011-01-14 14:51:28 UTC (rev 4098) @@ -50,6 +50,7 @@ import com.bigdata.bop.solutions.SliceOp; import com.bigdata.journal.IIndexManager; import com.bigdata.journal.ITx; +import com.bigdata.relation.accesspath.BufferClosedException; import com.bigdata.relation.accesspath.IAsynchronousIterator; import com.bigdata.relation.accesspath.IBlockingBuffer; import com.bigdata.service.IBigdataFederation; @@ -798,7 +799,13 @@ try { - if (!InnerCause.isInnerCause(t, InterruptedException.class)) + /* + * Note: SliceOp will cause other operators to be interrupted + * during normal evaluation so it is not useful to log an + * InterruptedException @ ERROR. + */ + if (!InnerCause.isInnerCause(t, InterruptedException.class) + && !InnerCause.isInnerCause(t, BufferClosedException.class)) log.error(toString(), t); try { Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java 2011-01-14 14:47:56 UTC (rev 4097) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java 2011-01-14 14:51:28 UTC (rev 4098) @@ -1543,12 +1543,6 @@ * which do not succeed on the optional join are forwarded to the * {@link SliceOp} which is the target specified by the * {@link PipelineOp.Annotations#ALT_SINK_REF}. - * - * @todo Write unit test for optional join groups. Here the goal is to - * verify that intermediate results may skip more than one join. This - * was a problem for the old query evaluation approach since binding - * sets had to cascade through the query one join at a time. However, - * the new query engine design should handle this case. */ public void test_query_join2_optionals() throws Exception { Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine_Slice.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine_Slice.java 2011-01-14 14:47:56 UTC (rev 4097) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine_Slice.java 2011-01-14 14:51:28 UTC (rev 4098) @@ -123,6 +123,14 @@ } +// public void testStressThreadSafe() throws Exception { +// +// for(int i=0; i<1000; i++) { +// test_slice_threadSafe(); +// } +// +// } + public void test_slice_threadSafe() throws Exception { final long timeout = 10000; // ms This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
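The essence of this change is that the decision to halt the parent query is gated on walking the cause chain of whatever the subquery threw. A compact sketch of that gate is given below; the helper is an approximation of com.bigdata.util.InnerCause#isInnerCause written for illustration, and the BufferClosedException case from the commit would be tested the same way (it is omitted here only because that class is bigdata-specific).

    /**
     * Illustrative sketch only: deciding whether an error thrown out of a
     * subquery should terminate the parent query.
     */
    class SliceTerminationCheck {

        /** Walk the cause chain looking for an instance of the given class. */
        static boolean isInnerCause(Throwable t,
                final Class<? extends Throwable> cls) {

            while (t != null) {

                if (cls.isInstance(t))
                    return true; // found the sought cause in the chain.

                t = t.getCause();

            }

            return false;

        }

        /**
         * A satisfied SLICE interrupts the other running operators, so an
         * InterruptedException (or, per the commit, a BufferClosedException)
         * is normal termination rather than an error.
         */
        static boolean isNormalTermination(final Throwable t) {

            return isInnerCause(t, InterruptedException.class);

        }

    }

With such a predicate the catch block reduces to rethrowing, after halting the parent query, only when the failure is not a normal slice-driven termination, which is exactly the shape of the patched code above.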
From: <tho...@us...> - 2011-01-19 21:22:08
|
Revision: 4132 http://bigdata.svn.sourceforge.net/bigdata/?rev=4132&view=rev Author: thompsonbry Date: 2011-01-19 21:22:01 +0000 (Wed, 19 Jan 2011) Log Message: ----------- More unit tests for the PartitionedJoinGroup. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/PartitionedJoinGroup.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestPartitionedJoinGroup.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/PartitionedJoinGroup.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/PartitionedJoinGroup.java 2011-01-19 21:21:03 UTC (rev 4131) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/PartitionedJoinGroup.java 2011-01-19 21:22:01 UTC (rev 4132) @@ -31,7 +31,10 @@ * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * * @todo However, how do we manage when there are things like conditional - * routing operators? + * routing operators? [Answer - the CONDITION is raised onto the subquery + * such that we only conditionally run the subquery rather than routing + * out of the subquery if the condition is not satisfied - MikeP is making + * this change.] * * @todo The order of the {@link IPredicate}s in the tail plan is currently * unchanged from their given order (optional joins without constraints @@ -48,38 +51,97 @@ * leave a variable unbound). * * @todo runFirst flag on the expander (for free text search). this should be an - * annotation. this can be a headPlan. [There can be constraints which are - * evaluated against the head plan. They need to get attached to the joins - * generated for the head plan. MikeP writes: There is a free text search - * access path that replaces the actual access path for the predicate, - * which is meaningless in an of itself because the P is magical.] + * annotation. this can be a [headPlan]. [There can be constraints which + * are evaluated against the head plan. They need to get attached to the + * joins generated for the head plan. MikeP writes: There is a free text + * search access path that replaces the actual access path for the + * predicate, which is meaningless in an of itself because the P is + * magical.] * - * @todo inline APs and get rid of DataSetJoin. Rewrite NG and DG first. - * * @todo write a method which returns the set of constraints which should be run * for the last predicate in a given join path (a join path is just an * ordered array of predicates). + * + * FIXME Add a method to generate a runnable query plan from a collection + * of predicates and constraints. This is a bit different for the join + * graph and the optionals in the tail plan. The join graph itself should + * either be a {@link JoinGraph} operator which gets evaluated at run time + * or reordered by whichever optimizer is selected for the query (query + * hints). */ public class PartitionedJoinGroup { -// private final IPredicate<?>[] headPlan; -// -// private final IConstraint[] headPlanConstraints; + /** + * The set of variables bound by the non-optional predicates. + */ + private final Set<IVariable<?>> joinGraphVars = new LinkedHashSet<IVariable<?>>(); - private final IPredicate<?>[] joinGraphPredicates; + /** + * An unordered list of constraints which do not involve ANY variables. + * These constraints should be run first, before the join graph. + * + * @todo integrate into evaluation. 
+ */
+ private final List<IConstraint> runFirstConstraints = new LinkedList<IConstraint>();

- private final IConstraint[] joinGraphConstraints;
+ /**
+ * The set of the {@link IPredicate}s which have been flagged as
+ * "run first". These must all be non-optional predicates. They are usually
+ * special access paths created using an expander which replaces a mock
+ * access path. For example, free text search.
+ */
+ private final List<IPredicate<?>> headPlan = new LinkedList<IPredicate<?>>();

- private final IPredicate<?>[] tailPlan;
+ /**
+ * The set of constraints which can be evaluated with the head plan
+ * predicates because the variables appearing in those constraints are known
+ * to become bound within the head plan.
+ */
+ private final List<IConstraint> headPlanConstraints = new LinkedList<IConstraint>();

-// private final IConstraint[] tailPlanConstraints;
- 
+ /**
+ * The set of non-optional predicates which represent the join graph.
+ */
+ private final List<IPredicate<?>> joinGraphPredicates = new LinkedList<IPredicate<?>>();

 /**
+ * The set of constraints which can be evaluated with the join graph
+ * predicates because the variables appearing in those constraints are known
+ * to become bound within the join graph.
+ */
+ private final List<IConstraint> joinGraphConstraints = new LinkedList<IConstraint>();
+
+ /**
+ * A set of optional predicates which will be run after the join graph.
+ */
+ private final List<IPredicate<?>> tailPlan = new LinkedList<IPredicate<?>>();
+
+ /**
+ * An unordered list of those constraints containing at least one variable
+ * which is bound only (and optionally) within the tail plan.
+ */
+ private final List<IConstraint> tailPlanConstraints = new LinkedList<IConstraint>();
+
+ /**
+ * A map indicating which constraints are run for which predicate in the
+ * tail plan. The keys are the bopIds of the predicates in the tail plan.
+ * The values are the sets of constraints to run for that tail.
+ */
+ private final Map<Integer/* predId */, List<IConstraint>> tailPlanConstraintMap = new LinkedHashMap<Integer, List<IConstraint>>();
+
+ /**
+ * The set of variables bound by the non-optional predicates.
+ */
+ public Set<IVariable<?>> getJoinGraphVars() {
+ return joinGraphVars;
+ }
+
+ /**
 * The {@link IPredicate}s in the join graph (required joins).
 */
 public IPredicate<?>[] getJoinGraphPredicates() {
- return joinGraphPredicates;
+ return joinGraphPredicates.toArray(new IPredicate[joinGraphPredicates
+ .size()]);
 }

 /**
@@ -91,7 +153,8 @@
 * query optimizer when it decides on an evaluation order for those joins).
 */
 public IConstraint[] getJoinGraphConstraints() {
- return joinGraphConstraints;
+ return joinGraphConstraints
+ .toArray(new IConstraint[joinGraphConstraints.size()]);
 }

 /**
@@ -101,25 +164,60 @@
 * tail plan in which their variable(S) MIGHT have been bound.
*/ public IPredicate<?>[] getTailPlan() { - return tailPlan; + return tailPlan.toArray(new IPredicate[tailPlan.size()]); } -// private PartitionedJoinGroup(// -// final IPredicate<?>[] headPlan,// -// final IConstraint[] headPlanConstraints,// -// final IPredicate<?>[] joinGraphPredicates,// -// final IConstraint[] joinGraphConstraints,// -// final IPredicate<?>[] tailPlan,// -// final IConstraint[] tailPlanConstraints// -// ) { -// this.headPlan = headPlan; -// this.headPlanConstraints = headPlanConstraints; -// this.joinGraphPredicates = joinGraphPredicates; -// this.joinGraphConstraints = joinGraphConstraints; -// this.tailPlan = tailPlan; -// this.tailPlanConstraints = tailPlanConstraints; -// } + /** + * Return the set of {@link IConstraint}s which should be evaluated when an + * identified predicate having SPARQL optional semantics is evaluated. For + * constraints whose variables are not known to be bound when entering the + * tail plan, the constraint should be evaluated at the last predicate for + * which its variables MIGHT become bound. + * + * @param bopId + * The identifier for an {@link IPredicate} appearing in the tail + * plan. + * + * @return The set of constraints to be imposed by the join which evaluates + * that predicate. This will be an empty array if there are no + * constraints which can be imposed when that predicate is + * evaluated. + * + * @throws IllegalArgumentException + * if there is no such predicate in the tail plan. + */ + public IConstraint[] getTailPlanConstraints(final int bopId) { + boolean found = false; + + for (IPredicate<?> p : tailPlan) { + + if (p.getId() == bopId) { + + found = true; + + break; + + } + + } + + if (!found) + throw new IllegalArgumentException( + "No such predicate in tail plan: bopId=" + bopId); + + final List<IConstraint> constraints = tailPlanConstraintMap.get(bopId); + + if (constraints == null) { + + return new IConstraint[0]; + + } + + return constraints.toArray(new IConstraint[constraints.size()]); + + } + /** * Analyze a set of {@link IPredicate}s representing optional and * non-optional joins and a collection of {@link IConstraint}s, partitioning @@ -156,52 +254,6 @@ } /* - * An unordered list of constraints which do not involve ANY variables. - * These constraints should be run first, before the join graph. - * - * @todo add to the class instance fields. - */ - final List<IConstraint> runFirstConstraints = new LinkedList<IConstraint>(); - -// final List<IPredicate<?>> headPlan = new LinkedList<IPredicate<?>>(); -// -// final List<IConstraint> headPlanConstraints = new LinkedList<IConstraint>(); - - /* - * The non-optional predicates. - */ - final List<IPredicate<?>> joinGraphPredicates = new LinkedList<IPredicate<?>>(); - - /* - * The set of variables bound by the non-optional predicates. - */ - final Set<IVariable<?>> joinGraphVars = new LinkedHashSet<IVariable<?>>(); - - /* - * An unordered list of those constraints whose variables are known to - * be bound by the non-optional predicates. - */ - final List<IConstraint> joinGraphConstraints = new LinkedList<IConstraint>(); - - /* - * The predicates representing the optional joins. - */ - final List<IPredicate<?>> tailPlan = new LinkedList<IPredicate<?>>(); - - /* - * An unordered list of those constraints containing at least one - * variable known to NOT be bound by the non-optional predicates. - */ - final List<IConstraint> tailPlanConstraints = new LinkedList<IConstraint>(); - - /* - * Map indicating which constraints are run for which predicate in the - * tail plan. 
The keys are the bopIds of the predicates in the tail - * plan. The values are the sets of constraints to run for that tail. - */ - final Map<Integer/* predId */, List<IConstraint>> tailPlanConstraintMap = new LinkedHashMap<Integer, List<IConstraint>>(); - - /* * First identify the predicates which correspond to non-optional joins. * All other pipeline operators are inserted into the tail plan in the * order in which they are given. @@ -209,25 +261,38 @@ for (IPredicate<?> p : sourcePreds) { if (p == null) throw new IllegalArgumentException(); - if (!p.isOptional()) { + if (p.isOptional()) { + if (p.getAccessPathExpander() != null + && p.getAccessPathExpander().runFirst()) + throw new IllegalStateException( + "runFirst is not compatible with optional: " + p); + // an optional predicate + tailPlan.add(p); + } else { // non-optional predicate. - joinGraphPredicates.add(p); - // variables which will be bound by the join graph. + if (p.getAccessPathExpander() != null + && p.getAccessPathExpander().runFirst()) { + headPlan.add(p); + } else { + // part of the join graph. + joinGraphPredicates.add(p); + } + /* + * Add to the set of variables which will be bound by the time + * the join graph is done executing. + */ final Iterator<IVariable<?>> vitr = BOpUtility .getArgumentVariables(p); while (vitr.hasNext()) { joinGraphVars.add(vitr.next()); } - } else { - // an optional predicate - tailPlan.add(p); } } /* - * Now break the constraints into two groups - those whose variables are - * bound by the predicates in the join graph (required joins) and those - * having at least one variable bound by an optional join. + * Now break the constraints into different groups based on their + * variables and when those variables are known to be bound (required + * joins) or might be bound (optionals). */ for (IConstraint c : constraints) { boolean allFound = true; @@ -239,7 +304,7 @@ * we should evaluate it as soon as possible. I.e., before the * join graph. */ - runFirstConstraints.add(c); + runFirstConstraints.add(c); // @todo unit test. continue; } while (vitr.hasNext()) { @@ -342,19 +407,6 @@ } - /* - * Assign to instance fields. 
- */ - // @todo headPlan -// this.headPlan = null; -// this.headPlanConstraints = null; - this.joinGraphPredicates = joinGraphPredicates - .toArray(new IPredicate[joinGraphPredicates.size()]); - this.joinGraphConstraints = joinGraphConstraints - .toArray(new IConstraint[joinGraphConstraints.size()]); - this.tailPlan = tailPlan.toArray(new IPredicate[tailPlan.size()]); - // @todo tailPlanConstraintMap - } - + } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestPartitionedJoinGroup.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestPartitionedJoinGroup.java 2011-01-19 21:21:03 UTC (rev 4131) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestPartitionedJoinGroup.java 2011-01-19 21:22:01 UTC (rev 4132) @@ -27,9 +27,21 @@ package com.bigdata.bop.controller; +import java.util.Arrays; +import java.util.Iterator; + import junit.framework.TestCase2; +import com.bigdata.bop.BOp; +import com.bigdata.bop.Constant; +import com.bigdata.bop.IConstraint; import com.bigdata.bop.IPredicate; +import com.bigdata.bop.IVariable; +import com.bigdata.bop.NV; +import com.bigdata.bop.Var; +import com.bigdata.bop.IPredicate.Annotations; +import com.bigdata.bop.ap.Predicate; +import com.bigdata.bop.constraint.NEConstant; /** * Unit tests for {@link PartitionedJoinGroup}. @@ -84,13 +96,407 @@ } /** - * @todo test with headPlan, tailPlan. - * @todo test association of constraints to optional joins. + * A test based loosely on LUBM Q2. There are no RDF specific constructions + * used here. + */ + public void test_requiredJoins() { + + final String rdfType = "rdfType"; + final String graduateStudent = "graduateStudent"; + final String university = "university"; + final String department = "department"; + final String memberOf = "memberOf"; + final String subOrganizationOf = "subOrganizationOf"; + final String undergraduateDegreeFrom = "undergraduateDegreeFrom"; + + final IPredicate<?>[] preds; + final IPredicate<?> p0, p1, p2, p3, p4, p5; + final IVariable<?> x = Var.var("x"); + final IVariable<?> y = Var.var("y"); + final IVariable<?> z = Var.var("z"); + { + + // The name space for the SPO relation. + final String[] relation = new String[] { "spo" }; + + final long timestamp = System.currentTimeMillis(); + + int nextId = 0; + + // ?x a ub:GraduateStudent . + p0 = new Predicate(new BOp[] { x, + new Constant<String>(rdfType), + new Constant<String>(graduateStudent) },// + new NV(BOp.Annotations.BOP_ID, nextId++),// + new NV(Annotations.TIMESTAMP, timestamp),// + new NV(IPredicate.Annotations.RELATION_NAME, relation)// + ); + + // ?y a ub:University . + p1 = new Predicate(new BOp[] { y, + new Constant<String>(rdfType), + new Constant<String>(university) },// + new NV(BOp.Annotations.BOP_ID, nextId++),// + new NV(Annotations.TIMESTAMP, timestamp),// + new NV(IPredicate.Annotations.RELATION_NAME, relation)// + ); + + // ?z a ub:Department . + p2 = new Predicate(new BOp[] { z, + new Constant<String>(rdfType), + new Constant<String>(department) },// + new NV(BOp.Annotations.BOP_ID, nextId++),// + new NV(Annotations.TIMESTAMP, timestamp),// + new NV(IPredicate.Annotations.RELATION_NAME, relation)// + ); + + // ?x ub:memberOf ?z . 
+ p3 = new Predicate(new BOp[] { x,
+ new Constant<String>(memberOf), z },//
+ new NV(BOp.Annotations.BOP_ID, nextId++),//
+ new NV(Annotations.TIMESTAMP, timestamp),//
+ new NV(IPredicate.Annotations.RELATION_NAME, relation)//
+ );
+
+ // ?z ub:subOrganizationOf ?y .
+ p4 = new Predicate(new BOp[] { z,
+ new Constant<String>(subOrganizationOf), y },//
+ new NV(BOp.Annotations.BOP_ID, nextId++),//
+ new NV(Annotations.TIMESTAMP, timestamp),//
+ new NV(IPredicate.Annotations.RELATION_NAME, relation)//
+ );
+
+ // ?x ub:undergraduateDegreeFrom ?y
+ p5 = new Predicate(new BOp[] { x,
+ new Constant<String>(undergraduateDegreeFrom), y },//
+ new NV(BOp.Annotations.BOP_ID, nextId++),//
+ new NV(Annotations.TIMESTAMP, timestamp),//
+ new NV(IPredicate.Annotations.RELATION_NAME, relation)//
+ );
+
+ // the vertices of the join graph (the predicates).
+ preds = new IPredicate[] { p0, p1, p2, p3, p4, p5 };
+ }
+
+ // Test w/o any constraints.
+ {
+
+ final IConstraint[] constraints = new IConstraint[] {
+
+ };
+
+ final PartitionedJoinGroup fixture = new PartitionedJoinGroup(
+ preds, constraints);
+
+ // all variables are bound within the join graph.
+ assertSameIteratorAnyOrder("joinGraphVars", new IVariable[] { x, y,
+ z }, fixture.getJoinGraphVars().iterator());
+
+ // verify all predicates were placed into the join graph.
+ assertSameIteratorAnyOrder("joinGraph", preds, Arrays.asList(
+ fixture.getJoinGraphPredicates()).iterator());
+
+ // there are no constraints.
+ assertEquals("joinGraphConstraints.size", 0, fixture
+ .getJoinGraphConstraints().length);
+
+ // there is no tail plan.
+ assertEquals("tailPlan", new IPredicate[] {}, fixture.getTailPlan());
+
+ }
+
+ // Test w/ constraint(s) on the join graph.
+ {
+
+ final IConstraint c1 = new NEConstant(x,
+ new Constant<String>("Bob"));
+
+ final IConstraint c2 = new NEConstant(y,
+ new Constant<String>("UNCG"));
+
+ final IConstraint[] constraints = new IConstraint[] { c1, c2 };
+
+ final PartitionedJoinGroup fixture = new PartitionedJoinGroup(
+ preds, constraints);
+
+ // all variables are bound within the join graph.
+ assertSameIteratorAnyOrder("joinGraphVars", new IVariable[] { x, y,
+ z }, fixture.getJoinGraphVars().iterator());
+
+ // verify all predicates were placed into the join graph.
+ assertSameIteratorAnyOrder("joinGraph", preds, Arrays.asList(
+ fixture.getJoinGraphPredicates()).iterator());
+
+ // verify all constraints were placed on the join graph.
+ assertSameIteratorAnyOrder("joinGraphConstraints", constraints,
+ Arrays.asList(fixture.getJoinGraphConstraints()).iterator());
+
+ // there is no tail plan.
+ assertEquals("tailPlan", new IPredicate[] {}, fixture.getTailPlan());
+
+ }
+
+ }
+
+ /**
+ * A test when there are optional joins involved. In this test, we again
+ * start with LUBM Q2, but the predicates which would bind <code>z</code>
+ * are all marked as optional. This should shift the constraint on [z] into
+ * the tail plan as well.
+ */ + public void test_withOptionalJoins() { + + final String rdfType = "rdfType"; + final String graduateStudent = "graduateStudent"; + final String university = "university"; + final String department = "department"; + final String memberOf = "memberOf"; + final String subOrganizationOf = "subOrganizationOf"; + final String undergraduateDegreeFrom = "undergraduateDegreeFrom"; + + final IPredicate<?>[] preds; + final IPredicate<?> p0, p1, p2, p3, p4, p5; + final IVariable<?> x = Var.var("x"); + final IVariable<?> y = Var.var("y"); + final IVariable<?> z = Var.var("z"); + { + + // The name space for the SPO relation. + final String[] relation = new String[] { "spo" }; + + final long timestamp = System.currentTimeMillis(); + + int nextId = 0; + + // ?x a ub:GraduateStudent . + p0 = new Predicate(new BOp[] { x, + new Constant<String>(rdfType), + new Constant<String>(graduateStudent) },// + new NV(BOp.Annotations.BOP_ID, nextId++),// + new NV(Annotations.TIMESTAMP, timestamp),// + new NV(IPredicate.Annotations.RELATION_NAME, relation)// + ); + + // ?y a ub:University . + p1 = new Predicate(new BOp[] { y, + new Constant<String>(rdfType), + new Constant<String>(university) },// + new NV(BOp.Annotations.BOP_ID, nextId++),// + new NV(Annotations.TIMESTAMP, timestamp),// + new NV(IPredicate.Annotations.RELATION_NAME, relation)// + ); + + // ?z a ub:Department . (optional) + p2 = new Predicate(new BOp[] { z, + new Constant<String>(rdfType), + new Constant<String>(department) },// + new NV(BOp.Annotations.BOP_ID, nextId++),// + new NV(Annotations.TIMESTAMP, timestamp),// + new NV(IPredicate.Annotations.OPTIONAL, true),// + new NV(IPredicate.Annotations.RELATION_NAME, relation)// + ); + + // ?x ub:memberOf ?z . (optional). + p3 = new Predicate(new BOp[] { x, + new Constant<String>(memberOf), z },// + new NV(BOp.Annotations.BOP_ID, nextId++),// + new NV(Annotations.TIMESTAMP, timestamp),// + new NV(IPredicate.Annotations.OPTIONAL, true),// + new NV(IPredicate.Annotations.RELATION_NAME, relation)// + ); + + // ?z ub:subOrganizationOf ?y . (optional). + p4 = new Predicate(new BOp[] { z, + new Constant<String>(subOrganizationOf), y },// + new NV(BOp.Annotations.BOP_ID, nextId++),// + new NV(Annotations.TIMESTAMP, timestamp),// + new NV(IPredicate.Annotations.OPTIONAL, true),// + new NV(IPredicate.Annotations.RELATION_NAME, relation)// + ); + + // ?x ub:undergraduateDegreeFrom ?y + p5 = new Predicate(new BOp[] { x, + new Constant<String>(undergraduateDegreeFrom), y },// + new NV(BOp.Annotations.BOP_ID, nextId++),// + new NV(Annotations.TIMESTAMP, timestamp),// + new NV(IPredicate.Annotations.RELATION_NAME, relation)// + ); + + // the vertices of the join graph (the predicates). + preds = new IPredicate[] { p0, p1, p2, p3, p4, p5 }; + } + + // Test w/o any constraints. + { + + final IConstraint[] constraints = new IConstraint[] { + + }; + + final PartitionedJoinGroup fixture = new PartitionedJoinGroup( + preds, constraints); + + // only {x,y} are bound within the join graph. + assertSameIteratorAnyOrder("joinGraphVars", + new IVariable[] { x, y }, fixture.getJoinGraphVars() + .iterator()); + + // verify predicates placed into the join graph. + assertSameIteratorAnyOrder("joinGraph", new IPredicate[] { p0, p1, + p5 }, Arrays.asList(fixture.getJoinGraphPredicates()) + .iterator()); + + // there are no constraints on the join graph predicates. + assertEquals("joinGraphConstraints.size", 0, fixture + .getJoinGraphConstraints().length); + + // {p2, p3,p4} are in the tail plan. 
+ assertEquals("tailPlan", new IPredicate[] { p2, p3, p4 }, fixture + .getTailPlan()); + + // no constraints were assigned to optional predicate [p2]. + assertEquals("", 0, + fixture.getTailPlanConstraints(p2.getId()).length); + + // no constraints were assigned to optional predicate [p3]. + assertEquals("", 0, + fixture.getTailPlanConstraints(p3.getId()).length); + + // no constraints were assigned to optional predicate [p4]. + assertEquals("", 0, + fixture.getTailPlanConstraints(p4.getId()).length); + + } + + // Test w/ constraint(s) on the join graph. + { + + final IConstraint c1 = new NEConstant(x, + new Constant<String>("Bob")); + + final IConstraint c2 = new NEConstant(y, + new Constant<String>("UNCG")); + + final IConstraint c3 = new NEConstant(z, + new Constant<String>("Physics")); + + final IConstraint[] constraints = new IConstraint[] { c1, c2, c3 }; + + final PartitionedJoinGroup fixture = new PartitionedJoinGroup( + preds, constraints); + + // only {x,y} are bound within the join graph. + assertSameIteratorAnyOrder("joinGraphVars", + new IVariable[] { x, y }, fixture.getJoinGraphVars() + .iterator()); + + // verify predicates placed into the join graph. + assertSameIteratorAnyOrder("joinGraph", new IPredicate[] { p0, p1, + p5 }, Arrays.asList(fixture.getJoinGraphPredicates()) + .iterator()); + + // verify constraints on the join graph. + assertSameIteratorAnyOrder("joinGraphConstraints", + new IConstraint[] { c1, c2 }, Arrays.asList( + fixture.getJoinGraphConstraints()).iterator()); + + // {p2,p3,p4} are in the tail plan. + assertEquals("tailPlan", new IPredicate[] { p2, p3, p4 }, fixture + .getTailPlan()); + + // no constraints were assigned to optional predicate [p2]. + assertEquals("", new IConstraint[] {}, fixture + .getTailPlanConstraints(p2.getId())); + + // no constraints were assigned to optional predicate [p3]. + assertEquals("", new IConstraint[] {}, fixture + .getTailPlanConstraints(p3.getId())); + + // the constraint on [z] was assigned to optional predicate [p4]. + assertEquals("", new IConstraint[] { c3 }, fixture + .getTailPlanConstraints(p4.getId())); + + } + + } + + /** + * @todo test with headPlan. + * + * @todo test for runFirst constraints. + * * @todo test logic to attach constraints to non-optional joins based on a * given join path (not yet written). */ public void test_something() { fail("write tests"); } - + + /** + * Verifies that the iterator visits the specified objects in some arbitrary + * ordering and that the iterator is exhausted once all expected objects + * have been visited. The implementation uses a selection without + * replacement "pattern". + */ + @SuppressWarnings("unchecked") + static public void assertSameIteratorAnyOrder(final Object[] expected, + final Iterator actual) { + + assertSameIteratorAnyOrder("", expected, actual); + + } + + /** + * Verifies that the iterator visits the specified objects in some arbitrary + * ordering and that the iterator is exhausted once all expected objects + * have been visited. The implementation uses a selection without + * replacement "pattern". + */ + @SuppressWarnings("unchecked") + static public void assertSameIteratorAnyOrder(final String msg, + final Object[] expected, final Iterator actual) { + + // Populate a map that we will use to realize the match and + // selection without replacement logic. 
+ + final int nrange = expected.length; + + java.util.Map range = new java.util.HashMap(); + + for (int j = 0; j < nrange; j++) { + + range.put(expected[j], expected[j]); + + } + + // Do selection without replacement for the objects visited by + // iterator. + + for (int j = 0; j < nrange; j++) { + + if (!actual.hasNext()) { + + fail(msg + ": Index exhausted while expecting more object(s)" + + ": index=" + j); + + } + + Object actualObject = actual.next(); + + if (range.remove(actualObject) == null) { + + fail("Object not expected" + ": index=" + j + ", object=" + + actualObject); + + } + + } + + if (actual.hasNext()) { + + fail("Iterator will deliver too many objects."); + + } + + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
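The assertSameIteratorAnyOrder helper added above is a selection-without-replacement check: every visited object must remove a distinct entry from a pool of expected objects, and the iterator must be exhausted exactly when the pool empties. The following standalone sketch restates the same pattern outside of JUnit (AnyOrderCheckDemo and checkSameAnyOrder are illustrative names, not part of the commit):

import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

public class AnyOrderCheckDemo {

    // Selection without replacement: each visited object must remove a
    // distinct entry from the pool of expected objects.
    static void checkSameAnyOrder(final Object[] expected,
            final Iterator<?> actual) {
        final Map<Object, Object> pool = new HashMap<Object, Object>();
        for (Object e : expected)
            pool.put(e, e);
        for (int j = 0; j < expected.length; j++) {
            if (!actual.hasNext())
                throw new AssertionError("Iterator exhausted at index=" + j);
            final Object visited = actual.next();
            if (pool.remove(visited) == null)
                throw new AssertionError("Object not expected: " + visited);
        }
        if (actual.hasNext())
            throw new AssertionError("Iterator will deliver too many objects.");
    }

    public static void main(final String[] args) {
        // Accepted: same elements, different order.
        checkSameAnyOrder(new Object[] { "x", "y", "z" },
                Arrays.asList("z", "x", "y").iterator());
        // Rejected: "w" was never expected.
        try {
            checkSameAnyOrder(new Object[] { "x", "y" },
                    Arrays.asList("x", "w").iterator());
        } catch (AssertionError ok) {
            System.out.println("rejected as expected: " + ok.getMessage());
        }
    }
}

Note that because the pool is keyed on the expected objects themselves, two expected objects that compare equal collapse into a single map entry; the committed helper therefore assumes the expected objects are pairwise distinct.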
From: <tho...@us...> - 2011-01-19 22:41:58
|
Revision: 4133
http://bigdata.svn.sourceforge.net/bigdata/?rev=4133&view=rev
Author: thompsonbry
Date: 2011-01-19 22:41:51 +0000 (Wed, 19 Jan 2011)

Log Message:
-----------
More work on the PartitionedJoinGroup utility and its test suite. Some javadoc and minor edits related to [1].

[1] https://sourceforge.net/apps/trac/bigdata/ticket/123

Modified Paths:
--------------
branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/PartitionedJoinGroup.java
branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/journal/DiskBackedBufferStrategy.java
branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/journal/DiskOnlyStrategy.java
branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestPartitionedJoinGroup.java

Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/PartitionedJoinGroup.java
===================================================================
--- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/PartitionedJoinGroup.java 2011-01-19 21:22:01 UTC (rev 4132)
+++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/PartitionedJoinGroup.java 2011-01-19 22:41:51 UTC (rev 4133)
@@ -30,12 +30,6 @@
 *
 * @author <a href="mailto:tho...@us...">Bryan Thompson</a>
 *
- * @todo However, how do we manage when there are things like conditional
- * routing operators? [Answer - the CONDITION is raised onto the subquery
- * such that we only conditionally run the subquery rather than routing
- * out of the subquery if the condition is not satisfied - MikeP is making
- * this change.]
- *
 * @todo The order of the {@link IPredicate}s in the tail plan is currently
 * unchanged from their given order (optional joins without constraints
 * can not reduce the selectivity of the query). However, it could be
@@ -75,39 +69,26 @@
 * The set of variables bound by the non-optional predicates.
 */
 private final Set<IVariable<?>> joinGraphVars = new LinkedHashSet<IVariable<?>>();
-
- /**
- * An unordered list of constraints which do not involve ANY variables.
- * These constraints should be run first, before the join graph.
- *
- * @todo integrate into evaluation.
- */
- private final List<IConstraint> runFirstConstraints = new LinkedList<IConstraint>();

 /**
- * The set of the {@link IPredicate}s which have been flagged as
- * "run first". These must all be non-optional predicates. They are usually
- * special access paths created using an expander which replaces a mock
- * access path. For example, free text search.
+ * The set of non-optional predicates which have been flagged as
+ * "run first". These are usually special access paths created using an
+ * expander which replaces a mock access path. For example, free text
+ * search.
 */
 private final List<IPredicate<?>> headPlan = new LinkedList<IPredicate<?>>();

 /**
- * The set of constraints which can be evaluated with the head plan
- * predicates because the variables appearing in those constraints are known
- * to become bound within the head plan.
- */
- private final List<IConstraint> headPlanConstraints = new LinkedList<IConstraint>();
-
- /**
 * The set of non-optional predicates which represent the join graph.
 */
- private final List<IPredicate<?>> joinGraphPredicates = new LinkedList<IPredicate<?>>();
+ private final List<IPredicate<?>> joinGraph = new LinkedList<IPredicate<?>>();

 /**
- * The set of constraints which can be evaluated with the join graph
- * predicates because the variables appearing in those constraints are known
- * to become bound within the join graph.
+ * The set of constraints which can be evaluated with the head plan and/or
+ * join graph predicates because the variables appearing in those
+ * constraints are known to become bound within the join graph. (The
+ * {@link #headPlan} and the {@link #joinGraph} share the same
+ * pool of constraints.)
 */
 private final List<IConstraint> joinGraphConstraints = new LinkedList<IConstraint>();

@@ -128,9 +109,10 @@
 * The values are the sets of constraints to run for that tail.
 */
 private final Map<Integer/* predId */, List<IConstraint>> tailPlanConstraintMap = new LinkedHashMap<Integer, List<IConstraint>>();
-
+
 /**
- * The set of variables bound by the non-optional predicates.
+ * The set of variables bound by the non-optional predicates (either the
+ * head plan or the join graph).
 */
 public Set<IVariable<?>> getJoinGraphVars() {
 return joinGraphVars;
@@ -139,9 +121,8 @@
 /**
 * The {@link IPredicate}s in the join graph (required joins).
 */
- public IPredicate<?>[] getJoinGraphPredicates() {
- return joinGraphPredicates.toArray(new IPredicate[joinGraphPredicates
- .size()]);
+ public IPredicate<?>[] getJoinGraph() {
+ return joinGraph.toArray(new IPredicate[joinGraph.size()]);
 }

 /**
@@ -275,7 +256,7 @@
 headPlan.add(p);
 } else {
 // part of the join graph.
- joinGraphPredicates.add(p);
+ joinGraph.add(p);
 }
 /*
 * Add to the set of variables which will be bound by the time
@@ -300,12 +281,9 @@
 .getSpannedVariables(c);
 if (!vitr.hasNext()) {
 /*
- * This is a constraint which does not involve any variable so
- * we should evaluate it as soon as possible. I.e., before the
- * join graph.
+ * All constraints should have at least one variable.
 */
- runFirstConstraints.add(c); // @todo unit test.
- continue;
+ throw new RuntimeException("No variables in constraint: " + c);
 }
 while (vitr.hasNext()) {
 final IVariable<?> var = vitr.next();

Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/journal/DiskBackedBufferStrategy.java
===================================================================
--- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/journal/DiskBackedBufferStrategy.java 2011-01-19 21:22:01 UTC (rev 4132)
+++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/journal/DiskBackedBufferStrategy.java 2011-01-19 22:41:51 UTC (rev 4133)
@@ -109,7 +109,7 @@
 /**
 * Forces the data to disk.
 */
- public void force( boolean metadata ) {
+ public void force(final boolean metadata) {

 try {

Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/journal/DiskOnlyStrategy.java
===================================================================
--- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/journal/DiskOnlyStrategy.java 2011-01-19 21:22:01 UTC (rev 4132)
+++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/journal/DiskOnlyStrategy.java 2011-01-19 22:41:51 UTC (rev 4133)
@@ -1363,10 +1363,10 @@
 }

 /**
- * {@link #flushWriteCache() flushs} the {@link #writeCache} before syncing
+ * {@link #flushWriteCache() flushes} the {@link #writeCache} before syncing
 * the disk.
 */
- public void force(boolean metadata) {
+ public void force(final boolean metadata) {

 assertOpen();

Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestPartitionedJoinGroup.java
===================================================================
--- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestPartitionedJoinGroup.java 2011-01-19 21:22:01 UTC (rev 4132)
+++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestPartitionedJoinGroup.java 2011-01-19 22:41:51 UTC (rev 4133)
@@ -194,11 +194,11 @@

 // verify all predicates were placed into the join graph.
 assertSameIteratorAnyOrder("joinGraph", preds, Arrays.asList(
- fixture.getJoinGraphPredicates()).iterator());
+ fixture.getJoinGraph()).iterator());

 // there are no constraints.
- assertEquals("joinGraphConstraints.size", 0, fixture
- .getJoinGraphConstraints().length);
+ assertEquals("joinGraphConstraints.size", new IConstraint[] {},
+ fixture.getJoinGraphConstraints());

 // there is no tail plan.
 assertEquals("tailPlan", new IPredicate[] {}, fixture.getTailPlan());
@@ -225,7 +225,7 @@

 // verify all predicates were placed into the join graph.
 assertSameIteratorAnyOrder("joinGraph", preds, Arrays.asList(
- fixture.getJoinGraphPredicates()).iterator());
+ fixture.getJoinGraph()).iterator());

 // verify all constraints were placed on the join graph.
 assertSameIteratorAnyOrder("joinGraphConstraints", constraints,
@@ -343,7 +343,7 @@

 // verify predicates placed into the join graph.
 assertSameIteratorAnyOrder("joinGraph", new IPredicate[] { p0, p1,
- p5 }, Arrays.asList(fixture.getJoinGraphPredicates())
+ p5 }, Arrays.asList(fixture.getJoinGraph())
 .iterator());

 // there are no constraints on the join graph predicates.
@@ -392,7 +392,7 @@

 // verify predicates placed into the join graph.
 assertSameIteratorAnyOrder("joinGraph", new IPredicate[] { p0, p1,
- p5 }, Arrays.asList(fixture.getJoinGraphPredicates())
+ p5 }, Arrays.asList(fixture.getJoinGraph())
 .iterator());

 // verify constraints on the join graph.
@@ -423,8 +423,6 @@
 /**
 * @todo test with headPlan.
 *
- * @todo test for runFirst constraints.
- *
 * @todo test logic to attach constraints to non-optional joins based on a
 * given join path (not yet written).
 */

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
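The partitioning behavior exercised by these tests can be summarized as follows: non-optional predicates form the join graph and contribute its variables; optional predicates form the tail plan; a constraint is evaluated within the join graph if and only if all of its variables become bound there, and otherwise it is attached to a tail-plan predicate. The toy sketch below reproduces the expected test outcomes with plain data classes (Pred, Constr, and PartitionSketch are illustrative stand-ins, not the bigdata API, and the attachment rule shown, namely "attach to the last tail-plan predicate sharing a variable with the constraint", is inferred from the expected outcome for c3 and p4 rather than from the committed implementation):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

public class PartitionSketch {

    static final class Pred {
        final String id; final boolean optional; final Set<String> vars;
        Pred(String id, boolean optional, String... vars) {
            this.id = id;
            this.optional = optional;
            this.vars = new LinkedHashSet<String>(Arrays.asList(vars));
        }
    }

    static final class Constr {
        final String id; final Set<String> vars;
        Constr(String id, String... vars) {
            this.id = id;
            this.vars = new LinkedHashSet<String>(Arrays.asList(vars));
        }
    }

    public static void main(String[] args) {
        // The LUBM Q2 shape from the tests: p2, p3 and p4 are optional.
        final List<Pred> preds = Arrays.asList(
                new Pred("p0", false, "x"), new Pred("p1", false, "y"),
                new Pred("p2", true, "z"), new Pred("p3", true, "x", "z"),
                new Pred("p4", true, "z", "y"), new Pred("p5", false, "x", "y"));
        final List<Constr> constraints = Arrays.asList(
                new Constr("c1", "x"), new Constr("c2", "y"), new Constr("c3", "z"));

        // Non-optional predicates form the join graph and bind its variables.
        final Set<String> joinGraphVars = new LinkedHashSet<String>();
        final List<Pred> joinGraph = new ArrayList<Pred>();
        final List<Pred> tailPlan = new ArrayList<Pred>();
        for (Pred p : preds) {
            if (p.optional) {
                tailPlan.add(p);
            } else {
                joinGraph.add(p);
                joinGraphVars.addAll(p.vars);
            }
        }

        // A constraint runs in the join graph iff all its variables are bound
        // there; otherwise attach it to the last tail-plan predicate sharing a
        // variable with it, so it runs once the variable can no longer change.
        final List<Constr> joinGraphConstraints = new ArrayList<Constr>();
        final Map<String, List<Constr>> tailPlanConstraintMap = new LinkedHashMap<String, List<Constr>>();
        for (Constr c : constraints) {
            if (joinGraphVars.containsAll(c.vars)) {
                joinGraphConstraints.add(c);
                continue;
            }
            for (int i = tailPlan.size() - 1; i >= 0; i--) {
                final Pred p = tailPlan.get(i);
                if (!Collections.disjoint(p.vars, c.vars)) {
                    List<Constr> list = tailPlanConstraintMap.get(p.id);
                    if (list == null)
                        tailPlanConstraintMap.put(p.id, list = new ArrayList<Constr>());
                    list.add(c);
                    break;
                }
            }
        }

        System.out.println("joinGraphVars=" + joinGraphVars);     // [x, y]
        System.out.println("joinGraph size=" + joinGraph.size()); // 3 (p0, p1, p5)
        System.out.println("tailPlan size=" + tailPlan.size());   // 3 (p2, p3, p4)
        System.out.println("c3 attaches to " + tailPlanConstraintMap.keySet()); // [p4]
    }
}

Under this reading, c3 cannot run inside the join graph because z is only ever bound by optional joins; it runs at p4, the last point in the tail plan at which z appears.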
From: <tho...@us...> - 2011-01-20 22:06:01
|
Revision: 4149
http://bigdata.svn.sourceforge.net/bigdata/?rev=4149&view=rev
Author: thompsonbry
Date: 2011-01-20 22:05:55 +0000 (Thu, 20 Jan 2011)

Log Message:
-----------
Fixed the build, which was broken by the commit of the memory manager with a missing interface (I just commented out the references to the interface -- see the detailed commit log). Added the addr for which a problem was reported to the runtime exception thrown by the RWStore. Added TestMemoryManager to the automated test suite runs (TestAll).

Modified Paths:
--------------
branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java
branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/rwstore/sector/MemoryManager.java
branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/rwstore/sector/SectorAllocator.java
branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/rwstore/TestAll.java

Added Paths:
-----------
branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/rwstore/sector/TestAll.java

Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java
===================================================================
--- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java 2011-01-20 21:54:50 UTC (rev 4148)
+++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java 2011-01-20 22:05:55 UTC (rev 4149)
@@ -1441,7 +1441,7 @@
 // log.error(e,e);
 // throw new IllegalArgumentException("Unable to read data: "+e, e);

- throw new RuntimeException(e);
+ throw new RuntimeException("addr=" + addr + " : cause=" + e, e);

 }
 } finally {

Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/rwstore/sector/MemoryManager.java
===================================================================
--- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/rwstore/sector/MemoryManager.java 2011-01-20 21:54:50 UTC (rev 4148)
+++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/rwstore/sector/MemoryManager.java 2011-01-20 22:05:55 UTC (rev 4149)
@@ -82,7 +82,7 @@
 if ((m_allocation + m_sectorSize) > m_maxResource) {
 throw new MemoryManagerResourceError();
 }
- SectorAllocator sector = new SectorAllocator(this, null);
+ SectorAllocator sector = new SectorAllocator(this);//, null);
 sector.setSectorAddress(m_allocation, m_sectorSize);
 sector.setIndex(m_sectors.size());
 m_sectors.add(sector);

Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/rwstore/sector/SectorAllocator.java
===================================================================
--- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/rwstore/sector/SectorAllocator.java 2011-01-20 21:54:50 UTC (rev 4148)
+++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/rwstore/sector/SectorAllocator.java 2011-01-20 22:05:55 UTC (rev 4149)
@@ -33,7 +33,7 @@

 import com.bigdata.io.DirectBufferPool;
 import com.bigdata.rwstore.FixedOutputStream;
-import com.bigdata.rwstore.IWriteCacheManager;
+//import com.bigdata.rwstore.IWriteCacheManager;

 /**
 * The SectorAllocator is designed as an alternative to the standard RWStore
@@ -126,11 +126,11 @@
 final ISectorManager m_store;
 boolean m_onFreeList = false;
 private long m_diskAddr;
- private final IWriteCacheManager m_writes;
+// private final IWriteCacheManager m_writes;

- public SectorAllocator(ISectorManager store, IWriteCacheManager writes) {
+ public SectorAllocator(ISectorManager store) {//, IWriteCacheManager writes) {
 m_store = store;
- m_writes = writes;
+// m_writes = writes;
 }

 /**
@@ -276,10 +276,10 @@
 m_store.addToFreeList(this);
 }

- if (m_writes != null && m_writes.removeWriteToAddr(getPhysicalAddress(bit))) {
- if (log.isTraceEnabled())
- log.trace("Removed potential DUPLICATE");
- }
+// if (m_writes != null && m_writes.removeWriteToAddr(getPhysicalAddress(bit))) {
+// if (log.isTraceEnabled())
+// log.trace("Removed potential DUPLICATE");
+// }

 return false;

Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/rwstore/TestAll.java
===================================================================
--- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/rwstore/TestAll.java 2011-01-20 21:54:50 UTC (rev 4148)
+++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/rwstore/TestAll.java 2011-01-20 22:05:55 UTC (rev 4149)
@@ -63,6 +63,8 @@

 suite.addTest(com.bigdata.rwstore.TestRWJournal.suite());

+ suite.addTest(com.bigdata.rwstore.sector.TestAll.suite());
+
 return suite;

 }

Added: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/rwstore/sector/TestAll.java
===================================================================
--- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/rwstore/sector/TestAll.java (rev 0)
+++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/rwstore/sector/TestAll.java 2011-01-20 22:05:55 UTC (rev 4149)
@@ -0,0 +1,70 @@
+/**
+
+Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved.
+
+Contact:
+ SYSTAP, LLC
+ 4501 Tower Road
+ Greensboro, NC 27410
+ lic...@bi...
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+/*
+ * Created on Oct 14, 2006
+ */
+
+package com.bigdata.rwstore.sector;
+
+import junit.framework.Test;
+import junit.framework.TestCase;
+import junit.framework.TestSuite;
+
+/**
+ * Runs all tests for the memory manager.
+ *
+ * @author <a href="mailto:tho...@us...">Bryan Thompson</a>
+ * @version $Id: TestAll.java 4069 2011-01-09 20:58:02Z thompsonbry $
+ */
+public class TestAll extends TestCase {
+
+ /**
+ *
+ */
+ public TestAll() {
+ }
+
+ /**
+ * @param arg0
+ */
+ public TestAll(String arg0) {
+ super(arg0);
+ }
+
+ /**
+ * Returns a test that will run each of the implementation specific test
+ * suites in turn.
+ */
+ public static Test suite()
+ {
+
+ final TestSuite suite = new TestSuite("memory manager");
+
+ suite.addTestSuite(com.bigdata.rwstore.sector.TestMemoryManager.class);
+
+ return suite;
+
+ }
+
+}

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
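The TestAll changes in this commit use JUnit 3's nested-suite pattern: each package exposes a static suite() aggregate, and wiring a new package into the automated runs is a single addTest(...) line in the parent suite. A minimal sketch of the pattern follows (ParentTestAll, ChildTestAll, and SomeTestCase are illustrative names, not the bigdata classes):

import junit.framework.Test;
import junit.framework.TestSuite;

public class ParentTestAll {

    /**
     * Returns a suite that runs the child package's entire suite in turn;
     * adding one addTest(...) line here pulls a whole package into the runs.
     */
    public static Test suite() {
        final TestSuite suite = new TestSuite("parent package");
        suite.addTest(ChildTestAll.suite());
        return suite;
    }
}

class ChildTestAll {

    public static Test suite() {
        final TestSuite suite = new TestSuite("child package");
        // Individual TestCase classes are registered here, e.g.:
        // suite.addTestSuite(SomeTestCase.class);
        return suite;
    }
}

Because suite() is resolved at runtime, a forgotten registration fails silently (the tests simply never run); making the parent suite reference the child by its fully qualified name, as the commit does, keeps the dependency visible in one place.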