From: <tho...@us...> - 2014-07-13 12:23:33
|
Revision: 8540 http://sourceforge.net/p/bigdata/code/8540 Author: thompsonbry Date: 2014-07-13 12:23:29 +0000 (Sun, 13 Jul 2014) Log Message: ----------- Looking back at the thread dump attached to the ticket, I see very little in the way of stack traces through bigdata. The only one in the property path code is this. {{{ "com.bigdata.journal.Journal.executorService5" - Thread t@43 java.lang.Thread.State: RUNNABLE at com.bigdata.bop.bindingSet.ListBindingSet.copy(ListBindingSet.java:290) at com.bigdata.bop.bindingSet.ListBindingSet.<init>(ListBindingSet.java:267) at com.bigdata.bop.bindingSet.ListBindingSet.clone(ListBindingSet.java:325) at com.bigdata.bop.bindingSet.ListBindingSet.clone(ListBindingSet.java:43) at com.bigdata.bop.paths.ArbitraryLengthPathOp$ArbitraryLengthPathTask.processChunk(ArbitraryLengthPathOp.java:511) at com.bigdata.bop.paths.ArbitraryLengthPathOp$ArbitraryLengthPathTask.call(ArbitraryLengthPathOp.java:270) at com.bigdata.bop.paths.ArbitraryLengthPathOp$ArbitraryLengthPathTask.call(ArbitraryLengthPathOp.java:196) at java.util.concurrent.FutureTask.run(FutureTask.java:273) at com.bigdata.bop.engine.ChunkedRunningQuery$ChunkTask.call(ChunkedRunningQuery.java:1281) at com.bigdata.bop.engine.ChunkedRunningQuery$ChunkTaskWrapper.run(ChunkedRunningQuery.java:836) at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:482) at java.util.concurrent.FutureTask.run(FutureTask.java:273) at com.bigdata.concurrent.FutureTaskMon.run(FutureTaskMon.java:63) at com.bigdata.bop.engine.ChunkedRunningQuery$ChunkFutureTask.run(ChunkedRunningQuery.java:731) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1156) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:626) at java.lang.Thread.run(Thread.java:804) }}} I've added tests for interrupts to two locations in the processChunk() code. One corresponds to the point where this stack trace passes through processChunk(). 
The other corresponds to the point where the initial solutions are flowing into the property path operator. Both check for an interrupt every 10 solutions. Modified Paths: -------------- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/paths/ArbitraryLengthPathOp.java Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/paths/ArbitraryLengthPathOp.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/paths/ArbitraryLengthPathOp.java 2014-07-12 01:23:36 UTC (rev 8539) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/paths/ArbitraryLengthPathOp.java 2014-07-13 12:23:29 UTC (rev 8540) @@ -52,6 +52,7 @@ import com.bigdata.bop.engine.AbstractRunningQuery; import com.bigdata.bop.engine.IRunningQuery; import com.bigdata.bop.engine.QueryEngine; +import com.bigdata.bop.join.JVMDistinctFilter; import cutthecrap.utils.striterators.ICloseableIterator; @@ -83,6 +84,10 @@ * solutions from the subquery with those in the parent context. * * @author <a href="mailto:mpe...@us...">Mike Personick</a> + * + * TODO There should be two versions of this operator. One for the JVM + * heap and another for the native heap. This will help when large + * amounts of data are materialized by the internal collections. 
*/ public class ArbitraryLengthPathOp extends PipelineOp { @@ -187,6 +192,7 @@ } + @Override public FutureTask<Void> eval(final BOpContext<IBindingSet> context) { return new FutureTask<Void>(new ArbitraryLengthPathTask(this, context)); @@ -251,6 +257,7 @@ } + @Override public Void call() throws Exception { try { @@ -346,10 +353,21 @@ } if (!noInput) { + + long chunksIn = 0L; + + for (IBindingSet parentSolutionIn : chunkIn) { + + /** + * @see <a href="http://trac.bigdata.com/ticket/865" + * >OutOfMemoryError instead of Timeout for SPARQL + * Property Paths </a> + */ + if (chunksIn++ % 10 == 0 && Thread.interrupted()) { + throw new InterruptedException(); + } - for (IBindingSet parentSolutionIn : chunkIn) { - - final IConstant<?> key = joinVar != null ? parentSolutionIn.get(joinVar) : null; + final IConstant<?> key = joinVar != null ? parentSolutionIn.get(joinVar) : null; if (log.isDebugEnabled()) { log.debug("adding parent solution for joining: " + parentSolutionIn); @@ -451,13 +469,16 @@ try { - /* - * TODO replace with code that does the PipelineJoins manually - */ + /* + * TODO Replace with code that does the PipelineJoins + * manually. Unrolling these iterations can be a major + * performance benefit. Another possibility is to use + * the GASEngine to expand the paths. + */ runningSubquery = queryEngine.eval(subquery, nextRoundInput.toArray(new IBindingSet[nextRoundInput.size()])); - long count = 0L; + long subqueryChunksOut = 0L; // #of chunks read from subquery try { // Declare the child query to the parent. 
@@ -476,7 +497,14 @@ for (IBindingSet bs : chunk) { - count++; + /** + * @see <a href="http://trac.bigdata.com/ticket/865" + * >OutOfMemoryError instead of Timeout for SPARQL + * Property Paths </a> + */ + if (subqueryChunksOut++ % 10 == 0 && Thread.interrupted()) { + throw new InterruptedException(); + } if (log.isDebugEnabled()) { log.debug("round " + i + " solution: " + bs); @@ -532,7 +560,7 @@ if (log.isDebugEnabled()) { log.debug("done with round " + i + - ", count=" + count + + ", count=" + subqueryChunksOut + ", totalBefore=" + sizeBefore + ", totalAfter=" + solutionsOut.size() + ", totalNew=" + (solutionsOut.size() - sizeBefore)); @@ -1116,6 +1144,7 @@ } + @Override public String toString() { final StringBuilder sb = new StringBuilder(); @@ -1144,7 +1173,11 @@ } /** - * Lifted directly from the JVMDistinctBindingSetsOp. + * Lifted directly from the {@link JVMDistinctFilter}. + * + * TODO Refactor to use {@link JVMDistinctFilter} directly iff possible + * (e.g., a chain of the AALP operator followed by the DISTINCT + * solutions operator) */ private final static class SolutionKey { @@ -1157,10 +1190,12 @@ this.hash = java.util.Arrays.hashCode(vals); } + @Override public int hashCode() { return hash; } + @Override public boolean equals(final Object o) { if (this == o) return true; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |