From: <tho...@us...> - 2010-11-12 00:18:16
Revision: 3938 http://bigdata.svn.sourceforge.net/bigdata/?rev=3938&view=rev Author: thompsonbry Date: 2010-11-12 00:18:05 +0000 (Fri, 12 Nov 2010) Log Message: ----------- Checkpoint of some work on runtime query optimization. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleIndex.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestJoinGraph.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/bench/AdaptiveQueryOptimization.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleIndex.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleIndex.java 2010-11-11 22:00:41 UTC (rev 3937) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleIndex.java 2010-11-12 00:18:05 UTC (rev 3938) @@ -68,7 +68,8 @@ * sampling, including: uniform distribution, randomly distribution, tuple * at a time versus clustered (sampling with leaves), adaptive sampling * until the sample reflects some statistical property of the underlying - * population, etc. + * population, etc. Support for different kinds of sampling could be added + * using appropriate annotations. */ public class SampleIndex<E> extends AbstractAccessPathOp<E> { @@ -308,6 +309,10 @@ * Taking a clustered sample really requires knowing where the * leaf boundaries are in the index, e.g., using * {@link ILeafCursor}. + * <p> + * Taking all tuples from a few leaves in each sample might + * produce a faster estimation of the correlation when sampling + * join paths. * * @todo Rather than evenly spaced samples, we should be taking a random * sample. 
This could be achieved using a random initial offset Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2010-11-11 22:00:41 UTC (rev 3937) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2010-11-12 00:18:05 UTC (rev 3938) @@ -28,9 +28,12 @@ package com.bigdata.bop.controller; import java.io.Serializable; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.Formatter; import java.util.Iterator; +import java.util.LinkedHashSet; import java.util.LinkedList; import java.util.List; import java.util.Map; @@ -46,7 +49,7 @@ import com.bigdata.bop.BOpContext; import com.bigdata.bop.BOpContextBase; import com.bigdata.bop.BOpEvaluationContext; -import com.bigdata.bop.Constant; +import com.bigdata.bop.BOpUtility; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IElement; import com.bigdata.bop.IPredicate; @@ -66,6 +69,7 @@ import com.bigdata.relation.accesspath.ThickAsynchronousIterator; import com.bigdata.relation.rule.Rule; import com.bigdata.striterator.Dechunkerator; +import com.bigdata.striterator.IChunkedIterator; /** * A join graph with annotations for estimated cardinality and other details in @@ -81,7 +85,7 @@ * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ * - * @todo Some edges can be eliminated by transitivity. For example, given + * TODO Some edges can be eliminated by transitivity. For example, given * * <pre> * query: @@ -103,7 +107,7 @@ * It is necessary to execute e1 and either e2 or e3, but not both e2 and e3. * </pre> * - * @todo In order to combine pipelining with runtime query optimization we need + * TODO In order to combine pipelining with runtime query optimization we need * to sample based on the first chunk(s) delivered by the pipeline. If * necessary, we can buffer multiple chunks for semi-selective queries. * However, for unselective queries we would accept as many buffers worth @@ -127,11 +131,11 @@ String VERTICES = JoinGraph.class.getName() + ".vertices"; /** - * The initial sample size (default {@value #DEFAULT_SAMPLE_SIZE}). + * The initial limit for cutoff sampling (default {@value #DEFAULT_LIMIT}). */ - String SAMPLE_SIZE = JoinGraph.class.getName() + ".sampleSize"; + String LIMIT = JoinGraph.class.getName() + ".limit"; - int DEFAULT_SAMPLE_SIZE = 100; + int DEFAULT_LIMIT = 100; } /** @@ -144,11 +148,11 @@ } /** - * @see Annotations#SAMPLE_SIZE + * @see Annotations#LIMIT */ - public int getSampleSize() { + public int getLimit() { - return getProperty(Annotations.SAMPLE_SIZE, Annotations.DEFAULT_SAMPLE_SIZE); + return getProperty(Annotations.LIMIT, Annotations.DEFAULT_LIMIT); } @@ -160,17 +164,17 @@ /** * - * @todo We can derive the vertices from the join operators or the join + * TODO We can derive the vertices from the join operators or the join * operators from the vertices. However, if a specific kind of join * operator is required then the question is whether we have better * information to make that choice when the join graph is evaluated or * before it is constructed. * - * @todo How we will handle optional joins? Presumably they are outside of + * TODO How we will handle optional joins? Presumably they are outside of * the code join graph as part of the tail attached to that join * graph. 
* - * @todo How can join constraints be moved around? Just attach them where + * TODO How can join constraints be moved around? Just attach them where * ever a variable becomes bound? And when do we filter out variables * which are not required downstream? Once we decide on a join path * and execute it fully (rather than sampling that join path). @@ -196,10 +200,90 @@ } + /** + * Used to assign row identifiers. + */ + static private final IVariable<Integer> ROWID = Var.var("__rowid"); + + /** + * A sample of a {@link Vertex} (an access path). + */ + public static class VertexSample { + + /** + * Fast range count. This will be the same for each sample taken + * (assuming a read historical view or even a time scale of query which + * is significantly faster than update). + */ + public final long rangeCount; + + /** + * The limit used to produce the {@link #sample}. + */ + public final int limit; + + /** + * When <code>true</code>, the result is not a sample but the + * materialized access path. + * + * @todo When <code>true</code>, we could run the join against the + * sample rather than the disk. This would require wrapping the + * sample as an access path. Since all exact samples will be + * pretty small, this is not likely to have any great performance + * benefit. + */ + public final boolean exact; + + /** + * Sample. + */ + final Object[] sample; + + /** + * + * @param rangeCount + * @param limit + * @param exact + * @param sample + */ + public VertexSample(final long rangeCount, final int limit, final boolean exact, final Object[] sample) { + + if (rangeCount < 0L) + throw new IllegalArgumentException(); + + if (limit <= 0) + throw new IllegalArgumentException(); + + if (sample == null) + throw new IllegalArgumentException(); + + this.rangeCount = rangeCount; + + this.limit = limit; + + this.exact = exact; + + this.sample = sample; + + } + + public String toString() { + return "VertexSample{rangeCount=" + rangeCount + ",limit=" + limit + + ",exact=" + exact + ", sampleSize=" + sample.length + "}"; + } + + } + /** * A vertex of the join graph is an annotated relation (this corresponds to * an {@link IPredicate} with additional annotations to support the adaptive * query optimization algorithm). + * <p> + * The unique identifier for a {@link Vertex} (within a given join graph) is + * the {@link BOp.Annotations#BOP_ID} decorating its {@link IPredicate}. + * {@link #hashCode()} is defined in terms of this unique identifier so we + * can readily detect when a {@link Set} already contains a given + * {@link Vertex}. */ public static class Vertex implements Serializable { @@ -208,23 +292,13 @@ */ private static final long serialVersionUID = 1L; - final IPredicate<?> pred; + public final IPredicate<?> pred; /** - * The limit used to produce the {@link #sample}. + * The most recently taken sample of the {@link Vertex}. */ - int limit; + VertexSample sample = null; - /** - * Fast range count and <code>null</code> until initialized. - */ - Long rangeCount; - - /** - * Sample (when not-null). - */ - Object[] sample; - Vertex(final IPredicate<?> pred) { if (pred == null) @@ -236,45 +310,270 @@ public String toString() { - return "\nVertex{pred=" + pred + ",rangeCount=" + rangeCount - + ",sampleSize=" + (sample == null ? "N/A" : sample.length) - + "}"; + return "Vertex{pred=" + pred + ",sample=" + sample + "}"; } - public void sample(final BOpContextBase context,final int limit) { + /** + * Equals is based on a reference test. 
+ */ + public boolean equals(Object o) { + return this == o; + } + /** + * The hash code is just the {@link BOp.Annotations#BOP_ID} of the + * associated {@link IPredicate}. + */ + public int hashCode() { + return pred.getId(); + } + + /** + * Take a sample of the vertex. If the sample is already exact, then + * this is a NOP. + * + * @param context + * @param limit + * The sample cutoff. + */ + public void sample(final BOpContextBase context, final int limit) { + + if (context == null) + throw new IllegalArgumentException(); + + if (limit <= 0) + throw new IllegalArgumentException(); + + final VertexSample oldSample = this.sample; + + if(oldSample != null && oldSample.exact) { + + /* + * The old sample is already the full materialization of the + * vertex. + */ + + return; + + } + final IRelation r = context.getRelation(pred); final IAccessPath ap = context.getAccessPath(r, pred); - if (rangeCount == null) { + final long rangeCount = oldSample == null ? ap + .rangeCount(false/* exact */) : oldSample.rangeCount; - rangeCount = ap.rangeCount(false/* exact */); + if (rangeCount <= limit) { - } + /* + * Materialize the access path. + * + * @todo This could be more efficient if we raised it onto the + * AP or if we overrode CHUNK_CAPACITY and the fully buffered + * iterator threshold such that everything was materialized as a + * single chunk. + */ + + final List<Object> tmp = new ArrayList<Object>((int) rangeCount); - if (sample == null) { // @todo new sample each time? + final IChunkedIterator<Object> itr = ap.iterator(); - final SampleIndex sampleOp = new SampleIndex(new BOp[] {}, // - NV.asMap(// - new NV(SampleIndex.Annotations.PREDICATE, pred),// - new NV(SampleIndex.Annotations.LIMIT, limit))); + try { + + while (itr.hasNext()) { + + tmp.add(itr.next()); + + } + + } finally { + + itr.close(); + } + + sample = new VertexSample(rangeCount, limit, true/* exact */, + tmp.toArray(new Object[0])); - sample = sampleOp.eval(context); - - this.limit = limit; - + return; + } + + /* + * Materialize a random sample from the access path. + */ + final SampleIndex sampleOp = new SampleIndex(new BOp[] {}, // + NV.asMap(// + new NV(SampleIndex.Annotations.PREDICATE, pred),// + new NV(SampleIndex.Annotations.LIMIT, limit))); + + sample = new VertexSample(rangeCount, limit, false/*exact*/, sampleOp + .eval(context)); + } } /** + * A sample of an {@link Edge} (a join). + */ + public static class EdgeSample { + + /** + * The fast range count (aka cardinality) for the source vertex of the + * edge (whichever vertex has the lower cardinality). + */ + public final long rangeCount; + + /** + * The limit used to sample the edge (this is the limit on the #of + * solutions generated by the cutoff join used when this sample was + * taken). + */ + public final int limit; + + /** + * The #of binding sets out of the source sample vertex sample which + * were consumed. + */ + public final int inputCount; + + /** + * The #of binding sets generated before the join was cutoff. + * <p> + * Note: If the outputCount is zero then this is a good indicator that + * there is an error in the query such that the join will not select + * anything. This is not 100%, merely indicative. + */ + public final int outputCount; + + /** + * The ratio of the #of input samples consumed to the #of output samples + * generated (the join hit ratio or scale factor). + */ + public final double f; + + /** + * The estimated cardinality of the join. 
+ */ + public final long estimatedCardinality; + + /** + * Flag is set when the estimate is likely to be a lower bound for the + * cardinality of the edge. + * <p> + * If the {@link #inputCount} is ONE (1) and the {@link #outputCount} is + * the {@link #limit} then the {@link #estimatedCardinality} is a lower + * bound as more than {@link #outputCount} solutions could have been + * produced by the join against a single input solution. + */ + public final boolean estimateIsLowerBound; + + /** + * Flag indicates that the {@link #estimatedCardinality} underflowed. + * <p> + * Note: When the source vertex sample was not exact, then it is + * possible for the cardinality estimate to underflow. When, in + * addition, {@link #outputCount} is LT {@link #limit}, then feeding the + * sample of source tuples in is not sufficient to generated the desired + * #of output tuples. In this case, {@link #f join hit ratio} will be + * low. It may even be that zero output tuples were generated, in which + * case the join hit ratio will appear to be zero. However, the join hit + * ratio actually underflowed and an apparent join hit ratio of zero + * does not imply that the join will be empty unless the source vertex + * sample is actually the fully materialized access path - see + * {@link VertexSample#exact} and {@link #exact}. + */ + public final boolean estimateIsUpperBound; + + /** + * <code>true</code> if the sample is the exact solution for the join + * path. + * <p> + * Note: If the entire source vertex is being feed into the sample, + * {@link VertexSample#exact} flags this condition, and outputCount is + * also LT the limit, then the edge sample is the actual result of the + * join. That is, feeding all source tuples into the join gives fewer + * than the desired number of output tuples. + * + * @todo This field marks this condition and should be used to avoid + * needless recomputation of a join whose exact solution is + * already known. + */ + public final boolean exact; + + /** + * The sample of the solutions for the join path. + */ + private final IBindingSet[] sample; + + /** + * Create an object which encapsulates a sample of an edge. + * + * @param limit + * The limit used to sample the edge (this is the limit on + * the #of solutions generated by the cutoff join used when + * this sample was taken). + * @param sourceVertexSample + * The sample for source vertex of the edge (whichever vertex + * has the lower cardinality). + * @param inputCount + * The #of binding sets out of the source sample vertex + * sample which were consumed. + * @param outputCount + * The #of binding sets generated before the join was cutoff. + */ + EdgeSample(final VertexSample sourceVertexSample, final int limit, + final int inputCount, final int outputCount, + final IBindingSet[] sample) { + + if(sample == null) + throw new IllegalArgumentException(); + + this.rangeCount = sourceVertexSample.rangeCount; + + this.limit = limit; + + this.inputCount = inputCount; + + this.outputCount = outputCount; + + f = outputCount == 0 ? 
0 : (outputCount / (double) inputCount); + + estimatedCardinality = (long) (rangeCount * f); + + estimateIsLowerBound = inputCount == 1 && outputCount == limit; + + estimateIsUpperBound = !sourceVertexSample.exact + && outputCount < limit; + + this.exact = sourceVertexSample.exact && outputCount < limit; + + this.sample = sample; + } + + public String toString() { + return getClass().getName() + "{inputRangeCount=" + rangeCount + + ", limit=" + limit + ", inputCount=" + inputCount + + ", outputCount=" + outputCount + ", f=" + f + + ", estimatedCardinality=" + estimatedCardinality + + ", estimateIsLowerBound=" + estimateIsLowerBound + + ", estimateIsUpperBound=" + estimateIsUpperBound + + ", sampleIsExactSolution=" + exact + + "}"; + } + + }; + + /** * An edge of the join graph is an annotated join operator. The edges of the * join graph are undirected. Edges exist when the vertices share at least * one variable. + * <p> + * {@link #hashCode()} is defined in terms of the unordered hash codes of + * the individual vertices. */ public static class Edge implements Serializable { @@ -286,97 +585,18 @@ /** * The vertices connected by that edge. */ - final Vertex v1, v2; + public final Vertex v1, v2; /** * The set of shared variables. */ - final Set<IVariable<?>> shared; + public final Set<IVariable<?>> shared; - class EdgeSample { - - /** - * The fast range count (aka cardinality) for the source vertex of - * the edge (whichever vertex has the lower cardinality). - */ - final long inputRangeCount; - /** - * The limit used to sample the edge (this is the limit on the #of - * solutions generated by the cutoff join used when this sample was - * taken). - */ - final int limit; - /** - * The #of binding sets out of the source sample vertex sample which - * were consumed. - */ - final int inputCount; - /** - * The #of binding sets generated before the join was cutoff. - */ - final int outputCount; - /** - * The ratio of the #of input samples consumed to the #of output - * samples generated. - */ - final double f; - /** - * The estimated cardinality of the join. - */ - final long estimatedCardinality; - - /** - * @param limit - * The limit used to sample the edge (this is the limit - * on the #of solutions generated by the cutoff join used - * when this sample was taken). - * @param inputRangeCount - * The fast range count (aka cardinality) for the source - * vertex of the edge (whichever vertex has the lower - * cardinality). - * @param inputCount - * The #of binding sets out of the source sample vertex - * sample which were consumed. - * @param outputCount - * The #of binding sets generated before the join was - * cutoff. - * - * @todo If the outputCount is zero then this is a good indicator - * that there is an error in the query such that the join will - * not select anything. This is not 100%, merely indicative. - */ - EdgeSample(final long inputRangeCount, final int limit, final int inputCount, - final int outputCount) { - - this.inputRangeCount = inputRangeCount; - - this.limit = limit; - - this.inputCount = inputCount; - - this.outputCount = outputCount; - - f = outputCount == 0 ? 
0 : (outputCount / (double) inputCount); - - estimatedCardinality = (long) (inputRangeCount * f); - - } - - public String toString() { - return "EdgeSample" + "{inputRangeCount=" + inputRangeCount - + ", limit=" + limit + ", inputCount=" + inputCount - + ", outputCount=" + outputCount + ", f=" + f - + ", estimatedCardinality=" + estimatedCardinality - + "}"; - } - - }; - /** * The last sample for this edge and <code>null</code> if the edge has * not been sampled. */ - EdgeSample sample = null; + public EdgeSample sample = null; public Edge(final Vertex v1, final Vertex v2, final Set<IVariable<?>> shared) { if (v1 == null) @@ -391,21 +611,108 @@ this.v2 = v2; this.shared = shared; } - + + /** + * Note: The vertices of the edge are labeled using the + * {@link BOp.Annotations#BOP_ID} associated with the {@link IPredicate} + * for each vertex. + */ public String toString() { - return "\nEdge{v1=" + v1.pred.getId() + ",v2=" + v2.pred.getId() - + ",shared=" + shared.toString() + ", sample=" + sample + "}"; + return "Edge{ (V" + v1.pred.getId() + ",V" + v2.pred.getId() + ")" + + ", shared=" + shared.toString() + ", sample=" + sample + + "}"; } /** + * Equality is determined by reference testing. + */ + public boolean equals(final Object o) { + + return this == o; + + } + + /** + * The hash code of an edge is the hash code of the vertex with the + * smaller hash code X 31 plus the hash code of the vertex with the + * larger hash code. This definition compensates for the arbitrary order + * in which the vertices may be expressed and also recognizes that the + * vertex hash codes are based on the bop ids, which are often small + * integers. + */ + public int hashCode() { + + if (hash == 0) { + + final int h1 = v1.hashCode(); + final int h2 = v2.hashCode(); + + final int h; + if (h1 < h2) { + + h = h1 * 31 + h2; + + } else { + + h = h2 * 31 + h1; + + } + + hash = h; + + } + return hash; + + } + private int hash; + + /** + * Return the vertex with the smaller estimated cardinality. + * + * @throws IllegalStateException + * if either vertex has not been sampled. + */ + public Vertex getMinimumCardinalityVertex() { + + if (v1.sample == null) // vertex not sampled. + throw new IllegalStateException(); + + if (v2.sample == null) // vertex not sampled. + throw new IllegalStateException(); + + return (v1.sample.rangeCount < v2.sample.rangeCount) ? v1 : v2; + + } + + /** + * Return the vertex with the larger estimated cardinality (the vertex + * not returned by {@link #getMinimumCardinalityVertex()}). + * + * @throws IllegalStateException + * if either vertex has not been sampled. + */ + public Vertex getMaximumCardinalityVertex() { + + // The vertex with the minimum cardinality. + final Vertex o = getMinimumCardinalityVertex(); + + // Return the other vertex. + return (v1 == o) ? v2 : v1; + + } + + /** * Estimate the cardinality of the edge. * * @param context + * + * @return The estimated cardinality of the edge. + * * @throws Exception */ - public void estimateCardinality(final QueryEngine queryEngine, + public long estimateCardinality(final QueryEngine queryEngine, final int limit) throws Exception { if (limit <= 0) @@ -418,7 +725,11 @@ */ // vertex v, vprime final Vertex v, vp; - if (v1.rangeCount < v2.rangeCount) { + if (v1.sample == null) // vertex not sampled. + throw new IllegalStateException(); + if (v2.sample == null) // vertex not sampled. 
+ throw new IllegalStateException(); + if (v1.sample.rangeCount < v2.sample.rangeCount) { v = v1; vp = v2; } else { @@ -427,7 +738,7 @@ } /* - * @todo This is difficult to setup because we do not have a concept + * TODO This is difficult to setup because we do not have a concept * (or class) corresponding to a fly weight relation and we do not * have a general purpose relation, just arrays or sequences of * IBindingSets. Also, all relations are persistent. Temporary @@ -446,28 +757,66 @@ * both the input and the output of the cutoff evaluation of the * edge rather than rows of the materialized relation. * - * @todo On subsequent iterations we would probably re-sample [v] + * TODO On subsequent iterations we would probably re-sample [v] * and we would run against the materialized intermediate result for * [v']. */ /* - * Convert the source sample into an IBindingSet[], injecting a - * rowid column. + * Convert the source sample into an IBindingSet[]. + * + * @todo We might as well do this when we sample the vertex. */ - final IVariable<Integer> ROWID = Var.var("__rowid"); - final IBindingSet[] sample = new IBindingSet[v.sample.length]; + final IBindingSet[] sourceSample = new IBindingSet[v.sample.sample.length]; { - for (int i = 0; i < sample.length; i++) { + for (int i = 0; i < sourceSample.length; i++) { final IBindingSet bset = new HashBindingSet(); - BOpContext.copyValues((IElement) v.sample[i], v.pred, bset); - bset.set(ROWID, new Constant<Integer>(Integer.valueOf(i))); - sample[i] = bset; + BOpContext.copyValues((IElement) v.sample.sample[i], v.pred, bset); + sourceSample[i] = bset; } } + // Sample the edge and save the sample on the edge as a side-effect. + this.sample = estimateCardinality(queryEngine, limit, v, vp, sourceSample); + + return sample.estimatedCardinality; + + } + + /** + * Estimate the cardinality of the edge. + * + * @param queryEngine + * @param limit + * @param vSource + * The source vertex. + * @param vTarget + * The target vertex + * @param sourceSample + * The sample for the source vertex. When this is a one-step + * estimation of the cardinality of the edge, then this + * sample is taken from the {@link VertexSample}. When the + * edge (vSource,vTarget) extends some {@link Path}, then + * this is taken from the {@link EdgeSample} for that + * {@link Path}. + * + * @return The result of sampling that edge. + * + * @throws Exception + */ + public EdgeSample estimateCardinality(final QueryEngine queryEngine, + final int limit, final Vertex vSource, final Vertex vTarget, + IBindingSet[] sourceSample) throws Exception { + + if (limit <= 0) + throw new IllegalArgumentException(); + + // Inject a rowId column. + sourceSample = BOpUtility.injectRowIdColumn(ROWID, 1/* start */, + sourceSample); + /* - * @todo Any constraints on the edge (other than those implied by + * TODO Any constraints on the edge (other than those implied by * shared variables) need to be annotated on the join. 
Constraints * (other than range constraints which are directly coded by the * predicate) will not reduce the effort to compute the join, but @@ -476,7 +825,7 @@ */ final PipelineJoin joinOp = new PipelineJoin(new BOp[] {}, // new NV(BOp.Annotations.BOP_ID, 1),// - new NV(PipelineJoin.Annotations.PREDICATE,vp.pred.setBOpId(3)) + new NV(PipelineJoin.Annotations.PREDICATE,vTarget.pred.setBOpId(3)) ); final SliceOp sliceOp = new SliceOp(new BOp[] { joinOp },// @@ -494,12 +843,13 @@ queryId, joinOp.getId()/* startId */, -1 /* partitionId */, new ThickAsynchronousIterator<IBindingSet[]>( - new IBindingSet[][] { sample }))); + new IBindingSet[][] { sourceSample }))); // #of source samples consumed. int inputCount = 0; // #of output samples generated. int outputCount = 0; + final List<IBindingSet> result = new LinkedList<IBindingSet>(); try { try { IBindingSet bset = null; @@ -508,11 +858,12 @@ runningQuery.iterator()); while (itr.hasNext()) { bset = itr.next(); + result.add(bset); outputCount++; } - // #of input rows consumed. Note: +1 since origin ZERO. + // #of input rows consumed. inputCount = bset == null ? 0 : ((Integer) bset.get(ROWID) - .get()) + 1; + .get()); } finally { // verify no problems. FIXME Restore test of the query. // runningQuery.get(); @@ -521,18 +872,365 @@ runningQuery.cancel(true/* mayInterruptIfRunning */); } - this.sample = new EdgeSample(v.rangeCount, limit, inputCount, - outputCount); + /* + * Note: This needs to be based on the source vertex having the + * minimum cardinality for the Path which is being extended which + * connects via some edge defined in the join graph. If a different + * vertex is chosen as the source then the estimated cardinality + * will be falsely high by whatever ratio the chosen vertex + * cardinality exceeds the one having the minimum cardinality which + * is connected via an edge to the target vertex). + */ + final VertexSample moreSelectiveVertexSample = vSource.sample.rangeCount < vTarget.sample.rangeCount ? vSource.sample + : vTarget.sample; + final EdgeSample edgeSample = new EdgeSample( + moreSelectiveVertexSample/* vSource.sample */, limit, + inputCount, outputCount, result + .toArray(new IBindingSet[result.size()])); + if (log.isInfoEnabled()) - log.info("edge=" + this + sample); + log.info("edge=" + this + ", sample=" + edgeSample); + return edgeSample; + } } +// /** +// * A path sample includes the materialized binding sets from the as-executed +// * join path. +// * +// * @todo The sample {@link IBindingSet}[] could be saved with the +// * {@link EdgeSample}. However, when we are sampling a join path we +// * want to associate the net sample with the path, not each edge in +// * that path, because we need to be able to generate join paths in +// * which the path is extended from any vertex already part of the path +// * to any vertex which has not yet incorporated in the path and has +// * not yet been executed. To do this we need to intermediate results +// * for the path, which includes all variables bound by each join for +// * each edge in the path, not just on an edge by edge basis. +// */ +// public static class PathSample extends EdgeSample { +// +// /** +// * <code>true</code> if the sample is the exact solution for the join path. +// */ +// private final boolean exact; +// +// /** +// * The sample of the solutions for the join path. 
+// */ +// private final IBindingSet[] sample; +// +// PathSample(final long inputRangeCount, final int limit, +// final int inputCount, final int outputCount, +// final boolean exact, final IBindingSet[] sample) { +// +// super(inputRangeCount, limit, inputCount, outputCount); +// +// if(sample == null) +// throw new IllegalArgumentException(); +// +// this.exact = exact; +// +// this.sample = sample; +// +// } +// +// public String toString() { +// +// return super.toString() + ":{exact=" + exact + ", sampleSize=" +// + sample.length + "}"; +// +// } +// +// } + /** + * A sequence of {@link Edge}s (aka join steps). + */ + public static class Path { + + public final List<Edge> edges; + + /* + * These fields carry state used by chainSample. It would be better to + * have that state on a data structure which is purely local to + * chainSample, but perhaps Path is that data structure. + */ + + public EdgeSample sample = null; + +// /** +// * Input to the next round of sampling. +// */ +// private VertexSample inputSample; + + /** + * The vertex at which the path from which this path was derived + * stopped. This is initialized to the source vertex when entering the + * chainSample() method. + */ + private Vertex stopVertex; + + public String toString() { + final StringBuilder sb = new StringBuilder(); + sb.append("Path{"); + boolean first = true; + for (Edge e : edges) { + if (!first) + sb.append(","); + sb.append("(" + e.v1.pred.getId() + "," + e.v2.pred.getId() + ")"); + first = false; + } + sb.append(",sample=" + sample + "}"); + return sb.toString(); + } + + /** + * Create an empty path. + */ + public Path() { + this.edges = new LinkedList<Edge>(); + } + + /** + * Create a path from a single edge. + * + * @param e + * The edge. + */ + public Path(final Edge e) { + if (e == null) + throw new IllegalArgumentException(); + this.edges = new LinkedList<Edge>(); + this.edges.add(e); + this.sample = e.sample; + } + + /** + * Return <code>true</code> iff the {@link Path} contains at least one + * {@link Edge} for that {@link Vertex}. + * + * @param v + * The vertex + * + * @return true if the vertex is already part of the path. + */ + public boolean contains(final Vertex v) { + + if (v == null) + throw new IllegalArgumentException(); + + for (Edge e : edges) { + + if (e.v1 == v || e.v2 == v) + return true; + + } + + return false; + } + + /** + * Add an edge to a path, computing the estimated cardinality of the new + * path, and returning the new path. + * + * @param queryEngine + * @param limit + * @param e + * The edge. + * + * @return The new path. + * + * @throws Exception + */ + public Path addEdge(final QueryEngine queryEngine, final int limit, + final Edge e) throws Exception { + + if (e == null) + throw new IllegalArgumentException(); + + // Figure out which vertices are already part of this path. + final boolean v1Found = contains(e.v1); + final boolean v2Found = contains(e.v2); + + if (!v1Found && !v2Found) + throw new IllegalArgumentException( + "Edge does not extend path: edge=" + e + ", path=" + + this); + + if (v1Found && v2Found) + throw new IllegalArgumentException( + "Edge already present in path: edge=" + e + ", path=" + + this); + + // The vertex which is already part of this path. + final Vertex sourceVertex = v1Found ? e.v1 : e.v2; + + // The new vertex, which is not part of this path. + final Vertex targetVertex = v1Found ? e.v2 : e.v1; + + // Extend the path. + final Path tmp = new Path(); + + tmp.edges.addAll(edges); + + tmp.edges.add(e); + + /* + * Chain sample the edge. 
+ * + * Note: ROX uses the intermediate result I(p) for the existing path + * as the input when sampling the edge. The corresponding concept + * for us is the sample for this Path, which will have all variable + * bindings produced so far. In order to estimate the cardinality of + * the new join path we have to do a one step cutoff evaluation of + * the new Edge, given the sample available on the current Path. + * + * TODO It is possible for the path sample to be empty. Unless the + * sample also happens to be exact, this is an indication that the + * estimated cardinality has underflowed. How are we going to deal + * with this situation?!? What would appear to matter is the amount + * of work being performed by the join in achieving that low + * cardinality. If we have to do a lot of work to get a small + * cardinality then we should prefer join paths which achieve the + * same reduction in cardinality with less 'intermediate + * cardinality' - that is, by examining fewer possible solutions. + */ + +// final IBindingSet[] sample = BOpUtility.injectRowIdColumn(ROWID, +// 0/* start */, this.sample.sample); + + final EdgeSample edgeSample = e.estimateCardinality(queryEngine, + limit, sourceVertex, targetVertex, this.sample.sample); + + tmp.sample = edgeSample; + +// tmp.stopVertex = e.getMaximumCardinalityVertex(); + + return tmp; + + } + +// /** +// * Equality is defined by comparison of the unordered set of edges. +// */ +// public boolean equals(final Object o) { +// if (this == o) +// return true; +// if (!(o instanceof Path)) +// return false; +// final Path t = (Path) o; +// if (edges.length != t.edges.length) +// return false; +// for (Edge e : edges) { +// boolean found = false; +// for (Edge x : t.edges) { +// if (x.equals(e)) { +// found = true; +// break; +// } +// } +// if (!found) +// return false; +// } +// return true; +// } +// +// /** +// * The hash code of path is defined as the bit-wise XOR of the hash +// * codes of the edges in that path. +// */ +// public int hashCode() { +// +// if (hash == 0) { +// +// int result = 0; +// +// for(Edge e : edges) { +// +// result ^= e.hashCode(); +// +// } +// +// hash = result; +// +// } +// return hash; +// +// } +// private int hash; + + } + + /** + * Comma delimited table showing the estimated join hit ratio, the estimated + * cardinality, and the set of vertices for each of the specified join + * paths. + * + * @param a + * An array of join paths. + * + * @return A table with that data. + */ + static public String showTable(final Path[] a) { + final StringBuilder sb = new StringBuilder(); + final Formatter f = new Formatter(sb); + for (Path x : a) { + if (x.sample == null) { + f.format("%7s, %10s", "N/A", "N/A"); + } else { + f.format("% 7.2f, % 10d", x.sample.f, + x.sample.estimatedCardinality); + } + sb.append(","); + for (Edge e : x.edges) + sb.append(" (" + e.v1.pred.getId() + " " + e.v2.pred.getId() + + ")"); + sb.append("\n"); + } + return sb.toString(); + } + + /** * A join graph (data structure and methods only). + * + * Note: ROX was stated in terms of materialization of intermediate results. + * Bigdata was originally designed to support pipelined join evaluation in + * which the zero investment property is true (there exists an index for the + * join). While support is being developed for operator-at-once joins (e.g., + * hash joins), that support is aimed at more efficient evaluation of high + * cardinality joins using multi-block IO. 
Therefore, unlike ROX, the + * runtime query optimizer does not materialize the intermediate results + * when chain sampling. Instead, it feeds a sample into a cutoff pipeline + * evaluation for the join path. Since some join paths can eliminate a lot + * of intermediate solutions and hence take a long time to satisfy the + * cutoff, we also specify a timeout for the cutoff evaluation of a join + * path. Given the zero investment property (an index exists for the join), + * if the cutoff is not satisfied within the timeout, then the join has a + * low correlation. If no solutions are generated within the timeout, then + * the estimate of the correlation "underflows". + * + * Note: timeouts are a bit tricky when you are not running on a real-time + * platform. In particular, heavy swapping or heavy GC workloads could both + * cause a timeout to expire because no work was done on sampling the join + * path rather than because there was a lot of work to be done. Therefore, + * the timeout should be used to protect against join paths which take a + * long time to materialize <i>cutoff</i> solutions rather than to fine tune + * the running time of the query optimizer. + * + * TODO Runtime query optimization is probably useless (or else should rely + * on materialization of intermediate results) when the cardinality of the + * vertices and edges for the query is small. This would let us balance the + * design characteristics of MonetDB and bigdata. For this purpose, we need + * to flag when a {@link VertexSample} is complete (e.g., the cutoff is GTE + * the actual range count). This also needs to be done for each join path so + * we can decide when the sample for the path is in fact the exact solution + * rather than an estimate of the cardinality of the solution together with + * a sample of the solution. */ public static class JGraph { @@ -546,6 +1244,9 @@ */ private final Edge[] E; + // The set of vertices which have been consumed by the query. + private final Set<Vertex> executedVertices = new LinkedHashSet<Vertex>(); + public List<Vertex> getVertices() { return Collections.unmodifiableList(Arrays.asList(V)); } @@ -555,8 +1256,25 @@ } public String toString() { - return super.toString() + "{V=" + Arrays.toString(V) + ",E=" - + Arrays.toString(E) + "}"; + final StringBuilder sb = new StringBuilder(); + sb.append("JoinGraph"); + sb.append("{V=["); + for(Vertex v : V) { + sb.append("\nV["+v.pred.getId()+"]="+v); + } + sb.append("],E=["); + for(Edge e : E) { + sb.append("\n"+e); + } + sb.append("\n],ExecutedVertices=["); + for(Vertex v : executedVertices) { + sb.append("\nV["+v.pred.getId()+"]="+v); + } + sb.append("\n]}"); + return sb.toString(); + +// return super.toString() + "{V=" + Arrays.toString(V) + ",E=" +// + Arrays.toString(E) + ", executedVertices="+executedVertices+"}"; } public JGraph(final IPredicate[] v) { @@ -611,6 +1329,45 @@ } /** + * Return the {@link Vertex} whose {@link IPredicate} is associated with + * the given {@link BOp.Annotations#BOP_ID}. + * + * @param bopId + * The bop identifier. + * @return The {@link Vertex} -or- <code>null</code> if there is no such + * vertex in the join graph. + */ + public Vertex getVertex(int bopId) { + for(Vertex v : V) { + if(v.pred.getId()==bopId) + return v; + } + return null; + } + + /** + * Return the {@link Edge} associated with the given vertices. The + * vertices may appear in any order. + * + * @param v1 + * One vertex. + * @param v2 + * Another vertex. 
+ * + * @return The edge -or- <code>null</code> if there is no such edge in + * the join graph. + */ + public Edge getEdge(Vertex v1, Vertex v2) { + for(Edge e : E) { + if (e.v1 == v1 && e.v2 == v2) + return e; + if (e.v1 == v2 && e.v2 == v1) + return e; + } + return null; + } + + /** * Obtain a sample and estimated cardinality (fast range count) for each vertex. * * @param context @@ -632,28 +1389,601 @@ * * @param context * - * @throws Exception + * @throws Exception */ - public void estimateEdgeWeights(final QueryEngine queryEngine, final int limit) throws Exception { - - for(Edge e : E) { - + public void estimateEdgeWeights(final QueryEngine queryEngine, + final int limit) throws Exception { + + for (Edge e : E) { + if (e.v1.sample == null || e.v2.sample == null) { - + /* * We can only estimate the cardinality of edges connecting * vertices for which samples were obtained. */ continue; + + } + + e.estimateCardinality( + queryEngine, limit); + + } + + } + + /** + * Return the {@link Edge} having the minimum estimated cardinality out + * of those edges whose cardinality has been estimated. + * + * @param A + * set of vertices to be excluded from consideration + * (optional). + * + * @return The minimum cardinality edge -or- <code>null</code> if there + * are no {@link Edge}s having an estimated cardinality. + */ + public Edge getMinimumCardinalityEdge(final Set<Vertex> visited) { + + long minCard = Long.MIN_VALUE; + Edge minEdge = null; + + for (Edge e : E) { + + if (e.sample == null) { + + // Edge has not been sampled. + continue; + + } + + if (visited != null + && (visited.contains(e.v1) || visited.contains(e.v2))) { + // A vertex of that edge has already been consumed. + continue; + } - e.estimateCardinality(queryEngine, limit); + final long estimatedCardinality = e.sample.estimatedCardinality; + + if (minEdge == null || estimatedCardinality < minCard) { + + minEdge = e; + + minCard = estimatedCardinality; + + } + + } + + return minEdge; + + } + +// /** +// * Return the {@link Edge} having the minimum estimated cardinality out +// * of those edges whose cardinality has been estimated. +// * +// * @return The minimum cardinality edge -or- <code>null</code> if there +// * are no {@link Edge}s having an estimated cardinality. +// */ +// public Edge getMinimumCardinalityEdge() { +// +// return getMinimumCardinalityEdge(null); +// +// } + + /** + * Return the #of edges in which the given vertex appears where the + * other vertex of the edge does not appear in the set of visited + * vertices. + * + * @param v + * The vertex. + * @param visited + * A set of vertices to be excluded from consideration. + * + * @return The #of such edges. + */ + public int getEdgeCount(final Vertex v, final Set<Vertex> visited) { + + return getEdges(v, visited).size(); + + } + + /** + * Return the edges in which the given vertex appears where the other + * vertex of the edge does not appear in the set of visited vertices. + * + * @param v + * The vertex. + * @param visited + * A set of vertices to be excluded from consideration + * (optional). + * + * @return Those edges. 
+ */ + public List<Edge> getEdges(final Vertex v, final Set<Vertex> visited) { + + if (v == null) + throw new IllegalArgumentException(); + + if (visited != null && visited.contains(v)) + return Collections.emptyList(); + + final List<Edge> tmp = new LinkedList<Edge>(); + + for (Edge e : E) { + + if (v.equals(e.v1) || v.equals(e.v2)) { + + if (visited != null) { + + if (visited.contains(e.v1)) + continue; + + if (visited.contains(e.v2)) + continue; + + } + + tmp.add(e); + + } } + return tmp; + } - + + /** + * + * @param queryEngine + * @param limit + * The limit for sampling a vertex and the initial limit for + * cutoff join evaluation. A reasonable value is + * <code>100</code>. + * @param timeout + * The timeout for cutoff join path evaluation + * (milliseconds). A reasonable value is <code>100</code>ms. + * @throws Exception + * + * FIXME This must either return the query plan or copy the + * results as they are materialized to the sink for the join + * graph operator. + * + * + * @todo We do not need the [timeout] as long as we evaluate each cutoff + * join separately. The limited number of input solutions to the + * join automatically limits the amount of work the join can do. + * However, if we do cutoff evaluation of a series of edges then + * it is possible to do a lot of work in order to find [limit] + * solutions. In this case, a [timeout] protects us against join + * paths which have poor correlations and large cardinality for + * their vertices (a lot of solutions are considered to produce + * very few results). + */ + public void runtimeOptimizer(final QueryEngine queryEngine, + final int limit, final long timeout) throws Exception { + + final BOpContextBase context = new BOpContextBase(queryEngine); + + if (log.isInfoEnabled()) + log.info("limit=" + limit); + + /* + * Sample the vertices. + * + * TODO Sampling for scale-out not yet finished. + * + * FIXME Re-sampling will always produce the same sample depending + * on the sample operator impl (it should be random, but it is not). + */ + sampleVertices(context, limit); + + if(log.isDebugEnabled()) + log.debug("joinGraph=" + toString()); + + /* + * Estimate the cardinality and weights for each edge, obtaining the + * Edge with the minimum estimated cardinality. This will be the + * starting point for the join graph evaluation. + * + * @todo It would be very interesting to see the variety and/or + * distribution of the values bound when the edge is sampled. This + * can be easily done using a hash map with a counter. That could + * tell us a lot about the cardinality of the next join path + * (sampling the join path also tells us a lot, but it does not + * explain it as much as seeing the histogram of the bound values). + * I believe that there are some interesting online algorithms for + * computing the N most frequent observations and the like which + * could be used here. + * + * TODO ROX is choosing the starting edge based on the minimum + * estimated cardinality. However, it is possible for there to be + * more than one edge with an estimated cardinality which is + * substantially to the minimum estimated cardinality. It would be + * best to start from multiple vertices so we can explore join paths + * which begin with those alternative starting vertices as well. + * (LUBM Q2 is an example of such a query). + */ + estimateEdgeWeights(queryEngine, limit); + + while(moreEdgesToVisit(executedVertices)) { + + // Decide on the next join path to execute. 
+ final Path p = chainSample(queryEngine, limit, timeout); + + for(Edge e : p.edges) { + + /* + * FIXME Finish the algorithm. + * + * Execute the edge. We have two choices here. If join path + * is currently materialized and the expected cardinality of + * the edge is small to moderate (LTE limit * 10) then we + * can simply materialize the result of evaluating the edge. + * + * In this case, we replace the sample for the vertex with + * the actual result of evaluating the edge. [This concept + * pre-supposes that a vertex sample is the set of matching + * elements and that we do not store the binding sets which + * satisfy the join path. I think that this is perhaps the + * primary point of difference for MonetDB/ROX and bigdata. + * bigdata is working with IBindingSet[]s and should + * associate the set of intermediate solutions which + * represent the materialized intermediate result with the + * join path, not the vertex or the edge.] + * + * Otherwise, either the join path is already only a sample + * or the expected cardinality of this edge is too large so + * we do the cutoff evaluation of the edge in order to + * propagate a sample. + * + * 1. exec(e,T1(v1),T2(v2)) + */ + + executedVertices.add(e.v1); + executedVertices.add(e.v2); + + } + + /* + * Re-sample edges branching from any point in the path which we + * just executed. The purpose of this is to improve the + * detection of correlations using a materialized sample of the + * intermediate results (which will be correlated) rather than + * independent samples of the vertices (which are not + * correlated). + * + * Also, note that ROX only samples vertices which satisfy the + * zero investment property and therefore there could be + * vertices which have not yet been sampled if some vertices are + * not associated with an index. + * + * @todo This could just be another call to sampleVertices() and + * estimateEdgeWeights() if those methods accepted the set of + * already executed vertices so they could make the proper + * exclusions (or if we had a method which returned the + * un-executed vertices and/or edges). + */ +// e.v1.sample(context, limit); +// e.v2.sample(context, limit); + + } + + } + + /** + * Return <code>true</code> iff there exists at least one {@link Edge} + * branching from a vertex NOT found in the set of vertices which have + * visited. + * + * @param visited + * A set of vertices. + * + * @return <code>true</code> if there are more edges to explore. + */ + private boolean moreEdgesToVisit(final Set<Vertex> visited) { + + // Consider all edges. + for(Edge e : E) { + + if (visited.contains(e.v1) && visited.contains(e.v2)) { + /* + * Since both vertices for this edge have been executed the + * edge is now redundant. Either it was explicitly executed + * or another join path was used which implies the edge by + * transitivity in the join graph. + */ + continue; + } + + /* + * We found a counter example (an edge which has not been + * explored). + */ + if (log.isTraceEnabled()) + log.trace("Edge has not been explored: " + e); + + return true; + + } + + // No more edges to explore. + return false; + + } + + /** + * E + * + * @param limit + * @return + * + * TODO How to indicate the set of edges which remain to be + * explored? + * + * @throws Exception + */ + public Path chainSample(final QueryEngine queryEngine, final int limit, + final long timeout) throws Exception { + + final Vertex source; + { + /* + * Find the edge having the minimum estimated cardinality. 
+ */ + final Edge e = getMinimumCardinalityEdge(executedVertices); + + if (e == null) + throw new RuntimeException("No weighted edges."); + + /* + * Decide which vertex of that edge will be the starting point + * for chain sampling (if any). + */ + if (getEdgeCount(e.v1, executedVertices) > 1 + || getEdgeCount(e.v2, executedVertices) > 1) { + /* + * There is at least one vertex of that edge which branches. + * Chain sampling will begin with the vertex of that edge + * which has the lower estimated cardinality. + * + * TODO It could be that the minimum cardinality vertex does + * not branch. What happens for that code path? Do we just + * execute that edge and then reenter chain sampling? If so, + * it would be cleared to test for this condition explicitly + * up front. + */ + source = e.getMinimumCardinalityVertex(); + } else { + /* + * There is no vertex which branches for that edge. This is + * a stopping condition for chain sampling. The path + * consisting of just that edge is returned and should be + * executed by the caller. + */ + return new Path(e); + } + + } + + /* + * Setup some data structures for one or more breadth first + * expansions of the set of path(s) which are being sampled. This + * iteration will continue until we reach a stopping condition. + */ + + // The set of paths being considered. + final List<Path> paths = new LinkedList<Path>(); + + { + // The current path. + final Path p = new Path(); + + p.stopVertex = source; +// p.inputSample = source.sample; + paths.add(p); + } + + // initialize the cutoff to the limit used to sample the vertices. + int cutoff = limit; + long cutoffMillis = timeout; + + final Set<Vertex> unsampled = new LinkedHashSet<Vertex>( + executedVertices); + + /* + * One breadth first expansion of the join paths. + * + * Note: This expands each join path one vertex in each iteration. + * However, different join paths can expand from different vertices. + * + * For ROX, each join path is expanded from the last vertex which + * was added to that join path so the initial edge for each join + * path strongly determines the edges in the join graph along which + * that join path can grow. + * + * For bigdata, we can grow the path from any vertex already in the + * path to any vertex which (a) is not yet in the path; and (b) has + * not yet been evaluated. + * + * This suggests that this loop must consider each of the paths to + * decide whether that path can be extended. + */ + while (moreEdgesToVisit(unsampled)) { + + // increment the cutoff. + cutoff += limit; + cutoffMillis += timeout; + + // Consider each path. + for(Path p : paths) { + + /* + * The vertex at which we stopped expanding that path the + * last time. + * + * TODO ROX might have to traverse vertex to vertex along + * edges, but we can execute any edge whose preconditions + * have been satisfied. + */ + final Vertex v = p.stopVertex; + + // TODO depends on the notion of the paths remaining. + if (getEdgeCount(v, null/*executed+sampled(p)*/) > 0) { + /* + * This path branches at this vertex, so remove the old + * path 1st. + */ + paths.remove(p); + } + + // For each edge which is a neighbor of the vertex [v]. + final List<Edge> neighbors = null; + for(Edge e : neighbors) { + // 1. append the edge to the path + final Path p1 = p.addEdge(queryEngine, cutoff, e); + // 3. add the path to paths. 
+ paths.add(p1); + } + + } + + final Path p = getSelectedJoinPath(paths.toArray(new Path[paths.size()])); + + if(p != null) { + + return p; + + } + + } // while(moreEdgesToSample) + + final Path p = getBestAlternativeJoinPath(paths.toArray(new Path[paths.size()])); + + if(p != null) { + + return p; + + } + + // TODO ROX as given can return null here, which looks like a bug. + return null; + + } // chainSample() + + /** + * Return the path which is selected by the termination criteria + * (looking for a path which dominates the alternatives). + * + * @param a + * An array of {@link Path}s to be tested. + * + * @return The selected path -or- <code>null</code> if none of the paths + * is selected. + * + * @todo Should we only explore beneath the diagonal? + * + * @todo What is the basis for comparing the expected cardinality of + * join paths? Where one path is not simply the one step extension + * of the other. + * <p> + * This rule might only be able to compare the costs for paths in + * which one path directly extends another. + * <p> + * It is not clear that this code is comparing all paths which + * need to be compared. + */ + public Path getSelectedJoinPath(final Path[] a) { + final StringBuilder sb = new StringBuilder(); + final Formatter f = new Formatter(sb); + for (int i = 0; i < a.length; i++) { + Path p = null; + final Path Pi = a[i]; + if (Pi.sample == null) + throw new RuntimeException("Not sampled: " + Pi); + for (int j = 0; j < a.length; j++) { + if (i == j) + continue; + final Path Pj = a[j]; + if (Pj.sample == null) + throw new RuntimeException("Not sampled: " + Pj); + final long costPi = Pi.sample.estimatedCardinality; + final double sfPi = Pi.sample.f; + final long costPj = Pj.sample.estimatedCardinality; + final long expectedCombinedCost = costPi + + (long) (sfPi * costPj); + final boolean lt = expectedCombinedCost < costPj; + { + f + .format( + "Comparing: P[% 2d] with P[% 2d] : % 10d + (% 7.2f * % 10d) %2s %10d", + i, j, costPi, sfPi, costPj, (lt ? "<" + : ">="), costPj); + System.err.println(sb); + sb.setLength(0); + } + if (lt) { + p = Pi; + } else { + p = null; + break; + } + } // Pj + if (p != null) + return p; + } // Pi + /* + * None of the paths is a winner according to the selection + * criteria. + */ + return null; + } + + /** + * Termination condition if no more edges to sample. This + * breaks the deadlock by preferring the path whose .... + */ + public Path getBestAlternativeJoinPath(final Path[] a) { + for (int i = 0; i < a.length; i++) { + Path p = null; + final Pat... [truncated message content] |
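The cardinality arithmetic introduced by this revision is compact enough to demonstrate in isolation. The sketch below restates the EdgeSample estimate as a self-contained Java class: the join hit ratio f is the ratio of solutions produced to source binding sets consumed by the cutoff join, the estimated cardinality scales the source range count by f, and the lower-bound, upper-bound, and exact flags follow the same conditions as the patch. The class name and the main() driver are illustrative only, not part of the bigdata API.

public class CutoffJoinEstimate {

    final long rangeCount;  // fast range count of the more selective source
    final int limit;        // cutoff on the #of solutions generated
    final int inputCount;   // #of source binding sets consumed
    final int outputCount;  // #of solutions generated before the cutoff
    final double f;         // join hit ratio
    final long estimatedCardinality;
    final boolean estimateIsLowerBound;
    final boolean estimateIsUpperBound;
    final boolean exact;

    CutoffJoinEstimate(final long rangeCount, final int limit,
            final boolean sourceSampleExact, final int inputCount,
            final int outputCount) {

        this.rangeCount = rangeCount;
        this.limit = limit;
        this.inputCount = inputCount;
        this.outputCount = outputCount;

        // Join hit ratio: solutions out per source binding set consumed.
        this.f = outputCount == 0 ? 0 : (outputCount / (double) inputCount);

        // Scale the source cardinality by the join hit ratio.
        this.estimatedCardinality = (long) (rangeCount * f);

        // A single input saturated the cutoff, so the true cardinality
        // could be higher than the estimate.
        this.estimateIsLowerBound = inputCount == 1 && outputCount == limit;

        // A non-exact source sample failed to reach the cutoff, so the
        // estimate may have underflowed.
        this.estimateIsUpperBound = !sourceSampleExact && outputCount < limit;

        // An exact input which did not reach the cutoff is the exact
        // solution of the join, not an estimate.
        this.exact = sourceSampleExact && outputCount < limit;
    }

    public static void main(String[] args) {
        // 500,000 tuples on the source vertex; all 100 sampled inputs were
        // consumed and yielded 40 solutions, below the cutoff of 100.
        final CutoffJoinEstimate e = new CutoffJoinEstimate(500_000L,
                100/* limit */, false/* sourceSampleExact */,
                100/* inputCount */, 40/* outputCount */);
        System.out.println("f=" + e.f
                + ", estimatedCardinality=" + e.estimatedCardinality);
        // prints: f=0.4, estimatedCardinality=200000
    }
}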
From: <tho...@us...> - 2010-11-12 16:33:34
Revision: 3941 http://bigdata.svn.sourceforge.net/bigdata/?rev=3941&view=rev Author: thompsonbry Date: 2010-11-12 16:33:28 +0000 (Fri, 12 Nov 2010) Log Message: ----------- more work on runtime query optimization Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/bench/AdaptiveQueryOptimization.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2010-11-12 15:48:11 UTC (rev 3940) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2010-11-12 16:33:28 UTC (rev 3941) @@ -31,6 +31,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.Comparator; import java.util.Formatter; import java.util.Iterator; import java.util.LinkedHashSet; @@ -525,14 +526,18 @@ * @param outputCount * The #of binding sets generated before the join was cutoff. */ - EdgeSample(final VertexSample sourceVertexSample, final int limit, + EdgeSample(//final VertexSample sourceVertexSample, + final long sourceSampleRangeCount, + final boolean sourceSampleExact, + final int limit, final int inputCount, final int outputCount, final IBindingSet[] sample) { if(sample == null) throw new IllegalArgumentException(); - this.rangeCount = sourceVertexSample.rangeCount; +// this.rangeCount = sourceVertexSample.rangeCount; + this.rangeCount = sourceSampleRangeCount; this.limit = limit; @@ -546,10 +551,11 @@ estimateIsLowerBound = inputCount == 1 && outputCount == limit; - estimateIsUpperBound = !sourceVertexSample.exact +// final boolean sourceSampleExact = sourceVertexSample.exact; + estimateIsUpperBound = !sourceSampleExact && outputCount < limit; - this.exact = sourceVertexSample.exact && outputCount < limit; + this.exact = sourceSampleExact && outputCount < limit; this.sample = sample; } @@ -777,8 +783,9 @@ } // Sample the edge and save the sample on the edge as a side-effect. - this.sample = estimateCardinality(queryEngine, limit, v, vp, sourceSample); - + this.sample = estimateCardinality(queryEngine, limit, v, vp, + v.sample.rangeCount, v.sample.exact, sourceSample); + return sample.estimatedCardinality; } @@ -806,7 +813,9 @@ */ public EdgeSample estimateCardinality(final QueryEngine queryEngine, final int limit, final Vertex vSource, final Vertex vTarget, - IBindingSet[] sourceSample) throws Exception { + final long sourceSampleRangeCount, + final boolean sourceSampleExact, IBindingSet[] sourceSample) + throws Exception { if (limit <= 0) throw new IllegalArgumentException(); @@ -884,13 +893,15 @@ * FIXME I am not convinced that this approach is quite right. I am * also not convinced that this approach will correctly carry the * additional metadata on the EdgeSample (exact, estimate overflow - * and underflow, etc). + * and underflow, etc). [This needs to be the estimated cardinality + * of the path which is being extended by an edge to the target + * vertex.] */ - final VertexSample moreSelectiveVertexSample = vSource.sample.rangeCount < vTarget.sample.rangeCount ? 
vSource.sample - : vTarget.sample; +// final VertexSample moreSelectiveVertexSample = vSource.sample.rangeCount < vTarget.sample.rangeCount ? vSource.sample +// : vTarget.sample; final EdgeSample edgeSample = new EdgeSample( - moreSelectiveVertexSample/* vSource.sample */, limit, + sourceSampleRangeCount, sourceSampleExact, limit, inputCount, outputCount, result .toArray(new IBindingSet[result.size()])); @@ -958,25 +969,42 @@ */ public static class Path { + /** + * An immutable ordered list of the edges in the (aka the sequence of + * joins represented by this path). + */ public final List<Edge> edges; - /* - * These fields carry state used by chainSample. It would be better to - * have that state on a data structure which is purely local to - * chainSample, but perhaps Path is that data structure. + /** + * The sample obtained by the step-wise cutoff evaluation of the ordered + * edges of the path. This sample is generated one edge at a time rather + * than by attempting the cutoff evaluation of the entire join path (the + * latter approach does allow us to limit the amount of work to be done + * to satisfy the cutoff). */ - - public EdgeSample sample = null; + final public EdgeSample sample; -// /** -// * Input to the next round of sampling. -// */ -// private VertexSample inputSample; + /** + * The cumulative estimated cardinality of the path. This is zero for an + * empty path. For a path consisting of a single edge, this is the + * estimated cardinality of that edge. When creating a new path adding + * an edge to an existing path, the cumulative cardinality of the new + * path is the cumulative cardinality of the existing path plus the + * estimated cardinality of the cutoff join of the new edge given the + * input sample of the existing path. + */ + final public long cumulativeEstimatedCardinality; /** * The vertex at which the path from which this path was derived * stopped. This is initialized to the source vertex when entering the * chainSample() method. + * + * @todo This is used by ROX to only grow the path from its end. We + * could of course just look at the last edge in the path. + * However, I think that I prefer to grow a path from any + * branching vertex as long as the path does not duplicate any + * path already generated (including those which were pruned). */ private Vertex stopVertex; @@ -990,7 +1018,8 @@ sb.append("(" + e.v1.pred.getId() + "," + e.v2.pred.getId() + ")"); first = false; } - sb.append(",sample=" + sample + "}"); + sb.append(",cumEstCard=" + cumulativeEstimatedCardinality + + ",sample=" + sample + "}"); return sb.toString(); } @@ -998,7 +1027,9 @@ * Create an empty path. */ public Path() { - this.edges = new LinkedList<Edge>(); + this.edges = Collections.emptyList(); + this.cumulativeEstimatedCardinality = 0; + this.sample = null; } /** @@ -1008,14 +1039,49 @@ * The edge. */ public Path(final Edge e) { + if (e == null) throw new IllegalArgumentException(); - this.edges = new LinkedList<Edge>(); - this.edges.add(e); + + if (e.sample == null) + throw new IllegalArgumentException("Not sampled: "+e); + + this.edges = Collections.singletonList(e); + this.sample = e.sample; + + this.cumulativeEstimatedCardinality = e.sample.estimatedCardinality; + } /** + * Constructor used by {@link #addEdge(QueryEngine, int, Edge)} + * @param edges The edges in the new path. + * @param cumulativeEstimatedCardinality The cumulative estimated cardinality of the new path. 
+ * @param sample The sample from the last + */ + private Path(final List<Edge> edges, + final long cumulativeEstimatedCardinality, + final EdgeSample sample) { + + if (edges == null) + throw new IllegalArgumentException(); + + if (cumulativeEstimatedCardinality < 0) + throw new IllegalArgumentException(); + + if (sample == null) + throw new IllegalArgumentException(); + + this.edges = Collections.unmodifiableList(edges); + + this.cumulativeEstimatedCardinality = cumulativeEstimatedCardinality; + + this.sample = sample; + + } + + /** * Return <code>true</code> iff the {@link Path} contains at least one * {@link Edge} for that {@link Vertex}. * @@ -1038,6 +1104,86 @@ return false; } + + /** + * Return <code>true</code> if this path is an unordered super set of + * the given path. In the case where both paths have the same vertices + * this will also return <code>true</code>. + * + * @param p + * Another path. + * + * @return <code>true</code> if this path is an unordered super set of + * the given path. + */ + public boolean isUnorderedSuperSet(final Path p) { + + if (p == null) + throw new IllegalArgumentException(); + + if (edges.size() < p.edges.size()) { + /* + * Fast rejection. This assumes that each edge after the first + * adds one distinct vertex to the path. That assumption is + * enforced by #addEdge(). + */ + return false; + } + + final Vertex[] v1 = getVertices(); + final Vertex[] v2 = p.getVertices(); + + if (v1.length < v2.length) { + // Proven false since the other set is larger. + return false; + } + + /* + * Scan the vertices of the caller's path. If any of those vertices + * are NOT found in this path then the caller's path can not be a + * subset of this path. + */ + for (int i = 0; i < v2.length; i++) { + + final Vertex tmp = v2[i]; + + boolean found = false; + for (int j = 0; j < v1.length; j++) { + + if (v1[j] == tmp) { + found = true; + break; + } + + } + + if (!found) { + return false; + } + + } + + return true; + + } + + /** + * Return the vertices in this path (in path order). + * + * @return The vertices (in path order). + * + * @todo this could be rewritten without the toArray() using a method + * which visits the vertices of a path in any order. + */ + public Vertex[] getVertices() { + final Set<Vertex> tmp = new LinkedHashSet<Vertex>(); + for (Edge e : edges) { + tmp.add(e.v1); + tmp.add(e.v2); + } + final Vertex[] a = tmp.toArray(new Vertex[tmp.size()]); + return a; + } /** * Add an edge to a path, computing the estimated cardinality of the new @@ -1078,13 +1224,6 @@ // The new vertex, which is not part of this path. final Vertex targetVertex = v1Found ? e.v2 : e.v1; - // Extend the path. - final Path tmp = new Path(); - - tmp.edges.addAll(edges); - - tmp.edges.add(e); - /* * Chain sample the edge. * @@ -1110,14 +1249,32 @@ // 0/* start */, this.sample.sample); final EdgeSample edgeSample = e.estimateCardinality(queryEngine, - limit, sourceVertex, targetVertex, this.sample.sample); + limit, sourceVertex, targetVertex, + this.sample.estimatedCardinality, this.sample.exact, + this.sample.sample); - tmp.sample = edgeSample; + { + + final List<Edge> edges = new ArrayList<Edge>( + this.edges.size() + 1); + + edges.addAll(this.edges); + + edges.add(e); + + final long cumulativeEstimatedCardinality = this.cumulativeEstimatedCardinality + + edgeSample.estimatedCardinality; + + // Extend the path. 
+ final Path tmp = new Path(edges, + cumulativeEstimatedCardinality, edgeSample); + + // tmp.stopVertex = e.getMaximumCardinalityVertex(); + + return tmp; + + } -// tmp.stopVertex = e.getMaximumCardinalityVertex(); - - return tmp; - } // /** @@ -1184,17 +1341,24 @@ static public String showTable(final Path[] a) { final StringBuilder sb = new StringBuilder(); final Formatter f = new Formatter(sb); - for (Path x : a) { + for(int i=0; i<a.length; i++) { + final Path x = a[i]; if (x.sample == null) { - f.format("%7s, %10s", "N/A", "N/A"); + f.format("p[%2d] %7s, %10s %10s", "N/A", "N/A", "N/A", i); } else { - f.format("% 7.2f, % 10d", x.sample.f, - x.sample.estimatedCardinality); + f.format("p[%2d] % 7.2f, % 10d % 10d", i, x.sample.f, + x.sample.estimatedCardinality, + x.cumulativeEstimatedCardinality); } - sb.append(","); - for (Edge e : x.edges) - sb.append(" (" + e.v1.pred.getId() + " " + e.v2.pred.getId() - + ")"); + sb.append(", ["); + final Vertex[] vertices = x.getVertices(); + for(Vertex v : vertices) { + f.format("%2d ", v.pred.getId()); + } + sb.append("]"); +// for (Edge e : x.edges) +// sb.append(" (" + e.v1.pred.getId() + " " + e.v2.pred.getId() +// + ")"); sb.append("\n"); } return sb.toString(); @@ -1903,12 +2067,40 @@ * <p> * It is not clear that this code is comparing all paths which * need to be compared. + * + * @todo I have restated the termination rule as follows. + * <p> + * If there is a path [p] whose total cost is LTE the cost of + * executing just its last edge [e], then the path [p] dominates + * all paths beginning with edge [e]. The dominated paths should + * be pruned. + * <p> + * If there is a path, [p], which is an unordered extension of + * another path, [p1] (the vertices of p are a superset of the + * vertices of p1), and the cost of [p] is LTE the cost of [p1], + * then [p] dominates [p1]. The dominated paths should be pruned. + * <p> + * If there is a path, [p], which has the same vertices as a path + * [p1] and the cost of [p] is LTE the cost of [p1], then [p] + * dominates (or is equivalent to) [p1]. The path [p1] should be + * pruned. + * + * For a given path length [l], if no paths of length [l] remain + * then the minimum cost path of length GT [l] may be executed. + * + * @todo Due to sampling error and the desire to be robust to small + * differences in the expected cost of an operation, we should + * only consider two significant digits when comparing estimates + * of cost. E.g., 990 and 1000 should not be differentiated as + * they are the same within the sampling error. This should be + * used to chose all starting vertices which have the same minimum + * cardinality. */ public Path getSelectedJoinPath(final Path[] a) { final StringBuilder sb = new StringBuilder(); final Formatter f = new Formatter(sb); + Path p = null; for (int i = 0; i < a.length; i++) { - Path p = null; final Path Pi = a[i]; if (Pi.sample == null) throw new RuntimeException("Not sampled: " + Pi); @@ -1918,31 +2110,45 @@ final Path Pj = a[j]; if (Pj.sample == null) throw new RuntimeException("Not sampled: " + Pj); + /* + * FIXME This needs to compare the cost of Pj given path Pi + * against the cost of Pj when executed as a single edge (or + * by any other alternative join path sequence). The choice + * of Pi and Pj is not coherent and the same value of costPj + * is being used for both sides of the equation. 
+ */ final long costPi = Pi.sample.estimatedCardinality; final double sfPi = Pi.sample.f; final long costPj = Pj.sample.estimatedCardinality; final long expectedCombinedCost = costPi + (long) (sfPi * costPj); - final boolean lt = expectedCombinedCost < costPj; + /* + * @todo I think that LTE makes more sense here since having + * the same net cardinality for a given edge after + * performing more steps would appear to be worth while. + */ + final boolean lte = expectedCombinedCost <= costPj; { f .format( - "Comparing: P[% 2d] with P[% 2d] : % 10d + (% 7.2f * % 10d) %2s %10d", - i, j, costPi, sfPi, costPj, (lt ? "<" - : ">="), costPj); + "Comparing: P[%2d] with P[%2d] : (% 10d + (% 7.2f * % 10d) = %10d) %2s %10d", + i, j, costPi, sfPi, costPj, expectedCombinedCost, (lte ? "<=" + : ">"), costPj); System.err.println(sb); sb.setLength(0); } - if (lt) { + if (lte) { p = Pi; - } else { - p = null; +// } else { +// p = null; break; } } // Pj - if (p != null) - return p; +// if (p != null) +// return p; } // Pi + if (p != null) + return p; /* * None of the paths is a winner according to the selection * criteria. @@ -1951,6 +2157,98 @@ } /** + * Prune paths which are dominated by other paths. Start the algorithm + * by passing in all edges which have the minimum cardinality (when + * comparing their expected cardinality after rounding to 2 significant + * digits). + * <p> + * If there is a path [p] whose total cost is LTE the cost of executing + * just its last edge [e], then the path [p] dominates all paths + * beginning with edge [e]. The dominated paths should be pruned. [This + * is a degenerate case of the next rule.] + * <p> + * If there is a path, [p] != [p1], where [p] is an unordered superset + * of [p1] (that is the vertices of p are a superset of the vertices of + * p1, but allowing the special case where the set of vertices are the + * same), and the cumulative cost of [p] is LTE the cumulative cost of + * [p1], then [p] dominates (or is equivalent to) [p1] and p1 should be + * pruned. + * <p> + * If there is a path, [p], which has the same vertices as a path [p1] + * and the cumulative cost of [p] is LTE the cumulative cost of [p1], + * then [p] dominates (or is equivalent to) [p1]. The path [p1] should + * be pruned. [This is a degenerate case of the prior rule.] + * + * @param a + * A set of paths. + * + * @return The set of paths with all dominated paths removed. + * + * @todo This does not give us a stopping condition unless the set of + * paths becomes empty. I think it will tend to search too far for + * a best path, running the risk of increasing inaccuracy + * introduced by propagation of samples. Resampling the vertices + * and increasing the vertex and edge cutoff at each iteration of + * the search could compensate for that. + * + * @todo Cumulative estimated cardinality is an estimate of the work to + * be done. However, the actual cost of a join depends on whether + * we will use nested index subquery or a hash join and the cost + * of that operation on the database. There could be counter + * examples where the cost of the hash join with a range scan + * using the unbound variable is LT the nested index subquery. For + * those cases, we will do the same amount of IO on the hash join + * but there will still be a lower cardinality to the join path + * since we are feeding in fewer solutions to be joined. 
+ */ + public Path[] pruneJoinPaths(final Path[] a) { + final StringBuilder sb = new StringBuilder(); + final Formatter f = new Formatter(sb); + final Set<Path> pruned = new LinkedHashSet<Path>(); + for (int i = 0; i < a.length; i++) { + final Path Pi = a[i]; + if (Pi.sample == null) + throw new RuntimeException("Not sampled: " + Pi); + for (int j = 0; j < a.length; j++) { + if (i == j) + continue; + final Path Pj = a[j]; + if (Pj.sample == null) + throw new RuntimeException("Not sampled: " + Pj); + final boolean isPiSuperSet = Pi.isUnorderedSuperSet(Pj); + if(!isPiSuperSet) { + // Can not directly compare these join paths. + continue; + } + final long costPi = Pi.cumulativeEstimatedCardinality; + final long costPj = Pj.cumulativeEstimatedCardinality; + final boolean lte = costPi <= costPj; + { + f + .format( + "Comparing: P[%2d] with P[%2d] : %10d %2s %10d %s", + i, j, costPi, (lte ? "<=" : ">"), + costPj, lte ? " **prune P["+j+"]**" : ""); + System.err.println(sb); + sb.setLength(0); + } + if (lte) { + pruned.add(Pj); + } + } // Pj + } // Pi + System.err.println("Pruned "+pruned.size()+" of out "+a.length+" paths"); + final Set<Path> keep = new LinkedHashSet<Path>(); + for(Path p : a) { + if(pruned.contains(p)) + continue; + keep.add(p); + } + final Path[] b = keep.toArray(new Path[keep.size()]); + return b; + } + + /** * Termination condition if no more edges to sample. This * breaks the deadlock by preferring the path whose .... */ @@ -2044,4 +2342,39 @@ } + private static double roundToSignificantFigures(final double num, + final int n) { + if (num == 0) { + return 0; + } + + final double d = Math.ceil(Math.log10(num < 0 ? -num : num)); + final int power = n - (int) d; + + final double magnitude = Math.pow(10, power); + final long shifted = Math.round(num * magnitude); + return shifted / magnitude; + } + + /** + * Places vertices into order by the {@link BOp#getId()} associated + * with their {@link IPredicate}. + */ + private static class BOpIdComparator implements Comparator<Vertex> { + + private static final transient Comparator<Vertex> INSTANCE = new BOpIdComparator(); + + @Override + public int compare(Vertex o1, Vertex o2) { + final int id1 = o1.pred.getId(); + final int id2 = o2.pred.getId(); + if (id1 < id2) + return 1; + if (id2 > id1) + return -1; + return 0; + } + + } + } Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java 2010-11-12 15:48:11 UTC (rev 3940) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java 2010-11-12 16:33:28 UTC (rev 3941) @@ -627,7 +627,8 @@ */ final Path p0 = new Path(g.getEdge(v2, v3)); final Path p1 = new Path(g.getEdge(v2, v4)); - final Path[] paths_t0 = new Path[] { p0, p1 }; + final Path p2 = new Path(g.getEdge(v4, v1)); + final Path[] paths_t0 = new Path[] { p0, p1, p2 }; System.err.println("\n*** Paths @ t0\n" + JoinGraph.showTable(paths_t0)); @@ -638,7 +639,7 @@ // System.err.println("Selected path: " + selected_t0); // // } - + /* * The set of one step extensions of those paths. * @@ -648,28 +649,47 @@ * distinct from all other paths already generated in this breadth * first expansion of the search space. (ROX further constrains the * new paths to extend the stop vertex of the path from which they - * are derived.) + * are derived.) 
+ * + * @todo always label edges by either minimum bopId or minimum + * estimated cardinality (with tie broken by bopId)? When extending + * a path in which more than one edge can reach the target vertex, + * always chose the edge having the source vertex with the minimum + * cardinality? */ final Path[] paths_t1 = new Path[] {// + // t0 + p0, // (2,3) + p1, // (2,4) + p2, // (4,1) + // t1 p0.addEdge(queryEngine, limit, g.getEdge(v2, v4)), // aka (v3,v4) p0.addEdge(queryEngine, limit, g.getEdge(v3, v0)), // p0.addEdge(queryEngine, limit, g.getEdge(v3, v5)), // + // p1.addEdge(queryEngine, limit, g.getEdge(v4, v1)), // p1.addEdge(queryEngine, limit, g.getEdge(v4, v3)), // p1.addEdge(queryEngine, limit, g.getEdge(v4, v5)), // + // + p2.addEdge(queryEngine, limit, g.getEdge(v1, v5)), // aka (4,5) + p2.addEdge(queryEngine, limit, g.getEdge(v4, v3)), // + p2.addEdge(queryEngine, limit, g.getEdge(v4, v2)), // + }; System.err.println("\n*** Paths @ t1\n" + JoinGraph.showTable(paths_t1)); - final Path selected_t1 = g.getSelectedJoinPath(paths_t1); - - if (selected_t1 != null) { + g.pruneJoinPaths(paths_t1); + +// final Path selected_t1 = g.getSelectedJoinPath(paths_t1); +// +// if (selected_t1 != null) { +// +// System.err.println("Selected path: " + selected_t1); +// +// } - System.err.println("Selected path: " + selected_t1); - - } - } Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/bench/AdaptiveQueryOptimization.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/bench/AdaptiveQueryOptimization.java 2010-11-12 15:48:11 UTC (rev 3940) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/bench/AdaptiveQueryOptimization.java 2010-11-12 16:33:28 UTC (rev 3941) @@ -286,79 +286,79 @@ if (startEdge == null) throw new RuntimeException("No weighted edges."); - /* - * Generate a set of paths by extending that starting vertex in one - * step in each possible direction. For the initial one-step - * extension of the starting vertex we can reuse the estimated - * cardinality of each edge in the join graph, which was already - * computed above. - */ - final Path[] paths; - { +// /* +// * Generate a set of paths by extending that starting vertex in one +// * step in each possible direction. For the initial one-step +// * extension of the starting vertex we can reuse the estimated +// * cardinality of each edge in the join graph, which was already +// * computed above. +// */ +// final Path[] paths; +// { +// +// System.err.println("startEdge="+startEdge); +// +// // The starting vertex is the one with the minimum est. +// // cardinality. +// final Vertex startVertex = startEdge +// .getMinimumCardinalityVertex(); +// +// System.err.println("startVertex=" + startVertex); +// +// // Find the set of edges branching from the starting vertex. +// final List<Edge> branches = g +// .getEdges(startVertex, null/* visited */); +// +// if (branches.isEmpty()) { +// +// // No vertices remain to be explored so we should just execute something. +// throw new RuntimeException("Paths can not be extended"); +// +// } else if (branches.size() == 1) { +// +// final Edge e = branches.get(0); +// +// final Path path = new Path(e); +// +// // The initial sample is just the sample for that edge. 
+// path.sample = e.sample; +// +// System.err.println("path=" + path); +// +// paths = new Path[] { path }; +// +// } else { +// +// final List<Path> list = new LinkedList<Path>(); +// +// // Create one path for each of those branches. +// for (Edge e : branches) { +// +// if (e.v1 != startVertex && e.v2 != startVertex) +// continue; +// +// // Create a one step path. +// final Path path = new Path(e); +// +// // The initial sample is just the sample for that edge. +// path.sample = e.sample; +// +// System.err +// .println("path[" + list.size() + "]: " + path); +// +// list.add(path); +// +// } +// +// paths = list.toArray(new Path[list.size()]); +// +// } +// +// System.err.println("selectedJoinPath: " +// + g.getSelectedJoinPath(paths)); +// +// } - System.err.println("startEdge="+startEdge); - - // The starting vertex is the one with the minimum est. - // cardinality. - final Vertex startVertex = startEdge - .getMinimumCardinalityVertex(); - - System.err.println("startVertex=" + startVertex); - - // Find the set of edges branching from the starting vertex. - final List<Edge> branches = g - .getEdges(startVertex, null/* visited */); - - if (branches.isEmpty()) { - - // No vertices remain to be explored so we should just execute something. - throw new RuntimeException("Paths can not be extended"); - - } else if (branches.size() == 1) { - - final Edge e = branches.get(0); - - final Path path = new Path(e); - - // The initial sample is just the sample for that edge. - path.sample = e.sample; - - System.err.println("path=" + path); - - paths = new Path[] { path }; - - } else { - - final List<Path> list = new LinkedList<Path>(); - - // Create one path for each of those branches. - for (Edge e : branches) { - - if (e.v1 != startVertex && e.v2 != startVertex) - continue; - - // Create a one step path. - final Path path = new Path(e); - - // The initial sample is just the sample for that edge. - path.sample = e.sample; - - System.err - .println("path[" + list.size() + "]: " + path); - - list.add(path); - - } - - paths = list.toArray(new Path[list.size()]); - - } - - System.err.println("selectedJoinPath: " - + g.getSelectedJoinPath(paths)); - - } - /* * FIXME Now extend the initial paths some more and explore the * termination criteria and how they handle paths which are extended This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
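The pruning rules documented on pruneJoinPaths() reduce to a single dominance test once a path is viewed as its vertex set plus its cumulative cost: [p] dominates [p1] when [p]'s vertices are a (possibly equal) superset of [p1]'s and [p]'s cumulative estimated cardinality is LTE [p1]'s. Below is a self-contained sketch of that test against a reduced stand-in for Path (PathSketch is hypothetical; the shipped code operates on the full Path objects and also prints a comparison trace):

import java.util.ArrayList;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;

// Reduced stand-in for Path: just the vertex ids and the cumulative cost.
final class PathSketch {
    final Set<Integer> vertexIds;
    final long cumulativeEstimatedCardinality;
    PathSketch(final Set<Integer> vertexIds, final long cumCost) {
        this.vertexIds = vertexIds;
        this.cumulativeEstimatedCardinality = cumCost;
    }
}

final class PruneSketch {
    /** Remove every path dominated per the rules above. */
    static List<PathSketch> prune(final List<PathSketch> a) {
        final Set<PathSketch> pruned = new LinkedHashSet<PathSketch>();
        for (PathSketch pi : a) {
            if (pruned.contains(pi))
                continue;
            for (PathSketch pj : a) {
                if (pi == pj || pruned.contains(pj))
                    continue;
                // [pi] dominates [pj] iff pi's vertices are a (non-strict)
                // superset of pj's and pi's cumulative cost is LTE pj's.
                if (pi.vertexIds.containsAll(pj.vertexIds)
                        && pi.cumulativeEstimatedCardinality <= pj.cumulativeEstimatedCardinality) {
                    pruned.add(pj);
                }
            }
        }
        final List<PathSketch> keep = new ArrayList<PathSketch>();
        for (PathSketch p : a)
            if (!pruned.contains(p))
                keep.add(p);
        return keep;
    }
}

Skipping paths that are already pruned guards the degenerate case of two paths with identical vertex sets and equal costs: they do not eliminate each other, and the first one examined survives.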
From: <tho...@us...> - 2010-11-12 17:28:16
Revision: 3943 http://bigdata.svn.sourceforge.net/bigdata/?rev=3943&view=rev Author: thompsonbry Date: 2010-11-12 17:28:10 +0000 (Fri, 12 Nov 2010) Log Message: ----------- more on runtime query optimizer Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2010-11-12 17:17:33 UTC (rev 3942) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2010-11-12 17:28:10 UTC (rev 3943) @@ -1184,6 +1184,35 @@ final Vertex[] a = tmp.toArray(new Vertex[tmp.size()]); return a; } + + /** + * Return <code>true</code> if this path begins with the given path. + * + * @param p + * The given path. + * + * @return <code>true</code> if this path begins with the given path. + */ + public boolean beginsWith(final Path p) { + + if (p == null) + throw new IllegalArgumentException(); + + if (p.edges.size() > edges.size()) { + // Proven false since the caller's path is longer. + return false; + } + + for (int i = 0; i < p.edges.size(); i++) { + final Edge eSelf = edges.get(i); + final Edge eOther = p.edges.get(i); + if (eSelf != eOther) { + return false; + } + } + + return true; + } /** * Add an edge to a path, computing the estimated cardinality of the new @@ -1498,6 +1527,78 @@ } /** + * Do one breadth first expansion. + * + * @param queryEngine + * @param limit + * @param round + * @param a + * @return + * @throws Exception + */ + final public Path[] expand(final QueryEngine queryEngine, final int limit, + final int round, final Path[] a) throws Exception { + + final List<Path> tmp = new LinkedList<Path>(); + + // First, copy all existing paths. + for (Path x : a) { + tmp.add(x); + } + + // Then expand each path. + for (Path x : a) { + + if (x.edges.size() < round) { + + continue; + + } + + final Set<Vertex> used = new LinkedHashSet<Vertex>(); + + for (Edge edgeInGraph : E) { + + // Figure out which vertices are already part of this path. + final boolean v1Found = x.contains(edgeInGraph.v1); + final boolean v2Found = x.contains(edgeInGraph.v2); + + if (!v1Found && !v2Found) { + // Edge is not connected to this path. + continue; + } + + if (v1Found && v2Found) { + // Edge is already present in this path. + continue; + } + + final Vertex newVertex = v1Found ? edgeInGraph.v2 : edgeInGraph.v1; + + if(used.contains(newVertex)) { + // Vertex already used to extend this path. + continue; + } + + // add the new vertex to the set of used vertices. + used.add(newVertex); + + // Extend the path to the new vertex. + final Path p = x.addEdge(queryEngine, limit * round, + edgeInGraph); + + // Add to the set of paths for this round. + tmp.add(p); + + } + + } + + return tmp.toArray(new Path[tmp.size()]); + + } + + /** * Return the {@link Vertex} whose {@link IPredicate} is associated with * the given {@link BOp.Annotations#BOP_ID}. 
* @@ -2209,12 +2310,16 @@ final Path Pi = a[i]; if (Pi.sample == null) throw new RuntimeException("Not sampled: " + Pi); + if (pruned.contains(Pi)) + continue; for (int j = 0; j < a.length; j++) { if (i == j) continue; final Path Pj = a[j]; if (Pj.sample == null) throw new RuntimeException("Not sampled: " + Pj); + if (pruned.contains(Pj)) + continue; final boolean isPiSuperSet = Pi.isUnorderedSuperSet(Pj); if(!isPiSuperSet) { // Can not directly compare these join paths. @@ -2223,23 +2328,35 @@ final long costPi = Pi.cumulativeEstimatedCardinality; final long costPj = Pj.cumulativeEstimatedCardinality; final boolean lte = costPi <= costPj; + List<Integer> prunedByThisPath = null; + if (lte) { + prunedByThisPath = new LinkedList<Integer>(); + if (pruned.add(Pj)) + prunedByThisPath.add(j); + for (int k = 0; k < a.length; k++) { + final Path x = a[k]; + if (x.beginsWith(Pj)) { + if (pruned.add(x)) + prunedByThisPath.add(k); + } + } + } { f .format( "Comparing: P[%2d] with P[%2d] : %10d %2s %10d %s", i, j, costPi, (lte ? "<=" : ">"), - costPj, lte ? " **prune P["+j+"]**" : ""); + costPj, lte ? " *** pruned " + + prunedByThisPath : ""); System.err.println(sb); sb.setLength(0); } - if (lte) { - pruned.add(Pj); - } } // Pj } // Pi - System.err.println("Pruned "+pruned.size()+" of out "+a.length+" paths"); + System.err.println("Pruned " + pruned.size() + " of out " + + a.length + " paths"); final Set<Path> keep = new LinkedHashSet<Path>(); - for(Path p : a) { + for (Path p : a) { if(pruned.contains(p)) continue; keep.add(p); Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java 2010-11-12 17:17:33 UTC (rev 3942) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java 2010-11-12 17:28:10 UTC (rev 3943) @@ -641,47 +641,116 @@ // } /* - * The set of one step extensions of those paths. - * - * @todo build this programmatically by finding the set of edges - * branching from the existing paths to a vertex not already part of - * the existing paths and having a total set of vertices which is - * distinct from all other paths already generated in this breadth - * first expansion of the search space. (ROX further constrains the - * new paths to extend the stop vertex of the path from which they - * are derived.) - * - * @todo always label edges by either minimum bopId or minimum - * estimated cardinality (with tie broken by bopId)? When extending - * a path in which more than one edge can reach the target vertex, - * always chose the edge having the source vertex with the minimum - * cardinality? + * t1 */ - final Path[] paths_t1 = new Path[] {// - // t0 - p0, // (2,3) - p1, // (2,4) - p2, // (4,1) - // t1 - p0.addEdge(queryEngine, limit, g.getEdge(v2, v4)), // aka (v3,v4) - p0.addEdge(queryEngine, limit, g.getEdge(v3, v0)), // - p0.addEdge(queryEngine, limit, g.getEdge(v3, v5)), // - // - p1.addEdge(queryEngine, limit, g.getEdge(v4, v1)), // - p1.addEdge(queryEngine, limit, g.getEdge(v4, v3)), // - p1.addEdge(queryEngine, limit, g.getEdge(v4, v5)), // - // - p2.addEdge(queryEngine, limit, g.getEdge(v1, v5)), // aka (4,5) - p2.addEdge(queryEngine, limit, g.getEdge(v4, v3)), // - p2.addEdge(queryEngine, limit, g.getEdge(v4, v2)), // + +// /* +// * The set of one step extensions of those paths. 
+// * +// * @todo build this programmatically by finding the set of edges +// * branching from the existing paths to a vertex not already part of +// * the existing paths and having a total set of vertices which is +// * distinct from all other paths already generated in this breadth +// * first expansion of the search space. (ROX further constrains the +// * new paths to extend the stop vertex of the path from which they +// * are derived.) +// * +// * @todo always label edges by either minimum bopId or minimum +// * estimated cardinality (with tie broken by bopId)? When extending +// * a path in which more than one edge can reach the target vertex, +// * always chose the edge having the source vertex with the minimum +// * cardinality? +// */ +// final Path[] paths_t1 = new Path[] {// +// // t0 +// p0, // (2,3) +// p1, // (2,4) +// p2, // (4,1) +// // t1 +// p0.addEdge(queryEngine, limit*2, g.getEdge(v2, v4)), // aka (v3,v4) +// p0.addEdge(queryEngine, limit*2, g.getEdge(v3, v0)), // +// p0.addEdge(queryEngine, limit*2, g.getEdge(v3, v5)), // +// // +// p1.addEdge(queryEngine, limit*2, g.getEdge(v4, v1)), // +// p1.addEdge(queryEngine, limit*2, g.getEdge(v4, v3)), // +// p1.addEdge(queryEngine, limit*2, g.getEdge(v4, v5)), // +// // +// p2.addEdge(queryEngine, limit*2, g.getEdge(v1, v5)), // aka (4,5) +// p2.addEdge(queryEngine, limit*2, g.getEdge(v4, v3)), // +// p2.addEdge(queryEngine, limit*2, g.getEdge(v4, v2)), // +// + /* + * *** Paths @ t1 - }; +p[ 1] 0.69, 68931 168831, [ 2 3 0 ] +p[ 2] 1.00, 99900 199800, [ 2 3 4 ] +p[ 3] 1.00, 99900 199800, [ 2 3 5 ] +p[ 5] 1.00, 999 1998, [ 2 4 1 ] +p[ 6] 100.00, 99900 100899, [ 2 4 3 ] +p[ 7] 20.00, 19980 20979, [ 2 4 5 ] +p[ 9] 16.67, 40650 43089, [ 1 4 5 ] +p[10] 1.00, 2439 4878, [ 1 4 2 ] +p[11] 5.00, 12195 14634, [ 1 4 3 ] + */ +// }; + int round = 1; + + final Path[] paths_t1 = g.expand(queryEngine, limit, round, paths_t0); + System.err.println("\n*** Paths @ t1\n" + JoinGraph.showTable(paths_t1)); - g.pruneJoinPaths(paths_t1); + final Path[] paths_t1_pruned = g.pruneJoinPaths(paths_t1); + + System.err.println("\n*** Paths @ t1 (after pruning)\n" + + JoinGraph.showTable(paths_t1_pruned)); + + /* + * t2 + */ + final Path[] paths_t2 = g.expand(queryEngine, limit, round++, paths_t1_pruned); + + System.err.println("\n*** Paths @ t2\n" + + JoinGraph.showTable(paths_t2)); + + final Path[] paths_t2_pruned = g.pruneJoinPaths(paths_t2); + + System.err.println("\n*** Paths @ t2 (after pruning)\n" + + JoinGraph.showTable(paths_t2_pruned)); + + +/* +p[ 4] 0.69, 68931 168831, (2 3) (0 3) (0 5) +p[ 4] 0.69, 68931 168831, (2 3) (0 3) (2 4) +p[ 4] 0.69, 68931 168831, (2 3) (0 3) (3 4) +p[ 4] 0.69, 68931 168831, (2 3) (0 3) (3 5) + +p[ 5] 1.00, 99900 199800, (2 3) (3 5) (0 3) +p[ 5] 1.00, 99900 199800, (2 3) (3 5) (0 5) +p[ 5] 1.00, 99900 199800, (2 3) (3 5) (1 5) +p[ 5] 1.00, 99900 199800, (2 3) (3 5) (2 4) +p[ 5] 1.00, 99900 199800, (2 3) (3 5) (3 4) +p[ 5] 1.00, 99900 199800, (2 3) (3 5) (4 5) + +p[ 6] 1.00, 999 1998, (2 4) (1 4) (1 5) +p[ 6] 1.00, 999 1998, (2 4) (1 4) (2 3) +p[ 6] 1.00, 999 1998, (2 4) (1 4) (3 4) +p[ 6] 1.00, 999 1998, (2 4) (1 4) (4 5) + +p[ 7] 100.00, 99900 100899, (2 4) (3 4) (0 3) +p[ 7] 100.00, 99900 100899, (2 4) (3 4) (1 4) +p[ 7] 100.00, 99900 100899, (2 4) (3 4) (3 5) +p[ 7] 100.00, 99900 100899, (2 4) (3 4) (4 5) + +p[ 8] 20.00, 19980 20979, (2 4) (4 5) (0 5) +p[ 8] 20.00, 19980 20979, (2 4) (4 5) (1 4) +p[ 8] 20.00, 19980 20979, (2 4) (4 5) (1 5) +p[ 8] 20.00, 19980 20979, (2 4) (4 5) (2 3) +p[ 8] 20.00, 19980 20979, (2 
4) (4 5) (3 4) +p[ 8] 20.00, 19980 20979, (2 4) (4 5) (3 5) */ + // final Path selected_t1 = g.getSelectedJoinPath(paths_t1); // // if (selected_t1 != null) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
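The breadth-first step in expand() admits a compact restatement: every surviving path is extended once through each graph edge that touches exactly one of its vertices, and each new vertex is used at most once per path per round. The condensed paraphrase below assumes it lives in the same class as the original, so the edge list E and the Path, Edge and Vertex types from the diff are in scope, and it omits the logging:

// Condensed paraphrase of expand() as of this revision (no pruning yet;
// that is folded in by the next revision). Trace output omitted.
public Path[] expandSketch(final QueryEngine queryEngine, final int limit,
        final int round, final Path[] a) throws Exception {
    final List<Path> tmp = new LinkedList<Path>();
    for (Path x : a)
        tmp.add(x); // carry forward the existing paths
    for (Path x : a) {
        if (x.edges.size() < round)
            continue; // path was not extended last round; dead end
        final Set<Vertex> used = new LinkedHashSet<Vertex>();
        for (Edge e : E) {
            final boolean v1Found = x.contains(e.v1);
            final boolean v2Found = x.contains(e.v2);
            if (v1Found == v2Found)
                continue; // unconnected, or already spanned by the path
            final Vertex newVertex = v1Found ? e.v2 : e.v1;
            if (!used.add(newVertex))
                continue; // extend through each new vertex at most once
            // Cutoff-sample the join which extends the path.
            tmp.add(x.addEdge(queryEngine, limit * round, e));
        }
    }
    return tmp.toArray(new Path[tmp.size()]);
}

The v1Found == v2Found test folds together the two rejection cases written out in the original: an edge touching neither vertex is not connected to the path, while an edge touching both would revisit vertices the path already spans.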
From: <tho...@us...> - 2010-11-12 17:52:18
Revision: 3944 http://bigdata.svn.sourceforge.net/bigdata/?rev=3944&view=rev Author: thompsonbry Date: 2010-11-12 17:52:10 +0000 (Fri, 12 Nov 2010) Log Message: ----------- Added the path prunning logic into JGraph.expand() Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2010-11-12 17:28:10 UTC (rev 3943) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2010-11-12 17:52:10 UTC (rev 3944) @@ -1554,9 +1554,9 @@ continue; } - + // The set of vertices used to expand this path in this round. final Set<Vertex> used = new LinkedHashSet<Vertex>(); - + // Check all edges in the graph. for (Edge edgeInGraph : E) { // Figure out which vertices are already part of this path. @@ -1589,12 +1589,23 @@ // Add to the set of paths for this round. tmp.add(p); - + } } - return tmp.toArray(new Path[tmp.size()]); + final Path[] paths_tp1 = tmp.toArray(new Path[tmp.size()]); + + System.err.println("\n*** Paths @ round=" + round + "\n" + + JoinGraph.showTable(paths_tp1)); + + final Path[] paths_tp1_pruned = pruneJoinPaths(paths_tp1); + + System.err.println("\n*** Paths @ round=" + round + + " (after pruning)\n" + + JoinGraph.showTable(paths_tp1_pruned)); + + return paths_tp1_pruned; } Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java 2010-11-12 17:28:10 UTC (rev 3943) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java 2010-11-12 17:52:10 UTC (rev 3944) @@ -632,133 +632,12 @@ System.err.println("\n*** Paths @ t0\n" + JoinGraph.showTable(paths_t0)); -// final Path selected_t0 = g.getSelectedJoinPath(paths_t0); -// -// if (selected_t0 != null) { -// -// System.err.println("Selected path: " + selected_t0); -// -// } - - /* - * t1 - */ - -// /* -// * The set of one step extensions of those paths. -// * -// * @todo build this programmatically by finding the set of edges -// * branching from the existing paths to a vertex not already part of -// * the existing paths and having a total set of vertices which is -// * distinct from all other paths already generated in this breadth -// * first expansion of the search space. (ROX further constrains the -// * new paths to extend the stop vertex of the path from which they -// * are derived.) -// * -// * @todo always label edges by either minimum bopId or minimum -// * estimated cardinality (with tie broken by bopId)? When extending -// * a path in which more than one edge can reach the target vertex, -// * always chose the edge having the source vertex with the minimum -// * cardinality? 
-// */ -// final Path[] paths_t1 = new Path[] {// -// // t0 -// p0, // (2,3) -// p1, // (2,4) -// p2, // (4,1) -// // t1 -// p0.addEdge(queryEngine, limit*2, g.getEdge(v2, v4)), // aka (v3,v4) -// p0.addEdge(queryEngine, limit*2, g.getEdge(v3, v0)), // -// p0.addEdge(queryEngine, limit*2, g.getEdge(v3, v5)), // -// // -// p1.addEdge(queryEngine, limit*2, g.getEdge(v4, v1)), // -// p1.addEdge(queryEngine, limit*2, g.getEdge(v4, v3)), // -// p1.addEdge(queryEngine, limit*2, g.getEdge(v4, v5)), // -// // -// p2.addEdge(queryEngine, limit*2, g.getEdge(v1, v5)), // aka (4,5) -// p2.addEdge(queryEngine, limit*2, g.getEdge(v4, v3)), // -// p2.addEdge(queryEngine, limit*2, g.getEdge(v4, v2)), // -// - /* - * *** Paths @ t1 - -p[ 1] 0.69, 68931 168831, [ 2 3 0 ] -p[ 2] 1.00, 99900 199800, [ 2 3 4 ] -p[ 3] 1.00, 99900 199800, [ 2 3 5 ] -p[ 5] 1.00, 999 1998, [ 2 4 1 ] -p[ 6] 100.00, 99900 100899, [ 2 4 3 ] -p[ 7] 20.00, 19980 20979, [ 2 4 5 ] -p[ 9] 16.67, 40650 43089, [ 1 4 5 ] -p[10] 1.00, 2439 4878, [ 1 4 2 ] -p[11] 5.00, 12195 14634, [ 1 4 3 ] - - */ -// }; int round = 1; + final Path[] paths_t1 = g.expand(queryEngine, limit, round++, paths_t0); + final Path[] paths_t2 = g.expand(queryEngine, limit, round++, paths_t1); + final Path[] paths_t3 = g.expand(queryEngine, limit, round++, paths_t2); + final Path[] paths_t4 = g.expand(queryEngine, limit, round++, paths_t3); - final Path[] paths_t1 = g.expand(queryEngine, limit, round, paths_t0); - - System.err.println("\n*** Paths @ t1\n" - + JoinGraph.showTable(paths_t1)); - - final Path[] paths_t1_pruned = g.pruneJoinPaths(paths_t1); - - System.err.println("\n*** Paths @ t1 (after pruning)\n" - + JoinGraph.showTable(paths_t1_pruned)); - - /* - * t2 - */ - - final Path[] paths_t2 = g.expand(queryEngine, limit, round++, paths_t1_pruned); - - System.err.println("\n*** Paths @ t2\n" - + JoinGraph.showTable(paths_t2)); - - final Path[] paths_t2_pruned = g.pruneJoinPaths(paths_t2); - - System.err.println("\n*** Paths @ t2 (after pruning)\n" - + JoinGraph.showTable(paths_t2_pruned)); - - -/* -p[ 4] 0.69, 68931 168831, (2 3) (0 3) (0 5) -p[ 4] 0.69, 68931 168831, (2 3) (0 3) (2 4) -p[ 4] 0.69, 68931 168831, (2 3) (0 3) (3 4) -p[ 4] 0.69, 68931 168831, (2 3) (0 3) (3 5) - -p[ 5] 1.00, 99900 199800, (2 3) (3 5) (0 3) -p[ 5] 1.00, 99900 199800, (2 3) (3 5) (0 5) -p[ 5] 1.00, 99900 199800, (2 3) (3 5) (1 5) -p[ 5] 1.00, 99900 199800, (2 3) (3 5) (2 4) -p[ 5] 1.00, 99900 199800, (2 3) (3 5) (3 4) -p[ 5] 1.00, 99900 199800, (2 3) (3 5) (4 5) - -p[ 6] 1.00, 999 1998, (2 4) (1 4) (1 5) -p[ 6] 1.00, 999 1998, (2 4) (1 4) (2 3) -p[ 6] 1.00, 999 1998, (2 4) (1 4) (3 4) -p[ 6] 1.00, 999 1998, (2 4) (1 4) (4 5) - -p[ 7] 100.00, 99900 100899, (2 4) (3 4) (0 3) -p[ 7] 100.00, 99900 100899, (2 4) (3 4) (1 4) -p[ 7] 100.00, 99900 100899, (2 4) (3 4) (3 5) -p[ 7] 100.00, 99900 100899, (2 4) (3 4) (4 5) - -p[ 8] 20.00, 19980 20979, (2 4) (4 5) (0 5) -p[ 8] 20.00, 19980 20979, (2 4) (4 5) (1 4) -p[ 8] 20.00, 19980 20979, (2 4) (4 5) (1 5) -p[ 8] 20.00, 19980 20979, (2 4) (4 5) (2 3) -p[ 8] 20.00, 19980 20979, (2 4) (4 5) (3 4) -p[ 8] 20.00, 19980 20979, (2 4) (4 5) (3 5) */ - -// final Path selected_t1 = g.getSelectedJoinPath(paths_t1); -// -// if (selected_t1 != null) { -// -// System.err.println("Selected path: " + selected_t1); -// -// } - } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
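With pruning folded into expand(), the test driver reduces to a round loop. One plausible way to close that loop into a complete search is sketched below; the stopping rule (stop once some surviving path spans every vertex) and the helper longestPathVertexCount() are assumptions for illustration, whereas the test above simply runs a fixed number of rounds:

// Hypothetical driver over JGraph.expand(); the stopping rule is an
// assumption and presumes the expansion always reaches full length.
static Path runSearchSketch(final JGraph g, final QueryEngine queryEngine,
        final int limit, final int nvertices, Path[] paths) throws Exception {
    int round = 1;
    // Each call extends the surviving paths by one edge and prunes the
    // dominated alternatives before returning.
    while (longestPathVertexCount(paths) < nvertices) {
        paths = g.expand(queryEngine, limit, round++, paths);
    }
    // Among the surviving full-length paths, take the cheapest plan.
    Path best = null;
    for (Path p : paths) {
        if (best == null
                || p.cumulativeEstimatedCardinality < best.cumulativeEstimatedCardinality) {
            best = p;
        }
    }
    return best;
}

static int longestPathVertexCount(final Path[] paths) {
    int max = 0;
    for (Path p : paths)
        max = Math.max(max, p.getVertices().length);
    return max;
}

Since a path with one edge spans two vertices and each round adds one vertex to the surviving paths, a six-vertex join graph such as the LUBM test above needs four rounds, which matches the t1 through t4 expansions in the test.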
From: <tho...@us...> - 2010-11-12 20:48:01
Revision: 3945 http://bigdata.svn.sourceforge.net/bigdata/?rev=3945&view=rev Author: thompsonbry Date: 2010-11-12 20:47:51 +0000 (Fri, 12 Nov 2010) Log Message: ----------- More clean up on JoinGraph. Added LUBM Q9 to the test case. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/bench/AdaptiveQueryOptimization.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2010-11-12 17:52:10 UTC (rev 3944) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2010-11-12 20:47:51 UTC (rev 3945) @@ -20,7 +20,7 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ + */ /* * Created on Aug 16, 2010 */ @@ -77,8 +77,9 @@ * support of runtime query optimization. A join graph is a collection of * relations and joins which connect those relations. This boils down to a * collection of {@link IPredicate}s (selects on relations) and shared variables - * (which identify joins). - * <p> + * (which identify joins). Operators other than standard joins (including + * optional joins, sort, order by, etc.) must be handled downstream from the + * join graph in a "tail plan". * * @see http://arxiv.org/PS_cache/arxiv/pdf/0810/0810.4809v1.pdf, XQuery Join * Graph Isolation. @@ -86,130 +87,98 @@ * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ * - * TODO Some edges can be eliminated by transitivity. For example, given - * - * <pre> - * query: - * - * :- (A loves B), (B loves A), (B marriedTo C). - * - * vertices: - * - * v1=(A loves B); - * v2=(B loves A); - * v3=(B marriedTo C); - * - * edges: - * - * e1=(v1,v2) // JOIN( SCAN(A loves B), SCAN(B loves A)) - * e2=(v1,v3) // JOIN( SCAN(A loves B), SCAN(B marriedTo C)) - * e3=(v2,v3) // JOIN( SCAN(B loves A), SCAN(B marriedTo C)) - * - * It is necessary to execute e1 and either e2 or e3, but not both e2 and e3. - * </pre> - * - * TODO In order to combine pipelining with runtime query optimization we need - * to sample based on the first chunk(s) delivered by the pipeline. If - * necessary, we can buffer multiple chunks for semi-selective queries. - * However, for unselective queries we would accept as many buffers worth - * of bindings as were allowed for a given join and then sample the - * binding sets from left hand side (the buffers) and run those samples - * against the right hand side (the local shard). + * @todo Examine the overhead of the runtime optimizer. Look at ways to prune + * its costs. For example, by pruning the search, by recognizing when the + * query is simple enough to execute directly, by recognizing when we have + * already materialized the answer to the query, etc. */ public class JoinGraph extends PipelineOp { - private static final transient Logger log = Logger.getLogger(JoinGraph.class); - - private static final long serialVersionUID = 1L; + private static final transient Logger log = Logger + .getLogger(JoinGraph.class); - /** - * Known annotations. 
- */ - public interface Annotations extends PipelineOp.Annotations { + private static final long serialVersionUID = 1L; + + /** + * Known annotations. + */ + public interface Annotations extends PipelineOp.Annotations { + /** - * The vertices of the join graph expressed an an {@link IPredicate}[]. - */ + * The vertices of the join graph expressed an an {@link IPredicate}[]. + */ String VERTICES = JoinGraph.class.getName() + ".vertices"; - + /** - * The initial limit for cutoff sampling (default {@value #DEFAULT_LIMIT}). + * The initial limit for cutoff sampling (default + * {@value #DEFAULT_LIMIT}). */ String LIMIT = JoinGraph.class.getName() + ".limit"; - - int DEFAULT_LIMIT = 100; - } + int DEFAULT_LIMIT = 100; + } + /** - * @see Annotations#VERTICES - */ - public IPredicate[] getVertices() { - - return (IPredicate[]) getRequiredProperty(Annotations.VERTICES); - - } + * @see Annotations#VERTICES + */ + public IPredicate[] getVertices() { - /** - * @see Annotations#LIMIT - */ - public int getLimit() { - - return getProperty(Annotations.LIMIT, Annotations.DEFAULT_LIMIT); - - } - - public JoinGraph(final NV ...anns) { - + return (IPredicate[]) getRequiredProperty(Annotations.VERTICES); + + } + + /** + * @see Annotations#LIMIT + */ + public int getLimit() { + + return getProperty(Annotations.LIMIT, Annotations.DEFAULT_LIMIT); + + } + + public JoinGraph(final NV... anns) { + this(BOpBase.NOARGS, NV.asMap(anns)); - - } + } + /** * - * TODO We can derive the vertices from the join operators or the join - * operators from the vertices. However, if a specific kind of join - * operator is required then the question is whether we have better - * information to make that choice when the join graph is evaluated or - * before it is constructed. - * - * TODO How we will handle optional joins? Presumably they are outside of - * the code join graph as part of the tail attached to that join - * graph. - * * TODO How can join constraints be moved around? Just attach them where - * ever a variable becomes bound? And when do we filter out variables - * which are not required downstream? Once we decide on a join path - * and execute it fully (rather than sampling that join path). + * ever a variable becomes bound? And when do we filter out variables which + * are not required downstream? Once we decide on a join path and execute it + * fully (rather than sampling that join path). */ - public JoinGraph(final BOp[] args, final Map<String,Object> anns) { + public JoinGraph(final BOp[] args, final Map<String, Object> anns) { - super(args,anns); + super(args, anns); - switch (getEvaluationContext()) { - case CONTROLLER: - break; - default: - throw new UnsupportedOperationException( - Annotations.EVALUATION_CONTEXT + "=" - + getEvaluationContext()); - } + switch (getEvaluationContext()) { + case CONTROLLER: + break; + default: + throw new UnsupportedOperationException( + Annotations.EVALUATION_CONTEXT + "=" + + getEvaluationContext()); + } - } + } - public FutureTask<Void> eval(final BOpContext<IBindingSet> context) { + public FutureTask<Void> eval(final BOpContext<IBindingSet> context) { - return new FutureTask<Void>(new JoinGraphTask(context)); + return new FutureTask<Void>(new JoinGraphTask(context)); - } + } - /** - * Used to assign row identifiers. - */ + /** + * Used to assign row identifiers. + */ static private final IVariable<Integer> ROWID = Var.var("__rowid"); - /** - * A sample of a {@link Vertex} (an access path). 
- */ - public static class VertexSample { + /** + * A sample of a {@link Vertex} (an access path). + */ + public static class VertexSample { /** * Fast range count. This will be the same for each sample taken @@ -218,23 +187,22 @@ */ public final long rangeCount; - /** + /** * The limit used to produce the {@link #sample}. */ - public final int limit; + public final int limit; /** * When <code>true</code>, the result is not a sample but the * materialized access path. * - * @todo When <code>true</code>, we could run the join against the - * sample rather than the disk. This would require wrapping the - * sample as an access path. Since all exact samples will be - * pretty small, this is not likely to have any great performance - * benefit. + * TODO When <code>true</code>, we could run the join against the sample + * rather than the disk. This would require wrapping the sample as an + * access path. Since all exact samples will be pretty small, this is + * not likely to have any great performance benefit. */ - public final boolean exact; - + public final boolean exact; + /** * Sample. */ @@ -247,34 +215,35 @@ * @param exact * @param sample */ - public VertexSample(final long rangeCount, final int limit, final boolean exact, final Object[] sample) { + public VertexSample(final long rangeCount, final int limit, + final boolean exact, final Object[] sample) { if (rangeCount < 0L) throw new IllegalArgumentException(); if (limit <= 0) throw new IllegalArgumentException(); - + if (sample == null) throw new IllegalArgumentException(); this.rangeCount = rangeCount; - + this.limit = limit; - + this.exact = exact; - + this.sample = sample; - + } public String toString() { return "VertexSample{rangeCount=" + rangeCount + ",limit=" + limit + ",exact=" + exact + ", sampleSize=" + sample.length + "}"; } - - } + } + /** * A vertex of the join graph is an annotated relation (this corresponds to * an {@link IPredicate} with additional annotations to support the adaptive @@ -299,20 +268,20 @@ * The most recently taken sample of the {@link Vertex}. */ VertexSample sample = null; - + Vertex(final IPredicate<?> pred) { if (pred == null) throw new IllegalArgumentException(); - + this.pred = pred; - + } - + public String toString() { return "Vertex{pred=" + pred + ",sample=" + sample + "}"; - + } /** @@ -334,30 +303,31 @@ * Take a sample of the vertex. If the sample is already exact, then * this is a NOP. * - * @param context * @param limit * The sample cutoff. */ - public void sample(final BOpContextBase context, final int limit) { + public void sample(final QueryEngine queryEngine, final int limit) { - if (context == null) + if (queryEngine == null) throw new IllegalArgumentException(); if (limit <= 0) throw new IllegalArgumentException(); - + final VertexSample oldSample = this.sample; - if(oldSample != null && oldSample.exact) { + if (oldSample != null && oldSample.exact) { /* * The old sample is already the full materialization of the * vertex. */ - + return; - + } + + final BOpContextBase context = new BOpContextBase(queryEngine); final IRelation r = context.getRelation(pred); @@ -371,12 +341,12 @@ /* * Materialize the access path. * - * @todo This could be more efficient if we raised it onto the - * AP or if we overrode CHUNK_CAPACITY and the fully buffered + * TODO This could be more efficient if we raised it onto the AP + * or if we overrode CHUNK_CAPACITY and the fully buffered * iterator threshold such that everything was materialized as a * single chunk. 
*/ - + final List<Object> tmp = new ArrayList<Object>((int) rangeCount); final IChunkedIterator<Object> itr = ap.iterator(); @@ -396,25 +366,31 @@ sample = new VertexSample(rangeCount, limit, true/* exact */, tmp.toArray(new Object[0])); - - return; - } + } else { - /* - * Materialize a random sample from the access path. - */ - - final SampleIndex sampleOp = new SampleIndex(new BOp[] {}, // - NV.asMap(// + /* + * Materialize a random sample from the access path. + */ + + final SampleIndex sampleOp = new SampleIndex( + new BOp[] {}, // + NV.asMap(// new NV(SampleIndex.Annotations.PREDICATE, pred),// new NV(SampleIndex.Annotations.LIMIT, limit))); - sample = new VertexSample(rangeCount, limit, false/*exact*/, sampleOp - .eval(context)); + sample = new VertexSample(rangeCount, limit, false/* exact */, + sampleOp.eval(context)); + } + + if (log.isInfoEnabled()) + log.info("Sampled: " + sample); + + return; + } - + } /** @@ -449,13 +425,13 @@ * anything. This is not 100%, merely indicative. */ public final int outputCount; - + /** * The ratio of the #of input samples consumed to the #of output samples * generated (the join hit ratio or scale factor). */ public final double f; - + /** * The estimated cardinality of the join. */ @@ -499,12 +475,12 @@ * join. That is, feeding all source tuples into the join gives fewer * than the desired number of output tuples. * - * @todo This field marks this condition and should be used to avoid - * needless recomputation of a join whose exact solution is - * already known. + * TODO This field marks this condition and should be used to avoid + * needless re-computation of a join whose exact solution is already + * known. */ - public final boolean exact; - + public final boolean exact; + /** * The sample of the solutions for the join path. */ @@ -526,40 +502,39 @@ * @param outputCount * The #of binding sets generated before the join was cutoff. */ - EdgeSample(//final VertexSample sourceVertexSample, + EdgeSample( + // final VertexSample sourceVertexSample, final long sourceSampleRangeCount, - final boolean sourceSampleExact, - final int limit, + final boolean sourceSampleExact, final int limit, final int inputCount, final int outputCount, final IBindingSet[] sample) { - if(sample == null) + if (sample == null) throw new IllegalArgumentException(); - -// this.rangeCount = sourceVertexSample.rangeCount; + + // this.rangeCount = sourceVertexSample.rangeCount; this.rangeCount = sourceSampleRangeCount; - + this.limit = limit; - + this.inputCount = inputCount; - + this.outputCount = outputCount; - + f = outputCount == 0 ? 
0 : (outputCount / (double) inputCount); estimatedCardinality = (long) (rangeCount * f); - + estimateIsLowerBound = inputCount == 1 && outputCount == limit; - -// final boolean sourceSampleExact = sourceVertexSample.exact; - estimateIsUpperBound = !sourceSampleExact - && outputCount < limit; - + + // final boolean sourceSampleExact = sourceVertexSample.exact; + estimateIsUpperBound = !sourceSampleExact && outputCount < limit; + this.exact = sourceSampleExact && outputCount < limit; - + this.sample = sample; } - + public String toString() { return getClass().getName() + "{inputRangeCount=" + rangeCount + ", limit=" + limit + ", inputCount=" + inputCount @@ -567,10 +542,9 @@ + ", estimatedCardinality=" + estimatedCardinality + ", estimateIsLowerBound=" + estimateIsLowerBound + ", estimateIsUpperBound=" + estimateIsUpperBound - + ", sampleIsExactSolution=" + exact - + "}"; + + ", sampleIsExactSolution=" + exact + "}"; } - + }; /** @@ -603,13 +577,14 @@ * not been sampled. */ public EdgeSample sample = null; - - public Edge(final Vertex v1, final Vertex v2, final Set<IVariable<?>> shared) { + + public Edge(final Vertex v1, final Vertex v2, + final Set<IVariable<?>> shared) { if (v1 == null) throw new IllegalArgumentException(); if (v2 == null) throw new IllegalArgumentException(); - if (shared==null) + if (shared == null) throw new IllegalArgumentException(); if (shared.isEmpty()) throw new IllegalArgumentException(); @@ -624,8 +599,10 @@ * for each vertex. */ public String toString() { - - return "Edge{ (V" + v1.pred.getId() + ",V" + v2.pred.getId() + ")" + + return "Edge{ (V" + v1.pred.getId() + ",V" + v2.pred.getId() + + "), estCard=" + + (sample == null ? "N/A" : sample.estimatedCardinality) + ", shared=" + shared.toString() + ", sample=" + sample + "}"; @@ -635,9 +612,9 @@ * Equality is determined by reference testing. */ public boolean equals(final Object o) { - + return this == o; - + } /** @@ -657,24 +634,25 @@ final int h; if (h1 < h2) { - + h = h1 * 31 + h2; - + } else { - + h = h2 * 31 + h1; - + } hash = h; } - return hash; + return hash; - } - private int hash; + } - /** + private int hash; + + /** * Return the vertex with the smaller estimated cardinality. * * @throws IllegalStateException @@ -684,15 +662,15 @@ if (v1.sample == null) // vertex not sampled. throw new IllegalStateException(); - + if (v2.sample == null) // vertex not sampled. throw new IllegalStateException(); - + return (v1.sample.rangeCount < v2.sample.rangeCount) ? v1 : v2; - + } - - /** + + /** * Return the vertex with the larger estimated cardinality (the vertex * not returned by {@link #getMinimumCardinalityVertex()}). * @@ -703,12 +681,12 @@ // The vertex with the minimum cardinality. final Vertex o = getMinimumCardinalityVertex(); - + // Return the other vertex. return (v1 == o) ? v2 : v1; - + } - + /** * Estimate the cardinality of the edge. * @@ -716,7 +694,7 @@ * * @return The estimated cardinality of the edge. * - * @throws Exception + * @throws Exception */ public long estimateCardinality(final QueryEngine queryEngine, final int limit) throws Exception { @@ -763,21 +741,22 @@ * both the input and the output of the cutoff evaluation of the * edge rather than rows of the materialized relation. * - * TODO On subsequent iterations we would probably re-sample [v] - * and we would run against the materialized intermediate result for + * TODO On subsequent iterations we would probably re-sample [v] and + * we would run against the materialized intermediate result for * [v']. 
*/ /* * Convert the source sample into an IBindingSet[]. * - * @todo We might as well do this when we sample the vertex. + * TODO We might as well do this when we sample the vertex. */ final IBindingSet[] sourceSample = new IBindingSet[v.sample.sample.length]; { for (int i = 0; i < sourceSample.length; i++) { final IBindingSet bset = new HashBindingSet(); - BOpContext.copyValues((IElement) v.sample.sample[i], v.pred, bset); + BOpContext.copyValues((IElement) v.sample.sample[i], + v.pred, bset); sourceSample[i] = bset; } } @@ -819,7 +798,7 @@ if (limit <= 0) throw new IllegalArgumentException(); - + // Inject a rowId column. sourceSample = BOpUtility.injectRowIdColumn(ROWID, 1/* start */, sourceSample); @@ -834,15 +813,14 @@ */ final PipelineJoin joinOp = new PipelineJoin(new BOp[] {}, // new NV(BOp.Annotations.BOP_ID, 1),// - new NV(PipelineJoin.Annotations.PREDICATE,vTarget.pred.setBOpId(3)) - ); + new NV(PipelineJoin.Annotations.PREDICATE, vTarget.pred + .setBOpId(3))); final SliceOp sliceOp = new SliceOp(new BOp[] { joinOp },// NV.asMap(// new NV(BOp.Annotations.BOP_ID, 2), // - new NV(SliceOp.Annotations.LIMIT, (long)limit), // - new NV( - BOp.Annotations.EVALUATION_CONTEXT, + new NV(SliceOp.Annotations.LIMIT, (long) limit), // + new NV(BOp.Annotations.EVALUATION_CONTEXT, BOpEvaluationContext.CONTROLLER))); // run the cutoff sampling of the edge. @@ -875,31 +853,18 @@ .get()); } finally { // verify no problems. FIXME Restore test of the query. -// runningQuery.get(); + // runningQuery.get(); } } finally { runningQuery.cancel(true/* mayInterruptIfRunning */); } /* - * Note: This needs to be based on the source vertex having the - * minimum cardinality for the Path which is being extended which - * connects via some edge defined in the join graph. If a different - * vertex is chosen as the source then the estimated cardinality - * will be falsely high by whatever ratio the chosen vertex - * cardinality exceeds the one having the minimum cardinality which - * is connected via an edge to the target vertex). - * - * FIXME I am not convinced that this approach is quite right. I am - * also not convinced that this approach will correctly carry the - * additional metadata on the EdgeSample (exact, estimate overflow - * and underflow, etc). [This needs to be the estimated cardinality - * of the path which is being extended by an edge to the target - * vertex.] + * TODO Improve comments here. See if it is possible to isolate a + * common base class which would simplify the setup of the cutoff + * join and the computation of the sample stats. */ -// final VertexSample moreSelectiveVertexSample = vSource.sample.rangeCount < vTarget.sample.rangeCount ? vSource.sample -// : vTarget.sample; - + final EdgeSample edgeSample = new EdgeSample( sourceSampleRangeCount, sourceSampleExact, limit, inputCount, outputCount, result @@ -911,64 +876,14 @@ return edgeSample; } - + } -// /** -// * A path sample includes the materialized binding sets from the as-executed -// * join path. -// * -// * @todo The sample {@link IBindingSet}[] could be saved with the -// * {@link EdgeSample}. However, when we are sampling a join path we -// * want to associate the net sample with the path, not each edge in -// * that path, because we need to be able to generate join paths in -// * which the path is extended from any vertex already part of the path -// * to any vertex which has not yet incorporated in the path and has -// * not yet been executed. 
To do this we need to intermediate results -// * for the path, which includes all variables bound by each join for -// * each edge in the path, not just on an edge by edge basis. -// */ -// public static class PathSample extends EdgeSample { -// -// /** -// * <code>true</code> if the sample is the exact solution for the join path. -// */ -// private final boolean exact; -// -// /** -// * The sample of the solutions for the join path. -// */ -// private final IBindingSet[] sample; -// -// PathSample(final long inputRangeCount, final int limit, -// final int inputCount, final int outputCount, -// final boolean exact, final IBindingSet[] sample) { -// -// super(inputRangeCount, limit, inputCount, outputCount); -// -// if(sample == null) -// throw new IllegalArgumentException(); -// -// this.exact = exact; -// -// this.sample = sample; -// -// } -// -// public String toString() { -// -// return super.toString() + ":{exact=" + exact + ", sampleSize=" -// + sample.length + "}"; -// -// } -// -// } - /** * A sequence of {@link Edge}s (aka join steps). */ public static class Path { - + /** * An immutable ordered list of the edges in the (aka the sequence of * joins represented by this path). @@ -995,19 +910,6 @@ */ final public long cumulativeEstimatedCardinality; - /** - * The vertex at which the path from which this path was derived - * stopped. This is initialized to the source vertex when entering the - * chainSample() method. - * - * @todo This is used by ROX to only grow the path from its end. We - * could of course just look at the last edge in the path. - * However, I think that I prefer to grow a path from any - * branching vertex as long as the path does not duplicate any - * path already generated (including those which were pruned). - */ - private Vertex stopVertex; - public String toString() { final StringBuilder sb = new StringBuilder(); sb.append("Path{"); @@ -1015,7 +917,8 @@ for (Edge e : edges) { if (!first) sb.append(","); - sb.append("(" + e.v1.pred.getId() + "," + e.v2.pred.getId() + ")"); + sb.append("(" + e.v1.pred.getId() + "," + e.v2.pred.getId() + + ")"); first = false; } sb.append(",cumEstCard=" + cumulativeEstimatedCardinality @@ -1042,23 +945,27 @@ if (e == null) throw new IllegalArgumentException(); - + if (e.sample == null) - throw new IllegalArgumentException("Not sampled: "+e); - + throw new IllegalArgumentException("Not sampled: " + e); + this.edges = Collections.singletonList(e); - + this.sample = e.sample; - + this.cumulativeEstimatedCardinality = e.sample.estimatedCardinality; - + } /** * Constructor used by {@link #addEdge(QueryEngine, int, Edge)} - * @param edges The edges in the new path. - * @param cumulativeEstimatedCardinality The cumulative estimated cardinality of the new path. - * @param sample The sample from the last + * + * @param edges + * The edges in the new path. + * @param cumulativeEstimatedCardinality + * The cumulative estimated cardinality of the new path. 
+ * @param sample + * The sample from the last */ private Path(final List<Edge> edges, final long cumulativeEstimatedCardinality, @@ -1066,19 +973,19 @@ if (edges == null) throw new IllegalArgumentException(); - + if (cumulativeEstimatedCardinality < 0) throw new IllegalArgumentException(); - + if (sample == null) throw new IllegalArgumentException(); this.edges = Collections.unmodifiableList(edges); - + this.cumulativeEstimatedCardinality = cumulativeEstimatedCardinality; - + this.sample = sample; - + } /** @@ -1132,7 +1039,7 @@ final Vertex[] v1 = getVertices(); final Vertex[] v2 = p.getVertices(); - + if (v1.length < v2.length) { // Proven false since the other set is larger. return false; @@ -1164,7 +1071,7 @@ } return true; - + } /** @@ -1172,8 +1079,8 @@ * * @return The vertices (in path order). * - * @todo this could be rewritten without the toArray() using a method - * which visits the vertices of a path in any order. + * TODO This could be rewritten without the toArray() using a + * method which visits the vertices of a path in any order. */ public Vertex[] getVertices() { final Set<Vertex> tmp = new LinkedHashSet<Vertex>(); @@ -1190,7 +1097,7 @@ * * @param p * The given path. - * + * * @return <code>true</code> if this path begins with the given path. */ public boolean beginsWith(final Path p) { @@ -1210,10 +1117,10 @@ return false; } } - + return true; } - + /** * Add an edge to a path, computing the estimated cardinality of the new * path, and returning the new path. @@ -1272,11 +1179,11 @@ * cardinality then we should prefer join paths which achieve the * same reduction in cardinality with less 'intermediate * cardinality' - that is, by examining fewer possible solutions. + * [In fact, the estimated (cumulative) cardinality might not be a + * good reflection of the IOs to be done -- this needs more + * thought.] */ -// final IBindingSet[] sample = BOpUtility.injectRowIdColumn(ROWID, -// 0/* start */, this.sample.sample); - final EdgeSample edgeSample = e.estimateCardinality(queryEngine, limit, sourceVertex, targetVertex, this.sample.estimatedCardinality, this.sample.exact, @@ -1286,9 +1193,9 @@ final List<Edge> edges = new ArrayList<Edge>( this.edges.size() + 1); - + edges.addAll(this.edges); - + edges.add(e); final long cumulativeEstimatedCardinality = this.cumulativeEstimatedCardinality @@ -1303,58 +1210,58 @@ return tmp; } - + } - -// /** -// * Equality is defined by comparison of the unordered set of edges. -// */ -// public boolean equals(final Object o) { -// if (this == o) -// return true; -// if (!(o instanceof Path)) -// return false; -// final Path t = (Path) o; -// if (edges.length != t.edges.length) -// return false; -// for (Edge e : edges) { -// boolean found = false; -// for (Edge x : t.edges) { -// if (x.equals(e)) { -// found = true; -// break; -// } -// } -// if (!found) -// return false; -// } -// return true; -// } -// -// /** -// * The hash code of path is defined as the bit-wise XOR of the hash -// * codes of the edges in that path. -// */ -// public int hashCode() { -// -// if (hash == 0) { -// -// int result = 0; -// -// for(Edge e : edges) { -// -// result ^= e.hashCode(); -// -// } -// -// hash = result; -// -// } -// return hash; -// -// } -// private int hash; + // /** + // * Equality is defined by comparison of the unordered set of edges. 
+ // */ + // public boolean equals(final Object o) { + // if (this == o) + // return true; + // if (!(o instanceof Path)) + // return false; + // final Path t = (Path) o; + // if (edges.length != t.edges.length) + // return false; + // for (Edge e : edges) { + // boolean found = false; + // for (Edge x : t.edges) { + // if (x.equals(e)) { + // found = true; + // break; + // } + // } + // if (!found) + // return false; + // } + // return true; + // } + // + // /** + // * The hash code of path is defined as the bit-wise XOR of the hash + // * codes of the edges in that path. + // */ + // public int hashCode() { + // + // if (hash == 0) { + // + // int result = 0; + // + // for(Edge e : edges) { + // + // result ^= e.hashCode(); + // + // } + // + // hash = result; + // + // } + // return hash; + // + // } + // private int hash; + } /** @@ -1364,13 +1271,13 @@ * * @param a * An array of join paths. - * + * * @return A table with that data. */ static public String showTable(final Path[] a) { final StringBuilder sb = new StringBuilder(); final Formatter f = new Formatter(sb); - for(int i=0; i<a.length; i++) { + for (int i = 0; i < a.length; i++) { final Path x = a[i]; if (x.sample == null) { f.format("p[%2d] %7s, %10s %10s", "N/A", "N/A", "N/A", i); @@ -1381,18 +1288,18 @@ } sb.append(", ["); final Vertex[] vertices = x.getVertices(); - for(Vertex v : vertices) { + for (Vertex v : vertices) { f.format("%2d ", v.pred.getId()); } sb.append("]"); -// for (Edge e : x.edges) -// sb.append(" (" + e.v1.pred.getId() + " " + e.v2.pred.getId() -// + ")"); + // for (Edge e : x.edges) + // sb.append(" (" + e.v1.pred.getId() + " " + e.v2.pred.getId() + // + ")"); sb.append("\n"); } return sb.toString(); } - + /** * A join graph (data structure and methods only). * @@ -1442,9 +1349,6 @@ */ private final Edge[] E; - // The set of vertices which have been consumed by the query. - private final Set<Vertex> executedVertices = new LinkedHashSet<Vertex>(); - public List<Vertex> getVertices() { return Collections.unmodifiableList(Arrays.asList(V)); } @@ -1457,28 +1361,25 @@ final StringBuilder sb = new StringBuilder(); sb.append("JoinGraph"); sb.append("{V=["); - for(Vertex v : V) { - sb.append("\nV["+v.pred.getId()+"]="+v); + for (Vertex v : V) { + sb.append("\nV[" + v.pred.getId() + "]=" + v); } sb.append("],E=["); - for(Edge e : E) { - sb.append("\n"+e); + for (Edge e : E) { + sb.append("\n" + e); } - sb.append("\n],ExecutedVertices=["); - for(Vertex v : executedVertices) { - sb.append("\nV["+v.pred.getId()+"]="+v); - } sb.append("\n]}"); return sb.toString(); - -// return super.toString() + "{V=" + Arrays.toString(V) + ",E=" -// + Arrays.toString(E) + ", executedVertices="+executedVertices+"}"; + + // return super.toString() + "{V=" + Arrays.toString(V) + ",E=" + // + Arrays.toString(E) + + // ", executedVertices="+executedVertices+"}"; } - + public JGraph(final IPredicate[] v) { - if (v == null) - throw new IllegalArgumentException(); + if (v == null) + throw new IllegalArgumentException(); if (v.length < 2) throw new IllegalArgumentException(); @@ -1527,18 +1428,200 @@ } /** + * + * @param queryEngine + * @param limit + * The limit for sampling a vertex and the initial limit for + * cutoff join evaluation. A reasonable value is + * <code>100</code>. + * + * @throws Exception + */ + public void runtimeOptimizer(final QueryEngine queryEngine, + final int limit) throws Exception { + + // // The set of vertices which have been consumed by the query. 
+ // final Set<Vertex> executedVertices = new LinkedHashSet<Vertex>(); + + // Setup the join graph. + Path[] paths = round0(queryEngine, limit, 2/* nedges */); + + /* + * The input paths for the first round have two vertices (one edge + * is two vertices). Each round adds one more vertex, so we have + * three vertices by the end of round 1. We are done once we have + * generated paths which include all vertices. + * + * This occurs at round := nvertices - 1 + */ + + final int nvertices = V.length; + + int round = 1; + + while (round < nvertices - 1) { + + paths = expand(queryEngine, limit, round++, paths); + + } + + /* + * FIXME Choose the best join path and execute it (or return the + * evaluation order to the caller). + * + * FIXME This must either recognize each time a join path is known + * to dominate all other join paths and then execute it or iterator + * until the total join path is decided and then execute the + * original query using that join path. + * + * @todo When executing the query, it is actually being executed as + * a subquery. Therefore we have to take appropriate care to ensure + * that the results are copied out of the subquery and into the + * parent query. + * + * @todo When we execute the query, we should clear the references + * to the sample (unless they are exact, in which case they can be + * used as is) in order to release memory associated with those + * samples if the query is long running. + */ + + } + + /** + * Choose the starting vertices. + * + * @param nedges + * The maximum #of edges to choose. + */ + public Path[] choseStartingPaths(final int nedges) { + + final List<Path> tmp = new LinkedList<Path>(); + + // All edges in the graph. + final Edge[] edges = getEdges().toArray(new Edge[0]); + + // Sort them by ascending expected cardinality. + Arrays.sort(edges, 0, edges.length, + EstimatedEdgeCardinalityComparator.INSTANCE); + + // Choose the top-N edges (those with the least cardinality). + for (int i = 0; i < edges.length && i < nedges; i++) { + + tmp.add(new Path(edges[i])); + + } + + final Path[] a = tmp.toArray(new Path[tmp.size()]); + + return a; + + } + + /** + * Choose up to <i>nedges</i> edges to be the starting point. + * + * @param queryEngine + * The query engine. + * @param limit + * The cutoff used when sampling the vertices and when + * sampling the edges. + * @param nedges + * The maximum #of edges to choose. Those having the smallest + * expected cardinality will be chosen. + * + * @throws Exception + */ + public Path[] round0(final QueryEngine queryEngine, final int limit, + final int nedges) throws Exception { + + /* + * Sample the vertices. + */ + sampleVertices(queryEngine, limit); + + if (log.isInfoEnabled()) { + final StringBuilder sb = new StringBuilder(); + sb.append("Vertices:\n"); + for (Vertex v : V) { + sb.append(v.toString()); + sb.append("\n"); + } + log.info(sb.toString()); + } + + /* + * Estimate the cardinality for each edge. + * + * TODO It would be very interesting to see the variety and/or + * distribution of the values bound when the edge is sampled. This + * can be easily done using a hash map with a counter. That could + * tell us a lot about the cardinality of the next join path + * (sampling the join path also tells us a lot, but it does not + * explain it as much as seeing the histogram of the bound values). + * I believe that there are some interesting online algorithms for + * computing the N most frequent observations and the like which + * could be used here. 
+ */ + estimateEdgeWeights(queryEngine, limit); + + if (log.isInfoEnabled()) { + final StringBuilder sb = new StringBuilder(); + sb.append("Edges:\n"); + for (Edge e : E) { + sb.append(e.toString()); + sb.append("\n"); + } + log.info(sb.toString()); + } + + /* + * Choose the initial set of paths. + */ + final Path[] paths_t0 = choseStartingPaths(nedges); + + if (log.isInfoEnabled()) + log.info("\n*** Paths @ t0\n" + JoinGraph.showTable(paths_t0)); + + return paths_t0; + + } + + /** * Do one breadth first expansion. * * @param queryEngine + * The query engine. * @param limit + * The limit (this is automatically multiplied by the round + * to increase the sample size in each round). * @param round + * The round number in [1:n]. * @param a - * @return + * The set of paths from the previous round. For the first + * round, this is formed from the initial set of edges to + * consider. + * + * @return The set of paths which survived pruning in this round. + * * @throws Exception */ - final public Path[] expand(final QueryEngine queryEngine, final int limit, + public Path[] expand(final QueryEngine queryEngine, int limit, final int round, final Path[] a) throws Exception { + if (queryEngine == null) + throw new IllegalArgumentException(); + if (limit <= 0) + throw new IllegalArgumentException(); + if (round <= 0) + throw new IllegalArgumentException(); + if (a == null) + throw new IllegalArgumentException(); + if (a.length == 0) + throw new IllegalArgumentException(); + + // increment the limit by itself in each round. + limit *= round; + final List<Path> tmp = new LinkedList<Path>(); // First, copy all existing paths. @@ -1546,16 +1629,20 @@ tmp.add(x); } + // Vertices are inserted into this collection when they are resampled. + final Set<Vertex> resampled = new LinkedHashSet<Vertex>(); + // Then expand each path. for (Path x : a) { if (x.edges.size() < round) { - + // Path is from a previous round. continue; - } + // The set of vertices used to expand this path in this round. final Set<Vertex> used = new LinkedHashSet<Vertex>(); + // Check all edges in the graph. for (Edge edgeInGraph : E) { @@ -1573,18 +1660,27 @@ continue; } - final Vertex newVertex = v1Found ? edgeInGraph.v2 : edgeInGraph.v1; - - if(used.contains(newVertex)) { + final Vertex newVertex = v1Found ? edgeInGraph.v2 + : edgeInGraph.v1; + + if (used.contains(newVertex)) { // Vertex already used to extend this path. continue; } - + // add the new vertex to the set of used vertices. used.add(newVertex); + + if (!resampled.add(newVertex)&&round>1) { + /* + * Resample this vertex before we sample a new edge + * which targets this vertex. + */ + newVertex.sample(queryEngine, limit); + } // Extend the path to the new vertex. - final Path p = x.addEdge(queryEngine, limit * round, + final Path p = x.addEdge(queryEngine, limit, edgeInGraph); // Add to the set of paths for this round. 
@@ -1596,19 +1692,22 @@ final Path[] paths_tp1 = tmp.toArray(new Path[tmp.size()]); - System.err.println("\n*** Paths @ round=" + round + "\n" - + JoinGraph.showTable(paths_tp1)); + if (log.isDebugEnabled()) + log.debug("\n*** round=" + round + " : generated paths\n" + + JoinGraph.showTable(paths_tp1)); final Path[] paths_tp1_pruned = pruneJoinPaths(paths_tp1); - System.err.println("\n*** Paths @ round=" + round - + " (after pruning)\n" - + JoinGraph.showTable(paths_tp1_pruned)); + if (log.isInfoEnabled()) + log.info("\n*** round=" + round + ": paths{in=" + a.length + + ",considered=" + paths_tp1.length + ",out=" + + paths_tp1_pruned.length + "}\n" + + JoinGraph.showTable(paths_tp1_pruned)); return paths_tp1_pruned; - + } - + /** * Return the {@link Vertex} whose {@link IPredicate} is associated with * the given {@link BOp.Annotations#BOP_ID}. @@ -1619,8 +1718,8 @@ * vertex in the join graph. */ public Vertex getVertex(int bopId) { - for(Vertex v : V) { - if(v.pred.getId()==bopId) + for (Vertex v : V) { + if (v.pred.getId() == bopId) return v; } return null; @@ -1639,7 +1738,7 @@ * the join graph. */ public Edge getEdge(Vertex v1, Vertex v2) { - for(Edge e : E) { + for (Edge e : E) { if (e.v1 == v1 && e.v2 == v2) return e; if (e.v1 == v2 && e.v2 == v1) @@ -1647,22 +1746,23 @@ } return null; } - + /** - * Obtain a sample and estimated cardinality (fast range count) for each vertex. + * Obtain a sample and estimated cardinality (fast range count) for each + * vertex. * - * @param context + * @param queryEngine * @param limit * The sample size. */ - public void sampleVertices(final BOpContextBase context, final int limit) { + public void sampleVertices(final QueryEngine queryEngine, final int limit) { for (Vertex v : V) { - v.sample(context, limit); - + v.sample(queryEngine, limit); + } - + } /** @@ -1687,8 +1787,7 @@ } - e.estimateCardinality( - queryEngine, limit); + e.estimateCardinality(queryEngine, limit); } @@ -1706,14 +1805,14 @@ * are no {@link Edge}s having an estimated cardinality. */ public Edge getMinimumCardinalityEdge(final Set<Vertex> visited) { - + long minCard = Long.MIN_VALUE; Edge minEdge = null; for (Edge e : E) { if (e.sample == null) { - + // Edge has not been sampled. continue; @@ -1721,12 +1820,12 @@ if (visited != null && (visited.contains(e.v1) || visited.contains(e.v2))) { - + // A vertex of that edge has already been consumed. continue; - + } - + final long estimatedCardinality = e.sample.estimatedCardinality; if (minEdge == null || estimatedCardinality < minCard) { @@ -1740,22 +1839,24 @@ } return minEdge; - + } -// /** -// * Return the {@link Edge} having the minimum estimated cardinality out -// * of those edges whose cardinality has been estimated. -// * -// * @return The minimum cardinality edge -or- <code>null</code> if there -// * are no {@link Edge}s having an estimated cardinality. -// */ -// public Edge getMinimumCardinalityEdge() { -// -// return getMinimumCardinalityEdge(null); -// -// } - + // /** + // * Return the {@link Edge} having the minimum estimated cardinality + // out + // * of those edges whose cardinality has been estimated. + // * + // * @return The minimum cardinality edge -or- <code>null</code> if + // there + // * are no {@link Edge}s having an estimated cardinality. 
+ // */ + // public Edge getMinimumCardinalityEdge() { + // + // return getMinimumCardinalityEdge(null); + // + // } + /** * Return the #of edges in which the given vertex appears where the * other vertex of the edge does not appear in the set of visited @@ -1765,7 +1866,7 @@ * The vertex. * @param visited * A set of vertices to be excluded from consideration. - * + * * @return The #of such edges. */ public int getEdgeCount(final Vertex v, final Set<Vertex> visited) { @@ -1787,17 +1888,17 @@ * @return Those edges. */ public List<Edge> getEdges(final Vertex v, final Set<Vertex> visited) { - + if (v == null) throw new IllegalArgumentException(); if (visited != null && visited.contains(v)) return Collections.emptyList(); - + final List<Edge> tmp = new LinkedList<Edge>(); - + for (Edge e : E) { - + if (v.equals(e.v1) || v.equals(e.v2)) { if (visited != null) { @@ -1811,464 +1912,16 @@ } tmp.add(e); - - } - - } - - return tmp; - - } - /** - * - * @param queryEngine - * @param limit - * The limit for sampling a vertex and the initial limit for - * cutoff join evaluation. A reasonable value is - * <code>100</code>. - * @param timeout - * The timeout for cutoff join path evaluation - * (milliseconds). A reasonable value is <code>100</code>ms. - * @throws Exception - * - * FIXME This must either return the query plan or copy the - * results as they are materialized to the sink for the join - * graph operator. - * - * - * @todo We do not need the [timeout] as long as we evaluate each cutoff - * join separately. The limited number of input solutions to the - * join automatically limits the amount of work the join can do. - * However, if we do cutoff evaluation of a series of edges then - * it is possible to do a lot of work in order to find [limit] - * solutions. In this case, a [timeout] protects us against join - * paths which have poor correlations and large cardinality for - * their vertices (a lot of solutions are considered to produce - * very few results). - */ - public void runtimeOptimizer(final QueryEngine queryEngine, - final int limit, final long timeout) throws Exception { - - final BOpContextBase context = new BOpContextBase(queryEngine); - - if (log.isInfoEnabled()) - log.info("limit=" + limit); - - /* - * Sample the vertices. - * - * TODO Sampling for scale-out not yet finished. - * - * FIXME Re-sampling will always produce the same sample depending - * on the sample operator impl (it should be random, but it is not). - */ - sampleVertices(context, limit); - - if(log.isDebugEnabled()) - log.debug("joinGraph=" + toString()); - - /* - * Estimate the cardinality and weights for each edge, obtaining the - * Edge with the minimum estimated cardinality. This will be the - * starting point for the join graph evaluation. - * - * @todo It would be very interesting to see the variety and/or - * distribution of the values bound when the edge is sampled. This - * can be easily done using a hash map with a counter. That could - * tell us a lot about the cardinality of the next join path - * (sampling the join path also tells us a lot, but it does not - * explain it as much as seeing the histogram of the bound values). - * I believe that there are some interesting online algorithms for - * computing the N most frequent observations and the like which - * could be used here. - * - * TODO ROX is choosing the starting edge based on the minimum - * estimated cardinality. 
However, it is possible for there to be - * more than one edge with an estimated cardinality which is - * substantially to the minimum estimated cardinality. It would be - * best to start from multiple vertices so we can explore join paths - * which begin with those alternative starting vertices as well. - * (LUBM Q2 is an example of such a query). - */ - estimateEdgeWeights(queryEngine, limit); - - while(moreEdgesToVisit(executedVertices)) { - - // Decide on the next join path to execute. - final Path p = chainSample(queryEngine, limit, timeout); - - for(Edge e : p.edges) { - - /* - * FIXME Finish the algorithm. - * - * Execute the edge. We have two choices here. If join path - * is currently materialized and the expected cardinality of - * the edge is small to moderate (LTE limit * 10) then we - * can simply materialize the result of evaluating the edge. - * - * In this case, we replace the sample for the vertex with - * the actual result of evaluating the edge. [This concept - * pre-supposes that a vertex sample is the set of matching - * elements and that we do not store the binding sets which - * satisfy the join path. I think that this is perhaps the - * primary point of difference for MonetDB/ROX and bigdata. - * bigdata is working with IBindingSet[]s and should - * associate the set of intermediate solutions which - * represent the materialized intermediate result with the - * join path, not the vertex or the edge.] - * - * Otherwise, either the join path is already only a sample - * or the expected cardinality of this edge is too large so - * we do the cutoff evaluation of the edge in order to - * propagate a sample. - * - * 1. exec(e,T1(v1),T2(v2)) - */ - - executedVertices.add(e.v1); - executedVertices.add(e.v2); - } - /* - * Re-sample edges branching from any point in the path which we - * just executed. The purpose of this is to improve the - * detection of correlations using a materialized sample of the - * intermediate results (which will be correlated) rather than - * independent samples of the vertices (which are not - * correlated). - * - * Also, note that ROX only samples vertices which satisfy the - * zero investment property and therefore there could be - * vertices which have not yet been sampled if some vertices are - * not associated with an index. - * - * @todo This could just be another call to sampleVertices() and - * estimateEdgeWeights() if those methods accepted the set of - * already executed vertices so they could make the proper - * exclusions (or if we had a method which returned the - * un-executed vertices and/or edges). - */ -// e.v1.sample(context, limit); -// e.v2.sample(context, limit); - } - } + return tmp; - /** - * Return <code>true</code> iff there exists at least one {@link Edge} - * branching from a vertex NOT found in the set of vertices which have - * visited. - * - * @param visited - * A set of vertices. - * - * @return <code>true</code> if there are more edges to explore. - */ - private boolean moreEdgesToVisit(final Set<Vertex> visited) { - - // Consider all edges. - for(Edge e : E) { - - if (visited.contains(e.v1) && visited.contains(e.v2)) { - /* - * Since both vertices for this edge have been executed the - * edge is now redundant. Either it was explicitly executed - * or another join path was used which implies the edge by - * transitivity in the join graph. - */ - continue; - } - - /* - * We found a counter example (an edge which has not been - * explored). 
- */ - if (log.isTraceEnabled()) - log.trace("Edge has not been explored: " + e); - - return true; - - } - - // No more edges to explore. - return false; - } /** - * E - * - * @param limit - * @return - * - * TODO How to indicate the set of edges which remain to be - * explored? - * - * @throws Exception - */ - public Path chainSample(final QueryEngine queryEngine, final int limit, - final long timeout) throws Exception { - - final Vertex source; - { - /* - * Find the edge having the minimum estimated cardinality. - */ - final Edge e = getMinimumCardinalityEdge(executedVertices); - - if (e == null) - throw new RuntimeException("No weighted edges."); - - /* - * Decide which vertex of that edge will be the starting point - * for chain sampling (if any). - */ - if (getEdgeCount(e.v1, executedVertices) > 1 - || getEdgeCount(e.v2, executedVertices) > 1) { - /* - * There is at least one vertex of that edge which branches. - * Chain sampling will begin with the vertex of that edge - * which has the lower estimated cardinality. - * - * TODO It could be that the minimum cardinality vertex does - * not branch. What happens for that code path? Do we just - * execute that edge and then reenter chain sampling? If so, - * it would be cleared to test for this condition explicitly - * up front. - */ - source = e.getMinimumCardinalityVertex(); - } else { - /* - * There is no vertex which branches for that edge. This is - * a stopping condition for chain sampling. The path - * consisting of just that edge is returned and should be - * executed by the caller. - */ - return new Path(e); - } - - } - - /* - * Setup some data structures for one or more breadth first - * expansions of the set of path(s) which are being sampled. This - * iteration will continue until we reach a stopping condition. - */ - - // The set of paths being considered. - final List<Path> paths = new LinkedList<Path>(); - - { - // The current path. - final Path p = new Path(); - - p.stopVertex = source; -// p.inputSample = source.sample; - paths.add(p); - } - - // initialize the cutoff to the limit used to sample the vertices. - int cutoff = limit; - long cutoffMillis = timeout; - - final Set<Vertex> unsampled = new LinkedHashSet<Vertex>( - executedVertices); - - /* - * One breadth first expansion of the join paths. - * - * Note: This expands each join path one vertex in each iteration. - * However, different join paths can expand from different vertices. - * - * For ROX, each join path is expanded from the last vertex which - * was added to that join path so the initial edge for each join - * path strongly determines the edges in the join graph along which - * that join path can grow. - * - * For bigdata, we can grow the path from any vertex already in the - * path to any vertex which (a) is not yet in the path; and (b) has - * not yet been evaluated. - * - * This suggests that this loop must consider each of the paths to - * decide whether that path can be extended. - */ - while (moreEdgesToVisit(unsampled)) { - - // increment the cutoff. - cutoff += limit; - cutoffMillis += timeout; - - // Consider each path. - for(Path p : paths) { - - /* - * The vertex at which we stopped expanding that path the - * last time. - * - * TODO ROX might have to traverse vertex to vertex along - * edges, but we can execute any edge whose preconditions - * have been satisfied. - */ - final Vertex v = p.stopVertex; - - // TODO depends on the notion of the paths remaining. 
- if (getEdgeCount(v, null/*executed+sampled(p)*/) > 0) { - /* - * This path branches at this vertex, so remove the old - * path 1st. - */ - paths.remove(p); - } - - // For each edge which is a neighbor of the vertex [v]. - final List<Edge> neighbors = null; - for(Edge e : neighbors) { - // 1. append the edge to the path - final Path p1 = p.addEdge(queryEngine, cutoff, e); - // 3. add the path to paths. - paths.add(p1); - } - - } - - final Path p = getSelectedJoinPath(paths.toArray(new Path[paths.size()])); - - if(p != null) { - - return p; - - } - - } // while(moreEdgesToSample) - - final Path p = getBestAlternativeJoinPath(paths.toArray(new Path[paths.size()])); - - if(p != null) { - - return p; - - } - - // TODO ROX as given can return null here, which looks like a bug. - return null; - - } // chainSample() - - /** - * Return the path which is selected by the termination criteria - * (looking for a path which dominates the alternatives). - * - * @param a - * An array of {@link Path}s to be tested. - * - * @return The selected path -or- <code>null</code> if none of the paths - * is selected. - * - * @todo Should we only explore beneath the diagonal? - * - * @todo What is the basis for comparing the expected cardinality of - * join paths? Where one path is not simply the one step extension - * of the other. - * <p> - * This rule might only be able to compare the costs for paths in - * which one path directly extends another. - * <p> - * It is not clear that this code is comparing all paths which - * need to be compared. - * - * @todo I have restated the termination rule as follows. - * <p> - * If there is a path [p] whose total cost is LTE the cost of - * executing just its last edge [e], then the path [p] dominates - * all paths beginning with edge [e]. The dominated paths should - * be pruned. - * <p> - * If there is a path, [p], which is an unordered extension of - * another path, [p1] (the vertices of p are a superset of the - * vertices of p1), and the cost of [p] is LTE the cost of [p1], - * then [p] dominates [p1]. The dominated paths should be pruned. - * <p> - * If there is a path, [p], which has the same vertices as a path - * [p1] and the cost of [p] is LTE the cost of [p1], then [p] - * dominates (or is equivalent to) [p1]. The path [p1] should be - * pruned. - * - * For a given path length [l], if no paths of length [l] remain - * then the minimum cost path of length GT [l] may be executed. - * - * @todo Due to sampling error and the desire to be robust to small - * differences in the expected cost of an operation, we should - * only consider two significant digits when comparing estimates - * of cost. E.g., 990 and 1000 should not be differentiated as - * they are the same within the sampling error. This should be - * used to chose all starting vertices which have the same minimum - * cardinality. - */ - public Path getSelectedJoinPath(final Path[] a) { - final StringBuilder sb = new StringBuilder(); - final Formatter f = new Formatter(sb); - Path p = null; - for (int i = 0; i < a.length; i++) { - final Path Pi = a[i]; - if (Pi.sample == null) - throw new RuntimeException("Not sampled: " + Pi); - for (int j = 0; j < a.length; j++) { - if (i == j) - continue; - final Path Pj = a[j]; - if (Pj.sample == null) - throw new RuntimeException("Not sampled: " + Pj); - /* - * FIXME This needs to compare the cost of Pj given path Pi - * against the cost of Pj when executed as a single edge (or - * by any other alternative join path sequence). 
The choice - * of Pi and Pj is not coherent and the same value of costPj - * is being used for both sides of the equation. - */ - final long costPi = Pi.sample.estimatedCardinality; - final double sfPi = Pi.sample.f; - final long costPj = Pj.sample.estimatedCardinality; - final long expectedCombinedCost = costPi - + (long) (sfPi * costPj); - /* - * @todo I think that LTE makes more sense here since having - * the same net cardinality for a given edge after - * performing more steps would appear to be worth while. - */ - final boolean lte = expectedCombinedCost <= costPj; - { - f - .format( - ... [truncated message content] |
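Aside: a minimal standalone sketch of the cutoff-join arithmetic used by the
EdgeSample constructor above, assuming the field semantics described in its
javadoc (rangeCount is the fast range count of the source sample; inputCount
and outputCount are the solutions consumed and generated before the join was
cutoff). The class and method names here are illustrative only and are not
part of the patch.

    public class CutoffJoinMath {

        /**
         * Estimate the cardinality of a join from a cutoff sample.
         *
         * @param rangeCount
         *            The fast range count of the source sample.
         * @param inputCount
         *            The #of source solutions consumed before the cutoff.
         * @param outputCount
         *            The #of output solutions generated before the cutoff.
         */
        static public long estimateCardinality(final long rangeCount,
                final int inputCount, final int outputCount) {

            // The join hit ratio (aka scale factor): outputs per input consumed.
            final double f = outputCount == 0 ? 0
                    : (outputCount / (double) inputCount);

            // Extrapolate the hit ratio across the full range count.
            return (long) (rangeCount * f);

        }

    }

For example, a cutoff join which consumes 100 sampled solutions and generates
25 outputs against a vertex whose range count is 10,000 gives f = 0.25 and an
estimated cardinality of 2,500. The flags computed in the constructor qualify
this estimate: a single input which saturates the limit means the estimate can
only be trusted as a lower bound, while a non-exact source sample which fails
to reach the limit means the estimate may have underflowed.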
From: <tho...@us...> - 2010-11-12 21:29:27
Revision: 3947
          http://bigdata.svn.sourceforge.net/bigdata/?rev=3947&view=rev
Author:   thompsonbry
Date:     2010-11-12 21:29:20 +0000 (Fri, 12 Nov 2010)

Log Message:
-----------
Added tearDown() to the unit test to close the journal.

Modified Paths:
--------------
    branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java
    branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java

Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java
===================================================================
--- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java	2010-11-12 21:02:48 UTC (rev 3946)
+++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java	2010-11-12 21:29:20 UTC (rev 3947)
@@ -1440,9 +1440,6 @@
 	public void runtimeOptimizer(final QueryEngine queryEngine,
 			final int limit) throws Exception {
 
-//		// The set of vertices which have been consumed by the query.
-//		final Set<Vertex> executedVertices = new LinkedHashSet<Vertex>();
-
 		// Setup the join graph.
 		Path[] paths = round0(queryEngine, limit, 2/* nedges */);
 
Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java
===================================================================
--- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java	2010-11-12 21:02:48 UTC (rev 3946)
+++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java	2010-11-12 21:29:20 UTC (rev 3947)
@@ -213,6 +213,18 @@
 
     }
 
+    protected void tearDown() throws Exception {
+
+        if (jnl != null) {
+            jnl.close();
+            jnl = null;
+        }
+
+        super.tearDown();
+
+    }
+
     /**
      * Loads the data into the closureStore and computes the closure.
      */
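Aside: the tearDown() added above matters because JUnit retains each test case
instance for the life of the suite run, so a test which does not close its
journal and clear the field pins both the backing store and the associated
heap until the entire suite completes. A sketch of the general idiom, assuming
a second, hypothetical resource field (store, with a close() method) to show
the ordering; close in the reverse order of allocation and null each field:

    protected void tearDown() throws Exception {

        // Close in reverse order of allocation so that dependent resources
        // are released before the resources they depend on.
        if (store != null) { // [store] is a hypothetical second resource.
            store.close();
            store = null; // clear the field; JUnit retains the test instance.
        }

        if (jnl != null) {
            jnl.close();
            jnl = null;
        }

        super.tearDown();

    }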
From: <tho...@us...> - 2010-11-15 18:13:15
Revision: 3949 http://bigdata.svn.sourceforge.net/bigdata/?rev=3949&view=rev Author: thompsonbry Date: 2010-11-15 18:13:06 +0000 (Mon, 15 Nov 2010) Log Message: ----------- More work on the runtime query optimizer. It now converges onto a single solution. Modified PipelineJoin to support a cutoff (LIMIT annotation). Modified PipelineJoin to accurately track the counters required to compute the join hit ratio. Modified PipelineJoin to quietly ignore empty binding set chunks. Modified the QueryEngine to track recently terminated queries in a 'doneQueries' LRU. This is used to detect events which arrive late for a recently terminated query. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleIndex.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunState.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/FederatedQueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java 2010-11-14 16:45:55 UTC (rev 3948) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java 2010-11-15 18:13:06 UTC (rev 3949) @@ -192,13 +192,16 @@ super(runningQuery.getFederation(), runningQuery.getIndexManager()); - this.runningQuery = runningQuery; if (stats == null) throw new IllegalArgumentException(); + if (source == null) throw new IllegalArgumentException(); + if (sink == null) throw new IllegalArgumentException(); + + this.runningQuery = runningQuery; this.partitionId = partitionId; this.stats = stats; this.source = source; Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java 2010-11-14 16:45:55 UTC (rev 3948) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java 2010-11-15 18:13:06 UTC (rev 3949) @@ -68,6 +68,23 @@ String ALT_SINK_REF = PipelineOp.class.getName() + ".altSinkRef"; + /** + * The value reported by {@link PipelineOp#isSharedState()} (default + * {@value #DEFAULT_SHARED_STATE}). This may be overridden to + * <code>true</code> to have instances operators evaluated in the same + * query engine context share the same {@link BOpStats} instance. + * <p> + * Note: {@link BOp#getEvaluationContext()} MUST be overridden to return + * {@link BOpEvaluationContext#CONTROLLER} if this annotation is + * overridden to <code>true</code>. + * <p> + * When <code>true</code>, the {@link QueryEngine} will impose the + * necessary constraints when the operator is evaluated. 
+ */ + String SHARED_STATE = PipelineOp.class.getName() + ".sharedState"; + + boolean DEFAULT_SHARED_STATE = false; + } /** @@ -135,18 +152,14 @@ /** * Return <code>true</code> iff {@link #newStats()} must be shared across * all invocations of {@link #eval(BOpContext)} for this operator for a - * given query (default <code>false</code>). - * <p> - * Note: {@link BOp#getEvaluationContext()} MUST be overridden to return - * {@link BOpEvaluationContext#CONTROLLER} if this method is overridden to - * return <code>true</code>. - * <p> - * When <code>true</code>, the {@link QueryEngine} will impose the necessary - * constraints when the operator is evaluated. + * given query. + * + * @see Annotations#SHARED_STATE */ public boolean isSharedState() { - - return false; + + return getProperty(Annotations.SHARED_STATE, + Annotations.DEFAULT_SHARED_STATE); } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleIndex.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleIndex.java 2010-11-14 16:45:55 UTC (rev 3948) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleIndex.java 2010-11-15 18:13:06 UTC (rev 3949) @@ -216,7 +216,7 @@ final long rangeCount = accessPath.rangeCount(false/* exact */); - if (limit > rangeCount) { + if (limit >= rangeCount) { /* * The sample will contain everything in the access path. Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2010-11-14 16:45:55 UTC (rev 3948) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2010-11-15 18:13:06 UTC (rev 3949) @@ -50,21 +50,20 @@ import com.bigdata.bop.BOpContext; import com.bigdata.bop.BOpContextBase; import com.bigdata.bop.BOpEvaluationContext; -import com.bigdata.bop.BOpUtility; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IElement; import com.bigdata.bop.IPredicate; import com.bigdata.bop.IVariable; import com.bigdata.bop.NV; import com.bigdata.bop.PipelineOp; -import com.bigdata.bop.Var; import com.bigdata.bop.ap.SampleIndex; import com.bigdata.bop.bindingSet.HashBindingSet; import com.bigdata.bop.engine.LocalChunkMessage; import com.bigdata.bop.engine.QueryEngine; import com.bigdata.bop.engine.RunningQuery; import com.bigdata.bop.join.PipelineJoin; -import com.bigdata.bop.solutions.SliceOp; +import com.bigdata.bop.join.PipelineJoin.PipelineJoinStats; +import com.bigdata.bop.rdf.join.DataSetJoin; import com.bigdata.relation.IRelation; import com.bigdata.relation.accesspath.IAccessPath; import com.bigdata.relation.accesspath.ThickAsynchronousIterator; @@ -91,6 +90,41 @@ * its costs. For example, by pruning the search, by recognizing when the * query is simple enough to execute directly, by recognizing when we have * already materialized the answer to the query, etc. + * + * @todo Cumulative estimated cardinality is an estimate of the work to be done. + * However, the actual cost of a join depends on whether we will use + * nested index subquery or a hash join and the cost of that operation on + * the database. There could be counter examples where the cost of the + * hash join with a range scan using the unbound variable is LT the nested + * index subquery. 
For those cases, we will do the same amount of IO on + * the hash join but there will still be a lower cardinality to the join + * path since we are feeding in fewer solutions to be joined. + * + * @todo Look at the integration with the SAIL. We decorate the joins with some + * annotations. Those will have to be correctly propagated to the "edges" + * in order for edge sampling and incremental evaluation (or final + * evaluation) to work. The {@link DataSetJoin} essentially inlines one of + * its access paths. That should really be changed into an inline access + * path and a normal join operator so we can defer some of the details + * concerning the join operator annotations until we decide on the join + * path to be executed. An inline AP really implies an inline relation, + * which in turn implies that the query is a searchable context for + * query-local resources. + * <p> + * For s/o, when the AP is remote, the join evaluation context must be ANY + * and otherwise (for s/o) it must be SHARDED. + * <p> + * Since the join graph is fed the vertices (APs), it does not have access + * to the annotated joins so we need to generated appropriately annotated + * joins when sampling an edge and when evaluation a subquery. + * + * @todo Examine behavior when we do not have perfect covering indices. This + * will mean that some vertices can not be sampled using an index and that + * estimation of their cardinality will have to await the estimation of + * the cardinality of the edge(s) leading to that vertex. Still, the + * approach should be able to handle queries without perfect / covering + * automatically. Then experiment with carrying fewer statement indices + * for quads. */ public class JoinGraph extends PipelineOp { @@ -170,10 +204,10 @@ } - /** - * Used to assign row identifiers. - */ - static private final IVariable<Integer> ROWID = Var.var("__rowid"); +// /** +// * Used to assign row identifiers. +// */ +// static private final IVariable<Integer> ROWID = Var.var("__rowid"); /** * A sample of a {@link Vertex} (an access path). @@ -301,7 +335,8 @@ /** * Take a sample of the vertex. If the sample is already exact, then - * this is a NOP. + * this is a NOP. If the vertex was already sampled to that limit, then + * this is a NOP (you have to raise the limit to re-sample the vertex). * * @param limit * The sample cutoff. @@ -327,6 +362,16 @@ } + if (oldSample != null && oldSample.limit >= limit) { + + /* + * The vertex was already sampled to this limit. + */ + + return; + + } + final BOpContextBase context = new BOpContextBase(queryEngine); final IRelation r = context.getRelation(pred); @@ -384,8 +429,8 @@ } - if (log.isInfoEnabled()) - log.info("Sampled: " + sample); + if (log.isTraceEnabled()) + log.trace("Sampled: " + sample); return; @@ -394,6 +439,46 @@ } /** + * Type safe enumeration describes the edge condition (if any) for a + * cardinality estimate. + */ + public static enum EstimateEnum { + /** + * An estimate, but not any of the edge conditions. + */ + Normal(" "), + /** + * The cardinality estimate is exact. + */ + Exact("E"), + /** + * The cardinality estimation is a lower bound (the actual cardinality + * may be higher than the estimated value). + */ + LowerBound("L"), + /** + * Flag is set when the cardinality estimate underflowed (false zero + * (0)). 
+ */ + Underflow("U"); + + private EstimateEnum(final String code) { + + this.code = code; + + } + + private final String code; + + public String getCode() { + + return code; + + } + + } // EstimateEnum + + /** * A sample of an {@link Edge} (a join). */ public static class EdgeSample { @@ -405,6 +490,13 @@ public final long rangeCount; /** + * <code>true</code> iff the source sample is exact (because the source + * is either a fully materialized vertex or an edge whose solutions have + * been fully materialized). + */ + public final boolean sourceSampleExact; + + /** * The limit used to sample the edge (this is the limit on the #of * solutions generated by the cutoff join used when this sample was * taken). @@ -438,49 +530,14 @@ public final long estimatedCardinality; /** - * Flag is set when the estimate is likely to be a lower bound for the - * cardinality of the edge. - * <p> - * If the {@link #inputCount} is ONE (1) and the {@link #outputCount} is - * the {@link #limit} then the {@link #estimatedCardinality} is a lower - * bound as more than {@link #outputCount} solutions could have been - * produced by the join against a single input solution. - */ - public final boolean estimateIsLowerBound; - - /** - * Flag indicates that the {@link #estimatedCardinality} underflowed. - * <p> - * Note: When the source vertex sample was not exact, then it is - * possible for the cardinality estimate to underflow. When, in - * addition, {@link #outputCount} is LT {@link #limit}, then feeding the - * sample of source tuples in is not sufficient to generated the desired - * #of output tuples. In this case, {@link #f join hit ratio} will be - * low. It may even be that zero output tuples were generated, in which - * case the join hit ratio will appear to be zero. However, the join hit - * ratio actually underflowed and an apparent join hit ratio of zero - * does not imply that the join will be empty unless the source vertex - * sample is actually the fully materialized access path - see - * {@link VertexSample#exact} and {@link #exact}. - */ - public final boolean estimateIsUpperBound; - - /** - * <code>true</code> if the sample is the exact solution for the join - * path. - * <p> - * Note: If the entire source vertex is being feed into the sample, - * {@link VertexSample#exact} flags this condition, and outputCount is - * also LT the limit, then the edge sample is the actual result of the - * join. That is, feeding all source tuples into the join gives fewer - * than the desired number of output tuples. + * Indicates whether the estimate is exact, an upper bound, or a lower + * bound. * - * TODO This field marks this condition and should be used to avoid - * needless re-computation of a join whose exact solution is already - * known. + * TODO This field should be used to avoid needless re-computation of a + * join whose exact solution is already known. */ - public final boolean exact; - + public final EstimateEnum estimateEnum; + /** * The sample of the solutions for the join path. 
*/ @@ -504,9 +561,12 @@ */ EdgeSample( // final VertexSample sourceVertexSample, - final long sourceSampleRangeCount, - final boolean sourceSampleExact, final int limit, - final int inputCount, final int outputCount, + final long sourceSampleRangeCount,// + final boolean sourceSampleExact, // + final int sourceSampleLimit,// + final int limit,// + final int inputCount, // + final int outputCount,// final IBindingSet[] sample) { if (sample == null) @@ -514,6 +574,8 @@ // this.rangeCount = sourceVertexSample.rangeCount; this.rangeCount = sourceSampleRangeCount; + + this.sourceSampleExact = sourceSampleExact; this.limit = limit; @@ -525,24 +587,64 @@ estimatedCardinality = (long) (rangeCount * f); - estimateIsLowerBound = inputCount == 1 && outputCount == limit; + if (sourceSampleExact && outputCount < limit) { + /* + * Note: If the entire source vertex is being fed into the + * cutoff join and the cutoff join outputCount is LT the limit, + * then the sample is the actual result of the join. That is, + * feeding all source solutions into the join gives fewer than + * the desired number of output solutions. + */ + estimateEnum = EstimateEnum.Exact; + } else if (inputCount == 1 && outputCount == limit) { + /* + * If the inputCount is ONE (1) and the outputCount is the + * limit, then the estimated cardinality is a lower bound as + * more than outputCount solutions might be produced by the join + * when presented with a single input solution. + */ + estimateEnum = EstimateEnum.LowerBound; + } else if (!sourceSampleExact + && inputCount == Math.min(sourceSampleLimit, rangeCount) + && outputCount == 0) { + /* + * When the source sample was not exact, the inputCount is EQ to + * the lesser of the source range count and the source sample + * limit, and the outputCount is ZERO (0), then feeding in all + * source solutions in is not sufficient to generate any output + * solutions. In this case, the estimated join hit ratio appears + * to be zero. However, the estimation of the join hit ratio + * actually underflowed and the real join hit ratio might be a + * small non-negative value. A real zero can only be identified + * by executing the full join. + * + * Note: An apparent join hit ratio of zero does NOT imply that + * the join will be empty (unless the source vertex sample is + * actually the fully materialized access path - this case is + * covered above). 
+ */ + estimateEnum = EstimateEnum.Underflow; + } else { + estimateEnum = EstimateEnum.Normal; + } - // final boolean sourceSampleExact = sourceVertexSample.exact; - estimateIsUpperBound = !sourceSampleExact && outputCount < limit; - - this.exact = sourceSampleExact && outputCount < limit; - this.sample = sample; } public String toString() { - return getClass().getName() + "{inputRangeCount=" + rangeCount - + ", limit=" + limit + ", inputCount=" + inputCount - + ", outputCount=" + outputCount + ", f=" + f - + ", estimatedCardinality=" + estimatedCardinality - + ", estimateIsLowerBound=" + estimateIsLowerBound - + ", estimateIsUpperBound=" + estimateIsUpperBound - + ", sampleIsExactSolution=" + exact + "}"; + return getClass().getName() // + + "{ rangeCount=" + rangeCount// + + ", sourceSampleExact=" + sourceSampleExact// + + ", limit=" + limit // + + ", inputCount=" + inputCount// + + ", outputCount=" + outputCount // + + ", f=" + f// + + ", estimatedCardinality=" + estimatedCardinality// + + ", estimateEnum=" + estimateEnum// +// + ", estimateIsLowerBound=" + estimateIsLowerBound// +// + ", estimateIsUpperBound=" + estimateIsUpperBound// +// + ", sampleIsExactSolution=" + estimateIsExact // + + "}"; } }; @@ -703,6 +805,14 @@ throw new IllegalArgumentException(); /* + * Note: There is never a need to "re-sample" the edge. Unlike ROX, + * we always can sample a vertex. This means that we can sample the + * edges exactly once, during the initialization of the join graph. + */ + if (sample != null) + throw new RuntimeException(); + + /* * Figure out which vertex has the smaller cardinality. The sample * of that vertex is used since it is more representative than the * sample of the other vertex. @@ -722,7 +832,7 @@ } /* - * TODO This is difficult to setup because we do not have a concept + * TODO This is awkward to setup because we do not have a concept * (or class) corresponding to a fly weight relation and we do not * have a general purpose relation, just arrays or sequences of * IBindingSets. Also, all relations are persistent. Temporary @@ -740,10 +850,6 @@ * Together, this means that we are dealing with IBindingSet[]s for * both the input and the output of the cutoff evaluation of the * edge rather than rows of the materialized relation. - * - * TODO On subsequent iterations we would probably re-sample [v] and - * we would run against the materialized intermediate result for - * [v']. */ /* @@ -763,7 +869,8 @@ // Sample the edge and save the sample on the edge as a side-effect. this.sample = estimateCardinality(queryEngine, limit, v, vp, - v.sample.rangeCount, v.sample.exact, sourceSample); + v.sample.rangeCount, v.sample.exact, v.sample.limit, + sourceSample); return sample.estimatedCardinality; @@ -793,17 +900,28 @@ public EdgeSample estimateCardinality(final QueryEngine queryEngine, final int limit, final Vertex vSource, final Vertex vTarget, final long sourceSampleRangeCount, - final boolean sourceSampleExact, IBindingSet[] sourceSample) + final boolean sourceSampleExact, + final int sourceSampleLimit, + final IBindingSet[] sourceSample) throws Exception { if (limit <= 0) throw new IllegalArgumentException(); - // Inject a rowId column. - sourceSample = BOpUtility.injectRowIdColumn(ROWID, 1/* start */, - sourceSample); +// // Inject a rowId column. 
+// sourceSample = BOpUtility.injectRowIdColumn(ROWID, 1/* start */, +// sourceSample); /* + * Note: This sets up a cutoff pipeline join operator which makes an + * accurate estimate of the #of input solutions consumed and the #of + * output solutions generated. From that, we can directly compute + * the join hit ratio. This approach is preferred to injecting a + * "RowId" column as the estimates are taken based on internal + * counters in the join operator and the join operator knows how to + * cutoff evaluation as soon as the limit is satisfied, thus + * avoiding unnecessary effort. + * * TODO Any constraints on the edge (other than those implied by * shared variables) need to be annotated on the join. Constraints * (other than range constraints which are directly coded by the @@ -811,31 +929,61 @@ * they can reduce the cardinality of the join and that is what we * are trying to estimate here. */ + final int joinId = 1; final PipelineJoin joinOp = new PipelineJoin(new BOp[] {}, // - new NV(BOp.Annotations.BOP_ID, 1),// - new NV(PipelineJoin.Annotations.PREDICATE, vTarget.pred - .setBOpId(3))); + new NV(BOp.Annotations.BOP_ID, joinId),// + new NV(PipelineJoin.Annotations.PREDICATE, vTarget.pred + .setBOpId(3)), + // disallow parallel evaluation. + new NV(PipelineJoin.Annotations.MAX_PARALLEL,0), + // disable access path coalescing + new NV(PipelineJoin.Annotations.COALESCE_DUPLICATE_ACCESS_PATHS,false), + // cutoff join. + new NV(PipelineJoin.Annotations.LIMIT,(long)limit), + /* + * Note: In order to have an accurate estimate of the join + * hit ratio we need to make sure that the join operator + * runs using a single PipelineJoinStats instance which will + * be visible to us when the query is cutoff. In turn, this + * implies that the join must be evaluated on the query + * controller. + * + * @todo This implies that sampling of scale-out joins must + * be done using remote access paths. + */ + new NV(PipelineJoin.Annotations.SHARED_STATE,true), + new NV(PipelineJoin.Annotations.EVALUATION_CONTEXT,BOpEvaluationContext.CONTROLLER) +// // make sure the chunks are large enough to hold the result. +// new NV(PipelineJoin.Annotations.CHUNK_CAPACITY,limit), +// // no chunk timeout +// new NV(PipelineJoin.Annotations.CHUNK_TIMEOUT,Long.MAX_VALUE) + ); - final SliceOp sliceOp = new SliceOp(new BOp[] { joinOp },// - NV.asMap(// - new NV(BOp.Annotations.BOP_ID, 2), // - new NV(SliceOp.Annotations.LIMIT, (long) limit), // - new NV(BOp.Annotations.EVALUATION_CONTEXT, - BOpEvaluationContext.CONTROLLER))); +// BOpContext context = new BOpContext(runningQuery, partitionId, stats, source, sink, sink2); +// joinOp.eval(context); + +// final SliceOp sliceOp = new SliceOp(new BOp[] { joinOp },// +// NV.asMap(// +// new NV(BOp.Annotations.BOP_ID, 2), // +// new NV(SliceOp.Annotations.LIMIT, (long) limit), // +// new NV(BOp.Annotations.EVALUATION_CONTEXT, +// BOpEvaluationContext.CONTROLLER))); + final PipelineOp queryOp = joinOp; + // run the cutoff sampling of the edge. final UUID queryId = UUID.randomUUID(); final RunningQuery runningQuery = queryEngine.eval(queryId, - sliceOp, new LocalChunkMessage<IBindingSet>(queryEngine, + queryOp, new LocalChunkMessage<IBindingSet>(queryEngine, queryId, joinOp.getId()/* startId */, -1 /* partitionId */, new ThickAsynchronousIterator<IBindingSet[]>( new IBindingSet[][] { sourceSample }))); - // #of source samples consumed. - int inputCount = 0; - // #of output samples generated. - int outputCount = 0; +// // #of source samples consumed. 
+//        int inputCount;
+//        // #of output samples generated.
+//        int outputCount = 0;
 
         final List<IBindingSet> result = new LinkedList<IBindingSet>();
 
         try {
 
             try {
 
@@ -845,20 +993,30 @@
                     runningQuery.iterator());
 
                 while (itr.hasNext()) {
 
                     bset = itr.next();
 
+//                    final int rowid = (Integer) bset.get(ROWID).get();
+//                    if (rowid > inputCount)
+//                        inputCount = rowid;
 
                     result.add(bset);
 
-                    outputCount++;
+//                    outputCount++;
 
                 }
 
-                // #of input rows consumed.
-                inputCount = bset == null ? 0 : ((Integer) bset.get(ROWID)
-                        .get());
+//                // #of input rows consumed.
+//                inputCount = bset == null ? 0 : ((Integer) bset.get(ROWID)
+//                        .get());
 
             } finally {
 
-                // verify no problems. FIXME Restore test of the query.
-                // runningQuery.get();
+                // verify no problems.
+                runningQuery.get();
 
             }
 
         } finally {
 
             runningQuery.cancel(true/* mayInterruptIfRunning */);
 
         }
 
+        // The join hit ratio can be computed directly from these stats.
+        final PipelineJoinStats joinStats = (PipelineJoinStats) runningQuery
+                .getStats().get(joinId);
+
+        if (log.isDebugEnabled())
+            log.debug(joinStats.toString());
+
         /*
          * TODO Improve comments here. See if it is possible to isolate a
          * common base class which would simplify the setup of the cutoff
         */
 
         final EdgeSample edgeSample = new EdgeSample(
-                sourceSampleRangeCount, sourceSampleExact, limit,
-                inputCount, outputCount, result
-                        .toArray(new IBindingSet[result.size()]));
+                sourceSampleRangeCount, //
+                sourceSampleExact, // @todo redundant with sourceSampleLimit
+                sourceSampleLimit, //
+                limit, //
+                (int) joinStats.inputSolutions.get(),//
+                (int) joinStats.outputSolutions.get(), //
+                result.toArray(new IBindingSet[result.size()]));
 
-        if (log.isInfoEnabled())
-            log.info("edge=" + this + ", sample=" + edgeSample);
+        if (log.isTraceEnabled())
+            log.trace("edge=" + this + ", sample=" + edgeSample);
 
         return edgeSample;
 
     }
 
@@ -892,12 +1054,14 @@
 
     /**
      * The sample obtained by the step-wise cutoff evaluation of the ordered
-     * edges of the path. This sample is generated one edge at a time rather
-     * than by attempting the cutoff evaluation of the entire join path (the
-     * latter approach does allow us to limit the amount of work to be done
-     * to satisfy the cutoff).
+     * edges of the path.
+     * <p>
+     * Note: This sample is generated one edge at a time rather than by
+     * attempting the cutoff evaluation of the entire join path (the latter
+     * approach does not allow us to limit the amount of work to be done to
+     * satisfy the cutoff).
      */
-    final public EdgeSample sample;
+    public EdgeSample sample;
 
     /**
      * The cumulative estimated cardinality of the path. This is zero for an
@@ -1012,23 +1176,84 @@
             return false;
         }
 
+//        /**
+//         * Return <code>true</code> if this path is an unordered super set of
+//         * the given path. In the case where both paths have the same vertices
+//         * this will also return <code>true</code>.
+//         * 
+//         * @param p
+//         *            Another path.
+//         * 
+//         * @return <code>true</code> if this path is an unordered super set of
+//         *         the given path.
+//         */
+//        public boolean isUnorderedSuperSet(final Path p) {
+//
+//            if (p == null)
+//                throw new IllegalArgumentException();
+//
+//            if (edges.size() < p.edges.size()) {
+//                /*
+//                 * Fast rejection. This assumes that each edge after the first
+//                 * adds one distinct vertex to the path. That assumption is
+//                 * enforced by #addEdge().
+//                 */
+//                return false;
+//            }
+//
+//            final Vertex[] v1 = getVertices();
+//            final Vertex[] v2 = p.getVertices();
+//
+//            if (v1.length < v2.length) {
+//                // Proven false since the other set is larger.
+//                return false;
+//            }
+//
+//            /*
+//             * Scan the vertices of the caller's path. If any of those vertices
+//             * are NOT found in this path then the caller's path can not be a
+//             * subset of this path.
+//             */
+//            for (int i = 0; i < v2.length; i++) {
+//
+//                final Vertex tmp = v2[i];
+//
+//                boolean found = false;
+//                for (int j = 0; j < v1.length; j++) {
+//
+//                    if (v1[j] == tmp) {
+//                        found = true;
+//                        break;
+//                    }
+//
+//                }
+//
+//                if (!found) {
+//                    return false;
+//                }
+//
+//            }
+//
+//            return true;
+//
+//        }
 
         /**
-         * Return <code>true</code> if this path is an unordered super set of
-         * the given path. In the case where both paths have the same vertices
-         * this will also return <code>true</code>.
+         * Return <code>true</code> if this path is an unordered variant of the
+         * given path (same vertices in any order).
          * 
         * @param p
         *            Another path.
         * 
-         * @return <code>true</code> if this path is an unordered super set of
-         *         the given path.
+         * @return <code>true</code> if this path is an unordered variant of the
+         *         given path.
         */
-        public boolean isUnorderedSuperSet(final Path p) {
+        public boolean isUnorderedVariant(final Path p) {
 
             if (p == null)
                 throw new IllegalArgumentException();
 
-            if (edges.size() < p.edges.size()) {
+            if (edges.size() != p.edges.size()) {
                 /*
                  * Fast rejection. This assumes that each edge after the first
                  * adds one distinct vertex to the path. That assumption is
@@ -1040,15 +1265,17 @@
             final Vertex[] v1 = getVertices();
             final Vertex[] v2 = p.getVertices();
 
-            if (v1.length < v2.length) {
-                // Proven false since the other set is larger.
+            if (v1.length != v2.length) {
+
+                // Reject (this case is also covered by the test above).
                 return false;
+
             }
 
             /*
              * Scan the vertices of the caller's path. If any of those vertices
-             * are NOT found in this path then the caller's path can not be a
-             * subset of this path.
+             * are NOT found in this path then the paths are not unordered
+             * variants of one another.
             */
             for (int i = 0; i < v2.length; i++) {
 
@@ -1170,24 +1397,24 @@
             * the new join path we have to do a one step cutoff evaluation of
             * the new Edge, given the sample available on the current Path.
             * 
-             * TODO It is possible for the path sample to be empty. Unless the
+             * FIXME It is possible for the path sample to be empty. Unless the
             * sample also happens to be exact, this is an indication that the
-             * estimated cardinality has underflowed. How are we going to deal
-             * with this situation?!? What would appear to matter is the amount
-             * of work being performed by the join in achieving that low
-             * cardinality. If we have to do a lot of work to get a small
-             * cardinality then we should prefer join paths which achieve the
-             * same reduction in cardinality with less 'intermediate
-             * cardinality' - that is, by examining fewer possible solutions.
-             * [In fact, the estimated (cumulative) cardinality might not be a
-             * good reflection of the IOs to be done -- this needs more
-             * thought.]
+             * estimated cardinality has underflowed. We track the estimated
+             * cumulative cardinality, so this does not make the join path an
+             * immediate winner, but it does mean that we can not probe further
+             * on that join path as we lack any intermediate solutions to feed
+             * into the downstream joins. [If we re-sampled the edges in the
+             * join path in each round then this would help to establish a
+             * better estimate in successive rounds.]
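+             *
+             * One possible mitigation (a sketch only, assuming a hypothetical
+             * MAX_SAMPLE_LIMIT cap): when the EdgeSample computed below
+             * reports Underflow, re-run the cutoff join with a doubled limit
+             * before extending the path any further, e.g.:
+             *
+             *   EdgeSample s = edgeSample;
+             *   int n = limit;
+             *   while (s.estimateEnum == EstimateEnum.Underflow
+             *           && n < MAX_SAMPLE_LIMIT) {
+             *       n *= 2;
+             *       s = e.estimateCardinality(queryEngine, n, sourceVertex,
+             *               targetVertex, this.sample.estimatedCardinality,
+             *               this.sample.estimateEnum == EstimateEnum.Exact,
+             *               this.sample.limit, this.sample.sample);
+             *   }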
*/ final EdgeSample edgeSample = e.estimateCardinality(queryEngine, limit, sourceVertex, targetVertex, - this.sample.estimatedCardinality, this.sample.exact, - this.sample.sample); + this.sample.estimatedCardinality, + this.sample.estimateEnum == EstimateEnum.Exact, + this.sample.limit,// + this.sample.sample// + ); { @@ -1275,23 +1502,77 @@ * @return A table with that data. */ static public String showTable(final Path[] a) { + + return showTable(a, null/* pruned */); + + } + + /** + * Comma delimited table showing the estimated join hit ratio, the estimated + * cardinality, and the set of vertices for each of the specified join + * paths. + * + * @param a + * A set of paths (typically those before pruning). + * @param pruned + * The set of paths after pruning (those which were retained) + * (optional). When given, the paths which were pruned are marked + * in the table. + * + * @return A table with that data. + */ + static public String showTable(final Path[] a,final Path[] pruned) { final StringBuilder sb = new StringBuilder(); final Formatter f = new Formatter(sb); + f.format("%5s %10s%1s * %7s (%3s/%3s) = %10s%1s : %10s %10s", + "path",// + "rangeCount",// + "",// sourceSampleExact + "f",// + "out",// + "in",// + "estCard",// + "",// estimateIs(Exact|LowerBound|UpperBound) + "sumEstCard",// + "joinPath\n" + ); for (int i = 0; i < a.length; i++) { final Path x = a[i]; + // true iff the path survived pruning. + Boolean prune = null; + if (pruned != null) { + prune = Boolean.TRUE; + for (Path y : pruned) { + if (y == x) { + prune = Boolean.FALSE; + break; + } + } + } if (x.sample == null) { - f.format("p[%2d] %7s, %10s %10s", "N/A", "N/A", "N/A", i); + f.format("p[%2d] %10d%1s * %7s (%3s/%3s) = %10s%1s : %10s", i, "N/A", "", "N/A", "N/A", "N/A", "N/A", "", "N/A"); } else { - f.format("p[%2d] % 7.2f, % 10d % 10d", i, x.sample.f, - x.sample.estimatedCardinality, - x.cumulativeEstimatedCardinality); + f.format("p[%2d] %10d%1s * % 7.2f (%3d/%3d) = % 10d%1s : % 10d", i, + x.sample.rangeCount,// + x.sample.sourceSampleExact?"E":"",// + x.sample.f,// + x.sample.outputCount,// + x.sample.inputCount,// + x.sample.estimatedCardinality,// + x.sample.estimateEnum.getCode(),// + x.cumulativeEstimatedCardinality// + ); } - sb.append(", ["); + sb.append(" ["); final Vertex[] vertices = x.getVertices(); for (Vertex v : vertices) { f.format("%2d ", v.pred.getId()); } sb.append("]"); + if (pruned != null) { + if (prune) + sb.append(" pruned"); + } // for (Edge e : x.edges) // sb.append(" (" + e.v1.pred.getId() + " " + e.v2.pred.getId() // + ")"); @@ -1326,16 +1607,6 @@ * the timeout should be used to protect against join paths which take a * long time to materialize <i>cutoff</i> solutions rather than to fine tune * the running time of the query optimizer. - * - * TODO Runtime query optimization is probably useless (or else should rely - * on materialization of intermediate results) when the cardinality of the - * vertices and edges for the query is small. This would let us balance the - * design characteristics of MonetDB and bigdata. For this purpose, we need - * to flag when a {@link VertexSample} is complete (e.g., the cutoff is GTE - * the actual range count). This also needs to be done for each join path so - * we can decide when the sample for the path is in fact the exact solution - * rather than an estimate of the cardinality of the solution together with - * a sample of the solution. 
*/ public static class JGraph { @@ -1432,8 +1703,7 @@ * @param queryEngine * @param limit * The limit for sampling a vertex and the initial limit for - * cutoff join evaluation. A reasonable value is - * <code>100</code>. + * cutoff join evaluation. * * @throws Exception */ @@ -1474,7 +1744,7 @@ * @todo When executing the query, it is actually being executed as * a subquery. Therefore we have to take appropriate care to ensure * that the results are copied out of the subquery and into the - * parent query. + * parent query. See SubqueryTask for how this is done. * * @todo When we execute the query, we should clear the references * to the sample (unless they are exact, in which case they can be @@ -1588,7 +1858,7 @@ * * @param queryEngine * The query engine. - * @param limit + * @param limitIn * The limit (this is automatically multiplied by the round * to increase the sample size in each round). * @param round @@ -1602,12 +1872,12 @@ * * @throws Exception */ - public Path[] expand(final QueryEngine queryEngine, int limit, + public Path[] expand(final QueryEngine queryEngine, int limitIn, final int round, final Path[] a) throws Exception { if (queryEngine == null) throw new IllegalArgumentException(); - if (limit <= 0) + if (limitIn <= 0) throw new IllegalArgumentException(); if (round <= 0) throw new IllegalArgumentException(); @@ -1617,7 +1887,7 @@ throw new IllegalArgumentException(); // increment the limit by itself in each round. - limit *= round; + final int limit = round * limitIn; final List<Path> tmp = new LinkedList<Path>(); @@ -1628,15 +1898,41 @@ // Vertices are inserted into this collection when they are resampled. final Set<Vertex> resampled = new LinkedHashSet<Vertex>(); - + // Then expand each path. for (Path x : a) { - if (x.edges.size() < round) { + final int nedges = x.edges.size(); + + if (nedges < round) { + // Path is from a previous round. continue; + } + /* + * The only way to increase the accuracy of our estimates for + * edges as we extend the join paths is to re-sample each edge + * in the join path in path order. + * + * Note: An edge must be sampled for each distinct join path + * prefix in which it appears within each round. However, it is + * common for surviving paths to share a join path prefix, so do + * not re-sample a given path prefix more than once per round. + * Also, do not re-sample paths which are from rounds before the + * immediately previous round as those paths will not be + * extended in this round. + * + * FIXME Find all vertices in use by all paths which survived + * into this round. Re-sample those vertices to the new limit + * (resampling a vertex is a NOP if it has been resampled to the + * desired limit so we can do this incrementally rather than up + * front). For each edge of each path in path order, re-sample + * the edge. Shared prefix samples should be reused, but samples + * of the same edge with a different prefix must not be shared. + */ + // The set of vertices used to expand this path in this round. final Set<Vertex> used = new LinkedHashSet<Vertex>(); @@ -1657,28 +1953,32 @@ continue; } - final Vertex newVertex = v1Found ? edgeInGraph.v2 + // the target vertex for the new edge. + final Vertex tVertex = v1Found ? edgeInGraph.v2 : edgeInGraph.v1; - if (used.contains(newVertex)) { +// // the source vertex for the new edge. +// final Vertex sVertex = v1Found ? edgeInGraph.v1 +// : edgeInGraph.v2; + + if (used.contains(tVertex)) { // Vertex already used to extend this path. 
continue; } // add the new vertex to the set of used vertices. - used.add(newVertex); + used.add(tVertex); - if (!resampled.add(newVertex)&&round>1) { + if (resampled.add(tVertex)) { /* - * Resample this vertex before we sample a new edge + * (Re-)sample this vertex before we sample a new edge * which targets this vertex. */ - newVertex.sample(queryEngine, limit); + tVertex.sample(queryEngine, limit); } // Extend the path to the new vertex. - final Path p = x.addEdge(queryEngine, limit, - edgeInGraph); + final Path p = x.addEdge(queryEngine, limit, edgeInGraph); // Add to the set of paths for this round. tmp.add(p); @@ -1689,17 +1989,18 @@ final Path[] paths_tp1 = tmp.toArray(new Path[tmp.size()]); + final Path[] paths_tp1_pruned = pruneJoinPaths(paths_tp1); + if (log.isDebugEnabled()) - log.debug("\n*** round=" + round + " : generated paths\n" - + JoinGraph.showTable(paths_tp1)); + log.debug("\n*** round=" + round + ", limit=" + limit + + " : generated paths\n" + + JoinGraph.showTable(paths_tp1, paths_tp1_pruned)); - final Path[] paths_tp1_pruned = pruneJoinPaths(paths_tp1); - if (log.isInfoEnabled()) - log.info("\n*** round=" + round + ": paths{in=" + a.length - + ",considered=" + paths_tp1.length + ",out=" - + paths_tp1_pruned.length + "}\n" - + JoinGraph.showTable(paths_tp1_pruned)); + log.info("\n*** round=" + round + ", limit=" + limit + + ": paths{in=" + a.length + ",considered=" + + paths_tp1.length + ",out=" + paths_tp1_pruned.length + + "}\n" + JoinGraph.showTable(paths_tp1_pruned)); return paths_tp1_pruned; @@ -1919,52 +2220,34 @@ } /** - * Prune paths which are dominated by other paths. Start the algorithm - * by passing in all edges which have the minimum cardinality (when - * comparing their expected cardinality after rounding to 2 significant - * digits). + * Prune paths which are dominated by other paths. Paths are extended in + * each round. Paths from previous rounds are always pruned. Of the new + * paths in each round, the following rule is applied to prune the + * search to just those paths which are known to dominate the other + * paths covering the same set of vertices: * <p> - * If there is a path [p] whose total cost is LTE the cost of executing - * just its last edge [e], then the path [p] dominates all paths - * beginning with edge [e]. The dominated paths should be pruned. [This - * is a degenerate case of the next rule.] - * <p> - * If there is a path, [p] != [p1], where [p] is an unordered superset - * of [p1] (that is the vertices of p are a superset of the vertices of - * p1, but allowing the special case where the set of vertices are the - * same), and the cumulative cost of [p] is LTE the cumulative cost of - * [p1], then [p] dominates (or is equivalent to) [p1] and p1 should be + * If there is a path, [p] != [p1], where [p] is an unordered variant of + * [p1] (that is the vertices of p are the same as the vertices of p1), + * and the cumulative cost of [p] is LTE the cumulative cost of [p1], + * then [p] dominates (or is equivalent to) [p1] and p1 should be * pruned. - * <p> - * If there is a path, [p], which has the same vertices as a path [p1] - * and the cumulative cost of [p] is LTE the cumulative cost of [p1], - * then [p] dominates (or is equivalent to) [p1]. The path [p1] should - * be pruned. [This is a degenerate case of the prior rule.] * * @param a * A set of paths. * * @return The set of paths with all dominated paths removed. - * - * FIXME This does not give us a stopping condition unless the - * set of paths becomes empty. 
I think it will tend to search - * too far for a best path, running the risk of increasing - * inaccuracy introduced by propagation of samples. Resampling - * the vertices and increasing the vertex and edge cutoff at - * each iteration of the search could compensate for that. - * - * TODO Cumulative estimated cardinality is an estimate of the - * work to be done. However, the actual cost of a join depends - * on whether we will use nested index subquery or a hash join - * and the cost of that operation on the database. There could - * be counter examples where the cost of the hash join with a - * range scan using the unbound variable is LT the nested index - * subquery. For those cases, we will do the same amount of IO - * on the hash join but there will still be a lower cardinality - * to the join path since we are feeding in fewer solutions to - * be joined. */ public Path[] pruneJoinPaths(final Path[] a) { + /* + * Find the length of the longest path(s). All shorter paths are + * dropped in each round. + */ + int maxPathLen = 0; + for(Path p : a) { + if(p.edges.size()>maxPathLen) { + maxPathLen = p.edges.size(); + } + } final StringBuilder sb = new StringBuilder(); final Formatter f = new Formatter(sb); final Set<Path> pruned = new LinkedHashSet<Path>(); @@ -1972,6 +2255,14 @@ final Path Pi = a[i]; if (Pi.sample == null) throw new RuntimeException("Not sampled: " + Pi); + if (Pi.edges.size() < maxPathLen) { + /* + * Only the most recently generated set of paths survive to + * the next round. + */ + pruned.add(Pi); + continue; + } if (pruned.contains(Pi)) continue; for (int j = 0; j < a.length; j++) { @@ -1982,7 +2273,7 @@ throw new RuntimeException("Not sampled: " + Pj); if (pruned.contains(Pj)) continue; - final boolean isPiSuperSet = Pi.isUnorderedSuperSet(Pj); + final boolean isPiSuperSet = Pi.isUnorderedVariant(Pj); if (!isPiSuperSet) { // Can not directly compare these join paths. continue; @@ -2071,63 +2362,23 @@ } - // /** - // * Return <code>true</code> iff there exists at least one {@link Edge} - // * branching from a vertex NOT found in the set of vertices which have - // * visited. - // * - // * @param visited - // * A set of vertices. - // * - // * @return <code>true</code> if there are more edges to explore. - // */ - // private boolean moreEdgesToVisit(final Set<Vertex> visited) { - // - // // Consider all edges. - // for(Edge e : E) { - // - // if (visited.contains(e.v1) && visited.contains(e.v2)) { - // /* - // * Since both vertices for this edge have been executed the - // * edge is now redundant. Either it was explicitly executed - // * or another join path was used which implies the edge by - // * transitivity in the join graph. - // */ - // continue; - // } - // - // /* - // * We found a counter example (an edge which has not been - // * explored). - // */ - // if (log.isTraceEnabled()) - // log.trace("Edge has not been explored: " + e); - // - // return true; - // - // } - // - // // No more edges to explore. - // return false; - // - // } - } - private static double roundToSignificantFigures(final double num, - final int n) { - if (num == 0) { - return 0; - } +// @todo Could be used to appropriately ignore false precision in cardinality estimates. +// private static double roundToSignificantFigures(final double num, +// final int n) { +// if (num == 0) { +// return 0; +// } +// +// final double d = Math.ceil(Math.log10(num < 0 ? 
-num : num)); +// final int power = n - (int) d; +// +// final double magnitude = Math.pow(10, power); +// final long shifted = Math.round(num * magnitude); +// return shifted / magnitude; +// } - final double d = Math.ceil(Math.log10(num < 0 ? -num : num)); - final int power = n - (int) d; - - final double magnitude = Math.pow(10, power); - final long shifted = Math.round(num * magnitude); - return shifted / magnitude; - } - /** * Places vertices into order by the {@link BOp#getId()} associated with * their {@link IPredicate}. Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java 2010-11-14 16:45:55 UTC (rev 3948) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java 2010-11-15 18:13:06 UTC (rev 3949) @@ -29,9 +29,12 @@ import java.rmi.RemoteException; import java.util.Comparator; +import java.util.LinkedHashMap; +import java.util.Map; import java.util.UUID; import java.util.concurrent.BlockingQueue; import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; @@ -39,6 +42,8 @@ import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.PriorityBlockingQueue; import java.util.concurrent.atomic.AtomicReference; +import java.util.concurrent.locks.Condition; +import java.util.concurrent.locks.ReentrantLock; import org.apache.log4j.Logger; @@ -298,22 +303,68 @@ } /** + * Lock used to guard register / halt of a query. + */ + private final ReentrantLock lock = new ReentrantLock(); + + /** + * Signaled when no queries are running. + */ + private final Condition nothingRunning = lock.newCondition(); + + /** * The currently executing queries. */ - final protected ConcurrentHashMap<UUID/* queryId */, RunningQuery> runningQueries = new ConcurrentHashMap<UUID, RunningQuery>(); + final private ConcurrentHashMap<UUID/* queryId */, RunningQuery> runningQueries = new ConcurrentHashMap<UUID, RunningQuery>(); - /** - * A queue of {@link RunningQuery}s having binding set chunks available for - * consumption. - * - * @todo Be careful when testing out a {@link PriorityBlockingQueue} here. - * First, that collection is intrinsically bounded (it is backed by an - * array) so it will BLOCK under heavy load and could be expected to - * have some resize costs if the queue size becomes too large. Second, - * either {@link RunningQuery} needs to implement an appropriate - * {@link Comparator} or we need to pass one into the constructor for - * the queue. - */ + /** + * LRU cache used to handle problems with asynchronous termination of + * running queries. + * <p> + * Note: Holding onto the query references here might pin memory retained by + * those queries. However, all we really need is the Haltable (Future) of + * that query in this map. + * + * @todo This should not be much of a hot spot even though it is not thread + * safe but the synchronized() call could force cache stalls anyway. A + * concurrent hash map with an approximate LRU access policy might be + * a better choice. + * + * @todo The maximum cache capacity here is a SWAG. It should be large + * enough that we can not have a false cache miss on a system which is + * heavily loaded by a bunch of light queries. 
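+     *
+     *       (For orientation, per the standard {@link LinkedHashMap}
+     *       contract: the map below is constructed with accessOrder=true, so
+     *       each get() moves an entry to the tail, and removeEldestEntry()
+     *       evicts the least recently consulted query future once more than
+     *       100 entries are cached.)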
+ */ + private LinkedHashMap<UUID, Future<Void>> doneQueries = new LinkedHashMap<UUID,Future<Void>>( + 16/* initialCapacity */, .75f/* loadFactor */, true/* accessOrder */) { + + private static final long serialVersionUID = 1L; + + @Override + protected boolean removeEldestEntry(Map.Entry<UUID, Future<Void>> eldest) { + + return size() > 100/* maximumCacheCapacity */; + + } + }; + + /** + * A queue of {@link RunningQuery}s having binding set chunks available for + * consumption. + * + * @todo Handle priority for selective queries based on the time remaining + * until the timeout. + * <p> + * Handle priority for unselective queries based on the order in which + * they are submitted? + * <p> + * Be careful when testing out a {@link PriorityBlockingQueue} here. + * First, that collection is intrinsically bounded (it is backed by an + * array) so it will BLOCK under heavy load and could be expected to + * have some resize costs if the queue size becomes too large. Second, + * either {@link RunningQuery} needs to implement an appropriate + * {@link Comparator} or we need to pass one into the constructor for + * the queue. + */ final private BlockingQueue<RunningQuery> priorityQueue = new LinkedBlockingQueue<RunningQuery>(); // final private BlockingQueue<RunningQuery> priorityQueue = new PriorityBlockingQueue<RunningQuery>( // ); @@ -432,27 +483,6 @@ * for the JVM to finalize the {@link QueryEngine} if the application no * longer holds a hard reference to it. The {@link QueryEngine} is then * automatically closed from within its finalizer method. - * - * @todo Handle priority for selective queries based on the time remaining - * until the timeout. - * <p> - * Handle priority for unselective queries based on the order in which - * they are submitted? - * - * @todo The approach taken by the {@link QueryEngine} executes one task per - * pipeline bop per chunk. Outside of how the tasks are scheduled, - * this corresponds closely to the historical pipeline query - * evaluation. - * <p> - * Chunk concatenation could be performed here if we (a) mark the - * {@link LocalChunkMessage} with a flag to indicate when it has been - * accepted; and (b) rip through the incoming chunks for the query for - * the target bop and combine them to feed the task. Chunks which have - * already been assigned would be dropped when take() discovers them. - * [The chunk combination could also be done when we output the chunk - * if the sink has not been taken, e.g., by combining the chunk into - * the same target ByteBuffer, or when we add the chunk to the - * RunningQuery.] */ static private class QueryEngineTask implements Runnable { @@ -523,18 +553,12 @@ if (!msg.isMaterialized()) throw new IllegalStateException(); - final RunningQuery q = runningQueries.get(msg.getQueryId()); + final RunningQuery q = getRunningQuery(msg.getQueryId()); if(q == null) { /* * The query is not registered on this node. - * - * FIXME We should recognize the difference between a query which - * was never registered (and throw an error here) and a query which - * is done and has been removed from runningQueries. One way to do - * this is with an LRU of recently completed queries. */ -// return false; throw new IllegalStateException(); } @@ -561,27 +585,24 @@ /** * Shutdown the {@link QueryEngine} (blocking). The {@link QueryEngine} will * not accept new queries, but existing queries will run to completion. - * - * @todo This sleeps until {@link #runningQueries} is empty. 
It could be - * signaled when that collection becomes empty if we protected the - * collection with a lock for mutation (or if we just notice each time - * a query terminates). However, that would restrict the concurrency - * for query start/stop. */ public void shutdown() { // normal termination. shutdown = true; - while(!runningQueries.isEmpty()) { - - try { - Thread.sleep(100/*ms*/); - } catch (InterruptedException e) { - throw new RuntimeException(e); - } - - } + lock.lock(); + try { + while (!runningQueries.isEmpty()) { + try { + nothingRunning.await(); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + } + } finally { + lock.unlock(); + } // hook for subclasses. didShutdown(); @@ -643,37 +664,6 @@ } - /** - * The query is no longer running. Resources associated with the query - * should be released. - * - * @todo A race is possible where a query is cancelled on a node where the - * node receives notice to start the query after the cancelled message - * has arrived. To avoid having such queries linger, we should have a - * a concurrent hash set with an approximate LRU policy containing the - * identifiers for queries which have been cancelled, possibly paired - * with the cause (null if normal execution). That will let us handle - * any reasonable concurrent indeterminism between cancel and start - * notices for a query. - * <p> - * Another way in which this might be addressed is by involving the - * client each time a query start is propagated to a node. If we - * notify the client that the query will start on the node first, then - * the client can always issue the cancel notices [unless the client - * dies, in which case we still want to kill the query which could be - * done based on a service disappearing from a jini registry or - * zookeeper.] - */ - protected void halt(final RunningQuery q) { - - // remove from the set of running queries. - runningQueries.remove(q.getQueryId(), q); - - if (log.isInfoEnabled()) - log.info("Removed entry for query: " + q.getQueryId()); - - } - /* * IQueryPeer */ @@ -690,13 +680,33 @@ } + /** + * {@inheritDoc} + * <p> + * The default implementation is a NOP. + */ + public void cancelQuery(UUID queryId, Throwable cause) { + // NOP + } + /* * IQueryClient */ + public PipelineOp getQuery(final UUID queryId) { + + final RunningQuery q = getRunningQuery(queryId); + + if (q == null) + throw new IllegalArgumentException(); + + return q.getQuery(); + + } + public void startOp(final StartOpMessage msg) throws RemoteException { - final RunningQuery q = runningQueries.get(msg.queryId); + final RunningQuery q = getRunningQuery(msg.queryId); if (q != null) { @@ -708,7 +718,7 @@ public void haltOp(final HaltOpMessage msg) throws RemoteException { - final RunningQuery q = runningQueries.get(msg.queryId); + final RunningQuery q = getRunningQuery(msg.queryId); if (q != null) { @@ -829,7 +839,7 @@ assertRunning(); - putRunningQuery(queryId, runningQuery); + putIfAbsent(queryId, runningQuery); runningQuery.startQuery(msg); @@ -839,56 +849,224 @@ } - /** - * Return the {@link RunningQuery} associated with that query identifier. - * - * @param queryId - * The query identifier. - * - * @return The {@link RunningQuery} -or- <code>null</code> if there is no - * query associated with that query identifier. + /* + * Management of running queries. */ + + /** + * Places the {@link RunningQuery} object into the internal map. + * + * @param queryId + * The query identifier. + * @param runningQuery + * The {@link RunningQuery}. 
+ * + * @return The {@link RunningQuery} -or- another {@link RunningQuery} iff + * one exists with the same {@link UUID}. + */ + protected RunningQuery putIfAbsent(final UUID queryId, + final RunningQuery runningQuery) { + + if (queryId == null) + throw new IllegalArgumentException(); + + if (runningQuery == null) + throw new IllegalArgumentException(); + + // First, check [runningQueries] w/o acquiring a lock. + { + final RunningQuery tmp = runningQueries.get(queryId); + + if (tmp != null) { + + // Found existing query. + return tmp; + + } + + } + + /* + * A lock is used to address a race condition here with the concurrent + * registration and halt of a query. + */ + + lock.lock(); + + try { + + // Test for a recently terminated query. + final Future<Void> doneQueryFuture = doneQueries.get(queryId); + + if (doneQueryFuture != null) { + + // Throw out an appropriate exception for a halted query. + handleDoneQuery(queryId, doneQueryFuture); + + // Should never get here. + throw n... [truncated message content] |
From: <tho...@us...> - 2010-11-17 02:14:18
Revision: 3950
          http://bigdata.svn.sourceforge.net/bigdata/?rev=3950&view=rev
Author:   thompsonbry
Date:     2010-11-17 02:14:08 +0000 (Wed, 17 Nov 2010)

Log Message:
-----------
More work on the runtime query optimizer. It is generating useful plans for LUBM Q2, Q8 and Q9. The runtime cost of the generated plans is close to the runtime cost of the plans produced by the static query optimizer. LUBM data are pretty regular, so the runtime query optimizer is not able to exploit unexpected correlations in the joins.

The runtime query optimizer tends to have cardinality estimation underflow for Q2, which suggests that we need to deepen the search on paths with low estimated cardinality. This bears further investigation. When we have estimation underflow in the runtime query optimizer, plans extending that point are picked at random. This is similar to, but not the same as, the problem encountered by the static query optimizer, which is unable to estimate the "as bound" cardinality after making some initial decision about the join ordering. There may very well be a role for a hybrid of both the static and runtime query optimizers which plays to their different strengths.

Modified Paths:
--------------
branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOp.java
branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java
branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContextBase.java
branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java
branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java
branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java
branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java
branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryLog.java
branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java
branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java
branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/journal/Journal.java
branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestPredicateAccessPath.java
branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestSampleIndex.java
branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestAll.java
branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java
branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestFederatedQueryEngine.java
branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestRemoteAccessPath.java
branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/join/TestPipelineJoin.java
branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/relation/rule/eval/TestDefaultEvaluationPlan.java
branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java
branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/rules/TestContextAdvancer.java
branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl.java

Added Paths:
-----------
branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpIdFactory.java
branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IdFactory.java

Removed Paths:
-------------
branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/bench/AdaptiveQueryOptimization.java

Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOp.java
=================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOp.java 2010-11-15 18:13:06 UTC (rev 3949) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOp.java 2010-11-17 02:14:08 UTC (rev 3950) @@ -31,6 +31,8 @@ import java.util.List; import java.util.Map; +import com.bigdata.bop.IPredicate.Annotations; + import cutthecrap.utils.striterators.IPropertySet; /** @@ -180,25 +182,29 @@ */ BOpEvaluationContext getEvaluationContext(); - /** - * Return <code>true</code> iff this operator is an access path which writes - * on the database. - * - * @see Annotations#MUTATION - */ - boolean isMutation(); +// /** +// * Return <code>true</code> iff this operator is an access path which writes +// * on the database. +// * +// * @see com.bigdata.bop.IPredicate.Annotations#MUTATION +// * +// * @todo Move to {@link IPredicate}? +// */ +// boolean isMutation(); +// +// /** +// * The timestamp or transaction identifier on which the operator will read +// * or write. +// * +// * @see Annotations#TIMESTAMP +// * +// * @throws IllegalStateException +// * if {@link Annotations#TIMESTAMP} was not specified. +// * +// * @todo move to {@link IPredicate}? +// */ +// long getTimestamp(); - /** - * The timestamp or transaction identifier on which the operator will read - * or write. - * - * @see Annotations#TIMESTAMP - * - * @throws IllegalStateException - * if {@link Annotations#TIMESTAMP} was not specified. - */ - long getTimestamp(); - // /** // * Compare this {@link BOp} with another {@link BOp}. // * @@ -240,37 +246,6 @@ long DEFAULT_TIMEOUT = Long.MAX_VALUE; /** - * Boolean property whose value is <code>true</code> iff this operator - * writes on a database. - * <p> - * Most operators operate solely on streams of elements or binding sets. - * Some operators read or write on the database using an access path, - * which is typically described by an {@link IPredicate}. This property - * MUST be <code>true</code> when access path is used to write on the - * database. - * <p> - * Operators which read or write on the database must declare the - * {@link Annotations#TIMESTAMP} associated with that operation. - * - * @see #TIMESTAMP - * - * @todo Move to {@link IPredicate}? - */ - String MUTATION = BOp.class.getName() + ".mutation"; - - boolean DEFAULT_MUTATION = false; - - /** - * The timestamp (or transaction identifier) used by this operator if it - * reads or writes on the database (no default). - * - * @see #MUTATION - * - * @todo Move to {@link IPredicate}? - */ - String TIMESTAMP = BOp.class.getName() + ".timestamp"; - - /** * This annotation determines where an operator will be evaluated * (default {@value #DEFAULT_EVALUATION_CONTEXT}). */ Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java 2010-11-15 18:13:06 UTC (rev 3949) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java 2010-11-17 02:14:08 UTC (rev 3950) @@ -582,18 +582,6 @@ } - public final boolean isMutation() { - - return getProperty(Annotations.MUTATION, Annotations.DEFAULT_MUTATION); - - } - - public final long getTimestamp() { - - return (Long) getRequiredProperty(Annotations.TIMESTAMP); - - } - /* * Note: I've played around with a few hash functions and senses of * equality. 
Predicate (before the bops were introduced) used to have a Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContextBase.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContextBase.java 2010-11-15 18:13:06 UTC (rev 3949) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContextBase.java 2010-11-17 02:14:08 UTC (rev 3950) @@ -165,8 +165,7 @@ final IIndexManager tmp = getFederation() == null ? getIndexManager() : getFederation(); - final long timestamp = (Long) pred - .getRequiredProperty(BOp.Annotations.TIMESTAMP); + final long timestamp = pred.getTimestamp(); return (IRelation<E>) tmp.getResourceLocator().locate( pred.getOnlyRelationName(), timestamp); Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpIdFactory.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpIdFactory.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpIdFactory.java 2010-11-17 02:14:08 UTC (rev 3950) @@ -0,0 +1,29 @@ +package com.bigdata.bop; + +import java.util.LinkedHashSet; + +/** + * A factory which may be used when some identifiers need to be reserved. + */ +public class BOpIdFactory implements IdFactory { + + private final LinkedHashSet<Integer> ids = new LinkedHashSet<Integer>(); + + private int nextId = 0; + + public void reserve(int id) { + ids.add(id); + } + + public int nextId() { + + while (ids.contains(nextId)) { + + nextId++; + + } + + return nextId++; + } + +} \ No newline at end of file Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java 2010-11-15 18:13:06 UTC (rev 3949) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java 2010-11-17 02:14:08 UTC (rev 3950) @@ -27,7 +27,6 @@ package com.bigdata.bop; -import java.util.Arrays; import java.util.Collections; import java.util.Iterator; import java.util.LinkedHashMap; @@ -699,7 +698,7 @@ for (BOp arg : bop.args()) { - if (arg.arity() > 0) { + if (!(arg instanceof IVariableOrConstant<?>)) { toString(arg, sb, indent+1); @@ -798,29 +797,33 @@ return true; } - - /** - * Copy binding sets from the source to the sink(s). - * - * @param source - * The source. - * @param sink - * The sink (required). - * @param sink2 - * Another sink (optional). - * @param constraints - * Binding sets which fail these constraints will NOT be copied - * (optional). - * @param stats - * The {@link BOpStats#chunksIn} and {@link BOpStats#unitsIn} - * will be updated during the copy (optional). - */ - static public void copy( + + /** + * Copy binding sets from the source to the sink(s). + * + * @param source + * The source. + * @param sink + * The sink (required). + * @param sink2 + * Another sink (optional). + * @param constraints + * Binding sets which fail these constraints will NOT be copied + * (optional). + * @param stats + * The {@link BOpStats#chunksIn} and {@link BOpStats#unitsIn} + * will be updated during the copy (optional). + * + * @return The #of binding sets copied. 
+ */ + static public long copy( final IAsynchronousIterator<IBindingSet[]> source, final IBlockingBuffer<IBindingSet[]> sink, final IBlockingBuffer<IBindingSet[]> sink2, final IConstraint[] constraints, final BOpStats stats) { + long nout = 0; + while (source.hasNext()) { final IBindingSet[] chunk = source.next(); @@ -841,13 +844,19 @@ // copy accepted binding sets to the default sink. sink.add(tmp); + nout += chunk.length; + if (sink2 != null) { - // copy accepted binding sets to the alt sink. + + // copy accepted binding sets to the alt sink. sink2.add(tmp); + } } + return nout; + } /** @@ -946,5 +955,5 @@ return out; } - + } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java 2010-11-15 18:13:06 UTC (rev 3949) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java 2010-11-17 02:14:08 UTC (rev 3950) @@ -42,13 +42,12 @@ import com.bigdata.btree.filter.Advancer; import com.bigdata.btree.filter.TupleFilter; import com.bigdata.mdi.PartitionLocator; -import com.bigdata.rawstore.Bytes; import com.bigdata.relation.IRelation; import com.bigdata.relation.accesspath.AccessPath; import com.bigdata.relation.accesspath.ElementFilter; import com.bigdata.relation.accesspath.IAccessPath; +import com.bigdata.relation.rule.IAccessPathExpander; import com.bigdata.relation.rule.IRule; -import com.bigdata.relation.rule.IAccessPathExpander; import com.bigdata.relation.rule.eval.IEvaluationPlan; import com.bigdata.relation.rule.eval.pipeline.JoinMasterTask; import com.bigdata.service.ndx.IClientIndex; @@ -69,9 +68,12 @@ */ public interface IPredicate<E> extends BOp, Cloneable, Serializable { - /** - * Interface declaring well known annotations. - */ + /** + * Interface declaring well known annotations. + * + * FIXME All of these annotations should be in the {@link IPredicate} + * namespace. + */ public interface Annotations extends BOp.Annotations, BufferAnnotations { /** @@ -289,6 +291,35 @@ // | IRangeQuery.PARALLEL ; + /** + * Boolean property whose value is <code>true</code> iff this operator + * writes on a database. + * <p> + * Most operators operate solely on streams of elements or binding sets. + * Some operators read or write on the database using an access path, + * which is typically described by an {@link IPredicate}. This property + * MUST be <code>true</code> when access path is used to write on the + * database. + * <p> + * Operators which read or write on the database must declare the + * {@link Annotations#TIMESTAMP} associated with that operation. + * + * @see Annotations#TIMESTAMP + */ + String MUTATION = BOp.class.getName() + ".mutation"; + + boolean DEFAULT_MUTATION = false; + + /** + * The timestamp (or transaction identifier) used by this operator if it + * reads or writes on the database (no default). + * + * @see com.bigdata.bop.IPredicate.Annotations#MUTATION + * + * @todo Move to {@link IPredicate}? + */ + String TIMESTAMP = BOp.class.getName() + ".timestamp"; + } /** @@ -637,4 +668,23 @@ */ public IPredicate<E> setBOpId(int bopId); + /** + * Return <code>true</code> iff this operator is an access path which writes + * on the database. + * + * @see Annotations#MUTATION + */ + boolean isMutation(); + + /** + * The timestamp or transaction identifier on which the operator will read + * or write. 
+ * + * @see Annotations#TIMESTAMP + * + * @throws IllegalStateException + * if {@link Annotations#TIMESTAMP} was not specified. + */ + long getTimestamp(); + } Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IdFactory.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IdFactory.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IdFactory.java 2010-11-17 02:14:08 UTC (rev 3950) @@ -0,0 +1,10 @@ +package com.bigdata.bop; + +/** + * An interface for a bop identifier factory. + */ +public interface IdFactory { + + public int nextId(); + +} Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java 2010-11-15 18:13:06 UTC (rev 3949) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java 2010-11-17 02:14:08 UTC (rev 3950) @@ -636,4 +636,16 @@ } + public final boolean isMutation() { + + return getProperty(IPredicate.Annotations.MUTATION, IPredicate.Annotations.DEFAULT_MUTATION); + + } + + public final long getTimestamp() { + + return (Long) getRequiredProperty(IPredicate.Annotations.TIMESTAMP); + + } + } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2010-11-15 18:13:06 UTC (rev 3949) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2010-11-17 02:14:08 UTC (rev 3950) @@ -34,6 +34,7 @@ import java.util.Comparator; import java.util.Formatter; import java.util.Iterator; +import java.util.LinkedHashMap; import java.util.LinkedHashSet; import java.util.LinkedList; import java.util.List; @@ -50,6 +51,8 @@ import com.bigdata.bop.BOpContext; import com.bigdata.bop.BOpContextBase; import com.bigdata.bop.BOpEvaluationContext; +import com.bigdata.bop.BOpIdFactory; +import com.bigdata.bop.BOpUtility; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IElement; import com.bigdata.bop.IPredicate; @@ -64,8 +67,11 @@ import com.bigdata.bop.join.PipelineJoin; import com.bigdata.bop.join.PipelineJoin.PipelineJoinStats; import com.bigdata.bop.rdf.join.DataSetJoin; +import com.bigdata.bop.solutions.SliceOp; import com.bigdata.relation.IRelation; +import com.bigdata.relation.accesspath.BufferClosedException; import com.bigdata.relation.accesspath.IAccessPath; +import com.bigdata.relation.accesspath.IAsynchronousIterator; import com.bigdata.relation.accesspath.ThickAsynchronousIterator; import com.bigdata.relation.rule.Rule; import com.bigdata.striterator.Dechunkerator; @@ -176,13 +182,6 @@ } - /** - * - * TODO How can join constraints be moved around? Just attach them where - * ever a variable becomes bound? And when do we filter out variables which - * are not required downstream? Once we decide on a join path and execute it - * fully (rather than sampling that join path). - */ public JoinGraph(final BOp[] args, final Map<String, Object> anns) { super(args, anns); @@ -204,11 +203,6 @@ } -// /** -// * Used to assign row identifiers. -// */ -// static private final IVariable<Integer> ROWID = Var.var("__rowid"); - /** * A sample of a {@link Vertex} (an access path). */ @@ -334,9 +328,10 @@ } /** - * Take a sample of the vertex. 
If the sample is already exact, then - * this is a NOP. If the vertex was already sampled to that limit, then - * this is a NOP (you have to raise the limit to re-sample the vertex). + * Take a sample of the vertex, updating {@link #sample} as a + * side-effect. If the sample is already exact, then this is a NOP. If + * the vertex was already sampled to that limit, then this is a NOP (you + * have to raise the limit to re-sample the vertex). * * @param limit * The sample cutoff. @@ -677,6 +672,11 @@ /** * The last sample for this edge and <code>null</code> if the edge has * not been sampled. + * <p> + * Note: This sample is only the one-step cutoff evaluation of the edge + * given a sample of its vertex having the lesser cardinality. It is NOT + * the cutoff sample of a join path having this edge except for the + * degenerate case where the edge is the first edge in the join path. */ public EdgeSample sample = null; @@ -696,14 +696,23 @@ } /** + * The edge label is formed from the {@link BOp.Annotations#BOP_ID} of + * its ordered vertices (v1,v2). + */ + public String getLabel() { + + return "(" + v1.pred.getId() + "," + v2.pred.getId() + ")"; + + } + + /** * Note: The vertices of the edge are labeled using the * {@link BOp.Annotations#BOP_ID} associated with the {@link IPredicate} * for each vertex. */ public String toString() { - return "Edge{ (V" + v1.pred.getId() + ",V" + v2.pred.getId() - + "), estCard=" + return "Edge{ "+getLabel()+", estCard=" + (sample == null ? "N/A" : sample.estimatedCardinality) + ", shared=" + shared.toString() + ", sample=" + sample + "}"; @@ -790,27 +799,48 @@ } /** - * Estimate the cardinality of the edge. + * Estimate the cardinality of the edge, updating {@link #sample} as a + * side-effect. This is a NOP if the edge has already been sampled at + * that <i>limit</i>. This is a NOP if the edge sample is exact. * * @param context * - * @return The estimated cardinality of the edge. + * @return The new {@link EdgeSample} (this is also updated on + * {@link #sample} as a side-effect). * * @throws Exception */ - public long estimateCardinality(final QueryEngine queryEngine, + public EdgeSample estimateCardinality(final QueryEngine queryEngine, final int limit) throws Exception { if (limit <= 0) throw new IllegalArgumentException(); - /* - * Note: There is never a need to "re-sample" the edge. Unlike ROX, - * we always can sample a vertex. This means that we can sample the - * edges exactly once, during the initialization of the join graph. - */ - if (sample != null) - throw new RuntimeException(); +// /* +// * Note: There is never a need to "re-sample" the edge. Unlike ROX, +// * we always can sample a vertex. This means that we can sample the +// * edges exactly once, during the initialization of the join graph. +// */ +// if (sample != null) +// throw new RuntimeException(); + + if (sample != null) { + + if (sample.limit >= limit) { + + // Already sampled at that limit. + return sample; + + } + + if (sample.estimateEnum == EstimateEnum.Exact) { + + // Sample is exact (fully materialized result). + return sample; + + } + + } /* * Figure out which vertex has the smaller cardinality. The sample @@ -832,27 +862,6 @@ } /* - * TODO This is awkward to setup because we do not have a concept - * (or class) corresponding to a fly weight relation and we do not - * have a general purpose relation, just arrays or sequences of - * IBindingSets. Also, all relations are persistent. 
Temporary - * relations are on a temporary store and are locatable by their - * namespace rather than being Objects. - * - * The algorithm presupposes fly weight / temporary relations this - * both to wrap the sample and to store the computed intermediate - * results. - * - * Note: The PipelineJoin does not have a means to halt after a - * limit is satisfied. In order to achieve this, we have to wrap it - * with a SliceOp. - * - * Together, this means that we are dealing with IBindingSet[]s for - * both the input and the output of the cutoff evaluation of the - * edge rather than rows of the materialized relation. - */ - - /* * Convert the source sample into an IBindingSet[]. * * TODO We might as well do this when we sample the vertex. @@ -872,12 +881,16 @@ v.sample.rangeCount, v.sample.exact, v.sample.limit, sourceSample); - return sample.estimatedCardinality; + return sample; } /** - * Estimate the cardinality of the edge. + * Estimate the cardinality of the edge given a sample of either a + * vertex or a join path leading up to that edge. + * <p> + * Note: The caller is responsible for protecting against needless + * re-sampling. * * @param queryEngine * @param limit @@ -908,10 +921,6 @@ if (limit <= 0) throw new IllegalArgumentException(); -// // Inject a rowId column. -// sourceSample = BOpUtility.injectRowIdColumn(ROWID, 1/* start */, -// sourceSample); - /* * Note: This sets up a cutoff pipeline join operator which makes an * accurate estimate of the #of input solutions consumed and the #of @@ -928,6 +937,12 @@ * predicate) will not reduce the effort to compute the join, but * they can reduce the cardinality of the join and that is what we * are trying to estimate here. + * + * TODO How can join constraints be moved around? Just attach them + * where ever a variable becomes bound? And when do we filter out + * variables which are not required downstream? Once we decide on a + * join path and execute it fully (rather than sampling that join + * path). */ final int joinId = 1; final PipelineJoin joinOp = new PipelineJoin(new BOp[] {}, // @@ -953,22 +968,8 @@ */ new NV(PipelineJoin.Annotations.SHARED_STATE,true), new NV(PipelineJoin.Annotations.EVALUATION_CONTEXT,BOpEvaluationContext.CONTROLLER) -// // make sure the chunks are large enough to hold the result. -// new NV(PipelineJoin.Annotations.CHUNK_CAPACITY,limit), -// // no chunk timeout -// new NV(PipelineJoin.Annotations.CHUNK_TIMEOUT,Long.MAX_VALUE) ); -// BOpContext context = new BOpContext(runningQuery, partitionId, stats, source, sink, sink2); -// joinOp.eval(context); - -// final SliceOp sliceOp = new SliceOp(new BOp[] { joinOp },// -// NV.asMap(// -// new NV(BOp.Annotations.BOP_ID, 2), // -// new NV(SliceOp.Annotations.LIMIT, (long) limit), // -// new NV(BOp.Annotations.EVALUATION_CONTEXT, -// BOpEvaluationContext.CONTROLLER))); - final PipelineOp queryOp = joinOp; // run the cutoff sampling of the edge. @@ -980,10 +981,6 @@ new ThickAsynchronousIterator<IBindingSet[]>( new IBindingSet[][] { sourceSample }))); -// // #of source samples consumed. -// int inputCount; -// // #of output samples generated. -// int outputCount = 0; final List<IBindingSet> result = new LinkedList<IBindingSet>(); try { try { @@ -993,15 +990,8 @@ runningQuery.iterator()); while (itr.hasNext()) { bset = itr.next(); -// final int rowid = (Integer) bset.get(ROWID).get(); -// if (rowid > inputCount) -// inputCount = rowid; result.add(bset); -// outputCount++; } -// // #of input rows consumed. -// inputCount = bset == null ? 
0 : ((Integer) bset.get(ROWID) -// .get()); } finally { // verify no problems. runningQuery.get(); } } finally { runningQuery.cancel(true/* mayInterruptIfRunning */); } @@ -1014,8 +1004,8 @@ final PipelineJoinStats joinStats = (PipelineJoinStats) runningQuery .getStats().get(joinId); - if (log.isDebugEnabled()) - log.debug(joinStats.toString()); + if (log.isTraceEnabled()) + log.trace(joinStats.toString()); /* * TODO Improve comments here. See if it is possible to isolate a * common base class which would simplify the setup of the cutoff * join and the computation of the sample stats. */ @@ -1032,8 +1022,8 @@ (int) joinStats.outputSolutions.get(), // result.toArray(new IBindingSet[result.size()])); - if (log.isTraceEnabled()) - log.trace("edge=" + this + ", sample=" + edgeSample); + if (log.isDebugEnabled()) + log.debug(getLabel() + " : newSample=" + edgeSample); return edgeSample; @@ -1081,8 +1071,7 @@ for (Edge e : edges) { if (!first) sb.append(","); - sb.append("(" + e.v1.pred.getId() + "," + e.v2.pred.getId() - + ")"); + sb.append(e.getLabel()); first = false; } sb.append(",cumEstCard=" + cumulativeEstimatedCardinality @@ -1176,68 +1165,6 @@ return false; } -// /** -// * Return <code>true</code> if this path is an unordered super set of -// * the given path. In the case where both paths have the same vertices -// * this will also return <code>true</code>. -// * -// * @param p -// * Another path. -// * -// * @return <code>true</code> if this path is an unordered super set of -// * the given path. -// */ -// public boolean isUnorderedSuperSet(final Path p) { -// -// if (p == null) -// throw new IllegalArgumentException(); -// -// if (edges.size() < p.edges.size()) { -// /* -// * Fast rejection. This assumes that each edge after the first -// * adds one distinct vertex to the path. That assumption is -// * enforced by #addEdge(). -// */ -// return false; -// } -// -// final Vertex[] v1 = getVertices(); -// final Vertex[] v2 = p.getVertices(); -// -// if (v1.length < v2.length) { -// // Proven false since the other set is larger. -// return false; -// } -// -// /* -// * Scan the vertices of the caller's path. If any of those vertices -// * are NOT found in this path then the caller's path can not be a -// * subset of this path. -// */ -// for (int i = 0; i < v2.length; i++) { -// -// final Vertex tmp = v2[i]; -// -// boolean found = false; -// for (int j = 0; j < v1.length; j++) { -// -// if (v1[j] == tmp) { -// found = true; -// break; -// } -// -// } -// -// if (!found) { -// return false; -// } -// -// } -// -// return true; -// -// } - /** * Return <code>true</code> if this path is an unordered variant of the * given path (same vertices in any order). @@ -1302,21 +1229,100 @@ } /** - * Return the vertices in this path (in path order). + * Return the vertices in this path (in path order). For the first edge, + * the minimum cardinality vertex is always reported first (this is + * critical for producing the correct join plan). For the remaining + * edges in the path, the unvisited vertex is reported. * * @return The vertices (in path order). * * TODO This could be rewritten without the toArray() using a * method which visits the vertices of a path in any order. + * + * @todo unit test for the first vertex to be reported. */ public Vertex[] getVertices() { + final Set<Vertex> tmp = new LinkedHashSet<Vertex>(); + for (Edge e : edges) { + + if (tmp.isEmpty()) { + /* + * The first edge is handled specially in order to report + * the minimum cardinality vertex first.
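+ * For example (a sketch using vertex names in place of the
+ * Vertex objects which are actually added): for a path whose
+ * edges are (v3,v1) then (v1,v2), where v3 is the minimum
+ * cardinality vertex of the first edge, the insertion order
+ * into [tmp] is
+ *
+ * tmp.add(v3); // min cardinality vertex of the first edge.
+ * tmp.add(v1); // max cardinality vertex of the first edge.
+ * tmp.add(v1); // duplicate from the second edge; a NOP.
+ * tmp.add(v2); // unvisited vertex of the second edge.
+ *
+ * and the LinkedHashSet preserves that order, so the reported
+ * vertices are [v3, v1, v2].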
+ */ + tmp.add(e.getMinimumCardinalityVertex()); + tmp.add(e.getMaximumCardinalityVertex()); + + } else { + + tmp.add(e.v1); + + tmp.add(e.v2); + + } + + } + + final Vertex[] a = tmp.toArray(new Vertex[tmp.size()]); + + return a; + + } + + /** + * Return the {@link IPredicate}s associated with the vertices of the + * join path in path order. + * + * @see #getVertices() + */ + public IPredicate[] getPredicates() { + + // The vertices in the selected evaluation order. + final Vertex[] vertices = getVertices(); + + // The predicates in the same order as the vertices. + final IPredicate[] preds = new IPredicate[vertices.length]; + + for (int i = 0; i < vertices.length; i++) { + + preds[i] = vertices[i].pred; + + } + + return preds; + + } + + /** + * Return the {@link BOp} identifiers of the predicates associated with + * each vertex in path order. + */ + static public int[] getVertexIds(final List<Edge> edges) { + + final Set<Vertex> tmp = new LinkedHashSet<Vertex>(); + + for (Edge e : edges) { + tmp.add(e.v1); + tmp.add(e.v2); + } + final Vertex[] a = tmp.toArray(new Vertex[tmp.size()]); - return a; + + final int[] b = new int[a.length]; + + for (int i = 0; i < a.length; i++) { + + b[i] = a[i].pred.getId(); + + } + + return b; + } /** @@ -1350,14 +1356,18 @@ /** * Add an edge to a path, computing the estimated cardinality of the new - * path, and returning the new path. + * path, and returning the new path. The cutoff join is performed using + * the {@link #sample} of <i>this</i> join path and the actual access + * path for the target vertex. * * @param queryEngine * @param limit * @param e * The edge. * - * @return The new path. + * @return The new path. The materialized sample for the new path is the + * sample obtained by the cutoff join for the edge added to the + * path. * * @throws Exception */ @@ -1432,63 +1442,12 @@ final Path tmp = new Path(edges, cumulativeEstimatedCardinality, edgeSample); - // tmp.stopVertex = e.getMaximumCardinalityVertex(); - return tmp; } } - // /** - // * Equality is defined by comparison of the unordered set of edges. - // */ - // public boolean equals(final Object o) { - // if (this == o) - // return true; - // if (!(o instanceof Path)) - // return false; - // final Path t = (Path) o; - // if (edges.length != t.edges.length) - // return false; - // for (Edge e : edges) { - // boolean found = false; - // for (Edge x : t.edges) { - // if (x.equals(e)) { - // found = true; - // break; - // } - // } - // if (!found) - // return false; - // } - // return true; - // } - // - // /** - // * The hash code of path is defined as the bit-wise XOR of the hash - // * codes of the edges in that path. - // */ - // public int hashCode() { - // - // if (hash == 0) { - // - // int result = 0; - // - // for(Edge e : edges) { - // - // result ^= e.hashCode(); - // - // } - // - // hash = result; - // - // } - // return hash; - // - // } - // private int hash; - } /** @@ -1582,31 +1541,60 @@ } /** - * A join graph (data structure and methods only). + * A runtime optimizer for a join graph. The {@link JoinGraph} bears some + * similarity to ROX (Runtime Optimizer for XQuery), but has several + * significant differences: + * <ol> + * <li> + * 1. ROX starts from the minimum cardinality edge of the minimum + * cardinality vertex. The {@link JoinGraph} starts with one or more low + * cardinality vertices.</li> + * <li> + * 2. ROX always extends the last vertex added to a given join path. 
The + * {@link JoinGraph} extends all vertices having unexplored edges in each + * breadth first expansion.</li> + * <li> + * 3. ROX is designed to interleave operator-at-once evaluation of join path + * segments which dominate other join path segments. The {@link JoinGraph} + * is designed to prune all join paths which are known to be dominated by + * other join paths for the same set of vertices in each round and iterates + * until a join path is identified which uses all vertices and has the + * minimum expected cumulative estimated cardinality. Join paths which + * survive pruning are re-sampled as necessary in order to obtain better + * information about edges in join paths which have a low estimated + * cardinality in order to address a problem with underflow of the + * cardinality estimates.</li> + * </ol> * - * Note: ROX was stated in terms of materialization of intermediate results. - * Bigdata was originally designed to support pipelined join evaluation in - * which the zero investment property is true (there exists an index for the - * join). While support is being developed for operator-at-once joins (e.g., - * hash joins), that support is aimed at more efficient evaluation of high - * cardinality joins using multi-block IO. Therefore, unlike ROX, the - * runtime query optimizer does not materialize the intermediate results - * when chain sampling. Instead, it feeds a sample into a cutoff pipeline - * evaluation for the join path. Since some join paths can eliminate a lot - * of intermediate solutions and hence take a long time to satisfy the - * cutoff, we also specify a timeout for the cutoff evaluation of a join - * path. Given the zero investment property (an index exists for the join), - * if the cutoff is not satisfied within the timeout, then the join has a - * low correlation. If no solutions are generated within the timeout, then - * the estimate of the correlation "underflows". + * TODO For join graphs with a large number of vertices we may need to + * constrain the #of vertices which are explored in parallel. This could be + * done by only branching the N lowest cardinality vertices from the already + * connected edges. Since fewer vertices are being explored in parallel, + * paths are more likely to converge onto the same set of vertices at which + * point we can prune the dominated paths. * - * Note: timeouts are a bit tricky when you are not running on a real-time - * platform. In particular, heavy swapping or heavy GC workloads could both - * cause a timeout to expire because no work was done on sampling the join - * path rather than because there was a lot of work to be done. Therefore, - * the timeout should be used to protect against join paths which take a - * long time to materialize <i>cutoff</i> solutions rather than to fine tune - * the running time of the query optimizer. + * TODO Compare the cumulative expected cardinality of a join path with the + * expected cost of a join path. The latter allows us to also explore + * alternative join strategies, such as the parallel subquery versus scan + * and filter decision for named graph and default graph SPARQL queries. + * + * TODO Coalescing duplicate access paths can dramatically reduce the work + * performed by a pipelined nested index subquery. (A hash join eliminates + * all duplicate access paths using a scan and filter approach.) If we will + * run a pipeline nested index subquery join, then should the runtime query + * optimizer prefer paths with duplicate access paths? 
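+ *
+ * In miniature, the search performed by this class has the following
+ * shape (a sketch only; the hypothetical expand() stands in for the
+ * per-round re-sampling, extension and pruning logic below, and
+ * nvertices is the #of vertices in the join graph):
+ *
+ * <pre>
+ * // Seed one join path for each low cardinality edge.
+ * Path[] paths = round0(queryEngine, limit, 2);
+ * int round = 1;
+ * while (paths[0].getVertices().length < nvertices) {
+ *     // Re-sample, extend each path by one vertex, prune dominated paths.
+ *     paths = expand(queryEngine, limit, round++, paths);
+ * }
+ * return paths[0]; // the winning join path.
+ * </pre>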
+ * + * TODO How can we handle things like lexicon joins. A lexicon join is + * only evaluated when the dynamic type of a variable binding indicates that + * the RDF Value must be materialized by a join against the ID2T index. + * Binding sets having inlined values can simply be routed around the join + * against the ID2T index. Routing around saves network IO in scale-out + * where otherwise we would route binding sets having identifiers which do + * not need to be materialized to the ID2T shards. + * + * @see <a + * href="http://www-db.informatik.uni-tuebingen.de/files/research/pathfinder/publications/rox-demo.pdf"> + * ROX </a> */ public static class JGraph { @@ -1641,10 +1629,6 @@ } sb.append("\n]}"); return sb.toString(); - - // return super.toString() + "{V=" + Arrays.toString(V) + ",E=" - // + Arrays.toString(E) + - // ", executedVertices="+executedVertices+"}"; } public JGraph(final IPredicate[] v) { @@ -1707,7 +1691,7 @@ * * @throws Exception */ - public void runtimeOptimizer(final QueryEngine queryEngine, + public Path runtimeOptimizer(final QueryEngine queryEngine, final int limit) throws Exception { // Setup the join graph. @@ -1732,26 +1716,11 @@ } - /* - * FIXME Choose the best join path and execute it (or return the - * evaluation order to the caller). - * - * FIXME This must either recognize each time a join path is known - * to dominate all other join paths and then execute it or iterator - * until the total join path is decided and then execute the - * original query using that join path. - * - * @todo When executing the query, it is actually being executed as - * a subquery. Therefore we have to take appropriate care to ensure - * that the results are copied out of the subquery and into the - * parent query. See SubqueryTask for how this is done. - * - * @todo When we execute the query, we should clear the references - * to the sample (unless they are exact, in which case they can be - * used as is) in order to release memory associated with those - * samples if the query is long running. - */ - + // Should be one winner. + assert paths.length == 1; + + return paths[0]; + } /** @@ -1831,14 +1800,14 @@ */ estimateEdgeWeights(queryEngine, limit); - if (log.isInfoEnabled()) { + if (log.isDebugEnabled()) { final StringBuilder sb = new StringBuilder(); sb.append("Edges:\n"); for (Edge e : E) { sb.append(e.toString()); sb.append("\n"); } - log.info(sb.toString()); + log.debug(sb.toString()); } /* @@ -1887,52 +1856,215 @@ throw new IllegalArgumentException(); // increment the limit by itself in each round. - final int limit = round * limitIn; - - final List<Path> tmp = new LinkedList<Path>(); + final int limit = (round + 1) * limitIn; - // First, copy all existing paths. + if (log.isDebugEnabled()) + log.debug("round=" + round + ", limit=" + limit + + ", #paths(in)=" + a.length); + +// final List<Path> tmp = new LinkedList<Path>(); +// +// // First, copy all existing paths. +// for (Path x : a) { +// tmp.add(x); +// } + + /* + * Re-sample all vertices which are part of any of the existing + * paths. + * + * Note: A request to re-sample a vertex is a NOP unless the limit + * has been increased since the last time the vertex was sampled. It + * is also a NOP if the vertex has been fully materialized. + * + * TODO We only really need to deepen those paths where we have a + * low estimated join hit ratio. Paths with a higher join hit ratio + * already have a decent estimate of the cardinality and a decent + * sample size and can be explored without resampling.
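+ *
+ * For example, with limitIn := 100 the cutoff evaluation deepens
+ * as limit = (round + 1) * limitIn, i.e., 200, 300, 400, ... in
+ * rounds 1, 2, 3, ..., so each vertex sampled below does real work
+ * at most once per round and the request is otherwise a NOP.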
+ */ + if (log.isDebugEnabled()) + log.debug("Re-sampling in-use vertices: limit=" + limit); + for (Path x : a) { - tmp.add(x); + + for(Edge e : x.edges) { + + e.v1.sample(queryEngine, limit); + e.v2.sample(queryEngine, limit); + + } + } - // Vertices are inserted into this collection when they are resampled. - final Set<Vertex> resampled = new LinkedHashSet<Vertex>(); + /* + * Re-sample the cutoff join for each edge in each of the existing + * paths using the newly re-sampled vertices. + * + * Note: The only way to increase the accuracy of our estimates for + * edges as we extend the join paths is to re-sample each edge in + * the join path in path order. + * + * Note: An edge must be sampled for each distinct join path prefix + * in which it appears within each round. However, it is common for + * surviving paths to share a join path prefix, so do not re-sample + * a given path prefix more than once per round. Also, do not + * re-sample paths which are from rounds before the immediately + * previous round as those paths will not be extended in this round. + */ + if (log.isDebugEnabled()) + log.debug("Re-sampling in-use path segments: limit=" + limit); - // Then expand each path. + final Map<int[], EdgeSample> edgePaths = new LinkedHashMap<int[], EdgeSample>(); + for (Path x : a) { - final int nedges = x.edges.size(); + // The edges which we have visited in this path. + final List<Edge> edges = new LinkedList<Edge>(); + + // The vertices which we have visited in this path. + final Set<Vertex> vertices = new LinkedHashSet<Vertex>(); + + EdgeSample priorEdgeSample = null; + + for(Edge e : x.edges) { + + // Add edge to the visited set for this join path. + edges.add(e); - if (nedges < round) { + // Generate unique key for this join path segment. + final int[] ids = Path.getVertexIds(edges); - // Path is from a previous round. - continue; - - } + if (priorEdgeSample == null) { - /* - * The only way to increase the accuracy of our estimates for - * edges as we extend the join paths is to re-sample each edge - * in the join path in path order. - * - * Note: An edge must be sampled for each distinct join path - * prefix in which it appears within each round. However, it is - * common for surviving paths to share a join path prefix, so do - * not re-sample a given path prefix more than once per round. - * Also, do not re-sample paths which are from rounds before the - * immediately previous round as those paths will not be - * extended in this round. - * - * FIXME Find all vertices in use by all paths which survived - * into this round. Re-sample those vertices to the new limit - * (resampling a vertex is a NOP if it has been resampled to the - * desired limit so we can do this incrementally rather than up - * front). For each edge of each path in path order, re-sample - * the edge. Shared prefix samples should be reused, but samples - * of the same edge with a different prefix must not be shared. - */ + /* + * This is the first edge in the path. + * + * Test our local table of join path segment estimates + * to see if we have already re-sampled that edge. If + * not, then re-sample it now. + */ + + // Test sample cache. + EdgeSample edgeSample = edgePaths.get(ids); + + if (edgeSample == null) { + if (e.sample != null && e.sample.limit >= limit) { + + // The existing sample for that edge is fine. + edgeSample = e.sample; + + } else { + + /* + * Re-sample the edge, updating the sample on + * the edge as a side-effect. 
The cutoff sample + * is based on the vertex sample for the minimum + * cardinality vertex. + */ + + edgeSample = e.estimateCardinality(queryEngine, + limit); + + } + + // Cache the sample. + if (edgePaths.put(ids, edgeSample) != null) + throw new AssertionError(); + + } + + // Add both vertices to the visited set. + vertices.add(e.v1); + vertices.add(e.v2); + + // Save sample. It will be used to re-sample the next edge. + priorEdgeSample = edgeSample; + + continue; + + } + + final boolean v1Found = vertices.contains(e.v1); + + // The source vertex for the new edge. + final Vertex sVertex = v1Found ? e.v1 : e.v2; + + // The target vertex for the new edge. + final Vertex tVertex = v1Found ? e.v2 : e.v1; + + // Look for sample for this path in our cache. + EdgeSample edgeSample = edgePaths.get(ids); + + if (edgeSample == null) { + + /* + * This is some N-step edge in the path, where N is + * greater than ONE (1). The source vertex is the vertex + * which already appears in the prior edges of this join + * path. The target vertex is the next vertex which is + * visited by the join path. The sample passed in is the + * prior edge sample - that is, the sample from the path + * segment less the target vertex. This is the sample + * that we just updated when we visited the prior edge + * of the path. + */ + + edgeSample = e + .estimateCardinality( + queryEngine, + limit, + sVertex, + tVertex,// + priorEdgeSample.estimatedCardinality,// + priorEdgeSample.estimateEnum == EstimateEnum.Exact, + priorEdgeSample.limit,// + priorEdgeSample.sample// + ); + + if (log.isDebugEnabled()) + log.debug("Resampled: " + Arrays.toString(ids) + + " : " + edgeSample); + + if (edgePaths.put(ids, edgeSample) != null) + throw new AssertionError(); + + } + + // Save sample. It will be used to re-sample the next edge. + priorEdgeSample = edgeSample; + + // Add target vertex to the visited set. + vertices.add(tVertex); + + } // next Edge [e] in Path [x] + + // Save the result on the path. + x.sample = priorEdgeSample; + } // next Path [x]. + + /* + * Expand each path one step from each vertex which branches to an + * unused vertex. + */ + + if (log.isDebugEnabled()) + log.debug("Expanding paths: limit=" + limit + ", #paths(in)=" + + a.length); + + final List<Path> tmp = new LinkedList<Path>(); + + for (Path x : a) { + +// final int nedges = x.edges.size(); +// +// if (nedges < round) { +// +// // Path is from a previous round. +// continue; +// +// } + // The set of vertices used to expand this path in this round. final Set<Vertex> used = new LinkedHashSet<Vertex>(); @@ -1969,13 +2101,8 @@ // add the new vertex to the set of used vertices. used.add(tVertex); - if (resampled.add(tVertex)) { - /* - * (Re-)sample this vertex before we sample a new edge - * which targets this vertex. - */ - tVertex.sample(queryEngine, limit); - } + // (Re-)sample vertex before we sample a new edge + tVertex.sample(queryEngine, limit); // Extend the path to the new vertex. final Path p = x.addEdge(queryEngine, limit, edgeInGraph); @@ -2355,30 +2482,26 @@ // Create the join graph. final JGraph g = new JGraph(getVertices()); - // Run it. - g.runtimeOptimizer(context.getRunningQuery().getQueryEngine(), limit); + // Find the best join path. + final Path p = g.runtimeOptimizer(context.getRunningQuery() + .getQueryEngine(), limit); + // Factory avoids reuse of bopIds assigned to the predicates. + final BOpIdFactory idFactory = new BOpIdFactory(); + + // Generate the query from the join path.
+ final PipelineOp queryOp = JoinGraph.getQuery(idFactory, p + .getPredicates()); + + // Run the query, blocking until it is done. + JoinGraph.runSubquery(context, queryOp); + return null; } - } + } // class JoinGraphTask -// @todo Could be used to appropriately ignore false precision in cardinality estimates. -// private static double roundToSignificantFigures(final double num, -// final int n) { -// if (num == 0) { -// return 0; -// } -// -// final double d = Math.ceil(Math.log10(num < 0 ? -num : num)); -// final int power = n - (int) d; -// -// final double magnitude = Math.pow(10, power); -// final long shifted = Math.round(num * magnitude); -// return shifted / magnitude; -// } - /** * Places vertices into order by the {@link BOp#getId()} associated with * their {@link IPredicate}. @@ -2436,4 +2559,191 @@ } + /* + * Static methods: + * + * @todo Keep with JGraph or move to utility class. However, the precise + * manner in which the query plan is generated is still up in the air since + * we are not yet handling anything except standard joins in the runtime + * optimizer. + */ + + /** + * Generate a query plan from an ordered collection of predicates. + * + * @param p + * The join path. + * + * @return The query plan. + */ + static public PipelineOp getQuery(final BOpIdFactory idFactory, + final IPredicate[] preds) { + + final PipelineJoin[] joins = new PipelineJoin[preds.length]; + +// final PipelineOp startOp = new StartOp(new BOp[] {}, +// NV.asMap(new NV[] {// +// new NV(Predicate.Annotations.BOP_ID, idFactory +// .nextId()),// +// new NV(SliceOp.Annotations.EVALUATION_CONTEXT, +// BOpEvaluationContext.CONTROLLER),// +// })); +// +// PipelineOp lastOp = startOp; + PipelineOp lastOp = null; + +// final Set<IVariable> vars = new LinkedHashSet<IVariable>(); +// for(IPredicate p : preds) { +// for(BOp arg : p.args()) { +// if(arg instanceof IVariable) { +// vars.add((IVariable)arg); +// } +// } +// } + + for (int i = 0; i < preds.length; i++) { + + // The next vertex in the selected join order. + final IPredicate p = preds[i]; + + final List<NV> anns = new LinkedList<NV>(); + + anns.add(new NV(PipelineJoin.Annotations.PREDICATE, p)); + + anns.add(new NV(PipelineJoin.Annotations.BOP_ID, idFactory + .nextId())); + +// anns.add(new NV(PipelineJoin.Annotations.EVALUATION_CONTEXT, BOpEvaluationContext.ANY)); +// +// anns.add(new NV(PipelineJoin.Annotations.SELECT, vars.toArray(new IVariable[vars.size()]))); + + final PipelineJoin joinOp = new PipelineJoin( + lastOp == null ? new BOp[0] : new BOp[] { lastOp }, + anns.toArray(new NV[anns.size()])); + + joins[i] = joinOp; + + lastOp = joinOp; + + } + +// final PipelineOp queryOp = lastOp; + + /* + * FIXME Why does wrapping with this slice appear to be + * necessary? (It is causing runtime errors when not wrapped). + * Is this a bopId collision which is not being detected? + */ + final PipelineOp queryOp = new SliceOp(new BOp[] { lastOp }, NV + .asMap(new NV[] { + new NV(JoinGraph.Annotations.BOP_ID, idFactory.nextId()), // + new NV(JoinGraph.Annotations.EVALUATION_CONTEXT, + BOpEvaluationContext.CONTROLLER) }) // + ); + + return queryOp; + + } + + /** + * Execute the selected join path. + * <p> + * Note: When executing the query, it is actually being executed as a + * subquery. Therefore we have to take appropriate care to ensure that the + * results are copied out of the subquery and into the parent query. See + * {@link AbstractSubqueryOp} for how this is done. 
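+ * <p>
+ * The plan handed to this method by
+ * {@link #getQuery(BOpIdFactory, IPredicate[])} is a left-deep
+ * pipeline. For a selected order (p2, p0, p1) it has the following
+ * shape (a sketch; it assumes the idFactory assigns bopIds 0..3 in
+ * order):
+ * <pre>
+ * SliceOp[3](
+ *   PipelineJoin[2]{pred=p1}(
+ *     PipelineJoin[1]{pred=p0}(
+ *       PipelineJoin[0]{pred=p2})))
+ * </pre>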
+ * + * @todo When we execute the query, we should clear the references to the + * samples (unless they are exact, in which case they can be used as + * is) in order to release memory associated with those samples if the + * query is long running. Samples must be held until we have + * identified the final join path since each vertex will be used by + * each maximum length join path and we use the samples from the + * vertices to re-sample the surviving join paths in each round. + * + * @todo If there is a slice on the outer query, then the query result may + * well be materialized by now. + * + * @todo If there are source binding sets then they need to be applied above + * (when we are sampling) and below (when we evaluate the selected + * join path). + * + * FIXME runQuery() is not working correctly. The query is being + * halted by a {@link BufferClosedException} which appears before it + * has materialized the necessary results. + */ + static public void runSubquery(final BOpContext<IBindingSet> parentContext, + final PipelineOp queryOp) { + + IAsynchronousIterator<IBindingSet[]> subquerySolutionItr = null; + + try { + + if (log.isInfoEnabled()) + log.info("Running: " + BOpUtility.toString(queryOp)); + + final PipelineOp startOp = (PipelineOp) BOpUtility + .getPipelineStart(queryOp); + + if (log.isInfoEnabled()) + log.info("StartOp: " + BOpUtility.toString(startOp)); + + // Run the query. + final UUID queryId = UUID.randomUUID(); + + final QueryEngine queryEngine = parentContext.getRunningQuery() + .getQueryEngine(); + + final RunningQuery runningQuery = queryEngine + .eval( + queryId, + queryOp, + new LocalChunkMessage<IBindingSet>( + queryEngine, + queryId, + startOp.getId()/* startId */, + -1 /* partitionId */, + /* + * @todo pass in the source binding sets + * here and also when sampling the + * vertices. + */ + new ThickAsynchronousIterator<IBindingSet[]>( + new IBindingSet[][] { new IBindingSet[] { new HashBindingSet() } }))); + + // Iterator visiting the subquery solutions. + subquerySolutionItr = runningQuery.iterator(); + + // Copy solutions from the subquery to the query. + final long nout = BOpUtility + .copy(subquerySolutionItr, parentContext.getSink(), + null/* sink2 */, null/* constraints */, null/* stats */); + + System.out.println("nout=" + nout); + + // verify no problems. + runningQuery.get(); + + System.out.println("Future Ok"); + + } catch (Throwable t) { + + log.error(t,t); + + /* + * If a subquery fails, then propagate the error to the parent + * and rethrow the first cause error out of the subquery. 
+ */ + throw new RuntimeException(parentContext.getRunningQuery() + .halt(t)); + + } finally { + + if (subquerySolutionItr != null) + subquerySolutionItr.close(); + + } + + } + } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryLog.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryLog.java 2010-11-15 18:13:06 UTC (rev 3949) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryLog.java 2010-11-17 02:14:08 UTC (rev 3950) @@ -72,15 +72,33 @@ if (log.isInfoEnabled()) { - final Integer[] order = BOpUtility.getEvaluationOrder(q.getQuery()); + try { - log.info(getTableRow(q, -1/* orderIndex */, q.getQuery().getId(), - true/* summary */)); +// if (log.isDebugEnabled()) { - int orderIndex = 0; - for (Integer bopId : order) { - log.info(getTableRow(q, orderIndex, bopId, false/* summary */)); - orderIndex++; + /* + * Detail row for each operator in the query. + */ + final Integer[] order = BOpUtility.getEvaluationOrder(q + .getQuery()); + + int orderIndex = 0; + for (Integer bopId : order) { + log + .info(getTableRow(q, orderIndex, bopId, false/* summary */)); + orderIndex++; + } + +// } + + // summary row. + log.info(getTableRow(q, -1/* orderIndex */, q.getQuery().getId(), + true/* summary */)); + + } catch (RuntimeException t) { + + log.error(t,t); + } } @@ -107,6 +125,7 @@ */ sb.append("\tevalOrder"); // [0..n-1] sb.append("\tbopId"); + sb.append("\tpredId"); sb.append("\tevalContext"); sb.append("\tcontroller"); // metadata considered by the static optimizer. @@ -120,7 +139,7 @@ sb.append("\tunitsIn"); sb.append("\tchunksOut"); sb.append("\tunitsOut"); - sb.append("\tmultipler"); // expansion rate multipler in the solution count. + sb.append("\tjoinRatio"); // expansion rate multiplier in the solution count. sb.append("\taccessPathDups"); sb.append("\taccessPathCount"); sb.append("\taccessPathChunksIn"); sb.append("\taccessPathUnitsIn"); // dynamics based on elapsed wall clock time. @@ -146,7 +165,8 @@ * @param summary <code>true</code> iff the summary for the query should be written. * @return The row of the table. */ - static private String getTableRow(final IRunningQuery q, final int evalOrder, final Integer bopId, final boolean summary) { + static private String getTableRow(final IRunningQuery q, + final int evalOrder, final Integer bopId, final boolean summary) { final StringBuilder sb = new StringBuilder(); @@ -190,16 +210,32 @@ * keep this from breaking the table format. */ sb.append(BOpUtility.toString(q.getQuery()).replace('\n', ' ')); + sb.append('\t'); + sb.append("total"); // summary line. } else { - // Otherwise how just this bop. + // Otherwise show just this bop. sb.append(bopIndex.get(bopId).toString()); + sb.append('\t'); + sb.append(evalOrder); // eval order for this bop. } sb.append('\t'); - sb.append(evalOrder); - sb.append('\t'); sb.append(Integer.toString(bopId)); sb.append('\t'); + { + /* + * Show the predicate identifier if this is a Join operator. + * + * @todo handle other kinds of join operators when added using a + * shared interface.
+ */ + final IPredicate<?> pred = (IPredicate<?>) bop + .getProperty(PipelineJoin.Annotations.PREDICATE); + if (pred != null) { + sb.append(Integer.toString(pred.getId())); + } + } + sb.append('\t'); sb.append(bop.getEvaluationContext()); sb.append('\t'); sb.append(bop.getProperty(BOp.Annotations.CONTROLLER, Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java 2010-11-15 18:13:06 UTC (rev 3949) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java 2010-11-17 02:14:08 UTC (rev 3950) @@ -491,8 +491,16 @@ pop... [truncated message content] |
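Note: The cutoff sampling in the revision above estimates the cardinality of an edge by scaling the range count of the source vertex by the observed join hit ratio. A minimal standalone sketch of that arithmetic (the names are illustrative; the real code reads the counters from the PipelineJoinStats of the cutoff join and wraps the result in an EdgeSample):

    public class CutoffEstimateSketch {

        /**
         * @param rangeCount the range count of the source vertex.
         * @param nin the #of input solutions consumed by the cutoff join.
         * @param nout the #of output solutions produced before the cutoff.
         */
        static long estimateCardinality(final long rangeCount, final int nin,
                final long nout) {

            // The join hit ratio: outputs per input consumed.
            final double f = nout == 0 ? 0 : (nout / (double) nin);

            // Scale the ratio back up to the full range count.
            return (long) (rangeCount * f);
        }

        public static void main(final String[] args) {

            // E.g., 100 sampled inputs producing 313 outputs against a
            // source vertex with a range count of 175000.
            System.out.println(estimateCardinality(175000L, 100, 313L)); // 547750
        }
    }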
From: <tho...@us...> - 2010-11-22 22:05:18
Revision: 3974 http://bigdata.svn.sourceforge.net/bigdata/?rev=3974&view=rev Author: thompsonbry Date: 2010-11-22 21:08:56 +0000 (Mon, 22 Nov 2010) Log Message: ----------- Added a sumRangeCounts counter to the join stats. Modified the runtime optimizer to use the sum of the range counts considered by the cutoff join when the cardinality estimate is recognized as a lower bound. Added test case for "bar" data set for the runtime optimizer. This query and data set ran into the lower bound estimate problem. The change in this commit fixed the query. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryLog.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunState.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBarData.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2010-11-22 19:22:05 UTC (rev 3973) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2010-11-22 21:08:56 UTC (rev 3974) @@ -142,6 +142,15 @@ * within the round. This would imply that we keep per join path limits. * The vertex and edge samples are already aware of the limit at which * they were last sampled so this should not cause any problems there. + * <p> + * A related option would be to deepen the samples only when we are in + * danger of cardinality estimation underflow. E.g., a per-path limit. + * Resampling vertices may only make sense when we increase the limit + * since otherwise we may find a different correlation with the new sample + * but the comparison of paths using one sample base with paths using a + * different sample base in a different round does not carry forward the + * cardinality estimates from the prior round (unless we do something like + * a weighted moving average). * * @todo When comparing choices among join paths having fully bound tails where * the estimated cardinality has also gone to zero, we should prefer to @@ -152,7 +161,7 @@ * those which reach the 1-var vertex. [In order to support this, we would * need a means to indicate that a fully bound access path should use an * index specified by the query optimizer rather than the primary index - * for the relation. In addition, this suggests that we should keep bloom + * for the relation. In addition, this suggests that we should keep bloom * filters for more than just the SPO(C) index in scale-out.] * * @todo Examine behavior when we do not have perfect covering indices. This @@ -187,6 +196,15 @@ String LIMIT = JoinGraph.class.getName() + ".limit"; int DEFAULT_LIMIT = 100; + + /** + * The <i>nedges</i> edges of the join graph having the lowest + * cardinality will be used to generate the initial join paths (default + * {@value #DEFAULT_NEDGES}). This must be a positive integer. 
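+ * <p>
+ * For example, a {@link JoinGraph} operator which seeds three
+ * initial join paths could be annotated as follows (a sketch; the
+ * vertices annotation and the predicate setup are elided):
+ * <pre>
+ * new JoinGraph(//
+ *     new NV(JoinGraph.Annotations.LIMIT, 100),//
+ *     new NV(JoinGraph.Annotations.NEDGES, 3)//
+ * );
+ * </pre>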
+ */ + String NEDGES = JoinGraph.class.getName() + ".nedges"; + + int DEFAULT_NEDGES = 2; } /** @@ -207,6 +225,15 @@ } + /** + * @see Annotations#NEDGES + */ + public int getNEdges() { + + return getProperty(Annotations.NEDGES, Annotations.DEFAULT_NEDGES); + + } + public JoinGraph(final NV... anns) { this(BOpBase.NOARGS, NV.asMap(anns)); @@ -542,7 +569,7 @@ * there is an error in the query such that the join will not select * anything. This is not 100%, merely indicative. */ - public final int outputCount; + public final long outputCount; /** * The ratio of the #of input samples consumed to the #of output samples @@ -592,7 +619,9 @@ final int sourceSampleLimit,// final int limit,// final int inputCount, // - final int outputCount,// + final long outputCount,// + final double f, + final long estimatedCardinality, final IBindingSet[] sample) { if (sample == null) @@ -609,10 +638,10 @@ this.outputCount = outputCount; - f = outputCount == 0 ? 0 : (outputCount / (double) inputCount); - - estimatedCardinality = (long) (rangeCount * f); - + this.f = f; + + this.estimatedCardinality = estimatedCardinality; + if (sourceSampleExact && outputCount < limit) { /* * Note: If the entire source vertex is being fed into the @@ -1037,20 +1066,55 @@ if (log.isTraceEnabled()) log.trace(joinStats.toString()); - + /* * TODO Improve comments here. See if it is possible to isolate a * common base class which would simplify the setup of the cutoff * join and the computation of the sample stats. */ + // #of solutions in. + final int nin = (int) joinStats.inputSolutions.get(); + + // #of solutions out. + long nout = joinStats.outputSolutions.get(); + + // cumulative range count of the sampled access paths. + final long sumRangeCount = joinStats.accessPathRangeCount.get(); + + if (nin == 1 && nout == limit) { + /* + * We are getting [limit] solutions out for one solution in. In + * this case, (nout/nin) is a lower bound for the estimated + * cardinality of the edge. In fact, this condition suggests + * that the upper bound is a much better estimate of the + * cardinality of this join. Therefore, we replace [nout] with + * the sum of the range counts for the as-bound predicates + * considered by the cutoff join. + * + * For example, consider a join feeding a rangeCount of 16 into + * a rangeCount of 175000. With a limit of 100, we estimated the + * cardinality at 1600L (lower bound). In fact, the cardinality + * is 16*175000. This falsely low estimate can cause join paths + * which are actually better to be dropped. + */ + nout = sumRangeCount; + + } + + final double f = nout == 0 ? 0 : (nout / (double) nin); + + final long estimatedCardinality = (long) (sourceSampleRangeCount * f); + final EdgeSample edgeSample = new EdgeSample( sourceSampleRangeCount, // sourceSampleExact, // @todo redundant with sourceSampleLimit sourceSampleLimit, // limit, // - (int) joinStats.inputSolutions.get(),// - (int) joinStats.outputSolutions.get(), // + nin,// + nout, // + f, // + estimatedCardinality, // result.toArray(new IBindingSet[result.size()])); if (log.isDebugEnabled()) @@ -1719,19 +1783,25 @@ * @param limit * The limit for sampling a vertex and the initial limit for * cutoff join evaluation. + * @param nedges + * The edges in the join graph are sorted in order of + * increasing cardinality and up to <i>nedges</i> of the + * edges having the lowest cardinality are used to form the + * initial set of join paths.
For each edge selected to form + * a join path, the starting vertex will be the vertex of + * that edge having the lower cardinality. * * @throws Exception */ public Path runtimeOptimizer(final QueryEngine queryEngine, - final int limit) throws Exception { + final int limit, final int nedges) throws Exception { // Setup the join graph. - Path[] paths = round0(queryEngine, limit, 2/* nedges */); + Path[] paths = round0(queryEngine, limit, nedges); /* - * The input paths for the first round have two vertices (one edge - * is two vertices). Each round adds one more vertex, so we have - * three vertices by the end of round 1. We are done once we have + * The initial paths all have one edge, and hence two vertices. Each + * round adds one more vertex to each path. We are done once we have * generated paths which include all vertices. * * This occurs at round := nvertices - 1 @@ -1796,6 +1866,11 @@ * The maximum #of edges to choose. Those having the smallest * expected cardinality will be chosen. * + * @return An initial set of paths, one for each of at most + * <i>nedges</i> edges. For each of the <i>nedges</i> lowest + * cardinality edges, the starting vertex will be the vertex + * with the lowest cardinality for that edge. + * * @throws Exception */ public Path[] round0(final QueryEngine queryEngine, final int limit, @@ -2489,6 +2564,8 @@ private final JGraph g; private int limit; + + private int nedges; JoinGraphTask(final BOpContext<IBindingSet> context) { @@ -2499,9 +2576,14 @@ limit = getLimit(); + nedges = getNEdges(); + if (limit <= 0) throw new IllegalArgumentException(); + if (nedges <= 0) + throw new IllegalArgumentException(); + final IPredicate[] v = getVertices(); g = new JGraph(v); @@ -2515,7 +2597,7 @@ // Find the best join path. final Path p = g.runtimeOptimizer(context.getRunningQuery() - .getQueryEngine(), limit); + .getQueryEngine(), limit, nedges); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryLog.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryLog.java 2010-11-22 19:22:05 UTC (rev 3973) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryLog.java 2010-11-22 21:08:56 UTC (rev 3974) @@ -158,6 +158,7 @@ sb.append("\tjoinRatio"); // expansion rate multiplier in the solution count. sb.append("\taccessPathDups"); sb.append("\taccessPathCount"); + sb.append("\taccessPathRangeCount"); sb.append("\taccessPathChunksIn"); sb.append("\taccessPathUnitsIn"); // dynamics based on elapsed wall clock time.
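Note: The lower bound correction added to JoinGraph.java above reduces to the following standalone sketch (illustrative names; the real code reads nin, nout and the summed range counts from the PipelineJoinStats of the cutoff join):

    public class LowerBoundCorrectionSketch {

        static long estimateCardinality(final long sourceRangeCount,
                final int nin, long nout, final int limit,
                final long sumRangeCount) {

            if (nin == 1 && nout == limit) {
                // One solution in and [limit] solutions out: (nout/nin) is
                // only a lower bound, so fall back on the summed range
                // counts of the as-bound access paths.
                nout = sumRangeCount;
            }

            // The join hit ratio, using the corrected output count.
            final double f = nout == 0 ? 0 : (nout / (double) nin);

            return (long) (sourceRangeCount * f);
        }

        public static void main(final String[] args) {

            // The example from the comment above: a rangeCount of 16 feeding
            // a rangeCount of 175000 with limit=100 estimates 1600 without
            // the correction and 2800000 with it.
            System.out.println(estimateCardinality(16L, 1, 100L, 100, 175000L));
        }
    }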
@@ -337,6 +338,8 @@ sb.append('\t'); sb.append(stats.accessPathCount.get()); sb.append('\t'); + sb.append(stats.accessPathRangeCount.get()); + sb.append('\t'); sb.append(stats.accessPathChunksIn.get()); sb.append('\t'); sb.append(stats.accessPathUnitsIn.get()); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunState.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunState.java 2010-11-22 19:22:05 UTC (rev 3973) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunState.java 2010-11-22 21:08:56 UTC (rev 3974) @@ -778,6 +778,7 @@ sb.append("\tunitsOut"); sb.append("\taccessPathDups"); sb.append("\taccessPathCount"); + sb.append("\taccessPathRangeCount"); sb.append("\taccessPathChunksIn"); sb.append("\taccessPathUnitsIn"); //{chunksIn=1,unitsIn=100,chunksOut=4,unitsOut=313,accessPathDups=0,accessPathCount=100,chunkCount=100,elementCount=313} @@ -929,6 +930,8 @@ sb.append('\t'); sb.append(t.accessPathCount.get()); sb.append('\t'); + sb.append(t.accessPathRangeCount.get()); + sb.append('\t'); sb.append(t.accessPathChunksIn.get()); sb.append('\t'); sb.append(t.accessPathUnitsIn.get()); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java 2010-11-22 19:22:05 UTC (rev 3973) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java 2010-11-22 21:08:56 UTC (rev 3974) @@ -41,6 +41,7 @@ import java.util.concurrent.FutureTask; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.RejectedExecutionException; +import java.util.concurrent.Semaphore; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicBoolean; @@ -114,7 +115,7 @@ final private AtomicLong deadline = new AtomicLong(Long.MAX_VALUE); /** - * The timestamp(ms) when the query begins to execute. + * The timestamp (ms) when the query begins to execute. */ final private AtomicLong startTime = new AtomicLong(System .currentTimeMillis()); @@ -171,10 +172,91 @@ } /** - * The maximum number of operator tasks which may be concurrently executor + * The maximum number of operator tasks which may be concurrently executed * for a given (bopId,shardId). + * + * @see QueryEngineTestAnnotations#MAX_CONCURRENT_TASKS_PER_OPERATOR_AND_SHARD */ final private int maxConcurrentTasksPerOperatorAndShard; + +// /** +// * The maximum #of concurrent tasks for this query across all operators and +// * shards. +// * +// * Note: This is not a safe option and MUST be removed. It is possible for +// * N-1 tasks to backup with the Nth task not running due to concurrent +// * execution of some of the N-t tasks. +// */ +// final private int maxConcurrentTasks = 10; + + /* + * FIXME Explore the use of this semaphore to limit the maximum #of messages + * further. (Note that placing a limit on messages would allow us to buffer + * potentially many chunks. That could be solved by making LocalChunkMessage + * transparent in terms of the #of chunks or _binding_sets_ which it is + * carrying, but let's take this one step at a time). + * + * The first issue is ensuring that the query continue to make progress when + * a semaphore with a limited #of permits is introduced. 
This is because the + * ChunkFutureTask only attempts to schedule the next task for a given + * (bopId,shardId) but we could have failed to accept outstanding work for + * any of a number of operator/shard combinations. Likewise, the QueryEngine + * tells the RunningQuery to schedule work each time a message is dropped + * onto the QueryEngine, but the signal to execute more work is lost if the + * permits were not available immediately. + * + * One possibility would be to have a delayed retry. Another would be to + * have ChunkTaskFuture try to run *any* messages, not just messages for the + * same (bopId,shardId). + * + * Also, when scheduling work, there needs to be some bias towards the + * downstream operators in the query plan in order to ensure that they get a + * chance to clear work from upstream operators. This suggests that we might + * carry an order[] and use it to scan the work queue -- or make the work + * queue a priority heap using the order[] to place a primary sort over the + * bopIds in terms of the evaluation order and letting the shardIds fall in + * increasing shard order so we have a total order for the priority heap (a + * total order may also require a tie breaker, but I think that the priority + * heap allows ties). + * + * This concept of memory overhead and permits would be associated with the + * workload waiting on a given node for processing. (In scale-out, we do not + * care how much data is moving in the cluster, only how much data is + * challenging an individual machine). + * + * This emphasize again why we need to get the data off of the Java heap. + * + * The same concept should apply for chained buffers. Maybe one way to do + * this is to allocate a fixed budget to each query for the Java heap and + * the C heap and then the query blocks or goes to disk. + */ +// /** +// * The maximum number of binding sets which may be outstanding before a task +// * which is producing binding sets will block. This value may be used to +// * limit the memory demand of a query in which some operators produce +// * binding sets faster than other operators can consume them. +// * +// * @todo This could be generalized to consider the Java heap separately from +// * the native heap as we get into the use of native ByteBuffers to +// * buffer intermediate results. +// * +// * @todo This is expressed in terms of messages and not {@link IBindingSet}s +// * because the {@link LocalChunkMessage} does not self-report the #of +// * {@link IBindingSet}s (or chunks). +// */ +// final private int maxOutstandingMessageCount = 100; +// +// /** +// * A counting semaphore used to limit the #of outstanding binding set chunks +// * which may be buffered before a producer will block when trying to emit +// * another chunk. +// * +// * @see HandleChunkBuffer#outputChunk(IBindingSet[]) +// * @see #scheduleNext(BSBundle) +// * +// * @see #maxOutstandingMessageCount +// */ +// final private Semaphore outstandingMessageSemaphore = new Semaphore(maxOutstandingMessageCount); /** * A collection of (bopId,partitionId) keys mapped onto a collection of @@ -471,6 +553,8 @@ this.bopIndex = BOpUtility.getIndex(query); + +// this.maxConcurrentTasksPerOperatorAndShard = 300; this.maxConcurrentTasksPerOperatorAndShard = query .getProperty( QueryEngineTestAnnotations.MAX_CONCURRENT_TASKS_PER_OPERATOR_AND_SHARD, @@ -1203,6 +1287,33 @@ return false; } } +// if (runState.getTotalRunningCount() > maxConcurrentTasks) { +// // Too many already running. 
+// return false; +// } +// { +// /* +// * Verify that we can acquire sufficient permits to do some +// * work. +// */ +// final BlockingQueue<IChunkMessage<IBindingSet>> queue = operatorQueues +// .get(bundle); +// if (queue == null || queue.isEmpty()) { +// // No work. +// return false; +// } +// // The queue could be increased, but this will be its minimum size. +// final int minQueueSize = queue.size(); +// if(!outstandingMessageSemaphore.tryAcquire(minQueueSize)) { +// // Not enough permits. +// System.err.println("Permits: required=" + minQueueSize +// + ", available=" +// + outstandingMessageSemaphore.availablePermits() +// + ", bundle=" + bundle); +// return false; +// } +// +// } // Remove the work queue for that (bopId,partitionId). final BlockingQueue<IChunkMessage<IBindingSet>> queue = operatorQueues .remove(bundle); @@ -1210,7 +1321,7 @@ // no work return false; } - // Drain the work queue. + // Drain the work queue for that (bopId,partitionId). final List<IChunkMessage<IBindingSet>> messages = new LinkedList<IChunkMessage<IBindingSet>>(); queue.drainTo(messages); final int nmessages = messages.size(); @@ -1218,9 +1329,11 @@ * Combine the messages into a single source to be consumed by a * task. */ + int nchunks = 1; final IMultiSourceAsynchronousIterator<IBindingSet[]> source = new MultiSourceSequentialAsynchronousIterator<IBindingSet[]>(messages.remove(0).getChunkAccessor().iterator()); for (IChunkMessage<IBindingSet> msg : messages) { source.add(msg.getChunkAccessor().iterator()); + nchunks++; } /* * Create task to consume that source. @@ -1852,13 +1965,23 @@ */ private void outputChunk(final IBindingSet[] e) { - stats.unitsOut.add(((Object[]) e).length); + final int chunkSize = e.length; + + stats.unitsOut.add(chunkSize); stats.chunksOut.increment(); - sinkMessagesOut.addAndGet(q.getChunkHandler().handleChunk(q, bopId, - sinkId, e)); + final int messagesOut = q.getChunkHandler().handleChunk(q, bopId, + sinkId, e); + sinkMessagesOut.addAndGet(messagesOut); + +// try { +// q.outstandingMessageSemaphore.acquire(); +// } catch (InterruptedException e1) { +// throw new RuntimeException(e1); +// } + } /** Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java 2010-11-22 19:22:05 UTC (rev 3973) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java 2010-11-22 21:08:56 UTC (rev 3974) @@ -218,6 +218,12 @@ public final CAT accessPathCount = new CAT(); /** + * The running sum of the range counts of the accepted as-bound access + * paths. + */ + public final CAT accessPathRangeCount = new CAT(); + + /** * The #of input solutions consumed (not just accepted). 
* <p> * This counter is highly correlated with {@link BOpStats#unitsIn} but @@ -333,6 +339,8 @@ accessPathCount.add(t.accessPathCount.get()); + accessPathRangeCount.add(t.accessPathRangeCount.get()); + accessPathChunksIn.add(t.accessPathChunksIn.get()); accessPathUnitsIn.add(t.accessPathUnitsIn.get()); @@ -358,6 +366,7 @@ protected void toString(final StringBuilder sb) { sb.append(",accessPathDups=" + accessPathDups.get()); sb.append(",accessPathCount=" + accessPathCount.get()); + sb.append(",accessPathRangeCount=" + accessPathRangeCount.get()); sb.append(",accessPathChunksIn=" + accessPathChunksIn.get()); sb.append(",accessPathUnitsIn=" + accessPathUnitsIn.get()); sb.append(",inputSolutions=" + inputSolutions.get()); @@ -1562,6 +1571,10 @@ stats.accessPathCount.increment(); + // the range count of the as-bound access path (should be cached). + stats.accessPathRangeCount.add(accessPath + .rangeCount(false/* exact */)); + if (accessPath.getPredicate() instanceof IStarJoin<?>) { handleStarJoin(); Added: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBarData.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBarData.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBarData.java 2010-11-22 21:08:56 UTC (rev 3974) @@ -0,0 +1,608 @@ +package com.bigdata.bop.rdf.joinGraph; + +import java.io.File; +import java.util.Arrays; +import java.util.Properties; + +import junit.framework.TestCase2; + +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.openrdf.rio.RDFFormat; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.BOpContextBase; +import com.bigdata.bop.BOpIdFactory; +import com.bigdata.bop.Constant; +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IPredicate; +import com.bigdata.bop.IVariable; +import com.bigdata.bop.NV; +import com.bigdata.bop.PipelineOp; +import com.bigdata.bop.Var; +import com.bigdata.bop.IPredicate.Annotations; +import com.bigdata.bop.controller.JoinGraph; +import com.bigdata.bop.controller.JoinGraph.JGraph; +import com.bigdata.bop.controller.JoinGraph.Path; +import com.bigdata.bop.engine.BOpStats; +import com.bigdata.bop.engine.QueryEngine; +import com.bigdata.bop.engine.QueryLog; +import com.bigdata.bop.engine.RunningQuery; +import com.bigdata.bop.fed.QueryEngineFactory; +import com.bigdata.journal.ITx; +import com.bigdata.journal.Journal; +import com.bigdata.rdf.model.BigdataLiteral; +import com.bigdata.rdf.model.BigdataURI; +import com.bigdata.rdf.model.BigdataValue; +import com.bigdata.rdf.model.BigdataValueFactory; +import com.bigdata.rdf.spo.SPOPredicate; +import com.bigdata.rdf.store.AbstractTripleStore; +import com.bigdata.rdf.store.DataLoader; +import com.bigdata.rdf.store.LocalTripleStore; +import com.bigdata.rdf.store.DataLoader.ClosureEnum; +import com.bigdata.relation.accesspath.IAsynchronousIterator; +import com.bigdata.relation.rule.IRule; +import com.bigdata.relation.rule.Rule; +import com.bigdata.relation.rule.eval.DefaultEvaluationPlan2; +import com.bigdata.relation.rule.eval.IRangeCountFactory; + +/** + * Unit tests for runtime query optimization using {@link JoinGraph} and the + * "bar data" test set. + * <p> + * Note: When running large queries, be sure to provide a sufficient heap, set + * the -server flag, etc. 
+ * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id: TestJoinGraph.java 3918 2010-11-08 21:31:17Z thompsonbry $ + */ +public class TestJoinGraphOnBarData extends TestCase2 { + + /** + * + */ + public TestJoinGraphOnBarData() { + } + + /** + * @param name + */ + public TestJoinGraphOnBarData(String name) { + super(name); + } + + @Override + public Properties getProperties() { + + final Properties p = new Properties(super.getProperties()); + +// p.setProperty(Journal.Options.BUFFER_MODE, BufferMode.Transient +// .toString()); + + p.setProperty(AbstractTripleStore.Options.QUADS_MODE, "true"); + + /* + * Don't compute closure in the data loader since it does TM, not + * database at once closure. + */ + p.setProperty(DataLoader.Options.CLOSURE, ClosureEnum.None.toString()); + + return p; + + } + + private Journal jnl; + + private AbstractTripleStore database; + + /** The initial sampling limit. */ + private final int limit = 100; + + /** The #of edges considered for the initial paths. */ + private final int nedges = 2; + + private QueryEngine queryEngine; + + private String namespace; + + /** + * When true, do a warm up run of the plan generated by the static query + * optimizer. + */ + private final boolean warmUp = false; + + /** + * The #of times to run each query. Use N GT ONE (1) if you want to converge + * onto the hot query performance. + */ + private final int ntrials = 1; + + /** + * When <code>true</code> runs the dynamic query optimizer and then evaluates + * the generated query plan. + */ + private final boolean runRuntimeQueryOptimizer = true; + + /** + * When <code>true</code> runs the static query optimizer and then evaluates + * the generated query plan. + */ + private final boolean runStaticQueryOptimizer = true; + + /** + * Loads LUBM U1 into a triple store. + */ + protected void setUp() throws Exception { + +// QueryLog.logTableHeader(); + + super.setUp(); + +// System.err.println(UUID.randomUUID().toString()); +// System.exit(0); + + final Properties properties = getProperties(); + + final File file; + { + /* + * Use a specific file generated by some external process. + */ + file = new File("/data/bardata/bigdata-bardata.WORM.jnl"); + namespace = "bardata"; + } + + properties.setProperty(Journal.Options.FILE, file.toString()); + +// properties.setProperty(Journal.Options.BUFFER_MODE,BufferMode.DiskRW.toString()); + +// file.delete(); + + if (!file.exists()) { + + jnl = new Journal(properties); + + final AbstractTripleStore tripleStore = new LocalTripleStore(jnl, + namespace, ITx.UNISOLATED, properties); + + // Create the KB instance. + tripleStore.create(); + + tripleStore.getDataLoader().loadFiles( + new File("/root/Desktop/Downloads/barData/barData.trig"), + null/* baseURI */, RDFFormat.TRIG, null/* defaultGraph */, + null/* filter */); + + // Truncate the journal (trim its size). + jnl.truncate(); + + // Commit the journal. + jnl.commit(); + + // Close the journal. + jnl.close(); + + } + + // Open the test resource. 
+ jnl = new Journal(properties); + + queryEngine = QueryEngineFactory + .getQueryController(jnl/* indexManager */); + + database = (AbstractTripleStore) jnl.getResourceLocator().locate( + namespace, jnl.getLastCommitTime()); + + if (database == null) + throw new RuntimeException("Not found: " + namespace); + + } + + protected void tearDown() throws Exception { + + if (database != null) { + database = null; + } + + if (queryEngine != null) { + queryEngine.shutdownNow(); + queryEngine = null; + } + + if(jnl != null) { + jnl.close(); + jnl = null; + } + + super.tearDown(); + + } + + /** + * Sample query for the synthetic data set. The query is arranged in a known + * good order. + * <p> + * Note: The runtime optimizer estimate of the cardinality of the edge [5 4] + * in this query is a lower bound, which makes this an interesting test + * case. The runtime optimizer detects this lower bound and replaces [nout] + * with the sum of the range count of the as-bound predicates for the join, + * which leads to an efficient query plan. + * + * <pre> + * SELECT ?f (COUNT(?d) AS ?total) WHERE { + * ?a <http://test/bar#beverageType> "Beer" . + * ?value <http://test/bar#orderItems> ?a. + * ?value <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://test/bar#Order> . + * ?a <http://test/bar#beverageType> ?d. + * ?value <http://test/bar#employee> ?b. + * ?b <http://test/bar#employeeNum> ?f. + * } GROUP BY ?f + * </pre> + * + * Note: Mike suggests that it is easier to read the query like this: + * + * <pre> + * SELECT ?employeeNum (COUNT(?type) AS ?total) + * WHERE { + * ?order <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> + * <http://test/bar#Order> . + * ?order <http://test/bar#orderItems> ?item . + * ?item <http://test/bar#beverageType> "Beer" . + * ?item <http://test/bar#beverageType> ?type . + * + * ?order <http://test/bar#employee> ?employee . + * + * ?employee <http://test/bar#employeeNum> ?employeeNum . + * } GROUP BY ?employeeNum + * </pre> + * + * @throws Exception + */ + public void test_query() throws Exception { + + /* + * Resolve terms against the lexicon. + */ + final BigdataValueFactory valueFactory = database.getLexiconRelation() + .getValueFactory(); + + final BigdataURI rdfType = valueFactory + .createURI("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"); + + final BigdataLiteral beer = valueFactory.createLiteral("Beer"); + + final BigdataURI beverageType = valueFactory + .createURI("http://test/bar#beverageType"); + + final BigdataURI orderItems = valueFactory + .createURI("http://test/bar#orderItems"); + + final BigdataURI Order = valueFactory + .createURI("http://test/bar#Order"); + + final BigdataURI employee = valueFactory + .createURI("http://test/bar#employee"); + + final BigdataURI employeeNum = valueFactory + .createURI("http://test/bar#employeeNum"); + + final BigdataValue[] terms = new BigdataValue[] { rdfType, beer, + beverageType, orderItems, Order, employee, employeeNum }; + + // resolve terms. 
+        database.getLexiconRelation()
+                .addTerms(terms, terms.length, true/* readOnly */);
+
+        {
+            for (BigdataValue tmp : terms) {
+                System.out.println(tmp + " : " + tmp.getIV());
+                if (tmp.getIV() == null)
+                    throw new RuntimeException("Not defined: " + tmp);
+            }
+        }
+
+        final IPredicate[] preds;
+        final IPredicate p0, p1, p2, p3, p4, p5;
+        {
+//            a, value, d, b, f
+            final IVariable<?> a = Var.var("a");
+            final IVariable<?> value = Var.var("value");
+            final IVariable<?> d = Var.var("d");
+            final IVariable<?> b = Var.var("b");
+            final IVariable<?> f = Var.var("f");
+
+            final IVariable<?> g0 = Var.var("g0");
+            final IVariable<?> g1 = Var.var("g1");
+            final IVariable<?> g2 = Var.var("g2");
+            final IVariable<?> g3 = Var.var("g3");
+            final IVariable<?> g4 = Var.var("g4");
+            final IVariable<?> g5 = Var.var("g5");
+
+
+            // The name space for the SPO relation.
+            final String[] spoRelation = new String[] { namespace + ".spo" };
+
+            // The name space for the Lexicon relation.
+            final String[] lexRelation = new String[] { namespace + ".lex" };
+
+            final long timestamp = jnl.getLastCommitTime();
+
+            int nextId = 0;
+
+//            ?a <http://test/bar#beverageType> "Beer" .
+            p0 = new SPOPredicate(new BOp[] { a,
+                    new Constant(beverageType.getIV()),
+                    new Constant(beer.getIV()), g0 },//
+                    new NV(BOp.Annotations.BOP_ID, nextId++),//
+                    new NV(Annotations.TIMESTAMP, timestamp),//
+                    new NV(IPredicate.Annotations.RELATION_NAME, spoRelation)//
+            );
+
+            // ?value <http://test/bar#orderItems> ?a.
+            p1 = new SPOPredicate(new BOp[] { value,
+                    new Constant(orderItems.getIV()), a, g1 },//
+                    new NV(BOp.Annotations.BOP_ID, nextId++),//
+                    new NV(Annotations.TIMESTAMP, timestamp),//
+                    new NV(IPredicate.Annotations.RELATION_NAME, spoRelation)//
+            );
+
+//            ?value <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://test/bar#Order> .
+            p2 = new SPOPredicate(new BOp[] { value,
+                    new Constant(rdfType.getIV()),
+                    new Constant(Order.getIV()), g2 },//
+                    new NV(BOp.Annotations.BOP_ID, nextId++),//
+                    new NV(Annotations.TIMESTAMP, timestamp),//
+                    new NV(IPredicate.Annotations.RELATION_NAME, spoRelation)//
+            );
+
+//            ?a <http://test/bar#beverageType> ?d.
+            p3 = new SPOPredicate(new BOp[] { a,
+                    new Constant(beverageType.getIV()), d, g3 },//
+                    new NV(BOp.Annotations.BOP_ID, nextId++),//
+                    new NV(Annotations.TIMESTAMP, timestamp),//
+                    new NV(IPredicate.Annotations.RELATION_NAME, spoRelation)//
+            );
+
+//            ?value <http://test/bar#employee> ?b.
+            p4 = new SPOPredicate(new BOp[] { value,
+                    new Constant(employee.getIV()), b, g4 },//
+                    new NV(BOp.Annotations.BOP_ID, nextId++),//
+                    new NV(Annotations.TIMESTAMP, timestamp),//
+                    new NV(IPredicate.Annotations.RELATION_NAME, spoRelation)//
+            );
+
+//            ?b <http://test/bar#employeeNum> ?f.
+            p5 = new SPOPredicate(new BOp[] { b,
+                    new Constant(employeeNum.getIV()), f, g5 },//
+                    new NV(BOp.Annotations.BOP_ID, nextId++),//
+                    new NV(Annotations.TIMESTAMP, timestamp),//
+                    new NV(IPredicate.Annotations.RELATION_NAME, spoRelation)//
+            );
+
+            // the vertices of the join graph (the predicates).
+            preds = new IPredicate[] { p0, p1, p2, p3, p4, p5 };
+
+        }
+
+        doTest(preds);
+
+    } // test_query
+
+    /**
+     *
+     * @param preds
+     * @throws Exception
+     *
+     * @todo To actually test anything this needs to compare the results (or at
+     *       least the #of results). We could also test for known good join
+     *       orders as generated by the runtime optimizer, but that requires a
+     *       known data set (e.g., U1 or U50) and non-random sampling.
+     *
+     * @todo This is currently providing a "hot run" comparison by a series of
+     *       trials.
This means that the IO costs are effectively being wiped + * away, assuming that the file system cache is larger than the data + * set. The other way to compare performance is a cold cache / cold + * JVM run using the known solutions produced by the runtime versus + * static query optimizers. + */ + private void doTest(final IPredicate[] preds) throws Exception { + + if (warmUp) + runQuery("Warmup", queryEngine, runStaticQueryOptimizer(preds)); + + /* + * Run the runtime query optimizer once (its cost is not counted + * thereafter). + */ + final IPredicate[] runtimePredOrder = runRuntimeQueryOptimizer(preds); + + long totalRuntimeTime = 0; + long totalStaticTime = 0; + + for (int i = 0; i < ntrials; i++) { + + final String RUNTIME = getName() + " : runtime["+i+"] :"; + + final String STATIC = getName() + " : static ["+i+"] :"; + + final String GIVEN = getName() + " : given ["+i+"] :"; + + if (true/* originalOrder */) { + + runQuery(GIVEN, queryEngine, preds); + + } + + if (runStaticQueryOptimizer) { + + totalStaticTime += runQuery(STATIC, queryEngine, + runStaticQueryOptimizer(preds)); + + } + + if (runRuntimeQueryOptimizer) { + + /* + * Run the runtime query optimizer each time (its overhead is + * factored into the running comparison of the two query + * optimizers). + */ +// final IPredicate[] runtimePredOrder = runRuntimeQueryOptimizer(new JGraph( +// preds)); + + // Evaluate the query using the selected join order. + totalRuntimeTime += runQuery(RUNTIME, queryEngine, + runtimePredOrder); + + } + + } + + if(runStaticQueryOptimizer&&runRuntimeQueryOptimizer) { + System.err.println(getName() + " : Total times" + // + ": static=" + totalStaticTime + // + ", runtime=" + totalRuntimeTime + // + ", delta(static-runtime)=" + (totalStaticTime - totalRuntimeTime)); + } + + } + + /** + * Apply the runtime query optimizer. + * <p> + * Note: This temporarily raises the {@link QueryLog} log level during + * sampling to make the log files cleaner (this can not be done for a + * deployed system since the logger level is global and there are concurrent + * query mixes). + * + * @return The predicates in order as recommended by the runtime query + * optimizer. + * + * @throws Exception + */ + private IPredicate[] runRuntimeQueryOptimizer(final IPredicate[] preds) throws Exception { + + final Logger tmp = Logger.getLogger(QueryLog.class); + final Level oldLevel = tmp.getEffectiveLevel(); + tmp.setLevel(Level.WARN); + + try { + + final JGraph g = new JGraph(preds); + + final Path p = g.runtimeOptimizer(queryEngine, limit, nedges); + +// System.err.println(getName() + " : runtime optimizer join order " +// + Arrays.toString(Path.getVertexIds(p.edges))); + + return p.getPredicates(); + + } finally { + + tmp.setLevel(oldLevel); + + } + + } + + /** + * Apply the static query optimizer. + * + * @return The predicates in order as recommended by the static query + * optimizer. + */ + private IPredicate[] runStaticQueryOptimizer(final IPredicate[] preds) { + + final BOpContextBase context = new BOpContextBase(queryEngine); + + final IRule rule = new Rule("tmp", null/* head */, preds, null/* constraints */); + + final DefaultEvaluationPlan2 plan = new DefaultEvaluationPlan2( + new IRangeCountFactory() { + + public long rangeCount(final IPredicate pred) { + return context.getRelation(pred).getAccessPath(pred) + .rangeCount(false); + } + + }, rule); + + // evaluation plan order. 
+        final int[] order = plan.getOrder();
+
+        final int[] ids = new int[order.length];
+
+        final IPredicate[] out = new IPredicate[order.length];
+
+        for (int i = 0; i < order.length; i++) {
+
+            out[i] = preds[order[i]];
+
+            ids[i] = out[i].getId();
+
+        }
+
+//        System.err.println(getName() + " : static optimizer join order "
+//                + Arrays.toString(ids));
+
+        return out;
+
+    }
+
+    /**
+     * Run a query joining a set of {@link IPredicate}s in the given join order.
+     *
+     * @return The elapsed query time (ms).
+     */
+    private static long runQuery(final String msg,
+            final QueryEngine queryEngine, final IPredicate[] predOrder)
+            throws Exception {
+
+        final BOpIdFactory idFactory = new BOpIdFactory();
+
+        final int[] ids = new int[predOrder.length];
+
+        for (int i = 0; i < ids.length; i++) {
+
+            final IPredicate<?> p = predOrder[i];
+
+            idFactory.reserve(p.getId());
+
+            ids[i] = p.getId();
+
+        }
+
+        final PipelineOp queryOp = JoinGraph.getQuery(idFactory, predOrder);
+
+        // submit the query to the query engine.
+        final RunningQuery q = queryEngine.eval(queryOp);
+
+        // drain the query results.
+        long nout = 0;
+        long nchunks = 0;
+        final IAsynchronousIterator<IBindingSet[]> itr = q.iterator();
+        try {
+            while (itr.hasNext()) {
+                final IBindingSet[] chunk = itr.next();
+                nout += chunk.length;
+                nchunks++;
+            }
+        } finally {
+            itr.close();
+        }
+
+        // check the Future for the query.
+        q.get();
+
+        // show the results.
+        final BOpStats stats = q.getStats().get(queryOp.getId());
+
+        System.err.println(msg + " : ids=" + Arrays.toString(ids)
+                + ", elapsed=" + q.getElapsed() + ", nout=" + nout
+                + ", nchunks=" + nchunks + ", stats=" + stats);
+
+        return q.getElapsed();
+
+    }
+
+}

Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java
===================================================================
--- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java	2010-11-22 19:22:05 UTC (rev 3973)
+++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java	2010-11-22 21:08:56 UTC (rev 3974)
@@ -75,7 +75,12 @@
  * FIXME There is now an option to converge onto the hot query
  * performance. Add an option to drop the file system cache and to
  * reopen the journal in order to converge on the cold query
- * performance for the selected join orderings.
+ * performance for the selected join orderings. (Alternatively, either devise
+ * a benchmark which can be used to assess the relative performance with
+ * disk IO, or use the LUBM benchmark at a data scale which would force
+ * queries to touch the disk; this actually requires a very high data
+ * scale for LUBM since the complex queries are not parameterized and
+ * tend to fully cache the relevant data on their first presentation.)
 *
 * FIXME Looks like U1000 Q2 runs into GC OH problems with both the
 * static and runtime query optimizers. Track down why. Note that Q2
@@ -167,6 +172,9 @@
     /** The initial sampling limit. */
     private final int limit = 100;

+    /** The #of edges considered for the initial paths. */
+    private final int nedges = 2;
+
     private QueryEngine queryEngine;

     private String namespace;
@@ -184,10 +192,16 @@
     private static final UUID resourceId = UUID.fromString("bb93d970-0cc4-48ca-ba9b-123412683b3d");

     /**
+     * When true, do a warm-up run of the plan generated by the static query
+     * optimizer.
+     */
+    private final boolean warmUp = false;
+
+    /**
      * The #of times to run each query.
Use N greater than ONE (1) if you want to converge
      * onto the hot query performance.
      */
-    private final int ntrials = 5;
+    private final int ntrials = 1;

     /**
      * When <code>true</code>, runs the dynamic query optimizer and then evaluates
@@ -206,6 +220,8 @@
      */
     protected void setUp() throws Exception {

+//        QueryLog.logTableHeader();
+
         super.setUp();

 //        System.err.println(UUID.randomUUID().toString());
@@ -228,7 +244,7 @@
             /*
              * Use a specific file generated by some external process.
              */
-            final int nuniv = 50;
+            final int nuniv = 1000;
             file = new File("/data/lubm/U" + nuniv + "/bigdata-lubm.WORM.jnl");
             namespace = "LUBM_U" + nuniv;
         }
@@ -803,14 +819,14 @@
      */
     private void doTest(final IPredicate[] preds) throws Exception {

-        runQuery("Warmup", queryEngine, runStaticQueryOptimizer(preds));
+        if (warmUp)
+            runQuery("Warmup", queryEngine, runStaticQueryOptimizer(preds));

         /*
          * Run the runtime query optimizer once (its cost is not counted
          * thereafter).
          */
-        final IPredicate[] runtimePredOrder = runRuntimeQueryOptimizer(new JGraph(
-                preds));
+        final IPredicate[] runtimePredOrder = runRuntimeQueryOptimizer(preds);

         long totalRuntimeTime = 0;
         long totalStaticTime = 0;
@@ -868,7 +884,7 @@
      *
      * @throws Exception
      */
-    private IPredicate[] runRuntimeQueryOptimizer(final JGraph g) throws Exception {
+    private IPredicate[] runRuntimeQueryOptimizer(final IPredicate[] preds) throws Exception {

         final Logger tmp = Logger.getLogger(QueryLog.class);
         final Level oldLevel = tmp.getEffectiveLevel();
@@ -876,7 +892,9 @@

         try {

-            final Path p = g.runtimeOptimizer(queryEngine, limit);
+            final JGraph g = new JGraph(preds);
+
+            final Path p = g.runtimeOptimizer(queryEngine, limit, nedges);

 //            System.err.println(getName() + " : runtime optimizer join order "
 //                    + Arrays.toString(Path.getVertexIds(p.edges)));
|
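The FIXME above asks for a way to converge on the cold query performance. One
trial of such a mode might look like the following sketch. This is
illustrative only and not part of the commit: it assumes a Linux host (root
is required for the drop_caches write) and reuses the runQuery() and
QueryEngineFactory helpers already present in the test class; the
runColdTrial name is hypothetical.

    /**
     * Run one trial of the given join order against a cold file system cache
     * (sketch: Linux-specific; requires root for the drop_caches write).
     */
    private long runColdTrial(final Properties properties,
            final IPredicate[] predOrder) throws Exception {

        // Tear down the hot query engine and journal.
        queryEngine.shutdownNow();
        jnl.close();

        // Flush dirty pages, then evict the OS page cache.
        final Process p = Runtime.getRuntime().exec(new String[] { "/bin/sh",
                "-c", "sync; echo 3 > /proc/sys/vm/drop_caches" });
        if (p.waitFor() != 0)
            throw new RuntimeException("Unable to drop the page cache.");

        // Reopen the journal and query engine against the cold store.
        jnl = new Journal(properties);
        queryEngine = QueryEngineFactory.getQueryController(jnl/* indexManager */);

        // Time the selected join ordering with cold disk.
        return runQuery("Cold", queryEngine, predOrder);

    }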
From: <mrp...@us...> - 2010-12-07 22:32:03
|
Revision: 3998 http://bigdata.svn.sourceforge.net/bigdata/?rev=3998&view=rev Author: mrpersonick Date: 2010-12-07 22:31:55 +0000 (Tue, 07 Dec 2010) Log Message: ----------- adding test for optional join groups Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestNestedOptionals.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngineOptionalJoins.java Added: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngineOptionalJoins.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngineOptionalJoins.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngineOptionalJoins.java 2010-12-07 22:31:55 UTC (rev 3998) @@ -0,0 +1,980 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Aug 23, 2010 + */ + +package com.bigdata.bop.engine; + +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.UUID; +import java.util.concurrent.CancellationException; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.Executor; +import java.util.concurrent.FutureTask; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicInteger; + +import junit.framework.TestCase2; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.BOpEvaluationContext; +import com.bigdata.bop.Constant; +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IConstant; +import com.bigdata.bop.IConstraint; +import com.bigdata.bop.IVariable; +import com.bigdata.bop.IVariableOrConstant; +import com.bigdata.bop.NV; +import com.bigdata.bop.PipelineOp; +import com.bigdata.bop.Var; +import com.bigdata.bop.IPredicate.Annotations; +import com.bigdata.bop.ap.E; +import com.bigdata.bop.ap.Predicate; +import com.bigdata.bop.ap.R; +import com.bigdata.bop.bindingSet.ArrayBindingSet; +import com.bigdata.bop.bindingSet.HashBindingSet; +import com.bigdata.bop.bset.ConditionalRoutingOp; +import com.bigdata.bop.bset.StartOp; +import com.bigdata.bop.constraint.EQ; +import com.bigdata.bop.constraint.EQConstant; +import com.bigdata.bop.constraint.NEConstant; +import com.bigdata.bop.fed.TestFederatedQueryEngine; +import com.bigdata.bop.join.PipelineJoin; +import com.bigdata.bop.solutions.SliceOp; +import com.bigdata.bop.solutions.SliceOp.SliceStats; +import com.bigdata.io.DirectBufferPoolAllocator.IAllocationContext; +import com.bigdata.journal.BufferMode; +import com.bigdata.journal.ITx; +import com.bigdata.journal.Journal; +import 
com.bigdata.relation.accesspath.IAsynchronousIterator; +import com.bigdata.relation.accesspath.ThickAsynchronousIterator; +import com.bigdata.striterator.ChunkedArrayIterator; +import com.bigdata.striterator.Dechunkerator; +import com.bigdata.striterator.ICloseableIterator; +import com.bigdata.util.InnerCause; +import com.bigdata.util.concurrent.LatchedExecutor; +import com.ibm.icu.impl.ByteBuffer; + +/** + * Test suite for the {@link QueryEngine} against a local database instance. + * <p> + * Note: The {@link BOp}s are unit tested separately. This test suite is focused + * on interactions when {@link BOp}s are chained together in a query, such as a + * sequence of pipeline joins, a slice applied to a query, etc. + * + * <pre> + * -Dlog4j.configuration=bigdata/src/resources/logging/log4j.properties + * </pre> + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id: TestQueryEngine.java 3950 2010-11-17 02:14:08Z thompsonbry $ + * + * @see TestFederatedQueryEngine + * + * @todo write a unit and stress tests for deadlines. + */ +public class TestQueryEngineOptionalJoins extends TestCase2 { + + /** + * + */ + public TestQueryEngineOptionalJoins() { + } + + /** + * @param name + */ + public TestQueryEngineOptionalJoins(String name) { + super(name); + } + + @Override + public Properties getProperties() { + + final Properties p = new Properties(super.getProperties()); + + p.setProperty(Journal.Options.BUFFER_MODE, BufferMode.Transient + .toString()); + + return p; + + } + + static private final String namespace = "ns"; + Journal jnl; + QueryEngine queryEngine; + + public void setUp() throws Exception { + + jnl = new Journal(getProperties()); + + loadData(jnl); + + queryEngine = new QueryEngine(jnl); + + queryEngine.init(); + + } + + /** + * Create and populate relation in the {@link #namespace}. + */ + private void loadData(final Journal store) { + + // create the relation. + final R rel = new R(store, namespace, ITx.UNISOLATED, new Properties()); + rel.create(); + + // data to insert (in key order for convenience). + final E[] a = {// + new E("Paul", "Mary"),// [0] + new E("Paul", "Brad"),// [1] + + new E("John", "Mary"),// [0] + new E("John", "Brad"),// [1] + + new E("Mary", "Brad"),// [1] + + new E("Brad", "Fred"),// [1] + new E("Brad", "Leon"),// [1] + }; + + // insert data (the records are not pre-sorted). + rel.insert(new ChunkedArrayIterator<E>(a.length, a, null/* keyOrder */)); + + // Do commit since not scale-out. + store.commit(); + + } + + public void tearDown() throws Exception { + + if (queryEngine != null) { + queryEngine.shutdownNow(); + queryEngine = null; + } + + if (jnl != null) { + jnl.destroy(); + jnl = null; + } + + } + + /** + * Return an {@link IAsynchronousIterator} that will read a single, + * empty {@link IBindingSet}. + * + * @param bindingSet + * the binding set. + */ + protected ThickAsynchronousIterator<IBindingSet[]> newBindingSetIterator( + final IBindingSet bindingSet) { + + return new ThickAsynchronousIterator<IBindingSet[]>( + new IBindingSet[][] { new IBindingSet[] { bindingSet } }); + + } + + /** + * Return an {@link IAsynchronousIterator} that will read a single, chunk + * containing all of the specified {@link IBindingSet}s. + * + * @param bindingSets + * the binding sets. 
+ */
+    protected ThickAsynchronousIterator<IBindingSet[]> newBindingSetIterator(
+            final IBindingSet[] bindingSets) {
+
+        return new ThickAsynchronousIterator<IBindingSet[]>(
+                new IBindingSet[][] { bindingSets });
+
+    }
+
+    /**
+     * Return an {@link IAsynchronousIterator} that will read a single chunk
+     * containing all of the specified {@link IBindingSet}s.
+     *
+     * @param bindingSetChunks
+     *            the chunks of binding sets.
+     */
+    protected ThickAsynchronousIterator<IBindingSet[]> newBindingSetIterator(
+            final IBindingSet[][] bindingSetChunks) {
+
+        return new ThickAsynchronousIterator<IBindingSet[]>(bindingSetChunks);
+
+    }
+
+    /**
+     * Unit test for an optional join group. Three joins are used and target a
+     * {@link SliceOp}. The 2nd and 3rd joins are an optional join group.
+     * Intermediate results which do not succeed on the optional join are
+     * forwarded to the {@link SliceOp} which is the target specified by the
+     * {@link PipelineOp.Annotations#ALT_SINK_REF}.
+     *
+     * The optional join group takes the form:
+     *
+     * <pre>
+     * (a b)
+     * optional {
+     *   (b c)
+     *   (c d)
+     * }
+     * </pre>
+     *
+     * The (a b) tail will match everything in the knowledge base. The join
+     * group takes us two hops out from ?b. There should be four solutions
+     * that succeed the optional join group:
+     *
+     * <pre>
+     * (paul mary brad fred)
+     * (paul mary brad leon)
+     * (john mary brad fred)
+     * (john mary brad leon)
+     * </pre>
+     *
+     * and five more that don't succeed the optional join group:
+     *
+     * <pre>
+     * (paul brad) *
+     * (john brad) *
+     * (mary brad) *
+     * (brad fred)
+     * (brad leon)
+     * </pre>
+     *
+     * In the cases marked with a *, ?c will become temporarily bound to fred
+     * and leon (since brad knows fred and leon), but the (c d) tail will fail
+     * since fred and leon don't know anyone else. At this point, the ?c binding
+     * must be removed from the solution.
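+     * <p>
+     * Note: For illustration only (this SPARQL form is not part of the unit
+     * test; the bd:knows predicate name is borrowed from the corresponding
+     * query in TestNestedOptionals), the access pattern corresponds roughly
+     * to:
+     *
+     * <pre>
+     * SELECT * WHERE {
+     *   ?a bd:knows ?b .
+     *   OPTIONAL {
+     *     ?b bd:knows ?c .
+     *     ?c bd:knows ?d .
+     *   }
+     * }
+     * </pre>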
+ */ + public void test_query_join2_optionals() throws Exception { + + final int startId = 1; + final int joinId1 = 2; + final int predId1 = 3; + final int joinId2 = 4; + final int predId2 = 5; + final int joinId3 = 6; + final int predId3 = 7; + final int sliceId = 8; + + final IVariable<?> a = Var.var("a"); + final IVariable<?> b = Var.var("b"); + final IVariable<?> c = Var.var("c"); + final IVariable<?> d = Var.var("d"); + + final PipelineOp startOp = new StartOp(new BOp[] {}, + NV.asMap(new NV[] {// + new NV(Predicate.Annotations.BOP_ID, startId),// + new NV(SliceOp.Annotations.EVALUATION_CONTEXT, + BOpEvaluationContext.CONTROLLER),// + })); + + final Predicate<?> pred1Op = new Predicate<E>( + new IVariableOrConstant[] { a, b }, NV + .asMap(new NV[] {// + new NV(Predicate.Annotations.RELATION_NAME, + new String[] { namespace }),// + new NV(Predicate.Annotations.BOP_ID, predId1),// + new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// + })); + + final Predicate<?> pred2Op = new Predicate<E>( + new IVariableOrConstant[] { b, c }, NV + .asMap(new NV[] {// + new NV(Predicate.Annotations.RELATION_NAME, + new String[] { namespace }),// + new NV(Predicate.Annotations.BOP_ID, predId2),// + new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// + })); + + final Predicate<?> pred3Op = new Predicate<E>( + new IVariableOrConstant[] { c, d }, NV + .asMap(new NV[] {// + new NV(Predicate.Annotations.RELATION_NAME, + new String[] { namespace }),// + new NV(Predicate.Annotations.BOP_ID, predId3),// + new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// + })); + + final PipelineOp join1Op = new PipelineJoin<E>(// + new BOp[]{startOp},// + new NV(Predicate.Annotations.BOP_ID, joinId1),// + new NV(PipelineJoin.Annotations.PREDICATE,pred1Op)); + + final PipelineOp join2Op = new PipelineJoin<E>(// + new BOp[] { join1Op },// + new NV(Predicate.Annotations.BOP_ID, joinId2),// + new NV(PipelineJoin.Annotations.PREDICATE, pred2Op),// + // join is optional. + new NV(PipelineJoin.Annotations.OPTIONAL, true),// + // optional target is the same as the default target. + new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId)); + + final PipelineOp join3Op = new PipelineJoin<E>(// + new BOp[] { join2Op },// + new NV(Predicate.Annotations.BOP_ID, joinId3),// + new NV(PipelineJoin.Annotations.PREDICATE, pred3Op),// + // join is optional. + new NV(PipelineJoin.Annotations.OPTIONAL, true),// + // optional target is the same as the default target. + new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId)); + + final PipelineOp sliceOp = new SliceOp(// + new BOp[]{join3Op}, + NV.asMap(new NV[] {// + new NV(BOp.Annotations.BOP_ID, sliceId),// + new NV(BOp.Annotations.EVALUATION_CONTEXT, + BOpEvaluationContext.CONTROLLER),// + })); + + final PipelineOp query = sliceOp; + + // start the query. + final UUID queryId = UUID.randomUUID(); + final IChunkMessage<IBindingSet> initialChunkMessage; + { + + final IBindingSet initialBindings = new HashBindingSet(); + +// initialBindings.set(Var.var("x"), new Constant<String>("Mary")); + + initialChunkMessage = new LocalChunkMessage<IBindingSet>(queryEngine, + queryId, startId,// + -1, // partitionId + newBindingSetIterator(initialBindings)); + } + final RunningQuery runningQuery = queryEngine.eval(queryId, query, + initialChunkMessage); + + // verify solutions. + { + + // the expected solutions. + final IBindingSet[] expected = new IBindingSet[] {// + // four solutions where the optional join succeeds. 
+                new ArrayBindingSet(//
+                        new IVariable[] { a, b, c, d },//
+                        new IConstant[] { new Constant<String>("Paul"),
+                                new Constant<String>("Mary"),
+                                new Constant<String>("Brad"),
+                                new Constant<String>("Fred") }//
+                ),
+                new ArrayBindingSet(//
+                        new IVariable[] { a, b, c, d },//
+                        new IConstant[] { new Constant<String>("Paul"),
+                                new Constant<String>("Mary"),
+                                new Constant<String>("Brad"),
+                                new Constant<String>("Leon") }//
+                ),
+                new ArrayBindingSet(//
+                        new IVariable[] { a, b, c, d },//
+                        new IConstant[] { new Constant<String>("John"),
+                                new Constant<String>("Mary"),
+                                new Constant<String>("Brad"),
+                                new Constant<String>("Fred") }//
+                ),
+                new ArrayBindingSet(//
+                        new IVariable[] { a, b, c, d },//
+                        new IConstant[] { new Constant<String>("John"),
+                                new Constant<String>("Mary"),
+                                new Constant<String>("Brad"),
+                                new Constant<String>("Leon") }//
+                ),
+                // plus anything we read from the first access path which did not
+                // pass the optional join
+                new ArrayBindingSet(//
+                        new IVariable[] { a, b },//
+                        new IConstant[] { new Constant<String>("Paul"),
+                                new Constant<String>("Brad") }//
+                ),
+                new ArrayBindingSet(//
+                        new IVariable[] { a, b },//
+                        new IConstant[] { new Constant<String>("John"),
+                                new Constant<String>("Brad") }//
+                ),
+                new ArrayBindingSet(//
+                        new IVariable[] { a, b },//
+                        new IConstant[] { new Constant<String>("Mary"),
+                                new Constant<String>("Brad") }//
+                ),
+                new ArrayBindingSet(//
+                        new IVariable[] { a, b },//
+                        new IConstant[] { new Constant<String>("Brad"),
+                                new Constant<String>("Fred") }//
+                ),
+                new ArrayBindingSet(//
+                        new IVariable[] { a, b },//
+                        new IConstant[] { new Constant<String>("Brad"),
+                                new Constant<String>("Leon") }//
+                )
+            };
+
+            assertSameSolutionsAnyOrder(expected,
+                    new Dechunkerator<IBindingSet>(runningQuery.iterator()));
+
+        }
+
+        // Wait until the query is done.
+        runningQuery.get();
+        final Map<Integer, BOpStats> statsMap = runningQuery.getStats();
+        {
+            // validate the stats map.
+            assertNotNull(statsMap);
+            assertEquals(5, statsMap.size());
+            if (log.isInfoEnabled())
+                log.info(statsMap.toString());
+        }
+
+    }
+
+    /**
+     * Unit test for an optional join group with a filter. Three joins are used
+     * and target a {@link SliceOp}. The 2nd and 3rd joins are an optional join
+     * group. Intermediate results which do not succeed on the optional join are
+     * forwarded to the {@link SliceOp} which is the target specified by the
+     * {@link PipelineOp.Annotations#ALT_SINK_REF}. The optional join group
+     * contains a filter.
+     *
+     * The optional join group takes the form:
+     *
+     * <pre>
+     * (a b)
+     * optional {
+     *   (b c)
+     *   (c d)
+     *   filter(d != Leon)
+     * }
+     * </pre>
+     *
+     * The (a b) tail will match everything in the knowledge base. The join
+     * group takes us two hops out from ?b. There should be two solutions
+     * that succeed the optional join group:
+     *
+     * <pre>
+     * (paul mary brad fred)
+     * (john mary brad fred)
+     * </pre>
+     *
+     * and five more that don't succeed the optional join group:
+     *
+     * <pre>
+     * (paul brad) *
+     * (john brad) *
+     * (mary brad) *
+     * (brad fred)
+     * (brad leon)
+     * </pre>
+     *
+     * In the cases marked with a *, ?c will become temporarily bound to fred
+     * and leon (since brad knows fred and leon), but the (c d) tail will fail
+     * since fred and leon don't know anyone else. At this point, the ?c binding
+     * must be removed from the solution.
+     *
+     * The filter (d != Leon) will prune the two solutions:
+     *
+     * <pre>
+     * (paul mary brad leon)
+     * (john mary brad leon)
+     * </pre>
+     *
+     * since ?d is bound to Leon in those cases.
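+     * <p>
+     * Note: For illustration only (not part of the unit test; bd:knows and
+     * bd:leon follow the corresponding query in TestNestedOptionals), this is
+     * roughly:
+     *
+     * <pre>
+     * SELECT * WHERE {
+     *   ?a bd:knows ?b .
+     *   OPTIONAL {
+     *     ?b bd:knows ?c .
+     *     ?c bd:knows ?d .
+     *     FILTER (?d != bd:leon)
+     *   }
+     * }
+     * </pre>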
+ */ + public void test_query_optionals_filter() throws Exception { + + final int startId = 1; + final int joinId1 = 2; + final int predId1 = 3; + final int joinId2 = 4; + final int predId2 = 5; + final int joinId3 = 6; + final int predId3 = 7; + final int sliceId = 8; + + final IVariable<?> a = Var.var("a"); + final IVariable<?> b = Var.var("b"); + final IVariable<?> c = Var.var("c"); + final IVariable<?> d = Var.var("d"); + + final PipelineOp startOp = new StartOp(new BOp[] {}, + NV.asMap(new NV[] {// + new NV(Predicate.Annotations.BOP_ID, startId),// + new NV(SliceOp.Annotations.EVALUATION_CONTEXT, + BOpEvaluationContext.CONTROLLER),// + })); + + final Predicate<?> pred1Op = new Predicate<E>( + new IVariableOrConstant[] { a, b }, NV + .asMap(new NV[] {// + new NV(Predicate.Annotations.RELATION_NAME, + new String[] { namespace }),// + new NV(Predicate.Annotations.BOP_ID, predId1),// + new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// + })); + + final Predicate<?> pred2Op = new Predicate<E>( + new IVariableOrConstant[] { b, c }, NV + .asMap(new NV[] {// + new NV(Predicate.Annotations.RELATION_NAME, + new String[] { namespace }),// + new NV(Predicate.Annotations.BOP_ID, predId2),// + new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// + })); + + final Predicate<?> pred3Op = new Predicate<E>( + new IVariableOrConstant[] { c, d }, NV + .asMap(new NV[] {// + new NV(Predicate.Annotations.RELATION_NAME, + new String[] { namespace }),// + new NV(Predicate.Annotations.BOP_ID, predId3),// + new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// + })); + + final PipelineOp join1Op = new PipelineJoin<E>(// + new BOp[]{startOp},// + new NV(Predicate.Annotations.BOP_ID, joinId1),// + new NV(PipelineJoin.Annotations.PREDICATE,pred1Op)); + + final PipelineOp join2Op = new PipelineJoin<E>(// + new BOp[] { join1Op },// + new NV(Predicate.Annotations.BOP_ID, joinId2),// + new NV(PipelineJoin.Annotations.PREDICATE, pred2Op),// + // join is optional. + new NV(PipelineJoin.Annotations.OPTIONAL, true),// + // optional target is the same as the default target. + new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId)); + + final PipelineOp join3Op = new PipelineJoin<E>(// + new BOp[] { join2Op },// + new NV(Predicate.Annotations.BOP_ID, joinId3),// + new NV(PipelineJoin.Annotations.PREDICATE, pred3Op),// + // constraint d != Leon + new NV(PipelineJoin.Annotations.CONSTRAINTS, + new IConstraint[] { new NEConstant(d, new Constant<String>("Leon")) }), + // join is optional. + new NV(PipelineJoin.Annotations.OPTIONAL, true),// + // optional target is the same as the default target. + new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId)); + + final PipelineOp sliceOp = new SliceOp(// + new BOp[]{join3Op}, + NV.asMap(new NV[] {// + new NV(BOp.Annotations.BOP_ID, sliceId),// + new NV(BOp.Annotations.EVALUATION_CONTEXT, + BOpEvaluationContext.CONTROLLER),// + })); + + final PipelineOp query = sliceOp; + + // start the query. + final UUID queryId = UUID.randomUUID(); + final IChunkMessage<IBindingSet> initialChunkMessage; + { + + final IBindingSet initialBindings = new HashBindingSet(); + +// initialBindings.set(Var.var("x"), new Constant<String>("Mary")); + + initialChunkMessage = new LocalChunkMessage<IBindingSet>(queryEngine, + queryId, startId,// + -1, // partitionId + newBindingSetIterator(initialBindings)); + } + final RunningQuery runningQuery = queryEngine.eval(queryId, query, + initialChunkMessage); + + // verify solutions. + { + + // the expected solutions. 
+            final IBindingSet[] expected = new IBindingSet[] {//
+                // two solutions where the optional join succeeds.
+                new ArrayBindingSet(//
+                        new IVariable[] { a, b, c, d },//
+                        new IConstant[] { new Constant<String>("Paul"),
+                                new Constant<String>("Mary"),
+                                new Constant<String>("Brad"),
+                                new Constant<String>("Fred") }//
+                ),
+                new ArrayBindingSet(//
+                        new IVariable[] { a, b, c, d },//
+                        new IConstant[] { new Constant<String>("John"),
+                                new Constant<String>("Mary"),
+                                new Constant<String>("Brad"),
+                                new Constant<String>("Fred") }//
+                ),
+                // plus anything we read from the first access path which did not
+                // pass the optional join
+                new ArrayBindingSet(//
+                        new IVariable[] { a, b },//
+                        new IConstant[] { new Constant<String>("Paul"),
+                                new Constant<String>("Brad") }//
+                ),
+                new ArrayBindingSet(//
+                        new IVariable[] { a, b },//
+                        new IConstant[] { new Constant<String>("John"),
+                                new Constant<String>("Brad") }//
+                ),
+                new ArrayBindingSet(//
+                        new IVariable[] { a, b },//
+                        new IConstant[] { new Constant<String>("Mary"),
+                                new Constant<String>("Brad") }//
+                ),
+                new ArrayBindingSet(//
+                        new IVariable[] { a, b },//
+                        new IConstant[] { new Constant<String>("Brad"),
+                                new Constant<String>("Fred") }//
+                ),
+                new ArrayBindingSet(//
+                        new IVariable[] { a, b },//
+                        new IConstant[] { new Constant<String>("Brad"),
+                                new Constant<String>("Leon") }//
+                )
+            };
+
+            assertSameSolutionsAnyOrder(expected,
+                    new Dechunkerator<IBindingSet>(runningQuery.iterator()));
+
+        }
+
+        // Wait until the query is done.
+        runningQuery.get();
+        final Map<Integer, BOpStats> statsMap = runningQuery.getStats();
+        {
+            // validate the stats map.
+            assertNotNull(statsMap);
+            assertEquals(5, statsMap.size());
+            if (log.isInfoEnabled())
+                log.info(statsMap.toString());
+        }
+
+    }
+
+    /**
+     * Unit test for an optional join group with a filter on a variable outside
+     * the optional join group. Three joins are used and target a
+     * {@link SliceOp}. The 2nd and 3rd joins are an optional join
+     * group. Intermediate results which do not succeed on the optional join are
+     * forwarded to the {@link SliceOp} which is the target specified by the
+     * {@link PipelineOp.Annotations#ALT_SINK_REF}. The optional join group
+     * contains a filter that uses a variable outside the optional join group.
+     *
+     * The query takes the form:
+     *
+     * <pre>
+     * (a b)
+     * optional {
+     *   (b c)
+     *   (c d)
+     *   filter(a != Paul)
+     * }
+     * </pre>
+     *
+     * The (a b) tail will match everything in the knowledge base. The join
+     * group takes us two hops out from ?b. There should be two solutions
+     * that succeed the optional join group:
+     *
+     * <pre>
+     * (john mary brad fred)
+     * (john mary brad leon)
+     * </pre>
+     *
+     * and six more that don't succeed the optional join group:
+     *
+     * <pre>
+     * (paul mary) *
+     * (paul brad) *
+     * (john brad)
+     * (mary brad)
+     * (brad fred)
+     * (brad leon)
+     * </pre>
+     *
+     * In the cases marked with a *, ?a is bound to Paul even though there is
+     * a filter that specifically prohibits a = Paul. This is because the filter
+     * is inside the optional join group, which means that solutions can still
+     * include a = Paul, but the optional join group should not run in that
+     * case.
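+     * <p>
+     * Note: For illustration only (not part of the unit test; bd:knows and
+     * bd:paul follow the corresponding query in TestNestedOptionals), this is
+     * roughly:
+     *
+     * <pre>
+     * SELECT * WHERE {
+     *   ?a bd:knows ?b .
+     *   OPTIONAL {
+     *     ?b bd:knows ?c .
+     *     ?c bd:knows ?d .
+     *     FILTER (?a != bd:paul)
+     *   }
+     * }
+     * </pre>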
+     */
+    public void test_query_optionals_filter2() throws Exception {
+
+        final int startId = 1;
+        final int joinId1 = 2;
+        final int predId1 = 3;
+        final int joinId2 = 4;
+        final int predId2 = 5;
+        final int joinId3 = 6;
+        final int predId3 = 7;
+        final int sliceId = 8;
+
+        final IVariable<?> a = Var.var("a");
+        final IVariable<?> b = Var.var("b");
+        final IVariable<?> c = Var.var("c");
+        final IVariable<?> d = Var.var("d");
+
+        /*
+         * Not quite sure how to write this one. I think it probably goes
+         * something like this:
+         *
+         * 1. startOp
+         * 2. join1Op(a b)
+         * 3. conditionalRoutingOp( if a = Paul then goto sliceOp )
+         * 4. join2Op(b c)
+         * 5. join3Op(c d)
+         * 6. sliceOp
+         */
+
+        final PipelineOp startOp = new StartOp(new BOp[] {},
+                NV.asMap(new NV[] {//
+                        new NV(Predicate.Annotations.BOP_ID, startId),//
+                        new NV(SliceOp.Annotations.EVALUATION_CONTEXT,
+                                BOpEvaluationContext.CONTROLLER),//
+                }));
+
+        final PipelineOp sliceOp = new SliceOp(//
+                new BOp[]{startOp},
+                NV.asMap(new NV[] {//
+                        new NV(BOp.Annotations.BOP_ID, sliceId),//
+                        new NV(BOp.Annotations.EVALUATION_CONTEXT,
+                                BOpEvaluationContext.CONTROLLER),//
+                }));
+
+        final PipelineOp query = sliceOp;
+
+        // start the query.
+        final UUID queryId = UUID.randomUUID();
+        final IChunkMessage<IBindingSet> initialChunkMessage;
+        {
+
+            final IBindingSet initialBindings = new HashBindingSet();
+
+//            initialBindings.set(Var.var("x"), new Constant<String>("Mary"));
+
+            initialChunkMessage = new LocalChunkMessage<IBindingSet>(queryEngine,
+                    queryId, startId,//
+                    -1, // partitionId
+                    newBindingSetIterator(initialBindings));
+        }
+        final RunningQuery runningQuery = queryEngine.eval(queryId, query,
+                initialChunkMessage);
+
+        // verify solutions.
+        {
+
+            // the expected solutions.
+            final IBindingSet[] expected = new IBindingSet[] {//
+                // two solutions where the optional join succeeds.
+                new ArrayBindingSet(//
+                        new IVariable[] { a, b, c, d },//
+                        new IConstant[] { new Constant<String>("John"),
+                                new Constant<String>("Mary"),
+                                new Constant<String>("Brad"),
+                                new Constant<String>("Leon") }//
+                ),
+                new ArrayBindingSet(//
+                        new IVariable[] { a, b, c, d },//
+                        new IConstant[] { new Constant<String>("John"),
+                                new Constant<String>("Mary"),
+                                new Constant<String>("Brad"),
+                                new Constant<String>("Fred") }//
+                ),
+                // plus anything we read from the first access path which did not
+                // pass the optional join
+                new ArrayBindingSet(//
+                        new IVariable[] { a, b },//
+                        new IConstant[] { new Constant<String>("Paul"),
+                                new Constant<String>("Mary") }//
+                ),
+                new ArrayBindingSet(//
+                        new IVariable[] { a, b },//
+                        new IConstant[] { new Constant<String>("Paul"),
+                                new Constant<String>("Brad") }//
+                ),
+                new ArrayBindingSet(//
+                        new IVariable[] { a, b },//
+                        new IConstant[] { new Constant<String>("John"),
+                                new Constant<String>("Brad") }//
+                ),
+                new ArrayBindingSet(//
+                        new IVariable[] { a, b },//
+                        new IConstant[] { new Constant<String>("Mary"),
+                                new Constant<String>("Brad") }//
+                ),
+                new ArrayBindingSet(//
+                        new IVariable[] { a, b },//
+                        new IConstant[] { new Constant<String>("Brad"),
+                                new Constant<String>("Fred") }//
+                ),
+                new ArrayBindingSet(//
+                        new IVariable[] { a, b },//
+                        new IConstant[] { new Constant<String>("Brad"),
+                                new Constant<String>("Leon") }//
+                )
+            };
+
+            assertSameSolutionsAnyOrder(expected,
+                    new Dechunkerator<IBindingSet>(runningQuery.iterator()));
+
+        }
+
+        // Wait until the query is done.
+        runningQuery.get();
+        final Map<Integer, BOpStats> statsMap = runningQuery.getStats();
+        {
+            // validate the stats map.
+ assertNotNull(statsMap); + assertEquals(6, statsMap.size()); + if (log.isInfoEnabled()) + log.info(statsMap.toString()); + } + + } + + /** + * Verify the expected solutions. + * + * @param expected + * @param itr + */ + static public void assertSameSolutions(final IBindingSet[] expected, + final IAsynchronousIterator<IBindingSet[]> itr) { + try { + int n = 0; + while (itr.hasNext()) { + final IBindingSet[] e = itr.next(); + if (log.isInfoEnabled()) + log.info(n + " : chunkSize=" + e.length); + for (int i = 0; i < e.length; i++) { + if (log.isInfoEnabled()) + log.info(n + " : " + e[i]); + if (n >= expected.length) { + fail("Willing to deliver too many solutions: n=" + n + + " : " + e[i]); + } + if (!expected[n].equals(e[i])) { + fail("n=" + n + ", expected=" + expected[n] + + ", actual=" + e[i]); + } + n++; + } + } + assertEquals("Wrong number of solutions", expected.length, n); + } finally { + itr.close(); + } + } + + /** + * Verifies that the iterator visits the specified objects in some arbitrary + * ordering and that the iterator is exhausted once all expected objects + * have been visited. The implementation uses a selection without + * replacement "pattern". + * <p> + * Note: If the objects being visited do not correctly implement hashCode() + * and equals() then this can fail even if the desired objects would be + * visited. When this happens, fix the implementation classes. + */ + static public <T> void assertSameSolutionsAnyOrder(final T[] expected, + final Iterator<T> actual) { + + assertSameSolutionsAnyOrder("", expected, actual); + + } + + /** + * Verifies that the iterator visits the specified objects in some arbitrary + * ordering and that the iterator is exhausted once all expected objects + * have been visited. The implementation uses a selection without + * replacement "pattern". + * <p> + * Note: If the objects being visited do not correctly implement hashCode() + * and equals() then this can fail even if the desired objects would be + * visited. When this happens, fix the implementation classes. + */ + static public <T> void assertSameSolutionsAnyOrder(final String msg, + final T[] expected, final Iterator<T> actual) { + + try { + + /* + * Populate a map that we will use to realize the match and + * selection without replacement logic. The map uses counters to + * handle duplicate keys. This makes it possible to write tests in + * which two or more binding sets which are "equal" appear. + */ + + final int nrange = expected.length; + + final java.util.Map<T, AtomicInteger> range = new java.util.LinkedHashMap<T, AtomicInteger>(); + + for (int j = 0; j < nrange; j++) { + + AtomicInteger count = range.get(expected[j]); + + if (count == null) { + + count = new AtomicInteger(); + + } + + range.put(expected[j], count); + + count.incrementAndGet(); + + } + + // Do selection without replacement for the objects visited by + // iterator. 
+
+            for (int j = 0; j < nrange; j++) {
+
+                if (!actual.hasNext()) {
+
+                    fail(msg
+                            + ": Iterator exhausted while expecting more object(s)"
+                            + ": index=" + j);
+
+                }
+
+                final T actualObject = actual.next();
+
+                if (log.isInfoEnabled())
+                    log.info("visiting: " + actualObject);
+
+                AtomicInteger counter = range.get(actualObject);
+
+                if (counter == null || counter.get() == 0) {
+
+                    fail("Object not expected" + ": index=" + j + ", object="
+                            + actualObject);
+
+                }
+
+                counter.decrementAndGet();
+
+            }
+
+            if (actual.hasNext()) {
+
+                fail("Iterator will deliver too many objects.");
+
+            }
+
+        } finally {
+
+            if (actual instanceof ICloseableIterator<?>) {
+
+                ((ICloseableIterator<T>) actual).close();
+
+            }
+
+        }
+
+    }
+
+}

Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestNestedOptionals.java
===================================================================
--- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestNestedOptionals.java	2010-12-06 22:09:42 UTC (rev 3997)
+++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestNestedOptionals.java	2010-12-07 22:31:55 UTC (rev 3998)
@@ -26,11 +26,11 @@

 package com.bigdata.rdf.sail;

-import java.util.Arrays;
 import java.util.Collection;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Properties;
+
 import org.apache.log4j.Logger;
 import org.openrdf.model.Literal;
 import org.openrdf.model.URI;
@@ -49,6 +49,13 @@
 import org.openrdf.query.algebra.TupleExpr;
 import org.openrdf.query.algebra.ValueExpr;
 import org.openrdf.query.algebra.Var;
+import org.openrdf.repository.Repository;
+import org.openrdf.repository.RepositoryConnection;
+import org.openrdf.repository.sail.SailRepository;
+import org.openrdf.repository.sail.SailTupleQuery;
+import org.openrdf.sail.Sail;
+import org.openrdf.sail.memory.MemoryStore;
+
 import com.bigdata.rdf.axioms.NoAxioms;
 import com.bigdata.rdf.store.BD;
 import com.bigdata.rdf.vocab.NoVocabulary;
@@ -92,9 +99,160 @@
     public TestNestedOptionals(String arg0) {
         super(arg0);
     }
+
+    public void testNestedOptionals() throws Exception {
+
+        final Sail sail = new MemoryStore();
+        sail.initialize();
+        final Repository repo = new SailRepository(sail);
+        final RepositoryConnection cxn = repo.getConnection();
+        cxn.setAutoCommit(false);
+
+        try {
+
+            final ValueFactory vf = sail.getValueFactory();

-    public void testNestedOptionals1() throws Exception {
+            /*
+             * Create some terms.
+             */
+            final URI john = vf.createURI(BD.NAMESPACE + "john");
+            final URI mary = vf.createURI(BD.NAMESPACE + "mary");
+            final URI leon = vf.createURI(BD.NAMESPACE + "leon");
+            final URI paul = vf.createURI(BD.NAMESPACE + "paul");
+            final URI brad = vf.createURI(BD.NAMESPACE + "brad");
+            final URI fred = vf.createURI(BD.NAMESPACE + "fred");
+            final URI knows = vf.createURI(BD.NAMESPACE + "knows");

+            /*
+             * Create some statements.
+             */
+            cxn.add(paul, knows, mary);
+            cxn.add(paul, knows, brad);
+
+            cxn.add(john, knows, mary);
+            cxn.add(john, knows, brad);
+
+            cxn.add(mary, knows, brad);
+            cxn.add(brad, knows, fred);
+            cxn.add(brad, knows, leon);
+
+            /*
+             * Note: Either flush() or commit() is required to flush the
+             * statement buffers to the database before executing any operations
+             * that go around the sail.
+             */
+            cxn.commit();
+
+            {
+
+                String query =
+                    "prefix bd: <"+BD.NAMESPACE+"> " +
+                    "select * " +
+                    "where { " +
+                    "  ?a bd:knows ?b . " +
+                    "  OPTIONAL { " +
+                    "    ?b bd:knows ?c . " +
+                    "    ?c bd:knows ?d . 
" + + " } " + + "}"; + + final SailTupleQuery tupleQuery = (SailTupleQuery) + cxn.prepareTupleQuery(QueryLanguage.SPARQL, query); + tupleQuery.setIncludeInferred(false /* includeInferred */); + + if (INFO) { + log.info(query); + final TupleQueryResult result = tupleQuery.evaluate(); + while (result.hasNext()) { + log.info(result.next()); + } + } + + final Collection<BindingSet> answer = new LinkedList<BindingSet>(); + answer.add(createBindingSet()); + + // result = tupleQuery.evaluate(); + // compare(result, answer); + + } + + { + + String query = + "prefix bd: <"+BD.NAMESPACE+"> " + + "select * " + + "where { " + + " ?a bd:knows ?b . " + + " OPTIONAL { " + + " ?b bd:knows ?c . " + + " ?c bd:knows ?d . " + + " filter(?a != bd:paul) " + + " } " + + "}"; + + final SailTupleQuery tupleQuery = (SailTupleQuery) + cxn.prepareTupleQuery(QueryLanguage.SPARQL, query); + tupleQuery.setIncludeInferred(false /* includeInferred */); + + if (INFO) { + log.info(query); + final TupleQueryResult result = tupleQuery.evaluate(); + while (result.hasNext()) { + log.info(result.next()); + } + } + + final Collection<BindingSet> answer = new LinkedList<BindingSet>(); + answer.add(createBindingSet()); + + // result = tupleQuery.evaluate(); + // compare(result, answer); + + } + + { + + String query = + "prefix bd: <"+BD.NAMESPACE+"> " + + "select * " + + "where { " + + " ?a bd:knows ?b . " + + " OPTIONAL { " + + " ?b bd:knows ?c . " + + " ?c bd:knows ?d . " + + " filter(?d != bd:leon) " + + " } " + + "}"; + + final SailTupleQuery tupleQuery = (SailTupleQuery) + cxn.prepareTupleQuery(QueryLanguage.SPARQL, query); + tupleQuery.setIncludeInferred(false /* includeInferred */); + + if (INFO) { + log.info(query); + final TupleQueryResult result = tupleQuery.evaluate(); + while (result.hasNext()) { + log.info(result.next()); + } + } + + final Collection<BindingSet> answer = new LinkedList<BindingSet>(); + answer.add(createBindingSet()); + + // result = tupleQuery.evaluate(); + // compare(result, answer); + + } + + } finally { + cxn.close(); + sail.shutDown(); + } + + } + + private void __testNestedOptionals1() throws Exception { + final BigdataSail sail = getSail(); sail.initialize(); final BigdataSailRepository repo = new BigdataSailRepository(sail); @@ -197,7 +355,7 @@ } - public void testNestedOptionals2() throws Exception { + private void __testNestedOptionals2() throws Exception { final BigdataSail sail = getSail(); sail.initialize(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2011-01-02 00:41:41
|
Revision: 4044 http://bigdata.svn.sourceforge.net/bigdata/?rev=4044&view=rev Author: thompsonbry Date: 2011-01-02 00:41:31 +0000 (Sun, 02 Jan 2011) Log Message: ----------- Added a new IRunningQuery implementation based on chaining together operators using a blocking queue in front of each operator. The new implementation is conditionally enabled by an annotation. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContextBase.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/AbstractSubqueryOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/BlockingBufferWithStats.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/IChunkHandler.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/IRunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngineTestAnnotations.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryResultIterator.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunState.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/StandaloneChunkHandler.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/CancelQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/FederatedQueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/FederatedRunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/FederationChunkHandler.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/SliceOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/MultiplexBlockingBuffer.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/util/config/LogUtil.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestUnion.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/MockRunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngineOptionalJoins.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine_Slice.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestFederatedQueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBarData.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/AbstractRunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/ChunkedRunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/OutputStatsBuffer.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/SinkTransitionBuffer.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/SinkTransitionMetadata.java 
branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/StandaloneChainedRunningQuery.java Removed Paths: ------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java 2010-12-22 17:32:36 UTC (rev 4043) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java 2011-01-02 00:41:31 UTC (rev 4044) @@ -413,40 +413,4 @@ // // } -// /** -// * Copy data from the source to the sink. The sink will be flushed and -// * closed. The source will be closed. -// */ -// public void copySourceToSink() { -// -// // source. -// final IAsynchronousIterator<IBindingSet[]> source = (IAsynchronousIterator) getSource(); -// -// // default sink -// final IBlockingBuffer<IBindingSet[]> sink = (IBlockingBuffer) getSink(); -// -// final BOpStats stats = getStats(); -// -// try { -// -// // copy binding sets from the source. -// BOpUtility.copy(source, sink, null/* sink2 */, -// null/* constraints */, stats); -// -// // flush the sink. -// sink.flush(); -// -// } finally { -// -// sink.close(); -// -// if (sink2 != null) -// sink2.close(); -// -// source.close(); -// -// } -// -// } - } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContextBase.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContextBase.java 2010-12-22 17:32:36 UTC (rev 4043) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContextBase.java 2011-01-02 00:41:31 UTC (rev 4044) @@ -233,13 +233,8 @@ * @todo replaces * {@link IJoinNexus#getTailAccessPath(IRelation, IPredicate)}. * - * @todo Reconcile with IRelation#getAccessPath(IPredicate) once the bop - * conversion is done. It has much of the same logic (this also - * handles remote access paths now). - * * @todo Support mutable relation views (no - just fix truth maintenance). */ -// @SuppressWarnings("unchecked") public <E> IAccessPath<E> getAccessPath(final IRelation<E> relation, final IPredicate<E> predicate) { @@ -252,146 +247,6 @@ return relation.getAccessPath(indexManager/* localIndexManager */, relation.getKeyOrder(predicate), predicate); -// /* -// * Note: ALWAYS use the "perfect" index. -// */ -// final IKeyOrder<E> keyOrder = relation.getKeyOrder(predicate); -//// { -//// final IKeyOrder<E> tmp = predicate.getKeyOrder(); -//// if (tmp != null) { -//// // use the specified index. -//// keyOrder = tmp; -//// } else { -//// // ask the relation for the best index. -//// keyOrder = relation.getKeyOrder(predicate); -//// } -//// } -//// -//// if (keyOrder == null) -//// throw new RuntimeException("No access path: " + predicate); -// -// final int partitionId = predicate.getPartitionId(); -// -// final long timestamp = (Long) predicate -// .getRequiredProperty(BOp.Annotations.TIMESTAMP); -// -// final int flags = predicate.getProperty( -// IPredicate.Annotations.FLAGS, -// IPredicate.Annotations.DEFAULT_FLAGS) -// | (TimestampUtility.isReadOnly(timestamp) ? 
IRangeQuery.READONLY -// : 0); -// -// final int chunkOfChunksCapacity = predicate.getProperty( -// BufferAnnotations.CHUNK_OF_CHUNKS_CAPACITY, -// BufferAnnotations.DEFAULT_CHUNK_OF_CHUNKS_CAPACITY); -// -// final int chunkCapacity = predicate.getProperty( -// BufferAnnotations.CHUNK_CAPACITY, -// BufferAnnotations.DEFAULT_CHUNK_CAPACITY); -// -// final int fullyBufferedReadThreshold = predicate.getProperty( -// IPredicate.Annotations.FULLY_BUFFERED_READ_THRESHOLD, -// IPredicate.Annotations.DEFAULT_FULLY_BUFFERED_READ_THRESHOLD); -// -// if (partitionId != -1) { -// -// /* -// * Note: This handles a read against a local index partition. For -// * scale-out, the [indexManager] will be the data service's local -// * index manager. -// * -// * Note: Expanders ARE NOT applied in this code path. Expanders -// * require a total view of the relation, which is not available -// * during scale-out pipeline joins. Likewise, the [backchain] -// * property will be ignored since it is handled by an expander. -// * -// * @todo Replace this with IRelation#getAccessPathForIndexPartition() -// */ -//// return ((AbstractRelation<?>) relation) -//// .getAccessPathForIndexPartition(indexManager, -//// (IPredicate) predicate); -// -// /* -// * @todo This is an error since expanders are currently ignored on -// * shard-wise access paths. While it is possible to enable expanders -// * for shard-wise access paths. -// */ -// if (predicate.getSolutionExpander() != null) -// throw new IllegalArgumentException(); -// -// final String namespace = relation.getNamespace();//predicate.getOnlyRelationName(); -// -// // The name of the desired index partition. -// final String name = DataService.getIndexPartitionName(namespace -// + "." + keyOrder.getIndexName(), partitionId); -// -// // MUST be a local index view. -// final ILocalBTreeView ndx = (ILocalBTreeView) indexManager -// .getIndex(name, timestamp); -// -// return new AccessPath<E>(relation, indexManager, timestamp, -// predicate, keyOrder, ndx, flags, chunkOfChunksCapacity, -// chunkCapacity, fullyBufferedReadThreshold).init(); -// -// } -// -//// accessPath = relation.getAccessPath((IPredicate) predicate); -// -// // Decide on a local or remote view of the index. -// final IIndexManager indexManager; -// if (predicate.isRemoteAccessPath()) { -// // use federation in scale-out for a remote access path. -// indexManager = fed != null ? fed : this.indexManager; -// } else { -// indexManager = this.indexManager; -// } -// -// // Obtain the index. -// final String fqn = AbstractRelation.getFQN(relation, keyOrder); -// final IIndex ndx = AbstractRelation.getIndex(indexManager, fqn, timestamp); -// -// if (ndx == null) { -// -// throw new IllegalArgumentException("no index? relation=" -// + relation.getNamespace() + ", timestamp=" + timestamp -// + ", keyOrder=" + keyOrder + ", pred=" + predicate -// + ", indexManager=" + getIndexManager()); -// -// } -// -// // Obtain the access path for that relation and index. -// final IAccessPath<E> accessPath = ((AbstractRelation<E>) relation) -// .newAccessPath(relation, indexManager, timestamp, predicate, -// keyOrder, ndx, flags, chunkOfChunksCapacity, -// chunkCapacity, fullyBufferedReadThreshold); -// -// // optionally wrap with an expander pattern. -// return expander(predicate, accessPath); - } -// /** -// * Optionally wrap with an expander pattern. 
-// * -// * @param predicate -// * @param accessPath -// * @return -// * @param <E> -// */ -// private <E> IAccessPath<E> expander(final IPredicate<E> predicate, -// final IAccessPath<E> accessPath) { -// -// final ISolutionExpander<E> expander = predicate.getSolutionExpander(); -// -// if (expander != null) { -// -// // allow the predicate to wrap the access path -// return expander.getAccessPath(accessPath); -// -// } -// -// return accessPath; -// -// } - } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/AbstractSubqueryOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/AbstractSubqueryOp.java 2010-12-22 17:32:36 UTC (rev 4043) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/AbstractSubqueryOp.java 2011-01-02 00:41:31 UTC (rev 4044) @@ -42,8 +42,8 @@ import com.bigdata.bop.IBindingSet; import com.bigdata.bop.PipelineOp; import com.bigdata.bop.bset.Tee; +import com.bigdata.bop.engine.IRunningQuery; import com.bigdata.bop.engine.QueryEngine; -import com.bigdata.bop.engine.RunningQuery; import com.bigdata.relation.accesspath.IAsynchronousIterator; import com.bigdata.util.concurrent.LatchedExecutor; @@ -167,7 +167,7 @@ private final AbstractSubqueryOp controllerOp; private final BOpContext<IBindingSet> context; - private final List<FutureTask<RunningQuery>> tasks = new LinkedList<FutureTask<RunningQuery>>(); + private final List<FutureTask<IRunningQuery>> tasks = new LinkedList<FutureTask<IRunningQuery>>(); private final CountDownLatch latch; private final int nparallel; private final Executor executor; @@ -204,7 +204,7 @@ * Task runs subquery and cancels all subqueries in [tasks] if * it fails. */ - tasks.add(new FutureTask<RunningQuery>(new SubqueryTask(op, + tasks.add(new FutureTask<IRunningQuery>(new SubqueryTask(op, context)) { /* * Hook future to count down the latch when the task is @@ -233,7 +233,7 @@ /* * Run subqueries with limited parallelism. */ - for (FutureTask<RunningQuery> ft : tasks) { + for (FutureTask<IRunningQuery> ft : tasks) { executor.execute(ft); } @@ -251,7 +251,7 @@ /* * Get the futures, throwing out any errors. */ - for (FutureTask<RunningQuery> ft : tasks) + for (FutureTask<IRunningQuery> ft : tasks) ft.get(); // Now that we know the subqueries ran Ok, flush the sink. @@ -263,7 +263,7 @@ } finally { // Cancel any tasks which are still running. - for (FutureTask<RunningQuery> ft : tasks) + for (FutureTask<IRunningQuery> ft : tasks) ft.cancel(true/* mayInterruptIfRunning */); context.getSink().close(); @@ -281,7 +281,7 @@ * @author <a href="mailto:tho...@us...">Bryan * Thompson</a> */ - private class SubqueryTask implements Callable<RunningQuery> { + private class SubqueryTask implements Callable<IRunningQuery> { /** * The evaluation context for the parent query. @@ -302,7 +302,7 @@ } - public RunningQuery call() throws Exception { + public IRunningQuery call() throws Exception { IAsynchronousIterator<IBindingSet[]> subquerySolutionItr = null; try { @@ -310,7 +310,7 @@ final QueryEngine queryEngine = parentContext.getRunningQuery() .getQueryEngine(); - final RunningQuery runningQuery = queryEngine + final IRunningQuery runningQuery = queryEngine .eval(subQueryOp); // Iterator visiting the subquery solutions. 
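The subquery evaluation pattern in the AbstractSubqueryOp diff above (hook each FutureTask to count down a latch, await the latch, then get() each future and cancel anything still running) does not depend on the bigdata classes at all. The following is a minimal, self-contained sketch of the same idiom using only java.util.concurrent; the class name, the fixed thread pool standing in for the LatchedExecutor, and the sleep-based task bodies are illustrative assumptions, not bigdata code.

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.FutureTask;

public class SubqueryPatternDemo {

    public static void main(final String[] args) throws Exception {

        final int nsubqueries = 8;
        final int nparallel = 2; // limited parallelism.
        final CountDownLatch latch = new CountDownLatch(nsubqueries);
        final List<FutureTask<Void>> tasks = new ArrayList<FutureTask<Void>>();

        for (int i = 0; i < nsubqueries; i++) {
            final int id = i;
            tasks.add(new FutureTask<Void>(new Callable<Void>() {
                public Void call() throws Exception {
                    Thread.sleep(50); // stand-in for evaluating one subquery.
                    System.out.println("subquery done: " + id);
                    return null;
                }
            }) {
                // Hook the future to count down the latch when the task is
                // done, whether it completes, fails, or is cancelled.
                protected void done() {
                    latch.countDown();
                }
            });
        }

        final ExecutorService executor = Executors.newFixedThreadPool(nparallel);
        try {
            // Run subqueries with limited parallelism.
            for (FutureTask<Void> ft : tasks)
                executor.execute(ft);
            // Wait for all subqueries to complete (or fail).
            latch.await();
            // Get the futures, throwing out any errors.
            for (FutureTask<Void> ft : tasks)
                ft.get();
        } finally {
            // Cancel any tasks which are still running.
            for (FutureTask<Void> ft : tasks)
                ft.cancel(true/* mayInterruptIfRunning */);
            executor.shutdown();
        }
    }
}

Overriding FutureTask.done() is the key detail: it guarantees the latch counts down on every completion path, so the await() cannot hang when a subquery fails.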
Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2010-12-22 17:32:36 UTC (rev 4043) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2011-01-02 00:41:31 UTC (rev 4044) @@ -61,9 +61,9 @@ import com.bigdata.bop.PipelineOp; import com.bigdata.bop.ap.SampleIndex; import com.bigdata.bop.bindingSet.HashBindingSet; +import com.bigdata.bop.engine.IRunningQuery; import com.bigdata.bop.engine.LocalChunkMessage; import com.bigdata.bop.engine.QueryEngine; -import com.bigdata.bop.engine.RunningQuery; import com.bigdata.bop.join.PipelineJoin; import com.bigdata.bop.join.PipelineJoin.PipelineJoinStats; import com.bigdata.bop.rdf.join.DataSetJoin; @@ -1061,7 +1061,7 @@ // run the cutoff sampling of the edge. final UUID queryId = UUID.randomUUID(); - final RunningQuery runningQuery = queryEngine.eval(queryId, + final IRunningQuery runningQuery = queryEngine.eval(queryId, queryOp, new LocalChunkMessage<IBindingSet>(queryEngine, queryId, joinOp.getId()/* startId */, -1 /* partitionId */, @@ -2834,7 +2834,7 @@ final QueryEngine queryEngine = parentContext.getRunningQuery() .getQueryEngine(); - final RunningQuery runningQuery = queryEngine + final IRunningQuery runningQuery = queryEngine .eval( queryId, queryOp, Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/AbstractRunningQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/AbstractRunningQuery.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/AbstractRunningQuery.java 2011-01-02 00:41:31 UTC (rev 4044) @@ -0,0 +1,1000 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +/* + * Created on Dec 30, 2010 + */ + +package com.bigdata.bop.engine; + +import java.nio.ByteBuffer; +import java.util.Collections; +import java.util.Map; +import java.util.UUID; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.locks.ReentrantLock; + +import org.apache.log4j.Logger; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.BOpEvaluationContext; +import com.bigdata.bop.BOpUtility; +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.PipelineOp; +import com.bigdata.bop.solutions.SliceOp; +import com.bigdata.journal.IIndexManager; +import com.bigdata.journal.ITx; +import com.bigdata.relation.accesspath.IAsynchronousIterator; +import com.bigdata.relation.accesspath.IBlockingBuffer; +import com.bigdata.service.IBigdataFederation; +import com.bigdata.util.InnerCause; +import com.bigdata.util.concurrent.Haltable; + +/** + * Abstract base class for various {@link IRunningQuery} implementations. The + * purpose of this class is to isolate aspects common to different designs for + * managing resources for a running query and make it easier to realize + * different strategies for managing the resources allocated to a running query. + * <p> + * There are common requirements for the {@link IRunningQuery}, but a variety of + * ways in which those requirements can be met. Among the common requirements + * is a means to manage tradeoffs in the allocation of various resources to the + * operators in each query. Some of the more important tradeoffs are the #of + * threads to allocate to each operator (the #of threads bounds the achievable + * IO parallelism under Java 6 since we are using a synchronous IO model) and + * the amount of RAM allocated to each + * operator (including RAM on the JVM heap and RAM on the native Java process + * heap). If the #of threads is too restrictive, then queries will progress + * slowly due to insufficient IO level parallelism. If the query buffers too + * much data on the JVM heap, then it can cause GC overhead problems that can + * drastically reduce the responsiveness and throughput of the JVM. Data can be + * moved off of the JVM heap onto the Java process heap by serializing it into + * <em>direct</em> {@link ByteBuffer}s. This can be very efficient in + * combination with hash joins at the expense of increasing the latency to the + * first result when compared with pipelined evaluation. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +abstract public class AbstractRunningQuery implements IRunningQuery { + + /** + * Error message used when an operation which must be performed on the query + * controller is attempted on some other {@link IQueryPeer}. + */ + protected static final String ERR_NOT_CONTROLLER = "Operator only permitted on the query controller"; + + /** + * Error message used when a request is made after the query has stopped + * executing. 
+ */ + protected static final String ERR_QUERY_DONE = "Query is no longer running"; + + /** + * Error message used when a request is addressed to an operator other than + * the head of the pipeline in a context where the request must be addressed + * to the operator at the head of the pipeline (e.g., when presenting the + * initial binding sets to get the query moving). + */ + protected static final String ERR_NOT_PIPELINE_START = "Not pipeline start"; + + /** + * Error message used when no operator can be found for a given + * {@link BOp.Annotations#BOP_ID}. + */ + protected static final String ERR_NO_SUCH_BOP = "No such bop: id="; + + /** + * Error message used when two operators have the same + * {@link BOp.Annotations#BOP_ID}. + */ + protected static final String ERR_DUPLICATE_IDENTIFIER = "Duplicate identifier: id="; + + private final static transient Logger log = Logger + .getLogger(AbstractRunningQuery.class); + + /** + * The class executing the query on this node. + */ + final private QueryEngine queryEngine; + + /** The unique identifier for this query. */ + final private UUID queryId; + + /** + * The query deadline. The value is the system clock time in milliseconds + * when the query is due and {@link Long#MAX_VALUE} if there is no deadline. + * In order to have a guarantee of a consistent clock, the deadline is + * interpreted by the query controller. + */ + final private AtomicLong deadline = new AtomicLong(Long.MAX_VALUE); + + /** + * The timestamp (ms) when the query begins to execute. + */ + final private AtomicLong startTime = new AtomicLong(System + .currentTimeMillis()); + + /** + * The timestamp (ms) when the query is done executing and ZERO (0L) if the + * query is not done. + */ + final private AtomicLong doneTime = new AtomicLong(0L); + + /** + * <code>true</code> iff the outer {@link QueryEngine} is the controller for + * this query. + */ + final private boolean controller; + + /** + * The client executing this query (aka the query controller). + * <p> + * Note: The proxy is primarily for lightweight RMI messages used to + * coordinate the distributed query evaluation. Ideally, all large objects + * will be transferred among the nodes of the cluster using NIO buffers. + */ + final private IQueryClient clientProxy; + + /** The query. */ + final private PipelineOp query; + + /** + * An index from the {@link BOp.Annotations#BOP_ID} to the {@link BOp}. This + * index is generated by the constructor. It is immutable and thread-safe. + */ + private final Map<Integer, BOp> bopIndex; + + /** + * The run state of the query and the result of the computation iff it + * completes execution normally (without being interrupted, cancelled, etc). + */ + final private Haltable<Void> future = new Haltable<Void>(); + + /** + * The {@link Future} of this query. + * <p> + * Note: This is exposed to the {@link QueryEngine} to let it cache the + * {@link Future} for recently finished queries. + */ + final protected Future<Void> getFuture() { + + return future; + + } + + /** + * The runtime statistics for each {@link BOp} in the query and + * <code>null</code> unless this is the query controller. + */ + final private ConcurrentHashMap<Integer/* bopId */, BOpStats> statsMap; + + /** + * The buffer used for the overall output of the query pipeline. + * <p> + * Note: This only exists on the query controller, and then only when the + * top-level operator is not a mutation. 
In order to ensure that the results + * are transferred to the query controller in scale-out, the top-level + * operator in the query plan must specify + * {@link BOpEvaluationContext#CONTROLLER}. For example, {@link SliceOp} + * uses this {@link BOpEvaluationContext}. + */ + final private IBlockingBuffer<IBindingSet[]> queryBuffer; + + /** + * The iterator draining the {@link #queryBuffer} and <code>null</code> iff + * the {@link #queryBuffer} is <code>null</code>. + */ + final private IAsynchronousIterator<IBindingSet[]> queryIterator; + + /** + * A lock guarding various state changes. This guards changes to the + * internal state of the {@link #runState} object. It is also used to + * serialize requests to {@link #acceptChunk(IChunkMessage)} and + * {@link #cancel(boolean)} and make an atomic decision concerning whether to + * attach a new {@link IChunkMessage} to an operator task which is already + * running or to start a new task for that message. + * + * @see RunState + */ + protected final ReentrantLock lock = new ReentrantLock(); + + /** + * The run state of this query and <code>null</code> unless this is the + * query controller. + */ + final private RunState runState; + + /** + * Flag used to prevent retriggering of {@link #lifeCycleTearDownQuery()}. + */ + private final AtomicBoolean didQueryTearDown = new AtomicBoolean(false); + + /** + * Set the query deadline. The query will be cancelled when the deadline is + * passed. If the deadline has already passed, the query is cancelled + * immediately. + * + * @param deadline + * The deadline. + * @throws IllegalArgumentException + * if the deadline is non-positive. + * @throws IllegalStateException + * if the deadline was already set. + * @throws UnsupportedOperationException + * unless this node is the query controller. + */ + final public void setDeadline(final long deadline) { + + if (!controller) + throw new UnsupportedOperationException(ERR_NOT_CONTROLLER); + + if (deadline <= 0) + throw new IllegalArgumentException(); + + // set the deadline. + if (!this.deadline + .compareAndSet(Long.MAX_VALUE/* expect */, deadline/* update */)) { + + // the deadline is already set. + throw new IllegalStateException(); + + } + + if (deadline < System.currentTimeMillis()) { + + // deadline has already expired. + halt(new TimeoutException()); + + } + + } + + final public long getDeadline() { + + return deadline.get(); + + } + + final public long getStartTime() { + + return startTime.get(); + + } + + final public long getDoneTime() { + + return doneTime.get(); + + } + + final public long getElapsed() { + + long mark = doneTime.get(); + + if (mark == 0L) + mark = System.currentTimeMillis(); + + return mark - startTime.get(); + + } + + /** + * Return the buffer used for the overall output of the query pipeline and + * <code>null</code> if this is not the query controller. + */ + final protected IBlockingBuffer<IBindingSet[]> getQueryBuffer() { + + return queryBuffer; + + } + + public QueryEngine getQueryEngine() { + + return queryEngine; + + } + + /** + * The client executing this query (aka the query controller). + * <p> + * Note: The proxy is primarily for lightweight RMI messages used to + * coordinate the distributed query evaluation. Ideally, all large objects + * will be transferred among the nodes of the cluster using NIO buffers. + */ + final public IQueryClient getQueryController() { + + return clientProxy; + + } + + /** + * The unique identifier for this query. 
+ */ + final public UUID getQueryId() { + + return queryId; + + } + + /** + * Return the operator tree for this query. + */ + final public PipelineOp getQuery() { + + return query; + + } + + /** + * Return <code>true</code> iff this is the query controller. + */ + final public boolean isController() { + + return controller; + + } + + final public Map<Integer/* bopId */, BOpStats> getStats() { + + return Collections.unmodifiableMap(statsMap); + + } + + /** + * Return the {@link BOpStats} instance associated with the given + * {@link BOp} identifier. + * + * @param bopId + * The {@link BOp} identifier. + * + * @return The associated {@link BOpStats} object -or- <code>null</code> if + * there is no entry for that {@link BOp} identifier. + * + * @throws IllegalArgumentException + * if the argument is <code>null</code>. + */ + final public BOpStats getStats(final Integer bopId) { + + if (bopId == null) + throw new IllegalArgumentException(); + + return statsMap.get(bopId); + + } + + final public Map<Integer, BOp> getBOpIndex() { + + return bopIndex; + + } + + /** + * @param queryEngine + * The {@link QueryEngine} on which the query is running. In + * scale-out, a query is typically instantiated on many + * {@link QueryEngine}s. + * @param queryId + * The identifier for that query. + * @param controller + * <code>true</code> iff the {@link QueryEngine} is the query + * controller for this query (the {@link QueryEngine} which will + * coordinate the query evaluation). + * @param clientProxy + * The query controller. In standalone, this is the same as the + * <i>queryEngine</i>. In scale-out, this is an RMI proxy for the + * query controller whenever the query is instantiated on a node + * other than the query controller itself. + * @param query + * The query. + * + * @throws IllegalArgumentException + * if any argument is <code>null</code>. + * @throws IllegalArgumentException + * if the <i>readTimestamp</i> is {@link ITx#UNISOLATED} + * (queries may not read on the unisolated indices). + * @throws IllegalArgumentException + * if the <i>writeTimestamp</i> is neither + * {@link ITx#UNISOLATED} nor a read-write transaction + * identifier. + */ + public AbstractRunningQuery(final QueryEngine queryEngine, + final UUID queryId, final boolean controller, + final IQueryClient clientProxy, final PipelineOp query) { + + if (queryEngine == null) + throw new IllegalArgumentException(); + + if (queryId == null) + throw new IllegalArgumentException(); + + if (clientProxy == null) + throw new IllegalArgumentException(); + + if (query == null) + throw new IllegalArgumentException(); + + this.queryEngine = queryEngine; + + this.queryId = queryId; + + this.controller = controller; + + this.clientProxy = clientProxy; + + this.query = query; + + this.bopIndex = BOpUtility.getIndex(query); + + /* + * Setup the BOpStats object for each pipeline operator in the query. + */ + if (controller) { + + runState = new RunState(this); + + statsMap = new ConcurrentHashMap<Integer, BOpStats>(); + + populateStatsMap(query); + + /* + * FIXME Review the concept of mutation queries. It used to be that + * queries could only either read or write. Now we have access paths + * which either read or write and each query could use zero or more + * such access paths. + */ + if (true/* !query.isMutation() */) { + + // read-only query. 
+ + final BOpStats queryStats = statsMap.get(query.getId()); + + queryBuffer = new BlockingBufferWithStats<IBindingSet[]>(query, + queryStats); + + queryIterator = new QueryResultIterator<IBindingSet[]>(this, + queryBuffer.iterator()); + + // } else { + // + // // Note: Not used for mutation queries. + // queryBuffer = null; + // queryIterator = null; + + } + + } else { + + runState = null; // Note: only on the query controller. + statsMap = null; // Note: only on the query controller. + queryBuffer = null; // Note: only on the query controller. + queryIterator = null; // Note: only when queryBuffer is defined. + + } + + } + + /** + * Pre-populate a map with {@link BOpStats} objects for the query. Only the + * child operands are visited. Operators in subqueries are not visited since + * they will be assigned {@link BOpStats} objects when they are run as a + * subquery. + * + * @see BOp.Annotations#CONTROLLER + */ + private void populateStatsMap(final BOp op) { + + if (!(op instanceof PipelineOp)) + return; + + final PipelineOp bop = (PipelineOp) op; + + final int bopId = bop.getId(); + + statsMap.put(bopId, bop.newStats()); + + if (!op.getProperty(BOp.Annotations.CONTROLLER, + BOp.Annotations.DEFAULT_CONTROLLER)) { + /* + * Visit children, but not if this is a CONTROLLER operator since + * its children belong to a subquery. + */ + for (BOp t : op.args()) { + // visit children (recursion) + populateStatsMap(t); + } + } + + } + + /** + * Message provides notice that the query has started execution and will + * consume some specific number of binding set chunks. + * + * @param msg + * The initial message presented to the query. The message is + * used to update the query {@link RunState}. However, the + * message will not be consumed until it is presented to + * {@link #acceptChunk(IChunkMessage)} by the {@link QueryEngine} + * . + * + * @throws UnsupportedOperationException + * If this node is not the query coordinator. + */ + final protected void startQuery(final IChunkMessage<IBindingSet> msg) { + + if (!controller) + throw new UnsupportedOperationException(ERR_NOT_CONTROLLER); + + if (msg == null) + throw new IllegalArgumentException(); + + if (!queryId.equals(msg.getQueryId())) + throw new IllegalArgumentException(); + + lock.lock(); + + try { + + runState.startQuery(msg); + + lifeCycleSetUpQuery(); + + } catch (TimeoutException ex) { + + halt(ex); + + } finally { + + lock.unlock(); + + } + + } + + /** + * Message provides notice that the operator has started execution and will + * consume some specific number of binding set chunks. + * + * @param msg + * The {@link StartOpMessage}. + * + * @throws UnsupportedOperationException + * If this node is not the query coordinator. + */ + final protected void startOp(final StartOpMessage msg) { + + if (!controller) + throw new UnsupportedOperationException(ERR_NOT_CONTROLLER); + + if (msg == null) + throw new IllegalArgumentException(); + + if (!queryId.equals(msg.queryId)) + throw new IllegalArgumentException(); + + lock.lock(); + + try { + + if (runState.startOp(msg)) + lifeCycleSetUpOperator(msg.bopId); + + } catch (TimeoutException ex) { + + halt(ex); + + } finally { + + lock.unlock(); + + } + + } + + /** + * Message provides notice that the operator has ended execution. The + * termination conditions for the query are checked. (For scale-out, the + * node controlling the query needs to be involved for each operator + * start/stop in order to make the termination decision atomic). 
+ * + * @param msg + * The {@link HaltOpMessage}. + * + * @throws UnsupportedOperationException + * If this node is not the query coordinator. + */ + final protected void haltOp(final HaltOpMessage msg) { + + if (!controller) + throw new UnsupportedOperationException(ERR_NOT_CONTROLLER); + + if (msg == null) + throw new IllegalArgumentException(); + + if (!queryId.equals(msg.queryId)) + throw new IllegalArgumentException(); + + lock.lock(); + + try { + + // update per-operator statistics. + final BOpStats tmp = statsMap.putIfAbsent(msg.bopId, msg.taskStats); + + // combine stats, but do not combine a stats object with itself. + if (tmp != null && tmp != msg.taskStats) { + tmp.add(msg.taskStats); + } + + if (runState.haltOp(msg)) { + + /* + * No more chunks can appear for this operator so invoke its end + * of life cycle hook. + */ + + lifeCycleTearDownOperator(msg.bopId); + + if (runState.isAllDone()) { + + // Normal termination. + halt(); + + } + + } + + } catch (Throwable t) { + + halt(t); + + } finally { + + lock.unlock(); + + } + + } + + /** + * Hook invoked the first time the given operator is evaluated for the + * query. This may be used to set up life cycle resources for the operator, + * such as a distributed hash table on a set of nodes identified by + * annotations of the operator. + * + * @param bopId + * The operator identifier. + */ + protected void lifeCycleSetUpOperator(final int bopId) { + + if (log.isTraceEnabled()) + log.trace("queryId=" + queryId + ", bopId=" + bopId); + + } + + /** + * Hook invoked after the given operator has been evaluated for the + * query for what is known to be the last time. This may be used to tear + * down life cycle resources for the operator, such as a distributed hash + * table on a set of nodes identified by annotations of the operator. + * + * @param bopId + * The operator identifier. + */ + protected void lifeCycleTearDownOperator(final int bopId) { + + if (log.isTraceEnabled()) + log.trace("queryId=" + queryId + ", bopId=" + bopId); + + } + + /** + * Hook invoked before any operator is evaluated for the query. This may + * be used to set up life cycle resources for the query. + */ + protected void lifeCycleSetUpQuery() { + + if (log.isTraceEnabled()) + log.trace("queryId=" + queryId); + + } + + /** + * Hook invoked when the query terminates. This may be used to tear down + * life cycle resources for the query. + */ + protected void lifeCycleTearDownQuery() { + + if (log.isTraceEnabled()) + log.trace("queryId=" + queryId); + + } + + /** + * Make a chunk of binding sets available for consumption by the query. + * <p> + * Note: this is invoked by {@link QueryEngine#acceptChunk(IChunkMessage)} + * + * @param msg + * The chunk. + * + * @return <code>true</code> if the message was accepted. + * + * @todo Reconcile {@link #acceptChunk(IChunkMessage)} and + * {@link #consumeChunk()}. Clarify why {@link #consumeChunk()} is also + * used by the {@link QueryEngine}. + */ + abstract protected boolean acceptChunk(final IChunkMessage<IBindingSet> msg); + + /** + * Instruct the {@link IRunningQuery} to consume an {@link IChunkMessage} + * already on its input queue. + */ + abstract protected void consumeChunk(); + + final public IAsynchronousIterator<IBindingSet[]> iterator() { + + if (!controller) + throw new UnsupportedOperationException(ERR_NOT_CONTROLLER); + + if (queryIterator == null) + throw new UnsupportedOperationException(); + + return queryIterator; + + } + + public void halt() { + + lock.lock(); + + try { + + // signal normal completion. 
+ future.halt((Void) null); + + // interrupt anything which is running. + cancel(true/* mayInterruptIfRunning */); + + } finally { + + lock.unlock(); + + } + + } + + public Throwable halt(final Throwable t) { + + if (t == null) + throw new IllegalArgumentException(); + + lock.lock(); + + try { + + if (!InnerCause.isInnerCause(t, InterruptedException.class)) + log.error(toString(), t); + + try { + + // signal error condition. + return future.halt(t); + + } finally { + + // interrupt anything which is running. + cancel(true/* mayInterruptIfRunning */); + + } + + } finally { + + lock.unlock(); + + } + + } + + /** + * {@inheritDoc} + * <p> + * Cancelled queries : + * <ul> + * <li>must reject new chunks</li> + * <li>must cancel any running operators</li> + * <li>must not begin to evaluate operators</li> + * <li>must release all of their resources</li> + * <li>must not cause the solutions to be discarded before the client can + * consume them.</li> + * </ul> + */ + final public boolean cancel(final boolean mayInterruptIfRunning) { + lock.lock(); + try { + // halt the query. + boolean cancelled = future.cancel(mayInterruptIfRunning); + if (didQueryTearDown + .compareAndSet(false/* expect */, true/* update */)) { + /* + * Do additional cleanup exactly once. + */ + // cancel any running operators for this query on this node. + cancelled |= cancelRunningOperators(mayInterruptIfRunning); + if (controller) { + // cancel query on other peers. + cancelled |= cancelQueryOnPeers(future.getCause()); + } + if (queryBuffer != null) { + /* + * Close the query buffer so the iterator draining the query + * results will recognize that no new results will become + * available. + */ + queryBuffer.close(); + } + // life cycle hook for the end of the query. + lifeCycleTearDownQuery(); + // mark done time. + doneTime.set(System.currentTimeMillis()); + // log summary statistics for the query. + if (isController()) + QueryLog.log(this); + } + // remove from the collection of running queries. + queryEngine.halt(this); + // true iff we cancelled something. + return cancelled; + } finally { + lock.unlock(); + } + } + + /** + * Cancel any running operators for this query on this node (internal API). + * <p> + * Note: This will wind up invoking the tear down methods for each operator + * which was running or which could have been re-triggered. + * + * @return <code>true</code> if any operators were cancelled. + */ + abstract protected boolean cancelRunningOperators( + final boolean mayInterruptIfRunning); + + // { + // boolean cancelled = false; + // + // final Iterator<ConcurrentHashMap<ChunkFutureTask,ChunkFutureTask>> fitr = + // operatorFutures.values().iterator(); + // + // while (fitr.hasNext()) { + // + // final ConcurrentHashMap<ChunkFutureTask,ChunkFutureTask> set = + // fitr.next(); + // + // for(ChunkFutureTask f : set.keySet()) { + // + // if (f.cancel(mayInterruptIfRunning)) + // cancelled = true; + // + // } + // + // } + // + // return cancelled; + // + // } + + /** + * Cancel the query on each node where it is known to be running. + * <p> + * Note: The default implementation verifies that the caller is holding the + * {@link #lock} but is otherwise a NOP. This is overridden for scale-out. + * + * @param cause + * When non-<code>null</code>, the cause. + * + * @return <code>true</code> iff something was cancelled. + * + * @throws IllegalMonitorStateException + * unless the {@link #lock} is held by the current thread. + * @throws UnsupportedOperationException + * unless this is the query controller. 
+ */ + protected boolean cancelQueryOnPeers(final Throwable cause) { + + if (!controller) + throw new UnsupportedOperationException(ERR_NOT_CONTROLLER); + + if (!lock.isHeldByCurrentThread()) + throw new IllegalMonitorStateException(); + + return false; + + } + + final public Void get() throws InterruptedException, ExecutionException { + + return future.get(); + + } + + final public Void get(long arg0, TimeUnit arg1) + throws InterruptedException, ExecutionException, TimeoutException { + + return future.get(arg0, arg1); + + } + + final public boolean isCancelled() { + + return future.isCancelled(); + + } + + final public boolean isDone() { + + return future.isDone(); + + } + + final public Throwable getCause() { + + return future.getCause(); + + } + + public IBigdataFederation<?> getFederation() { + + return queryEngine.getFederation(); + + } + + public IIndexManager getIndexManager() { + + return queryEngine.getIndexManager(); + + } + + public String toString() { + final StringBuilder sb = new StringBuilder(getClass().getName()); + sb.append("{queryId=" + queryId); + sb.append(",deadline=" + deadline.get()); + sb.append(",isDone=" + isDone()); + sb.append(",isCancelled=" + isCancelled()); + sb.append(",runState=" + runState); + sb.append(",controller=" + controller); + sb.append(",clientProxy=" + clientProxy); + sb.append(",query=" + query); + sb.append("}"); + return sb.toString(); + } + + // abstract protected IChunkHandler getChunkHandler(); + +} Property changes on: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/AbstractRunningQuery.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/BlockingBufferWithStats.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/BlockingBufferWithStats.java 2010-12-22 17:32:36 UTC (rev 4043) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/BlockingBufferWithStats.java 2011-01-02 00:41:31 UTC (rev 4044) @@ -39,7 +39,11 @@ * to the buffer. * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id$ + * @version $Id: BlockingBufferWithStats.java 3838 2010-10-22 19:45:33Z + * thompsonbry $ + * + * @todo replace with {@link OutputStatsBuffer}? (It is still used by the + * {@link ChunkedRunningQuery} and by the query output buffer.) */ public class BlockingBufferWithStats<E> extends BlockingBuffer<E> { Copied: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/ChunkedRunningQuery.java (from rev 4039, branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java) =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/ChunkedRunningQuery.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/ChunkedRunningQuery.java 2011-01-02 00:41:31 UTC (rev 4044) @@ -0,0 +1,1592 @@ +/* + +Copyright (C) SYSTAP, LLC 2006-2008. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + */ +/* + * Created on Aug 31, 2010 + */ +package com.bigdata.bop.engine; + +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.UUID; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.Callable; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.Future; +import java.util.concurrent.FutureTask; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.RejectedExecutionException; +import java.util.concurrent.atomic.AtomicInteger; + +import org.apache.log4j.Logger; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.BOpContext; +import com.bigdata.bop.BOpUtility; +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.NoSuchBOpException; +import com.bigdata.bop.PipelineOp; +import com.bigdata.journal.ITx; +import com.bigdata.journal.Journal; +import com.bigdata.relation.accesspath.BufferClosedException; +import com.bigdata.relation.accesspath.IAsynchronousIterator; +import com.bigdata.relation.accesspath.IBlockingBuffer; +import com.bigdata.relation.accesspath.IMultiSourceAsynchronousIterator; +import com.bigdata.relation.accesspath.MultiSourceSequentialAsynchronousIterator; +import com.bigdata.service.IBigdataFederation; +import com.bigdata.util.InnerCause; +import com.bigdata.util.concurrent.Memoizer; + +/** + * {@link IRunningQuery} implementation based on the assignment of + * {@link IChunkMessage}(s) to an operator task. Operators (other than those + * with "operator-at-once" evaluation semantics) will typically be executed + * multiple times, consuming at least one {@link IChunkMessage} each time they + * are evaluated. {@link IChunkMessage}s target a specific operator (bopId) and + * shard (shardId). In scale-out, binding sets will be mapped across the target + * access path and may be replicated to one or more nodes depending on the + * distribution of the shards. This evaluation strategy is compatible with both + * the {@link Journal} (aka standalone) and the {@link IBigdataFederation} (aka + * clustered or scale-out). + * + * @todo The challenge with this implementation is managing the amount of data + * buffered on the JVM heap without introducing control structures which + * can result in deadlock or starvation. One way to manage this is to move + * the data off of the JVM heap onto direct ByteBuffers and then + * potentially spill blocks to disk, e.g., using an RWStore based cache + * pattern. + */ +public class ChunkedRunningQuery extends AbstractRunningQuery { + + private final static transient Logger log = Logger + .getLogger(ChunkedRunningQuery.class); + + /** + * Logger for the {@link ChunkTask}. + */ + private final static Logger chunkTaskLog = Logger + .getLogger(ChunkTask.class); + +// /** +// * The maximum number of operator tasks which may be concurrently executed +// * for a given (bopId,shardId). 
+// * +// * @see QueryEngineTestAnnotations#MAX_CONCURRENT_TASKS_PER_OPERATOR_AND_SHARD +// */ +// final private int maxConcurrentTasksPerOperatorAndShard; + +// /** +// * The maximum #of concurrent tasks for this query across all operators and +// * shards. +// * +// * Note: This is not a safe option and MUST be removed. It is possible for +// * N-1 tasks to back up with the Nth task not running due to concurrent +// * execution of some of the N-1 tasks. +// */ +// final private int maxConcurrentTasks = 10; + + /* + * FIXME Explore the use of this semaphore to limit the maximum #of messages + * further. (Note that placing a limit on messages would still allow us to + * buffer potentially many chunks. That could be solved by making LocalChunkMessage + * transparent in terms of the #of chunks or _binding_sets_ which it is + * carrying, but let's take this one step at a time). + * + * The first issue is ensuring that the query continues to make progress when + * a semaphore with a limited #of permits is introduced. This is because the + * ChunkFutureTask only attempts to schedule the next task for a given + * (bopId,shardId) but we could have failed to accept outstanding work for + * any of a number of operator/shard combinations. Likewise, the QueryEngine + * tells the RunningQuery to schedule work each time a message is dropped + * onto the QueryEngine, but the signal to execute more work is lost if the + * permits were not available immediately. + * + * One possibility would be to have a delayed retry. Another would be to + * have ChunkTaskFuture try to run *any* messages, not just messages for the + * same (bopId,shardId). + * + * Also, when scheduling work, there needs to be some bias towards the + * downstream operators in the query plan in order to ensure that they get a + * chance to clear work from upstream operators. This suggests that we might + * carry an order[] and use it to scan the work queue -- or make the work + * queue a priority heap using the order[] to place a primary sort over the + * bopIds in terms of the evaluation order and letting the shardIds fall in + * increasing shard order so we have a total order for the priority heap (a + * total order may also require a tie breaker, but I think that the priority + * heap allows ties). + * + * This concept of memory overhead and permits would be associated with the + * workload waiting on a given node for processing. (In scale-out, we do not + * care how much data is moving in the cluster, only how much data is + * challenging an individual machine). + * + * This emphasizes again why we need to get the data off of the Java heap. + * + * The same concept should apply for chained buffers. Maybe one way to do + * this is to allocate a fixed budget to each query for the Java heap and + * the C heap and then the query blocks or goes to disk. + */ +// /** +// * The maximum number of binding sets which may be outstanding before a task +// * which is producing binding sets will block. This value may be used to +// * limit the memory demand of a query in which some operators produce +// * binding sets faster than other operators can consume them. +// * +// * @todo This could be generalized to consider the Java heap separately from +// * the native heap as we get into the use of native ByteBuffers to +// * buffer intermediate results. +// * +// * @todo This is expressed in terms of messages and not {@link IBindingSet}s +// * because the {@link LocalChunkMessage} does not self-report the #of +// * {@link IBindingSet}s (or chunks). 
[It should really be bytes on the +// * heap even if we can count binding sets and #s of bindings, but we +// * do not serialize all binding sets so we have to have one measure +// * for serialized and one measure for live objects.] +// */ +// final private int maxOutstandingMessageCount = 100; +// +// /** +// * A counting semaphore used to limit the #of outstanding binding set chunks +// * which may be buffered before a producer will block when trying to emit +// * another chunk. +// * +// * @see HandleChunkBuffer#outputChunk(IBindingSet[]) +// * @see #scheduleNext(BSBundle) +// * +// * @see #maxOutstandingMessageCount +// */ +// final private Semaphore outstandingMessageSemaphore = new Semaphore(maxOutstandingMessageCount); + + /** + * A collection of (bopId,partitionId) keys mapped onto a collection of + * operator task evaluation contexts for currently executing operators for + * this query. + */ + private final ConcurrentHashMap<BSBundle, ConcurrentHashMap<ChunkFutureTask,ChunkFutureTask>> operatorFutures; + + /** + * A map of unbounded work queues for each (bopId,partitionId). Empty queues + * are removed from the map. + * <p> + * The map is guarded by the {@link #lock}. + */ + private final Map<BSBundle, BlockingQueue<IChunkMessage<IBindingSet>>> operatorQueues; + +// /** +// * When running in stand alone, we can chain together the operators and have +// * much higher throughput. Each operator has an {@link BlockingBuffer} which +// * is essentially its input queue. The operator will drain its input queue +// * using {@link BlockingBuffer#iterator()}. +// * <p> +// * Each operator closes its {@link IBlockingBuffer} sink(s) once its own +// * source has been closed and it has finished processing that source. Since +// * multiple producers can target the same operator, we need a means to +// * ensure that the source for the target operator is not closed until each +// * producer which targets that operator has closed its corresponding sink. +// * <p> +// * In order to support this many-to-one producer/consumer pattern, we wrap +// * the input queue (a {@link BlockingBuffer}) for each operator having +// * multiple sources wi... [truncated message content] |
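The FIXME above considers a counting semaphore as a cap on the chunks buffered across all of the per-(bopId,shardId) work queues. The following toy sketch shows only that basic producer/consumer mechanism; Bundle stands in for BSBundle, plain Objects stand in for chunk messages, the limit of 100 echoes the commented-out maxOutstandingMessageCount, and the sketch deliberately ignores the scheduling bias and starvation concerns which the comment raises.

import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.Semaphore;

public class BoundedChunkQueues {

    /** Cap on the #of chunks buffered across all bundles. */
    private final Semaphore permits = new Semaphore(100);

    /** One unbounded work queue per (bopId,shardId) bundle. */
    private final ConcurrentHashMap<Bundle, BlockingQueue<Object>> queues =
            new ConcurrentHashMap<Bundle, BlockingQueue<Object>>();

    /** Producer side: blocks once too many chunks are outstanding. */
    public void put(final Bundle bundle, final Object chunk)
            throws InterruptedException {
        permits.acquire(); // released by the consumer in take().
        BlockingQueue<Object> q = queues.get(bundle);
        if (q == null) {
            final BlockingQueue<Object> tmp = new LinkedBlockingQueue<Object>();
            q = queues.putIfAbsent(bundle, tmp);
            if (q == null)
                q = tmp; // we won the race to create the queue.
        }
        q.put(chunk);
    }

    /**
     * Consumer side: drains one chunk, freeing a permit. Assumes put() was
     * invoked for this bundle first (empty queues are never removed here).
     */
    public Object take(final Bundle bundle) throws InterruptedException {
        final Object chunk = queues.get(bundle).take();
        permits.release();
        return chunk;
    }

    /** Stand-in for the (bopId,partitionId) key. */
    public static class Bundle {
        private final int bopId, shardId;
        public Bundle(final int bopId, final int shardId) {
            this.bopId = bopId;
            this.shardId = shardId;
        }
        public boolean equals(final Object o) {
            if (!(o instanceof Bundle))
                return false;
            final Bundle b = (Bundle) o;
            return b.bopId == bopId && b.shardId == shardId;
        }
        public int hashCode() {
            return bopId * 31 + shardId;
        }
    }
}

As the FIXME notes, a global permit pool like this can stall progress unless the scheduler retries or runs work for other (bopId,shardId) combinations when permits free up; the sketch shows the accounting only.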
From: <tho...@us...> - 2011-01-02 21:40:08
|
Revision: 4045 http://bigdata.svn.sourceforge.net/bigdata/?rev=4045&view=rev Author: thompsonbry Date: 2011-01-02 21:40:00 +0000 (Sun, 02 Jan 2011) Log Message: ----------- Hooked the query hints up to the sail. Moved the declaration of the query hints namespace into a QueryHints interface in the sail package. Added a query hint to select the join optimizer (Static, Runtime, None). The Runtime query optimizer can only be used for plain triples without optionals right now. Adding support for optionals is easy enough (MikeP is signed up for this). Adding support for quads and scale-out is trickier since we place various annotations on the joins (not just the predicates) when configuring for named or default graph queries (e.g., when to use a REMOTE access path). Since the runtime query optimizer works directly with the IPredicates, the annotations for the joins probably need to be inferred directly from the annotations for the predicates in order for this to be compatible with the JoinGraph optimizer. Also, the DataSetJoin needs to be replaced by a standard join against an inline access path comprising the default or named graph data set. This requires some changes to the AccessPath class. PipelineJoin lacked a deep copy constructor. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/SliceOp.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/BD.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSail.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSailBooleanQuery.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSailGraphQuery.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSailRepositoryConnection.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSailTupleQuery.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/Rule2BOpUtility.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestQueryHints.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/QueryHints.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/QueryOptimizerEnum.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java 2011-01-02 00:41:31 UTC (rev 4044) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java 2011-01-02 21:40:00 UTC (rev 4045) @@ -360,8 +360,31 @@ if (!annotations.containsKey(name)) return defaultValue; - return (T) annotations.get(name); + final Object val = annotations.get(name); + if (defaultValue != null && val.getClass() != defaultValue.getClass()) { + + /* + * Attempt to convert to the correct target type. 
+ */ + + if (defaultValue.getClass() == Integer.class) { + return (T) Integer.valueOf("" + val); + } + if (defaultValue.getClass() == Long.class) { + return (T) Long.valueOf("" + val); + } + if (defaultValue.getClass() == Float.class) { + return (T) Float.valueOf("" + val); + } + if (defaultValue.getClass() == Double.class) { + return (T) Double.valueOf("" + val); + } + + } + + return (T) val; + } // @SuppressWarnings("unchecked") Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2011-01-02 00:41:31 UTC (rev 4044) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2011-01-02 21:40:00 UTC (rev 4045) @@ -1416,7 +1416,7 @@ return preds; } - + /** * Return the {@link BOp} identifiers of the predicates associated with * each vertex in path order. @@ -1851,6 +1851,55 @@ } + /** + * Return a permutation vector which may be used to reorder the given + * {@link IPredicate}[] into the evaluation order selected by the + * runtime query optimizer. + * + * @throws IllegalArgumentException + * if the argument is <code>null</code>. + * @throws IllegalArgumentException + * if the given {@link Path} does not cover all vertices in + * the join graph. + */ + public int[] getOrder(final Path p) { + + if(p == null) + throw new IllegalArgumentException(); + + final IPredicate[] path = p.getPredicates(); + + if (path.length != V.length) { + throw new IllegalArgumentException( + "Wrong path length: #vertices=" + V.length + + ", but path.length=" + path.length); + } + + final int[] order = new int[V.length]; + + for (int i = 0; i < order.length; i++) { + + boolean found = false; + for (int j = 0; j < order.length; j++) { + + if (path[i].getId() == V[j].pred.getId()) { + order[i] = j; + found = true; + break; + } + + } + + if (!found) + throw new RuntimeException("No such vertex: id=" + + path[i].getId()); + + } + + return order; + + } + /** * Choose the starting vertices. * Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java 2011-01-02 00:41:31 UTC (rev 4044) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java 2011-01-02 21:40:00 UTC (rev 4045) @@ -378,6 +378,15 @@ } /** + * Deep copy constructor. + * + * @param op + */ + public PipelineJoin(final PipelineJoin<E> op) { + super(op); + } + + /** * Shallow copy vararg constructor. * * @param args @@ -637,6 +646,9 @@ this.predicate = joinOp.getPredicate(); this.constraints = joinOp.constraints(); this.maxParallel = joinOp.getMaxParallel(); + if (maxParallel < 0) + throw new IllegalArgumentException(Annotations.MAX_PARALLEL + + "=" + maxParallel); if (maxParallel > 0) { // shared service. service = new LatchedExecutor(context.getIndexManager() Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/SliceOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/SliceOp.java 2011-01-02 00:41:31 UTC (rev 4044) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/SliceOp.java 2011-01-02 21:40:00 UTC (rev 4045) @@ -110,7 +110,7 @@ * Deep Copy constructor. 
* @param op */ - public SliceOp(SliceOp op) { + public SliceOp(final SliceOp op) { super(op); @@ -122,7 +122,7 @@ * @param args * @param annotations */ - public SliceOp(BOp[] args, Map<String, Object> annotations) { + public SliceOp(final BOp[] args, final Map<String, Object> annotations) { super(args, annotations); Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/BD.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/BD.java 2011-01-02 00:41:31 UTC (rev 4044) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/BD.java 2011-01-02 21:40:00 UTC (rev 4045) @@ -52,6 +52,7 @@ import org.openrdf.model.Value; import org.openrdf.model.impl.URIImpl; + /** * A vocabulary for bigdata specific extensions. * @@ -64,15 +65,6 @@ * The namespace used for bigdata specific extensions. */ String NAMESPACE = "http://www.bigdata.com/rdf#"; - - /** - * The namespace prefix used in SPARQL queries to signify query hints. You - * can embed query hints into a SPARQL query as follows: - * <code> - * PREFIX BIGDATA_QUERY_HINTS: <http://www.bigdata.com/queryHints#com.bigdata.relation.rule.eval.DefaultRuleTaskFactory.nestedSubquery=true&com.bigdata.fullScanTreshold=1000> - * </code> - */ - String QUERY_HINTS_NAMESPACE = "BIGDATA_QUERY_HINTS"; /** * The name of a per-statement attribute whose value is recognized in Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java 2011-01-02 00:41:31 UTC (rev 4044) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java 2011-01-02 21:40:00 UTC (rev 4045) @@ -230,7 +230,7 @@ final Properties properties = getProperties(); final File file; - if (false) { + if (true) { /* * Use a persistent file that is generated once and then reused by * each test run. @@ -899,6 +899,8 @@ // System.err.println(getName() + " : runtime optimizer join order " // + Arrays.toString(Path.getVertexIds(p.edges))); + System.err.println(getName() + " : order[]=" + Arrays.toString(g.getOrder(p))); + return p.getPredicates(); } finally { Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl.java 2011-01-02 00:41:31 UTC (rev 4044) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl.java 2011-01-02 21:40:00 UTC (rev 4045) @@ -366,7 +366,7 @@ * This is the top-level method called by the SAIL to evaluate a query. * The TupleExpr parameter here is guaranteed to be the root of the operator * tree for the query. Query hints are parsed by the SAIL from the - * namespaces in the original query. See {@link BD#QUERY_HINTS_NAMESPACE}. + * namespaces in the original query. See {@link QueryHints#NAMESPACE}. 
*/ public CloseableIteration<BindingSet, QueryEvaluationException> evaluate( TupleExpr expr, BindingSet bindings, Properties queryHints) @@ -1673,7 +1673,7 @@ final QueryEngine queryEngine = tripleSource.getSail().getQueryEngine(); - final int startId = 1; +// final int startId = 1; final PipelineOp query; { @@ -1686,7 +1686,7 @@ // Convert the step to a bigdata operator tree. query = Rule2BOpUtility.convert(step, idFactory, database, - queryEngine); + queryEngine, queryHints); if (log.isInfoEnabled()) log.info(query); Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSail.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSail.java 2011-01-02 00:41:31 UTC (rev 4044) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSail.java 2011-01-02 21:40:00 UTC (rev 4045) @@ -3117,7 +3117,7 @@ * {@link Options#QUERY_TIME_EXPANDER}, but not on a per-query basis. * <p> * QueryHints are a set of properties that are parsed from a SPARQL - * query. See {@link BD#QUERY_HINTS_NAMESPACE} for more information. + * query. See {@link QueryHints#NAMESPACE} for more information. * * @todo The [bindings] are supposed to be inputs to the query * evaluation, but I am still not quite clear what the role of the Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSailBooleanQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSailBooleanQuery.java 2011-01-02 00:41:31 UTC (rev 4044) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSailBooleanQuery.java 2011-01-02 21:40:00 UTC (rev 4045) @@ -12,14 +12,13 @@ import org.openrdf.sail.SailConnection; import org.openrdf.sail.SailException; import com.bigdata.rdf.sail.BigdataSail.BigdataSailConnection; -import com.bigdata.rdf.store.BD; public class BigdataSailBooleanQuery extends SailBooleanQuery implements BigdataSailQuery { /** * Query hints are embedded in query strings as namespaces. - * See {@link BD#QUERY_HINTS_NAMESPACE} for more information. + * See {@link QueryHints#NAMESPACE} for more information. */ private final Properties queryHints; @@ -32,7 +31,7 @@ /** * Overridden to use query hints from SPARQL queries. Query hints are * embedded in query strings as namespaces. - * See {@link BD#QUERY_HINTS_NAMESPACE} for more information. + * See {@link QueryHints#NAMESPACE} for more information. */ @Override public boolean evaluate() throws QueryEvaluationException { Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSailGraphQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSailGraphQuery.java 2011-01-02 00:41:31 UTC (rev 4044) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSailGraphQuery.java 2011-01-02 21:40:00 UTC (rev 4045) @@ -42,14 +42,13 @@ import org.openrdf.repository.sail.SailRepositoryConnection; import org.openrdf.sail.SailException; import com.bigdata.rdf.sail.BigdataSail.BigdataSailConnection; -import com.bigdata.rdf.store.BD; public class BigdataSailGraphQuery extends SailGraphQuery implements BigdataSailQuery { /** * Query hints are embedded in query strings as namespaces. 
- * See {@link BD#QUERY_HINTS_NAMESPACE} for more information. + * See {@link QueryHints#NAMESPACE} for more information. */ private final Properties queryHints; @@ -222,7 +221,7 @@ /** * Overridden to use query hints from SPARQL queries. Query hints are * embedded in query strings as namespaces. - * See {@link BD#QUERY_HINTS_NAMESPACE} for more information. + * See {@link QueryHints#NAMESPACE} for more information. */ @Override public GraphQueryResult evaluate() throws QueryEvaluationException { Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSailRepositoryConnection.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSailRepositoryConnection.java 2011-01-02 00:41:31 UTC (rev 4044) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSailRepositoryConnection.java 2011-01-02 21:40:00 UTC (rev 4045) @@ -29,7 +29,6 @@ import com.bigdata.rdf.sail.sparql.PrefixDeclProcessor; import com.bigdata.rdf.sail.sparql.StringEscapesProcessor; import com.bigdata.rdf.store.AbstractTripleStore; -import com.bigdata.rdf.store.BD; public class BigdataSailRepositoryConnection extends SailRepositoryConnection { @@ -52,7 +51,7 @@ * <p> * Overridden to capture query hints from SPARQL queries. Query hints are * embedded in query strings as namespaces. - * See {@link BD#QUERY_HINTS_NAMESPACE} for more information. + * See {@link QueryHints#NAMESPACE} for more information. */ @Override public SailGraphQuery prepareGraphQuery(final QueryLanguage ql, @@ -72,7 +71,7 @@ * <p> * Overridden to capture query hints from SPARQL queries. Query hints are * embedded in query strings as namespaces. - * See {@link BD#QUERY_HINTS_NAMESPACE} for more information. + * See {@link QueryHints#NAMESPACE} for more information. */ @Override public SailTupleQuery prepareTupleQuery(final QueryLanguage ql, @@ -89,7 +88,7 @@ * <p> * Overridden to capture query hints from SPARQL queries. Query hints are * embedded in query strings as namespaces. See - * {@link BD#QUERY_HINTS_NAMESPACE} for more information. + * {@link QueryHints#NAMESPACE} for more information. */ @Override public SailBooleanQuery prepareBooleanQuery(final QueryLanguage ql, @@ -106,7 +105,7 @@ * <p> * Overridden to capture query hints from SPARQL queries. Query hints are * embedded in query strings as namespaces. - * See {@link BD#QUERY_HINTS_NAMESPACE} for more information. + * See {@link QueryHints#NAMESPACE} for more information. */ @Override public SailQuery prepareQuery(final QueryLanguage ql, final String qs, @@ -251,7 +250,7 @@ /** * Parse query hints from a query string. Query hints are embedded in the * query string via special namespaces. - * See {@link BD#QUERY_HINTS_NAMESPACE} for more information. 
*/ private Properties parseQueryHints(QueryLanguage ql, String queryString, String baseURI) @@ -270,7 +269,7 @@ for (Map.Entry<String, String> prefix : prefixes.entrySet()) { // if we see one that matches the magic namespace, try // to parse it - if (prefix.getKey().equalsIgnoreCase(BD.QUERY_HINTS_NAMESPACE)) { + if (prefix.getKey().equalsIgnoreCase(QueryHints.NAMESPACE)) { String hints = prefix.getValue(); // has to have a # and it can't be at the end int i = hints.indexOf('#'); Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSailTupleQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSailTupleQuery.java 2011-01-02 00:41:31 UTC (rev 4044) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSailTupleQuery.java 2011-01-02 21:40:00 UTC (rev 4045) @@ -14,14 +14,13 @@ import org.openrdf.sail.SailConnection; import org.openrdf.sail.SailException; import com.bigdata.rdf.sail.BigdataSail.BigdataSailConnection; -import com.bigdata.rdf.store.BD; public class BigdataSailTupleQuery extends SailTupleQuery implements BigdataSailQuery { /** * Query hints are embedded in query strings as namespaces. - * See {@link BD#QUERY_HINTS_NAMESPACE} for more information. + * See {@link QueryHints#NAMESPACE} for more information. */ private final Properties queryHints; @@ -34,7 +33,7 @@ /** * Overriden to use query hints from SPARQL queries. Query hints are * embedded in query strings as namespaces. - * See {@link BD#QUERY_HINTS_NAMESPACE} for more information. + * See {@link QueryHints#NAMESPACE} for more information. */ @Override public TupleQueryResult evaluate() throws QueryEvaluationException { Added: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/QueryHints.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/QueryHints.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/QueryHints.java 2011-01-02 21:40:00 UTC (rev 4045) @@ -0,0 +1,66 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2011. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Jan 2, 2011 + */ + +package com.bigdata.rdf.sail; + +import com.bigdata.bop.BOp; + +/** + * Query hint directives understood by a bigdata SPARQL end point. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public interface QueryHints { + + /** + * The namespace prefix used in SPARQL queries to signify query hints. 
You + * can embed query hints into a SPARQL query as follows: + * + * <pre> + * PREFIX BIGDATA_QUERY_HINTS: <http://www.bigdata.com/queryHints#name1=value1&name2=value2> + * </pre> + * + * where <i>name</i> is the name of a query hint and <i>value</i> is the + * value associated with that query hint. Multiple query hints can be + * specified (as shown in this example) using a <code>&</code> character + * to separate each name=value pair. + * <p> + * Query hints are either directives understood by the SPARQL end point or + * {@link BOp.Annotations}. A list of the known directives is declared by + * this interface. + */ + String NAMESPACE = "BIGDATA_QUERY_HINTS"; + + /** + * Specify the query optimizer. + * + * @see QueryOptimizerEnum + */ + String OPTIMIZER = QueryHints.class.getName() + ".optimizer"; + +} Property changes on: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/QueryHints.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Added: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/QueryOptimizerEnum.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/QueryOptimizerEnum.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/QueryOptimizerEnum.java 2011-01-02 21:40:00 UTC (rev 4045) @@ -0,0 +1,69 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2011. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Jan 2, 2011 + */ +package com.bigdata.rdf.sail; + +/** + * The known query optimizers. + * + * @see QueryHints#OPTIMIZER + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public enum QueryOptimizerEnum { + /** + * The query optimizer is disabled. The joins in the query will be evaluated + * in the order in which they are given. This may be used to compensate when + * the static query optimizer produces an inefficient join ordering. + */ + None, + /** + * A query optimizer based on a static analysis of the query which relies on + * fast range counts for the basic graph patterns to estimate the + * cardinality of the different access paths. This optimizer is fast but it + * can fail to order joins correctly as the error in the estimated + * cardinality of joins can grow exponentially in the number of joins in the + * query. + */ + Static, + /** + * A runtime query optimizer based on sampling. The runtime query optimizer + * samples each of the access paths and each of the joins and builds out + * join paths in a breadth first manner until it finds a join ordering which + * is known to dominate the other possible join orderings. 
The runtime query + * optimizer takes into account the actual cardinality and correlation in + * the query and the data selected by that query. The runtime query + * optimizer can have slightly more overhead than the static query + * optimizer, but it never produces a bad join ordering and often identifies + * the <em>best</em> join ordering. For cases where the <code>static</code> + * query optimizer produces a bad join ordering, the runtime query optimizer + * can find join orderings which are orders of magnitude more efficient (10x + * or 100x). For long running joins, this can translate into a savings of + * minutes or hours. + */ + Runtime; +} Property changes on: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/QueryOptimizerEnum.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/Rule2BOpUtility.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/Rule2BOpUtility.java 2011-01-02 00:41:31 UTC (rev 4044) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/Rule2BOpUtility.java 2011-01-02 21:40:00 UTC (rev 4045) @@ -30,12 +30,14 @@ import java.io.Serializable; import java.util.Arrays; import java.util.Collection; +import java.util.Enumeration; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.Properties; import java.util.Set; import java.util.concurrent.atomic.AtomicInteger; @@ -64,6 +66,8 @@ import com.bigdata.bop.bset.StartOp; import com.bigdata.bop.controller.Steps; import com.bigdata.bop.controller.Union; +import com.bigdata.bop.controller.JoinGraph.JGraph; +import com.bigdata.bop.controller.JoinGraph.Path; import com.bigdata.bop.cost.ScanCostReport; import com.bigdata.bop.cost.SubqueryCostReport; import com.bigdata.bop.engine.QueryEngine; @@ -243,13 +247,13 @@ */ public static PipelineOp convert(final IStep step, final AtomicInteger idFactory, final AbstractTripleStore db, - final QueryEngine queryEngine) { + final QueryEngine queryEngine, final Properties queryHints) { if (step instanceof IRule<?>) { // Convert the step to a bigdata operator tree. PipelineOp tmp = convert((IRule<?>) step, idFactory, db, - queryEngine); + queryEngine, queryHints); if (!tmp.getEvaluationContext().equals( BOpEvaluationContext.CONTROLLER)) { @@ -265,15 +269,55 @@ } - return tmp; + return applyQueryHints(tmp, queryHints); } - return convert((IProgram) step, idFactory, db, queryEngine); + return convert((IProgram) step, idFactory, db, queryEngine, queryHints); } /** + * Apply any query hints to the operator as annotations of that operator. + * + * @param op + * The operator. + * @param queryHints + * The query hints. + * + * @return A copy of that operator to which the query hints (if any) have + * been applied. If there are no query hints then the original + * operator is returned. + * + * @todo It would be nice if this would apply to an operator only those + * query hints which are known to be annotations understood by that + * operator. This information is basically available from the inner + * Annotation interface for a given operator class, but that is not + * really all that accessible.
+ */ + private static PipelineOp applyQueryHints(PipelineOp op, + Properties queryHints) { + + final Enumeration<?> pnames = queryHints.propertyNames(); + + while (pnames.hasMoreElements()) { + + final String name = (String) pnames.nextElement(); + + final String value = queryHints.getProperty(name); + + if (log.isInfoEnabled()) + log.info("Query hint: [" + name + "=" + value + "]"); + + op = (PipelineOp) op.setProperty(name, value); + + } + + return op; + + } + + /** * Convert a rule into an operator tree. * * @param rule @@ -282,52 +326,164 @@ */ public static PipelineOp convert(final IRule<?> rule, final AtomicInteger idFactory, final AbstractTripleStore db, - final QueryEngine queryEngine) { + final QueryEngine queryEngine, final Properties queryHints) { // // true iff the database is in quads mode. // final boolean isQuadsQuery = db.isQuads(); - final PipelineOp startOp = new StartOp(new BOp[] {}, + final PipelineOp startOp = applyQueryHints(new StartOp(new BOp[] {}, NV.asMap(new NV[] {// new NV(Predicate.Annotations.BOP_ID, idFactory .incrementAndGet()),// new NV(SliceOp.Annotations.EVALUATION_CONTEXT, BOpEvaluationContext.CONTROLLER),// - })); - + })),queryHints); + /* * First put the tails in the correct order based on the logic in * DefaultEvaluationPlan2. + * + * @todo Consider making order[] disappear such that all of the arrays + * (preds[], cardinality[], keyOrder[]) are indexed directly by the + * array index rather than by order[i]. Alternatively, make sure that + * the runtime query optimizer reports the permutation array (order[]) + * so we can maintain information about the relationship between the + * given joins and the evaluation order. */ final BOpContextBase context = new BOpContextBase(queryEngine); - final DefaultEvaluationPlan2 plan = new DefaultEvaluationPlan2( - new IRangeCountFactory() { + + final QueryOptimizerEnum optimizer = QueryOptimizerEnum + .valueOf(queryHints.getProperty(QueryHints.OPTIMIZER, + QueryOptimizerEnum.Static.toString())); - public long rangeCount(final IPredicate pred) { - return context.getRelation(pred).getAccessPath(pred) - .rangeCount(false); + // The evaluation plan order. + final int[] order; + // The estimated cardinality of each tail (if the optimizer provides it) + final long[] cardinality; + // The index assigned to each tail of the rule by static analysis. + final IKeyOrder[] keyOrder; + + switch(optimizer) { + case None: { + /* + * Do not run the join optimizer. + * + * @todo Do we need to move any of the joins to the front, e.g., + * magic search, or should everything just be left the way it is? + */ + order = new int[rule.getTailCount()]; + for (int i = 0; i < order.length; i++) { + order[i] = i; } + cardinality = null; + keyOrder = null; + break; + } + case Static: { + /* + * Static query optimizer. + */ + final DefaultEvaluationPlan2 plan = new DefaultEvaluationPlan2( + new IRangeCountFactory() { + + public long rangeCount(final IPredicate pred) { + return context.getRelation(pred) + .getAccessPath(pred).rangeCount(false); + } + + }, rule); + + order = plan.getOrder(); + + /* + * The index assigned to each tail of the rule by static analysis + * (this is often not the index which is actually used when we + * evaluate a given predicate since we always choose the best index + * and that can depend on whether or not we are binding the context + * position for a default or named graph query. When optional joins + * are involved, some variables may not become bound for some + * solutions. 
A different index will often be chosen for access + * paths using the unbound variable. + */ + + // the #of variables in each tail of the rule (set by side-effect). + final int[] nvars = new int[rule.getTailCount()]; + + cardinality = new long[rule.getTailCount()]; + for (int i = 0; i < cardinality.length; i++) { + cardinality[i] = plan.cardinality(i); + } + + keyOrder = computeKeyOrderForEachTail(rule, context, order, nvars); + + break; + + } + case Runtime: { + /* + * The runtime query optimizer. + * + * FIXME MikeP: I have modified the JoinGraph so that it can report + * the permutation order. However, the code here needs to isolate + * the join graph rather than running against all predicates in the + * tail. As it is, it will reorder optionals. + * + * FIXME We can not optimize quads here using the runtime query + * optimizer since we have not yet generated the full query plan. In + * order to get the runtime query optimizer working for quads we + * need to replace the DataSetJoin with a PipelineJoin against an + * inline "relation" containing the named or default graphs IVs. The + * runtime query optimizer does not accept the JOIN operators so the + * annotations which are being applied there will be lost which is + * another problem, especially in scale-out. Both of these issues + * need to be resolved before quads can be used with the runtime + * query optimizer. + * + * @todo In fact, we should be able to write in a JoinGraph operator + * which optimizes the join graph and then evaluates it rather than + * explicitly doing the optimization and evaluation steps here. + * + * @todo Make sure that a summary of the information collected by + * the runtime query optimizer is attached as an annotation to the + * query. + * + * @todo query hints for [limit] and [nedges]. + */ - }, rule); - - // evaluation plan order. - final int[] order = plan.getOrder(); - - // the #of variables in each tail of the rule. - final int[] nvars = new int[rule.getTailCount()]; + // The initial sampling limit. + final int limit = 100; - /* - * The index assigned to each tail of the rule by static analysis (this - * is often not the index which is actually used when we evaluate a - * given predicate since we always choose the best index and that can - * depend on whether or not we are binding the context position for a - * default or named graph query. When optional joins are involved, some - * variables may not become bound for some solutions. A different index - * will often be chosen for access paths using the unbound variable. - */ - final IKeyOrder[] keyOrder = computeKeyOrderForEachTail(rule, context, - order, nvars); + // The #of edges considered for the initial paths. + final int nedges = 2; + // isolate/extract the join graph. + final IPredicate[] preds = new IPredicate[rule.getTailCount()]; + for (int i = 0; i < preds.length; i++) { + preds[i] = rule.getTail(i); + } + + final JGraph g = new JGraph(preds); + + final Path p; + try { + p = g.runtimeOptimizer(queryEngine, limit, nedges); + } catch (Exception e) { + throw new RuntimeException(e); + } + + // the permutation order. + order = g.getOrder(p); + + keyOrder = null; + + cardinality = null; + + break; + } + default: + throw new AssertionError("Unknown option: " + optimizer); + } + // the variables to be retained for each join. 
final IVariable<?>[][] selectVars = RuleState .computeRequiredVarsForEachTail(rule, order); @@ -379,15 +535,22 @@ Predicate<?> pred = (Predicate<?>) rule.getTail(order[i]).setBOpId( idFactory.incrementAndGet()); - // decorate the predicate with the assigned index. -// pred = pred.setKeyOrder(keyOrder[order[i]]); - pred = (Predicate<?>) pred.setProperty(Annotations.ORIGINAL_INDEX, - keyOrder[order[i]]); + /* + * Decorate the predicate with the assigned index (this is purely + * informative). + */ + if (keyOrder != null && keyOrder[order[i]] != null) { + // pred = pred.setKeyOrder(keyOrder[order[i]]); + pred = (Predicate<?>) pred.setProperty( + Annotations.ORIGINAL_INDEX, keyOrder[order[i]]); + } // decorate the predicate with the cardinality estimate. - pred = (Predicate<?>) pred.setProperty( - Annotations.ESTIMATED_CARDINALITY, plan - .cardinality(order[i])); + if (cardinality != null) { + pred = (Predicate<?>) pred.setProperty( + Annotations.ESTIMATED_CARDINALITY, + cardinality[order[i]]); + } /* * Collect all the constraints for this predicate based on which @@ -468,11 +631,11 @@ switch (scope) { case NAMED_CONTEXTS: left = namedGraphJoin(queryEngine, context, idFactory, - left, anns, pred, dataset); + left, anns, pred, dataset, queryHints); break; case DEFAULT_CONTEXTS: left = defaultGraphJoin(queryEngine, context, idFactory, - left, anns, pred, dataset); + left, anns, pred, dataset, queryHints); break; default: throw new AssertionError(); @@ -494,10 +657,10 @@ BOpEvaluationContext.ANY)); anns.add(new NV(PipelineJoin.Annotations.PREDICATE,pred)); - - left = new PipelineJoin(new BOp[] { left }, anns - .toArray(new NV[anns.size()])); + left = applyQueryHints(new PipelineJoin(new BOp[] { left }, + anns.toArray(new NV[anns.size()])), queryHints); + } } else { @@ -506,7 +669,7 @@ * Triples or provenance mode. */ - left = triplesModeJoin(queryEngine, left, anns, pred); + left = triplesModeJoin(queryEngine, left, anns, pred, queryHints); } @@ -533,7 +696,8 @@ * @return The join operator. 
*/ private static PipelineOp triplesModeJoin(final QueryEngine queryEngine, - final PipelineOp left, final List<NV> anns, Predicate<?> pred) { + final PipelineOp left, final List<NV> anns, Predicate<?> pred, + final Properties queryHints) { final boolean scaleOut = queryEngine.isScaleOut(); if (scaleOut) { @@ -551,8 +715,8 @@ anns.add(new NV(PipelineJoin.Annotations.PREDICATE,pred)); - return new PipelineJoin(new BOp[] { left }, anns - .toArray(new NV[anns.size()])); + return applyQueryHints(new PipelineJoin(new BOp[] { left }, anns + .toArray(new NV[anns.size()])), queryHints); } @@ -578,7 +742,7 @@ private static PipelineOp namedGraphJoin(final QueryEngine queryEngine, final BOpContextBase context, final AtomicInteger idFactory, final PipelineOp left, final List<NV> anns, Predicate<?> pred, - final Dataset dataset) { + final Dataset dataset, final Properties queryHints) { final boolean scaleOut = queryEngine.isScaleOut(); if (scaleOut) { @@ -603,8 +767,8 @@ anns.add(new NV(PipelineJoin.Annotations.PREDICATE,pred)); - return new PipelineJoin(new BOp[] { left }, anns - .toArray(new NV[anns.size()])); + return applyQueryHints(new PipelineJoin(new BOp[] { left }, anns + .toArray(new NV[anns.size()])), queryHints); } @@ -616,8 +780,8 @@ anns.add(new NV(PipelineJoin.Annotations.PREDICATE,pred)); - return new PipelineJoin(new BOp[] { left }, anns - .toArray(new NV[anns.size()])); + return applyQueryHints(new PipelineJoin(new BOp[] { left }, anns + .toArray(new NV[anns.size()])), queryHints); } @@ -646,8 +810,8 @@ anns.add(new NV(PipelineJoin.Annotations.PREDICATE,pred)); - return new PipelineJoin(new BOp[] { left }, anns - .toArray(new NV[anns.size()])); + return applyQueryHints(new PipelineJoin(new BOp[] { left }, anns + .toArray(new NV[anns.size()])), queryHints); } @@ -662,8 +826,8 @@ anns.add(new NV(PipelineJoin.Annotations.PREDICATE,pred)); - return new PipelineJoin(new BOp[] { left }, anns - .toArray(new NV[anns.size()])); + return applyQueryHints(new PipelineJoin(new BOp[] { left }, anns + .toArray(new NV[anns.size()])), queryHints); } @@ -714,8 +878,8 @@ anns.add(new NV(PipelineJoin.Annotations.PREDICATE,pred)); - return new PipelineJoin(new BOp[] { left }, anns - .toArray(new NV[anns.size()])); + return applyQueryHints(new PipelineJoin(new BOp[] { left }, anns + .toArray(new NV[anns.size()])), queryHints); } else { @@ -762,8 +926,8 @@ anns.add(new NV(PipelineJoin.Annotations.PREDICATE,pred)); - return new PipelineJoin(new BOp[] { dataSetJoin }, anns - .toArray(new NV[anns.size()])); + return applyQueryHints(new PipelineJoin(new BOp[] { dataSetJoin }, + anns.toArray(new NV[anns.size()])), queryHints); } @@ -786,7 +950,7 @@ private static PipelineOp defaultGraphJoin(final QueryEngine queryEngine, final BOpContextBase context, final AtomicInteger idFactory, final PipelineOp left, final List<NV> anns, Predicate<?> pred, - final Dataset dataset) { + final Dataset dataset, final Properties queryHints) { /* * @todo raise this into the caller and do one per rule rather than once @@ -813,8 +977,8 @@ anns.add(new NV(PipelineJoin.Annotations.PREDICATE,pred)); - return new PipelineJoin(new BOp[] { left }, anns - .toArray(new NV[anns.size()])); + return applyQueryHints(new PipelineJoin(new BOp[] { left }, anns + .toArray(new NV[anns.size()])), queryHints); } @@ -842,8 +1006,8 @@ anns.add(new NV(PipelineJoin.Annotations.PREDICATE, pred)); - return new PipelineJoin(new BOp[] { left }, anns - .toArray(new NV[anns.size()])); + return applyQueryHints(new PipelineJoin(new BOp[] { left }, anns + 
.toArray(new NV[anns.size()])), queryHints); } @@ -911,8 +1075,8 @@ // // } // -// return new PipelineJoin(new BOp[] { left, pred }, anns -// .toArray(new NV[anns.size()])); +// return applyQueryHints(new PipelineJoin(new BOp[] { left, pred }, anns +// .toArray(new NV[anns.size()])),queryHints); // // } @@ -987,8 +1151,8 @@ anns.add(new NV(PipelineJoin.Annotations.PREDICATE, pred)); - return new PipelineJoin(new BOp[] { left }, anns - .toArray(new NV[anns.size()])); + return applyQueryHints(new PipelineJoin(new BOp[] { left }, anns + .toArray(new NV[anns.size()])),queryHints); } else { @@ -1037,8 +1201,8 @@ anns.add(new NV(PipelineJoin.Annotations.PREDICATE,pred)); - return new PipelineJoin(new BOp[] { left }, anns - .toArray(new NV[anns.size()])); + return applyQueryHints(new PipelineJoin(new BOp[] { left }, anns + .toArray(new NV[anns.size()])),queryHints); } @@ -1059,7 +1223,7 @@ */ public static PipelineOp convert(final IProgram program, final AtomicInteger idFactory, final AbstractTripleStore db, - final QueryEngine queryEngine) { + final QueryEngine queryEngine, final Properties queryHints) { // When parallel, the program is translated to a UNION. Else STEPS. final boolean isParallel = program.isParallel(); @@ -1076,7 +1240,8 @@ for (int i = 0; i < arity; i++) { // convert the child IStep - BOpBase tmp = convert(steps[i], idFactory, db, queryEngine); + final BOpBase tmp = convert(steps[i], idFactory, db, queryEngine, + queryHints); /* * @todo Route binding sets around the UNION/STEPS operator. We need Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestQueryHints.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestQueryHints.java 2011-01-02 00:41:31 UTC (rev 4044) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestQueryHints.java 2011-01-02 21:40:00 UTC (rev 4045) @@ -28,22 +28,17 @@ import java.util.Collection; import java.util.LinkedList; -import org.openrdf.model.BNode; -import org.openrdf.model.Literal; -import org.openrdf.model.Resource; + import org.openrdf.model.URI; -import org.openrdf.model.impl.BNodeImpl; -import org.openrdf.model.impl.LiteralImpl; import org.openrdf.model.impl.URIImpl; -import org.openrdf.model.vocabulary.RDF; -import org.openrdf.model.vocabulary.RDFS; import org.openrdf.query.BindingSet; import org.openrdf.query.QueryLanguage; import org.openrdf.query.TupleQuery; import org.openrdf.query.TupleQueryResult; import org.openrdf.query.impl.BindingImpl; -import com.bigdata.rdf.store.BD; +import com.bigdata.bop.join.PipelineJoin; + /** * Unit tests the query hints aspect of the {@link BigdataSail} implementation. * @@ -68,7 +63,11 @@ /** * Tests adding query hints in SPARQL. * - * @throws Exception + * @throws Exception + * + * @todo Unfortunately, this does not really _test_ anything since the query + * should be answered correctly regardless of the query hint(s) + * specified. 
*/ public void testQueryHints() throws Exception { @@ -102,20 +101,22 @@ { - String query = - "PREFIX "+BD.QUERY_HINTS_NAMESPACE+": " + - " <http://www.bigdata.com/queryOption#com.bigdata.relation.rule.eval.DefaultRuleTaskFactory.nestedSubquery=true&com.bigdata.fullScanTreshold=1000> " + - "SELECT * " + - "WHERE { " + - " <"+a+"> ?p ?o " + - "}"; - + final String query = "PREFIX " + QueryHints.NAMESPACE + + ": " + "<http://www.bigdata.com/queryOption#" + // + PipelineJoin.Annotations.MAX_PARALLEL + "=-5" // + + "&" + "com.bigdata.fullScanTreshold=1000" // + + ">\n"// + + "SELECT * " + // + "WHERE { " + // + " <" + a + "> ?p ?o " + // + "}"; + final TupleQuery tupleQuery = cxn.prepareTupleQuery(QueryLanguage.SPARQL, query); tupleQuery.setIncludeInferred(true /* includeInferred */); - TupleQueryResult result = tupleQuery.evaluate(); + final TupleQueryResult result = tupleQuery.evaluate(); - Collection<BindingSet> answer = new LinkedList<BindingSet>(); + final Collection<BindingSet> answer = new LinkedList<BindingSet>(); answer.add(createBindingSet( new BindingImpl("p", b), new BindingImpl("o", c)
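To make the hint mechanism in this revision concrete, the following is a minimal standalone sketch, not bigdata code: it builds a hints URI of the form documented on QueryHints.NAMESPACE (note the test above still uses the older queryOption form) and recovers the name=value pairs the way parseQueryHints() does. The class name and main() harness are illustrative; the hint name shown is the OPTIMIZER constant, which resolves to QueryHints.class.getName() + ".optimizer".

import java.util.Properties;

// Minimal sketch, not bigdata code: builds a hint URI in the form described
// by the QueryHints.NAMESPACE javadoc and parses the name=value pairs after
// the '#' in the same manner as parseQueryHints() above.
public class QueryHintsSketch {

    public static void main(final String[] args) {

        final String hintsURI = "http://www.bigdata.com/queryHints#"
                + "com.bigdata.rdf.sail.QueryHints.optimizer=Runtime";

        final Properties queryHints = new Properties();

        // has to have a '#' and it can't be at the end.
        final int i = hintsURI.indexOf('#');

        if (i >= 0 && i < hintsURI.length() - 1) {

            // multiple hints are separated by '&' characters.
            for (String pair : hintsURI.substring(i + 1).split("&")) {

                final int eq = pair.indexOf('=');

                if (eq > 0) {
                    queryHints.setProperty(pair.substring(0, eq),
                            pair.substring(eq + 1));
                }
            }
        }

        // prints: com.bigdata.rdf.sail.QueryHints.optimizer=Runtime
        queryHints.list(System.out);
    }
}

The resulting Properties then flow through Rule2BOpUtility.convert(...), where applyQueryHints() copies each name=value pair onto the generated operators as annotations, as shown in the diff above.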
From: <tho...@us...> - 2011-01-05 13:51:33
|
Revision: 4054 http://bigdata.svn.sourceforge.net/bigdata/?rev=4054&view=rev Author: thompsonbry Date: 2011-01-05 13:51:25 +0000 (Wed, 05 Jan 2011) Log Message: ----------- Reduced several array capacity constants of 10000 or more to 100 in order to reduce the heap churn during query. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BufferAnnotations.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/AbstractChunkedTupleIterator.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/UnisolatedReadWriteIndex.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/BlockingBuffer.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/striterator/IChunkedIterator.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/inf/BackchainOwlSameAsIterator.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/inf/BackchainOwlSameAsPropertiesPIterator.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/inf/BackchainOwlSameAsPropertiesPOIterator.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/inf/BackchainOwlSameAsPropertiesSPIterator.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/inf/BackchainOwlSameAsPropertiesSPOIterator.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/inf/BackchainTypeResourceIterator.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BufferAnnotations.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BufferAnnotations.java 2011-01-05 13:49:15 UTC (rev 4053) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BufferAnnotations.java 2011-01-05 13:51:25 UTC (rev 4054) @@ -51,8 +51,10 @@ /** * Default for {@link #CHUNK_OF_CHUNKS_CAPACITY} + * + * @todo Try smaller capacities in benchmarks */ - int DEFAULT_CHUNK_OF_CHUNKS_CAPACITY = 5;//trunk=1000 + int DEFAULT_CHUNK_OF_CHUNKS_CAPACITY = 5;//5;//trunk=1000 /** * Sets the capacity of the {@link IBuffer}[]s used to accumulate a chunk of Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java 2011-01-05 13:49:15 UTC (rev 4053) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java 2011-01-05 13:51:25 UTC (rev 4054) @@ -42,6 +42,7 @@ import com.bigdata.btree.filter.Advancer; import com.bigdata.btree.filter.TupleFilter; import com.bigdata.mdi.PartitionLocator; +import com.bigdata.rawstore.Bytes; import com.bigdata.relation.IRelation; import com.bigdata.relation.accesspath.AccessPath; import com.bigdata.relation.accesspath.ElementFilter; @@ -258,7 +259,7 @@ * @todo Experiment with this. It should probably be something close to * the branching factor, e.g., 100. 
*/ - int DEFAULT_FULLY_BUFFERED_READ_THRESHOLD = 100;//trunk=20*Bytes.kilobyte32 + int DEFAULT_FULLY_BUFFERED_READ_THRESHOLD = 100;//trunk=20*Bytes.kilobyte32; /** * Specify the {@link IRangeQuery} flags for the {@link IAccessPath} ( Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/AbstractChunkedTupleIterator.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/AbstractChunkedTupleIterator.java 2011-01-05 13:49:15 UTC (rev 4053) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/AbstractChunkedTupleIterator.java 2011-01-05 13:51:25 UTC (rev 4054) @@ -248,7 +248,7 @@ */ protected int getDefaultCapacity() { - return 100000; + return 100;//1000;//100000; } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/UnisolatedReadWriteIndex.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/UnisolatedReadWriteIndex.java 2011-01-05 13:49:15 UTC (rev 4053) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/UnisolatedReadWriteIndex.java 2011-01-05 13:51:25 UTC (rev 4054) @@ -284,7 +284,7 @@ * main purpose of the capacity is to reduce the contention for the * {@link ReadWriteLock}. */ - final static protected int DEFAULT_CAPACITY = 10000; + final static protected int DEFAULT_CAPACITY = 100;//10000; /** * Creates a view of an unisolated index that will enforce the concurrency Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/BlockingBuffer.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/BlockingBuffer.java 2011-01-05 13:49:15 UTC (rev 4053) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/BlockingBuffer.java 2011-01-05 13:51:25 UTC (rev 4054) @@ -166,12 +166,12 @@ * The default capacity for the internal {@link Queue} on which elements (or * chunks of elements) are buffered. */ - public static transient final int DEFAULT_PRODUCER_QUEUE_CAPACITY = 5000; + public static transient final int DEFAULT_PRODUCER_QUEUE_CAPACITY = 100;//5000; /** * The default minimum chunk size for the chunk combiner. */ - public static transient final int DEFAULT_MINIMUM_CHUNK_SIZE = 10000; + public static transient final int DEFAULT_MINIMUM_CHUNK_SIZE = 100;//10000; /** * The default timeout in milliseconds during which chunks of elements may Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/striterator/IChunkedIterator.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/striterator/IChunkedIterator.java 2011-01-05 13:49:15 UTC (rev 4053) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/striterator/IChunkedIterator.java 2011-01-05 13:51:25 UTC (rev 4054) @@ -61,7 +61,7 @@ * * FIXME This is way too large. */ - int DEFAULT_CHUNK_SIZE = 10000; + int DEFAULT_CHUNK_SIZE = 100;//00; /** * The next element available from the iterator. 
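These capacity constants size the nextChunk() buffers used throughout the backchain iterators modified below, which all share the same pattern: allocate a chunkSize array, fill it from the source, and return a dense copy. A minimal generic sketch of that pattern (illustrative names, not bigdata code) shows where the heap churn came from: each call pays for the full array up front, so a 10000-slot default allocates 10000 slots even when only a handful of elements are available.

import java.util.Arrays;
import java.util.Iterator;

// Minimal sketch, not bigdata code: the nextChunk() buffering pattern used
// by the backchain iterators below. The caller supplies a buffer of the
// configured chunk size; the method fills it and returns a dense copy.
public class ChunkSketch {

    static <T> T[] nextChunk(final Iterator<T> src, final T[] buf) {

        int n = 0;

        // fill the fixed-capacity buffer from the source iterator.
        while (src.hasNext() && n < buf.length) {
            buf[n++] = src.next();
        }

        // copy so that the returned chunk is dense (no trailing nulls).
        return Arrays.copyOf(buf, n);
    }

    public static void main(final String[] args) {
        final Iterator<String> it = Arrays.asList("a", "b", "c").iterator();
        // a capacity-100 buffer yields a dense 3-element chunk here.
        System.out.println(Arrays.toString(nextChunk(it, new String[100])));
    }
}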
Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/inf/BackchainOwlSameAsIterator.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/inf/BackchainOwlSameAsIterator.java 2011-01-05 13:49:15 UTC (rev 4053) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/inf/BackchainOwlSameAsIterator.java 2011-01-05 13:51:25 UTC (rev 4054) @@ -27,6 +27,8 @@ protected IV sameAs; + final int chunkSize = 100;//10000; + protected IChunkedOrderedIterator<ISPO> src; public BackchainOwlSameAsIterator(IChunkedOrderedIterator<ISPO> src, Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/inf/BackchainOwlSameAsPropertiesPIterator.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/inf/BackchainOwlSameAsPropertiesPIterator.java 2011-01-05 13:49:15 UTC (rev 4053) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/inf/BackchainOwlSameAsPropertiesPIterator.java 2011-01-05 13:51:25 UTC (rev 4054) @@ -148,7 +148,7 @@ // use a buffer so that we can do a more efficient batch contains // to filter out existing statements - int chunkSize = 10000; +// int chunkSize = 10000; SPO[] spos = new SPO[chunkSize]; int numSPOs = 0; // create a new link between {s,? sameAs s} X {o,? sameAs o} tuples @@ -199,7 +199,6 @@ } public ISPO[] nextChunk() { - final int chunkSize = 10000; ISPO[] s = new ISPO[chunkSize]; int n = 0; while (hasNext() && n < chunkSize) { Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/inf/BackchainOwlSameAsPropertiesPOIterator.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/inf/BackchainOwlSameAsPropertiesPOIterator.java 2011-01-05 13:49:15 UTC (rev 4053) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/inf/BackchainOwlSameAsPropertiesPOIterator.java 2011-01-05 13:51:25 UTC (rev 4054) @@ -100,7 +100,7 @@ // which might be present in the source iterator already // use a buffer so that we can do a more efficient batch contains // to filter out existing statements - int chunkSize = 10000; +// int chunkSize = 10000; SPO[] spos = new SPO[chunkSize]; int numSPOs = 0; // get all of o's sames @@ -112,7 +112,7 @@ db.getAccessPath(null, p, same).iterator(); while (reversePropsIt.hasNext()) { final ISPO reverseProp = reversePropsIt.next(); - // do not add ( s sameAs s ) inferences + // do not add ( s sameAs s ) inferences if (IVUtility.equals(reverseProp.p(), sameAs) && IVUtility.equals(reverseProp.s(), o)) { continue; @@ -229,7 +229,7 @@ // ignore sameAs properties // use a buffer so that we can do a more efficient batch contains // to filter out existing statements - int chunkSize = 10000; +// int chunkSize = 10000; final ISPO[] spos = new ISPO[chunkSize]; int numSPOs = 0; // get all of s's sames @@ -273,7 +273,6 @@ } public ISPO[] nextChunk() { - final int chunkSize = 10000; ISPO[] s = new ISPO[chunkSize]; int n = 0; while (hasNext() && n < chunkSize) { Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/inf/BackchainOwlSameAsPropertiesSPIterator.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/inf/BackchainOwlSameAsPropertiesSPIterator.java 2011-01-05 13:49:15 UTC (rev 4053) +++ 
branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/inf/BackchainOwlSameAsPropertiesSPIterator.java 2011-01-05 13:51:25 UTC (rev 4054) @@ -107,7 +107,7 @@ // which might be present in the source iterator already // use a buffer so that we can do a more efficient batch contains // to filter out existing statements - int chunkSize = 10000; +// int chunkSize = 10000; SPO[] spos = new SPO[chunkSize]; int numSPOs = 0; // get all of s's sames @@ -238,7 +238,7 @@ // ignore sameAs properties // use a buffer so that we can do a more efficient batch contains // to filter out existing statements - int chunkSize = 10000; +// int chunkSize = 10000; ISPO[] spos = new ISPO[chunkSize]; int numSPOs = 0; // get all of o's sames @@ -282,7 +282,7 @@ } public ISPO[] nextChunk() { - final int chunkSize = 10000; +// final int chunkSize = 10000; ISPO[] s = new ISPO[chunkSize]; int n = 0; while (hasNext() && n < chunkSize) { Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/inf/BackchainOwlSameAsPropertiesSPOIterator.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/inf/BackchainOwlSameAsPropertiesSPOIterator.java 2011-01-05 13:49:15 UTC (rev 4053) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/inf/BackchainOwlSameAsPropertiesSPOIterator.java 2011-01-05 13:51:25 UTC (rev 4054) @@ -108,7 +108,7 @@ // all of which might be present in the source iterator already // use a buffer so that we can do a more efficient batch contains // to filter out existing statements - int chunkSize = 10000; +// int chunkSize = 10000; SPO[] spos = new SPO[chunkSize]; int numSPOs = 0; // collect up the links between {s,? sameAs s} X {o,? sameAs o} @@ -196,7 +196,7 @@ } public ISPO[] nextChunk() { - final int chunkSize = 10000; +// final int chunkSize = 10000; ISPO[] s = new ISPO[chunkSize]; int n = 0; while (hasNext() && n < chunkSize) { Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/inf/BackchainTypeResourceIterator.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/inf/BackchainTypeResourceIterator.java 2011-01-05 13:49:15 UTC (rev 4053) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/inf/BackchainTypeResourceIterator.java 2011-01-05 13:51:25 UTC (rev 4054) @@ -20,7 +20,7 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ + */ /* * Created on Oct 30, 2007 */ @@ -69,973 +69,984 @@ * * @see InferenceEngine * @see InferenceEngine.Options - * + * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id$ - */ -public class BackchainTypeResourceIterator implements IChunkedOrderedIterator<ISPO> { + * @version $Id: BackchainTypeResourceIterator.java 3687 2010-09-29 22:50:32Z + * mrpersonick $ + */ +public class BackchainTypeResourceIterator implements + IChunkedOrderedIterator<ISPO> { - protected static final Logger log = Logger.getLogger(BackchainTypeResourceIterator.class); - - private final IChunkedOrderedIterator<ISPO> _src; - private final Iterator<ISPO> src; -// private final long s; -// private final AbstractTripleStore db; - private final IV rdfType, rdfsResource; - private final IKeyOrder<ISPO> keyOrder; + protected static final Logger log = Logger + 
.getLogger(BackchainTypeResourceIterator.class); - /** - * The subject(s) whose (s rdf:type rdfs:Resource) entailments will be - * visited. - */ - private PushbackIterator<IV> resourceIds; - - /** - * An iterator reading on the {@link SPOKeyOrder#POS} index. The predicate - * is bound to <code>rdf:type</code> and the object is bound to - * <code>rdfs:Resource</code>. If the subject was given to the ctor, then - * it will also be bound. The iterator visits the term identifier for the - * <em>subject</em> position. - */ - private PushbackIterator<IV> posItr; - - private boolean sourceExhausted = false; - - private boolean open = true; + private final IChunkedOrderedIterator<ISPO> _src; + private final Iterator<ISPO> src; + // private final long s; + // private final AbstractTripleStore db; + private final IV rdfType, rdfsResource; + private final IKeyOrder<ISPO> keyOrder; - /** - * This is set each time by {@link #nextChunk()} and inspected by - * {@link #nextChunk(IKeyOrder)} in order to decide whether the chunk needs - * to be sorted. - */ - private IKeyOrder<ISPO> chunkKeyOrder = null; + private final int chunkSize = 100;//10000; - /** - * The last {@link ISPO} visited by {@link #next()}. - */ - private ISPO current = null; + /** + * The subject(s) whose (s rdf:type rdfs:Resource) entailments will be + * visited. + */ + private PushbackIterator<IV> resourceIds; - /** - * Returns a suitably configured {@link BackchainTypeResourceIterator} -or- - * <i>src</i> iff the <i>accessPath</i> does not require the - * materialization of <code>(x rdf:type rdfs:Resource)</code> entailments. - * - * @param _src - * The source iterator. {@link #nextChunk()} will sort statements - * into the {@link IKeyOrder} reported by this iterator (as long - * as the {@link IKeyOrder} is non-<code>null</code>). - * @param accessPath - * The {@link IAccessPath} from which the <i>src</i> iterator - * was derived. Note that <i>src</i> is NOT necessarily - * equivalent to {@link IAccessPath#iterator()} since it MAY have - * been layered already to backchain other entailments, e.g., - * <code>owl:sameAs</code>. - * @param db - * The database from which we will read the distinct subject - * identifiers from its {@link SPORelation}. This parameter is - * used iff this is an all unbound triple pattern. - * @param rdfType - * The term identifier that corresponds to rdf:Type for the - * database. - * @param rdfsResource - * The term identifier that corresponds to rdf:Resource for the - * database. - * - * @return The backchain iterator -or- the <i>src</i> iterator iff the - * <i>accessPath</i> does not require the materialization of - * <code>(x rdf:type rdfs:Resource)</code> entailments. - */ - @SuppressWarnings("unchecked") - static public IChunkedOrderedIterator<ISPO> newInstance( - final IChunkedOrderedIterator<ISPO> _src, - final IAccessPath<ISPO> accessPath, final AbstractTripleStore db, - final IV rdfType, final IV rdfsResource) { - - if (accessPath == null) - throw new IllegalArgumentException(); - -// final SPO spo = new SPO(accessPath.getPredicate()); - final IPredicate<ISPO> pred = accessPath.getPredicate(); - final IV s = getTerm(pred, 0); - final IV p = getTerm(pred, 1); - final IV o = getTerm(pred, 2); + /** + * An iterator reading on the {@link SPOKeyOrder#POS} index. The predicate + * is bound to <code>rdf:type</code> and the object is bound to + * <code>rdfs:Resource</code>. If the subject was given to the ctor, then it + * will also be bound. 
The iterator visits the term identifier for the + * <em>subject</em> position. + */ + private PushbackIterator<IV> posItr; - if (((o == null || o.equals(rdfsResource)) && - (p == null || p.equals(rdfType))) == false) { - - /* - * Backchain will not generate any statements. - */ + private boolean sourceExhausted = false; - return _src; - - } - - if (_src == null) - throw new IllegalArgumentException(); - - if (db == null) - throw new IllegalArgumentException(); - - /* - * The subject(s) whose (s rdf:type rdfs:Resource) entailments will be - * visited. - */ - final PushbackIterator<IV> resourceIds; - - /* - * An iterator reading on the {@link SPOKeyOrder#POS} index. The - * predicate is bound to <code>rdf:type</code> and the object is bound - * to <code>rdfs:Resource</code>. If the subject was given to the - * ctor, then it will also be bound. The iterator visits the term - * identifier for the <em>subject</em> position. - */ - final PushbackIterator<IV> posItr; + private boolean open = true; - if (s == null) { + /** + * This is set each time by {@link #nextChunk()} and inspected by + * {@link #nextChunk(IKeyOrder)} in order to decide whether the chunk needs + * to be sorted. + */ + private IKeyOrder<ISPO> chunkKeyOrder = null; - /* - * Backchain will generate one statement for each distinct subject - * or object in the store. - * - * @todo This is Ok as long as you are forward chaining all of the - * rules that put a predicate or an object into the subject position - * since it will then have all resources. If you backward chain some - * of those rules, e.g., rdf1, then you MUST change this to read on - * the ids index and skip anything that is marked as a literal using - * the low bit of the term identifier but you will overgenerate for - * resources that are no longer in use by the KB (you could filter - * for that). - */ + /** + * The last {@link ISPO} visited by {@link #next()}. + */ + private ISPO current = null; -// resourceIds = db.getSPORelation().distinctTermScan(SPOKeyOrder.SPO); - - resourceIds = new PushbackIterator<IV>(new MergedOrderedIterator(// - db.getSPORelation().distinctTermScan(SPOKeyOrder.SPO), // - db.getSPORelation().distinctTermScan(SPOKeyOrder.OSP, - new ITermIVFilter() { - private static final long serialVersionUID = 1L; - public boolean isValid(IV iv) { - // filter out literals from the OSP scan. - return !iv.isLiteral(); - } - }))); + /** + * Returns a suitably configured {@link BackchainTypeResourceIterator} -or- + * <i>src</i> iff the <i>accessPath</i> does not require the materialization + * of <code>(x rdf:type rdfs:Resource)</code> entailments. + * + * @param _src + * The source iterator. {@link #nextChunk()} will sort statements + * into the {@link IKeyOrder} reported by this iterator (as long + * as the {@link IKeyOrder} is non-<code>null</code>). + * @param accessPath + * The {@link IAccessPath} from which the <i>src</i> iterator was + * derived. Note that <i>src</i> is NOT necessarily equivalent to + * {@link IAccessPath#iterator()} since it MAY have been layered + * already to backchain other entailments, e.g., + * <code>owl:sameAs</code>. + * @param db + * The database from which we will read the distinct subject + * identifiers from its {@link SPORelation}. This parameter is + * used iff this is an all unbound triple pattern. + * @param rdfType + * The term identifier that corresponds to rdf:Type for the + * database. + * @param rdfsResource + * The term identifier that corresponds to rdf:Resource for the + * database. 
+ * + * @return The backchain iterator -or- the <i>src</i> iterator iff the + * <i>accessPath</i> does not require the materialization of + * <code>(x rdf:type rdfs:Resource)</code> entailments. + */ + @SuppressWarnings("unchecked") + static public IChunkedOrderedIterator<ISPO> newInstance( + final IChunkedOrderedIterator<ISPO> _src, + final IAccessPath<ISPO> accessPath, final AbstractTripleStore db, + final IV rdfType, final IV rdfsResource) { - /* - * Reading (? rdf:Type rdfs:Resource) using the POS index. - */ + if (accessPath == null) + throw new IllegalArgumentException(); - posItr = new PushbackIterator<IV>(new Striterator(db.getAccessPath( - null, rdfType, rdfsResource, - ExplicitSPOFilter.INSTANCE).iterator()) - .addFilter(new Resolver() { - private static final long serialVersionUID = 1L; - @Override - protected Object resolve(Object obj) { - return ((SPO) obj).s; - } - })); + // final SPO spo = new SPO(accessPath.getPredicate()); + final IPredicate<ISPO> pred = accessPath.getPredicate(); + final IV s = getTerm(pred, 0); + final IV p = getTerm(pred, 1); + final IV o = getTerm(pred, 2); - } else { + if (((o == null || o.equals(rdfsResource)) && (p == null || p + .equals(rdfType))) == false) { - /* - * Backchain will generate exactly one statement: (s rdf:type - * rdfs:Resource). - */ -/* - resourceIds = new PushbackIterator<Long>( - new ClosableSingleItemIterator<Long>(spo.s)); -*/ - /* - * Reading a single point (s type resource), so this will actually - * use the SPO index. - */ -/* - posItr = new PushbackIterator<Long>(new Striterator(db - .getAccessPath(spo.s, rdfType, rdfsResource, - ExplicitSPOFilter.INSTANCE).iterator()) - .addFilter(new Resolver() { - private static final long serialVersionUID = 1L; - @Override - protected Object resolve(Object obj) { - return Long.valueOf(((SPO) obj).s); - } - })); -*/ - return new BackchainSTypeResourceIterator - ( _src, accessPath, db, rdfType, rdfsResource - ); + /* + * Backchain will not generate any statements. + */ - } - - /* - * filters out (x rdf:Type rdfs:Resource) in case it is explicit in the - * db so that we do not generate duplicates for explicit type resource - * statement. - */ - final Iterator<ISPO> src = new Striterator(_src).addFilter(new Filter(){ + return _src; - private static final long serialVersionUID = 1L; + } - public boolean isValid(Object arg0) { + if (_src == null) + throw new IllegalArgumentException(); - final SPO o = (SPO) arg0; + if (db == null) + throw new IllegalArgumentException(); - if (o.p.equals(rdfType) && o.o.equals(rdfsResource)) { - - return false; - - } - - return true; - - }}); - - return new BackchainTypeResourceIterator(_src, src, resourceIds, - posItr, rdfType, rdfsResource); - - } - - private static IV getTerm(final IPredicate<ISPO> pred, final int pos) { - - final IVariableOrConstant<IV> term = pred.get(pos); - - return term == null || term.isVar() ? null : term.get(); - - } - - /** - * Create an iterator that will visit all statements in the source iterator - * and also backchain any entailments of the form (x rdf:type rdfs:Resource) - * which are valid for the given triple pattern. - * - * @param src - * The source iterator. {@link #nextChunk()} will sort statements - * into the {@link IKeyOrder} reported by this iterator (as long - * as the {@link IKeyOrder} is non-<code>null</code>). - * @param db - * The database from which we will read the distinct subject - * identifiers (iff this is an all unbound triple pattern). 
- * @param rdfType - * The term identifier that corresponds to rdf:Type for the - * database. - * @param rdfsResource - * The term identifier that corresponds to rdf:Resource for the - * database. - * - * @see #newInstance(IChunkedOrderedIterator, IAccessPath, - * AbstractTripleStore, long, long) - */ - @SuppressWarnings({ "unchecked", "serial" }) - private BackchainTypeResourceIterator(IChunkedOrderedIterator<ISPO> _src,// - Iterator<ISPO> src,// - PushbackIterator<IV> resourceIds,// - PushbackIterator<IV> posItr,// - final IV rdfType,// - final IV rdfsResource// - ) { - - // the raw source - we pass close() through to this. - this._src = _src; - - this.keyOrder = _src.getKeyOrder(); // MAY be null. - - // the source with (x type resource) filtered out. - this.src = src; - - // - this.resourceIds = resourceIds; - - this.posItr = posItr; - - this.rdfType = rdfType; - - this.rdfsResource = rdfsResource; - - } + /* + * The subject(s) whose (s rdf:type rdfs:Resource) entailments will be + * visited. + */ + final PushbackIterator<IV> resourceIds; - public IKeyOrder<ISPO> getKeyOrder() { + /* + * An iterator reading on the {@link SPOKeyOrder#POS} index. The + * predicate is bound to <code>rdf:type</code> and the object is bound + * to <code>rdfs:Resource</code>. If the subject was given to the ctor, + * then it will also be bound. The iterator visits the term identifier + * for the <em>subject</em> position. + */ + final PushbackIterator<IV> posItr; - return keyOrder; - - } + if (s == null) { - public void close() { + /* + * Backchain will generate one statement for each distinct subject + * or object in the store. + * + * @todo This is Ok as long as you are forward chaining all of the + * rules that put a predicate or an object into the subject position + * since it will then have all resources. If you backward chain some + * of those rules, e.g., rdf1, then you MUST change this to read on + * the ids index and skip anything that is marked as a literal using + * the low bit of the term identifier but you will overgenerate for + * resources that are no longer in use by the KB (you could filter + * for that). + */ - if(!open) return; - - // release any resources here. - - open = false; + // resourceIds = + // db.getSPORelation().distinctTermScan(SPOKeyOrder.SPO); - _src.close(); + resourceIds = new PushbackIterator<IV>(new MergedOrderedIterator(// + db.getSPORelation().distinctTermScan(SPOKeyOrder.SPO), // + db.getSPORelation().distinctTermScan(SPOKeyOrder.OSP, + new ITermIVFilter() { + private static final long serialVersionUID = 1L; - resourceIds.close(); - - resourceIds = null; - - if (posItr != null) { + public boolean isValid(IV iv) { + // filter out literals from the OSP scan. + return !iv.isLiteral(); + } + }))); - posItr.close(); - - } - - } + /* + * Reading (? rdf:Type rdfs:Resource) using the POS index. + */ - public boolean hasNext() { - - if (!open) { + posItr = new PushbackIterator<IV>(new Striterator(db.getAccessPath( + null, rdfType, rdfsResource, ExplicitSPOFilter.INSTANCE) + .iterator()).addFilter(new Resolver() { + private static final long serialVersionUID = 1L; - // the iterator has been closed. - - return false; - - } + @Override + protected Object resolve(Object obj) { + return ((SPO) obj).s; + } + })); - if (!sourceExhausted) { + } else { - if (src.hasNext()) { + /* + * Backchain will generate exactly one statement: (s rdf:type + * rdfs:Resource). 
+ */ + /* + * resourceIds = new PushbackIterator<Long>( new + * ClosableSingleItemIterator<Long>(spo.s)); + */ + /* + * Reading a single point (s type resource), so this will actually + * use the SPO index. + */ + /* + * posItr = new PushbackIterator<Long>(new Striterator(db + * .getAccessPath(spo.s, rdfType, rdfsResource, + * ExplicitSPOFilter.INSTANCE).iterator()) .addFilter(new Resolver() + * { private static final long serialVersionUID = 1L; + * + * @Override protected Object resolve(Object obj) { return + * Long.valueOf(((SPO) obj).s); } })); + */ + return new BackchainSTypeResourceIterator(_src, accessPath, db, + rdfType, rdfsResource); - // still consuming the source iterator. + } - return true; + /* + * filters out (x rdf:Type rdfs:Resource) in case it is explicit in the + * db so that we do not generate duplicates for explicit type resource + * statement. + */ + final Iterator<ISPO> src = new Striterator(_src) + .addFilter(new Filter() { - } + private static final long serialVersionUID = 1L; - // the source iterator is now exhausted. + public boolean isValid(Object arg0) { - sourceExhausted = true; + final SPO o = (SPO) arg0; - _src.close(); + if (o.p.equals(rdfType) && o.o.equals(rdfsResource)) { - } + return false; - if (resourceIds.hasNext()) { + } - // still consuming the subjects iterator. - - return true; - - } - - // the subjects iterator is also exhausted so we are done. - - return false; - - } + return true; - /** - * Visits all {@link SPO}s visited by the source iterator and then begins - * to backchain ( x rdf:type: rdfs:Resource ) statements. - * <p> - * The "backchain" scans two iterators: an {@link IChunkedOrderedIterator} - * on <code>( ? rdf:type - * rdfs:Resource )</code> that reads on the database - * (this tells us whether we have an explicit - * <code>(x rdf:type rdfs:Resource)</code> in the database for a given - * subject) and iterator that reads on the term identifiers for the distinct - * resources in the database (this bounds the #of backchained statements - * that we will emit). - * <p> - * For each value visited by the {@link #resourceIds} iterator we examine - * the statement iterator. If the next value that would be visited by the - * statement iterator is an explicit statement for the current subject, then - * we emit the explicit statement. Otherwise we emit an inferred statement. - */ - public ISPO next() { + } + }); - if (!hasNext()) { + return new BackchainTypeResourceIterator(_src, src, resourceIds, + posItr, rdfType, rdfsResource); - throw new NoSuchElementException(); - - } + } - if (src.hasNext()) { + private static IV getTerm(final IPredicate<ISPO> pred, final int pos) { - return current = src.next(); - - } else if(resourceIds.hasNext()) { + final IVariableOrConstant<IV> term = pred.get(pos); - /* - * Examine resourceIds and posItr. - */ - - // resourceIds is the source for _inferences_ - final IV s1 = resourceIds.next(); - - if(posItr.hasNext()) { - - // posItr is the source for _explicit_ statements. - final IV s2 = posItr.next(); - - final int cmp = s1.compareTo(s2); - - if (cmp < 0) { + return term == null || term.isVar() ? null : term.get(); - /* - * Consuming from [resourceIds] (the term identifier ordered - * LT the next term identifier from [posItr]). - * - * There is NOT an explicit statement from [posItr], so emit - * as an inference and pushback on [posItr]. - */ - - current = new SPO(s1, rdfType, rdfsResource, - StatementEnum.Inferred); + } - posItr.pushback(); - - } else { - - /* - * Consuming from [posItr]. 
- * - * There is an explicit statement for the current term - * identifer from [resourceIds]. - */ - - if (cmp != 0) { - - /* - * Since [resourceIds] and [posItr] are NOT visiting the - * same term identifier, we pushback on [resourceIds]. - * - * Note: When they DO visit the same term identifier - * then we only emit the explicit statement and we - * consume (rather than pushback) from [resourceIds]. - */ - - resourceIds.pushback(); - - } - - current = new SPO(s2, rdfType, rdfsResource, - StatementEnum.Explicit); + /** + * Create an iterator that will visit all statements in the source iterator + * and also backchain any entailments of the form (x rdf:type rdfs:Resource) + * which are valid for the given triple pattern. + * + * @param src + * The source iterator. {@link #nextChunk()} will sort statements + * into the {@link IKeyOrder} reported by this iterator (as long + * as the {@link IKeyOrder} is non-<code>null</code>). + * @param db + * The database from which we will read the distinct subject + * identifiers (iff this is an all unbound triple pattern). + * @param rdfType + * The term identifier that corresponds to rdf:Type for the + * database. + * @param rdfsResource + * The term identifier that corresponds to rdf:Resource for the + * database. + * + * @see #newInstance(IChunkedOrderedIterator, IAccessPath, + * AbstractTripleStore, long, long) + */ + @SuppressWarnings( { "unchecked", "serial" }) + private BackchainTypeResourceIterator(IChunkedOrderedIterator<ISPO> _src,// + Iterator<ISPO> src,// + PushbackIterator<IV> resourceIds,// + PushbackIterator<IV> posItr,// + final IV rdfType,// + final IV rdfsResource// + ) { - } - - } else { - - /* - * [posItr] is exhausted so just emit inferences based on - * [resourceIds]. - */ - - current = new SPO(s1, rdfType, rdfsResource, - StatementEnum.Inferred); - - } + // the raw source - we pass close() through to this. + this._src = _src; - return current; + this.keyOrder = _src.getKeyOrder(); // MAY be null. - } else { - - /* - * Finish off the [posItr]. Anything from this source is an explicit (? - * type resource) statement. - */ - - assert posItr.hasNext(); - - return new SPO(posItr.next(), rdfType, rdfsResource, - StatementEnum.Explicit); - - } - - } + // the source with (x type resource) filtered out. + this.src = src; - /** - * Note: This method preserves the {@link IKeyOrder} of the source iterator - * iff it is reported by {@link #getKeyOrder()}. Otherwise chunks read from - * the source iterator will be in whatever order that iterator is using - * while chunks containing backchained entailments will be in - * {@link SPOKeyOrder#POS} order. - * <p> - * Note: In order to ensure that a consistent ordering is always used within - * a chunk the backchained entailments will always begin on a chunk - * boundary. - */ - public ISPO[] nextChunk() { + // + this.resourceIds = resourceIds; - final int chunkSize = 10000; - - if (!hasNext()) - throw new NoSuchElementException(); - - if(!sourceExhausted) { - - /* - * Return a chunk from the source iterator. - * - * Note: The chunk will be in the order used by the source iterator. - * If the source iterator does not report that order then - * [chunkKeyOrder] will be null. - */ - - chunkKeyOrder = keyOrder; + this.posItr = posItr; - ISPO[] s = new ISPO[chunkSize]; + this.rdfType = rdfType; - int n = 0; - - while(src.hasNext() && n < chunkSize ) { - - s[n++] = src.next(); - - } - - ISPO[] stmts = new ISPO[n]; - - // copy so that stmts[] is dense. 
- System.arraycopy(s, 0, stmts, 0, n); - - return stmts; - - } + this.rdfsResource = rdfsResource; - /* - * Create a "chunk" of entailments. - * - * Note: This chunk will be in natural POS order since that is the index - * that we scan to decide whether or not there was an explicit ( x - * rdf:type rdfs:Resource ) while we consume the [subjects] in termId - * order. - */ - - IV[] s = new IV[chunkSize]; - - int n = 0; - - while(resourceIds.hasNext() && n < chunkSize ) { - - s[n++] = resourceIds.next(); - - } - - SPO[] stmts = new SPO[n]; - - for(int i=0; i<n; i++) { - - stmts[i] = new SPO(s[i], rdfType, rdfsResource, - StatementEnum.Inferred); - - } - - if (keyOrder != null && keyOrder != SPOKeyOrder.POS) { + } - /* - * Sort into the same order as the source iterator. - * - * Note: We have to sort explicitly since we are scanning the POS - * index - */ + public IKeyOrder<ISPO> getKeyOrder() { - Arrays.sort(stmts, 0, stmts.length, keyOrder.getComparator()); + return keyOrder; - } + } - /* - * The chunk will be in POS order since that is how we are scanning the - * indices. - */ - - chunkKeyOrder = SPOKeyOrder.POS; - - return stmts; - - } + public void close() { - public ISPO[] nextChunk(IKeyOrder<ISPO> keyOrder) { - - if (keyOrder == null) - throw new IllegalArgumentException(); + if (!open) + return; - final ISPO[] stmts = nextChunk(); - - if (chunkKeyOrder != keyOrder) { + // release any resources here. - // sort into the required order. + open = false; - Arrays.sort(stmts, 0, stmts.length, keyOrder.getComparator()); + _src.close(); - } + resourceIds.close(); - return stmts; - - } + resourceIds = null; - /** - * Note: You can not "remove" the backchained entailments. If the last - * statement visited by {@link #next()} is "explicit" then the request is - * delegated to the source iterator. - */ - public void remove() { + if (posItr != null) { - if (!open) - throw new IllegalStateException(); - - if (current == null) - throw new IllegalStateException(); - - if(current.isExplicit()) { - - /* - * Delegate the request to the source iterator. - */ - - src.remove(); - - } - - current = null; - - } - - /** - * Reads on two iterators visiting elements in some natural order and visits - * their order preserving merge (no duplicates). - * - * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id$ - * @param <T> - */ - private static class MergedOrderedIterator<T extends Comparable<T>> - implements IChunkedIterator<T> { - - private final IChunkedIterator<T> src1; - private final IChunkedIterator<T> src2; - - public MergedOrderedIterator(IChunkedIterator<T> src1, - IChunkedIterator<T> src2) { + posItr.close(); - this.src1 = src1; - - this.src2 = src2; - - } - - public void close() { - - src1.close(); - - src2.close(); - - } + } - /** - * Note: Not implemented since not used above and this class is private. - */ - public T[] nextChunk() { - throw new UnsupportedOperationException(); - } + } - public boolean hasNext() { + public boolean hasNext() { - return tmp1 != null || tmp2 != null || src1.hasNext() - || src2.hasNext(); - - } - - private T tmp1; - private T tmp2; - - public T next() { + if (!open) { - if(!hasNext()) throw new NoSuchElementException(); - - if (tmp1 == null && src1.hasNext()) { + // the iterator has been closed. - tmp1 = src1.next(); + return false; - } - - if (tmp2 == null && src2.hasNext()) { + } - tmp2 = src2.next(); + if (!sourceExhausted) { - } - - if (tmp1 == null) { + if (src.hasNext()) { - // src1 is exhausted so deliver from src2. 
- final T tmp = tmp2; + // still consuming the source iterator. - tmp2 = null; + return true; - return tmp; + } - } - - if (tmp2 == null) { + // the source iterator is now exhausted. - // src2 is exhausted so deliver from src1. - final T tmp = tmp1; + sourceExhausted = true; - tmp1 = null; + _src.close(); - return tmp; + } - } + if (resourceIds.hasNext()) { - final int cmp = tmp1.compareTo(tmp2); + // still consuming the subjects iterator. - if (cmp == 0) { + return true; - final T tmp = tmp1; + } - tmp1 = tmp2 = null; + // the subjects iterator is also exhausted so we are done. - return tmp; + return false; - } else if (cmp < 0) { + } - final T tmp = tmp1; + /** + * Visits all {@link SPO}s visited by the source iterator and then begins to + * backchain ( x rdf:type: rdfs:Resource ) statements. + * <p> + * The "backchain" scans two iterators: an {@link IChunkedOrderedIterator} + * on <code>( ? rdf:type + * rdfs:Resource )</code> that reads on the database (this tells us whether + * we have an explicit <code>(x rdf:type rdfs:Resource)</code> in the + * database for a given subject) and iterator that reads on the term + * identifiers for the distinct resources in the database (this bounds the + * #of backchained statements that we will emit). + * <p> + * For each value visited by the {@link #resourceIds} iterator we examine + * the statement iterator. If the next value that would be visited by the + * statement iterator is an explicit statement for the current subject, then + * we emit the explicit statement. Otherwise we emit an inferred statement. + */ + public ISPO next() { - tmp1 = null; + if (!hasNext()) { - return tmp; + throw new NoSuchElementException(); - } else { + } - final T tmp = tmp2; + if (src.hasNext()) { - tmp2 = null; + return current = src.next(); - return tmp; + } else if (resourceIds.hasNext()) { - } - - } + /* + * Examine resourceIds and posItr. + */ - public void remove() { + // resourceIds is the source for _inferences_ + final IV s1 = resourceIds.next(); - throw new UnsupportedOperationException(); - - } + if (posItr.hasNext()) { - } + // posItr is the source for _explicit_ statements. + final IV s2 = posItr.next(); - /** - * Filterator style construct that allows push back of a single visited - * element. - * - * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id$ - * @param <E> - */ - public static class PushbackFilter<E> extends FilterBase { + final int cmp = s1.compareTo(s2); - /** - * - */ - private static final long serialVersionUID = -8010263934867149205L; + if (cmp < 0) { - @SuppressWarnings("unchecked") - public PushbackIterator<E> filterOnce(Iterator src, Object context) { + /* + * Consuming from [resourceIds] (the term identifier ordered + * LT the next term identifier from [posItr]). + * + * There is NOT an explicit statement from [posItr], so emit + * as an inference and pushback on [posItr]. + */ - return new PushbackIterator<E>((Iterator<E>) src); + current = new SPO(s1, rdfType, rdfsResource, + StatementEnum.Inferred); - } + posItr.pushback(); - } + } else { - /** - * Implementation class for {@link PushbackFilter}. - * - * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id$ - * @param <E> - */ - public static class PushbackIterator<E> implements Iterator<E>, - ICloseableIterator<E> { + /* + * Consuming from [posItr]. + * + * There is an explicit statement for the current term + * identifer from [resourceIds]. 
+ */ - private final Iterator<E> src; + if (cmp != 0) { - /** - * The most recent element visited by the iterator. - */ - private E current; - - /** - * When non-<code>null</code>, this element was pushed back and - * is the next element to be visited. - */ - private E buffer; + /* + * Since [resourceIds] and [posItr] are NOT visiting the + * same term identifier, we pushback on [resourceIds]. + * + * Note: When they DO visit the same term identifier + * then we only emit the explicit statement and we + * consume (rather than pushback) from [resourceIds]. + */ - public PushbackIterator(final Iterator<E> src) { + resourceIds.pushback(); - if (src == null) - throw new IllegalArgumentException(); + } - this.src = src; + current = new SPO(s2, rdfType, rdfsResource, + StatementEnum.Explicit); - } + } - public boolean hasNext() { + } else { - return buffer != null || src.hasNext(); + /* + * [posItr] is exhausted so just emit inferences based on + * [resourceIds]. + */ - } + current = new SPO(s1, rdfType, rdfsResource, + StatementEnum.Inferred); - public E next() { + } - if (!hasNext()) - throw new NoSuchElementException(); + return current; - final E tmp; + } else { - if (buffer != null) { + /* + * Finish off the [posItr]. Anything from this source is an explicit + * (? type resource) statement. + */ - tmp = buffer; + assert posItr.hasNext(); - buffer = null; + return new SPO(posItr.next(), rdfType, rdfsResource, + StatementEnum.Explicit); - } else { + } - tmp = src.next(); + } - } + /** + * Note: This method preserves the {@link IKeyOrder} of the source iterator + * iff it is reported by {@link #getKeyOrder()}. Otherwise chunks read from + * the source iterator will be in whatever order that iterator is using + * while chunks containing backchained entailments will be in + * {@link SPOKeyOrder#POS} order. + * <p> + * Note: In order to ensure that a consistent ordering is always used within + * a chunk the backchained entailments will always begin on a chunk + * boundary. + */ + public ISPO[] nextChunk() { - current = tmp; - - return tmp; + if (!hasNext()) + throw new NoSuchElementException(); - } + if (!sourceExhausted) { - /** - * Push the value onto the internal buffer. It will be returned by the - * next call to {@link #next()}. + /* + * Return a chunk from the source iterator. + * + * Note: The chunk will be in the order used by the source iterator. + * If the source iterator does not report that order then + * [chunkKeyOrder] will be null. + */ + + chunkKeyOrder = keyOrder; + + ISPO[] s = new ISPO[chunkSize]; + + int n = 0; + + while (src.hasNext() && n < chunkSize) { + + s[n++] = src.next(); + + } + + ISPO[] stmts = new ISPO[n]; + + // copy so that stmts[] is dense. + System.arraycopy(s, 0, stmts, 0, n); + + return stmts; + + } + + /* + * Create a "chunk" of entailments. + * + * Note: This chunk will be in natural POS order since that is the index + * that we scan to decide whether or not there was an explicit ( x + * rdf:type rdfs:Resource ) while we consume the [subjects] in termId + * order. + */ + + IV[] s = new IV[chunkSize]; + + int n = 0; + + while (resourceIds.hasNext() && n < chunkSize) { + + s[n++] = resourceIds.next(); + + } + + SPO[] stmts = new SPO[n]; + + for (int i = 0; i < n; i++) { + + stmts[i] = new SPO(s[i], rdfType, rdfsResource, + StatementEnum.Inferred); + + } + + if (keyOrder != null && keyOrder != SPOKeyOrder.POS) { + + /* + * Sort into the same order as the source iterator. 
+ * + * Note: We have to sort explicitly since we are scanning the POS + * index + */ + + Arrays.sort(stmts, 0, stmts.length, keyOrder.getComparator()); + + } + + /* + * The chunk will be in POS order since that is how we are scanning the + * indices. + */ + + chunkKeyOrder = SPOKeyOrder.POS; + + return stmts; + + } + + public ISPO[] nextChunk(IKeyOrder<ISPO> keyOrder) { + + if (keyOrder == null) + throw new IllegalArgumentException(); + + final ISPO[] stmts = nextChunk(); + + if (chunkKeyOrder != keyOrder) { + + // sort into the required order. + + Arrays.sort(stmts, 0, stmts.length, keyOrder.getComparator()); + + } + + return stmts; + + } + + /** + * Note: You can not "remove" the backchained entailments. If the last + * statement visited by {@link #next()} is "explicit" then the request is + * delegated to the source iterator. + */ + public void remove() { + + if (!open) + throw new IllegalStateException(); + + if (current == null) + throw new IllegalStateException(); + + if (current.isExplicit()) { + + /* + * Delegate the request to the source iterator. + */ + + src.remove(); + + } + + current = null; + + } + + /** + * Reads on two iterators visiting elements in some natural order and visits + * their order preserving merge (no duplicates). + * + * @author <a href="mailto:tho...@us...">Bryan + * Thompson</a> + * @version $Id: BackchainTypeResourceIterator.java 3687 2010-09-29 + * 22:50:32Z mrpersonick $ + * @param <T> + */ + private static class MergedOrderedIterator<T extends Comparable<T>> + implements IChunkedIterator<T> { + + private final IChunkedIterator<T> src1; + private final IChunkedIterator<T> src2; + + public MergedOrderedIterator(IChunkedIterator<T> src1, + IChunkedIterator<T> src2) { + + this.src1 = src1; + + this.src2 = src2; + + } + + public void close() { + + src1.close(); + + src2.close(); + + } + + /** + * Note: Not implemented since not used above and this class is private. + */ + public T[] nextChunk() { + throw new UnsupportedOperationException(); + } + + public boolean hasNext() { + + return tmp1 != null || tmp2 != null || src1.hasNext() + || src2.hasNext(); + + } + + private T tmp1; + private T tmp2; + + public T next() { + + if (!hasNext()) + throw new NoSuchElementException(); + + if (tmp1 == null && src1.hasNext()) { + + tmp1 = src1.next(); + + } + + if (tmp2 == null && src2.hasNext()) { + + tmp2 = src2.next(); + + } + + if (tmp1 == null) { + + // src1 is exhausted so deliver from src2. + final T tmp = tmp2; + + tmp2 = null; + + return tmp; + + } + + if (tmp2 == null) { + + // src2 is exhausted so deliver from src1. + final T tmp = tmp1; + + tmp1 = null; + + return tmp; + + } + + final int cmp = tmp1.compareTo(tmp2); + + if (cmp == 0) { + + final T tmp = tmp1; + + tmp1 = tmp2 = null; + + return tmp; + + } else if (cmp < 0) { + + final T tmp = tmp1; + + tmp1 = null; + + return tmp; + + } else { + + final T tmp = tmp2; + + tmp2 = null; + + return tmp; + + } + + } + + public void remove() { + + throw new UnsupportedOperationException(); + + } + + } + + /** + * Filterator style construct that allows push back of a single visited + * element. + * + * @author <a href="mailto:tho...@us...">Bryan + * Thompson</a> + * @version $Id: BackchainTypeResourceIterator.java 3687 2010-09-29 + * 22:50:32Z mrpersonick $ + * @param <E> + */ + public static class PushbackFilter<E> extends FilterBase { + + /** * - * @param value - * The value. - * - * @throws IllegalStateException - * if there is already a value pushed back. 
*/ - public void pushback() { + private static final long serialVersionUID = -8010263934867149205L; - if (buffer != null) - throw new IllegalStateException(); - - // pushback the last visited element. - buffer = current; - - } - - public void remove() { + @SuppressWarnings("unchecked") + public PushbackIterator<E> filterOnce(Iterator src, Object context) { - throw new UnsupportedOperationException(); + return new PushbackIterator<E>((Iterator<E>) src); - } + } - public void close() { + } - if(src instanceof ICloseableIterator) { + /** + * Implementation class for {@link PushbackFilter}. + * + * @author <a href="mailto:tho...@us...">Bryan + * Thompson</a> + * @version $Id: BackchainTypeResourceIterator.java 3687 2010-09-29 + * 22:50:32Z mrpersonick $ + * @param <E> + */ + public static class PushbackIterator<E> implements Iterator<E>, + ICloseableIterator<E> { - ((ICloseableIterator<E>)src).close(); - - } - - } + private final Iterator<E> src; - } - - private static class BackchainSTypeResourceIterator - implements IChunkedOrderedIterator<ISPO> { + /** + * The most recent element visited by the iterator. + */ + private E current; - private final IChunkedOrderedIterator<ISPO> _src; - private final IAccessPath<ISPO> accessPath; - private final AbstractTripleStore db; - private final IV rdfType; - private final IV rdfsResource; - private final IV s; - private IChunkedOrderedIterator<ISPO> appender; - private boolean canRemove; - - public BackchainSTypeResourceIterator( - final IChunkedOrderedIterator<ISPO> _src, - final IAccessPath<ISPO> accessPath, final AbstractTripleStore db, - final IV rdfType, final IV rdfsResource) { - this._src = _src; - this.accessPath = accessPath; - this.db = db; - this.rdfType = rdfType; - this.rdfsResource = rdfsResource; - this.s = (IV) accessPath... [truncated message content] |
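Aside on the backchainer logic quoted above: the core of BackchainTypeResourceIterator.next() is an order-preserving merge of two sorted streams of term identifiers, the distinct resources ([resourceIds]) and the subjects having an explicit (s rdf:type rdfs:Resource) statement ([posItr]). A hit in the second stream means "emit as Explicit", a miss means "emit as Inferred", and a single element of pushback is all the lookahead the merge ever needs. The following minimal, self-contained sketch (plain Java; Pushback and BackchainMergeSketch are hypothetical stand-ins, not the bigdata classes) illustrates that pattern under the assumption that both inputs are sorted ascending:

import java.util.Arrays;
import java.util.Iterator;
import java.util.NoSuchElementException;

/** Minimal pushback wrapper in the spirit of PushbackIterator above. */
class Pushback<E> implements Iterator<E> {
    private final Iterator<E> src;
    private E current; // the last element returned by next()
    private E buffer;  // pushed-back element; visited again by the next call to next()
    Pushback(final Iterator<E> src) { this.src = src; }
    public boolean hasNext() { return buffer != null || src.hasNext(); }
    public E next() {
        if (!hasNext()) throw new NoSuchElementException();
        current = (buffer != null) ? buffer : src.next();
        buffer = null;
        return current;
    }
    /** Push back the most recently visited element. */
    void pushback() {
        if (buffer != null) throw new IllegalStateException();
        buffer = current;
    }
    public void remove() { throw new UnsupportedOperationException(); }
}

public class BackchainMergeSketch {
    public static void main(final String[] args) {
        // All distinct resource ids, cf. [resourceIds] (sorted ascending).
        final Pushback<Long> resources = new Pushback<Long>(
                Arrays.asList(1L, 2L, 3L, 5L, 7L).iterator());
        // Subjects with an explicit (s rdf:type rdfs:Resource), cf. [posItr].
        final Pushback<Long> explicit = new Pushback<Long>(
                Arrays.asList(2L, 5L).iterator());
        while (resources.hasNext()) {
            final Long s1 = resources.next();
            if (explicit.hasNext()) {
                final Long s2 = explicit.next();
                final int cmp = s1.compareTo(s2);
                if (cmp < 0) {
                    // No explicit statement for s1: emit an inference and
                    // save s2 for a later pass.
                    System.out.println(s1 + " : Inferred");
                    explicit.pushback();
                } else {
                    // cmp > 0: revisit s1 on the next pass; cmp == 0: consume both.
                    if (cmp != 0) resources.pushback();
                    System.out.println(s2 + " : Explicit");
                }
            } else {
                // [posItr] is exhausted: everything remaining is an inference.
                System.out.println(s1 + " : Inferred");
            }
        }
        // Cf. the final branch of next(): any leftover explicit subjects are
        // emitted as explicit statements.
        while (explicit.hasNext())
            System.out.println(explicit.next() + " : Explicit");
    }
}

Run against the sample data this prints 1, 3, and 7 as Inferred and 2 and 5 as Explicit, which is exactly the explicit-wins behavior the javadoc above describes; one pushback slot suffices because the merge never needs more than one element of lookahead.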
From: <mrp...@us...> - 2011-01-11 04:44:31
Revision: 4073 http://bigdata.svn.sourceforge.net/bigdata/?rev=4073&view=rev Author: mrpersonick Date: 2011-01-11 04:44:22 +0000 (Tue, 11 Jan 2011) Log Message: ----------- working on nested optional groups Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IVariableOrConstant.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/CompareBOp.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/MathBOp.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSail.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSailRepositoryConnection.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestNestedOptionals.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IValueExpression.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/AND.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/BOUND.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/NOT.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl3.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/SOp.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/SOp2BOpUtility.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/SOpTree.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/SOpTreeBuilder.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/UnsupportedOperatorException.java Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IValueExpression.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IValueExpression.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IValueExpression.java 2011-01-11 04:44:22 UTC (rev 4073) @@ -0,0 +1,25 @@ +package com.bigdata.bop; + +import java.io.Serializable; + +public interface IValueExpression<E> extends BOp, Serializable { + + /** + * Return the <i>as bound</i> value of the variable or constant. The <i>as + * bound</i> value of an {@link IConstant} is the constant's value. The <i>as + * bound</i> value of an {@link IVariable} is the bound value in the given + * {@link IBindingSet} -or- <code>null</code> if the variable is not bound + * in the {@link IBindingSet}. + * + * @param bindingSet + * The binding set. + * + * @return The as bound value of the constant or variable. + * + * @throws IllegalArgumentException + * if this is an {@link IVariable} and the <i>bindingSet</i> is + * <code>null</code>. 
+ */ + E get(IBindingSet bindingSet); + +} Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IVariableOrConstant.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IVariableOrConstant.java 2011-01-11 01:37:17 UTC (rev 4072) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IVariableOrConstant.java 2011-01-11 04:44:22 UTC (rev 4073) @@ -37,7 +37,7 @@ * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ */ -public interface IVariableOrConstant<E> extends BOp, Serializable { +public interface IVariableOrConstant<E> extends IValueExpression<E> { /** * Return <code>true</code> iff this is a variable. @@ -64,24 +64,6 @@ E get(); /** - * Return the <i>as bound</i> value of the variable or constant. The <i>as - * bound</i> value of an {@link IConstant} is the contant's value. The <i>as - * bound</i> value of an {@link IVariable} is the bound value in the given - * {@link IBindingSet} -or- <code>null</code> if the variable is not bound - * in the {@link IBindingSet}. - * - * @param bindingSet - * The binding set. - * - * @return The as bound value of the constant or variable. - * - * @throws IllegalArgumentException - * if this is an {@link IVariable} and the <i>bindingSet</i> is - * <code>null</code>. - */ - E get(IBindingSet bindingSet); - - /** * Return the name of a variable. * * @throws UnsupportedOperationException Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/AND.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/AND.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/AND.java 2011-01-11 04:44:22 UTC (rev 4073) @@ -0,0 +1,74 @@ +/* + +Copyright (C) SYSTAP, LLC 2006-2011. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +*/ +package com.bigdata.bop.constraint; + +import java.util.Map; + +import com.bigdata.bop.BOpBase; +import com.bigdata.bop.BOp; +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IConstraint; + +/** + * Imposes the constraint <code>x AND y</code>. + */ +public class AND extends BOpConstraint { + + /** + * + */ + private static final long serialVersionUID = -8146965892831895463L; + + /** + * Required deep copy constructor. + */ + public AND(final BOp[] args, final Map<String, Object> annotations) { + super(args, annotations); + } + + /** + * Required deep copy constructor. 
+ */ + public AND(final AND op) { + super(op); + } + + public AND(final IConstraint x, final IConstraint y) { + + super(new BOp[] { x, y }, null/*annocations*/); + + if (x == null || y == null) + throw new IllegalArgumentException(); + + } + + public boolean accept(final IBindingSet s) { + + return ((IConstraint) get(0)).accept(s) + && ((IConstraint) get(1)).accept(s); + + } + +} Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/BOUND.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/BOUND.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/BOUND.java 2011-01-11 04:44:22 UTC (rev 4073) @@ -0,0 +1,73 @@ +/* + +Copyright (C) SYSTAP, LLC 2006-2011. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +*/ +package com.bigdata.bop.constraint; + +import java.util.Map; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IConstraint; +import com.bigdata.bop.IVariable; + +/** + * Imposes the constraint <code>bound(x)</code> for the variable x. + */ +public class BOUND extends BOpConstraint { + + /** + * + */ + private static final long serialVersionUID = -7408654639183330874L; + + /** + * Required deep copy constructor. + */ + public BOUND(final BOp[] args, final Map<String, Object> annotations) { + super(args, annotations); + } + + /** + * Required deep copy constructor. + */ + public BOUND(final BOUND op) { + super(op); + } + + public BOUND(final IVariable x) { + + super(new BOp[] { x }, null/*annocations*/); + + if (x == null) + throw new IllegalArgumentException(); + + } + + public boolean accept(final IBindingSet s) { + + return ((IVariable) get(0)).get(s) != null; + + } + +} Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/NOT.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/NOT.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/NOT.java 2011-01-11 04:44:22 UTC (rev 4073) @@ -0,0 +1,73 @@ +/* + +Copyright (C) SYSTAP, LLC 2006-2011. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +*/ +package com.bigdata.bop.constraint; + +import java.util.Map; + +import com.bigdata.bop.BOpBase; +import com.bigdata.bop.BOp; +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IConstraint; + +/** + * Imposes the constraint <code>!x</code>. + */ +public class NOT extends BOpConstraint { + + /** + * + */ + private static final long serialVersionUID = -5701967329003122236L; + + /** + * Required deep copy constructor. + */ + public NOT(final BOp[] args, final Map<String, Object> annotations) { + super(args, annotations); + } + + /** + * Required deep copy constructor. + */ + public NOT(final NOT op) { + super(op); + } + + public NOT(final IConstraint x) { + + super(new BOp[] { x }, null/*annocations*/); + + if (x == null) + throw new IllegalArgumentException(); + + } + + public boolean accept(final IBindingSet s) { + + return !((IConstraint) get(0)).accept(s); + + } + +} Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/CompareBOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/CompareBOp.java 2011-01-11 01:37:17 UTC (rev 4072) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/CompareBOp.java 2011-01-11 04:44:22 UTC (rev 4073) @@ -30,6 +30,7 @@ import com.bigdata.bop.BOp; import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IValueExpression; import com.bigdata.bop.IVariableOrConstant; import com.bigdata.bop.NV; import com.bigdata.bop.PipelineOp; @@ -73,8 +74,8 @@ super(op); } - public CompareBOp(final IVariableOrConstant<IV> left, - final IVariableOrConstant<IV> right, final CompareOp op) { + public CompareBOp(final IValueExpression<IV> left, + final IValueExpression<IV> right, final CompareOp op) { super(new BOp[] { left, right }, NV.asMap(new NV(Annotations.OP, op))); @@ -85,8 +86,8 @@ public boolean accept(final IBindingSet s) { - final IV left = ((IVariableOrConstant<IV>) get(0)).get(s); - final IV right = ((IVariableOrConstant<IV>) get(1)).get(s); + final IV left = ((IValueExpression<IV>) get(0)).get(s); + final IV right = ((IValueExpression<IV>) get(1)).get(s); if (left == null || right == null) return true; // not yet bound. Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/MathBOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/MathBOp.java 2011-01-11 01:37:17 UTC (rev 4072) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/MathBOp.java 2011-01-11 04:44:22 UTC (rev 4073) @@ -27,18 +27,18 @@ import com.bigdata.bop.BOp; import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IValueExpression; import com.bigdata.bop.IVariable; -import com.bigdata.bop.IVariableOrConstant; import com.bigdata.bop.ImmutableBOp; import com.bigdata.bop.NV; import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.internal.IVUtility; /** - * A math expression involving a left and right IVariableOrConstant operand. + * A math expression involving a left and right IValueExpression operand. 
*/ final public class MathBOp extends ImmutableBOp - implements IVariableOrConstant<IV> { + implements IValueExpression<IV> { /** * @@ -55,19 +55,6 @@ } - final public boolean isVar() { - - return ((IVariableOrConstant) get(0)).isVar() || - ((IVariableOrConstant) get(1)).isVar(); - - } - - final public boolean isConstant() { - - return !isVar(); - - } - /** * Required deep copy constructor. * @@ -79,8 +66,8 @@ } - public MathBOp(final IVariableOrConstant<IV> left, - final IVariableOrConstant<IV> right, final MathOp op) { + public MathBOp(final IValueExpression<IV> left, + final IValueExpression<IV> right, final MathOp op) { super(new BOp[] { left, right }, NV.asMap(new NV(Annotations.OP, op))); @@ -98,12 +85,12 @@ // // } - public IVariableOrConstant<IV> left() { - return (IVariableOrConstant<IV>) get(0); + public IValueExpression<IV> left() { + return (IValueExpression<IV>) get(0); } - public IVariableOrConstant<IV> right() { - return (IVariableOrConstant<IV>) get(1); + public IValueExpression<IV> right() { + return (IValueExpression<IV>) get(1); } public MathOp op() { @@ -133,13 +120,11 @@ } - final public boolean equals(final IVariableOrConstant<IV> o) { + final public boolean equals(final IValueExpression<IV> o) { if(!(o instanceof MathBOp)) { - // incomparable types. return false; - } return equals((MathBOp) o); @@ -176,18 +161,6 @@ } - final public IV get() { - - final IV left = left().get(); - final IV right = right().get(); - - if (left == null || right == null) - return null; - - return IVUtility.numericalMath(left, right, op()); - - } - final public IV get(final IBindingSet bindingSet) { final IV left = left().get(bindingSet); @@ -200,10 +173,4 @@ } - final public String getName() { - - throw new UnsupportedOperationException(); - - } - } Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl.java 2011-01-11 01:37:17 UTC (rev 4072) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl.java 2011-01-11 04:44:22 UTC (rev 4073) @@ -1607,10 +1607,10 @@ if (iv == null) return null; iv1 = new Constant<IV>(iv); - } else if (left instanceof MathExpr) { - iv1 = generateMath((MathExpr) left); - if (iv1 == null) - return null; +// } else if (left instanceof MathExpr) { +// iv1 = generateMath((MathExpr) left); +// if (iv1 == null) +// return null; } else { return null; } @@ -1622,10 +1622,10 @@ if (iv == null) return null; iv2 = new Constant<IV>(iv); - } else if (right instanceof MathExpr) { - iv2 = generateMath((MathExpr) right); - if (iv2 == null) - return null; +// } else if (right instanceof MathExpr) { +// iv2 = generateMath((MathExpr) right); +// if (iv2 == null) +// return null; } else { return null; } @@ -1648,10 +1648,10 @@ if (iv == null) return null; iv1 = new Constant<IV>(iv); - } else if (left instanceof MathExpr) { - iv1 = generateMath((MathExpr) left); - if (iv1 == null) - return null; +// } else if (left instanceof MathExpr) { +// iv1 = generateMath((MathExpr) left); +// if (iv1 == null) +// return null; } else { return null; } @@ -1663,10 +1663,10 @@ if (iv == null) return null; iv2 = new Constant<IV>(iv); - } else if (right instanceof MathExpr) { - iv2 = generateMath((MathExpr) right); - if (iv2 == null) - return null; +// } else if (right instanceof MathExpr) { +// iv2 = 
generateMath((MathExpr) right); +// if (iv2 == null) +// return null; } else { return null; } Added: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl3.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl3.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl3.java 2011-01-11 04:44:22 UTC (rev 4073) @@ -0,0 +1,2176 @@ +package com.bigdata.rdf.sail; + +import info.aduna.iteration.CloseableIteration; +import info.aduna.iteration.EmptyIteration; + +import java.util.Arrays; +import java.util.Collection; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Properties; +import java.util.Set; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; + +import org.apache.log4j.Logger; +import org.openrdf.model.Literal; +import org.openrdf.model.URI; +import org.openrdf.model.Value; +import org.openrdf.query.BindingSet; +import org.openrdf.query.Dataset; +import org.openrdf.query.QueryEvaluationException; +import org.openrdf.query.algebra.And; +import org.openrdf.query.algebra.Bound; +import org.openrdf.query.algebra.Compare; +import org.openrdf.query.algebra.Compare.CompareOp; +import org.openrdf.query.algebra.Filter; +import org.openrdf.query.algebra.Group; +import org.openrdf.query.algebra.Join; +import org.openrdf.query.algebra.LeftJoin; +import org.openrdf.query.algebra.MathExpr; +import org.openrdf.query.algebra.MathExpr.MathOp; +import org.openrdf.query.algebra.MultiProjection; +import org.openrdf.query.algebra.Not; +import org.openrdf.query.algebra.Or; +import org.openrdf.query.algebra.Order; +import org.openrdf.query.algebra.Projection; +import org.openrdf.query.algebra.ProjectionElem; +import org.openrdf.query.algebra.ProjectionElemList; +import org.openrdf.query.algebra.QueryModelNode; +import org.openrdf.query.algebra.QueryRoot; +import org.openrdf.query.algebra.SameTerm; +import org.openrdf.query.algebra.StatementPattern; +import org.openrdf.query.algebra.StatementPattern.Scope; +import org.openrdf.query.algebra.TupleExpr; +import org.openrdf.query.algebra.UnaryTupleOperator; +import org.openrdf.query.algebra.Union; +import org.openrdf.query.algebra.ValueConstant; +import org.openrdf.query.algebra.ValueExpr; +import org.openrdf.query.algebra.Var; +import org.openrdf.query.algebra.evaluation.impl.EvaluationStrategyImpl; +import org.openrdf.query.algebra.evaluation.iterator.FilterIterator; +import org.openrdf.query.algebra.helpers.QueryModelVisitorBase; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.Constant; +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IConstant; +import com.bigdata.bop.IConstraint; +import com.bigdata.bop.IPredicate; +import com.bigdata.bop.IPredicate.Annotations; +import com.bigdata.bop.IValueExpression; +import com.bigdata.bop.IVariable; +import com.bigdata.bop.IVariableOrConstant; +import com.bigdata.bop.NV; +import com.bigdata.bop.PipelineOp; +import com.bigdata.bop.ap.Predicate; +import com.bigdata.bop.constraint.AND; +import com.bigdata.bop.constraint.BOUND; +import com.bigdata.bop.constraint.EQ; +import com.bigdata.bop.constraint.INBinarySearch; +import com.bigdata.bop.constraint.NE; +import com.bigdata.bop.constraint.NOT; +import com.bigdata.bop.constraint.OR; +import 
com.bigdata.bop.engine.IRunningQuery; +import com.bigdata.bop.engine.QueryEngine; +import com.bigdata.bop.solutions.ISortOrder; +import com.bigdata.btree.IRangeQuery; +import com.bigdata.btree.keys.IKeyBuilderFactory; +import com.bigdata.rdf.internal.DummyIV; +import com.bigdata.rdf.internal.IV; +import com.bigdata.rdf.internal.constraints.CompareBOp; +import com.bigdata.rdf.internal.constraints.MathBOp; +import com.bigdata.rdf.lexicon.LexiconRelation; +import com.bigdata.rdf.model.BigdataValue; +import com.bigdata.rdf.sail.BigdataSail.Options; +import com.bigdata.rdf.sail.sop.SOp; +import com.bigdata.rdf.sail.sop.SOp2BOpUtility; +import com.bigdata.rdf.sail.sop.SOpTree; +import com.bigdata.rdf.sail.sop.SOpTree.SOpGroup; +import com.bigdata.rdf.sail.sop.SOpTreeBuilder; +import com.bigdata.rdf.sail.sop.UnsupportedOperatorException; +import com.bigdata.rdf.spo.DefaultGraphSolutionExpander; +import com.bigdata.rdf.spo.ExplicitSPOFilter; +import com.bigdata.rdf.spo.ISPO; +import com.bigdata.rdf.spo.NamedGraphSolutionExpander; +import com.bigdata.rdf.spo.SPOPredicate; +import com.bigdata.rdf.store.AbstractTripleStore; +import com.bigdata.rdf.store.BD; +import com.bigdata.rdf.store.BigdataBindingSetResolverator; +import com.bigdata.relation.accesspath.ElementFilter; +import com.bigdata.relation.accesspath.IAsynchronousIterator; +import com.bigdata.relation.accesspath.IBuffer; +import com.bigdata.relation.accesspath.IElementFilter; +import com.bigdata.relation.rule.IAccessPathExpander; +import com.bigdata.relation.rule.IProgram; +import com.bigdata.relation.rule.IQueryOptions; +import com.bigdata.relation.rule.IRule; +import com.bigdata.relation.rule.Rule; +import com.bigdata.relation.rule.eval.ISolution; +import com.bigdata.relation.rule.eval.RuleStats; +import com.bigdata.search.FullTextIndex; +import com.bigdata.search.IHit; +import com.bigdata.striterator.ChunkedWrappedIterator; +import com.bigdata.striterator.Dechunkerator; +import com.bigdata.striterator.DistinctFilter; +import com.bigdata.striterator.IChunkedOrderedIterator; + +/** + * Extended to rewrite Sesame {@link TupleExpr}s onto native {@link Rule}s and + * to evaluate magic predicates for full text search, etc. Query evaluation can + * proceed either by Sesame 2 evaluation or, if {@link Options#NATIVE_JOINS} is + * enabled, then by translation of Sesame 2 query expressions into native + * {@link IRule}s and native evaluation of those {@link IRule}s. + * + * <h2>Query options</h2> + * The following summarizes how various high-level query language feature are + * mapped onto native {@link IRule}s. + * <dl> + * <dt>DISTINCT</dt> + * <dd>{@link IQueryOptions#isDistinct()}, which is realized using + * {@link DistinctFilter}.</dd> + * <dt>ORDER BY</dt> + * <dd>{@link IQueryOptions#getOrderBy()} is effected by a custom + * {@link IKeyBuilderFactory} which generates sort keys that capture the desired + * sort order from the bindings in an {@link ISolution}. Unless DISTINCT is + * also specified, the generated sort keys are made unique by appending a one up + * long integer to the key - this prevents sort keys that otherwise compare as + * equals from dropping solutions. Note that the SORT is actually imposed by the + * {@link DistinctFilter} using an {@link IKeyBuilderFactory} assembled from the + * ORDER BY constraints. + * + * FIXME BryanT - implement the {@link IKeyBuilderFactory}. 
+ * + * FIXME MikeP - assemble the {@link ISortOrder}[] from the query and set on + * the {@link IQueryOptions}.</dd> + * <dt>OFFSET and LIMIT</dt> + * <dd> + * <p> + * {@link IQueryOptions#getSlice()}, which was effected as a conditional in + * the old "Nested Subquery With Join Threads Task" based on the + * {@link RuleStats#solutionCount}. Query {@link ISolution}s are counted as + * they are generated, but they are only entered into the {@link ISolution} + * {@link IBuffer} when the solutionCount is GE the OFFSET and LT the LIMIT. + * Query evaluation halts once the LIMIT is reached. + * </p> + * <p> + * Note that when DISTINCT and either LIMIT and/or OFFSET are specified + * together, then the LIMIT and OFFSET <strong>MUST</strong> be applied after + * the solutions have been generated since we may have to generate more than + * LIMIT solutions in order to have LIMIT <em>DISTINCT</em> solutions. We + * handle this for now by NOT translating the LIMIT and OFFSET onto the + * {@link IRule} and instead let Sesame close the iterator once it has enough + * solutions. + * </p> + * <p> + * Note that LIMIT and SLICE requires an evaluation plan that provides stable + * results. For a simple query this is achieved by setting + * {@link IQueryOptions#isStable()} to <code>true</code>. + * <p> + * For a UNION query, you must also set {@link IProgram#isParallel()} to + * <code>false</code> to prevent parallelized execution of the {@link IRule}s + * in the {@link IProgram}. + * </p> + * </dd> + * <dt>UNION</dt> + * <dd>A UNION is translated into an {@link IProgram} consisting of one + * {@link IRule} for each clause in the UNION. + * + * FIXME MikeP - implement.</dd> + * </dl> + * <h2>Filters</h2> + * The following provides a summary of how various kinds of FILTER are handled. + * A filter that is not explicitly handled is left untranslated and will be + * applied by Sesame against the generated {@link ISolution}s. + * <p> + * Whenever possible, a FILTER is translated into an {@link IConstraint} on an + * {@link IPredicate} in the generated native {@link IRule}. Some filters are + * essentially JOINs against the {@link LexiconRelation}. Those can be handled + * either as JOINs (generating an additional {@link IPredicate} in the + * {@link IRule}) or as an {@link INBinarySearch} constraint, where the inclusion set is + * pre-populated by some operation on the {@link LexiconRelation}. + * <dl> + * <dt>EQ</dt> + * <dd>Translated into an {@link EQ} constraint on an {@link IPredicate}.</dd> + * <dt>NE</dt> + * <dd>Translated into an {@link NE} constraint on an {@link IPredicate}.</dd> + * <dt>IN</dt> + * <dd>Translated into an {@link INBinarySearch} constraint on an {@link IPredicate}.</dd> + * <dt>OR</dt> + * <dd>Translated into an {@link OR} constraint on an {@link IPredicate}.</dd> + * <dt></dt> + * <dd></dd> + * </dl> + * <h2>Magic predicates</h2> + * <p> + * {@link BD#SEARCH} is the only magic predicate at this time. When the object + * position is bound to a constant, the magic predicate is evaluated once and + * the result is used to generate a set of term identifiers that are matches for + * the token(s) extracted from the {@link Literal} in the object position. Those + * term identifiers are then used to populate an {@link INBinarySearch} constraint. The + * object position in the {@link BD#SEARCH} MUST be bound to a constant. + * </p> + * + * FIXME We are not in fact rewriting the query operation at all, simply + * choosing a different evaluation path as we go. 
The rewrite should really be + * isolated from the execution, e.g., in its own class. That more correct + * approach is more than I want to get into right now as we will have to define + * variants on the various operators that let us model the native rule system + * directly, e.g., an n-ary IProgram, n-ary IRule operator, an IPredicate + * operator, etc. Then we can handle evaluation using their model with anything + * re-written to our custom operators being caught by our custom evaluate() + * methods and everything else running their default methods. Definitely the + * right approach, and much easier to write unit tests. + * + * @todo REGEX : if there is a "ˆ" literal followed by a wildcard + * AND there are no flags which would cause problems (case-folding, etc) + * then the REGEX can be rewritten as a prefix scan on the lexicon, which + * is very efficient, and converted to an IN filter. When the set size is + * huge we should rewrite it as another tail in the query instead. + * <p> + * Otherwise, regex filters are left outside of the rule. We can't + * optimize that until we generate rules that perform JOINs across the + * lexicon and the spo relations (which we could do, in which case it + * becomes a constraint on that join). + * <p> + * We don't have any indices that are designed to optimize regex scans, + * but we could process a regex scan as a parallel iterator scan against + * the lexicon. + * + * @todo Roll more kinds of filters into the native {@link IRule}s as + * {@link IConstraint}s on {@link IPredicate}s. + * <p> + * isURI(), etc. can be evaluated by testing a bit flag on the term + * identifier, which is very efficient. + * <p> + * + * @todo Verify handling of datatype operations. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id: BigdataEvaluationStrategyImpl.java 2272 2009-11-04 02:10:19Z + * mrpersonick $ + */ +public class BigdataEvaluationStrategyImpl3 extends EvaluationStrategyImpl { + + /** + * Logger. + */ + protected static final Logger log = + Logger.getLogger(BigdataEvaluationStrategyImpl3.class); + + protected final BigdataTripleSource tripleSource; + + protected final Dataset dataset; + + private final AbstractTripleStore database; + + /** + */ + public BigdataEvaluationStrategyImpl3( + final BigdataTripleSource tripleSource, final Dataset dataset, + final boolean nativeJoins) { + + super(tripleSource, dataset); + + this.tripleSource = tripleSource; + this.dataset = dataset; + this.database = tripleSource.getDatabase(); + this.nativeJoins = nativeJoins; + + } + + /** + * If true, use native evaluation on the sesame operator tree if possible. + */ + private boolean nativeJoins; + + /** + * A set of properties that act as query hints during evaluation. + */ + private Properties queryHints; + + /** + * This is the top-level method called by the SAIL to evaluate a query. + * The TupleExpr parameter here is guaranteed to be the root of the operator + * tree for the query. Query hints are parsed by the SAIL from the + * namespaces in the original query. See {@link QueryHints#NAMESPACE}. + * <p> + * The query root will be handled by the native Sesame evaluation until we + * reach one of three possible top-level operators (union, join, or left + * join) at which point we will take over and translate the sesame operator + * tree into a native bigdata query. 
If in the process of this translation + * we encounter an operator that we cannot handle natively, we will log + * a warning message and punt to Sesame to let it handle the entire + * query evaluation process (much slower than native evaluation). + */ + public CloseableIteration<BindingSet, QueryEvaluationException> evaluate( + final TupleExpr expr, final BindingSet bindings, + final Properties queryHints) + throws QueryEvaluationException { + + // spit out the whole operator tree + if (log.isInfoEnabled()) { + log.info("operator tree:\n" + expr); + } + + this.queryHints = queryHints; + + if (log.isInfoEnabled()) { + log.info("queryHints:\n" + queryHints); + } + + return super.evaluate(expr, bindings); + + } + + /** + * Translate top-level UNIONs into native bigdata programs for execution. + * This will attempt to look down the operator tree from this point and turn + * the Sesame operators into a set of native rules within a single program. + * <p> + * FIXME A Union is a BinaryTupleOperator composed of two expressions. This + * native evaluation only handles the special case where the left and right + * args are one of: {Join, LeftJoin, StatementPattern, Union}. It's + * possible that the left or right arg is something other than one of those + * operators, in which case we punt to the Sesame evaluation, which + * degrades performance. + * <p> + * FIXME Also, even if the left or right arg is one of the cases we handle, + * it's possible that the translation of that arg into a native rule will + * fail because of an unsupported SPARQL language feature, such as an + * embedded UNION or an unsupported filter type. + */ + @Override + public CloseableIteration<BindingSet, QueryEvaluationException> evaluate( + final Union union, final BindingSet bs) + throws QueryEvaluationException { + + if (!nativeJoins) { + // Use Sesame 2 evaluation + return super.evaluate(union, bs); + } + + if (log.isInfoEnabled()) { + log.info("evaluating top-level Union operator"); + } + + try { + + return evaluateNatively(union, bs); + + } catch (UnsupportedOperatorException ex) { + + // Use Sesame 2 evaluation + + log.warn("could not evaluate natively, using Sesame evaluation"); + + if (log.isInfoEnabled()) { + log.info(ex.getOperator()); + } + + nativeJoins = false; + + return super.evaluate(union, bs); + + } + + } + + /** + * Translate top-level JOINs into native bigdata programs for execution. + * This will attempt to look down the operator tree from this point and turn + * the Sesame operators into a native rule. + * <p> + * FIXME It's possible that the translation of the left or right arg into a + * native rule will fail because of an unsupported SPARQL language feature, + * such as an embedded UNION or an unsupported filter type. + */ + @Override + public CloseableIteration<BindingSet, QueryEvaluationException> evaluate( + final Join join, final BindingSet bs) + throws QueryEvaluationException { + + if (!nativeJoins) { + // Use Sesame 2 evaluation + return super.evaluate(join, bs); + } + + if (log.isInfoEnabled()) { + log.info("evaluating top-level Join operator"); + } + + try { + + return evaluateNatively(join, bs); + + } catch (UnsupportedOperatorException ex) { + + // Use Sesame 2 evaluation + + log.warn("could not evaluate natively, using Sesame evaluation"); + + if (log.isInfoEnabled()) { + log.info(ex.getOperator()); + } + + nativeJoins = false; + + return super.evaluate(join, bs); + + } + + } + + /** + * Translate top-level LEFTJOINs into native bigdata programs for execution. 
+ * This will attempt to look down the operator tree from this point and turn + * the Sesame operators into a native rule. + * <p> + * FIXME It's possible that the translation of the left or right arg into a + * native rule will fail because of an unsupported SPARQL language feature, + * such as an embedded UNION or an unsupported filter type. + */ + @Override + public CloseableIteration<BindingSet, QueryEvaluationException> evaluate( + final LeftJoin leftJoin, final BindingSet bs) + throws QueryEvaluationException { + + if (!nativeJoins) { + // Use Sesame 2 evaluation + return super.evaluate(leftJoin, bs); + } + + if (log.isInfoEnabled()) { + log.info("evaluating top-level LeftJoin operator"); + } + + try { + + return evaluateNatively(leftJoin, bs); + + } catch (UnsupportedOperatorException ex) { + + // Use Sesame 2 evaluation + + log.warn("could not evaluate natively, using Sesame evaluation"); + + if (log.isInfoEnabled()) { + log.info(ex.getOperator()); + } + + nativeJoins = false; + + return super.evaluate(leftJoin, bs); + + } + + } + + private CloseableIteration<BindingSet, QueryEvaluationException> + evaluateNatively(final TupleExpr tupleExpr, final BindingSet bs) + throws QueryEvaluationException, UnsupportedOperatorException { + try { + return _evaluateNatively(tupleExpr, bs); + } catch (UnrecognizedValueException ex) { + return new EmptyIteration<BindingSet, QueryEvaluationException>(); + } catch (QueryEvaluationException ex) { + throw ex; + } catch (Exception ex) { + throw new QueryEvaluationException(ex); + } + } + + private CloseableIteration<BindingSet, QueryEvaluationException> + _evaluateNatively(final TupleExpr root, final BindingSet bs) + throws UnsupportedOperatorException, UnrecognizedValueException, + QueryEvaluationException { + + final SOpTreeBuilder stb = new SOpTreeBuilder(); + + /* + * The sesame operator tree + */ + SOpTree sopTree; + + /* + * Turn the Sesame operator tree into something a little easier + * to work with. + */ + sopTree = stb.collectSOps(root); + + /* + * We need to prune groups that contain terms that do not appear in + * our lexicon. + */ + final Collection<SOpGroup> groupsToPrune = new LinkedList<SOpGroup>(); + + /* + * Iterate through the sop tree and translate statement patterns into + * predicates. + */ + for (SOp sop : sopTree) { + final QueryModelNode op = sop.getOperator(); + if (op instanceof StatementPattern) { + final StatementPattern sp = (StatementPattern) op; + try { + final IPredicate bop = toPredicate((StatementPattern) op); + sop.setBOp(bop); + } catch (UnrecognizedValueException ex) { + /* + * If we encounter a value not in the lexicon, we can + * still continue with the query if the value is in + * either an optional tail or an optional join group (i.e. + * if it appears on the right side of a LeftJoin). + * Otherwise we can stop evaluating right now. + */ + if (sop.isRightSideLeftJoin()) { + groupsToPrune.add(sopTree.getGroup(sop.getGroup())); + } else { + throw ex; + } + } + } + } + + /* + * Prunes the sop tree of optional join groups containing values + * not in the lexicon. + */ + sopTree = stb.pruneGroups(sopTree, groupsToPrune); + + /* + * If we have a filter in the root group (one that can be safely applied + * across the entire query) that we cannot translate into a native + * bigdata constraint, we can run it as a FilterIterator after the + * query has run natively. 
+ */ + final Collection<ValueExpr> sesameFilters = new LinkedList<ValueExpr>(); + + /* + * We need to prune Sesame filters that we cannot translate into native + * constraints (ones that require lexicon joins). + */ + final Collection<SOp> sopsToPrune = new LinkedList<SOp>(); + + /* + * Iterate through the sop tree and translate Sesame ValueExpr operators + * into bigdata IConstraint boperators. + */ + for (SOp sop : sopTree) { + final QueryModelNode op = sop.getOperator(); + if (op instanceof ValueExpr) { + final ValueExpr ve = (ValueExpr) op; + try { + final IConstraint bop = toConstraint(ve); + sop.setBOp(bop); + } catch (UnsupportedOperatorException ex) { + /* + * If we encounter a sesame filter (ValueExpr) that we + * cannot translate, we can safely wrap the entire query + * with a Sesame filter iterator to capture that + * untranslatable value expression. If we are not in the + * root group however, we risk applying the filter to the + * wrong context (for example a filter inside an optional + * join group cannot be applied universally to the entire + * solution). In this case we must punt. + */ + if (sop.getGroup() == SOpTreeBuilder.ROOT_GROUP_ID) { + sopsToPrune.add(sop); + sesameFilters.add(ve); + } else { + throw ex; + } + } + } + } + + /* + * Prunes the sop tree of untranslatable filters. + */ + sopTree = stb.pruneSOps(sopTree, sopsToPrune); + + /* + * Make sure we don't have free text searches searching outside + * their named graph scope. + */ + attachNamedGraphsFilterToSearches(sopTree); + + /* + * Gather variables required by Sesame outside of the query + * evaluation (projection and global sesame filters). + */ + final IVariable[] required = + gatherRequiredVariables(root, sesameFilters); + + final QueryEngine queryEngine = tripleSource.getSail().getQueryEngine(); + + final PipelineOp query; + { + /* + * Note: The ids are assigned using incrementAndGet() so ONE (1) is + * the first id that will be assigned when we pass in ZERO (0) as + * the initial state of the AtomicInteger. + */ + final AtomicInteger idFactory = new AtomicInteger(0); + + // Convert the step to a bigdata operator tree. + query = SOp2BOpUtility.convert(sopTree, idFactory, database, + queryEngine, queryHints); + + if (log.isInfoEnabled()) + log.info(query); + + } + + return _evaluateNatively(query, bs, queryEngine, sesameFilters); + + } + + protected CloseableIteration<BindingSet, QueryEvaluationException> + _evaluateNatively(final PipelineOp query, final BindingSet bs, + final QueryEngine queryEngine, + final Collection<ValueExpr> sesameConstraints) + throws QueryEvaluationException { + + try { + + final IRunningQuery runningQuery = queryEngine.eval(query); + + final IAsynchronousIterator<IBindingSet[]> it1 = + runningQuery.iterator(); + + final IChunkedOrderedIterator<IBindingSet> it2 = + new ChunkedWrappedIterator<IBindingSet>( + new Dechunkerator<IBindingSet>(it1)); + + CloseableIteration<BindingSet, QueryEvaluationException> result = + new Bigdata2Sesame2BindingSetIterator<QueryEvaluationException>( + new BigdataBindingSetResolverator(database, it2).start( + database.getExecutorService())); + + // Wait for the Future (checks for errors). 
+ runningQuery.get(); + + // use the basic filter iterator for remaining filters + if (sesameConstraints != null) { + for (ValueExpr ve : sesameConstraints) { + final Filter filter = new Filter(null, ve); + result = new FilterIterator(filter, result, this); + } + } + + return result; + + } catch (QueryEvaluationException ex) { + throw ex; + } catch (Exception ex) { + throw new QueryEvaluationException(ex); + } + + } + +// /** +// * This is the method that will attempt to take a top-level join or left +// * join and turn it into a native bigdata rule. The Sesame operators Join +// * and LeftJoin share only the common base class BinaryTupleOperator, but +// * other BinaryTupleOperators are not supported by this method. Other +// * specific types of BinaryTupleOperators will cause this method to throw +// * an exception. +// * <p> +// * This method will also turn a single top-level StatementPattern into a +// * rule with one predicate in it. +// * <p> +// * Note: As a pre-condition, the {@link Value}s in the query expression +// * MUST have been rewritten as {@link BigdataValue}s and their term +// * identifiers MUST have been resolved. Any term identifier that remains +// * {@link IRawTripleStore#NULL} is an indication that there is no entry for +// * that {@link Value} in the database. Since the JOINs are required (vs +// * OPTIONALs), that means that there is no solution for the JOINs and an +// * {@link EmptyIteration} is returned rather than evaluating the query. +// * +// * @param join +// * @return native bigdata rule +// * @throws UnsupportedOperatorException +// * this exception will be thrown if the Sesame join contains any +// * SPARQL language constructs that cannot be converted into +// * the bigdata native rule model +// * @throws QueryEvaluationException +// */ +// private IRule createNativeQueryOld(final TupleExpr join) +// throws UnsupportedOperatorException, +// QueryEvaluationException { +// +// if (!(join instanceof StatementPattern || +// join instanceof Join || join instanceof LeftJoin || +// join instanceof Filter)) { +// throw new AssertionError( +// "only StatementPattern, Join, and LeftJoin supported"); +// } +// +// // flattened collection of statement patterns nested within this join, +// // along with whether or not each one is optional +// final Map<StatementPattern, Boolean> stmtPatterns = +// new LinkedHashMap<StatementPattern, Boolean>(); +// // flattened collection of filters nested within this join +// final Collection<Filter> filters = new LinkedList<Filter>(); +// +// // will throw EncounteredUnknownTupleExprException if the join +// // contains something we don't handle yet +//// collectStatementPatterns(join, stmtPatterns, filters); +// +// if (false) { +// for (Map.Entry<StatementPattern, Boolean> entry : +// stmtPatterns.entrySet()) { +// log.debug(entry.getKey() + ", optional=" + entry.getValue()); +// } +// for (Filter filter : filters) { +// log.debug(filter.getCondition()); +// } +// } +// +// // generate tails +// Collection<IPredicate> tails = new LinkedList<IPredicate>(); +// // keep a list of free text searches for later to solve a named graphs +// // problem +// final Map<IPredicate, StatementPattern> searches = +// new HashMap<IPredicate, StatementPattern>(); +// for (Map.Entry<StatementPattern, Boolean> entry : stmtPatterns +// .entrySet()) { +// StatementPattern sp = entry.getKey(); +// boolean optional = entry.getValue(); +// IPredicate tail = toPredicate(sp, optional); +// // encountered a value not in the database lexicon +// if (tail == 
null) { +// if (log.isDebugEnabled()) { +// log.debug("could not generate tail for: " + sp); +// } +// if (optional) { +// // for optionals, just skip the tail +// continue; +// } else { +// // for non-optionals, skip the entire rule +// return null; +// } +// } +// if (tail.getAccessPathExpander() instanceof FreeTextSearchExpander) { +// searches.put(tail, sp); +// } +// tails.add(tail); +// } +// +// /* +// * When in quads mode, we need to go through the free text searches and +// * make sure that they are properly filtered for the dataset where +// * needed. Joins will take care of this, so we only need to add a filter +// * when a search variable does not appear in any other tails that are +// * non-optional. +// * +// * @todo Bryan seems to think this can be fixed with a DISTINCT JOIN +// * mechanism in the rule evaluation. +// */ +// if (database.isQuads() && dataset != null) { +// for (IPredicate search : searches.keySet()) { +// final Set<URI> graphs; +// StatementPattern sp = searches.get(search); +// switch (sp.getScope()) { +// case DEFAULT_CONTEXTS: { +// /* +// * Query against the RDF merge of zero or more source +// * graphs. +// */ +// graphs = dataset.getDefaultGraphs(); +// break; +// } +// case NAMED_CONTEXTS: { +// /* +// * Query against zero or more named graphs. +// */ +// graphs = dataset.getNamedGraphs(); +// break; +// } +// default: +// throw new AssertionError(); +// } +// if (graphs == null) { +// continue; +// } +// // why would we use a constant with a free text search??? +// if (search.get(0).isConstant()) { +// throw new AssertionError(); +// } +// // get ahold of the search variable +// com.bigdata.bop.Var searchVar = +// (com.bigdata.bop.Var) search.get(0); +// if (log.isDebugEnabled()) { +// log.debug(searchVar); +// } +// // start by assuming it needs filtering, guilty until proven +// // innocent +// boolean needsFilter = true; +// // check the other tails one by one +// for (IPredicate<ISPO> tail : tails) { +// IAccessPathExpander<ISPO> expander = +// tail.getAccessPathExpander(); +// // only concerned with non-optional tails that are not +// // themselves magic searches +// if (expander instanceof FreeTextSearchExpander +// || tail.isOptional()) { +// continue; +// } +// // see if the search variable appears in this tail +// boolean appears = false; +// for (int i = 0; i < tail.arity(); i++) { +// IVariableOrConstant term = tail.get(i); +// if (log.isDebugEnabled()) { +// log.debug(term); +// } +// if (term.equals(searchVar)) { +// appears = true; +// break; +// } +// } +// // if it appears, we don't need a filter +// if (appears) { +// needsFilter = false; +// break; +// } +// } +// // if it needs a filter, add it to the expander +// if (needsFilter) { +// if (log.isDebugEnabled()) { +// log.debug("needs filter: " + searchVar); +// } +// FreeTextSearchExpander expander = (FreeTextSearchExpander) +// search.getAccessPathExpander(); +// expander.addNamedGraphsFilter(graphs); +// } +// } +// } +// +// // generate constraints +// final Collection<IConstraint> constraints = +// new LinkedList<IConstraint>(); +// final Iterator<Filter> filterIt = filters.iterator(); +// while (filterIt.hasNext()) { +// final Filter filter = filterIt.next(); +// final IConstraint constraint = toConstraint(filter.getCondition()); +// if (constraint != null) { +// // remove if we are able to generate a native constraint for it +// if (log.isDebugEnabled()) { +// log.debug("able to generate a constraint: " + constraint); +// } +// filterIt.remove(); +// 
constraints.add(constraint); +// } +// } +// +// /* +// * FIXME Native slice, DISTINCT, etc. are all commented out for now. +// * Except for ORDER_BY, support exists for all of these features in the +// * native rules, but we need to separate the rewrite of the tupleExpr +// * and its evaluation in order to properly handle this stuff. +// */ +// IQueryOptions queryOptions = QueryOptions.NONE; +// // if (slice) { +// // if (!distinct && !union) { +// // final ISlice slice = new Slice(offset, limit); +// // queryOptions = new QueryOptions(false/* distinct */, +// // true/* stable */, null/* orderBy */, slice); +// // } +// // } else { +// // if (distinct && !union) { +// // queryOptions = QueryOptions.DISTINCT; +// // } +// // } +// +//// if (log.isDebugEnabled()) { +//// for (IPredicate<ISPO> tail : tails) { +//// IAccessPathExpander<ISPO> expander = tail.getAccessPathExpander(); +//// if (expander != null) { +//// IAccessPath<ISPO> accessPath = database.getSPORelation() +//// .getAccessPath(tail); +//// accessPath = expander.getAccessPath(accessPath); +//// IChunkedOrderedIterator<ISPO> it = accessPath.iterator(); +//// while (it.hasNext()) { +//// log.debug(it.next().toString(database)); +//// } +//// } +//// } +//// } +// +// /* +// * Collect a set of variables required beyond just the join (i.e. +// * aggregation, projection, filters, etc.) +// */ +// Set<String> required = new HashSet<String>(); +// +// try { +// +// QueryModelNode p = join; +// while (true) { +// p = p.getParentNode(); +// if (log.isDebugEnabled()) { +// log.debug(p.getClass()); +// } +// if (p instanceof UnaryTupleOperator) { +// required.addAll(collectVariables((UnaryTupleOperator) p)); +// } +// if (p instanceof QueryRoot) { +// break; +// } +// } +// +// if (filters.size() > 0) { +// for (Filter filter : filters) { +// required.addAll(collectVariables((UnaryTupleOperator) filter)); +// } +// } +// +// } catch (Exception ex) { +// throw new QueryEvaluationException(ex); +// } +// +// IVariable[] requiredVars = new IVariable[required.size()]; +// int i = 0; +// for (String v : required) { +// requiredVars[i++] = com.bigdata.bop.Var.var(v); +// } +// +// if (log.isDebugEnabled()) { +// log.debug("required binding names: " + Arrays.toString(requiredVars)); +// } +// +//// if (starJoins) { // database.isQuads() == false) { +//// if (log.isDebugEnabled()) { +//// log.debug("generating star joins"); +//// } +//// tails = generateStarJoins(tails); +//// } +// +// // generate native rule +// IRule rule = new Rule("nativeJoin", +// // @todo should serialize the query string here for the logs. +// null, // head +// tails.toArray(new IPredicate[tails.size()]), queryOptions, +// // constraints on the rule. +// constraints.size() > 0 ? 
constraints +// .toArray(new IConstraint[constraints.size()]) : null, +// null/* constants */, null/* taskFactory */, requiredVars); +// +// if (BigdataStatics.debug) { +// System.err.println(join.toString()); +// System.err.println(rule.toString()); +// } +// +// // we have filters that we could not translate natively +// if (filters.size() > 0) { +// if (log.isDebugEnabled()) { +// log.debug("could not translate " + filters.size() +// + " filters into native constraints:"); +// for (Filter filter : filters) { +// log.debug("\n" + filter.getCondition()); +// } +// } +// // use the basic filter iterator for remaining filters +//// rule = new ProxyRuleWithSesameFilters(rule, filters); +// } +// +// return rule; +// +// } + + private void attachNamedGraphsFilterToSearches(final SOpTree sopTree) { + + /* + * When in quads mode, we need to go through the free text searches and + * make sure that they are properly filtered for the dataset where + * needed. Joins will take care of this, so we only need to add a filter + * when a search variable does not appear in any other tails that are + * non-optional. + * + * @todo Bryan seems to think this can be fixed with a DISTINCT JOIN + * mechanism in the rule evaluation. + */ + if (database.isQuads() && dataset != null) { +// for (IPredicate search : searches.keySet()) { + for (SOp sop : sopTree) { + final QueryModelNode op = sop.getOperator(); + if (!(op instanceof StatementPattern)) { + continue; + } + final StatementPattern sp = (StatementPattern) op; + final IPredicate pred = (IPredicate) sop.getBOp(); + if (!(pred.getAccessPat... [truncated message content] |
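The pattern running through this commit: translation to the native bigdata plan is attempted first, an UnsupportedOperatorException triggers a permanent fallback to Sesame evaluation, and root-group filters that could not be translated into IConstraints are re-applied over the native solution stream afterwards. A minimal sketch of that control flow follows; it is not the committed code — Plan, Filter, and residualFilters() are placeholder names standing in for the SOpTree/SOp2BOpUtility machinery described above.

import java.util.ArrayDeque;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.Queue;

public class NativeFallbackSketch {

    /** Placeholder for the patch's UnsupportedOperatorException. */
    static class UnsupportedOperatorException extends Exception {
        UnsupportedOperatorException(final String msg) { super(msg); }
    }

    /** Mirrors IConstraint.accept(IBindingSet) in shape. */
    interface Filter<T> {
        boolean accept(T solution);
    }

    /** Placeholder for a translated query with two evaluation paths. */
    interface Plan<T> {
        Iterator<T> evaluateNatively() throws UnsupportedOperatorException;
        Iterator<T> evaluateInterpreted();
        /** Root-group filters that did not translate natively. */
        Iterable<Filter<T>> residualFilters();
    }

    /** Sticky flag, as in the patch: one failure disables the native path. */
    private boolean nativeJoins = true;

    public <T> Iterator<T> evaluate(final Plan<T> plan) {
        if (!nativeJoins)
            return plan.evaluateInterpreted();
        try {
            Iterator<T> result = plan.evaluateNatively();
            // Root-group filters apply to the entire solution stream, so
            // the untranslatable ones are layered on after the native run
            // (the patch wraps the result with Sesame's FilterIterator).
            for (Filter<T> f : plan.residualFilters())
                result = filtered(result, f);
            return result;
        } catch (UnsupportedOperatorException ex) {
            nativeJoins = false; // do not retry the native path
            return plan.evaluateInterpreted();
        }
    }

    /** Lazily drops solutions rejected by the filter. */
    private static <T> Iterator<T> filtered(final Iterator<T> src,
            final Filter<T> keep) {
        final Queue<T> buf = new ArrayDeque<T>(1);
        return new Iterator<T>() {
            public boolean hasNext() {
                while (buf.isEmpty() && src.hasNext()) {
                    final T t = src.next();
                    if (keep.accept(t))
                        buf.add(t);
                }
                return !buf.isEmpty();
            }
            public T next() {
                if (!hasNext())
                    throw new NoSuchElementException();
                return buf.remove();
            }
            public void remove() {
                throw new UnsupportedOperationException();
            }
        };
    }
}

Note the asymmetry the commit's comments call out: an untranslatable filter is only safe to hoist when it sits in the root group; inside an optional join group the same filter would change the join's semantics, so a translation failure there still aborts the native path.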
From: <mrp...@us...> - 2011-01-12 18:18:00
|
Revision: 4075 http://bigdata.svn.sourceforge.net/bigdata/?rev=4075&view=rev Author: mrpersonick Date: 2011-01-12 18:17:51 +0000 (Wed, 12 Jan 2011) Log Message: ----------- working on nested optional groups Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bset/ConditionalRoutingOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestDeepCopy.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestOptionalJoinGroup.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/IVUtility.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/CompareBOp.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IsInline.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IsLiteral.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/AbstractTripleStore.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/internal/constraints/TestInlineConstraints.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl3.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSail.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/Rule2BOpUtility.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/SOp.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/SOp2BOpUtility.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/SOpTree.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/SOpTreeBuilder.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestNestedOptionals.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/SubqueryOp.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategy.java Removed Paths: ------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/OptionalJoinGroup.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/AbstractInlineConstraint.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/InlineEQ.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/InlineGE.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/InlineGT.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/InlineLE.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/InlineLT.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/InlineNE.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bset/ConditionalRoutingOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bset/ConditionalRoutingOp.java 2011-01-12 04:44:24 UTC (rev 4074) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bset/ConditionalRoutingOp.java 2011-01-12 18:17:51 UTC (rev 4075) @@ -139,8 +139,8 @@ this.sink2 = context.getSink2(); - if (sink2 == null) - throw new 
IllegalArgumentException(); +// if (sink2 == null) +// throw new IllegalArgumentException(); if (sink == sink2) throw new IllegalArgumentException(); @@ -186,7 +186,7 @@ // stats.unitsOut.add(ndef); } - if (nalt > 0) { + if (nalt > 0 && sink2 != null) { if (nalt == alt.length) sink2.add(alt); else @@ -198,14 +198,16 @@ } sink.flush(); - sink2.flush(); + if (sink2 != null) + sink2.flush(); return null; } finally { sink.close(); - sink2.close(); + if (sink2 != null) + sink2.close(); } Deleted: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/OptionalJoinGroup.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/OptionalJoinGroup.java 2011-01-12 04:44:24 UTC (rev 4074) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/OptionalJoinGroup.java 2011-01-12 18:17:51 UTC (rev 4075) @@ -1,437 +0,0 @@ -/** - -Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. - -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ -/* - * Created on Aug 18, 2010 - */ - -package com.bigdata.bop.controller; - -import java.util.Map; -import java.util.UUID; -import java.util.concurrent.Callable; -import java.util.concurrent.Executor; -import java.util.concurrent.FutureTask; - -import com.bigdata.bop.BOp; -import com.bigdata.bop.BOpContext; -import com.bigdata.bop.BOpUtility; -import com.bigdata.bop.IBindingSet; -import com.bigdata.bop.NV; -import com.bigdata.bop.PipelineOp; -import com.bigdata.bop.engine.IRunningQuery; -import com.bigdata.bop.engine.LocalChunkMessage; -import com.bigdata.bop.engine.QueryEngine; -import com.bigdata.relation.accesspath.IAsynchronousIterator; -import com.bigdata.relation.accesspath.ThickAsynchronousIterator; -import com.bigdata.util.concurrent.LatchedExecutor; - -/** - * For each binding set presented, this operator executes a subquery. Any - * solutions produced by the subquery are copied to the default sink. If no - * solutions are produced, then the original binding set is copied to the - * default sink (optional join semantics). Each subquery is run as a separate - * query but will be cancelled if the parent query is cancelled. - * - * FIXME Parallel evaluation of subqueries is not implemented. What is the - * appropriate parallelism for this operator? More parallelism should reduce - * latency but could increase the memory burden. Review this decision once we - * have the RWStore operating as a binding set buffer on the Java process heap. 
- * - * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id$ - */ -public class OptionalJoinGroup extends PipelineOp { - - /** - * - */ - private static final long serialVersionUID = 1L; - - public interface Annotations extends PipelineOp.Annotations { - - /** - * The subquery to be evaluated for each binding sets presented to the - * {@link OptionalJoinGroup} (required). This should be a - * {@link PipelineOp}. - */ - String SUBQUERY = OptionalJoinGroup.class.getName() + ".subquery"; - - /** - * When <code>true</code> the subquery has optional semantics (if the - * subquery fails, the original binding set will be passed along to the - * downstream sink anyway). - */ - String OPTIONAL = OptionalJoinGroup.class.getName() + ".optional"; - - boolean DEFAULT_OPTIONAL = true; - - /** - * The maximum parallelism with which the subqueries will be evaluated - * (default {@value #DEFAULT_MAX_PARALLEL}). - */ - String MAX_PARALLEL = OptionalJoinGroup.class.getName() - + ".maxParallel"; - - int DEFAULT_MAX_PARALLEL = 1; - - } - - /** - * @see Annotations#MAX_PARALLEL - */ - public int getMaxParallel() { - return getProperty(Annotations.MAX_PARALLEL, - Annotations.DEFAULT_MAX_PARALLEL); - } - - /** - * Deep copy constructor. - */ - public OptionalJoinGroup(final OptionalJoinGroup op) { - super(op); - } - - /** - * Shallow copy constructor. - * - * @param args - * @param annotations - */ - public OptionalJoinGroup(final BOp[] args, - final Map<String, Object> annotations) { - - super(args, annotations); - -// if (!getEvaluationContext().equals(BOpEvaluationContext.CONTROLLER)) -// throw new IllegalArgumentException(Annotations.EVALUATION_CONTEXT -// + "=" + getEvaluationContext()); - - getRequiredProperty(Annotations.SUBQUERY); - -// if (!getProperty(Annotations.CONTROLLER, Annotations.DEFAULT_CONTROLLER)) -// throw new IllegalArgumentException(Annotations.CONTROLLER); - -// // The id of this operator (if any). -// final Integer thisId = (Integer)getProperty(Annotations.BOP_ID); -// -// for(BOp op : args) { -// -// final Integer sinkId = (Integer) op -// .getRequiredProperty(Annotations.SINK_REF); -// -// if(sinkId.equals(thisId)) -// throw new RuntimeException("Operand may not target ") -// -// } - - } - - public OptionalJoinGroup(final BOp[] args, NV... annotations) { - - this(args, NV.asMap(annotations)); - - } - - public FutureTask<Void> eval(final BOpContext<IBindingSet> context) { - - return new FutureTask<Void>(new ControllerTask(this, context)); - - } - - /** - * Evaluates the arguments of the operator as subqueries. The arguments are - * evaluated in order. An {@link Executor} with limited parallelism to - * evaluate the arguments. If the controller operator is interrupted, then - * the subqueries are cancelled. If a subquery fails, then all subqueries - * are cancelled. 
- */ - private static class ControllerTask implements Callable<Void> { - - private final OptionalJoinGroup controllerOp; - private final BOpContext<IBindingSet> context; -// private final List<FutureTask<IRunningQuery>> tasks = new LinkedList<FutureTask<IRunningQuery>>(); -// private final CountDownLatch latch; - private final boolean optional; - private final int nparallel; - private final PipelineOp subquery; - private final Executor executor; - - public ControllerTask(final OptionalJoinGroup controllerOp, final BOpContext<IBindingSet> context) { - - if (controllerOp == null) - throw new IllegalArgumentException(); - - if (context == null) - throw new IllegalArgumentException(); - - this.controllerOp = controllerOp; - - this.context = context; - - this.optional = controllerOp.getProperty(Annotations.OPTIONAL, - Annotations.DEFAULT_OPTIONAL); - - this.nparallel = controllerOp.getProperty(Annotations.MAX_PARALLEL, - Annotations.DEFAULT_MAX_PARALLEL); - - this.subquery = (PipelineOp) controllerOp - .getRequiredProperty(Annotations.SUBQUERY); - - this.executor = new LatchedExecutor(context.getIndexManager() - .getExecutorService(), nparallel); - -// this.latch = new CountDownLatch(controllerOp.arity()); - -// /* -// * Create FutureTasks for each subquery. The futures are submitted -// * to the Executor yet. That happens in call(). By deferring the -// * evaluation until call() we gain the ability to cancel all -// * subqueries if any subquery fails. -// */ -// for (BOp op : controllerOp.args()) { -// -// /* -// * Task runs subquery and cancels all subqueries in [tasks] if -// * it fails. -// */ -// tasks.add(new FutureTask<IRunningQuery>(new SubqueryTask(op, -// context)) { -// /* -// * Hook future to count down the latch when the task is -// * done. -// */ -// public void run() { -// try { -// super.run(); -// } finally { -// latch.countDown(); -// } -// } -// }); -// -// } - - } - - /** - * Evaluate the subquery. - * - * @todo Support limited parallelism for each binding set read from the - * source. We will need to keep track of the running subqueries in - * order to wait on them before returning from this method and in - * order to cancel them if something goes wrong. - */ - public Void call() throws Exception { - - try { - - final IAsynchronousIterator<IBindingSet[]> sitr = context - .getSource(); - - while(sitr.hasNext()) { - - final IBindingSet[] chunk = sitr.next(); - - for(IBindingSet bset : chunk) { - - FutureTask<IRunningQuery> ft = new FutureTask<IRunningQuery>( - new SubqueryTask(bset, subquery, context)); - - // run the subquery. - executor.execute(ft); - - try { - - // wait for the outcome. - ft.get(); - - } finally { - - /* - * Ensure that the inner task is cancelled if the - * outer task is interrupted. - */ - ft.cancel(true/* mayInterruptIfRunning */); - - } - - } - - } - -// /* -// * Run subqueries with limited parallelism. -// */ -// for (FutureTask<IRunningQuery> ft : tasks) { -// executor.execute(ft); -// } -// -// /* -// * Wait for all subqueries to complete. -// */ -// latch.await(); -// -// /* -// * Get the futures, throwing out any errors. -// */ -// for (FutureTask<IRunningQuery> ft : tasks) -// ft.get(); - - // Now that we know the subqueries ran Ok, flush the sink. - context.getSink().flush(); - - // Done. - return null; - - } finally { - -// // Cancel any tasks which are still running. 
-// for (FutureTask<IRunningQuery> ft : tasks) -// ft.cancel(true/* mayInterruptIfRunning */); - - context.getSource().close(); - - context.getSink().close(); - - if (context.getSink2() != null) - context.getSink2().close(); - - } - - } - - /** - * Run a subquery. - * - * @author <a href="mailto:tho...@us...">Bryan - * Thompson</a> - */ - private class SubqueryTask implements Callable<IRunningQuery> { - - /** - * The evaluation context for the parent query. - */ - private final BOpContext<IBindingSet> parentContext; - - /** - * The source binding set. This will be copied to the output if - * there are no solutions for the subquery (optional join - * semantics). - */ - private final IBindingSet bset; - - /** - * The root operator for the subquery. - */ - private final BOp subQueryOp; - - public SubqueryTask(final IBindingSet bset, final BOp subQuery, - final BOpContext<IBindingSet> parentContext) { - - this.bset = bset; - - this.subQueryOp = subQuery; - - this.parentContext = parentContext; - - } - - public IRunningQuery call() throws Exception { - - IAsynchronousIterator<IBindingSet[]> subquerySolutionItr = null; - try { - - final QueryEngine queryEngine = parentContext.getRunningQuery() - .getQueryEngine(); - -// final IRunningQuery runningQuery = queryEngine -// .eval(subQueryOp); - - final BOp startOp = BOpUtility.getPipelineStart(subQueryOp); - - final int startId = startOp.getId(); - - final UUID queryId = UUID.randomUUID(); - - // execute the subquery, passing in the source binding set. - final IRunningQuery runningQuery = queryEngine - .eval( - queryId, - (PipelineOp) subQueryOp, - new LocalChunkMessage<IBindingSet>( - queryEngine, - queryId, - startId, - -1 /* partitionId */, - new ThickAsynchronousIterator<IBindingSet[]>( - new IBindingSet[][] { new IBindingSet[] { bset } }))); - - // Iterator visiting the subquery solutions. - subquerySolutionItr = runningQuery.iterator(); - - // Copy solutions from the subquery to the query. - final long ncopied = BOpUtility.copy(subquerySolutionItr, - parentContext.getSink(), null/* sink2 */, - null/* constraints */, null/* stats */); - - // wait for the subquery. - runningQuery.get(); - - if (ncopied == 0L && optional) { - - /* - * Since there were no solutions for the subquery, copy - * the original binding set to the default sink. - */ - parentContext.getSink().add(new IBindingSet[]{bset}); - - } - - // done. - return runningQuery; - - } catch (Throwable t) { - - /* - * If a subquery fails, then propagate the error to the - * parent and rethrow the first cause error out of the - * subquery. - */ - throw new RuntimeException(ControllerTask.this.context - .getRunningQuery().halt(t)); - - } finally { - - if (subquerySolutionItr != null) - subquerySolutionItr.close(); - - } - - } - - } // SubqueryTask - - } // ControllerTask - -} Copied: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/SubqueryOp.java (from rev 4072, branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/OptionalJoinGroup.java) =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/SubqueryOp.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/SubqueryOp.java 2011-01-12 18:17:51 UTC (rev 4075) @@ -0,0 +1,437 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... 
+ +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Aug 18, 2010 + */ + +package com.bigdata.bop.controller; + +import java.util.Map; +import java.util.UUID; +import java.util.concurrent.Callable; +import java.util.concurrent.Executor; +import java.util.concurrent.FutureTask; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.BOpContext; +import com.bigdata.bop.BOpUtility; +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.NV; +import com.bigdata.bop.PipelineOp; +import com.bigdata.bop.engine.IRunningQuery; +import com.bigdata.bop.engine.LocalChunkMessage; +import com.bigdata.bop.engine.QueryEngine; +import com.bigdata.relation.accesspath.IAsynchronousIterator; +import com.bigdata.relation.accesspath.ThickAsynchronousIterator; +import com.bigdata.util.concurrent.LatchedExecutor; + +/** + * For each binding set presented, this operator executes a subquery. Any + * solutions produced by the subquery are copied to the default sink. If no + * solutions are produced, then the original binding set is copied to the + * default sink (optional join semantics). Each subquery is run as a separate + * query but will be cancelled if the parent query is cancelled. + * + * FIXME Parallel evaluation of subqueries is not implemented. What is the + * appropriate parallelism for this operator? More parallelism should reduce + * latency but could increase the memory burden. Review this decision once we + * have the RWStore operating as a binding set buffer on the Java process heap. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public class SubqueryOp extends PipelineOp { + + /** + * + */ + private static final long serialVersionUID = 1L; + + public interface Annotations extends PipelineOp.Annotations { + + /** + * The subquery to be evaluated for each binding sets presented to the + * {@link SubqueryOp} (required). This should be a + * {@link PipelineOp}. + */ + String SUBQUERY = SubqueryOp.class.getName() + ".subquery"; + + /** + * When <code>true</code> the subquery has optional semantics (if the + * subquery fails, the original binding set will be passed along to the + * downstream sink anyway). + */ + String OPTIONAL = SubqueryOp.class.getName() + ".optional"; + + boolean DEFAULT_OPTIONAL = false; + + /** + * The maximum parallelism with which the subqueries will be evaluated + * (default {@value #DEFAULT_MAX_PARALLEL}). + */ + String MAX_PARALLEL = SubqueryOp.class.getName() + + ".maxParallel"; + + int DEFAULT_MAX_PARALLEL = 1; + + } + + /** + * @see Annotations#MAX_PARALLEL + */ + public int getMaxParallel() { + return getProperty(Annotations.MAX_PARALLEL, + Annotations.DEFAULT_MAX_PARALLEL); + } + + /** + * Deep copy constructor. + */ + public SubqueryOp(final SubqueryOp op) { + super(op); + } + + /** + * Shallow copy constructor. 
+ * + * @param args + * @param annotations + */ + public SubqueryOp(final BOp[] args, + final Map<String, Object> annotations) { + + super(args, annotations); + +// if (!getEvaluationContext().equals(BOpEvaluationContext.CONTROLLER)) +// throw new IllegalArgumentException(Annotations.EVALUATION_CONTEXT +// + "=" + getEvaluationContext()); + + getRequiredProperty(Annotations.SUBQUERY); + +// if (!getProperty(Annotations.CONTROLLER, Annotations.DEFAULT_CONTROLLER)) +// throw new IllegalArgumentException(Annotations.CONTROLLER); + +// // The id of this operator (if any). +// final Integer thisId = (Integer)getProperty(Annotations.BOP_ID); +// +// for(BOp op : args) { +// +// final Integer sinkId = (Integer) op +// .getRequiredProperty(Annotations.SINK_REF); +// +// if(sinkId.equals(thisId)) +// throw new RuntimeException("Operand may not target ") +// +// } + + } + + public SubqueryOp(final BOp[] args, NV... annotations) { + + this(args, NV.asMap(annotations)); + + } + + public FutureTask<Void> eval(final BOpContext<IBindingSet> context) { + + return new FutureTask<Void>(new ControllerTask(this, context)); + + } + + /** + * Evaluates the arguments of the operator as subqueries. The arguments are + * evaluated in order. An {@link Executor} with limited parallelism to + * evaluate the arguments. If the controller operator is interrupted, then + * the subqueries are cancelled. If a subquery fails, then all subqueries + * are cancelled. + */ + private static class ControllerTask implements Callable<Void> { + + private final SubqueryOp controllerOp; + private final BOpContext<IBindingSet> context; +// private final List<FutureTask<IRunningQuery>> tasks = new LinkedList<FutureTask<IRunningQuery>>(); +// private final CountDownLatch latch; + private final boolean optional; + private final int nparallel; + private final PipelineOp subquery; + private final Executor executor; + + public ControllerTask(final SubqueryOp controllerOp, final BOpContext<IBindingSet> context) { + + if (controllerOp == null) + throw new IllegalArgumentException(); + + if (context == null) + throw new IllegalArgumentException(); + + this.controllerOp = controllerOp; + + this.context = context; + + this.optional = controllerOp.getProperty(Annotations.OPTIONAL, + Annotations.DEFAULT_OPTIONAL); + + this.nparallel = controllerOp.getProperty(Annotations.MAX_PARALLEL, + Annotations.DEFAULT_MAX_PARALLEL); + + this.subquery = (PipelineOp) controllerOp + .getRequiredProperty(Annotations.SUBQUERY); + + this.executor = new LatchedExecutor(context.getIndexManager() + .getExecutorService(), nparallel); + +// this.latch = new CountDownLatch(controllerOp.arity()); + +// /* +// * Create FutureTasks for each subquery. The futures are submitted +// * to the Executor yet. That happens in call(). By deferring the +// * evaluation until call() we gain the ability to cancel all +// * subqueries if any subquery fails. +// */ +// for (BOp op : controllerOp.args()) { +// +// /* +// * Task runs subquery and cancels all subqueries in [tasks] if +// * it fails. +// */ +// tasks.add(new FutureTask<IRunningQuery>(new SubqueryTask(op, +// context)) { +// /* +// * Hook future to count down the latch when the task is +// * done. +// */ +// public void run() { +// try { +// super.run(); +// } finally { +// latch.countDown(); +// } +// } +// }); +// +// } + + } + + /** + * Evaluate the subquery. + * + * @todo Support limited parallelism for each binding set read from the + * source. 
We will need to keep track of the running subqueries in + * order to wait on them before returning from this method and in + * order to cancel them if something goes wrong. + */ + public Void call() throws Exception { + + try { + + final IAsynchronousIterator<IBindingSet[]> sitr = context + .getSource(); + + while(sitr.hasNext()) { + + final IBindingSet[] chunk = sitr.next(); + + for(IBindingSet bset : chunk) { + + FutureTask<IRunningQuery> ft = new FutureTask<IRunningQuery>( + new SubqueryTask(bset, subquery, context)); + + // run the subquery. + executor.execute(ft); + + try { + + // wait for the outcome. + ft.get(); + + } finally { + + /* + * Ensure that the inner task is cancelled if the + * outer task is interrupted. + */ + ft.cancel(true/* mayInterruptIfRunning */); + + } + + } + + } + +// /* +// * Run subqueries with limited parallelism. +// */ +// for (FutureTask<IRunningQuery> ft : tasks) { +// executor.execute(ft); +// } +// +// /* +// * Wait for all subqueries to complete. +// */ +// latch.await(); +// +// /* +// * Get the futures, throwing out any errors. +// */ +// for (FutureTask<IRunningQuery> ft : tasks) +// ft.get(); + + // Now that we know the subqueries ran Ok, flush the sink. + context.getSink().flush(); + + // Done. + return null; + + } finally { + +// // Cancel any tasks which are still running. +// for (FutureTask<IRunningQuery> ft : tasks) +// ft.cancel(true/* mayInterruptIfRunning */); + + context.getSource().close(); + + context.getSink().close(); + + if (context.getSink2() != null) + context.getSink2().close(); + + } + + } + + /** + * Run a subquery. + * + * @author <a href="mailto:tho...@us...">Bryan + * Thompson</a> + */ + private class SubqueryTask implements Callable<IRunningQuery> { + + /** + * The evaluation context for the parent query. + */ + private final BOpContext<IBindingSet> parentContext; + + /** + * The source binding set. This will be copied to the output if + * there are no solutions for the subquery (optional join + * semantics). + */ + private final IBindingSet bset; + + /** + * The root operator for the subquery. + */ + private final BOp subQueryOp; + + public SubqueryTask(final IBindingSet bset, final BOp subQuery, + final BOpContext<IBindingSet> parentContext) { + + this.bset = bset; + + this.subQueryOp = subQuery; + + this.parentContext = parentContext; + + } + + public IRunningQuery call() throws Exception { + + IAsynchronousIterator<IBindingSet[]> subquerySolutionItr = null; + try { + + final QueryEngine queryEngine = parentContext.getRunningQuery() + .getQueryEngine(); + +// final IRunningQuery runningQuery = queryEngine +// .eval(subQueryOp); + + final BOp startOp = BOpUtility.getPipelineStart(subQueryOp); + + final int startId = startOp.getId(); + + final UUID queryId = UUID.randomUUID(); + + // execute the subquery, passing in the source binding set. + final IRunningQuery runningQuery = queryEngine + .eval( + queryId, + (PipelineOp) subQueryOp, + new LocalChunkMessage<IBindingSet>( + queryEngine, + queryId, + startId, + -1 /* partitionId */, + new ThickAsynchronousIterator<IBindingSet[]>( + new IBindingSet[][] { new IBindingSet[] { bset } }))); + + // Iterator visiting the subquery solutions. + subquerySolutionItr = runningQuery.iterator(); + + // Copy solutions from the subquery to the query. + final long ncopied = BOpUtility.copy(subquerySolutionItr, + parentContext.getSink(), null/* sink2 */, + null/* constraints */, null/* stats */); + + // wait for the subquery. 
+ runningQuery.get(); + + if (ncopied == 0L && optional) { + + /* + * Since there were no solutions for the subquery, copy + * the original binding set to the default sink. + */ + parentContext.getSink().add(new IBindingSet[]{bset}); + + } + + // done. + return runningQuery; + + } catch (Throwable t) { + + /* + * If a subquery fails, then propagate the error to the + * parent and rethrow the first cause error out of the + * subquery. + */ + throw new RuntimeException(ControllerTask.this.context + .getRunningQuery().halt(t)); + + } finally { + + if (subquerySolutionItr != null) + subquerySolutionItr.close(); + + } + + } + + } // SubqueryTask + + } // ControllerTask + +} Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestDeepCopy.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestDeepCopy.java 2011-01-12 04:44:24 UTC (rev 4074) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestDeepCopy.java 2011-01-12 18:17:51 UTC (rev 4075) @@ -45,12 +45,10 @@ import com.bigdata.bop.constraint.NE; import com.bigdata.bop.constraint.NEConstant; import com.bigdata.bop.constraint.OR; -import com.bigdata.rdf.internal.constraints.InlineEQ; -import com.bigdata.rdf.internal.constraints.InlineGE; -import com.bigdata.rdf.internal.constraints.InlineGT; -import com.bigdata.rdf.internal.constraints.InlineLE; -import com.bigdata.rdf.internal.constraints.InlineLT; -import com.bigdata.rdf.internal.constraints.InlineNE; +import com.bigdata.rdf.internal.constraints.CompareBOp; +import com.bigdata.rdf.internal.constraints.IsInline; +import com.bigdata.rdf.internal.constraints.IsLiteral; +import com.bigdata.rdf.internal.constraints.MathBOp; import com.bigdata.rdf.rules.RejectAnythingSameAsItself; import com.bigdata.rdf.spo.SPOPredicate; import com.bigdata.rdf.spo.SPOStarJoin; @@ -99,12 +97,10 @@ SPOStarJoin.class,// com.bigdata.rdf.magic.MagicPredicate.class,// // com.bigdata.rdf.internal.constraint - InlineEQ.class,// - InlineGE.class,// - InlineLT.class,// - InlineLE.class,// - InlineGT.class,// - InlineNE.class,// + CompareBOp.class,// + IsInline.class,// + IsLiteral.class,// + MathBOp.class,// // com.bigdata.rdf.inf RejectAnythingSameAsItself.class, Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestOptionalJoinGroup.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestOptionalJoinGroup.java 2011-01-12 04:44:24 UTC (rev 4074) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestOptionalJoinGroup.java 2011-01-12 18:17:51 UTC (rev 4075) @@ -219,7 +219,7 @@ /** * Unit test for optional join group. Three joins are used and target a * {@link SliceOp}. The 2nd and 3rd joins are embedded in an - * {@link OptionalJoinGroup}. + * {@link SubqueryOp}. 
* <P> * The optional join group takes the form: * @@ -342,10 +342,10 @@ subQuery = join3Op; } - final PipelineOp joinGroup1Op = new OptionalJoinGroup(new BOp[]{join1Op}, + final PipelineOp joinGroup1Op = new SubqueryOp(new BOp[]{join1Op}, new NV(Predicate.Annotations.BOP_ID, joinGroup1),// // new NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// - new NV(OptionalJoinGroup.Annotations.SUBQUERY, subQuery)// + new NV(SubqueryOp.Annotations.SUBQUERY, subQuery)// // , new NV(BOp.Annotations.CONTROLLER,true)// // new NV(BOp.Annotations.EVALUATION_CONTEXT, // BOpEvaluationContext.CONTROLLER)// @@ -607,10 +607,10 @@ subQuery = join3Op; } - final PipelineOp joinGroup1Op = new OptionalJoinGroup(new BOp[]{join1Op}, + final PipelineOp joinGroup1Op = new SubqueryOp(new BOp[]{join1Op}, new NV(Predicate.Annotations.BOP_ID, joinGroup1),// // new NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// - new NV(OptionalJoinGroup.Annotations.SUBQUERY, subQuery)// + new NV(SubqueryOp.Annotations.SUBQUERY, subQuery)// // new NV(BOp.Annotations.CONTROLLER,true)// // new NV(BOp.Annotations.EVALUATION_CONTEXT, // BOpEvaluationContext.CONTROLLER)// @@ -717,7 +717,7 @@ /** * Unit test for optional join group with a filter on a variable outside the * optional join group. Three joins are used and target a {@link SliceOp}. - * The 2nd and 3rd joins are in embedded an {@link OptionalJoinGroup}. The + * The 2nd and 3rd joins are in embedded an {@link SubqueryOp}. The * optional join group contains a filter that uses a variable outside the * optional join group. * <P> @@ -868,10 +868,10 @@ subQuery = join3Op; } - final PipelineOp joinGroup1Op = new OptionalJoinGroup(new BOp[]{condOp}, + final PipelineOp joinGroup1Op = new SubqueryOp(new BOp[]{condOp}, new NV(Predicate.Annotations.BOP_ID, joinGroup1),// // new NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// - new NV(OptionalJoinGroup.Annotations.SUBQUERY, subQuery)// + new NV(SubqueryOp.Annotations.SUBQUERY, subQuery)// // new NV(BOp.Annotations.CONTROLLER,true)// // new NV(BOp.Annotations.EVALUATION_CONTEXT, // BOpEvaluationContext.CONTROLLER)// Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/IVUtility.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/IVUtility.java 2011-01-12 04:44:24 UTC (rev 4074) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/IVUtility.java 2011-01-12 18:17:51 UTC (rev 4075) @@ -37,8 +37,6 @@ import com.bigdata.btree.keys.IKeyBuilder; import com.bigdata.btree.keys.KeyBuilder; import com.bigdata.rawstore.Bytes; -import com.bigdata.rdf.internal.constraints.AbstractInlineConstraint; -import com.bigdata.rdf.internal.constraints.InlineGT; import com.bigdata.rdf.model.BigdataBNode; import com.bigdata.rdf.model.BigdataLiteral; Deleted: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/AbstractInlineConstraint.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/AbstractInlineConstraint.java 2011-01-12 04:44:24 UTC (rev 4074) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/AbstractInlineConstraint.java 2011-01-12 18:17:51 UTC (rev 4075) @@ -1,97 +0,0 @@ -/** - -Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. 
- -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ - -package com.bigdata.rdf.internal.constraints; - -import java.util.Map; - -import com.bigdata.bop.BOp; -import com.bigdata.bop.Constant; -import com.bigdata.bop.IBindingSet; -import com.bigdata.bop.IConstant; -import com.bigdata.bop.IVariable; -import com.bigdata.bop.constraint.BOpConstraint; -import com.bigdata.rdf.internal.IV; -import com.bigdata.rdf.internal.IVUtility; - -/** - * Use inline terms to perform numerical comparison operations. - * - * @see IVUtility#numericalCompare(IV, IV) - */ -public abstract class AbstractInlineConstraint extends BOpConstraint { - - /** - * - */ - private static final long serialVersionUID = 1L; - - /** - * Required shallow copy constructor. - */ - public AbstractInlineConstraint(final BOp[] values, - final Map<String, Object> annotations) { - super(values, annotations); - } - - /** - * Required deep copy constructor. - */ - public AbstractInlineConstraint(final AbstractInlineConstraint op) { - super(op); - } - - protected AbstractInlineConstraint(final IVariable<IV> v, final IV iv) { - - super(new BOp[] { v, new Constant<IV>(iv) }, null/*annotations*/); - - if (v == null) - throw new IllegalArgumentException(); - - if (!IVUtility.canNumericalCompare(iv)) - throw new IllegalArgumentException(); - - } - - public boolean accept(final IBindingSet s) { - - // get binding for "x". - final IConstant<IV> c = s.get((IVariable<IV>) get(0)/* v */); - - if (c == null) - return true; // not yet bound. - - final IV term = c.get(); - - final IV iv = ((IConstant<IV>) get(1)/* iv */).get(); - - final int compare = IVUtility.numericalCompare(term, iv); - - return _accept(compare); - - } - - protected abstract boolean _accept(final int compare); - -} Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/CompareBOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/CompareBOp.java 2011-01-12 04:44:24 UTC (rev 4074) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/CompareBOp.java 2011-01-12 18:17:51 UTC (rev 4075) @@ -92,17 +92,14 @@ if (left == null || right == null) return true; // not yet bound. 
- if (IVUtility.canNumericalCompare(left) && - IVUtility.canNumericalCompare(right)) { - - return _accept(IVUtility.numericalCompare(left, right)); - - } else { - - return _accept(left.compareTo(right)); - - } + if (!IVUtility.canNumericalCompare(left)) + throw new NotNumericalException("cannot numerical compare: " + left); + if (!IVUtility.canNumericalCompare(right)) + throw new NotNumericalException("cannot numerical compare: " + right); + + return _accept(IVUtility.numericalCompare(left, right)); + } protected boolean _accept(final int compare) { @@ -128,4 +125,29 @@ } + public static class NotNumericalException extends RuntimeException { + + /** + * + */ + private static final long serialVersionUID = -8853739187628588335L; + + public NotNumericalException() { + super(); + } + + public NotNumericalException(String s, Throwable t) { + super(s, t); + } + + public NotNumericalException(String s) { + super(s); + } + + public NotNumericalException(Throwable t) { + super(t); + } + + } + } Deleted: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/InlineEQ.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/InlineEQ.java 2011-01-12 04:44:24 UTC (rev 4074) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/InlineEQ.java 2011-01-12 18:17:51 UTC (rev 4075) @@ -1,66 +0,0 @@ -/** - -Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. - -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ - -package com.bigdata.rdf.internal.constraints; - -import java.util.Map; - -import com.bigdata.bop.BOp; -import com.bigdata.bop.IVariable; -import com.bigdata.rdf.internal.IV; - -/** - * Use inline numerical comparison techniques to implement the == operator. - */ -public class InlineEQ extends AbstractInlineConstraint { - - private static final long serialVersionUID = -859713006378534024L; - - /** - * Required shallow copy constructor. - */ - public InlineEQ(final BOp[] values, final Map<String, Object> annotations) { - super(values, annotations); - } - - /** - * Required deep copy constructor. 
- */ - public InlineEQ(final InlineEQ op) { - super(op); - } - - public InlineEQ(final IVariable<IV> v, final IV iv) { - - super(v, iv); - - } - - protected boolean _accept(final int compare) { - - return compare == 0; - - } - -} Deleted: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/InlineGE.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/InlineGE.java 2011-01-12 04:44:24 UTC (rev 4074) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/InlineGE.java 2011-01-12 18:17:51 UTC (rev 4075) @@ -1,66 +0,0 @@ -/** - -Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. - -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ - -package com.bigdata.rdf.internal.constraints; - -import java.util.Map; - -import com.bigdata.bop.BOp; -import com.bigdata.bop.IVariable; -import com.bigdata.rdf.internal.IV; - -/** - * Use inline numerical comparison techniques to implement the >= operator. - */ -public class InlineGE extends AbstractInlineConstraint { - - private static final long serialVersionUID = 5796593193255235408L; - - /** - * Required shallow copy constructor. - */ - public InlineGE(final BOp[] values, final Map<String, Object> annotations) { - super(values, annotations); - } - - /** - * Required deep copy constructor. - */ - public InlineGE(final InlineGE op) { - super(op); - } - - public InlineGE(final IVariable<IV> v, final IV iv) { - - super(v, iv); - - } - - protected boolean _accept(final int compare) { - - return compare >= 0; - - } - -} Deleted: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/InlineGT.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/InlineGT.java 2011-01-12 04:44:24 UTC (rev 4074) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/InlineGT.java 2011-01-12 18:17:51 UTC (rev 4075) @@ -1,66 +0,0 @@ -/** - -Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. - -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ - -package com.bigdata.rdf.internal.constraints; - -import java.util.Map; - -import com.bigdata.bop.BOp; -import com.bigdata.bop.IVariable; -import com.bigdata.rdf.internal.IV; - -/** - * Use inline numerical comparison techniques to implement the > operator. - */ -public class InlineGT extends AbstractInlineConstraint { - - private static final long serialVersionUID = 8104692462788944394L; - - /** - * Required shallow copy constructor. - */ - public InlineGT(final BOp[] values, final Map<String, Object> annotations) { - super(values, annotations); - } - - /** - * Required deep copy constructor. - */ - public InlineGT(final InlineGT op) { - super(op); - } - - public InlineGT(final IVariable<IV> v, final IV iv) { - - super(v, iv); - - } - - protected boolean _accept(final int compare) { - - return compare > 0; - - } - -} Deleted: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/InlineLE.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/InlineLE.java 2011-01-12 04:44:24 UTC (rev 4074) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/InlineLE.java 2011-01-12 18:17:51 UTC (rev 4075) @@ -1,66 +0,0 @@ -/** - -Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. - -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ - -package com.bigdata.rdf.internal.constraints; - -import java.util.Map; - -import com.bigdata.bop.BOp; -import com.bigdata.bop.IVariable; -import com.bigdata.rdf.internal.IV; - -/** - * Use inline numerical comparison techniques to implement the <= operator. - */ -public class InlineLE extends AbstractInlineConstraint { - - private static final long serialVersionUID = 7632756199316546837L; - - /** - * Required shallow copy constructor. - */ - public InlineLE(final BOp[] values, final Map<String, Object> annotations) { - super(values, annotations); - } - - /** - * Required deep copy constructor. 
- */ - public InlineLE(final InlineLE op) { - super(op); - } - - public InlineLE(final IVariable<IV> v, final IV iv) { - - super(v, iv); - - } - - protected boolean _accept(final int compare) { - - return compare <= 0; - - } - -} Deleted: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/InlineLT.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/InlineLT.java 2011-01-12 04:44:24 UTC (rev 4074) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/InlineLT.java 2011-01-12 18:17:51 UTC (rev 4075) @@ -1,66 +0,0 @@ -/** - -Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. - -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ - -package com.bigdata.rdf.internal.constraints; - -import java.util.Map; - -import com.bigdata.bop.BOp; -import com.bigdata.bop.IVariable; -import com.bigdata.rdf.internal.IV; - -/** - * Use inline numerical comparison techniques to implement the < operator. - */ -public class InlineLT extends AbstractInlineConstraint { - - private static final long serialVersionUID = 1012994769934551872L; - - /** - * Required shallow copy constructor. - */ - public InlineLT(final BOp[] values, final Map<String, Object> annotations) { - super(values, annotations); - } - - /** - * Required deep copy constructor. - */ - public InlineLT(final InlineLT op) { - super(op); - } - - public InlineLT(final IVariable<IV> v, final IV iv) { - - super(v, iv); - - } - - protected boolean _accept(final int compare) { - - return compare < 0; - - } - -} Deleted: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/InlineNE.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/InlineNE.java 2011-01-12 04:44:24 UTC (rev 4074) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/InlineNE.java 2011-01-12 18:17:51 UTC (rev 4075) @@ -1,66 +0,0 @@ -/** - -Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. - -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ - -package com.bigdata.rdf.internal.constraints; - -import java.util.Map; - -import com.bigdata.bop.BOp; -import com.bigdata.bop.IVariable; -import com.bigdata.rdf.internal.IV; - -/** - * Use inline numerical comparison techniques to implement the != operator. - */ -public class InlineNE extends AbstractInlineConstraint { - - pr... [truncated message content] |
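The six Inline* constraint classes deleted above (InlineEQ, InlineNE, InlineLT, InlineLE, InlineGT, InlineGE) differ only in the predicate each one hard-codes in _accept(int); the InlineNE body is truncated in the message above, but its javadoc ("implement the != operator") pins it to compare != 0. A consolidated sketch of that shared template follows; this is hypothetical shorthand to show the pattern being retired, not code from this commit, and the InlineCompareOp enum here is an illustrative stand-in (a later revision's CompareBOp carries the operator as a CompareOp annotation instead).

// Sketch only: each deleted Inline* subclass of AbstractInlineConstraint
// supplied exactly one branch of this switch via its _accept(int) method,
// applied to the result of comparing the bound IV against the constant.
enum InlineCompareOp { EQ, NE, LT, LE, GT, GE }

final class InlineCompareTemplate {

    static boolean accept(final InlineCompareOp op, final int compare) {
        switch (op) {
        case EQ: return compare == 0;  // InlineEQ
        case NE: return compare != 0;  // InlineNE (javadoc: the != operator)
        case LT: return compare < 0;   // InlineLT
        case LE: return compare <= 0;  // InlineLE
        case GT: return compare > 0;   // InlineGT
        case GE: return compare >= 0;  // InlineGE
        default: throw new AssertionError(op);
        }
    }
}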
From: <tho...@us...> - 2011-01-15 14:53:34
|
Revision: 4101 http://bigdata.svn.sourceforge.net/bigdata/?rev=4101&view=rev Author: thompsonbry Date: 2011-01-15 14:53:26 +0000 (Sat, 15 Jan 2011) Log Message: ----------- Working through a problem with SubqueryOp leading to non-termination of some queries in BSBM. This appears to be fixed at this checkpoint. I am not sure yet whether the fix was the -XX:+UseMembar JVM argument (I was running with JDK 1.6.0_17, which can lose monitor wake-up signals) or the modifications to SubqueryOp. I have made several changes to improve the trapping of "normal" query termination exceptions (InterruptedException, BufferClosedException, and ClosedByInterruptException). All of these can arise in response to an interrupt triggered when a LIMIT is satisfied on the query. The RWStore and WORMStore properties files were modified to turn on the new query evaluation strategy impl, which handles optional join groups. The data needs to be loaded with that option enabled and also with dateTime inlining enabled. Added more reporting capabilities to the NanoSparqlServer, including reporting of the BOPs for the currently executing queries. This required adding a public method to IQueryClient to report on the UUIDs of the active IRunningQuery instances and making getRunningQuery(UUID) public so we can inspect a query by its UUID. BlockingBuffer was constructing a log message even when it would not be logged. This is fixed. RWStore was logging an error in getData() even though there are several non-error reasons why that method can throw an exception (an interrupt leading to a ClosedByInterruptException is the main one). Modified SOp2BOpUtility to layer the slice over the subquery op and not the other way around. The current BSBM performance status is excellent for the reduced query mix (without Q5 or Q6). Q5 appears to have a bad join plan, which causes high CPU utilization. Q6 is visiting much more data than is otherwise required in order to satisfy a regex without a prefix anchor.
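The "normal termination" test described above recurs in the SubqueryOp, AbstractRunningQuery, ChunkedRunningQuery, and StandaloneChainedRunningQuery diffs below. A minimal, self-contained sketch of that test follows; the real code uses com.bigdata.util.InnerCause, so the cause-chain walk here is a stand-in to keep the sketch compilable on its own (and without the cycle protection the real helper has), and BufferClosedException is mentioned only in a comment since it is a bigdata class.

import java.nio.channels.ClosedByInterruptException;

final class NormalTermination {

    /** Return true iff some cause in the chain of t is an instance of cls. */
    private static boolean isInnerCause(final Throwable t,
            final Class<? extends Throwable> cls) {
        for (Throwable c = t; c != null; c = c.getCause()) {
            if (cls.isInstance(c))
                return true;
        }
        return false;
    }

    /**
     * True iff the throwable reflects normal termination - e.g., an interrupt
     * raised when a LIMIT is satisfied by SliceOp - rather than an error which
     * must be propagated to the query controller. (The bigdata code also tests
     * for BufferClosedException, omitted here so the sketch is standalone.)
     */
    static boolean isNormalTerminationCause(final Throwable t) {
        return isInnerCause(t, InterruptedException.class)
                || isInnerCause(t, ClosedByInterruptException.class);
    }
}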
Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/SubqueryOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/AbstractRunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/ChunkedRunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/IQueryClient.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/StandaloneChainedRunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/FederatedQueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/BlockingBuffer.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/striterator/AbstractChunkedResolverator.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestRunState.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestNIOChunkMessage.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestThickChunkMessage.java branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm/RWStore.properties branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm/WORMStore.properties branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm/build.xml branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm/src/resources/logging/log4j.properties branches/QUADS_QUERY_BRANCH/bigdata-perf/lubm/src/resources/logging/log4j.properties branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl3.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSailGraphQuery.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/bench/NanoSparqlServer.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/SOp2BOpUtility.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/RunningQueryCloseableIteration.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/SubqueryOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/SubqueryOp.java 2011-01-14 18:39:44 UTC (rev 4100) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/SubqueryOp.java 2011-01-15 14:53:26 UTC (rev 4101) @@ -27,6 +27,7 @@ package com.bigdata.bop.controller; +import java.nio.channels.ClosedByInterruptException; import java.util.Map; import java.util.UUID; import java.util.concurrent.Callable; @@ -46,7 +47,6 @@ import com.bigdata.relation.accesspath.IAsynchronousIterator; import com.bigdata.relation.accesspath.ThickAsynchronousIterator; import com.bigdata.util.InnerCause; -import com.bigdata.util.concurrent.LatchedExecutor; /** * For each binding set presented, this operator executes a subquery. Any @@ -56,7 +56,7 @@ * semantics). Each subquery is run as a separate query but will be cancelled if * the parent query is cancelled. * - * FIXME Parallel evaluation of subqueries is not implemented. What is the + * @todo Parallel evaluation of subqueries is not implemented. What is the * appropriate parallelism for this operator? More parallelism should reduce * latency but could increase the memory burden. 
Review this decision once we * have the RWStore operating as a binding set buffer on the Java process heap. @@ -83,30 +83,30 @@ /** * When <code>true</code> the subquery has optional semantics (if the * subquery fails, the original binding set will be passed along to the - * downstream sink anyway). + * downstream sink anyway) (default {@value #DEFAULT_OPTIONAL}). */ String OPTIONAL = SubqueryOp.class.getName() + ".optional"; boolean DEFAULT_OPTIONAL = false; - /** - * The maximum parallelism with which the subqueries will be evaluated - * (default {@value #DEFAULT_MAX_PARALLEL}). - */ - String MAX_PARALLEL = SubqueryOp.class.getName() - + ".maxParallel"; +// /** +// * The maximum parallelism with which the subqueries will be evaluated +// * (default {@value #DEFAULT_MAX_PARALLEL}). +// */ +// String MAX_PARALLEL = SubqueryOp.class.getName() +// + ".maxParallel"; +// +// int DEFAULT_MAX_PARALLEL = 1; - int DEFAULT_MAX_PARALLEL = 1; - } - /** - * @see Annotations#MAX_PARALLEL - */ - public int getMaxParallel() { - return getProperty(Annotations.MAX_PARALLEL, - Annotations.DEFAULT_MAX_PARALLEL); - } +// /** +// * @see Annotations#MAX_PARALLEL +// */ +// public int getMaxParallel() { +// return getProperty(Annotations.MAX_PARALLEL, +// Annotations.DEFAULT_MAX_PARALLEL); +// } /** * Deep copy constructor. @@ -171,14 +171,14 @@ */ private static class ControllerTask implements Callable<Void> { - private final SubqueryOp controllerOp; +// private final SubqueryOp controllerOp; private final BOpContext<IBindingSet> context; // private final List<FutureTask<IRunningQuery>> tasks = new LinkedList<FutureTask<IRunningQuery>>(); // private final CountDownLatch latch; private final boolean optional; - private final int nparallel; +// private final int nparallel; private final PipelineOp subquery; - private final Executor executor; +// private final Executor executor; public ControllerTask(final SubqueryOp controllerOp, final BOpContext<IBindingSet> context) { @@ -188,21 +188,21 @@ if (context == null) throw new IllegalArgumentException(); - this.controllerOp = controllerOp; +// this.controllerOp = controllerOp; this.context = context; this.optional = controllerOp.getProperty(Annotations.OPTIONAL, Annotations.DEFAULT_OPTIONAL); - this.nparallel = controllerOp.getProperty(Annotations.MAX_PARALLEL, - Annotations.DEFAULT_MAX_PARALLEL); +// this.nparallel = controllerOp.getProperty(Annotations.MAX_PARALLEL, +// Annotations.DEFAULT_MAX_PARALLEL); this.subquery = (PipelineOp) controllerOp .getRequiredProperty(Annotations.SUBQUERY); - this.executor = new LatchedExecutor(context.getIndexManager() - .getExecutorService(), nparallel); +// this.executor = new LatchedExecutor(context.getIndexManager() +// .getExecutorService(), nparallel); // this.latch = new CountDownLatch(controllerOp.arity()); @@ -258,26 +258,37 @@ for(IBindingSet bset : chunk) { - FutureTask<IRunningQuery> ft = new FutureTask<IRunningQuery>( - new SubqueryTask(bset, subquery, context)); + final IRunningQuery runningSubquery = new SubqueryTask( + bset, subquery, context).call(); - // run the subquery. - executor.execute(ft); + if (!runningSubquery.isDone()) { - try { + throw new AssertionError("Future not done: " + + runningSubquery.toString()); + + } - // wait for the outcome. - ft.get(); - - } finally { - - /* - * Ensure that the inner task is cancelled if the - * outer task is interrupted. - */ - ft.cancel(true/* mayInterruptIfRunning */); - - } +// Note: Variant using executor, but still does not support parallel evaluation of subqueries. 
+// final FutureTask<IRunningQuery> ft = new FutureTask<IRunningQuery>( +// new SubqueryTask(bset, subquery, context)); +// +// try { +// +// // run the subquery. +// executor.execute(ft); +// +// // wait for the outcome. +// ft.get(); +// +// } finally { +// +// /* +// * Ensure that the inner task is cancelled if the +// * outer task is interrupted. +// */ +// ft.cancel(true/* mayInterruptIfRunning */); +// +// } } @@ -362,16 +373,15 @@ public IRunningQuery call() throws Exception { + // The subquery + IRunningQuery runningSubquery = null; + // The iterator draining the subquery IAsynchronousIterator<IBindingSet[]> subquerySolutionItr = null; - IRunningQuery runningQuery = null; try { final QueryEngine queryEngine = parentContext.getRunningQuery() .getQueryEngine(); -// final IRunningQuery runningQuery = queryEngine -// .eval(subQueryOp); - final BOp startOp = BOpUtility.getPipelineStart(subQueryOp); final int startId = startOp.getId(); @@ -379,7 +389,7 @@ final UUID queryId = UUID.randomUUID(); // execute the subquery, passing in the source binding set. - runningQuery = queryEngine + runningSubquery = queryEngine .eval( queryId, (PipelineOp) subQueryOp, @@ -391,40 +401,58 @@ new ThickAsynchronousIterator<IBindingSet[]>( new IBindingSet[][] { new IBindingSet[] { bset } }))); - // Iterator visiting the subquery solutions. - subquerySolutionItr = runningQuery.iterator(); + long ncopied = 0L; + try { + + // Iterator visiting the subquery solutions. + subquerySolutionItr = runningSubquery.iterator(); - // Copy solutions from the subquery to the query. - final long ncopied = BOpUtility.copy(subquerySolutionItr, - parentContext.getSink(), null/* sink2 */, - null/* constraints */, null/* stats */); - - // wait for the subquery. - runningQuery.get(); + // Copy solutions from the subquery to the query. + ncopied = BOpUtility.copy(subquerySolutionItr, + parentContext.getSink(), null/* sink2 */, + null/* constraints */, null/* stats */); + // wait for the subquery to halt / test for errors. + runningSubquery.get(); + + } catch (InterruptedException ex) { + + // this thread was interrupted, so cancel the subquery. + runningSubquery + .cancel(true/* mayInterruptIfRunning */); + + // rethrow the exception. + throw ex; + + } + if (ncopied == 0L && optional) { /* * Since there were no solutions for the subquery, copy * the original binding set to the default sink. */ - parentContext.getSink().add(new IBindingSet[]{bset}); + + parentContext.getSink().add(new IBindingSet[]{bset}); } // done. - return runningQuery; + return runningSubquery; } catch (Throwable t) { - /* - * Note: SliceOp will cause other operators to be - * interrupted during normal evaluation but we do not want - * to terminate the parent query when this occurs. - */ - if (!InnerCause.isInnerCause(t, InterruptedException.class) - && !InnerCause.isInnerCause(t, BufferClosedException.class)) { - + /* + * Note: SliceOp will cause other operators to be + * interrupted during normal evaluation. Therefore, while + * these exceptions should cause the subquery to terminate, + * they should not be reported as errors to the parent + * query. 
+ */ + if (!InnerCause.isInnerCause(t, InterruptedException.class) + && !InnerCause.isInnerCause(t, BufferClosedException.class) + && !InnerCause.isInnerCause(t, ClosedByInterruptException.class)) { + /* * If a subquery fails, then propagate the error to the * parent and rethrow the first cause error out of the @@ -435,13 +463,25 @@ } - return runningQuery; + return runningSubquery; } finally { - if (subquerySolutionItr != null) - subquerySolutionItr.close(); + try { + // ensure subquery is halted. + if (runningSubquery != null) + runningSubquery + .cancel(true/* mayInterruptIfRunning */); + + } finally { + + // ensure the subquery solution iterator is closed. + if (subquerySolutionItr != null) + subquerySolutionItr.close(); + + } + } } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/AbstractRunningQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/AbstractRunningQuery.java 2011-01-14 18:39:44 UTC (rev 4100) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/AbstractRunningQuery.java 2011-01-15 14:53:26 UTC (rev 4101) @@ -28,6 +28,7 @@ package com.bigdata.bop.engine; import java.nio.ByteBuffer; +import java.nio.channels.ClosedByInterruptException; import java.util.Collections; import java.util.Map; import java.util.UUID; @@ -799,19 +800,25 @@ try { - /* - * Note: SliceOp will cause other operators to be interrupted - * during normal evaluation so it is not useful to log an - * InterruptedException @ ERROR. - */ - if (!InnerCause.isInnerCause(t, InterruptedException.class) - && !InnerCause.isInnerCause(t, BufferClosedException.class)) - log.error(toString(), t); - try { - // signal error condition. - return future.halt(t); + /* + * Note: SliceOp will cause other operators to be interrupted + * during normal evaluation so it is not useful to log an + * InterruptedException @ ERROR. + */ + if (!InnerCause.isInnerCause(t, InterruptedException.class) + && !InnerCause.isInnerCause(t, BufferClosedException.class) + && !InnerCause.isInnerCause(t, ClosedByInterruptException.class)) { + log.error(toString(), t); + // signal error condition. + return future.halt(t); + } else { + // normal termination. + future.halt((Void)null/* result */); + // the caller's cause. + return t; + } } finally { @@ -990,16 +997,28 @@ public String toString() { final StringBuilder sb = new StringBuilder(getClass().getName()); - sb.append("{queryId=" + queryId); - sb.append(",deadline=" + deadline.get()); - sb.append(",isDone=" + isDone()); - sb.append(",isCancelled=" + isCancelled()); - sb.append(",runState=" + runState); - sb.append(",controller=" + controller); - sb.append(",clientProxy=" + clientProxy); - sb.append(",query=" + query); - sb.append("}"); - return sb.toString(); + sb.append("{queryId=" + queryId); + /* + * Note: Obtaining the lock here is required to avoid concurrent + * modification exception in RunState's toString() when there is a + * concurrent change in the RunState. It also makes the isDone() and + * isCancelled() reporting atomic. 
+ */ + lock.lock(); + try { + sb.append(",elapsed=" + getElapsed()); + sb.append(",deadline=" + deadline.get()); + sb.append(",isDone=" + isDone()); + sb.append(",isCancelled=" + isCancelled()); + sb.append(",runState=" + runState); + } finally { + lock.unlock(); + } + sb.append(",controller=" + controller); + sb.append(",clientProxy=" + clientProxy); + sb.append(",query=" + query); + sb.append("}"); + return sb.toString(); } // abstract protected IChunkHandler getChunkHandler(); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/ChunkedRunningQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/ChunkedRunningQuery.java 2011-01-14 18:39:44 UTC (rev 4100) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/ChunkedRunningQuery.java 2011-01-15 14:53:26 UTC (rev 4101) @@ -27,6 +27,7 @@ */ package com.bigdata.bop.engine; +import java.nio.channels.ClosedByInterruptException; import java.util.Iterator; import java.util.LinkedList; import java.util.List; @@ -783,26 +784,33 @@ public void run() { - super.run(); + try { - /* - * This task is done executing so remove its Future before we - * attempt to schedule another task for the same - * (bopId,partitionId). - */ - final ConcurrentHashMap<ChunkFutureTask, ChunkFutureTask> map = operatorFutures - .get(new BSBundle(t.bopId, t.partitionId)); + super.run(); + + } finally { - if (map != null) { + /* + * This task is done executing so remove its Future before we + * attempt to schedule another task for the same + * (bopId,partitionId). + */ - map.remove(this, this); + final ConcurrentHashMap<ChunkFutureTask, ChunkFutureTask> map = operatorFutures + .get(new BSBundle(t.bopId, t.partitionId)); - } + if (map != null) { - // Schedule another task if any messages are waiting. - ChunkedRunningQuery.this.scheduleNext(new BSBundle( - t.bopId, t.partitionId)); - + map.remove(this, this); + + } + + } + + // Schedule another task if any messages are waiting. + ChunkedRunningQuery.this.scheduleNext(new BSBundle(t.bopId, + t.partitionId)); + } } @@ -852,6 +860,21 @@ final long begin = System.currentTimeMillis(); try { t.call(); + } catch(Throwable t) { + /* + * Note: SliceOp will cause other operators to be + * interrupted during normal evaluation. Therefore, while + * these exceptions should cause the query to terminate, + * they should not be reported as errors to the query + * controller. + */ + if (!InnerCause.isInnerCause(t, InterruptedException.class) + && !InnerCause.isInnerCause(t, BufferClosedException.class) + && !InnerCause.isInnerCause(t, ClosedByInterruptException.class) + ) { + // Not an error that we should ignore. + throw t; + } } finally { t.context.getStats().elapsed.add(System.currentTimeMillis() - begin); @@ -876,19 +899,10 @@ } catch (Throwable ex1) { - /* - * Note: SliceOp will cause other operators to be interrupted - * during normal evaluation so it is not useful to log an - * InterruptedException @ ERROR. - */ - if (!InnerCause.isInnerCause(ex1, InterruptedException.class) - && !InnerCause.isInnerCause(ex1, BufferClosedException.class) - ) { - // Log an error. - log.error("queryId=" + getQueryId() + ", bopId=" + t.bopId - + ", bop=" + t.bop, ex1); - } - + // Log an error. + log.error("queryId=" + getQueryId() + ", bopId=" + t.bopId + + ", bop=" + t.bop, ex1); + /* * Mark the query as halted on this node regardless of whether * we are able to communicate with the query controller. 
Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/IQueryClient.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/IQueryClient.java 2011-01-14 18:39:44 UTC (rev 4100) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/IQueryClient.java 2011-01-15 14:53:26 UTC (rev 4101) @@ -10,6 +10,13 @@ */ public interface IQueryClient extends IQueryPeer { + /** + * Return the set of queries which are running as of the moment when the + * request was processed. Queries reported in the returned array may + * terminate at any time. + */ + UUID[] getRunningQueries(); + /** * Return the query. * Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java 2011-01-14 18:39:44 UTC (rev 4100) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java 2011-01-15 14:53:26 UTC (rev 4101) @@ -58,6 +58,7 @@ import com.bigdata.btree.IndexSegment; import com.bigdata.btree.view.FusedView; import com.bigdata.journal.IIndexManager; +import com.bigdata.rdf.sail.bench.NanoSparqlClient; import com.bigdata.relation.accesspath.IAsynchronousIterator; import com.bigdata.relation.accesspath.ThickAsynchronousIterator; import com.bigdata.resources.IndexManager; @@ -967,17 +968,20 @@ } } - + /** - * Return the {@link AbstractRunningQuery} associated with that query identifier. + * Return the {@link AbstractRunningQuery} associated with that query + * identifier. * * @param queryId * The query identifier. * - * @return The {@link AbstractRunningQuery} -or- <code>null</code> if there is no - * query associated with that query identifier. + * @return The {@link AbstractRunningQuery} -or- <code>null</code> if there + * is no query associated with that query identifier. + * + * @todo Exposed to {@link NanoSparqlServer} */ - protected AbstractRunningQuery getRunningQuery(final UUID queryId) { + public /*protected*/ AbstractRunningQuery getRunningQuery(final UUID queryId) { if(queryId == null) throw new IllegalArgumentException(); @@ -1164,4 +1168,10 @@ } + public UUID[] getRunningQueries() { + + return runningQueries.keySet().toArray(new UUID[0]); + + } + } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/StandaloneChainedRunningQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/StandaloneChainedRunningQuery.java 2011-01-14 18:39:44 UTC (rev 4100) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/StandaloneChainedRunningQuery.java 2011-01-15 14:53:26 UTC (rev 4101) @@ -27,6 +27,7 @@ package com.bigdata.bop.engine; +import java.nio.channels.ClosedByInterruptException; import java.util.UUID; import java.util.concurrent.BlockingQueue; import java.util.concurrent.Callable; @@ -440,6 +441,21 @@ final long begin = System.currentTimeMillis(); try { t.call(); + } catch(Throwable t) { + /* + * Note: SliceOp will cause other operators to be + * interrupted during normal evaluation. Therefore, while + * these exceptions should cause the query to terminate, + * they should not be reported as errors to the query + * controller. 
+ */ + if (!InnerCause.isInnerCause(t, InterruptedException.class) + && !InnerCause.isInnerCause(t, BufferClosedException.class) + && !InnerCause.isInnerCause(t, ClosedByInterruptException.class) + ) { + // Not an error that we should ignore. + throw t; + } } finally { t.context.getStats().elapsed.add(System.currentTimeMillis() - begin); @@ -457,20 +473,11 @@ StandaloneChainedRunningQuery.this.haltOp(msg); } catch (Throwable ex1) { -log.fatal(ex1,ex1); // FIXME remove log stmt. - /* - * Note: SliceOp will cause other operators to be interrupted - * during normal evaluation so it is not useful to log an - * InterruptedException @ ERROR. - */ - if (!InnerCause.isInnerCause(ex1, InterruptedException.class) - && !InnerCause.isInnerCause(ex1, BufferClosedException.class) - ) { - // Log an error. - log.error("queryId=" + getQueryId() + ", bopId=" + t.bopId - + ", bop=" + t.bop, ex1); - } + // Log an error. + log.error("queryId=" + getQueryId() + ", bopId=" + t.bopId + + ", bop=" + t.bop, ex1); + /* * Mark the query as halted on this node regardless of whether * we are able to communicate with the query controller. Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/FederatedQueryEngine.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/FederatedQueryEngine.java 2011-01-14 18:39:44 UTC (rev 4100) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/FederatedQueryEngine.java 2011-01-15 14:53:26 UTC (rev 4101) @@ -163,7 +163,7 @@ * {@inheritDoc} */ @Override - protected FederatedRunningQuery getRunningQuery(final UUID queryId) { + public /*protected*/ FederatedRunningQuery getRunningQuery(final UUID queryId) { return (FederatedRunningQuery) super.getRunningQuery(queryId); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/BlockingBuffer.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/BlockingBuffer.java 2011-01-14 18:39:44 UTC (rev 4100) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/BlockingBuffer.java 2011-01-15 14:53:26 UTC (rev 4101) @@ -1074,6 +1074,8 @@ logTimeout += Math.min(maxLogTimeout, logTimeout); + if(log.isInfoEnabled()) { + final String msg = "blocked: ntries=" + ntries + ", elapsed=" @@ -1096,6 +1098,8 @@ // issue warning. log.warn(msg); } + + } } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java 2011-01-14 18:39:44 UTC (rev 4100) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java 2011-01-15 14:53:26 UTC (rev 4101) @@ -1432,7 +1432,13 @@ } } } catch (Throwable e) { - log.error(e,e); + /* + * Note: ClosedByInterruptException can be thrown out of + * FileChannelUtility.readAll(), typically because the LIMIT on + * a query was satisified, but we do not want to log that as an + * error. 
+ */ +// log.error(e,e); throw new IllegalArgumentException("Unable to read data", e); } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/striterator/AbstractChunkedResolverator.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/striterator/AbstractChunkedResolverator.java 2011-01-14 18:39:44 UTC (rev 4100) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/striterator/AbstractChunkedResolverator.java 2011-01-15 14:53:26 UTC (rev 4101) @@ -239,9 +239,11 @@ } finally { - src.close(); - - buffer.close(); + try { + src.close(); + } finally { + buffer.close(); + } } @@ -336,7 +338,11 @@ log.info("lastIndex=" + lastIndex + ", chunkSize=" + (chunk != null ? "" + chunk.length : "N/A")); - // asynchronous close by the consumer of the producer's buffer. + /* + * Asynchronous close by the consumer of the producer's buffer. This + * will cause the ChunkConsumerTask to abort if it is still running and + * that will cause the [src] to be closed. + */ buffer.close(); chunk = null; Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestRunState.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestRunState.java 2011-01-14 18:39:44 UTC (rev 4100) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestRunState.java 2011-01-15 14:53:26 UTC (rev 4101) @@ -952,6 +952,10 @@ throws RemoteException { } + public UUID[] getRunningQueries() { + return null; + } + } /** Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestNIOChunkMessage.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestNIOChunkMessage.java 2011-01-14 18:39:44 UTC (rev 4100) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestNIOChunkMessage.java 2011-01-15 14:53:26 UTC (rev 4101) @@ -259,6 +259,10 @@ throws RemoteException { } + public UUID[] getRunningQueries() { + return null; + } + } private static class MyNIOChunkMessage<E> extends NIOChunkMessage<E> { Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestThickChunkMessage.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestThickChunkMessage.java 2011-01-14 18:39:44 UTC (rev 4100) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestThickChunkMessage.java 2011-01-15 14:53:26 UTC (rev 4101) @@ -232,6 +232,10 @@ throws RemoteException { } + public UUID[] getRunningQueries() { + return null; + } + } } Modified: branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm/RWStore.properties =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm/RWStore.properties 2011-01-14 18:39:44 UTC (rev 4100) +++ branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm/RWStore.properties 2011-01-15 14:53:26 UTC (rev 4101) @@ -24,6 +24,17 @@ com.bigdata.namespace.BSBM_284826.spo.SPO.com.bigdata.btree.BTree.branchingFactor=512 com.bigdata.namespace.BSBM_284826.spo.OSP.com.bigdata.btree.BTree.branchingFactor=470 +# Reduce the branching factor for the lexicon since BSBM uses a lot of long +# literals. Note that you have to edit this override to specify the namespace +# into which the BSBM data will be loaded. 
+com.bigdata.namespace.BSBM_566496.lex.TERM2ID.com.bigdata.btree.BTree.branchingFactor=32 +com.bigdata.namespace.BSBM_566496.lex.ID2TERM.com.bigdata.btree.BTree.branchingFactor=32 + +# 4k pages. +com.bigdata.namespace.BSBM_566496.spo.POS.com.bigdata.btree.BTree.branchingFactor=970 +com.bigdata.namespace.BSBM_566496.spo.SPO.com.bigdata.btree.BTree.branchingFactor=512 +com.bigdata.namespace.BSBM_566496.spo.OSP.com.bigdata.btree.BTree.branchingFactor=470 + # Override the #of write cache buffers. com.bigdata.journal.AbstractJournal.writeCacheBufferCount=12 @@ -59,3 +70,6 @@ # 10000 is default. com.bigdata.rdf.sail.bufferCapacity=100000 + +# direct sesame to bop translation. +com.bigdata.rdf.sail.newEvalStrategy=true Modified: branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm/WORMStore.properties =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm/WORMStore.properties 2011-01-14 18:39:44 UTC (rev 4100) +++ branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm/WORMStore.properties 2011-01-15 14:53:26 UTC (rev 4101) @@ -38,3 +38,6 @@ # 10000 is default. com.bigdata.rdf.sail.bufferCapacity=100000 + +# direct sesame to bop translation. +com.bigdata.rdf.sail.newEvalStrategy=true Modified: branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm/build.xml =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm/build.xml 2011-01-14 18:39:44 UTC (rev 4100) +++ branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm/build.xml 2011-01-15 14:53:26 UTC (rev 4101) @@ -51,14 +51,14 @@ <exclude name="**/*.java" /> <exclude name="**/package.html" /> </fileset> - <!-- copy log4j configuration file. --> - <fileset dir="${bsbm.dir}/src/resources/logging" /> </copy> <copy toDir="${build.dir}/bin"> <!-- copy benchmark data and queries. --> <fileset dir="${bsbm.dir}/src/resources/bsbm-data" /> <!-- copy the journal configuration file. --> <fileset file="${bsbm.dir}/*.properties" /> + <!-- copy log4j configuration file. --> + <fileset dir="${bsbm.dir}/src/resources/logging" /> </copy> </target> @@ -143,7 +143,7 @@ <!-- delete file if it exists so we load into a new journal. --> <delete file="${bsbm.journalFile}" /> <java classname="com.bigdata.rdf.store.DataLoader" fork="true" failonerror="true" dir="${build.dir}/bin"> - <arg line="-namespace ${bsbm.namespace} ${bsbm.journalPropertyFile} ${bsbm.outputFile}.${bsbm.outputType}${bsbm.compressType}" /> + <arg line="-verbose -namespace ${bsbm.namespace} ${bsbm.journalPropertyFile} ${bsbm.outputFile}.${bsbm.outputType}${bsbm.compressType}" /> <!-- specify/override the journal file name. 
--> <jvmarg line="${queryJvmArgs} -Dcom.bigdata.journal.AbstractJournal.file=${bsbm.journalFile} -Dcom.bigdata.rdf.store.DataLoader.bufferCapacity=1000000 Modified: branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm/src/resources/logging/log4j.properties =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm/src/resources/logging/log4j.properties 2011-01-14 18:39:44 UTC (rev 4100) +++ branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm/src/resources/logging/log4j.properties 2011-01-15 14:53:26 UTC (rev 4101) @@ -244,7 +244,7 @@ log4j.additivity.com.bigdata.bop.engine.RunState$TableLog=false log4j.appender.queryRunStateLog=org.apache.log4j.FileAppender log4j.appender.queryRunStateLog.Threshold=ALL -log4j.appender.queryRunStateLog.File=queryRunState.log +log4j.appender.queryRunStateLog.File=queryRunState.csv log4j.appender.queryRunStateLog.Append=true # I find that it is nicer to have this unbuffered since you can see what # is going on and to make sure that I have complete rule evaluation logs Modified: branches/QUADS_QUERY_BRANCH/bigdata-perf/lubm/src/resources/logging/log4j.properties =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-perf/lubm/src/resources/logging/log4j.properties 2011-01-14 18:39:44 UTC (rev 4100) +++ branches/QUADS_QUERY_BRANCH/bigdata-perf/lubm/src/resources/logging/log4j.properties 2011-01-15 14:53:26 UTC (rev 4101) @@ -156,6 +156,8 @@ #log4j.logger.com.bigdata.relation.accesspath.IAccessPath=DEBUG #log4j.logger.com.bigdata.rdf.sail.BigdataSail=DEBUG +#log4j.logger.com.bigdata.rdf.sail.Rule2BOpUtility=INFO +log4j.logger.com.bigdata.bop.controller.JoinGraph=INFO #log4j.logger.com.bigdata.rdf.sail.TestNamedGraphs=DEBUG log4j.logger.com.bigdata.rdf.sail.QuadsTestCase=DEBUG #log4j.logger.com.bigdata.relation.rule.eval.NestedSubqueryWithJoinThreadsTask=DEBUG @@ -178,7 +180,7 @@ log4j.logger.com.bigdata.rdf.store.DataLoader=INFO # Test suite logger. -log4j.logger.junit=INFO +#log4j.logger.junit=INFO #log4j.logger.junit=DEBUG log4j.logger.com.bigdata.btree.AbstractBTreeTestCase=INFO @@ -202,6 +204,7 @@ ## # Summary query evaluation log (tab delimited file). +# Uncomment the next line to enable. #log4j.logger.com.bigdata.bop.engine.QueryLog=INFO,queryLog log4j.additivity.com.bigdata.bop.engine.QueryLog=false log4j.appender.queryLog=org.apache.log4j.FileAppender Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl.java 2011-01-14 18:39:44 UTC (rev 4100) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl.java 2011-01-15 14:53:26 UTC (rev 4101) @@ -1725,6 +1725,11 @@ new BigdataBindingSetResolverator(database, it2).start(database .getExecutorService())); + /* + * FIXME This will deadlock in the buffer fills - see + * BigdataEvaluationStrategyImpl3 which contains a new code pattern for + * this. + */ try { // Wait for the Future (checks for errors). 
runningQuery.get(); Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl3.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl3.java 2011-01-14 18:39:44 UTC (rev 4100) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl3.java 2011-01-15 14:53:26 UTC (rev 4101) @@ -711,24 +711,34 @@ final Collection<Filter> sesameFilters) throws QueryEvaluationException { + IRunningQuery runningQuery = null; try { - final IRunningQuery runningQuery = queryEngine.eval(query); - + // Submit query for evaluation. + runningQuery = queryEngine.eval(query); + + // Iterator draining the query results. final IAsynchronousIterator<IBindingSet[]> it1 = runningQuery.iterator(); + // De-chunk the IBindingSet[] visited by that iterator. final IChunkedOrderedIterator<IBindingSet> it2 = new ChunkedWrappedIterator<IBindingSet>( new Dechunkerator<IBindingSet>(it1)); - - CloseableIteration<BindingSet, QueryEvaluationException> result = + + // Materialize IVs as RDF Values. + CloseableIteration<BindingSet, QueryEvaluationException> result = + // Monitor IRunningQuery and cancel if Sesame iterator is closed. + new RunningQueryCloseableIteration<BindingSet, QueryEvaluationException>(runningQuery, + // Convert bigdata binding sets to Sesame binding sets. new Bigdata2Sesame2BindingSetIterator<QueryEvaluationException>( + // Materialize IVs as RDF Values. new BigdataBindingSetResolverator(database, it2).start( - database.getExecutorService())); + database.getExecutorService()))); - // Wait for the Future (checks for errors). - runningQuery.get(); +// No - will deadlock if buffer fills up +// // Wait for the Future (checks for errors). +// runningQuery.get(); // use the basic filter iterator for remaining filters if (sesameFilters != null) { @@ -740,13 +750,13 @@ } } - return result; - - } catch (QueryEvaluationException ex) { - throw ex; - } catch (Exception ex) { - throw new QueryEvaluationException(ex); - } + return result; + + } catch (Throwable t) { + if (runningQuery != null) + runningQuery.cancel(true/* mayInterruptIfRunning */); + throw new QueryEvaluationException(t); + } } Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSailGraphQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSailGraphQuery.java 2011-01-14 18:39:44 UTC (rev 4100) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSailGraphQuery.java 2011-01-15 14:53:26 UTC (rev 4101) @@ -228,8 +228,8 @@ try { - TupleExpr tupleExpr = getParsedQuery().getTupleExpr(); - BigdataSailConnection sailCon = + final TupleExpr tupleExpr = getParsedQuery().getTupleExpr(); + final BigdataSailConnection sailCon = (BigdataSailConnection) getConnection().getSailConnection(); CloseableIteration<? 
extends BindingSet, QueryEvaluationException> bindingsIter = sailCon.evaluate( @@ -242,7 +242,7 @@ bindingsIter) { @Override protected boolean accept(BindingSet bindingSet) { - Value context = bindingSet.getValue("context"); + final Value context = bindingSet.getValue("context"); return bindingSet.getValue("subject") instanceof Resource && bindingSet.getValue("predicate") instanceof URI && bindingSet.getValue("object") instanceof Value @@ -254,15 +254,15 @@ // Convert the BindingSet objects to actual RDF statements final ValueFactory vf = getConnection().getRepository().getValueFactory(); - CloseableIteration<Statement, QueryEvaluationException> stIter; + final CloseableIteration<Statement, QueryEvaluationException> stIter; stIter = new ConvertingIteration<BindingSet, Statement, QueryEvaluationException>(bindingsIter) { @Override protected Statement convert(BindingSet bindingSet) { - Resource subject = (Resource)bindingSet.getValue("subject"); - URI predicate = (URI)bindingSet.getValue("predicate"); - Value object = bindingSet.getValue("object"); - Resource context = (Resource)bindingSet.getValue("context"); + final Resource subject = (Resource)bindingSet.getValue("subject"); + final URI predicate = (URI)bindingSet.getValue("predicate"); + final Value object = bindingSet.getValue("object"); + final Resource context = (Resource)bindingSet.getValue("context"); if (context == null) { return vf.createStatement(subject, predicate, object); @@ -277,10 +277,11 @@ return new GraphQueryResultImpl(getParsedQuery().getQueryNamespaces(), stIter); } else { + // native construct. // Convert the BindingSet objects to actual RDF statements final ValueFactory vf = getConnection().getRepository().getValueFactory(); - CloseableIteration<? extends Statement, QueryEvaluationException> stIter; + final CloseableIteration<? extends Statement, QueryEvaluationException> stIter; stIter = new BigdataConstructIterator(sailCon.getTripleStore(), bindingsIter, vf); return new GraphQueryResultImpl(getParsedQuery() .getQueryNamespaces(), stIter); Added: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/RunningQueryCloseableIteration.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/RunningQueryCloseableIteration.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/RunningQueryCloseableIteration.java 2011-01-15 14:53:26 UTC (rev 4101) @@ -0,0 +1,63 @@ +package com.bigdata.rdf.sail; + +import info.aduna.iteration.CloseableIteration; + +import java.util.concurrent.ExecutionException; + +import org.openrdf.query.BindingSet; +import org.openrdf.query.QueryEvaluationException; + +import com.bigdata.bop.engine.IRunningQuery; + +/** + * Iteration construct wraps an {@link IRunningQuery} with logic to (a) verify + * that the {@link IRunningQuery} has not encountered an error; and (b) to cancel + * the {@link IRunningQuery} when the iteration is {@link #close() closed}. 
+ * @author thompsonbry + * + * @param <E> + * @param <X> + */ +public class RunningQueryCloseableIteration<E extends BindingSet, X extends QueryEvaluationException> + implements CloseableIteration<E, X> { + + private final IRunningQuery runningQuery; + private final CloseableIteration<E, X> src; + private boolean checkedFuture = false; + + public RunningQueryCloseableIteration(final IRunningQuery runningQuery, + final CloseableIteration<E, X> src) { + + this.runningQuery = runningQuery; + this.src = src; + + } + + public void close() throws X { + runningQuery.cancel(true/* mayInterruptIfRunning */); + src.close(); + } + + public boolean hasNext() throws X { + return src.hasNext(); + } + + public E next() throws X { + if (!checkedFuture && runningQuery.isDone()) { + try { + runningQuery.get(); + } catch (InterruptedException e) { + throw (X) new QueryEvaluationException(e); + } catch (ExecutionException e) { + throw (X) new QueryEvaluationException(e); + } + checkedFuture = true; + } + return src.next(); + } + + public void remove() throws X { + src.remove(); + } + +} Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/bench/NanoSparqlServer.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/bench/NanoSparqlServer.java 2011-01-14 18:39:44 UTC (rev 4100) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/bench/NanoSparqlServer.java 2011-01-15 14:53:26 UTC (rev 4101) @@ -49,6 +49,7 @@ import java.util.Map; import java.util.Properties; import java.util.TreeMap; +import java.util.UUID; import java.util.Vector; import java.util.concurrent.Callable; import java.util.concurrent.ConcurrentHashMap; @@ -74,8 +75,12 @@ import org.openrdf.sail.SailException; import com.bigdata.LRUNexus; +import com.bigdata.bop.BOpUtility; import com.bigdata.bop.BufferAnnotations; import com.bigdata.bop.IPredicate; +import com.bigdata.bop.engine.IRunningQuery; +import com.bigdata.bop.engine.QueryEngine; +import com.bigdata.bop.fed.QueryEngineFactory; import com.bigdata.bop.join.PipelineJoin; import com.bigdata.btree.IndexMetadata; import com.bigdata.journal.AbstractJournal; @@ -709,7 +714,7 @@ return config.timestamp; } - + /** * Respond to a status request. * @@ -719,15 +724,27 @@ * @param params * @return * @throws Exception + * + * @todo add statistics for top-N queries based on query template + * identifiers, which can be communicated using query hints. See // + * wait for the subquery. + * @todo Report on the average query latency, average concurrency of query + * evaluation, etc. */ public Response doStatus(final String uri, final String method, final Properties header, final LinkedHashMap<String, Vector<String>> params) throws Exception { + // SPARQL queries accepted by the SPARQL end point. final boolean showQueries = params.get("showQueries") != null; + // IRunningQuery objects currently running on the query controller. + final boolean showRunningQueries = params.get("showRunningQueries") != null; + + // Information about the KB (stats, properties). final boolean showKBInfo = params.get("showKBInfo") != null; + // bigdata namespaces known to the index manager. final boolean showNamespaces = params.get("showNamespaces") != null; final StringBuilder sb = new StringBuilder(); @@ -789,15 +806,17 @@ } // show the disk access details. 
- sb.append(jnl.getBufferStrategy().getCounters().toString()+"\n\n"); + sb.append(jnl.getBufferStrategy().getCounters().toString()+"\n"); } if(showQueries) { /* - * Show the queries which are currently executing. + * Show the queries which are currently executing (accepted by the NanoSparqlServer). */ + + sb.append("\n"); final long now = System.nanoTime(); @@ -850,6 +869,86 @@ } + if(showRunningQueries) { + + /* + * Show the queries which are currently executing (actually running + * on the QueryEngine). + */ + + sb.append("\n"); + + final QueryEngine queryEngine = (QueryEngine) QueryEngineFactory + .getQueryController(indexManager); + + final UUID[] queryIds = queryEngine.getRunningQueries(); + +// final long now = System.nanoTime(); + + final TreeMap<Long, IRunningQuery> ages = new TreeMap<Long, IRunningQuery>(new Comparator<Long>() { + /** + * Comparator puts the entries into descending order by the query + * execution time (longest running queries are first). + */ + public int compare(final Long o1, final Long o2) { + if(o1.longValue()<o2.longValue()) return 1; + if(o1.longValue()>o2.longValue()) return -1; + return 0; + } + }); + + for(UUID queryId : queryIds) { + + final IRunningQuery query = queryEngine + .getRunningQuery(queryId); + + if (query == null) { + // Already terminated. + continue; + } + + ages.put(query.getElapsed(), query); + + } + + { + + final Iterator<IRunningQuery> itr = ages.values().iterator(); + + while (itr.hasNext()) { + + final IRunningQuery query = itr.next(); + + if (query.isDone() && query.getCause() != null) { + // Already terminated (normal completion). + continue; + } + + /* + * @todo The runstate and stats could be formatted into an + * HTML table ala QueryLog or RunState. + */ + sb.append("age=" + query.getElapsed() + "ms\n"); + sb.append("queryId=" + query.getQueryId() + "\n"); + sb.append(query.toString()); + sb.append("\n"); + sb.append(BOpUtility.toString(query.getQuery())); + sb.append("\n"); + sb.append("\n"); + +// final long age = query.getElapsed(); +// sb.append("age=" +// + java.util.concurrent.TimeUnit.NANOSECONDS +// .toMillis(age) + "ms, queryId=" +// + query.getQueryId() + "\nquery=" +// + BOpUtility.toString(query.getQuery()) + "\n"); + + } + + } + + } + return new Response(HTTP_OK, MIME_TEXT_PLAIN, sb.toString()); } Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/SOp2BOpUtility.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/SOp2BOpUtility.java 2011-01-14 18:39:44 UTC (rev 4100) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/SOp2BOpUtility.java 2011-01-15 14:53:26 UTC (rev 4101) @@ -169,20 +169,6 @@ PipelineOp left = Rule2BOpUtility.convert( rule, conditionals, idFactory, db, queryEngine, queryHints); - if (!left.getEvaluationContext().equals( - BOpEvaluationContext.CONTROLLER)) { - /* - * Wrap with an operator which will be evaluated on the query - * controller so the results will be streamed back to the query - * controller in scale-out. - */ - left = new SliceOp(new BOp[] { left }, NV.asMap(// - new NV(BOp.Annotations.BOP_ID, idFactory - .incrementAndGet()), // - new NV(BOp.Annotations.EVALUATION_CONTEXT, - BOpEvaluationContext.CONTROLLER))); - } - /* * Start with left=<this join group> and add a SubqueryOp for each * sub group. 
@@ -207,6 +193,33 @@ } } + if (!left.getEvaluationContext() + .equals(BOpEvaluationContext.CONTROLLER) + && !(left instanceof SubqueryOp)) { + /* + * Wrap with an operator which will be evaluated on the query + * controller so the results will be streamed back to the query + * controller in scale-out. + * + * @todo For scale-out, we probably need to stream the results back + * to the node from which the subquery was issued. If the subquery + * is issued against the local query engine where the IBindingSet + * was produced, then the that query engine is the query controller + * for the subquery and a SliceOp on the subquery would bring the + * results for the subquery back to that query controller. There is + * no requirement that the query controller for the subquery and the + * query controller for the parent query be the same node. [I am not + * doing this currently in order to test whether there is a problem + * with SliceOp which interactions with SubqueryOp to allow + * incorrect termination under some circumstances. + */ + left = new SliceOp(new BOp[] { left }, NV.asMap(// + new NV(BOp.Annotations.BOP_ID, idFactory + .incrementAndGet()), // + new NV(BOp.Annotations.EVALUATION_CONTEXT, + BOpEvaluationContext.CONTROLLER))); + } + return left; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
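To make the intent of the RunningQueryCloseableIteration class added in this revision concrete, here is a hypothetical caller; runningQuery and wrapped are assumed to be produced as in BigdataEvaluationStrategyImpl3 above (QueryEngine.eval() plus the Bigdata2Sesame2BindingSetIterator chain). The point of the pattern: query errors surface through next(), and close() cancels the IRunningQuery even when the consumer stops early, which is exactly the LIMIT-satisfied case this commit addresses.

import info.aduna.iteration.CloseableIteration;
import org.openrdf.query.BindingSet;
import org.openrdf.query.QueryEvaluationException;
import com.bigdata.bop.engine.IRunningQuery;
import com.bigdata.rdf.sail.RunningQueryCloseableIteration;

final class DrainQueryExample {

    /**
     * Drain up to limit solutions. Stopping early is safe: close() cancels
     * the IRunningQuery, which interrupts the producers feeding it.
     */
    static void drain(final IRunningQuery runningQuery,
            final CloseableIteration<BindingSet, QueryEvaluationException> wrapped,
            final long limit) throws QueryEvaluationException {

        final CloseableIteration<BindingSet, QueryEvaluationException> itr =
                new RunningQueryCloseableIteration<BindingSet, QueryEvaluationException>(
                        runningQuery, wrapped);
        try {
            long n = 0;
            while (n < limit && itr.hasNext()) {
                // next() rethrows any error encountered by the query.
                final BindingSet bset = itr.next();
                n++; // ... consume bset ...
            }
        } finally {
            itr.close(); // cancels the query if it is still running
        }
    }
}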
From: <tho...@us...> - 2011-01-15 22:21:10
|
Revision: 4104 http://bigdata.svn.sourceforge.net/bigdata/?rev=4104&view=rev Author: thompsonbry Date: 2011-01-15 22:21:02 +0000 (Sat, 15 Jan 2011) Log Message: ----------- Javadoc on IValueExpression, MathBOp, and CompareBOp. Formatting on /status requests for NanoSparqlServer. Added a test to run BSBM Q5 using the runtime query optimizer (I still have to modify JoinGraph to interpret the PipelineJoin.Annotations.CONSTRAINTS from the IPredicate before this can be run). Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IValueExpression.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/CompareBOp.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/MathBOp.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/bench/NanoSparqlServer.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IValueExpression.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IValueExpression.java 2011-01-15 21:23:23 UTC (rev 4103) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IValueExpression.java 2011-01-15 22:21:02 UTC (rev 4104) @@ -5,16 +5,16 @@ public interface IValueExpression<E> extends BOp, Serializable { /** - * Return the <i>as bound</i> value of the variable or constant. The <i>as - * bound</i> value of an {@link IConstant} is the contant's value. The <i>as - * bound</i> value of an {@link IVariable} is the bound value in the given - * {@link IBindingSet} -or- <code>null</code> if the variable is not bound - * in the {@link IBindingSet}. + * Return the <i>as bound</i> value of the variable, constant, or + * expression. The <i>as bound</i> value of an {@link IConstant} is the + * constant's value. The <i>as bound</i> value of an {@link IVariable} is the + * bound value in the given {@link IBindingSet} -or- <code>null</code> if + * the variable is not bound in the {@link IBindingSet}. * * @param bindingSet * The binding set. * - * @return The as bound value of the constant or variable. + * @return The as bound value of the constant, variable, or expression. * * @throws IllegalArgumentException * if this is an {@link IVariable} and the <i>bindingSet</i> is Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/CompareBOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/CompareBOp.java 2011-01-15 21:23:23 UTC (rev 4103) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/CompareBOp.java 2011-01-15 22:21:02 UTC (rev 4104) @@ -31,7 +31,6 @@ import com.bigdata.bop.BOp; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IValueExpression; -import com.bigdata.bop.IVariableOrConstant; import com.bigdata.bop.NV; import com.bigdata.bop.PipelineOp; import com.bigdata.bop.constraint.BOpConstraint; @@ -53,7 +52,7 @@ public interface Annotations extends PipelineOp.Annotations { /** - * The compare operator + * The compare operator, which is a {@link CompareOp} enum value.
*/ String OP = CompareBOp.class.getName() + ".op"; Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/MathBOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/MathBOp.java 2011-01-15 21:23:23 UTC (rev 4103) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/MathBOp.java 2011-01-15 22:21:02 UTC (rev 4104) @@ -35,7 +35,9 @@ import com.bigdata.rdf.internal.IVUtility; /** - * A math expression involving a left and right IValueExpression operand. + * A math expression involving a left and right IValueExpression operand. The + * operation to be applied to the operands is specified by the + * {@link Annotations#OP} annotation. */ final public class MathBOp extends ImmutableBOp implements IValueExpression<IV> { @@ -48,8 +50,11 @@ public interface Annotations extends ImmutableBOp.Annotations { /** - * The {@link IVariable} which is bound to that constant value - * (optional). + * The operation to be applied to the left and right operands + * (required). The value of this annotation is a {@link MathOp}, such as + * {@link MathOp#PLUS}. + * + * @see MathOp */ String OP = MathBOp.class.getName() + ".op"; @@ -66,6 +71,16 @@ } + /** + * + * @param left + * The left operand. + * @param right + * The right operand. + * @param op + * The annotation specifying the operation to be performed on + * those operands. + */ public MathBOp(final IValueExpression<IV> left, final IValueExpression<IV> right, final MathOp op) { Added: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java 2011-01-15 22:21:02 UTC (rev 4104) @@ -0,0 +1,687 @@ +package com.bigdata.bop.rdf.joinGraph; + +import java.io.File; +import java.util.Arrays; +import java.util.Properties; + +import junit.framework.TestCase2; + +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.openrdf.query.algebra.Compare.CompareOp; +import org.openrdf.query.algebra.MathExpr.MathOp; +import org.semanticweb.yars.nx.dt.numeric.XSDInt; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.BOpContextBase; +import com.bigdata.bop.BOpIdFactory; +import com.bigdata.bop.Constant; +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IConstraint; +import com.bigdata.bop.IPredicate; +import com.bigdata.bop.IVariable; +import com.bigdata.bop.NV; +import com.bigdata.bop.PipelineOp; +import com.bigdata.bop.Var; +import com.bigdata.bop.IPredicate.Annotations; +import com.bigdata.bop.constraint.NEConstant; +import com.bigdata.bop.controller.JoinGraph; +import com.bigdata.bop.controller.JoinGraph.JGraph; +import com.bigdata.bop.controller.JoinGraph.Path; +import com.bigdata.bop.engine.BOpStats; +import com.bigdata.bop.engine.IRunningQuery; +import com.bigdata.bop.engine.QueryEngine; +import com.bigdata.bop.engine.QueryLog; +import com.bigdata.bop.fed.QueryEngineFactory; +import com.bigdata.bop.join.PipelineJoin; +import com.bigdata.journal.Journal; +import com.bigdata.rdf.internal.IVUtility; +import com.bigdata.rdf.internal.constraints.CompareBOp; +import com.bigdata.rdf.internal.constraints.MathBOp; 
+import com.bigdata.rdf.model.BigdataURI;
+import com.bigdata.rdf.model.BigdataValue;
+import com.bigdata.rdf.model.BigdataValueFactory;
+import com.bigdata.rdf.spo.SPOPredicate;
+import com.bigdata.rdf.store.AbstractTripleStore;
+import com.bigdata.rdf.store.DataLoader;
+import com.bigdata.rdf.store.DataLoader.ClosureEnum;
+import com.bigdata.relation.accesspath.IAsynchronousIterator;
+import com.bigdata.relation.rule.IRule;
+import com.bigdata.relation.rule.Rule;
+import com.bigdata.relation.rule.eval.DefaultEvaluationPlan2;
+import com.bigdata.relation.rule.eval.IRangeCountFactory;
+
+/**
+ * Unit tests for runtime query optimization using {@link JoinGraph} and the
+ * "BSBM" test set.
+ * <p>
+ * Note: When running large queries, be sure to provide a sufficient heap, set
+ * the -server flag, etc.
+ *
+ * @author <a href="mailto:tho...@us...">Bryan Thompson</a>
+ * @version $Id$
+ */
+public class TestJoinGraphOnBSBMData extends TestCase2 {
+
+    /**
+     *
+     */
+    public TestJoinGraphOnBSBMData() {
+    }
+
+    /**
+     * @param name
+     */
+    public TestJoinGraphOnBSBMData(String name) {
+        super(name);
+    }
+
+    @Override
+    public Properties getProperties() {
+
+        final Properties p = new Properties(super.getProperties());
+
+//        p.setProperty(Journal.Options.BUFFER_MODE, BufferMode.Transient
+//                .toString());
+
+//        p.setProperty(AbstractTripleStore.Options.QUADS_MODE, "true");
+
+        /*
+         * Don't compute closure in the data loader since it does TM, not
+         * database-at-once closure.
+         */
+        p.setProperty(DataLoader.Options.CLOSURE, ClosureEnum.None.toString());
+
+        return p;
+
+    }
+
+    private Journal jnl;
+
+    private AbstractTripleStore database;
+
+    /** The initial sampling limit. */
+    private final int limit = 100;
+
+    /** The #of edges considered for the initial paths. */
+    private final int nedges = 2;
+
+    private QueryEngine queryEngine;
+
+    private String namespace;
+
+    /**
+     * When true, do a warm up run of the plan generated by the static query
+     * optimizer.
+     */
+    private final boolean warmUp = false;
+
+    /**
+     * The #of times to run each query. Use N GT ONE (1) if you want to converge
+     * onto the hot query performance.
+     */
+    private final int ntrials = 1;
+
+    /**
+     * When <code>true</code> runs the dynamic query optimizer and then evaluates
+     * the generated query plan.
+     */
+    private final boolean runRuntimeQueryOptimizer = true;
+
+    /**
+     * When <code>true</code> runs the static query optimizer and then evaluates
+     * the generated query plan.
+     */
+    private final boolean runStaticQueryOptimizer = true;
+
+    /**
+     * Opens a journal containing a pre-generated BSBM data set.
+     */
+    protected void setUp() throws Exception {
+
+//        QueryLog.logTableHeader();
+
+        super.setUp();
+
+//        System.err.println(UUID.randomUUID().toString());
+//        System.exit(0);
+
+        final Properties properties = getProperties();
+
+        final File file;
+        {
+            /*
+             * Use a specific file generated by some external process.
+             */
+            file = new File("/data/bsbm/bsbm_284826/bigdata-bsbm.RW.jnl");
+            namespace = "BSBM_284826";
+        }
+
+        properties.setProperty(Journal.Options.FILE, file.toString());
+
+//        properties.setProperty(Journal.Options.BUFFER_MODE,BufferMode.DiskRW.toString());
+
+//        file.delete();
+
+        if (!file.exists()) {
+
+            fail("File not found: " + file);
+
+//            jnl = new Journal(properties);
+//
+//            final AbstractTripleStore tripleStore = new LocalTripleStore(jnl,
+//                    namespace, ITx.UNISOLATED, properties);
+//
+//            // Create the KB instance. 
+// tripleStore.create(); +// +// tripleStore.getDataLoader().loadFiles( +// new File("/root/Desktop/Downloads/barData/barData.trig"), +// null/* baseURI */, RDFFormat.TRIG, null/* defaultGraph */, +// null/* filter */); +// +// // Truncate the journal (trim its size). +// jnl.truncate(); +// +// // Commit the journal. +// jnl.commit(); +// +// // Close the journal. +// jnl.close(); + + } + + // Open the test resource. + jnl = new Journal(properties); + + queryEngine = QueryEngineFactory + .getQueryController(jnl/* indexManager */); + + database = (AbstractTripleStore) jnl.getResourceLocator().locate( + namespace, jnl.getLastCommitTime()); + + if (database == null) + throw new RuntimeException("Not found: " + namespace); + + } + + protected void tearDown() throws Exception { + + if (database != null) { + database = null; + } + + if (queryEngine != null) { + queryEngine.shutdownNow(); + queryEngine = null; + } + + if(jnl != null) { + jnl.close(); + jnl = null; + } + + super.tearDown(); + + } + + /** + * BSBM Q5 + * + * <pre> + * PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> + * PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + * PREFIX bsbm: <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/> + * + * SELECT DISTINCT ?product ?productLabel + * WHERE { + * ?product rdfs:label ?productLabel . + * FILTER (<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1092/Product53999> != ?product) + * <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1092/Product53999> bsbm:productFeature ?prodFeature . + * ?product bsbm:productFeature ?prodFeature . + * <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1092/Product53999> bsbm:productPropertyNumeric1 ?origProperty1 . + * ?product bsbm:productPropertyNumeric1 ?simProperty1 . + * FILTER (?simProperty1 < (?origProperty1 + 120) && ?simProperty1 > (?origProperty1 - 120)) + * <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1092/Product53999> bsbm:productPropertyNumeric2 ?origProperty2 . + * ?product bsbm:productPropertyNumeric2 ?simProperty2 . + * FILTER (?simProperty2 < (?origProperty2 + 170) && ?simProperty2 > (?origProperty2 - 170)) + * } + * ORDER BY ?productLabel + * LIMIT 5 + * </pre> + * @throws Exception + */ + public void test_bsbm_q5() throws Exception { + + /* + * Resolve terms against the lexicon. + */ + final BigdataValueFactory valueFactory = database.getLexiconRelation() + .getValueFactory(); + + final String rdfs = "http://www.w3.org/2000/01/rdf-schema#"; + final String rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"; + final String bsbm = "http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/"; + +// final BigdataURI rdfType = valueFactory.createURI(rdf + "type"); + + final BigdataURI rdfsLabel = valueFactory.createURI(rdfs + "label"); + + final BigdataURI productFeature = valueFactory.createURI(bsbm + + "productFeature"); + + final BigdataURI productPropertyNumeric1 = valueFactory.createURI(bsbm + + "productPropertyNumeric1"); + + final BigdataURI productPropertyNumeric2 = valueFactory.createURI(bsbm + + "productPropertyNumeric2"); + + final BigdataURI product53999 = valueFactory + .createURI("http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1092/Product53999"); + + final BigdataValue[] terms = new BigdataValue[] { rdfsLabel, + productFeature, productPropertyNumeric1, + productPropertyNumeric2, product53999 }; + + // resolve terms. 
+ database.getLexiconRelation() + .addTerms(terms, terms.length, true/* readOnly */); + + { + for (BigdataValue tmp : terms) { + System.out.println(tmp + " : " + tmp.getIV()); + if (tmp.getIV() == null) + throw new RuntimeException("Not defined: " + tmp); + } + } + + final IPredicate[] preds; + final IPredicate p0, p1, p2, p3, p4, p5, p6; + { + final IVariable product = Var.var("product"); + final IVariable productLabel = Var.var("productLabel"); + final IVariable prodFeature= Var.var("prodFeature"); + final IVariable simProperty1 = Var.var("simProperty1"); + final IVariable simProperty2 = Var.var("simProperty2"); + final IVariable origProperty1 = Var.var("origProperty1"); + final IVariable origProperty2 = Var.var("origProperty2"); + + // The name space for the SPO relation. + final String[] spoRelation = new String[] { namespace + ".spo" }; + + // The name space for the Lexicon relation. + final String[] lexRelation = new String[] { namespace + ".lex" }; + + final long timestamp = jnl.getLastCommitTime(); + + int nextId = 0; + +// ?product rdfs:label ?productLabel . +// FILTER (<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1092/Product53999> != ?product) + p0 = new SPOPredicate(new BOp[] {// + product, + new Constant(rdfsLabel.getIV()), + productLabel// + },// + new NV(BOp.Annotations.BOP_ID, nextId++),// + new NV(Annotations.TIMESTAMP, timestamp),// + new NV(IPredicate.Annotations.RELATION_NAME, spoRelation),// + /* + * Note: In order to code up this query for the runtime + * query optimizer we need to attach the constraint + * (product53999 != ?product) to the access path rather than + * the join (the RTO does not accept join operators, just + * predicates). The RTO knows to look for the CONSTRAINTS on + * the IPredicate and apply them to the constructed join + * operator. + * + * FIXME JOinGraph needs to do this ^^^^^^^ + */ + new NV(PipelineJoin.Annotations.CONSTRAINTS, + new IConstraint[] {// + new NEConstant(product, new Constant(product53999 + .getIV())) // + })// + ); + +// <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1092/Product53999> bsbm:productFeature ?prodFeature . + p1 = new SPOPredicate(new BOp[] { // + new Constant(product53999.getIV()),// + new Constant(productFeature.getIV()),// + prodFeature// + },// + new NV(BOp.Annotations.BOP_ID, nextId++),// + new NV(Annotations.TIMESTAMP, timestamp),// + new NV(IPredicate.Annotations.RELATION_NAME, spoRelation) + ); +// ?product bsbm:productFeature ?prodFeature . + p2 = new SPOPredicate(new BOp[] { // + product,// + new Constant(productFeature.getIV()),// + prodFeature// + },// + new NV(BOp.Annotations.BOP_ID, nextId++),// + new NV(Annotations.TIMESTAMP, timestamp),// + new NV(IPredicate.Annotations.RELATION_NAME, spoRelation) + ); +// <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1092/Product53999> bsbm:productPropertyNumeric1 ?origProperty1 . + p3 = new SPOPredicate(new BOp[] { // + new Constant(product53999.getIV()),// + new Constant(productPropertyNumeric1.getIV()),// + origProperty1// + },// + new NV(BOp.Annotations.BOP_ID, nextId++),// + new NV(Annotations.TIMESTAMP, timestamp),// + new NV(IPredicate.Annotations.RELATION_NAME, spoRelation) + ); +// ?product bsbm:productPropertyNumeric1 ?simProperty1 . 
+// FILTER (?simProperty1 < (?origProperty1 + 120) && ?simProperty1 > (?origProperty1 - 120)) + p4 = new SPOPredicate(new BOp[] { // + product,// + new Constant(productPropertyNumeric1.getIV()),// + simProperty1// + },// + new NV(BOp.Annotations.BOP_ID, nextId++),// + new NV(Annotations.TIMESTAMP, timestamp),// + new NV(IPredicate.Annotations.RELATION_NAME, spoRelation), + new NV(PipelineJoin.Annotations.CONSTRAINTS, + new IConstraint[] {// + new CompareBOp(new BOp[] { + simProperty1, + new MathBOp(origProperty1, + new Constant(new XSDInt( + "120")), + MathOp.PLUS) }, NV + .asMap(new NV[] { new NV( + CompareBOp.Annotations.OP, + CompareOp.LT) })),// + new CompareBOp(new BOp[] { + simProperty1, + new MathBOp(origProperty1, + new Constant(new XSDInt( + "120")), + MathOp.MINUS) }, NV + .asMap(new NV[] { new NV( + CompareBOp.Annotations.OP, + CompareOp.GT) })),// + })// + ); + + /* + * com.bigdata.rdf.internal.constraints.CompareBOp(Var,MathBOp)[ + * com.bigdata.rdf.internal.constraints.CompareBOp.op=GT], + * com.bigdata.rdf.internal.constraints.CompareBOp(Var,MathBOp)[ + * com.bigdata.rdf.internal.constraints.CompareBOp.op=LT]], + */ + +// <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1092/Product53999> bsbm:productPropertyNumeric2 ?origProperty2 . + p5 = new SPOPredicate(new BOp[] { // + new Constant(product53999.getIV()),// + new Constant(productPropertyNumeric2.getIV()),// + origProperty2// + },// + new NV(BOp.Annotations.BOP_ID, nextId++),// + new NV(Annotations.TIMESTAMP, timestamp),// + new NV(IPredicate.Annotations.RELATION_NAME, spoRelation) + ); + +// ?product bsbm:productPropertyNumeric2 ?simProperty2 . +// FILTER (?simProperty2 < (?origProperty2 + 170) && ?simProperty2 > (?origProperty2 - 170)) + p6 = new SPOPredicate(new BOp[] { // + product,// + new Constant(productPropertyNumeric2.getIV()),// + simProperty2// + },// + new NV(BOp.Annotations.BOP_ID, nextId++),// + new NV(Annotations.TIMESTAMP, timestamp),// + new NV(IPredicate.Annotations.RELATION_NAME, spoRelation), + new NV(PipelineJoin.Annotations.CONSTRAINTS, + new IConstraint[] {// + new CompareBOp(new BOp[] { + simProperty2, + new MathBOp(origProperty2, + new Constant(new XSDInt( + "170")), + MathOp.PLUS) }, NV + .asMap(new NV[] { new NV( + CompareBOp.Annotations.OP, + CompareOp.LT) })),// + new CompareBOp(new BOp[] { + simProperty2, + new MathBOp(origProperty1, + new Constant(new XSDInt( + "170")), + MathOp.MINUS) }, NV + .asMap(new NV[] { new NV( + CompareBOp.Annotations.OP, + CompareOp.GT) })),// + })// + ); + + // the vertices of the join graph (the predicates). + preds = new IPredicate[] { p0, p1, p2, p3, p4, p5, p6 }; + + } + + doTest(preds); + + } + + /** + * + * @param preds + * @throws Exception + * + * @todo To actually test anything this needs to compare the results (or at + * least the #of result). We could also test for known good join + * orders as generated by the runtime optimizer, but that requires a + * known data set (e.g., U1 or U50) and non-random sampling. + * + * @todo This is currently providing a "hot run" comparison by a series of + * trials. This means that the IO costs are effectively being wiped + * away, assuming that the file system cache is larger than the data + * set. The other way to compare performance is a cold cache / cold + * JVM run using the known solutions produced by the runtime versus + * static query optimizers. 
+ */ + private void doTest(final IPredicate[] preds) throws Exception { + + if (warmUp) + runQuery("Warmup", queryEngine, runStaticQueryOptimizer(preds)); + + /* + * Run the runtime query optimizer once (its cost is not counted + * thereafter). + */ + final IPredicate[] runtimePredOrder = runRuntimeQueryOptimizer(preds); + + long totalRuntimeTime = 0; + long totalStaticTime = 0; + + for (int i = 0; i < ntrials; i++) { + + final String RUNTIME = getName() + " : runtime["+i+"] :"; + + final String STATIC = getName() + " : static ["+i+"] :"; + + final String GIVEN = getName() + " : given ["+i+"] :"; + + if (true/* originalOrder */) { + + runQuery(GIVEN, queryEngine, preds); + + } + + if (runStaticQueryOptimizer) { + + totalStaticTime += runQuery(STATIC, queryEngine, + runStaticQueryOptimizer(preds)); + + } + + if (runRuntimeQueryOptimizer) { + + /* + * Run the runtime query optimizer each time (its overhead is + * factored into the running comparison of the two query + * optimizers). + */ +// final IPredicate[] runtimePredOrder = runRuntimeQueryOptimizer(new JGraph( +// preds)); + + // Evaluate the query using the selected join order. + totalRuntimeTime += runQuery(RUNTIME, queryEngine, + runtimePredOrder); + + } + + } + + if(runStaticQueryOptimizer&&runRuntimeQueryOptimizer) { + System.err.println(getName() + " : Total times" + // + ": static=" + totalStaticTime + // + ", runtime=" + totalRuntimeTime + // + ", delta(static-runtime)=" + (totalStaticTime - totalRuntimeTime)); + } + + } + + /** + * Apply the runtime query optimizer. + * <p> + * Note: This temporarily raises the {@link QueryLog} log level during + * sampling to make the log files cleaner (this can not be done for a + * deployed system since the logger level is global and there are concurrent + * query mixes). + * + * @return The predicates in order as recommended by the runtime query + * optimizer. + * + * @throws Exception + */ + private IPredicate[] runRuntimeQueryOptimizer(final IPredicate[] preds) throws Exception { + + final Logger tmp = Logger.getLogger(QueryLog.class); + final Level oldLevel = tmp.getEffectiveLevel(); + tmp.setLevel(Level.WARN); + + try { + + final JGraph g = new JGraph(preds); + + final Path p = g.runtimeOptimizer(queryEngine, limit, nedges); + +// System.err.println(getName() + " : runtime optimizer join order " +// + Arrays.toString(Path.getVertexIds(p.edges))); + + return p.getPredicates(); + + } finally { + + tmp.setLevel(oldLevel); + + } + + } + + /** + * Apply the static query optimizer. + * + * @return The predicates in order as recommended by the static query + * optimizer. + */ + private IPredicate[] runStaticQueryOptimizer(final IPredicate[] preds) { + + final BOpContextBase context = new BOpContextBase(queryEngine); + + final IRule rule = new Rule("tmp", null/* head */, preds, null/* constraints */); + + final DefaultEvaluationPlan2 plan = new DefaultEvaluationPlan2( + new IRangeCountFactory() { + + public long rangeCount(final IPredicate pred) { + return context.getRelation(pred).getAccessPath(pred) + .rangeCount(false); + } + + }, rule); + + // evaluation plan order. 
+ final int[] order = plan.getOrder(); + + final int[] ids = new int[order.length]; + + final IPredicate[] out = new IPredicate[order.length]; + + for (int i = 0; i < order.length; i++) { + + out[i] = preds[order[i]]; + + ids[i] = out[i].getId(); + + } + +// System.err.println(getName() + " : static optimizer join order " +// + Arrays.toString(ids)); + + return out; + + } + + /** + * Run a query joining a set of {@link IPredicate}s in the given join order. + * + * @return The elapsed query time (ms). + */ + private static long runQuery(final String msg, + final QueryEngine queryEngine, final IPredicate[] predOrder) + throws Exception { + + final BOpIdFactory idFactory = new BOpIdFactory(); + + final int[] ids = new int[predOrder.length]; + + for(int i=0; i<ids.length; i++) { + + final IPredicate<?> p = predOrder[i]; + + idFactory.reserve(p.getId()); + + ids[i] = p.getId(); + + } + + final PipelineOp queryOp = JoinGraph.getQuery(idFactory, predOrder); + + // submit query to runtime optimizer. + final IRunningQuery q = queryEngine.eval(queryOp); + + // drain the query results. + long nout = 0; + long nchunks = 0; + final IAsynchronousIterator<IBindingSet[]> itr = q.iterator(); + try { + while (itr.hasNext()) { + final IBindingSet[] chunk = itr.next(); + nout += chunk.length; + nchunks++; + } + } finally { + itr.close(); + } + + // check the Future for the query. + q.get(); + + // show the results. + final BOpStats stats = q.getStats().get(queryOp.getId()); + + System.err.println(msg + " : ids=" + Arrays.toString(ids) + + ", elapsed=" + q.getElapsed() + ", nout=" + nout + + ", nchunks=" + nchunks + ", stats=" + stats); + + return q.getElapsed(); + + } + +} Property changes on: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/bench/NanoSparqlServer.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/bench/NanoSparqlServer.java 2011-01-15 21:23:23 UTC (rev 4103) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/bench/NanoSparqlServer.java 2011-01-15 22:21:02 UTC (rev 4104) @@ -858,10 +858,11 @@ final long age = now - query.begin; - sb.append("age=" - + java.util.concurrent.TimeUnit.NANOSECONDS - .toMillis(age) + "ms, queryId=" - + query.queryId + ", query=" + query.query+"\n"); + sb.append("age=" + + java.util.concurrent.TimeUnit.NANOSECONDS + .toMillis(age) + "ms, queryId=" + + query.queryId + "\n"); + sb.append(query.query + "\n"); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
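The essential pattern in the new test above is that the FILTER constraints are
attached to the access path predicates (via PipelineJoin.Annotations.CONSTRAINTS)
rather than to join operators, since the runtime optimizer consumes bare
predicates. A condensed sketch of that pattern for one of the Q5 filters,
FILTER (?simProperty1 < ?origProperty1 + 120), assuming the same imports and
variable setup as TestJoinGraphOnBSBMData, and using XSDIntIV (the internal-value
class that the follow-up commit r4106 below switches to):

    // The value expression for FILTER (?simProperty1 < (?origProperty1 + 120)).
    final IConstraint lt = new CompareBOp(new BOp[] {//
            simProperty1,//
            new MathBOp(origProperty1, new Constant(new XSDIntIV(120)),
                    MathOp.PLUS) },//
            NV.asMap(new NV[] { new NV(CompareBOp.Annotations.OP, CompareOp.LT) }));

    // Annotate the predicate, not a join operator. The runtime optimizer will
    // generate the join and (after r4106 below) copy the constraint onto it.
    final IPredicate<?> p = new SPOPredicate(new BOp[] {//
            product,//
            new Constant(productPropertyNumeric1.getIV()),//
            simProperty1 },//
            new NV(BOp.Annotations.BOP_ID, nextId++),//
            new NV(IPredicate.Annotations.TIMESTAMP, timestamp),//
            new NV(IPredicate.Annotations.RELATION_NAME, spoRelation),//
            new NV(PipelineJoin.Annotations.CONSTRAINTS, new IConstraint[] { lt }));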
From: <tho...@us...> - 2011-01-15 23:53:30
|
Revision: 4106 http://bigdata.svn.sourceforge.net/bigdata/?rev=4106&view=rev Author: thompsonbry Date: 2011-01-15 23:53:24 +0000 (Sat, 15 Jan 2011) Log Message: ----------- Modified JoinGraph to propagate the PipelineJoin.Annotations.CONSTRAINTS from the _predicate_ to the join. This allows us to annotate the predicate with constraints which will be imposed by the join. That makes it possible for the runtime query optimizer to generate the joins dynamically when they include IConstraint[]s. See trac issue #64 (Runtime Query Optimizer). Fixed some problems in the BSBM Q5 setup as hand coded for the runtime query optimizer. Identified a problem with the runtime query optimizer where it will never include a vertex if it does not share any join variables with the other vertices in the graph (BSBM Q5 does this). I have not fixed this yet. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2011-01-15 22:49:40 UTC (rev 4105) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2011-01-15 23:53:24 UTC (rev 4106) @@ -198,6 +198,10 @@ * approach should be able to handle queries without perfect / covering * automatically. Then experiment with carrying fewer statement indices * for quads. + * + * @todo Unit test when there are no solutions to the query. In this case there + * will be no paths identified by the optimizer and the final path length + * becomes zero. */ public class JoinGraph extends PipelineOp { @@ -1032,10 +1036,10 @@ * path). */ final int joinId = 1; - final PipelineJoin joinOp = new PipelineJoin(new BOp[] {}, // + final Map<String,Object> anns = NV.asMap(// new NV(BOp.Annotations.BOP_ID, joinId),// - new NV(PipelineJoin.Annotations.PREDICATE, vTarget.pred - .setBOpId(3)), + // @todo Why not use a factory which avoids bopIds already in use? + new NV(PipelineJoin.Annotations.PREDICATE, vTarget.pred.setBOpId(3)), // disallow parallel evaluation. new NV(PipelineJoin.Annotations.MAX_PARALLEL,0), // disable access path coalescing @@ -1056,6 +1060,12 @@ new NV(PipelineJoin.Annotations.SHARED_STATE,true), new NV(PipelineJoin.Annotations.EVALUATION_CONTEXT,BOpEvaluationContext.CONTROLLER) ); + if (vTarget.pred.getProperty(PipelineJoin.Annotations.CONSTRAINTS) != null) { + // Copy constraints from the predicate onto the join, which will apply them. + anns.put(PipelineJoin.Annotations.CONSTRAINTS, vTarget.pred + .getProperty(PipelineJoin.Annotations.CONSTRAINTS)); + } + final PipelineJoin joinOp = new PipelineJoin(new BOp[] {}, anns); final PipelineOp queryOp = joinOp; @@ -1805,8 +1815,13 @@ } /** + * Find a good join path in the data given the join graph. The join path + * is not guaranteed to be the best join path (the search performed by + * the runtime optimizer is not exhaustive) but it should always be a + * "good" join path and may often be the "best" join path. * * @param queryEngine + * The query engine. * @param limit * The limit for sampling a vertex and the initial limit for * cutoff join evaluation. @@ -1818,10 +1833,25 @@ * a join path, the starting vertex will be the vertex of * that edge having the lower cardinality. 
* + * @return The join path identified by the runtime query optimizer as + * the best path given the join graph and the data. + * + * @throws NoSolutionsException + * If there are no solutions for the join graph in the data + * (the query does not have any results). + * * @throws Exception + * + * @todo It is possible that this could throw a + * {@link NoSolutionsException} if the cutoff joins do not use a + * large enough sample to find a join path which produces at least + * one solution. We need to automatically increase the depth of + * search for queries where we have cardinality estimation + * underflows or punt to another method to decide the join order. */ public Path runtimeOptimizer(final QueryEngine queryEngine, - final int limit, final int nedges) throws Exception { + final int limit, final int nedges) throws Exception, + NoSolutionsException { // Setup the join graph. Path[] paths = round0(queryEngine, limit, nedges); @@ -1838,12 +1868,19 @@ int round = 1; - while (round < nvertices - 1) { + while (paths.length > 0 && round < nvertices - 1) { paths = expand(queryEngine, limit, round++, paths); } + if (paths.length == 0) { + + // There are no solutions for the join graph in the data. + throw new NoSolutionsException(); + + } + // Should be one winner. assert paths.length == 1; @@ -2257,6 +2294,10 @@ final boolean v1Found = x.contains(edgeInGraph.v1); final boolean v2Found = x.contains(edgeInGraph.v2); + if (log.isTraceEnabled()) + log.trace("Edge: " + edgeInGraph + ", v1Found=" + + v1Found + ", v2Found=" + v2Found); + if (!v1Found && !v2Found) { // Edge is not connected to this path. continue; @@ -2277,6 +2318,9 @@ if (used.contains(tVertex)) { // Vertex already used to extend this path. + if (log.isTraceEnabled()) + log.trace("Edge: " + edgeInGraph + + " - already used to extend this path."); continue; } @@ -2292,6 +2336,10 @@ // Add to the set of paths for this round. tmp.add(p); + if (log.isTraceEnabled()) + log.trace("Extended path with edge: " + edgeInGraph + + ", new path=" + p); + } } @@ -2806,6 +2854,13 @@ // // anns.add(new NV(PipelineJoin.Annotations.SELECT, vars.toArray(new IVariable[vars.size()]))); + if (p.getProperty(PipelineJoin.Annotations.CONSTRAINTS) != null) { + // Copy constraints from the predicate onto the join, which will + // apply them. + anns.add(new NV(PipelineJoin.Annotations.CONSTRAINTS, p + .getProperty(PipelineJoin.Annotations.CONSTRAINTS))); + } + final PipelineJoin joinOp = new PipelineJoin( lastOp == null ? new BOp[0] : new BOp[] { lastOp }, anns.toArray(new NV[anns.size()])); @@ -2935,4 +2990,34 @@ } + /** + * Exception thrown when the join graph does not have any solutions in the + * data (running the query does not produce any results). 
+ */ + public static class NoSolutionsException extends RuntimeException + { + + /** + * + */ + private static final long serialVersionUID = 1L; + + public NoSolutionsException() { + super(); + } + + public NoSolutionsException(String message, Throwable cause) { + super(message, cause); + } + + public NoSolutionsException(String message) { + super(message); + } + + public NoSolutionsException(Throwable cause) { + super(cause); + } + + } + } Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java 2011-01-15 22:49:40 UTC (rev 4105) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java 2011-01-15 23:53:24 UTC (rev 4106) @@ -10,7 +10,6 @@ import org.apache.log4j.Logger; import org.openrdf.query.algebra.Compare.CompareOp; import org.openrdf.query.algebra.MathExpr.MathOp; -import org.semanticweb.yars.nx.dt.numeric.XSDInt; import com.bigdata.bop.BOp; import com.bigdata.bop.BOpContextBase; @@ -35,7 +34,7 @@ import com.bigdata.bop.fed.QueryEngineFactory; import com.bigdata.bop.join.PipelineJoin; import com.bigdata.journal.Journal; -import com.bigdata.rdf.internal.IVUtility; +import com.bigdata.rdf.internal.XSDIntIV; import com.bigdata.rdf.internal.constraints.CompareBOp; import com.bigdata.rdf.internal.constraints.MathBOp; import com.bigdata.rdf.model.BigdataURI; @@ -101,7 +100,7 @@ private AbstractTripleStore database; /** The initial sampling limit. */ - private final int limit = 100; + private final int limit = 1000; /** The #of edges considered for the initial paths. */ private final int nedges = 2; @@ -114,7 +113,7 @@ * When true, do a warm up run of the plan generated by the static query * optimizer. */ - private final boolean warmUp = false; + private final boolean warmUp = true; /** * The #of times to run each query. Use N GT ONE (1) if you want to converge @@ -153,8 +152,10 @@ /* * Use a specific file generated by some external process. */ - file = new File("/data/bsbm/bsbm_284826/bigdata-bsbm.RW.jnl"); - namespace = "BSBM_284826"; + final long pc = 284826; // BSBM 100M +// final long pc = 566496; // BSBM 200M + file = new File("/data/bsbm/bsbm_"+pc+"/bigdata-bsbm.RW.jnl"); + namespace = "BSBM_"+pc; } properties.setProperty(Journal.Options.FILE, file.toString()); @@ -260,7 +261,7 @@ .getValueFactory(); final String rdfs = "http://www.w3.org/2000/01/rdf-schema#"; - final String rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"; +// final String rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"; final String bsbm = "http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/"; // final BigdataURI rdfType = valueFactory.createURI(rdf + "type"); @@ -309,8 +310,8 @@ // The name space for the SPO relation. final String[] spoRelation = new String[] { namespace + ".spo" }; - // The name space for the Lexicon relation. - final String[] lexRelation = new String[] { namespace + ".lex" }; +// // The name space for the Lexicon relation. +// final String[] lexRelation = new String[] { namespace + ".lex" }; final long timestamp = jnl.getLastCommitTime(); @@ -334,8 +335,6 @@ * predicates). The RTO knows to look for the CONSTRAINTS on * the IPredicate and apply them to the constructed join * operator. 
- * - * FIXME JOinGraph needs to do this ^^^^^^^ */ new NV(PipelineJoin.Annotations.CONSTRAINTS, new IConstraint[] {// @@ -354,6 +353,7 @@ new NV(Annotations.TIMESTAMP, timestamp),// new NV(IPredicate.Annotations.RELATION_NAME, spoRelation) ); + // ?product bsbm:productFeature ?prodFeature . p2 = new SPOPredicate(new BOp[] { // product,// @@ -364,6 +364,7 @@ new NV(Annotations.TIMESTAMP, timestamp),// new NV(IPredicate.Annotations.RELATION_NAME, spoRelation) ); + // <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1092/Product53999> bsbm:productPropertyNumeric1 ?origProperty1 . p3 = new SPOPredicate(new BOp[] { // new Constant(product53999.getIV()),// @@ -374,6 +375,7 @@ new NV(Annotations.TIMESTAMP, timestamp),// new NV(IPredicate.Annotations.RELATION_NAME, spoRelation) ); + // ?product bsbm:productPropertyNumeric1 ?simProperty1 . // FILTER (?simProperty1 < (?origProperty1 + 120) && ?simProperty1 > (?origProperty1 - 120)) p4 = new SPOPredicate(new BOp[] { // @@ -389,8 +391,8 @@ new CompareBOp(new BOp[] { simProperty1, new MathBOp(origProperty1, - new Constant(new XSDInt( - "120")), + new Constant(new XSDIntIV( + 120)), MathOp.PLUS) }, NV .asMap(new NV[] { new NV( CompareBOp.Annotations.OP, @@ -398,8 +400,8 @@ new CompareBOp(new BOp[] { simProperty1, new MathBOp(origProperty1, - new Constant(new XSDInt( - "120")), + new Constant(new XSDIntIV( + 120)), MathOp.MINUS) }, NV .asMap(new NV[] { new NV( CompareBOp.Annotations.OP, @@ -407,13 +409,6 @@ })// ); - /* - * com.bigdata.rdf.internal.constraints.CompareBOp(Var,MathBOp)[ - * com.bigdata.rdf.internal.constraints.CompareBOp.op=GT], - * com.bigdata.rdf.internal.constraints.CompareBOp(Var,MathBOp)[ - * com.bigdata.rdf.internal.constraints.CompareBOp.op=LT]], - */ - // <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1092/Product53999> bsbm:productPropertyNumeric2 ?origProperty2 . p5 = new SPOPredicate(new BOp[] { // new Constant(product53999.getIV()),// @@ -440,17 +435,17 @@ new CompareBOp(new BOp[] { simProperty2, new MathBOp(origProperty2, - new Constant(new XSDInt( - "170")), + new Constant(new XSDIntIV( + 170)), MathOp.PLUS) }, NV .asMap(new NV[] { new NV( CompareBOp.Annotations.OP, CompareOp.LT) })),// new CompareBOp(new BOp[] { simProperty2, - new MathBOp(origProperty1, - new Constant(new XSDInt( - "170")), + new MathBOp(origProperty2, + new Constant(new XSDIntIV( + 170)), MathOp.MINUS) }, NV .asMap(new NV[] { new NV( CompareBOp.Annotations.OP, @@ -473,7 +468,7 @@ * @throws Exception * * @todo To actually test anything this needs to compare the results (or at - * least the #of result). We could also test for known good join + * least the #of results). We could also test for known good join * orders as generated by the runtime optimizer, but that requires a * known data set (e.g., U1 or U50) and non-random sampling. * This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
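With r4106 the runtime optimizer signals an empty result via NoSolutionsException,
so a caller of JGraph#runtimeOptimizer() now has two outcomes to handle. A minimal
sketch of the calling pattern, using the API shown in the diff above (the fallback
in the catch block is hypothetical, caller-defined behavior, not part of this
commit):

    final JGraph g = new JGraph(preds);
    try {
        // Sample cutoff joins in the data to find a good join order.
        final Path p = g.runtimeOptimizer(queryEngine, limit, nedges);
        return p.getPredicates();
    } catch (JoinGraph.NoSolutionsException ex) {
        // The join graph provably has no solutions in the data, so the query
        // can be answered with an empty result without evaluating any plan.
        return new IPredicate[0]; // hypothetical handling
    }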
From: <tho...@us...> - 2011-01-16 00:59:24
|
Revision: 4107 http://bigdata.svn.sourceforge.net/bigdata/?rev=4107&view=rev Author: thompsonbry Date: 2011-01-16 00:59:18 +0000 (Sun, 16 Jan 2011) Log Message: ----------- Change to JoinGraph to let it work with predicates which do not share any variables with other predicates in the join graph. This raises the exploration cost significantly for such queries (e.g., BSBM Q5), but it now produces a good answer (a 2x faster join path). I've updated the RTO issue (#64) to reflect some additional questions about how to handle predicates which do not share variables and how to handle variables which are "shared" only in the sense that they appear in a constraint on another predicate, but not as part of the access path pattern. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2011-01-15 23:53:24 UTC (rev 4106) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2011-01-16 00:59:18 UTC (rev 4107) @@ -779,8 +779,9 @@ throw new IllegalArgumentException(); if (shared == null) throw new IllegalArgumentException(); - if (shared.isEmpty()) - throw new IllegalArgumentException(); + // Note: We need to allow edges which do not share variables. +// if (shared.isEmpty()) +// throw new IllegalArgumentException(); this.v1 = v1; this.v2 = v2; this.shared = shared; @@ -1782,6 +1783,11 @@ /* * Identify the edges by looking for shared variables among the * predicates. + * + * Note: If a vertex does not share ANY variables then it is paired + * with every other vertex. Such joins will always produce a full + * cross product, so such a vertex can be paired with any of the + * other vertices. */ { @@ -1789,10 +1795,15 @@ for (int i = 0; i < v.length; i++) { + // consider a source vertex. final IPredicate<?> p1 = v[i]; + // #of vertices which share a variable with source vertex. + int nmatched = 0; + for (int j = i + 1; j < v.length; j++) { + // consider a possible target vertex. final IPredicate<?> p2 = v[j]; final Set<IVariable<?>> shared = Rule.getSharedVars(p1, @@ -1800,12 +1811,34 @@ if (shared != null && !shared.isEmpty()) { + // the source and target vertices share var(s). tmp.add(new Edge(V[i], V[j], shared)); + + nmatched++; } } + if (nmatched == 0) { + + /* + * The source vertex does not share any variables. In + * order to explore join paths which include that vertex + * we therefore pair it with each of the other vertices. + */ + for (int j = 0; j < v.length; j++) { + + if (j == i) + continue; + + tmp.add(new Edge(V[i], V[j], + Collections.EMPTY_SET)); + + } + + } + } E = tmp.toArray(new Edge[0]); @@ -2698,8 +2731,10 @@ this.context = context; + // The initial cutoff sampling limit. limit = getLimit(); + // The initial number of edges (1 step paths) to explore. 
nedges = getNEdges(); if (limit <= 0) Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java 2011-01-15 23:53:24 UTC (rev 4106) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java 2011-01-16 00:59:18 UTC (rev 4107) @@ -100,7 +100,7 @@ private AbstractTripleStore database; /** The initial sampling limit. */ - private final int limit = 1000; + private final int limit = 100; /** The #of edges considered for the initial paths. */ private final int nedges = 2; @@ -113,15 +113,20 @@ * When true, do a warm up run of the plan generated by the static query * optimizer. */ - private final boolean warmUp = true; + private final boolean warmUp = false; /** * The #of times to run each query. Use N GT ONE (1) if you want to converge * onto the hot query performance. */ - private final int ntrials = 1; + private final int ntrials = 3; /** + * When <code>true</code> runs the query in the given order. + */ + private final boolean runGivenOrder = false; + + /** * When <code>true</code> runs the dynamic query optimizer and then evaluates * the generated query plan. */ @@ -501,7 +506,7 @@ final String GIVEN = getName() + " : given ["+i+"] :"; - if (true/* originalOrder */) { + if (runGivenOrder) { runQuery(GIVEN, queryEngine, preds); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
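The BSBM Q5 case motivating this change is visible in the hand-coded predicates
from r4104: p3's access path binds only ?origProperty1, while p4's access path
binds ?product and ?simProperty1, so the two patterns share no variables at all;
they are related only through the FILTER carried in p4's CONSTRAINTS annotation,
which the edge construction does not inspect. A small illustrative sketch,
assuming the p3/p4 predicates and the Rule.getSharedVars() helper from the diffs
above:

    // p3: (Product53999, productPropertyNumeric1, ?origProperty1)
    // p4: (?product,     productPropertyNumeric1, ?simProperty1 )
    final Set<IVariable<?>> shared = Rule.getSharedVars(p3, p4);
    if (shared == null || shared.isEmpty()) {
        // No shared access-path variables, even though p4's CONSTRAINTS
        // reference ?origProperty1. Before r4107 such a vertex could never
        // be added to a join path; r4107 instead pairs it with every other
        // vertex, at the cost of exploring full cross products.
    }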
From: <tho...@us...> - 2011-01-16 16:38:44
|
Revision: 4108 http://bigdata.svn.sourceforge.net/bigdata/?rev=4108&view=rev Author: thompsonbry Date: 2011-01-16 16:38:37 +0000 (Sun, 16 Jan 2011) Log Message: ----------- Working on https://sourceforge.net/apps/trac/bigdata/ticket/230 (occasional errors reported through to the SPARQL client from normal termination rooted in an interrupt of a query when a LIMIT is satisfied). Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/AbstractSubqueryOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/SubqueryOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/AbstractRunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/ChunkedRunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/IRunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/StandaloneChainedRunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/CancelQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/SliceOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/util/concurrent/Haltable.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/MockRunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/solutions/TestSliceOp.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IsLiteral.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/MathBOp.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl3.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSail.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/RunningQueryCloseableIteration.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/util/concurrent/IHaltable.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/AbstractSubqueryOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/AbstractSubqueryOp.java 2011-01-16 00:59:18 UTC (rev 4107) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/AbstractSubqueryOp.java 2011-01-16 16:38:37 UTC (rev 4108) @@ -305,17 +305,17 @@ public IRunningQuery call() throws Exception { + IRunningQuery runningSubquery = null; IAsynchronousIterator<IBindingSet[]> subquerySolutionItr = null; try { final QueryEngine queryEngine = parentContext.getRunningQuery() .getQueryEngine(); - final IRunningQuery runningQuery = queryEngine - .eval(subQueryOp); + runningSubquery = queryEngine.eval(subQueryOp); // Iterator visiting the subquery solutions. - subquerySolutionItr = runningQuery.iterator(); + subquerySolutionItr = runningSubquery.iterator(); // Copy solutions from the subquery to the query. BOpUtility.copy(subquerySolutionItr, parentContext @@ -323,20 +323,31 @@ null/* stats */); // wait for the subquery. - runningQuery.get(); + runningSubquery.get(); // done. 
- return runningQuery; + return runningSubquery; } catch (Throwable t) { - /* - * If a subquery fails, then propagate the error to the - * parent and rethrow the first cause error out of the - * subquery. - */ - throw new RuntimeException(ControllerTask.this.context - .getRunningQuery().halt(t)); + if (runningSubquery == null + || runningSubquery.getCause() != null) { + /* + * If things fail before we start the subquery, or if a + * subquery fails (due to abnormal termination), then + * propagate the error to the parent and rethrow the + * first cause error out of the subquery. + * + * Note: IHaltable#getCause() considers exceptions + * triggered by an interrupt to be normal termination. + * Such exceptions are NOT propagated here and WILL NOT + * cause the parent query to terminate. + */ + throw new RuntimeException(ControllerTask.this.context + .getRunningQuery().halt(runningSubquery.getCause())); + } + + return runningSubquery; } finally { Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/SubqueryOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/SubqueryOp.java 2011-01-16 00:59:18 UTC (rev 4107) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/SubqueryOp.java 2011-01-16 16:38:37 UTC (rev 4108) @@ -27,7 +27,6 @@ package com.bigdata.bop.controller; -import java.nio.channels.ClosedByInterruptException; import java.util.Map; import java.util.UUID; import java.util.concurrent.Callable; @@ -43,10 +42,8 @@ import com.bigdata.bop.engine.IRunningQuery; import com.bigdata.bop.engine.LocalChunkMessage; import com.bigdata.bop.engine.QueryEngine; -import com.bigdata.relation.accesspath.BufferClosedException; import com.bigdata.relation.accesspath.IAsynchronousIterator; import com.bigdata.relation.accesspath.ThickAsynchronousIterator; -import com.bigdata.util.InnerCause; /** * For each binding set presented, this operator executes a subquery. Any @@ -425,7 +422,7 @@ throw ex; } - + if (ncopied == 0L && optional) { /* @@ -442,28 +439,47 @@ } catch (Throwable t) { - /* - * Note: SliceOp will cause other operators to be - * interrupted during normal evaluation. Therefore, while - * these exceptions should cause the subquery to terminate, - * they should not be reported as errors to the parent - * query. - */ - if (!InnerCause.isInnerCause(t, InterruptedException.class) - && !InnerCause.isInnerCause(t, BufferClosedException.class) - && !InnerCause.isInnerCause(t, ClosedByInterruptException.class)) { - - /* - * If a subquery fails, then propagate the error to the - * parent and rethrow the first cause error out of the - * subquery. - */ - throw new RuntimeException(ControllerTask.this.context - .getRunningQuery().halt(t)); +// /* +// * Note: SliceOp will cause other operators to be +// * interrupted during normal evaluation. Therefore, while +// * these exceptions should cause the subquery to terminate, +// * they should not be reported as errors to the parent +// * query. +// */ +// if (!InnerCause.isInnerCause(t, InterruptedException.class) +// && !InnerCause.isInnerCause(t, BufferClosedException.class) +// && !InnerCause.isInnerCause(t, ClosedByInterruptException.class)) { +// +// /* +// * If a subquery fails, then propagate the error to the +// * parent and rethrow the first cause error out of the +// * subquery. 
+// */ +// throw new RuntimeException(ControllerTask.this.context +// .getRunningQuery().halt(t)); +// +// } +// +// return runningSubquery; - } - - return runningSubquery; + if (runningSubquery == null + || runningSubquery.getCause() != null) { + /* + * If things fail before we start the subquery, or if a + * subquery fails (due to abnormal termination), then + * propagate the error to the parent and rethrow the + * first cause error out of the subquery. + * + * Note: IHaltable#getCause() considers exceptions + * triggered by an interrupt to be normal termination. + * Such exceptions are NOT propagated here and WILL NOT + * cause the parent query to terminate. + */ + throw new RuntimeException(ControllerTask.this.context + .getRunningQuery().halt(runningSubquery.getCause())); + } + + return runningSubquery; } finally { Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/AbstractRunningQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/AbstractRunningQuery.java 2011-01-16 00:59:18 UTC (rev 4107) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/AbstractRunningQuery.java 2011-01-16 16:38:37 UTC (rev 4108) @@ -28,7 +28,6 @@ package com.bigdata.bop.engine; import java.nio.ByteBuffer; -import java.nio.channels.ClosedByInterruptException; import java.util.Collections; import java.util.Map; import java.util.UUID; @@ -51,12 +50,11 @@ import com.bigdata.bop.solutions.SliceOp; import com.bigdata.journal.IIndexManager; import com.bigdata.journal.ITx; -import com.bigdata.relation.accesspath.BufferClosedException; import com.bigdata.relation.accesspath.IAsynchronousIterator; import com.bigdata.relation.accesspath.IBlockingBuffer; import com.bigdata.service.IBigdataFederation; -import com.bigdata.util.InnerCause; import com.bigdata.util.concurrent.Haltable; +import com.bigdata.util.concurrent.IHaltable; /** * Abstract base class for various {@link IRunningQuery} implementations. The @@ -184,7 +182,7 @@ * Note: This is exposed to the {@link QueryEngine} to let it cache the * {@link Future} for recently finished queries. */ - final protected Future<Void> getFuture() { + final protected IHaltable<Void> getFuture() { return future; @@ -665,7 +663,7 @@ if (runState.isAllDone()) { // Normal termination. - halt(); + halt((Void)null); } @@ -771,14 +769,14 @@ } - public void halt() { + final public void halt(final Void v) { - lock.lock(); + lock.lock(); try { // signal normal completion. - future.halt((Void) null); + future.halt((Void) v); // interrupt anything which is running. cancel(true/* mayInterruptIfRunning */); @@ -791,7 +789,7 @@ } - public Throwable halt(final Throwable t) { + final public <T extends Throwable> T halt(final T t) { if (t == null) throw new IllegalArgumentException(); @@ -802,23 +800,8 @@ try { - /* - * Note: SliceOp will cause other operators to be interrupted - * during normal evaluation so it is not useful to log an - * InterruptedException @ ERROR. - */ - if (!InnerCause.isInnerCause(t, InterruptedException.class) - && !InnerCause.isInnerCause(t, BufferClosedException.class) - && !InnerCause.isInnerCause(t, ClosedByInterruptException.class)) { - log.error(toString(), t); - // signal error condition. - return future.halt(t); - } else { - // normal termination. - future.halt((Void)null/* result */); - // the caller's cause. - return t; - } + // halt the query, return [t]. 
+ return future.halt(t); } finally { Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/ChunkedRunningQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/ChunkedRunningQuery.java 2011-01-16 00:59:18 UTC (rev 4107) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/ChunkedRunningQuery.java 2011-01-16 16:38:37 UTC (rev 4108) @@ -27,7 +27,6 @@ */ package com.bigdata.bop.engine; -import java.nio.channels.ClosedByInterruptException; import java.util.Iterator; import java.util.LinkedList; import java.util.List; @@ -861,20 +860,12 @@ try { t.call(); } catch(Throwable t) { - /* - * Note: SliceOp will cause other operators to be - * interrupted during normal evaluation. Therefore, while - * these exceptions should cause the query to terminate, - * they should not be reported as errors to the query - * controller. - */ - if (!InnerCause.isInnerCause(t, InterruptedException.class) - && !InnerCause.isInnerCause(t, BufferClosedException.class) - && !InnerCause.isInnerCause(t, ClosedByInterruptException.class) - ) { - // Not an error that we should ignore. - throw t; - } + halt(t); + if (getCause() != null) { + // Abnormal termination. + throw new RuntimeException(getCause()); + } + // normal termination - swallow the exception. } finally { t.context.getStats().elapsed.add(System.currentTimeMillis() - begin); @@ -911,10 +902,12 @@ * error message is necessary in order to catch errors in * clientProxy.haltOp() (above and below). */ - final Throwable firstCause = halt(ex1); + // ensure halted. + halt(ex1); + final HaltOpMessage msg = new HaltOpMessage(getQueryId(), t.bopId, - t.partitionId, serviceId, firstCause, t.sinkId, + t.partitionId, serviceId, getCause()/*firstCauseIfError*/, t.sinkId, t.sinkMessagesOut.get(), t.altSinkId, t.altSinkMessagesOut.get(), t.context.getStats()); try { @@ -1484,7 +1477,7 @@ // return sink.flush(); } - public void abort(Throwable cause) { + public void abort(final Throwable cause) { open = false; q.halt(cause); // sink.abort(cause); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/IRunningQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/IRunningQuery.java 2011-01-16 00:59:18 UTC (rev 4107) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/IRunningQuery.java 2011-01-16 16:38:37 UTC (rev 4108) @@ -29,7 +29,6 @@ import java.util.Map; import java.util.UUID; -import java.util.concurrent.Future; import com.bigdata.bop.BOp; import com.bigdata.bop.IBindingSet; @@ -40,6 +39,7 @@ import com.bigdata.relation.accesspath.IAsynchronousIterator; import com.bigdata.service.IBigdataFederation; import com.bigdata.striterator.ICloseableIterator; +import com.bigdata.util.concurrent.IHaltable; /** * Non-Remote interface exposing a limited set of the state of an executing @@ -48,7 +48,7 @@ * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ */ -public interface IRunningQuery extends Future<Void>{ +public interface IRunningQuery extends IHaltable<Void> { /** * The query. @@ -134,35 +134,35 @@ public long getElapsed(); /** - * Cancel the running query (normal termination). - * <p> - * Note: This method provides a means for an operator to indicate that the - * query should halt immediately for reasons other than abnormal - * termination. 
- * <p> - * Note: For abnormal termination of a query, just throw an exception out of - * the query operator implementation. - */ - void halt(); - - /** - * Cancel the query (abnormal termination). - * - * @param t - * The cause. - * - * @return The first cause. - * - * @throws IllegalArgumentException - * if the argument is <code>null</code>. - */ - Throwable halt(final Throwable t); - - /** - * Return the cause if the query was terminated by an exception. - * @return - */ - Throwable getCause(); +// * Cancel the running query (normal termination). +// * <p> +// * Note: This method provides a means for an operator to indicate that the +// * query should halt immediately for reasons other than abnormal +// * termination. +// * <p> +// * Note: For abnormal termination of a query, just throw an exception out of +// * the query operator implementation. +// */ +// void halt(); +// +// /** +// * Cancel the query (abnormal termination). +// * +// * @param t +// * The cause. +// * +// * @return The argument. +// * +// * @throws IllegalArgumentException +// * if the argument is <code>null</code>. +// */ +// Throwable halt(final Throwable t); +// +// /** +// * Return the cause if the query was terminated by an exception. +// * @return +// */ +// Throwable getCause(); /** * Return an iterator which will drain the solutions from the query. The Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java 2011-01-16 00:59:18 UTC (rev 4107) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java 2011-01-16 16:38:37 UTC (rev 4108) @@ -58,13 +58,14 @@ import com.bigdata.btree.IndexSegment; import com.bigdata.btree.view.FusedView; import com.bigdata.journal.IIndexManager; -import com.bigdata.rdf.sail.bench.NanoSparqlClient; +import com.bigdata.rdf.sail.bench.NanoSparqlServer; import com.bigdata.relation.accesspath.IAsynchronousIterator; import com.bigdata.relation.accesspath.ThickAsynchronousIterator; import com.bigdata.resources.IndexManager; import com.bigdata.service.IBigdataFederation; import com.bigdata.service.IDataService; import com.bigdata.util.concurrent.DaemonThreadFactory; +import com.bigdata.util.concurrent.IHaltable; /** * A class managing execution of concurrent queries against a local @@ -367,13 +368,13 @@ * enough that we can not have a false cache miss on a system which is * heavily loaded by a bunch of light queries. 
*/ - private LinkedHashMap<UUID, Future<Void>> doneQueries = new LinkedHashMap<UUID,Future<Void>>( + private LinkedHashMap<UUID, IHaltable<Void>> doneQueries = new LinkedHashMap<UUID,IHaltable<Void>>( 16/* initialCapacity */, .75f/* loadFactor */, true/* accessOrder */) { private static final long serialVersionUID = 1L; @Override - protected boolean removeEldestEntry(Map.Entry<UUID, Future<Void>> eldest) { + protected boolean removeEldestEntry(Map.Entry<UUID, IHaltable<Void>> eldest) { return size() > 100/* maximumCacheCapacity */; Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/StandaloneChainedRunningQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/StandaloneChainedRunningQuery.java 2011-01-16 00:59:18 UTC (rev 4107) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/StandaloneChainedRunningQuery.java 2011-01-16 16:38:37 UTC (rev 4108) @@ -442,20 +442,12 @@ try { t.call(); } catch(Throwable t) { - /* - * Note: SliceOp will cause other operators to be - * interrupted during normal evaluation. Therefore, while - * these exceptions should cause the query to terminate, - * they should not be reported as errors to the query - * controller. - */ - if (!InnerCause.isInnerCause(t, InterruptedException.class) - && !InnerCause.isInnerCause(t, BufferClosedException.class) - && !InnerCause.isInnerCause(t, ClosedByInterruptException.class) - ) { - // Not an error that we should ignore. - throw t; - } + halt(t); + if (getCause() != null) { + // Abnormal termination. + throw getCause(); + } + // normal termination - swallow the exception. } finally { t.context.getStats().elapsed.add(System.currentTimeMillis() - begin); @@ -486,10 +478,12 @@ * error message is necessary in order to catch errors in * clientProxy.haltOp() (above and below). */ - final Throwable firstCause = halt(ex1); + + // ensure halted. 
+ halt(ex1); final HaltOpMessage msg = new HaltOpMessage(getQueryId(), t.bopId, - -1/*partitionId*/, serviceId, firstCause, t.sinkId, + -1/*partitionId*/, serviceId, getCause()/*firstCauseIfError*/, t.sinkId, 0/*t.sinkMessagesOut.get()*/, t.altSinkId, 0/*t.altSinkMessagesOut.get()*/, t.context.getStats()); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/CancelQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/CancelQuery.java 2011-01-16 00:59:18 UTC (rev 4107) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/CancelQuery.java 2011-01-16 16:38:37 UTC (rev 4108) @@ -34,7 +34,7 @@ public void run() { if (cause == null) - q.halt(); + q.halt((Void)null); else q.halt(cause); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java 2011-01-16 00:59:18 UTC (rev 4107) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java 2011-01-16 16:38:37 UTC (rev 4108) @@ -27,6 +27,7 @@ package com.bigdata.bop.join; +import java.nio.channels.ClosedByInterruptException; import java.util.Arrays; import java.util.Collection; import java.util.Collections; @@ -77,6 +78,7 @@ import com.bigdata.service.DataService; import com.bigdata.striterator.IChunkedOrderedIterator; import com.bigdata.striterator.IKeyOrder; +import com.bigdata.util.InnerCause; import com.bigdata.util.concurrent.Haltable; import com.bigdata.util.concurrent.LatchedExecutor; @@ -743,9 +745,8 @@ * This is used for processing errors and also if this task is * interrupted (because the sink has been closed). */ - halt(t); - + // reset the unsync buffers. 
try { // resetUnsyncBuffers(); @@ -1056,11 +1057,9 @@ return null; } catch (Throwable t) { + + throw new RuntimeException(halt(t)); - halt(t); - - throw new RuntimeException(t); - } } @@ -1692,10 +1691,8 @@ } catch (Throwable t) { - halt(t); + throw new RuntimeException(halt(t)); - throw new RuntimeException(t); - } finally { itr.close(); @@ -1900,10 +1897,8 @@ } catch (Throwable t) { - halt(t); + throw new RuntimeException(halt(t)); - throw new RuntimeException(t); - } finally { itr.close(); @@ -2092,10 +2087,8 @@ } catch (Throwable t) { - halt(t); + throw new RuntimeException(halt(t)); - throw new RuntimeException(t); - } } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/SliceOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/SliceOp.java 2011-01-16 00:59:18 UTC (rev 4107) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/SliceOp.java 2011-01-16 16:38:37 UTC (rev 4108) @@ -336,7 +336,7 @@ if (log.isInfoEnabled()) log.info("Slice will interrupt query."); - context.getRunningQuery().halt(); + context.getRunningQuery().halt((Void) null); } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/util/concurrent/Haltable.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/util/concurrent/Haltable.java 2011-01-16 00:59:18 UTC (rev 4107) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/util/concurrent/Haltable.java 2011-01-16 16:38:37 UTC (rev 4108) @@ -58,13 +58,15 @@ * <p> * This class embeds certain knowledge about which exceptions may be observed * during normal termination of asynchronous processes using I/O, thread pools, - * and {@link IBlockingBuffer}s. + * and {@link IBlockingBuffer}s. See + * {@link #isNormalTerminationCause(Throwable)} for a list of the + * {@link Throwable} causes which are treated as normal termination. * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id: AbstractHaltableProcess.java 2265 2009-10-26 12:51:06Z * thompsonbry $ */ -public class Haltable<V> implements Future<V> { +public class Haltable<V> implements IHaltable<V> { private final transient static Logger log = Logger .getLogger(Haltable.class); @@ -140,25 +142,26 @@ * the cause out of their own context.</strong> * * @param cause - * The cause. + * The cause (required). * * @return The argument. - * - * @throws IllegalArgumentException - * if the cause is <code>null</code>. */ final public <T extends Throwable> T halt(final T cause) { final boolean didHalt; lock.lock(); try { if (didHalt = !halt) { + /* + * This is the first cause. + */ + // note the first cause (and handle an illegal null if found). firstCause = (cause != null ? cause : new IllegalArgumentException()); + // note if abnormal termination (firstCause only) + error = !isNormalTerminationCause(firstCause); try { // signal *all* listeners. halted.signalAll(); - // note if abnormal termination (firstCause only) - error = !isNormalTerminationCause(cause); } finally { halt = true; // volatile write. } @@ -178,18 +181,20 @@ return cause; } - /** - * Return unless processing has been halted. The method should be invoked - * from within the execution of the process itself so that it may notice - * asynchronous termination. It will throw out the wrapper first cause if - * the process is halted. 
External processes waiting on the {@link Future} - * interface should use {@link #isDone()} which does not have the semantics - * of asserting that the process should still be running. - * - * @throws RuntimeException - * wrapping the {@link #firstCause} iff processing has been - * halted. - */ + /** + * Return unless processing has been halted. The method should be invoked + * from within the execution of the process itself so that it may notice + * asynchronous termination. It will throw out the wrapped first cause if + * the process is halted. + * <p> + * Note: External processes waiting on the {@link Future} interface should + * use {@link #isDone()} which does not have the semantics of asserting that + * the process should still be running. + * + * @throws RuntimeException + * wrapping the {@link #firstCause} iff processing has been + * halted. + */ final public void halted() { if (halt) { @@ -313,24 +318,25 @@ } - /** - * Return the first {@link Throwable} which caused this process to halt, but - * only for abnormal termination. - * - * @return The first {@link Throwable} which caused this process to halt and - * <code>null</code> if the process has not halted or if it halted - * through normal termination. - */ - final public Throwable getCause() { + final public Throwable getCause() { - if (!halt) - return null; + lock.lock(); + try { + + if (!halt) + return null; - if (!error) - return null; + if (!error) + return null; - return firstCause; + return firstCause; + } finally { + + lock.unlock(); + + } + } /** @@ -344,50 +350,51 @@ } - /** - * Return <code>true</code> if the {@link Throwable} is a known normal - * termination cause for the process. The method inspects the stack trace, - * examining both the outer and {@link InnerCause}s. The following causes - * are interpreted as normal termination: - * <dl> - * <dt>{@link InterruptedException}</dt> - * <dd>The process was terminated by an interrupt. Interrupts are typically - * used to terminate asynchronous processes when their production limit has - * been satisfied or the consumer otherwise chooses to - * {@link IAsynchronousIterator#close()} the iterator through which they are - * consuming results from the process.</dd> - * <dt>{@link CancellationException}</dt> - * <dd>A process has been canceled using its {@link Future}.</dd> - * <dt>{@link ClosedByInterruptException}</dt> - * <dd>A process was interrupted during an IO operation.</dd> - * <dt>{@link RejectedExecutionException}</dt> - * <dd>A process was not executed because the pool against which it was - * submitted had been shutdown (this of course implies that the work queue - * was unbounded).</dd> - * <dt>{@link BufferClosedException}</dt> - * <dd>The {@link IBlockingBuffer} on which the process was writing was - * asynchronously closed.</dd> - * </dl> - * - * @param cause - * The {@link Throwable}. - * - * @return <code>true</code> if the {@link Throwable} indicates normal - * termination. - */ + /** + * Return <code>true</code> if the {@link Throwable} is a known normal + * termination cause for the process. The method inspects the stack trace, + * examining both the outer and {@link InnerCause}s. The following causes + * are interpreted as normal termination: + * <dl> + * <dt>{@link InterruptedException}</dt> + * <dd>The process was terminated by an interrupt. 
Interrupts are typically + * used to terminate asynchronous processes when their production limit has + * been satisfied or the consumer otherwise chooses to + * {@link IAsynchronousIterator#close()} the iterator through which they are + * consuming results from the process.</dd> + * <dt>{@link CancellationException}</dt> + * <dd>A process has been canceled using its {@link Future}.</dd> + * <dt>{@link ClosedByInterruptException}</dt> + * <dd>A process was interrupted during an IO operation.</dd> + * <dt>{@link BufferClosedException}</dt> + * <dd>The {@link IBlockingBuffer} on which the process was writing was + * asynchronously closed.</dd> + * <dt>{@link RejectedExecutionException}</dt> + * <dd>A process was not executed because the pool against which it was + * submitted had been shutdown (this of course implies that the work queue + * was unbounded as a bounded pool will throw this exception if the work + * queue is full).</dd> + * </dl> + * + * @param cause + * The {@link Throwable}. + * + * @return <code>true</code> if the {@link Throwable} indicates normal + * termination. + * + * @see #getCause() + */ protected boolean isNormalTerminationCause(final Throwable cause) { -// if (InnerCause.isInnerCause(cause, CancelledException.class)) -// return true; if (InnerCause.isInnerCause(cause, InterruptedException.class)) return true; if (InnerCause.isInnerCause(cause, CancellationException.class)) return true; if (InnerCause.isInnerCause(cause, ClosedByInterruptException.class)) return true; + if (InnerCause.isInnerCause(cause, BufferClosedException.class)) + return true; if (InnerCause.isInnerCause(cause, RejectedExecutionException.class)) return true; - if (InnerCause.isInnerCause(cause, BufferClosedException.class)) - return true; return false; } Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/util/concurrent/IHaltable.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/util/concurrent/IHaltable.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/util/concurrent/IHaltable.java 2011-01-16 16:38:37 UTC (rev 4108) @@ -0,0 +1,46 @@ +package com.bigdata.util.concurrent; + +import java.util.concurrent.Future; + +/** + * Interface extends {@link Future} and provides an interface for managing the + * termination of a process from within that process. + * + * @param <V> + * The generic type of the computation to which the {@link Future} + * evaluates. + */ +public interface IHaltable<V> extends Future<V> { + + /** + * Halt (normal termination). + */ + void halt(V v); + + /** + * Halt (exception thrown). <strong>The caller is responsible for throwing + * their given <i>cause</i> out of their own context.</strong> As a + * convenience, this method returns the given <i>cause</>. + * + * @param cause + * The cause (required). + * + * @return The argument. + */ + <T extends Throwable> T halt(T cause); + + /** + * Return the first {@link Throwable} which caused this process to halt, but + * only for abnormal termination. + * <p> + * {@link IHaltable} considers exceptions triggered by an interrupt to be + * normal termination of the process and will return <code>null</code> for + * such exceptions. + * + * @return The first {@link Throwable} which caused this process to halt and + * <code>null</code> if the process has not halted or if it halted + * through normal termination. 
+ */ + Throwable getCause(); + +} Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/MockRunningQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/MockRunningQuery.java 2011-01-16 00:59:18 UTC (rev 4107) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/MockRunningQuery.java 2011-01-16 16:38:37 UTC (rev 4108) @@ -80,17 +80,16 @@ return indexManager; } - /** - * NOP (you have to test things like slices with a full integration). - */ - public void halt() { - log.warn("Mock object does not implement halt()"); - } + @Override + public void halt(Void v) { + log.warn("Mock object does not implement halt(Void)"); + } - public Throwable halt(Throwable t) { + @Override + public <T extends Throwable> T halt(T cause) { log.warn("Mock object does not implement halt(Throwable)"); - return t; - } + return cause; + } public QueryEngine getQueryEngine() { throw new UnsupportedOperationException(); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/solutions/TestSliceOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/solutions/TestSliceOp.java 2011-01-16 00:59:18 UTC (rev 4107) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/solutions/TestSliceOp.java 2011-01-16 16:38:37 UTC (rev 4108) @@ -814,7 +814,7 @@ * Overridden to close the sink so the slice will terminate. */ @Override - public void halt() { + public void halt(Void v) { sink.close(); } Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IsLiteral.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IsLiteral.java 2011-01-16 00:59:18 UTC (rev 4107) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IsLiteral.java 2011-01-16 16:38:37 UTC (rev 4108) @@ -55,7 +55,7 @@ /** * Required deep copy constructor. */ - public IsLiteral(final IsInline op) { + public IsLiteral(final IsLiteral op) { super(op); } Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/MathBOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/MathBOp.java 2011-01-16 00:59:18 UTC (rev 4107) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/MathBOp.java 2011-01-16 16:38:37 UTC (rev 4108) @@ -23,12 +23,13 @@ */ package com.bigdata.rdf.internal.constraints; +import java.util.Map; + import org.openrdf.query.algebra.MathExpr.MathOp; import com.bigdata.bop.BOp; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IValueExpression; -import com.bigdata.bop.IVariable; import com.bigdata.bop.ImmutableBOp; import com.bigdata.bop.NV; import com.bigdata.rdf.internal.IV; @@ -71,6 +72,27 @@ } + /** + * Required shallow copy constructor. + * + * @param args + * The operands. + * @param op + * The operation. 
+ */ + public MathBOp(final BOp[] args, Map<String,Object> anns) { + + super(args,anns); + + if (args.length != 2 || args[0] == null || args[1] == null + || getProperty(Annotations.OP) == null) { + + throw new IllegalArgumentException(); + + } + + } + /** * * @param left @@ -84,11 +106,8 @@ public MathBOp(final IValueExpression<IV> left, final IValueExpression<IV> right, final MathOp op) { - super(new BOp[] { left, right }, NV.asMap(new NV(Annotations.OP, op))); + this(new BOp[] { left, right }, NV.asMap(new NV(Annotations.OP, op))); - if (left == null || right == null || op == null) - throw new IllegalArgumentException(); - } // /** Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl3.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl3.java 2011-01-16 00:59:18 UTC (rev 4107) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl3.java 2011-01-16 16:38:37 UTC (rev 4108) @@ -24,13 +24,11 @@ import org.openrdf.query.algebra.And; import org.openrdf.query.algebra.Bound; import org.openrdf.query.algebra.Compare; -import org.openrdf.query.algebra.Compare.CompareOp; import org.openrdf.query.algebra.Filter; import org.openrdf.query.algebra.Group; import org.openrdf.query.algebra.Join; import org.openrdf.query.algebra.LeftJoin; import org.openrdf.query.algebra.MathExpr; -import org.openrdf.query.algebra.MathExpr.MathOp; import org.openrdf.query.algebra.MultiProjection; import org.openrdf.query.algebra.Not; import org.openrdf.query.algebra.Or; @@ -43,17 +41,18 @@ import org.openrdf.query.algebra.Regex; import org.openrdf.query.algebra.SameTerm; import org.openrdf.query.algebra.StatementPattern; -import org.openrdf.query.algebra.StatementPattern.Scope; import org.openrdf.query.algebra.TupleExpr; import org.openrdf.query.algebra.UnaryTupleOperator; import org.openrdf.query.algebra.Union; import org.openrdf.query.algebra.ValueConstant; import org.openrdf.query.algebra.ValueExpr; import org.openrdf.query.algebra.Var; +import org.openrdf.query.algebra.Compare.CompareOp; +import org.openrdf.query.algebra.MathExpr.MathOp; +import org.openrdf.query.algebra.StatementPattern.Scope; import org.openrdf.query.algebra.evaluation.impl.EvaluationStrategyImpl; import org.openrdf.query.algebra.evaluation.iterator.FilterIterator; import org.openrdf.query.algebra.helpers.QueryModelVisitorBase; -import org.openrdf.query.parser.serql.AnonymousVarGenerator; import com.bigdata.bop.BOp; import com.bigdata.bop.BOpUtility; @@ -62,12 +61,12 @@ import com.bigdata.bop.IConstant; import com.bigdata.bop.IConstraint; import com.bigdata.bop.IPredicate; -import com.bigdata.bop.IPredicate.Annotations; import com.bigdata.bop.IValueExpression; import com.bigdata.bop.IVariable; import com.bigdata.bop.IVariableOrConstant; import com.bigdata.bop.NV; import com.bigdata.bop.PipelineOp; +import com.bigdata.bop.IPredicate.Annotations; import com.bigdata.bop.ap.Predicate; import com.bigdata.bop.constraint.AND; import com.bigdata.bop.constraint.BOUND; @@ -91,9 +90,9 @@ import com.bigdata.rdf.sail.sop.SOp; import com.bigdata.rdf.sail.sop.SOp2BOpUtility; import com.bigdata.rdf.sail.sop.SOpTree; -import com.bigdata.rdf.sail.sop.SOpTree.SOpGroup; import com.bigdata.rdf.sail.sop.SOpTreeBuilder; import com.bigdata.rdf.sail.sop.UnsupportedOperatorException; +import com.bigdata.rdf.sail.sop.SOpTree.SOpGroup; import 
com.bigdata.rdf.spo.DefaultGraphSolutionExpander; import com.bigdata.rdf.spo.ExplicitSPOFilter; import com.bigdata.rdf.spo.ISPO; @@ -526,9 +525,8 @@ log.info("unrecognized value in query: " + ex.getValue()); } return new EmptyIteration<BindingSet, QueryEvaluationException>(); - } catch (QueryEvaluationException ex) { - throw ex; - } catch (Exception ex) { + } catch (Throwable ex) { +// log.error("Remove log stmt:"+ex,ex);// FIXME remove this - I am just looking for the root cause of something in the SAIL. throw new QueryEvaluationException(ex); } } @@ -716,50 +714,74 @@ // Submit query for evaluation. runningQuery = queryEngine.eval(query); - - // Iterator draining the query results. - final IAsynchronousIterator<IBindingSet[]> it1 = - runningQuery.iterator(); - - // De-chunk the IBindingSet[] visited by that iterator. - final IChunkedOrderedIterator<IBindingSet> it2 = - new ChunkedWrappedIterator<IBindingSet>( - new Dechunkerator<IBindingSet>(it1)); - // Materialize IVs as RDF Values. - CloseableIteration<BindingSet, QueryEvaluationException> result = - // Monitor IRunningQuery and cancel if Sesame iterator is closed. - new RunningQueryCloseableIteration<BindingSet, QueryEvaluationException>(runningQuery, - // Convert bigdata binding sets to Sesame binding sets. - new Bigdata2Sesame2BindingSetIterator<QueryEvaluationException>( - // Materialize IVs as RDF Values. - new BigdataBindingSetResolverator(database, it2).start( - database.getExecutorService()))); - -// No - will deadlock if buffer fills up -// // Wait for the Future (checks for errors). -// runningQuery.get(); - - // use the basic filter iterator for remaining filters - if (sesameFilters != null) { - for (Filter f : sesameFilters) { - if (log.isDebugEnabled()) { - log.debug("attaching sesame filter: " + f); - } - result = new FilterIterator(f, result, this); - } - } + /* + * Wrap up the native bigdata query solution iterator as Sesame + * compatible iteration w/ any filters to be interpreted by Sesame. + */ + return wrapQuery(runningQuery, sesameFilters); - return result; - } catch (Throwable t) { - if (runningQuery != null) + if (runningQuery != null) { + // ensure query is halted. runningQuery.cancel(true/* mayInterruptIfRunning */); + } +// log.error("Remove log stmt"+t,t);// FIXME remove this - I am just looking for the root cause of something in the SAIL. throw new QueryEvaluationException(t); } } + /** + * Wrap the {@link IRunningQuery#iterator()}, returning a Sesame compatible + * iteration which will visit the materialized binding sets. + * + * @param runningQuery + * The query. + * @param sesameFilters + * Any filters to be applied by Sesame. + * + * @return The iterator. + * + * @throws QueryEvaluationException + */ + private CloseableIteration<BindingSet, QueryEvaluationException> wrapQuery( + final IRunningQuery runningQuery, + final Collection<Filter> sesameFilters) throws QueryEvaluationException { + + // The iterator draining the query solutions. + final IAsynchronousIterator<IBindingSet[]> it1 = runningQuery + .iterator(); + + // De-chunk the IBindingSet[] visited by that iterator. + final IChunkedOrderedIterator<IBindingSet> it2 = + new ChunkedWrappedIterator<IBindingSet>( + new Dechunkerator<IBindingSet>(it1)); + + // Materialize IVs as RDF Values. + CloseableIteration<BindingSet, QueryEvaluationException> result = + // Monitor IRunningQuery and cancel if Sesame iterator is closed. 
+ new RunningQueryCloseableIteration<BindingSet, QueryEvaluationException>(runningQuery, + // Convert bigdata binding sets to Sesame binding sets. + new Bigdata2Sesame2BindingSetIterator<QueryEvaluationException>( + // Materialize IVs as RDF Values. + new BigdataBindingSetResolverator(database, it2).start( + database.getExecutorService()))); + + // use the basic filter iterator for remaining filters + if (sesameFilters != null) { + for (Filter f : sesameFilters) { + if (log.isDebugEnabled()) { + log.debug("attaching sesame filter: " + f); + } + result = new FilterIterator(f, result, this); + } + } + + return result; + + } + // /** // * This is the method that will attempt to take a top-level join or left // * join and turn it into a native bigdata rule. The Sesame operators Join Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSail.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSail.java 2011-01-16 00:59:18 UTC (rev 4107) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSail.java 2011-01-16 16:38:37 UTC (rev 4108) @@ -3437,6 +3437,8 @@ } catch (QueryEvaluationException e) { +// log.error("Remove log stmt"+e,e);// FIXME remove this - I am just looking for the root cause of something in the SAIL. + throw new SailException(e); } Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/RunningQueryCloseableIteration.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/RunningQueryCloseableIteration.java 2011-01-16 00:59:18 UTC (rev 4107) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/RunningQueryCloseableIteration.java 2011-01-16 16:38:37 UTC (rev 4108) @@ -2,7 +2,7 @@ import info.aduna.iteration.CloseableIteration; -import java.util.concurrent.ExecutionException; +import java.util.NoSuchElementException; import org.openrdf.query.BindingSet; import org.openrdf.query.QueryEvaluationException; @@ -24,6 +24,12 @@ private final IRunningQuery runningQuery; private final CloseableIteration<E, X> src; private boolean checkedFuture = false; + /** + * The next element is buffered so we can always return it if the + * {@link #runningQuery} was not aborted at the time that {@link #hasNext()} + * return <code>true</code>. + */ + private E current = null; public RunningQueryCloseableIteration(final IRunningQuery runningQuery, final CloseableIteration<E, X> src) { @@ -39,25 +45,69 @@ } public boolean hasNext() throws X { - return src.hasNext(); - } - public E next() throws X { + if (current != null) { + // Already buffered. + return true; + } + + if (!src.hasNext()) { + // Source is exhausted. + return false; + } + + // buffer the next element. + current = src.next(); + + // test for abnormal completion of the runningQuery. if (!checkedFuture && runningQuery.isDone()) { try { runningQuery.get(); } catch (InterruptedException e) { + /* + * Interrupted while waiting on the Future (should not happen + * since the Future is already done). + */ throw (X) new QueryEvaluationException(e); - } catch (ExecutionException e) { - throw (X) new QueryEvaluationException(e); + } catch (Throwable e) { + /* + * Exception thrown by the runningQuery. + */ + if (runningQuery.getCause() != null) { + // abnormal termination. 
+                    throw (X) new QueryEvaluationException(runningQuery.getCause());
+                }
+                // otherwise this is normal termination.
             }
             checkedFuture = true;
         }
-        return src.next();
+
+        // the next element is now buffered.
+        return true;
+
     }
 
+    public E next() throws X {
+
+        if (!hasNext())
+            throw new NoSuchElementException();
+
+        final E tmp = current;
+
+        current = null;
+
+        return tmp;
+
+    }
+
+    /**
+     * Operation is not supported.
+     */
     public void remove() throws X {
-        src.remove();
+
+        // Not supported since we are buffering ahead.
+        throw new UnsupportedOperationException();
+
     }
 
 }
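The doneQueries collection earlier in this message (retyped from Future<Void> to IHaltable<Void>, presumably so getCause() remains available after completion) is a stock access-ordered LRU cache. For reference, here is that idiom in isolation; this is a minimal sketch, with String standing in for the cached value type:

    import java.util.LinkedHashMap;
    import java.util.Map;
    import java.util.UUID;

    class DoneQueriesCacheSketch {

        private static final int MAX_CAPACITY = 100; // matches the hunk above.

        // With accessOrder=true, iteration order tracks access recency, so the
        // "eldest" entry passed to removeEldestEntry() is the least recently
        // used one; returning true evicts it once the cache is over capacity.
        private final Map<UUID, String> doneQueries = new LinkedHashMap<UUID, String>(
                16 /* initialCapacity */, .75f /* loadFactor */, true /* accessOrder */) {

            private static final long serialVersionUID = 1L;

            @Override
            protected boolean removeEldestEntry(final Map.Entry<UUID, String> eldest) {
                return size() > MAX_CAPACITY;
            }

        };

    }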
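Much of this revision converges on one contract in Haltable: halt(Throwable) latches only the first cause, and getCause() reports a cause only for abnormal termination, returning null when the process was shut down by an interrupt-style event. A reduced, standalone sketch of that contract (this is not the bigdata class: the lock/Condition signalling is collapsed to synchronized, and the InnerCause chain-walk is simplified to an outer-exception test):

    import java.nio.channels.ClosedByInterruptException;
    import java.util.concurrent.CancellationException;

    class FirstCauseLatch {

        private boolean halted = false; // latched by the first halt().
        private Throwable firstCause;   // the first cause, normal or not.
        private boolean error;          // true iff abnormal termination.

        synchronized <T extends Throwable> T halt(final T cause) {
            if (!halted) {
                firstCause = (cause != null ? cause : new IllegalArgumentException());
                error = !isNormalTermination(firstCause);
                halted = true;
            }
            return cause; // the caller rethrows from its own context.
        }

        // Non-null iff the process halted through an error.
        synchronized Throwable getCause() {
            return (halted && error) ? firstCause : null;
        }

        private static boolean isNormalTermination(final Throwable t) {
            return t instanceof InterruptedException
                    || t instanceof CancellationException
                    || t instanceof ClosedByInterruptException;
        }

    }

The operator tasks then all share one shape: catch Throwable, call halt(t), rethrow only when getCause() is non-null, and otherwise swallow the exception as normal termination, which is exactly what the StandaloneChainedRunningQuery hunk above does.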
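RunningQueryCloseableIteration now reads one element ahead: hasNext() buffers the next solution and then checks the query's Future, so a solution is never reported as available when the query has already failed. The look-ahead pattern in isolation, as a minimal sketch over java.util.Iterator rather than the Sesame CloseableIteration types (checkForAbnormalTermination() is a hypothetical hook standing in for the IRunningQuery.getCause() test above):

    import java.util.Iterator;
    import java.util.NoSuchElementException;

    class LookAheadIterator<E> implements Iterator<E> {

        private final Iterator<E> src;

        private E current = null; // the buffered element, if any.

        LookAheadIterator(final Iterator<E> src) {
            this.src = src;
        }

        public boolean hasNext() {
            if (current != null)
                return true; // already buffered.
            if (!src.hasNext())
                return false; // source is exhausted.
            current = src.next(); // buffer the next element.
            checkForAbnormalTermination(); // may throw, suppressing [current].
            return true;
        }

        public E next() {
            if (!hasNext())
                throw new NoSuchElementException();
            final E tmp = current;
            current = null;
            return tmp;
        }

        public void remove() {
            // Unsupported: the source cursor is already past the buffered
            // element, so src.remove() would delete the wrong one.
            throw new UnsupportedOperationException();
        }

        protected void checkForAbnormalTermination() {
            // Hook: the real class consults the query's Future/getCause() here.
        }

    }

Buffering ahead is also why remove() must become unsupported in the revised class above rather than delegating to the source.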
From: <tho...@us...> - 2011-01-16 18:59:49
Revision: 4110 http://bigdata.svn.sourceforge.net/bigdata/?rev=4110&view=rev Author: thompsonbry Date: 2011-01-16 18:59:43 +0000 (Sun, 16 Jan 2011) Log Message: ----------- More work on [1]. [1] https://sourceforge.net/apps/trac/bigdata/ticket/230 Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/ChunkedRunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/StandaloneChainedRunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/util/concurrent/Haltable.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl3.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/RunningQueryCloseableIteration.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/UnsupportedOperatorException.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/ChunkedRunningQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/ChunkedRunningQuery.java 2011-01-16 16:58:40 UTC (rev 4109) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/ChunkedRunningQuery.java 2011-01-16 18:59:43 UTC (rev 4110) @@ -35,6 +35,7 @@ import java.util.concurrent.BlockingQueue; import java.util.concurrent.Callable; import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ExecutionException; import java.util.concurrent.Future; import java.util.concurrent.FutureTask; import java.util.concurrent.LinkedBlockingQueue; @@ -787,6 +788,20 @@ super.run(); + } catch(Throwable t) { + + // ensure query halts. + halt(t); + + if (getCause() != null) { + + // abnormal termination. wrap and rethrow. + throw new RuntimeException(t); + + } + + // otherwise ignore exception (normal termination). + } finally { /* @@ -859,11 +874,11 @@ final long begin = System.currentTimeMillis(); try { t.call(); - } catch(Throwable t) { - halt(t); + } catch(Throwable t2) { + halt(t2); // ensure query halts. if (getCause() != null) { - // Abnormal termination. - throw new RuntimeException(getCause()); + // Abnormal termination - wrap and rethrow. + throw new RuntimeException(t2); } // normal termination - swallow the exception. } finally { @@ -1251,8 +1266,17 @@ public Void call() throws Exception { if (log.isDebugEnabled()) log.debug("Running chunk: " + this); - ft.run(); // run - ft.get(); // verify success + try { + ft.run(); // run + ft.get(); // verify success + } catch (Throwable t) { + halt(t); // ensure query halts. + if (getCause() != null) { + // abnormal termination - wrap and rethrow. + throw new Exception(t); + } + // otherwise ignore exception (normal completion). + } // Done. return null; } // call() Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/StandaloneChainedRunningQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/StandaloneChainedRunningQuery.java 2011-01-16 16:58:40 UTC (rev 4109) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/StandaloneChainedRunningQuery.java 2011-01-16 18:59:43 UTC (rev 4110) @@ -444,8 +444,8 @@ } catch(Throwable t) { halt(t); if (getCause() != null) { - // Abnormal termination. - throw getCause(); + // Abnormal termination - wrap and rethrow. + throw new RuntimeException(t); } // normal termination - swallow the exception. 
} finally { @@ -466,10 +466,6 @@ } catch (Throwable ex1) { - // Log an error. - log.error("queryId=" + getQueryId() + ", bopId=" + t.bopId - + ", bop=" + t.bop, ex1); - /* * Mark the query as halted on this node regardless of whether * we are able to communicate with the query controller. @@ -481,7 +477,15 @@ // ensure halted. halt(ex1); + + if (getCause() != null) { + // Log an error. + log.error("queryId=" + getQueryId() + ", bopId=" + t.bopId + + ", bop=" + t.bop, ex1); + + } + final HaltOpMessage msg = new HaltOpMessage(getQueryId(), t.bopId, -1/*partitionId*/, serviceId, getCause()/*firstCauseIfError*/, t.sinkId, 0/*t.sinkMessagesOut.get()*/, t.altSinkId, Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/util/concurrent/Haltable.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/util/concurrent/Haltable.java 2011-01-16 16:58:40 UTC (rev 4109) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/util/concurrent/Haltable.java 2011-01-16 18:59:43 UTC (rev 4110) @@ -385,6 +385,15 @@ * @see #getCause() */ protected boolean isNormalTerminationCause(final Throwable cause) { + if(isTerminationByInterrupt(cause)) + return true; + if (InnerCause.isInnerCause(cause, RejectedExecutionException.class)) + return true; + return false; + } + + static public boolean isTerminationByInterrupt(final Throwable cause) { + if (InnerCause.isInnerCause(cause, InterruptedException.class)) return true; if (InnerCause.isInnerCause(cause, CancellationException.class)) @@ -393,11 +402,11 @@ return true; if (InnerCause.isInnerCause(cause, BufferClosedException.class)) return true; - if (InnerCause.isInnerCause(cause, RejectedExecutionException.class)) - return true; + return false; + } - + /** * This logs all unexpected causes @ WARN (anything not reported as normal * termination by {@link #isNormalTerminationCause(Throwable)}), not just Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl3.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl3.java 2011-01-16 16:58:40 UTC (rev 4109) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl3.java 2011-01-16 18:59:43 UTC (rev 4110) @@ -118,6 +118,7 @@ import com.bigdata.striterator.Dechunkerator; import com.bigdata.striterator.DistinctFilter; import com.bigdata.striterator.IChunkedOrderedIterator; +import com.bigdata.util.concurrent.Haltable; /** * Extended to rewrite Sesame {@link TupleExpr}s onto native {@link Rule}s and @@ -393,7 +394,7 @@ } else { // allow the query to fail - throw ex; + throw new UnsupportedOperatorException(ex); } @@ -450,7 +451,7 @@ } else { // allow the query to fail - throw ex; + throw new UnsupportedOperatorException(ex); } @@ -507,7 +508,7 @@ } else { // allow the query to fail - throw ex; + throw new UnsupportedOperatorException(ex); } @@ -525,6 +526,12 @@ log.info("unrecognized value in query: " + ex.getValue()); } return new EmptyIteration<BindingSet, QueryEvaluationException>(); + } catch(UnsupportedOperatorException ex) { + /* + * Note: Do not wrap as a different exception type. The caller is + * looking for this. 
+ */ + throw new UnsupportedOperatorException(ex); } catch (Throwable ex) { // log.error("Remove log stmt:"+ex,ex);// FIXME remove this - I am just looking for the root cause of something in the SAIL. throw new QueryEvaluationException(ex); @@ -577,7 +584,7 @@ if (sop.isRightSideLeftJoin()) { groupsToPrune.add(sopTree.getGroup(sop.getGroup())); } else { - throw ex; + throw new UnrecognizedValueException(ex); } } } @@ -652,8 +659,12 @@ if (sop.getGroup() == SOpTreeBuilder.ROOT_GROUP_ID) { sopsToPrune.add(sop); sesameFilters.add(filter); - } else { - throw ex; + } else { + /* + * Note: DO NOT wrap with a different exception type - + * the caller is looking for this. + */ + throw new UnsupportedOperatorException(ex); } } } @@ -699,14 +710,43 @@ } - return _evaluateNatively(query, bs, queryEngine, sesameFilters); - + /* + * Begin native bigdata evaluation. + */ + CloseableIteration<BindingSet, QueryEvaluationException> result = _evaluateNatively( + query, bs, queryEngine);// , sesameFilters); + + /* + * Use the basic filter iterator for any remaining filters which will be + * evaluated by Sesame. + * + * Note: Some Sesame filters may pre-fetch one or more result(s). This + * could potentially cause the IRunningQuery to be asynchronously + * terminated by an interrupt. I have lifted the code to wrap the Sesame + * filters around the bigdata evaluation out of the code which starts + * the IRunningQuery evaluation in order to help clarify such + * circumstances as they might relate to [1]. + * + * [1] https://sourceforge.net/apps/trac/bigdata/ticket/230 + */ + if (sesameFilters != null) { + for (Filter f : sesameFilters) { + if (log.isDebugEnabled()) { + log.debug("attaching sesame filter: " + f); + } + result = new FilterIterator(f, result, this); + } + } + + return result; + } - protected CloseableIteration<BindingSet, QueryEvaluationException> + private CloseableIteration<BindingSet, QueryEvaluationException> _evaluateNatively(final PipelineOp query, final BindingSet bs, - final QueryEngine queryEngine, - final Collection<Filter> sesameFilters) + final QueryEngine queryEngine +// , final Collection<Filter> sesameFilters + ) throws QueryEvaluationException { IRunningQuery runningQuery = null; @@ -717,10 +757,20 @@ /* * Wrap up the native bigdata query solution iterator as Sesame - * compatible iteration w/ any filters to be interpreted by Sesame. + * compatible iteration with materialized RDF Values. */ - return wrapQuery(runningQuery, sesameFilters); + return wrapQuery(runningQuery);//, sesameFilters); + } catch (UnsupportedOperatorException t) { + if (runningQuery != null) { + // ensure query is halted. + runningQuery.cancel(true/* mayInterruptIfRunning */); + } + /* + * Note: Do not wrap as a different exception type. The caller is + * looking for this. + */ + throw new UnsupportedOperatorException(t); } catch (Throwable t) { if (runningQuery != null) { // ensure query is halted. @@ -734,20 +784,19 @@ /** * Wrap the {@link IRunningQuery#iterator()}, returning a Sesame compatible - * iteration which will visit the materialized binding sets. + * iteration which will visit Sesame binding sets having materialized RDF + * Values. * * @param runningQuery * The query. - * @param sesameFilters - * Any filters to be applied by Sesame. - * + * * @return The iterator. 
* - * @throws QueryEvaluationException + * @throws QueryEvaluationException */ private CloseableIteration<BindingSet, QueryEvaluationException> wrapQuery( - final IRunningQuery runningQuery, - final Collection<Filter> sesameFilters) throws QueryEvaluationException { + final IRunningQuery runningQuery + ) throws QueryEvaluationException { // The iterator draining the query solutions. final IAsynchronousIterator<IBindingSet[]> it1 = runningQuery @@ -759,7 +808,7 @@ new Dechunkerator<IBindingSet>(it1)); // Materialize IVs as RDF Values. - CloseableIteration<BindingSet, QueryEvaluationException> result = + final CloseableIteration<BindingSet, QueryEvaluationException> result = // Monitor IRunningQuery and cancel if Sesame iterator is closed. new RunningQueryCloseableIteration<BindingSet, QueryEvaluationException>(runningQuery, // Convert bigdata binding sets to Sesame binding sets. @@ -768,16 +817,6 @@ new BigdataBindingSetResolverator(database, it2).start( database.getExecutorService()))); - // use the basic filter iterator for remaining filters - if (sesameFilters != null) { - for (Filter f : sesameFilters) { - if (log.isDebugEnabled()) { - log.debug("attaching sesame filter: " + f); - } - result = new FilterIterator(f, result, this); - } - } - return result; } @@ -2286,6 +2325,18 @@ private Value value; + /** + * Wrap another instance of this exception class. + * @param cause + */ + public UnrecognizedValueException(final UnrecognizedValueException cause) { + + super(cause); + + this.value = cause.value; + + } + public UnrecognizedValueException(final Value value) { this.value = value; } Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/RunningQueryCloseableIteration.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/RunningQueryCloseableIteration.java 2011-01-16 16:58:40 UTC (rev 4109) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/RunningQueryCloseableIteration.java 2011-01-16 18:59:43 UTC (rev 4110) @@ -74,8 +74,8 @@ * Exception thrown by the runningQuery. */ if (runningQuery.getCause() != null) { - // abnormal termination. - throw (X) new QueryEvaluationException(runningQuery.getCause()); + // abnormal termination - wrap and rethrow. + throw (X) new QueryEvaluationException(e); } // otherwise this is normal termination. } Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/UnsupportedOperatorException.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/UnsupportedOperatorException.java 2011-01-16 16:58:40 UTC (rev 4109) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/UnsupportedOperatorException.java 2011-01-16 18:59:43 UTC (rev 4110) @@ -2,6 +2,13 @@ import org.openrdf.query.algebra.QueryModelNode; +/** + * An exception thrown when an operator can not be translated into native + * bigdata evaluation. This is used to detect such problems and then optionally + * delegate the operator to openrdf. + * + * @author mrpersonick + */ public class UnsupportedOperatorException extends RuntimeException { /** @@ -11,6 +18,16 @@ private QueryModelNode operator; + /** + * Wrap with another instance of this class. 
+     *
+     * @param cause
+     */
+    public UnsupportedOperatorException(final UnsupportedOperatorException cause) {
+        super(cause);
+        this.operator = cause.operator;
+    }
+
     public UnsupportedOperatorException(final QueryModelNode operator) {
         this.operator = operator;
     }
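The Haltable refactoring above factors the interrupt-style causes out into a static isTerminationByInterrupt(Throwable), leaving RejectedExecutionException as the only additional cause accepted by isNormalTerminationCause(). Per its javadoc, the test examines both the outer exception and the InnerCauses, i.e., it walks the cause chain. A minimal chain-walking equivalent (class name hypothetical; BufferClosedException is omitted here because it is a bigdata-internal type):

    import java.nio.channels.ClosedByInterruptException;
    import java.util.concurrent.CancellationException;

    final class InterruptTerminationSketch {

        // Walks the cause chain, as InnerCause.isInnerCause() does for each
        // exception class in the real implementation.
        static boolean isTerminationByInterrupt(Throwable t) {
            for (; t != null; t = t.getCause()) {
                if (t instanceof InterruptedException
                        || t instanceof CancellationException
                        || t instanceof ClosedByInterruptException) {
                    return true;
                }
            }
            return false;
        }

        private InterruptTerminationSketch() {
        }

    }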
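The repeated "Do not wrap as a different exception type. The caller is looking for this." comments in this revision describe a calling convention: UnsupportedOperatorException must keep its exact type through every layer, because the caller discriminates on that type to decide between failing the query and handing the operator back to openrdf. A sketch of the convention; evaluateNatively and evaluateWithOpenrdf are hypothetical stand-ins, not bigdata methods, and the exception class is stubbed locally so the sketch is self-contained:

    import info.aduna.iteration.CloseableIteration;

    import org.openrdf.query.BindingSet;
    import org.openrdf.query.QueryEvaluationException;
    import org.openrdf.query.algebra.TupleExpr;

    abstract class DelegationSketch {

        CloseableIteration<BindingSet, QueryEvaluationException> evaluate(
                final TupleExpr expr) throws QueryEvaluationException {
            try {
                return evaluateNatively(expr);
            } catch (UnsupportedOperatorException ex) {
                // The type survived intact, so this catch can fire: delegate
                // the unsupported operator to openrdf evaluation.
                return evaluateWithOpenrdf(expr);
            } catch (Throwable t) {
                // Anything else is a genuine evaluation error.
                throw new QueryEvaluationException(t);
            }
        }

        abstract CloseableIteration<BindingSet, QueryEvaluationException> evaluateNatively(
                TupleExpr expr);

        abstract CloseableIteration<BindingSet, QueryEvaluationException> evaluateWithOpenrdf(
                TupleExpr expr);

        // Local stand-in so the sketch compiles without the bigdata SAIL.
        static class UnsupportedOperatorException extends RuntimeException {
            private static final long serialVersionUID = 1L;
        }

    }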
From: <tho...@us...> - 2011-01-16 19:30:59
Revision: 4112 http://bigdata.svn.sourceforge.net/bigdata/?rev=4112&view=rev Author: thompsonbry Date: 2011-01-16 19:30:52 +0000 (Sun, 16 Jan 2011) Log Message: ----------- Moved the CONSTRAINTS annotation from the PipelineJoin operator to the IPredicate in order to support join graphs. Now that the CONSTRAINTS decorate the IPredicate, they will automatically be evaluated when that predicate is joined and we do not have to worry about raising the CONSTRAINTS onto the join operator (the join operator is still responsible for evaluating the constraints, but it picks them off of the IPredicate now). Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestJoinGraph.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestSubqueryOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/MockRunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestFederatedQueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/join/TestPipelineJoin.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/Rule2BOpUtility.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java 2011-01-16 19:26:10 UTC (rev 4111) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java 2011-01-16 19:30:52 UTC (rev 4112) @@ -42,7 +42,6 @@ import com.bigdata.btree.filter.Advancer; import com.bigdata.btree.filter.TupleFilter; import com.bigdata.mdi.PartitionLocator; -import com.bigdata.rawstore.Bytes; import com.bigdata.relation.IRelation; import com.bigdata.relation.accesspath.AccessPath; import com.bigdata.relation.accesspath.ElementFilter; @@ -102,7 +101,10 @@ * <code>true</code> iff the predicate is optional (the right operand of * a left join). * - * @deprecated This flag is being moved to the join operator. + * @deprecated This flag is being moved to the join operator (or should + * it stay with the predicate so we can work with join + * graphs more easily, but note that join graphs never + * contain optional joins). */ String OPTIONAL = "optional"; @@ -321,6 +323,12 @@ */ String TIMESTAMP = BOp.class.getName() + ".timestamp"; + /** + * An optional {@link IConstraint}[] which places restrictions on the + * legal patterns in the variable bindings. + */ + String CONSTRAINTS = PipelineJoin.class.getName() + ".constraints"; + } /** @@ -434,6 +442,22 @@ // public IElementFilter<E> getConstraint(); /** + * Return the optional {@link IConstraint}[] to be applied by a join which + * evaluates this {@link IPredicate}. 
+ * <p> + * Note: The {@link Annotations#CONSTRAINTS} are annotated on the + * {@link IPredicate} rather than the join operators so they may be used + * with join graphs, which are expressed solely as an unordered set of + * {@link IPredicate}s. Using join graphs, we are able to do nifty things + * such as runtime query optimization which would not be possible if the + * annotations were decorating the joins since we would be unable to + * dynamically generate the join operators with the necessary annotations. + * + * @see Annotations#CONSTRAINTS + */ + public IConstraint[] constraints(); + + /** * Return the optional filter to be evaluated local to the index. * * @see Annotations#INDEX_LOCAL_FILTER Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java 2011-01-16 19:26:10 UTC (rev 4111) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java 2011-01-16 19:30:52 UTC (rev 4112) @@ -35,6 +35,7 @@ import com.bigdata.bop.Constant; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IConstant; +import com.bigdata.bop.IConstraint; import com.bigdata.bop.IElement; import com.bigdata.bop.IPredicate; import com.bigdata.bop.IVariable; @@ -244,6 +245,12 @@ // // } + public IConstraint[] constraints() { + + return getProperty(IPredicate.Annotations.CONSTRAINTS, null/* defaultValue */); + + } + final public IFilter getIndexLocalFilter() { return (IFilter) getProperty(Annotations.INDEX_LOCAL_FILTER); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2011-01-16 19:26:10 UTC (rev 4111) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2011-01-16 19:30:52 UTC (rev 4112) @@ -73,7 +73,6 @@ import com.bigdata.relation.accesspath.IAccessPath; import com.bigdata.relation.accesspath.IAsynchronousIterator; import com.bigdata.relation.accesspath.ThickAsynchronousIterator; -import com.bigdata.relation.rule.Rule; import com.bigdata.striterator.Dechunkerator; import com.bigdata.striterator.IChunkedIterator; @@ -945,6 +944,13 @@ throw new IllegalStateException(); if (v2.sample == null) // vertex not sampled. throw new IllegalStateException(); + /* + * FIXME CONSTRAINT ORDERING : If a variable only appears in a + * CONSTRAINT for one of the two vertices then that vertex must be + * evaluated second. (If the vertices both have this problem then + * the edge can not be evaluated until some other vertex causes the + * variables of either one [v1] or [v2] to become bound.) + */ if (v1.sample.rangeCount < v2.sample.rangeCount) { v = v1; vp = v2; @@ -1061,11 +1067,7 @@ new NV(PipelineJoin.Annotations.SHARED_STATE,true), new NV(PipelineJoin.Annotations.EVALUATION_CONTEXT,BOpEvaluationContext.CONTROLLER) ); - if (vTarget.pred.getProperty(PipelineJoin.Annotations.CONSTRAINTS) != null) { - // Copy constraints from the predicate onto the join, which will apply them. - anns.put(PipelineJoin.Annotations.CONSTRAINTS, vTarget.pred - .getProperty(PipelineJoin.Annotations.CONSTRAINTS)); - } + final PipelineJoin joinOp = new PipelineJoin(new BOp[] {}, anns); final PipelineOp queryOp = joinOp; @@ -1335,7 +1337,7 @@ /* * Scan the vertices of the caller's path. 
If any of those vertices * are NOT found in this path the paths are not unordered variations - * of one aother. + * of one another. */ for (int i = 0; i < v2.length; i++) { @@ -1381,10 +1383,15 @@ for (Edge e : edges) { if (tmp.isEmpty()) { - /* - * The first edge is handled specially in order to report - * the minimum cardinality vertex first. - */ + /* + * The first edge is handled specially in order to report + * the minimum cardinality vertex first. + * + * FIXME CONSTRAINT ORDERING : A vertex can not run until + * all variables appearing in its CONSTRAINTS would be + * bound. This can cause us to use and report an ordering + * which does not place the minimum cardinality vertex 1st. + */ tmp.add(e.getMinimumCardinalityVertex()); tmp.add(e.getMaximumCardinalityVertex()); @@ -1530,26 +1537,30 @@ // The new vertex, which is not part of this path. final Vertex targetVertex = v1Found ? e.v2 : e.v1; - /* - * Chain sample the edge. - * - * Note: ROX uses the intermediate result I(p) for the existing path - * as the input when sampling the edge. The corresponding concept - * for us is the sample for this Path, which will have all variable - * bindings produced so far. In order to estimate the cardinality of - * the new join path we have to do a one step cutoff evaluation of - * the new Edge, given the sample available on the current Path. - * - * FIXME It is possible for the path sample to be empty. Unless the - * sample also happens to be exact, this is an indication that the - * estimated cardinality has underflowed. We track the estimated - * cumulative cardinality, so this does not make the join path an - * immediate winner, but it does mean that we can not probe further - * on that join path as we lack any intermediate solutions to feed - * into the downstream joins. [If we re-sampled the edges in the - * join path in each round then this would help to establish a - * better estimate in successive rounds.] - */ + /* + * Chain sample the edge. + * + * Note: ROX uses the intermediate result I(p) for the existing path + * as the input when sampling the edge. The corresponding concept + * for us is the sample for this Path, which will have all variable + * bindings produced so far. In order to estimate the cardinality of + * the new join path we have to do a one step cutoff evaluation of + * the new Edge, given the sample available on the current Path. + * + * FIXME It is possible for the path sample to be empty. Unless the + * sample also happens to be exact, this is an indication that the + * estimated cardinality has underflowed. We track the estimated + * cumulative cardinality, so this does not make the join path an + * immediate winner, but it does mean that we can not probe further + * on that join path as we lack any intermediate solutions to feed + * into the downstream joins. [If we re-sampled the edges in the + * join path in each round then this would help to establish a + * better estimate in successive rounds.] + * + * FIXME CONSTRAINT ORDERING : It is illegal to add a vertex to the + * path if any variable appearing in its CONSTRAINTS would not be + * bound. + */ final EdgeSample edgeSample = e.estimateCardinality(queryEngine, limit, sourceVertex, targetVertex, @@ -1741,6 +1752,14 @@ */ private final Edge[] E; + /** + * An unordered collection of vertices which do not share any variables + * with the other vertices in the join graph. These vertices are run + * last. 
Once the {@link #E edges} have been exhausted, these vertices + * are simply appended to the join path in an arbitrary order. + */ + private final Vertex[] unshared; + public List<Vertex> getVertices() { return Collections.unmodifiableList(Arrays.asList(V)); } @@ -1756,6 +1775,10 @@ for (Vertex v : V) { sb.append("\nV[" + v.pred.getId() + "]=" + v); } + sb.append("\n],unshared=["); + for (Vertex v : unshared) { + sb.append("\n" + v); + } sb.append("],E=["); for (Edge e : E) { sb.append("\n" + e); @@ -1780,18 +1803,39 @@ } - /* - * Identify the edges by looking for shared variables among the - * predicates. - * - * Note: If a vertex does not share ANY variables then it is paired - * with every other vertex. Such joins will always produce a full - * cross product and they can be taken paired with any of the other - * vertices. - */ + /* + * Identify the edges by looking for shared variables among the + * predicates. + * + * Note: Variables may appear in the arguments of the predicate, + * e.g., spo(?s,rdf:type,?o). + * + * Note: Variables may ALSO appear in the CONSTRAINTS (imposed on + * the binding sets) or FILTERS (imposed either on the local or + * remote access path). For example, that a variable bound by + * another predicate must take on a value having some mathematical + * relationship to a variable bound by the predicate, e.g., BSBM Q5. + * When a variable appears in a constraint but does not appear as an + * argument to the predicate, then there is an additional + * requirement that the variable MUST become bound before the + * predicate may be evaluated (again, BSBM Q5 has this form). + * + * Note: If a vertex does not share ANY variables (neither in the + * arguments of the predicate nor in its constraints or filters) + * then it can be paired with any of the other vertices. However, in + * such cases we always run such vertices last as they can not + * restrict the cardinality of the rest of the join graph. Such + * vertices are therefore inserted into a separate set and appended + * to the join path once all edges having shared variables have been + * exhausted. + */ { - final List<Edge> tmp = new LinkedList<Edge>(); + // The set of identified edges for vertices which share vars. + final List<Edge> tmp = new LinkedList<Edge>(); + + // The set of vertices which do not share variables. + final List<Vertex> unshared = new LinkedList<Vertex>(); for (int i = 0; i < v.length; i++) { @@ -1806,12 +1850,21 @@ // consider a possible target vertex. final IPredicate<?> p2 = v[j]; - final Set<IVariable<?>> shared = Rule.getSharedVars(p1, - p2); + final Set<IVariable<?>> shared = getSharedVars(p1, p2); if (shared != null && !shared.isEmpty()) { - // the source and target vertices share var(s). + /* + * The source and target vertices share var(s). + * + * Note: A predicate having a variable which appears + * in a CONSTRAINT MUST NOT be added to the join + * path until that variable would be bound. + * Therefore, when selecting the vertices to be used + * to extend a join path, we must consider whether + * or not the join path would bind the variable(s) + * appearing in the CONSTRAINT. + */ tmp.add(new Edge(V[i], V[j], shared)); nmatched++; @@ -1823,26 +1876,37 @@ if (nmatched == 0) { /* - * The source vertex does not share any variables. In - * order to explore join paths which include that vertex - * we therefore pair it with each of the other vertices. + * The source vertex does not share any variables. 
*/ - for (int j = 0; j < v.length; j++) { + + unshared.add(V[i]); - if (j == i) - continue; - - tmp.add(new Edge(V[i], V[j], - Collections.EMPTY_SET)); - - } - } } E = tmp.toArray(new Edge[0]); + + this.unshared = unshared.toArray(new Vertex[0]); + if(!unshared.isEmpty()) { + + /* + * FIXME This needs to be supported. We should explore and + * generate the join paths based on only those vertices + * which do share variables (and hence for which we have + * defined edges). Once the vertices which share variables + * have been exhausted, we should simply append edges for + * the vertices which do not share variables in an arbitrary + * order (they will be run last since they can not constrain + * the evaluation). + */ + + throw new UnsupportedOperationException( + "Some predicates do not share any variables with other predicates."); + + } + } } @@ -1889,13 +1953,21 @@ // Setup the join graph. Path[] paths = round0(queryEngine, limit, nedges); - /* - * The initial paths all have one edge, and hence two vertices. Each - * round adds one more vertex to each path. We are done once we have - * generated paths which include all vertices. - * - * This occurs at round := nvertices - 1 - */ + /* + * The initial paths all have one edge, and hence two vertices. Each + * round adds one more vertex to each path. We are done once we have + * generated paths which include all vertices. + * + * This occurs at round := nvertices - 1 + * + * FIXME UNSHARED VERTICES : Add [unshared] vertices after all + * vertices with shared variables have been incorporated into the + * join paths. This should happen outside of the loop since the + * joins with unshared variables can not constraint the solutions. + * Therefore choose the best join path based on the vertices with + * the shared variables and then simply append the [unshared] + * vertices. + */ final int nvertices = V.length; @@ -2000,25 +2072,29 @@ } - /** - * Choose up to <i>nedges</i> edges to be the starting point. - * - * @param queryEngine - * The query engine. - * @param limit - * The cutoff used when sampling the vertices and when - * sampling the edges. - * @param nedges - * The maximum #of edges to choose. Those having the smallest - * expected cardinality will be chosen. - * - * @return An initial set of paths starting from any most <i>nedges</i>. - * For each of the <i>nedges</i> lowest cardinality edges, the - * starting vertex will be the vertex with the lowest - * cardinality for that edge. - * - * @throws Exception - */ + /** + * Choose up to <i>nedges</i> edges to be the starting point. For each + * of the <i>nedges</i> lowest cardinality edges, the starting vertex + * will be the vertex with the lowest cardinality for that edge. + * <p> + * Note: An edge can not serve as a starting point for exploration if it + * uses variables (for example, in a CONSTRAINT) which are not bound by + * either vertex (since the variable(s) are not bound, the constraint + * would always fail). + * + * @param queryEngine + * The query engine. + * @param limit + * The cutoff used when sampling the vertices and when + * sampling the edges. + * @param nedges + * The maximum #of edges to choose. Those having the smallest + * expected cardinality will be chosen. + * + * @return An initial set of paths starting from at most <i>nedges</i>. 
+ * + * @throws Exception + */ public Path[] round0(final QueryEngine queryEngine, final int limit, final int nedges) throws Exception { @@ -2889,13 +2965,6 @@ // // anns.add(new NV(PipelineJoin.Annotations.SELECT, vars.toArray(new IVariable[vars.size()]))); - if (p.getProperty(PipelineJoin.Annotations.CONSTRAINTS) != null) { - // Copy constraints from the predicate onto the join, which will - // apply them. - anns.add(new NV(PipelineJoin.Annotations.CONSTRAINTS, p - .getProperty(PipelineJoin.Annotations.CONSTRAINTS))); - } - final PipelineJoin joinOp = new PipelineJoin( lastOp == null ? new BOp[0] : new BOp[] { lastOp }, anns.toArray(new NV[anns.size()])); @@ -3025,7 +3094,105 @@ } - /** + /** + * Return the variables in common for two {@link IPredicate}s. All variables + * spanned by either {@link IPredicate} are considered. + * <p> + * Note: Variables may appear in the predicates operands, in the + * {@link Annotations#CONSTRAINTS} associated with the + * predicate, and in the {@link IPredicate.Annotations#ACCESS_PATH_FILTER} + * or {@link IPredicate.Annotations#INDEX_LOCAL_FILTER}. + * <p> + * Note: A variable must become bound before it may be evaluated in + * {@link Annotations#CONSTRAINTS}, an + * {@link IPredicate.Annotations#ACCESS_PATH_FILTER} or an + * {@link IPredicate.Annotations#INDEX_LOCAL_FILTER}. This means that the + * {@link IPredicate}s which can bind the variable must be ordered before + * those which merely test the variable. + * + * + * @param p1 + * A predicate. + * + * @param p2 + * A different predicate. + * + * @return The variables in common -or- <code>null</code> iff there are no + * variables in common. + * + * @throws IllegalArgumentException + * if the two predicates are the same reference. + * + * @todo It should be an error if a variable appear in a test is not bound + * by any possible join path. However, note that it may not be + * possible to determine this by local examination of a join graph + * since we do not know which variables may be presented as already + * bound when the join graph is evaluated (but we can only run the + * join graph currently against static source binding sets and for + * that case this is knowable). + * + * @todo When a variable is only optionally bound and it is discovered at + * runtime that the variable is not bound when it is considered by a + * CONSTRAINT, FILTER, etc., then the SPARQL semantics are that + * evaluation should produce a 'type' error which would cause the + * solution should fail (at least within its current join group). See + * https://sourceforge.net/apps/trac/bigdata/ticket/179. + * + * @todo Unit tests, including those which verify that variables appearing + * in the constraints are reported as shared with those appearing in + * the predicates operands. + */ + static Set<IVariable<?>> getSharedVars(final IPredicate p1, final IPredicate p2) { + + // The set of variables which are shared by those predicates. + final Set<IVariable<?>> sharedVars = new LinkedHashSet<IVariable<?>>(); + + /* + * Collect the variables appearing anyway in [p1], including the + * predicate's operands and its constraints, filters, etc. + */ + final Set<IVariable<?>> p1vars = new LinkedHashSet<IVariable<?>>(); + { + + final Iterator<IVariable<?>> itr = BOpUtility + .getSpannedVariables(p1); + + while (itr.hasNext()) { + + p1vars.add(itr.next()); + + } + + } + + /* + * Consider the variables appearing anyway in [p2], including the + * predicate's operands and its constraints, filters, etc. 
+ */ + { + + final Iterator<IVariable<?>> itr = BOpUtility + .getSpannedVariables(p2); + + while (itr.hasNext()) { + + final IVariable<?> avar = itr.next(); + + if(p1vars.contains(avar)) { + + sharedVars.add(avar); + + } + + } + + } + + return sharedVars; + + } + + /** * Exception thrown when the join graph does not have any solutions in the * data (running the query does not produce any results). */ Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java 2011-01-16 19:26:10 UTC (rev 4111) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java 2011-01-16 19:30:52 UTC (rev 4112) @@ -27,7 +27,6 @@ package com.bigdata.bop.join; -import java.nio.channels.ClosedByInterruptException; import java.util.Arrays; import java.util.Collection; import java.util.Collections; @@ -78,7 +77,6 @@ import com.bigdata.service.DataService; import com.bigdata.striterator.IChunkedOrderedIterator; import com.bigdata.striterator.IKeyOrder; -import com.bigdata.util.InnerCause; import com.bigdata.util.concurrent.Haltable; import com.bigdata.util.concurrent.LatchedExecutor; @@ -132,12 +130,6 @@ String SELECT = PipelineJoin.class.getName() + ".select"; /** - * An optional {@link IConstraint}[] which places restrictions on the - * legal patterns in the variable bindings. - */ - String CONSTRAINTS = PipelineJoin.class.getName() + ".constraints"; - - /** * Marks the join as "optional" in the SPARQL sense. Binding sets which * fail the join will be routed to the alternative sink as specified by * either {@link PipelineOp.Annotations#ALT_SINK_REF} or @@ -444,15 +436,6 @@ } /** - * @see Annotations#CONSTRAINTS - */ - public IConstraint[] constraints() { - - return getProperty(Annotations.CONSTRAINTS, null/* defaultValue */); - - } - - /** * @see Annotations#OPTIONAL */ public boolean isOptional() { @@ -659,7 +642,7 @@ this.joinOp = joinOp; this.predicate = joinOp.getPredicate(); - this.constraints = joinOp.constraints(); + this.constraints = predicate.constraints(); this.maxParallel = joinOp.getMaxParallel(); if (maxParallel < 0) throw new IllegalArgumentException(Annotations.MAX_PARALLEL Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestJoinGraph.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestJoinGraph.java 2011-01-16 19:26:10 UTC (rev 4111) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestJoinGraph.java 2011-01-16 19:30:52 UTC (rev 4112) @@ -90,6 +90,14 @@ public void test_Path_addEdge() { fail("write test"); } + + /** + * Test ability to identify shared variables appearing either as predicate + * operands or as part of CONSTRAINTS or FILTERS. 
+ */ + public void test_getSharedVariables() { + fail("write test"); + } // @Override // public Properties getProperties() { Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestSubqueryOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestSubqueryOp.java 2011-01-16 19:26:10 UTC (rev 4111) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestSubqueryOp.java 2011-01-16 19:30:52 UTC (rev 4112) @@ -596,7 +596,7 @@ // new NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// new NV(PipelineJoin.Annotations.PREDICATE, pred3Op),// // constraint d != Leon - new NV(PipelineJoin.Annotations.CONSTRAINTS, + new NV(Annotations.CONSTRAINTS, new IConstraint[] { new NEConstant(d, new Constant<String>("Leon")) }) // // join is optional. // new NV(PipelineJoin.Annotations.OPTIONAL, true),// Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/MockRunningQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/MockRunningQuery.java 2011-01-16 19:26:10 UTC (rev 4111) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/MockRunningQuery.java 2011-01-16 19:30:52 UTC (rev 4112) @@ -80,12 +80,10 @@ return indexManager; } - @Override public void halt(Void v) { log.warn("Mock object does not implement halt(Void)"); } - @Override public <T extends Throwable> T halt(T cause) { log.warn("Mock object does not implement halt(Throwable)"); return cause; Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java 2011-01-16 19:26:10 UTC (rev 4111) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java 2011-01-16 19:30:52 UTC (rev 4112) @@ -1128,7 +1128,7 @@ new NV(Predicate.Annotations.BOP_ID, joinId),// new NV(PipelineJoin.Annotations.PREDICATE, predOp),// // impose constraint on the join. - new NV(PipelineJoin.Annotations.CONSTRAINTS, + new NV(Annotations.CONSTRAINTS, new IConstraint[] { new EQConstant(y, new Constant<String>("Paul")) })// ); @@ -1592,7 +1592,7 @@ new NV(Predicate.Annotations.BOP_ID, joinId2),// new NV(PipelineJoin.Annotations.PREDICATE, pred2Op),// // constraint x == z - new NV(PipelineJoin.Annotations.CONSTRAINTS, + new NV(Annotations.CONSTRAINTS, new IConstraint[] { new EQ(x, z) }), // join is optional. new NV(PipelineJoin.Annotations.OPTIONAL, true),// Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestFederatedQueryEngine.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestFederatedQueryEngine.java 2011-01-16 19:26:10 UTC (rev 4111) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestFederatedQueryEngine.java 2011-01-16 19:30:52 UTC (rev 4112) @@ -717,7 +717,7 @@ new NV(Predicate.Annotations.EVALUATION_CONTEXT, BOpEvaluationContext.SHARDED),// // impose constraint on the join. 
- new NV(PipelineJoin.Annotations.CONSTRAINTS, + new NV(Annotations.CONSTRAINTS, new IConstraint[] { new EQConstant(y, new Constant<String>("Paul")) })); @@ -1233,7 +1233,7 @@ new NV(Predicate.Annotations.EVALUATION_CONTEXT, BOpEvaluationContext.SHARDED),// // constraint x == z - new NV(PipelineJoin.Annotations.CONSTRAINTS, + new NV(Annotations.CONSTRAINTS, new IConstraint[] { new EQ(x, z) }), // join is optional. new NV(PipelineJoin.Annotations.OPTIONAL, true),// Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/join/TestPipelineJoin.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/join/TestPipelineJoin.java 2011-01-16 19:26:10 UTC (rev 4111) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/join/TestPipelineJoin.java 2011-01-16 19:30:52 UTC (rev 4112) @@ -379,7 +379,7 @@ new BOp[] { startOp },// new NV(BOpBase.Annotations.BOP_ID, joinId),// new NV(PipelineJoin.Annotations.PREDICATE, predOp),// - new NV( PipelineJoin.Annotations.CONSTRAINTS, + new NV( Annotations.CONSTRAINTS, new IConstraint[] { new INBinarySearch<String>(y, set) })); // the expected solution (just one). Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java 2011-01-16 19:26:10 UTC (rev 4111) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java 2011-01-16 19:30:52 UTC (rev 4112) @@ -32,7 +32,6 @@ import com.bigdata.bop.engine.QueryEngine; import com.bigdata.bop.engine.QueryLog; import com.bigdata.bop.fed.QueryEngineFactory; -import com.bigdata.bop.join.PipelineJoin; import com.bigdata.journal.Journal; import com.bigdata.rdf.internal.XSDIntIV; import com.bigdata.rdf.internal.constraints.CompareBOp; @@ -341,7 +340,7 @@ * the IPredicate and apply them to the constructed join * operator. 
*/ - new NV(PipelineJoin.Annotations.CONSTRAINTS, + new NV(Annotations.CONSTRAINTS, new IConstraint[] {// new NEConstant(product, new Constant(product53999 .getIV())) // @@ -391,7 +390,7 @@ new NV(BOp.Annotations.BOP_ID, nextId++),// new NV(Annotations.TIMESTAMP, timestamp),// new NV(IPredicate.Annotations.RELATION_NAME, spoRelation), - new NV(PipelineJoin.Annotations.CONSTRAINTS, + new NV(Annotations.CONSTRAINTS, new IConstraint[] {// new CompareBOp(new BOp[] { simProperty1, @@ -435,7 +434,7 @@ new NV(BOp.Annotations.BOP_ID, nextId++),// new NV(Annotations.TIMESTAMP, timestamp),// new NV(IPredicate.Annotations.RELATION_NAME, spoRelation), - new NV(PipelineJoin.Annotations.CONSTRAINTS, + new NV(Annotations.CONSTRAINTS, new IConstraint[] {// new CompareBOp(new BOp[] { simProperty2, Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/Rule2BOpUtility.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/Rule2BOpUtility.java 2011-01-16 19:26:10 UTC (rev 4111) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/Rule2BOpUtility.java 2011-01-16 19:30:52 UTC (rev 4112) @@ -631,11 +631,6 @@ anns.add(new NV(PipelineJoin.Annotations.OPTIONAL, pred .isOptional())); - if (!constraints.isEmpty()) - anns.add(new NV(PipelineJoin.Annotations.CONSTRAINTS, - constraints - .toArray(new IConstraint[constraints.size()]))); - /* * Pull off annotations before we clear them from the predicate. */ @@ -653,6 +648,13 @@ pred = pred.clearAnnotations(new String[] { Annotations.SCOPE, Annotations.QUADS, Annotations.DATASET }); + if (!constraints.isEmpty()) { + // decorate the predicate with any constraints. + pred = (Predicate<?>) pred.setProperty( + IPredicate.Annotations.CONSTRAINTS, constraints + .toArray(new IConstraint[constraints.size()])); + } + if (quads) { /* This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
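The getSharedVars() method introduced above boils down to a set intersection over the variables "spanned" by each predicate, where spanned deliberately includes variables appearing in attached constraints and filters, not just the predicate's operands. The following is a minimal self-contained sketch of that rule using plain string variable names; SimplePred and spanned() are hypothetical stand-ins for illustration, not the actual IPredicate/BOpUtility API:

import java.util.Arrays;
import java.util.LinkedHashSet;
import java.util.Set;

public class SharedVarsSketch {

    /**
     * Toy stand-in for an IPredicate: its operand variables plus the
     * variables used by any constraints/filters attached to it.
     */
    static class SimplePred {

        final Set<String> operandVars;
        final Set<String> constraintVars;

        SimplePred(final Set<String> operandVars,
                final Set<String> constraintVars) {
            this.operandVars = operandVars;
            this.constraintVars = constraintVars;
        }

        /** All variables spanned by the predicate (operands plus filters). */
        Set<String> spanned() {
            final Set<String> all = new LinkedHashSet<String>(operandVars);
            all.addAll(constraintVars);
            return all;
        }

    }

    /** Variables in common; a join graph edge exists iff this is non-empty. */
    static Set<String> getSharedVars(final SimplePred p1, final SimplePred p2) {
        if (p1 == p2)
            throw new IllegalArgumentException();
        final Set<String> shared = new LinkedHashSet<String>(p1.spanned());
        shared.retainAll(p2.spanned());
        return shared;
    }

    public static void main(final String[] args) {
        // pred1(?x,?y) with a filter on ?z; pred2(?y,?z).
        final SimplePred p1 = new SimplePred(
                new LinkedHashSet<String>(Arrays.asList("x", "y")),
                new LinkedHashSet<String>(Arrays.asList("z")));
        final SimplePred p2 = new SimplePred(
                new LinkedHashSet<String>(Arrays.asList("y", "z")),
                new LinkedHashSet<String>());
        // Prints [y, z]: ?z is shared through p1's filter, not its operands.
        System.out.println(getSharedVars(p1, p2));
    }

}

Note the ordering consequence spelled out in the javadoc above: when a variable is shared only through a constraint or filter, some earlier join must actually bind it before the constraint can be evaluated.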
From: <tho...@us...> - 2011-01-19 18:34:52
Revision: 4130 http://bigdata.svn.sourceforge.net/bigdata/?rev=4130&view=rev Author: thompsonbry Date: 2011-01-19 18:34:43 +0000 (Wed, 19 Jan 2011) Log Message: ----------- - Got rid of PipelineJoin.Annotations.OPTIONAL and just test IPredicate.isOptional() instead of propagating the annotation to the join. - Moved CONSTRAINTS back to the PipelineJoin. - Changed the namespaces of some annotations (IPredicate mainly). - Modified the various runtime query optimizer test suites to run against small data sets bundled within SVN by default. - Added the "bar" data set from MRoy and the data generator for that data set. - Working on a PartitionedJoinGroup utility class which will break down a set of predicates and constraints into a join graph, head plan, and tail plan. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestJoinGraph.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestSubqueryOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestFederatedQueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/join/TestPipelineJoin.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBarData.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/Rule2BOpUtility.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/PartitionedJoinGroup.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestJGraph.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestPartitionedJoinGroup.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/resources/data/barData/ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/resources/data/barData/barData.trig.gz branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/AbstractJoinGraphTestCase.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/GenerateBarData.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphWithRDF.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOp.java 2011-01-19 18:22:38 UTC (rev 4129) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOp.java 2011-01-19 18:34:43 UTC (rev 
4130) @@ -31,8 +31,6 @@ import java.util.List; import java.util.Map; -import com.bigdata.bop.IPredicate.Annotations; - import cutthecrap.utils.striterators.IPropertySet; /** @@ -182,6 +180,13 @@ */ BOpEvaluationContext getEvaluationContext(); + /** + * Return <code>true</code> iff this operator is a controller. + * + * @see Annotations#CONTROLLER + */ + boolean isController(); + // /** // * Return <code>true</code> iff this operator is an access path which writes // * on the database. @@ -248,6 +253,8 @@ /** * This annotation determines where an operator will be evaluated * (default {@value #DEFAULT_EVALUATION_CONTEXT}). + * + * @see BOpEvaluationContext */ String EVALUATION_CONTEXT = BOp.class.getName() + ".evaluationContext"; @@ -260,6 +267,8 @@ * arguments. Thus control operators mark a boundary in pipelined * evaluation. Some examples of control operators include UNION, STEPS, * and STAR (aka transitive closure). + * + * @see BOp#isController() */ String CONTROLLER = BOp.class.getName()+".controller"; Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java 2011-01-19 18:22:38 UTC (rev 4129) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java 2011-01-19 18:34:43 UTC (rev 4130) @@ -424,6 +424,12 @@ } + public boolean isController() { + + return getProperty(Annotations.CONTROLLER, false); + + } + /** * Set an annotation. * <p> Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java 2011-01-19 18:22:38 UTC (rev 4129) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java 2011-01-19 18:34:43 UTC (rev 4130) @@ -34,7 +34,6 @@ import com.bigdata.bop.ap.filter.BOpFilterBase; import com.bigdata.bop.ap.filter.BOpTupleFilter; import com.bigdata.bop.ap.filter.DistinctFilter; -import com.bigdata.bop.join.PipelineJoin; import com.bigdata.btree.IRangeQuery; import com.bigdata.btree.ITuple; import com.bigdata.btree.ITupleCursor; @@ -85,7 +84,7 @@ * @see https://sourceforge.net/apps/trac/bigdata/ticket/180 (Migrate * the RDFS inference and truth maintenance logic to BOPs) */ - String RELATION_NAME = "relationName"; + String RELATION_NAME = IPredicate.class.getName() + ".relationName"; // /** // * The {@link IKeyOrder} which will be used to read on the relation. @@ -98,15 +97,9 @@ // String KEY_ORDER = "keyOrder"; /** - * <code>true</code> iff the predicate is optional (the right operand of - * a left join). - * - * @deprecated This flag is being moved to the join operator (or should - * it stay with the predicate so we can work with join - * graphs more easily, but note that join graphs never - * contain optional joins). + * <code>true</code> iff the predicate has SPARQL optional semantics. */ - String OPTIONAL = "optional"; + String OPTIONAL = IPredicate.class.getName() + ".optional"; // /** // * Constraints on the elements read from the relation. @@ -146,7 +139,7 @@ * * @see IRangeQuery#rangeIterator(byte[], byte[], int, int, IFilter) */ - String INDEX_LOCAL_FILTER = "indexLocalFilter"; + String INDEX_LOCAL_FILTER = IPredicate.class.getName() + ".indexLocalFilter"; /** * An optional {@link BOpFilterBase} to be applied to the elements of @@ -163,7 +156,7 @@ * one another. 
You can chain {@link FilterBase} filters together as * well. */ - String ACCESS_PATH_FILTER = "accessPathFilter"; + String ACCESS_PATH_FILTER = IPredicate.class.getName() + ".accessPathFilter"; /** * Access path expander pattern. This allows you to wrap or replace the @@ -192,13 +185,13 @@ * * @see IAccessPathExpander */ - String ACCESS_PATH_EXPANDER = "accessPathExpander"; + String ACCESS_PATH_EXPANDER = IPredicate.class.getName() + ".accessPathExpander"; /** * The partition identifier -or- <code>-1</code> if the predicate does * not address a specific shard. */ - String PARTITION_ID = "partitionId"; + String PARTITION_ID = IPredicate.class.getName() + ".partitionId"; int DEFAULT_PARTITION_ID = -1; @@ -240,7 +233,7 @@ * * @see BOpEvaluationContext */ - String REMOTE_ACCESS_PATH = "remoteAccessPath"; + String REMOTE_ACCESS_PATH = IPredicate.class.getName() + ".remoteAccessPath"; boolean DEFAULT_REMOTE_ACCESS_PATH = true; @@ -307,9 +300,9 @@ * Operators which read or write on the database must declare the * {@link Annotations#TIMESTAMP} associated with that operation. * - * @see Annotations#TIMESTAMP + * @see #TIMESTAMP */ - String MUTATION = BOp.class.getName() + ".mutation"; + String MUTATION = IPredicate.class.getName() + ".mutation"; boolean DEFAULT_MUTATION = false; @@ -317,17 +310,15 @@ * The timestamp (or transaction identifier) used by this operator if it * reads or writes on the database (no default). * - * @see com.bigdata.bop.IPredicate.Annotations#MUTATION - * - * @todo Move to {@link IPredicate}? + * @see #MUTATION */ - String TIMESTAMP = BOp.class.getName() + ".timestamp"; + String TIMESTAMP = IPredicate.class.getName() + ".timestamp"; - /** - * An optional {@link IConstraint}[] which places restrictions on the - * legal patterns in the variable bindings. - */ - String CONSTRAINTS = PipelineJoin.class.getName() + ".constraints"; +// /** +// * An optional {@link IConstraint}[] which places restrictions on the +// * legal patterns in the variable bindings. +// */ +// String CONSTRAINTS = PipelineJoin.class.getName() + ".constraints"; } @@ -404,14 +395,9 @@ * For mutation, some {@link IRelation}s may require that all variables * appearing in the head are bound. This and similar constraints can be * enforced using {@link IConstraint}s on the {@link IRule}. - * <p> - * More control over the behavior of optionals may be gained through the use - * of an {@link IAccessPathExpander} pattern. * * @return <code>true</code> iff this predicate is optional when evaluating * a JOIN. - * - * @deprecated By {@link PipelineJoin.Annotations#OPTIONAL} */ public boolean isOptional(); @@ -441,21 +427,21 @@ // */ // public IElementFilter<E> getConstraint(); - /** - * Return the optional {@link IConstraint}[] to be applied by a join which - * evaluates this {@link IPredicate}. - * <p> - * Note: The {@link Annotations#CONSTRAINTS} are annotated on the - * {@link IPredicate} rather than the join operators so they may be used - * with join graphs, which are expressed solely as an unordered set of - * {@link IPredicate}s. Using join graphs, we are able to do nifty things - * such as runtime query optimization which would not be possible if the - * annotations were decorating the joins since we would be unable to - * dynamically generate the join operators with the necessary annotations. - * - * @see Annotations#CONSTRAINTS - */ - public IConstraint[] constraints(); +// /** +// * Return the optional {@link IConstraint}[] to be applied by a join which +// * evaluates this {@link IPredicate}. 
+// * <p> +// * Note: The {@link Annotations#CONSTRAINTS} are annotated on the +// * {@link IPredicate} rather than the join operators so they may be used +// * with join graphs, which are expressed solely as an unordered set of +// * {@link IPredicate}s. Using join graphs, we are able to do nifty things +// * such as runtime query optimization which would not be possible if the +// * annotations were decorating the joins since we would be unable to +// * dynamically generate the join operators with the necessary annotations. +// * +// * @see Annotations#CONSTRAINTS +// */ +// public IConstraint[] constraints(); /** * Return the optional filter to be evaluated local to the index. Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java 2011-01-19 18:22:38 UTC (rev 4129) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java 2011-01-19 18:34:43 UTC (rev 4130) @@ -91,19 +91,19 @@ boolean DEFAULT_SHARED_STATE = false; - /** - * Annotation used to mark a set of (non-optional) joins which may be - * freely reordered by the query optimizer in order to minimize the - * amount of work required to compute the solutions. - * <p> - * Note: Optional joins MAY NOT appear within a join graph. Optional - * joins SHOULD be evaluated as part of the "tail plan" following the - * join graph, but before operations such as SORT, DISTINCT, etc. When - * the query plan includes {@link #CONDITIONAL_GROUP}s, those groups - * include a leading {@link #JOIN_GRAPH} (required joins) followed by - * zero or more optional joins. - */ - String JOIN_GRAPH = PipelineOp.class.getName() + ".joinGraph"; +// /** +// * Annotation used to mark a set of (non-optional) joins which may be +// * freely reordered by the query optimizer in order to minimize the +// * amount of work required to compute the solutions. +// * <p> +// * Note: Optional joins MAY NOT appear within a join graph. Optional +// * joins SHOULD be evaluated as part of the "tail plan" following the +// * join graph, but before operations such as SORT, DISTINCT, etc. When +// * the query plan includes {@link #CONDITIONAL_GROUP}s, those groups +// * include a leading {@link #JOIN_GRAPH} (required joins) followed by +// * zero or more optional joins. 
+// */ +// String JOIN_GRAPH = PipelineOp.class.getName() + ".joinGraph"; // /** // * Annotation used to mark a set of operators belonging to a conditional Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java 2011-01-19 18:22:38 UTC (rev 4129) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java 2011-01-19 18:34:43 UTC (rev 4130) @@ -245,12 +245,12 @@ // // } - public IConstraint[] constraints() { +// public IConstraint[] constraints() { +// +// return getProperty(IPredicate.Annotations.CONSTRAINTS, null/* defaultValue */); +// +// } - return getProperty(IPredicate.Annotations.CONSTRAINTS, null/* defaultValue */); - - } - final public IFilter getIndexLocalFilter() { return (IFilter) getProperty(Annotations.INDEX_LOCAL_FILTER); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2011-01-19 18:22:38 UTC (rev 4129) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2011-01-19 18:34:43 UTC (rev 4130) @@ -47,13 +47,13 @@ import org.apache.log4j.Logger; import com.bigdata.bop.BOp; -import com.bigdata.bop.BOpBase; import com.bigdata.bop.BOpContext; import com.bigdata.bop.BOpContextBase; import com.bigdata.bop.BOpEvaluationContext; import com.bigdata.bop.BOpIdFactory; import com.bigdata.bop.BOpUtility; import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IConstraint; import com.bigdata.bop.IElement; import com.bigdata.bop.IPredicate; import com.bigdata.bop.IVariable; @@ -80,10 +80,10 @@ * A join graph with annotations for estimated cardinality and other details in * support of runtime query optimization. A join graph is a collection of * relations and joins which connect those relations. This boils down to a - * collection of {@link IPredicate}s (selects on relations) and shared variables - * (which identify joins). Operators other than standard joins (including - * optional joins, sort, order by, etc.) must be handled downstream from the - * join graph in a "tail plan". + * collection of {@link IPredicate}s (selects on relations), shared variables + * (which identify joins), and {@link IConstraint}s (which limit solutions). + * Operators other than standard joins (including optional joins, sort, order + * by, etc.) must be handled downstream from the join graph in a "tail plan". * * @see http://arxiv.org/PS_cache/arxiv/pdf/0810/0810.4809v1.pdf, XQuery Join * Graph Isolation. @@ -197,10 +197,10 @@ * approach should be able to handle queries without perfect / covering * automatically. Then experiment with carrying fewer statement indices * for quads. - * + * * @todo Unit test when there are no solutions to the query. In this case there * will be no paths identified by the optimizer and the final path length - * becomes zero. + * becomes zero. */ public class JoinGraph extends PipelineOp { @@ -214,17 +214,24 @@ */ public interface Annotations extends PipelineOp.Annotations { - /** - * The vertices of the join graph expressed an an {@link IPredicate}[]. - */ - String VERTICES = JoinGraph.class.getName() + ".vertices"; + /** + * The vertices of the join graph, expressed an an {@link IPredicate}[] + * (required). 
+ */ + String VERTICES = JoinGraph.class.getName() + ".vertices"; - /** - * The initial limit for cutoff sampling (default - * {@value #DEFAULT_LIMIT}). - */ - String LIMIT = JoinGraph.class.getName() + ".limit"; + /** + * The constraints on the join graph, expressed an an + * {@link IConstraint}[] (optional, defaults to no constraints). + */ + String CONSTRAINTS = JoinGraph.class.getName() + ".constraints"; + /** + * The initial limit for cutoff sampling (default + * {@value #DEFAULT_LIMIT}). + */ + String LIMIT = JoinGraph.class.getName() + ".limit"; + int DEFAULT_LIMIT = 100; /** @@ -240,12 +247,21 @@ /** * @see Annotations#VERTICES */ - public IPredicate[] getVertices() { + public IPredicate<?>[] getVertices() { return (IPredicate[]) getRequiredProperty(Annotations.VERTICES); } + /** + * @see Annotations#CONSTRAINTS + */ + public IConstraint[] getConstraints() { + + return (IConstraint[]) getProperty(Annotations.CONSTRAINTS, null/* none */); + + } + /** * @see Annotations#LIMIT */ @@ -264,25 +280,42 @@ } - public JoinGraph(final NV... anns) { + public JoinGraph(final BOp[] args, final NV... anns) { - this(BOpBase.NOARGS, NV.asMap(anns)); + this(args, NV.asMap(anns)); } - public JoinGraph(final BOp[] args, final Map<String, Object> anns) { + public JoinGraph(final BOp[] args, final Map<String, Object> anns) { - super(args, anns); + super(args, anns); - switch (getEvaluationContext()) { - case CONTROLLER: - break; - default: - throw new UnsupportedOperationException( - Annotations.EVALUATION_CONTEXT + "=" - + getEvaluationContext()); - } + // required property. + final IPredicate<?>[] vertices = (IPredicate[]) getProperty(Annotations.VERTICES); + if (vertices == null) + throw new IllegalArgumentException(Annotations.VERTICES); + + if (vertices.length == 0) + throw new IllegalArgumentException(Annotations.VERTICES); + + if (getLimit() <= 0) + throw new IllegalArgumentException(Annotations.LIMIT); + + if (getNEdges() <= 0) + throw new IllegalArgumentException(Annotations.NEDGES); + + if (!isController()) + throw new IllegalArgumentException(); + + switch (getEvaluationContext()) { + case CONTROLLER: + break; + default: + throw new IllegalArgumentException(Annotations.EVALUATION_CONTEXT + + "=" + getEvaluationContext()); + } + } public FutureTask<Void> eval(final BOpContext<IBindingSet> context) { @@ -383,7 +416,7 @@ /** * The most recently taken sample of the {@link Vertex}. */ - VertexSample sample = null; + transient VertexSample sample = null; Vertex(final IPredicate<?> pred) { @@ -768,7 +801,7 @@ * the cutoff sample of a join path having this edge except for the * degenerate case where the edge is the first edge in the join path. */ - public EdgeSample sample = null; + transient EdgeSample sample = null; public Edge(final Vertex v1, final Vertex v2, final Set<IVariable<?>> shared) { @@ -1560,6 +1593,10 @@ * FIXME CONSTRAINT ORDERING : It is illegal to add a vertex to the * path if any variable appearing in its CONSTRAINTS would not be * bound. + * + * FIXME CONSTRAINT ORDERING : Rather than constraints imposing an + * ordering on joins, constraints need to be attached dynamically to + * the first join for which their variables are known to be bound. */ final EdgeSample edgeSample = e.estimateCardinality(queryEngine, @@ -1787,7 +1824,31 @@ return sb.toString(); } - public JGraph(final IPredicate[] v) { + /** + * + * @param v + * The vertices of the join graph. These are + * {@link IPredicate}s associated with required joins. 
+ * @param constraints + * The constraints of the join graph (optional). Since all + * joins in the join graph are required, constraints are + * dynamically attached to the first join in which all of + * their variables are bound. + * + * @throws IllegalArgumentException + * if the vertices is <code>null</code>. + * @throws IllegalArgumentException + * if the vertices is an empty array. + * @throws IllegalArgumentException + * if any element of the vertices is <code>null</code>. + * @throws IllegalArgumentException + * if any constraint uses a variable which is never bound by + * the given predicates. + * + * @todo unit test for a constraint using a variable which is never + * bound. + */ + public JGraph(final IPredicate[] v, final IConstraint[] constraints) { if (v == null) throw new IllegalArgumentException(); @@ -1797,8 +1858,11 @@ V = new Vertex[v.length]; - for (int i = 0; i < v.length; i++) { + for (int i = 0; i < v.length; i++) { + if (v[i] == null) + throw new IllegalArgumentException(); + V[i] = new Vertex(v[i]); } @@ -1833,9 +1897,12 @@ // The set of identified edges for vertices which share vars. final List<Edge> tmp = new LinkedList<Edge>(); - + + // The set of vertices which share variables. + final Set<Vertex> sharedEdgeVertices = new LinkedHashSet<Vertex>(); + // The set of vertices which do not share variables. - final List<Vertex> unshared = new LinkedList<Vertex>(); + final List<Vertex> unsharedEdgeVertices = new LinkedList<Vertex>(); for (int i = 0; i < v.length; i++) { @@ -1865,31 +1932,44 @@ * or not the join path would bind the variable(s) * appearing in the CONSTRAINT. */ + + if (log.isDebugEnabled()) + log.debug("vertices shared variable(s): vars=" + + shared + ", v1=" + p1 + ", v2=" + p2); + tmp.add(new Edge(V[i], V[j], shared)); + sharedEdgeVertices.add(V[i]); + + sharedEdgeVertices.add(V[j]); + nmatched++; } } - if (nmatched == 0) { + if (nmatched == 0 && !sharedEdgeVertices.contains(V[i])) { /* * The source vertex does not share any variables. */ - - unshared.add(V[i]); + log + .warn("Vertex does not share any variables: " + + V[i]); + + unsharedEdgeVertices.add(V[i]); + } } E = tmp.toArray(new Edge[0]); - this.unshared = unshared.toArray(new Vertex[0]); + this.unshared = unsharedEdgeVertices.toArray(new Vertex[0]); - if(!unshared.isEmpty()) { + if(!unsharedEdgeVertices.isEmpty()) { /* * FIXME This needs to be supported. We should explore and @@ -1902,8 +1982,9 @@ * the evaluation). */ - throw new UnsupportedOperationException( - "Some predicates do not share any variables with other predicates."); + throw new UnsupportedOperationException( + "Some predicates do not share any variables with other predicates: unshared=" + + unsharedEdgeVertices); } @@ -2794,7 +2875,7 @@ private final BOpContext<IBindingSet> context; - private final JGraph g; +// private final JGraph g; private int limit; @@ -2813,22 +2894,20 @@ // The initial number of edges (1 step paths) to explore. nedges = getNEdges(); - if (limit <= 0) - throw new IllegalArgumentException(); +// if (limit <= 0) +// throw new IllegalArgumentException(); +// +// if (nedges <= 0) +// throw new IllegalArgumentException(); - if (nedges <= 0) - throw new IllegalArgumentException(); +// g = new JGraph(getVertices(), getConstraints()); - final IPredicate[] v = getVertices(); - - g = new JGraph(v); - } public Void call() throws Exception { // Create the join graph. - final JGraph g = new JGraph(getVertices()); + final JGraph g = new JGraph(getVertices(), getConstraints()); // Find the best join path. 
final Path p = g.runtimeOptimizer(context.getRunningQuery() @@ -2839,7 +2918,7 @@ // Generate the query from the join path. final PipelineOp queryOp = JoinGraph.getQuery(idFactory, p - .getPredicates()); + .getPredicates(), getConstraints()); // Run the query, blocking until it is done. JoinGraph.runSubquery(context, queryOp); @@ -2850,27 +2929,27 @@ } // class JoinGraphTask - /** - * Places vertices into order by the {@link BOp#getId()} associated with - * their {@link IPredicate}. - */ - private static class BOpIdComparator implements Comparator<Vertex> { +// /** +// * Places vertices into order by the {@link BOp#getId()} associated with +// * their {@link IPredicate}. +// */ +// private static class BOpIdComparator implements Comparator<Vertex> { +// +// private static final transient Comparator<Vertex> INSTANCE = new BOpIdComparator(); +// +//// @Override +// public int compare(final Vertex o1, final Vertex o2) { +// final int id1 = o1.pred.getId(); +// final int id2 = o2.pred.getId(); +// if (id1 < id2) +// return -1; +// if (id2 > id1) +// return 1; +// return 0; +// } +// +// } - private static final transient Comparator<Vertex> INSTANCE = new BOpIdComparator(); - -// @Override - public int compare(final Vertex o1, final Vertex o2) { - final int id1 = o1.pred.getId(); - final int id2 = o2.pred.getId(); - if (id1 < id2) - return -1; - if (id2 > id1) - return 1; - return 0; - } - - } - /** * Places edges into order by ascending estimated cardinality. Edges which * are not weighted are ordered to the end. @@ -2915,18 +2994,27 @@ * we are not yet handling anything except standard joins in the runtime * optimizer. */ - - /** - * Generate a query plan from an ordered collection of predicates. - * - * @param p - * The join path. - * - * @return The query plan. - */ - static public PipelineOp getQuery(final BOpIdFactory idFactory, - final IPredicate[] preds) { + /** + * Generate a query plan from an ordered collection of predicates. + * + * @param p + * The join path. + * + * @return The query plan. + * + * FIXME Verify that constraints are attached correctly to the + * returned query. + */ + static public PipelineOp getQuery(final BOpIdFactory idFactory, + final IPredicate[] preds, final IConstraint[] constraints) { + + if (constraints != null && constraints.length != 0) { + // FIXME Constraints must be attached to joins. + throw new UnsupportedOperationException( + "Constraints must be attached to joins!"); + } + final PipelineJoin[] joins = new PipelineJoin[preds.length]; // final PipelineOp startOp = new StartOp(new BOp[] {}, @@ -3191,7 +3279,7 @@ return sharedVars; } - + /** * Exception thrown when the join graph does not have any solutions in the * data (running the query does not produce any results). 
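The dynamic attachment rule described in the JGraph javadoc above (and flagged by the FIXME in getQuery(), which still rejects join graphs with constraints) can be sketched independently of the bigdata classes: walk the join order chosen by the optimizer, accumulate the set of bound variables, and hand each constraint to the first join at which all of its variables have become bound. In the sketch below every name is a hypothetical stand-in; variables are plain strings and constraints are identified by their index:

import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;

public class AttachConstraintsSketch {

    /**
     * @param predVars
     *            the variables bound by each predicate, in the evaluation
     *            order chosen by the optimizer.
     * @param constraintVars
     *            the variables used by each constraint.
     *
     * @return for each join index, the constraints to run at that join.
     */
    static Map<Integer, List<Integer>> attach(
            final List<Set<String>> predVars,
            final List<Set<String>> constraintVars) {

        final Map<Integer, List<Integer>> out = new LinkedHashMap<Integer, List<Integer>>();

        // The variables known to be bound so far.
        final Set<String> bound = new LinkedHashSet<String>();

        // The constraints (by index) not yet attached to some join.
        final List<Integer> pending = new LinkedList<Integer>();
        for (int c = 0; c < constraintVars.size(); c++)
            pending.add(c);

        for (int i = 0; i < predVars.size(); i++) {
            // Variables bound once join [i] has run.
            bound.addAll(predVars.get(i));
            final Iterator<Integer> itr = pending.iterator();
            while (itr.hasNext()) {
                final int c = itr.next();
                if (bound.containsAll(constraintVars.get(c))) {
                    // First join at which all the constraint's vars are bound.
                    List<Integer> list = out.get(i);
                    if (list == null)
                        out.put(i, list = new ArrayList<Integer>());
                    list.add(c);
                    itr.remove();
                }
            }
        }

        if (!pending.isEmpty()) {
            // Some constraint uses a variable never bound by any predicate.
            throw new IllegalArgumentException("Unattachable constraints: "
                    + pending);
        }

        return out;

    }

}

A constraint still pending at the end of the walk uses a variable which no predicate ever binds, which is exactly the rejection case called out in the JGraph constructor javadoc above.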
Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/PartitionedJoinGroup.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/PartitionedJoinGroup.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/PartitionedJoinGroup.java 2011-01-19 18:34:43 UTC (rev 4130) @@ -0,0 +1,360 @@ +package com.bigdata.bop.controller; + +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.LinkedHashSet; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import com.bigdata.bop.BOpUtility; +import com.bigdata.bop.IConstraint; +import com.bigdata.bop.IPredicate; +import com.bigdata.bop.IVariable; +import com.bigdata.bop.controller.JoinGraph.NoSolutionsException; + +/** + * Class accepts a join group and partitions it into a join graph and a tail + * plan. + * <p> + * A join group consists of an ordered collection of {@link IPredicate}s and an + * unordered collection of {@link IConstraint}s. {@link IPredicate} representing + * non-optional joins are extracted into a {@link JoinGraph} along with any + * {@link IConstraint}s whose variables are guaranteed to be bound by the + * implied joins. + * <p> + * The remainder of the {@link IPredicate}s and {@link IConstraint}s form a + * "tail plan". {@link IConstraint}s in the tail plan are attached to the last + * {@link IPredicate} at which their variable(s) MIGHT have become bound. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * + * @todo However, how do we manage when there are things like conditional + * routing operators? + * + * @todo The order of the {@link IPredicate}s in the tail plan is currently + * unchanged from their given order (optional joins without constraints + * can not reduce the selectivity of the query). However, it could be + * worthwhile to run optionals with constraints before those without + * constraints since the constraints can reduce the selectivity of the + * query. If we do this, then we need to reorder the optionals based on + * the partial order imposed what variables they MIGHT bind (which are not + * bound by the join graph). + * + * @todo Things like LET can also bind variables. So can a subquery. Analysis of + * those will tell us whether the variable will definitely or + * conditionally become bound (I am assuming that a LET can conditionally + * leave a variable unbound). + * + * @todo runFirst flag on the expander (for free text search). this should be an + * annotation. this can be a headPlan. [There can be constraints which are + * evaluated against the head plan. They need to get attached to the joins + * generated for the head plan. MikeP writes: There is a free text search + * access path that replaces the actual access path for the predicate, + * which is meaningless in an of itself because the P is magical.] + * + * @todo inline APs and get rid of DataSetJoin. Rewrite NG and DG first. + * + * @todo write a method which returns the set of constraints which should be run + * for the last predicate in a given join path (a join path is just an + * ordered array of predicates). 
+ */ +public class PartitionedJoinGroup { + +// private final IPredicate<?>[] headPlan; +// +// private final IConstraint[] headPlanConstraints; + + private final IPredicate<?>[] joinGraphPredicates; + + private final IConstraint[] joinGraphConstraints; + + private final IPredicate<?>[] tailPlan; + +// private final IConstraint[] tailPlanConstraints; + + + /** + * The {@link IPredicate}s in the join graph (required joins). + */ + public IPredicate<?>[] getJoinGraphPredicates() { + return joinGraphPredicates; + } + + /** + * The {@link IConstraint}s to be applied to the {@link IPredicate}s in the + * join graph. Each {@link IConstraint} should be applied as soon as all of + * its variable(s) are known to be bound. The constraints are not attached + * to the {@link IPredicate}s in the join graph because the evaluation order + * of those {@link IPredicate}s is not yet known (it will be determined by a + * query optimizer when it decides on an evaluation order for those joins). + */ + public IConstraint[] getJoinGraphConstraints() { + return joinGraphConstraints; + } + + /** + * The {@link IPredicate}s representing optional joins. Any + * {@link IConstraint}s having variable(s) NOT bound by the required joins + * will already have been attached to the last {@link IPredicate} in the + * tail plan in which their variable(S) MIGHT have been bound. + */ + public IPredicate<?>[] getTailPlan() { + return tailPlan; + } + +// private PartitionedJoinGroup(// +// final IPredicate<?>[] headPlan,// +// final IConstraint[] headPlanConstraints,// +// final IPredicate<?>[] joinGraphPredicates,// +// final IConstraint[] joinGraphConstraints,// +// final IPredicate<?>[] tailPlan,// +// final IConstraint[] tailPlanConstraints// +// ) { +// this.headPlan = headPlan; +// this.headPlanConstraints = headPlanConstraints; +// this.joinGraphPredicates = joinGraphPredicates; +// this.joinGraphConstraints = joinGraphConstraints; +// this.tailPlan = tailPlan; +// this.tailPlanConstraints = tailPlanConstraints; +// } + + /** + * Analyze a set of {@link IPredicate}s representing optional and + * non-optional joins and a collection of {@link IConstraint}s, partitioning + * them into a join graph and a tail plan. + * + * @param sourcePreds + * The predicates. + * @param constraints + * The constraints. + * + * @return A data structure representing both the join graph and the tail + * plan. + * + * @throws IllegalArgumentException + * if the source predicates array is <code>null</code>. + * @throws IllegalArgumentException + * if the source predicates array is empty. + * @throws IllegalArgumentException + * if any element of the source predicates array is + * <code>null</code>. + */ + public PartitionedJoinGroup(// + final IPredicate<?>[] sourcePreds,// + IConstraint[] constraints) { + + if (sourcePreds == null) + throw new IllegalArgumentException(); + if (sourcePreds.length == 0) + throw new IllegalArgumentException(); + + if (constraints == null) { + // replace with an empty array. + constraints = new IConstraint[0]; + } + + /* + * An unordered list of constraints which do not involve ANY variables. + * These constraints should be run first, before the join graph. + * + * @todo add to the class instance fields. + */ + final List<IConstraint> runFirstConstraints = new LinkedList<IConstraint>(); + +// final List<IPredicate<?>> headPlan = new LinkedList<IPredicate<?>>(); +// +// final List<IConstraint> headPlanConstraints = new LinkedList<IConstraint>(); + + /* + * The non-optional predicates. 
+ */ + final List<IPredicate<?>> joinGraphPredicates = new LinkedList<IPredicate<?>>(); + + /* + * The set of variables bound by the non-optional predicates. + */ + final Set<IVariable<?>> joinGraphVars = new LinkedHashSet<IVariable<?>>(); + + /* + * An unordered list of those constraints whose variables are known to + * be bound by the non-optional predicates. + */ + final List<IConstraint> joinGraphConstraints = new LinkedList<IConstraint>(); + + /* + * The predicates representing the optional joins. + */ + final List<IPredicate<?>> tailPlan = new LinkedList<IPredicate<?>>(); + + /* + * An unordered list of those constraints containing at least one + * variable known to NOT be bound by the non-optional predicates. + */ + final List<IConstraint> tailPlanConstraints = new LinkedList<IConstraint>(); + + /* + * Map indicating which constraints are run for which predicate in the + * tail plan. The keys are the bopIds of the predicates in the tail + * plan. The values are the sets of constraints to run for that tail. + */ + final Map<Integer/* predId */, List<IConstraint>> tailPlanConstraintMap = new LinkedHashMap<Integer, List<IConstraint>>(); + + /* + * First identify the predicates which correspond to non-optional joins. + * All other pipeline operators are inserted into the tail plan in the + * order in which they are given. + */ + for (IPredicate<?> p : sourcePreds) { + if (p == null) + throw new IllegalArgumentException(); + if (!p.isOptional()) { + // non-optional predicate. + joinGraphPredicates.add(p); + // variables which will be bound by the join graph. + final Iterator<IVariable<?>> vitr = BOpUtility + .getArgumentVariables(p); + while (vitr.hasNext()) { + joinGraphVars.add(vitr.next()); + } + } else { + // an optional predicate + tailPlan.add(p); + } + } + + /* + * Now break the constraints into two groups - those whose variables are + * bound by the predicates in the join graph (required joins) and those + * having at least one variable bound by an optional join. + */ + for (IConstraint c : constraints) { + boolean allFound = true; + final Iterator<IVariable<?>> vitr = BOpUtility + .getSpannedVariables(c); + if (!vitr.hasNext()) { + /* + * This is a constraint which does not involve any variable so + * we should evaluate it as soon as possible. I.e., before the + * join graph. + */ + runFirstConstraints.add(c); + continue; + } + while (vitr.hasNext()) { + final IVariable<?> var = vitr.next(); + if (!joinGraphVars.contains(var)) { + /* + * This constraint will be evaluated against the tail plan. + */ + allFound = false; + tailPlanConstraints.add(c); + break; + } + } + if (allFound) { + /* + * This constraint will be evaluated by the join graph for the + * first join in in which all of the variables used by the + * constraint are known to be bound. + */ + joinGraphConstraints.add(c); + } + } + + /* + * If a variable is not bound by a required predicate, then we attach + * any constraint using that variable to the last optional predicate in + * which that variable MIGHT become bound. + */ + { + /* + * Populate a map from each variable not bound in the join graph to + * the last index in the tail plan at which it MIGHT become bound. 
+ */ + final Map<IVariable<?>, Integer/* lastIndexOf */> lastIndexOf = new LinkedHashMap<IVariable<?>, Integer>(); + int indexOf = 0; + for (IPredicate<?> p : tailPlan) { + final Iterator<IVariable<?>> vitr = BOpUtility + .getArgumentVariables(p); + while (vitr.hasNext()) { + final IVariable<?> var = vitr.next(); + lastIndexOf.put(var, Integer.valueOf(indexOf)); + } + indexOf++; + } + /* + * For each constraint using at least one variable NOT bound by the + * join graph, find the maximum value of lastIndexOf for the + * variable(s) in that constraint. That is the index of the operator + * in the tail plan to which the constraint should be attached. + */ + for (IConstraint c : tailPlanConstraints) { + final Iterator<IVariable<?>> vitr = BOpUtility + .getSpannedVariables(c); + Integer maxLastIndexOf = null; + while (vitr.hasNext()) { + final IVariable<?> var = vitr.next(); + if (joinGraphVars.contains(var)) { + // This variable is bound by the join graph. + continue; + } + final Integer tmp = lastIndexOf.get(var); + if (tmp == null) { + // This variable is never bound by the query. + throw new NoSolutionsException( + "Variable is never bound: " + var); + } + if (maxLastIndexOf == null + || tmp.intValue() > maxLastIndexOf.intValue()) { + maxLastIndexOf = tmp; + } + } // next variable. + if (maxLastIndexOf == null) { + // A logic error. + throw new AssertionError("maxLastIndexOf is undefined: " + + c); + } + /* + * Add the constraint to the last predicate at which any of its + * variables MIGHT have become bound. + */ + { + /* + * The bopId for the predicate in the tail plan for which, + * when that predicate is evaluated, we will run this + * constraint. + */ + final int predId = tailPlan.get(maxLastIndexOf).getId(); + /* + * The constraint(s) (if any) already associated with that + * predicate. + */ + List<IConstraint> tmp = tailPlanConstraintMap.get(predId); + if (tmp == null) { + tmp = new LinkedList<IConstraint>(); + tailPlanConstraintMap.put(predId, tmp); + } + tmp.add(c); + } + } // next tail plan constraint. + + } + + /* + * Assign to instance fields. + */ + // @todo headPlan +// this.headPlan = null; +// this.headPlanConstraints = null; + this.joinGraphPredicates = joinGraphPredicates + .toArray(new IPredicate[joinGraphPredicates.size()]); + this.joinGraphConstraints = joinGraphConstraints + .toArray(new IConstraint[joinGraphConstraints.size()]); + this.tailPlan = tailPlan.toArray(new IPredicate[tailPlan.size()]); + // @todo tailPlanConstraintMap + + } + +} Property changes on: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/PartitionedJoinGroup.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java 2011-01-19 18:22:38 UTC (rev 4129) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java 2011-01-19 18:34:43 UTC (rev 4130) @@ -129,18 +129,27 @@ */ String SELECT = PipelineJoin.class.getName() + ".select"; - /** - * Marks the join as "optional" in the SPARQL sense. Binding sets which - * fail the join will be routed to the alternative sink as specified by - * either {@link PipelineOp.Annotations#ALT_SINK_REF} or - * {@link PipelineOp.Annotations#ALT_SINK_GROUP}. 
- * - * @see #DEFAULT_OPTIONAL - */ - String OPTIONAL = PipelineJoin.class.getName() + ".optional"; +// /** +// * Marks the join as "optional" in the SPARQL sense. Binding sets which +// * fail the join will be routed to the alternative sink as specified by +// * either {@link PipelineOp.Annotations#ALT_SINK_REF} or +// * {@link PipelineOp.Annotations#ALT_SINK_GROUP}. +// * +// * @see #DEFAULT_OPTIONAL +// * +// * @deprecated We should just inspect +// * {@link IPredicate.Annotations#OPTIONAL}. +// */ +// String OPTIONAL = PipelineJoin.class.getName() + ".optional"; +// +// boolean DEFAULT_OPTIONAL = false; - boolean DEFAULT_OPTIONAL = false; - + /** + * An {@link IConstraint}[] which places restrictions on the legal + * patterns in the variable bindings (optional). + */ + String CONSTRAINTS = PipelineJoin.class.getName() + ".constraints"; + /** * The maximum parallelism with which the pipeline will consume the * source {@link IBindingSet}[] chunk. @@ -435,16 +444,30 @@ } - /** - * @see Annotations#OPTIONAL - */ + /** + * Return the value of {@link IPredicate#isOptional()} for the + * {@link IPredicate} associated with this join. + * + * @see IPredicate.Annotations#OPTIONAL + */ public boolean isOptional() { - return getProperty(Annotations.OPTIONAL, Annotations.DEFAULT_OPTIONAL); +// return getProperty(Annotations.OPTIONAL, Annotations.DEFAULT_OPTIONAL); + return getPredicate().isOptional(); } /** + * + * @see Annotations#CONSTRAINTS + */ + public IConstraint[] constraints() { + + return getProperty(Annotations.CONSTRAINTS, null/* defaultValue */); + + } + + /** * @see Annotations#MAX_PARALLEL */ public int getMaxParallel() { @@ -642,7 +665,7 @@ this.joinOp = joinOp; this.predicate = joinOp.getPredicate(); - this.constraints = predicate.constraints(); + this.constraints = joinOp.constraints(); this.maxParallel = joinOp.getMaxParallel(); if (maxParallel < 0) throw new IllegalArgumentException(Annotations.MAX_PARALLEL Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestAll.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestAll.java 2011-01-19 18:22:38 UTC (rev 4129) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestAll.java 2011-01-19 18:34:43 UTC (rev 4130) @@ -75,6 +75,12 @@ suite.addTestSuite(TestSubqueryOp.class); + suite.addTestSuite(TestPartitionedJoinGroup.class); + + suite.addTestSuite(TestJoinGraph.class); + + suite.addTestSuite(TestJGraph.class); + // @todo test STAR (transitive closure). // suite.addTestSuite(TestStar.class); Added: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestJGraph.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestJGraph.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestJGraph.java 2011-01-19 18:34:43 UTC (rev 4130) @@ -0,0 +1,254 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2011. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Jan 18, 2011 + */ + +package com.bigdata.bop.controller; + +import com.bigdata.bop.controller.JoinGraph.JGraph; + +import junit.framework.TestCase2; + +/** + * Test suite for {@link JGraph}, which is the core implementation of the + * runtime query optimizer logic. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + * + * @todo There are some operations which depend on equality or hash code + * behavior for vertices and perhaps edges so those things should also be + * tested. + */ +public class TestJGraph extends TestCase2 { + + /** + * + */ + public TestJGraph() { + } + + /** + * @param name + */ + public TestJGraph(String name) { + super(name); + } + + public void test_something() { + + fail("write tests"); + + } + +// /** +// * Test ability to identify shared variables appearing either as predicate +// * operands or as part of CONSTRAINTS or FILTERS. +// */ +// public void test_getSharedVariables() { +// fail("write test"); +// } +// +// /** +// * Test ability to recognize when there is a predicate without any shared +// * variables. +// */ +// public void test_noSharedVariables() { +// fail("write test"); +// } +// +// public void test_getMinimumCardinalityEdge() { +// fail("write test"); +// } +// +// public void test_moreEdgesToExecute() { +// fail("write test"); +// } +// +// // @todo also getEdgeCount() +// public void test_getEdges() { +// fail("write test"); +// } +// +// public void test_getSelectedJoinPath() { +// fail("write test"); +// } +// +// public void test_getBestAlternativeJoinPath() { +// fail("write test"); +// } +// +// public void test_getVertex() { +// fail("write test"); +// } +// +// // getEdge(v1,v2) +// public void test_getEdge() { +// fail("write test"); +// } +// +// // test ability to obtain a Path which extends another path. +// public void test_Path_addEdge() { +// fail("write test"); +// } + +// @Override +// public Properties getProperties() { +// +// final Properties p = new Properties(super.getProperties()); +// +// p.setProperty(Journal.Options.BUFFER_MODE, BufferMode.Transient +// .toString()); +// +// return p; +// +// } +// +// static private final String namespace = "ns"; +// +// Journal jnl; +// +// R rel; +// +// public void setUp() throws Exception { +// +// jnl = new Journal(getProperties()); +// +// } +// +// /** +// * Create and populate relation in the {@link #namespace}. +// * +// * @return The #of distinct entries. +// */ +// private int loadData(final int scale) { +// +// final String[] names = new String[] { "John", "Mary", "Saul", "Paul", +// "Leon", "Jane", "Mike", "Mark", "Jill", "Jake", "Alex", "Lucy" }; +// +// final Random rnd = new Random(); +// +// // #of distinct instances of each name. +// final int populationSize = Math.max(10, (int) Math.ceil(scale / 10.)); +// +// // #of trailing zeros for each name. 
+// final int nzeros = 1 + (int) Math.ceil(Math.log10(populationSize)); +// +//// System.out.println("scale=" + scale + ", populationSize=" +//// + populationSize + ", nzeros=" + nzeros); +// +// final NumberFormat fmt = NumberFormat.getIntegerInstance(); +// fmt.setMinimumIntegerDigits(nzeros); +// fmt.setMaximumIntegerDigits(nzeros); +// fmt.setGroupingUsed(false); +// +// // create the relation. +// final R rel = new R(jnl, namespace, ITx.UNISOLATED, new Properties()); +// rel.create(); +// +// // data to insert. +// final E[] a = new E[scale]; +// +// for (int i = 0; i < scale; i++) { +// +// final String n1 = names[rnd.nextInt(names.length)] +// + fmt.format(rnd.nextInt(populationSize)); +// +// final String n2 = names[rnd.nextInt(names.length)] +// + fmt.format(rnd.nextInt(populationSize)); +// +//// System.err.println("i=" + i + ", n1=" + n1 + ", n2=" + n2); +// +// a[i] = new E(n1, n2); +// +// } +// +// // sort before insert for efficiency. +// Arrays.sort(a,R.primaryKeyOrder.getComparator()); +// +// // insert data (the records are not pre-sorted). +// final long ninserts = rel.insert(new ChunkedArrayIterator<E>(a.length, a, null/* keyOrder */)); +// +// // Do commit since not scale-out. +// jnl.commit(); +// +// // should exist as of the last commit point. +// this.rel = (R) jnl.getResourceLocator().locate(namespace, +// ITx.READ_COMMITTED); +// +// assertNotNull(rel); +// +// return (int) ninserts; +// +// } +// +// public void tearDown() throws Exception { +// +// if (jnl != null) { +// jnl.destroy(); +// jnl = null; +// } +// +// // clear reference. +// rel = null; +// +// } + +// public void test_something() { + +//// final int scale = 10000; +//// +//// final int nrecords = loadData(scale); +// +// final IVariable<?> x = Var.var("x"); +// +// final IVariable<?> y = Var.var("y"); +// +// final IPredicate<E> p1 = new Predicate<E>(new BOp[] { x, y }, +// new NV(IPredicate.Annotations.RELATION_NAME, +// new String[] { namespace }),// +// new NV(IPredicate.Annotations.TIMESTAMP, ITx.READ_COMMITTED)// +// ); +// +// final IPredicate<E> p2 = new Predicate<E>(new BOp[] { x, y }, +// new NV(IPredicate.Annotations.RELATION_NAME, +// new String[] { namespace }),// +// new NV(IPredicate.Annotations.TIMESTAMP, ITx.READ_COMMITTED)// +// ); +// +// final IPredicate<E> p3 = new Predicate<E>(new BOp[] { x, y }, +// new NV(IPredicate.Annotations.RELATION_NAME, +// new String[] { namespace }),// +// new NV(IPredicate.Annotations.TIMESTAMP, ITx.READ_COMMITTED)// +// ); +// +// new JoinGraph(// +// new NV(BOp.Annotations.BOP_ID, 1),// +// new NV(JoinGraph.Annotations.VERTICES,new IPredicate[]{}),// +// new NV(JoinGraph.Annotations.SAMPLE_SIZE, 100)// +// ); +//} + +} Property changes on: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestJGraph.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestJoinGraph.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestJoinGraph.java 2011-01-19 18:22:38 UTC (rev 4129) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestJoinGraph.java 2011-01-19 18:34:43 UTC (rev 4130) @@ -29,11 +29,24 @@ import junit.framework.TestCase2; +import com.bigdata.bop.BOp; +import com.bigdata.bop.BOpEvaluationContext; +import com.bigdata.bop.Constant; +import 
com.bigdata.bop.IConstraint; +import com.bigdata.bop.IPredicate; +import com.bigdata.bop.NV; +import com.bigdata.bop.Var; +import com.bigdata.bop.ap.Predicate; +import com.bigdata.bop.constraint.NEConstant; + /** - * Unit tests for runtime query optimization using {@link JoinGraph}. + * Unit tests for the {@link JoinGraph} operator. * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ + * + * @todo Test evaluation of the operator as well. A lot of the guts of that are + * tested by {@link TestJGraph}. */ public class TestJoinGraph extends TestCase2 { @@ -50,193 +63,224 @@ super(name); } - /* - * Among other things, there are some operations which depend on equality or - * hash code behavior for vertices and perhaps edges so those things should - * also be tested. - */ - - public void test_getMinimumCardinalityEdge() { - fail("write test"); - } + public void test_ctor() { - public void test_moreEdgesToExecute() { - fail("write test"); - } + // correct acceptance. + { + final IPredicate[] vertices = new IPredicate[] { + new Predicate(new BOp[]{Var.var("x"),Var.var("y")}),// + new Predicate(new BOp[]{Var.var("y"),Var.var("z")}),// + }; + final IConstraint[] constraints = null; + final JoinGraph joinGraph = new JoinGraph(new BOp[0],// + new NV(JoinGraph.Annotations.VERTICES, vertices),// + new NV(JoinGraph.Annotations.CONTROLLER, true), // + new NV(JoinGraph.Annotations.EVALUATION_CONTEXT, + BOpEvaluationContext.CONTROLLER)// + ); + assertEquals("vertices", vertices, joinGraph.getVertices()); + assertEquals("cons... [truncated message content] |
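The API shift in the PipelineJoin diff above is easy to miss: "optional" is no longer a join-level annotation but is read from the predicate via IPredicate#isOptional(), while an IConstraint[] is now attached to the join operator itself through the new CONSTRAINTS annotation. Below is a minimal sketch of how calling code observes both after this change. The JoinFlagsSketch class, its inspect() helper, and the import paths are illustrative assumptions; isOptional() and constraints() are the accessors shown in the diff.

import com.bigdata.bop.IConstraint;
import com.bigdata.bop.join.PipelineJoin;

public class JoinFlagsSketch {

    // Hypothetical helper: inspect a join operator under the r4130 contract.
    static void inspect(final PipelineJoin<?> joinOp) {

        // Optionality now lives on the predicate
        // (IPredicate.Annotations.OPTIONAL), not on the join.
        final boolean optional = joinOp.isOptional();

        // Constraints now live on the join operator's own CONSTRAINTS
        // annotation; null when none were attached.
        final IConstraint[] constraints = joinOp.constraints();

        System.out.println("optional=" + optional + ", #constraints="
                + (constraints == null ? 0 : constraints.length));
    }
}

This mirrors the change in the join task constructor above, where this.constraints is now taken from joinOp.constraints() rather than from predicate.constraints().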
From: <tho...@us...> - 2011-01-19 21:21:10
|
Revision: 4131
http://bigdata.svn.sourceforge.net/bigdata/?rev=4131&view=rev
Author: thompsonbry
Date: 2011-01-19 21:21:03 +0000 (Wed, 19 Jan 2011)

Log Message:
-----------
Turned off backchain() for the named and default graph expanders since inference is not compatible with quads and those expanders were only ever used in quads mode. Backchain access path support has not made it into the new query engine yet. I've filed an issue [1] for this and also added some javadoc notes on IAccessPathExpander and RDFJoinNexus concerning that issue.

[1] https://sourceforge.net/apps/trac/bigdata/ticket/231

Modified Paths:
--------------
    branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/IAccessPathExpander.java
    branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/rules/RDFJoinNexus.java
    branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/DefaultGraphSolutionExpander.java
    branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/NamedGraphSolutionExpander.java

Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/IAccessPathExpander.java
===================================================================
--- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/IAccessPathExpander.java	2011-01-19 18:34:43 UTC (rev 4130)
+++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/IAccessPathExpander.java	2011-01-19 21:21:03 UTC (rev 4131)
@@ -54,19 +54,24 @@
      * onto or otherwise override the given {@link IAccessPath}.
      */
     IAccessPath<E> getAccessPath(IAccessPath<E> accessPath);
-    
+
     /**
      * Add the backchainer on top of the expander.
      * 
      * @return true if the backchainer should run
+     * 
+     * @deprecated Never <code>true</code>. The backchainer is only run for
+     *             normal predicates in triples mode at this time. If it is to
+     *             be kept, it should be layered on as an annotation. See
+     *             https://sourceforge.net/apps/trac/bigdata/ticket/231.
      */
     boolean backchain();
-    
+
     /**
      * If true, the predicate for this expander will be given priority in the
     * join order.
     * 
-     * @return true if the predicate should be run first 
+     * @return true if the predicate should be run first
     */
     boolean runFirst();

Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/rules/RDFJoinNexus.java
===================================================================
--- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/rules/RDFJoinNexus.java	2011-01-19 18:34:43 UTC (rev 4130)
+++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/rules/RDFJoinNexus.java	2011-01-19 21:21:03 UTC (rev 4131)
@@ -391,6 +391,7 @@
 //        }
 
         // @todo raise into SPORelation#getAccessPath/3?
+        // @see https://sourceforge.net/apps/trac/bigdata/ticket/231
         if(backchain && relation instanceof SPORelation) {
 
             if (expander == null || expander.backchain()) {

Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/DefaultGraphSolutionExpander.java
===================================================================
--- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/DefaultGraphSolutionExpander.java	2011-01-19 18:34:43 UTC (rev 4130)
+++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/DefaultGraphSolutionExpander.java	2011-01-19 21:21:03 UTC (rev 4131)
@@ -247,7 +247,7 @@
 
     public boolean backchain() {
 
-        return true;
+        return false;
 
     }

Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/NamedGraphSolutionExpander.java
===================================================================
--- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/NamedGraphSolutionExpander.java	2011-01-19 18:34:43 UTC (rev 4130)
+++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/NamedGraphSolutionExpander.java	2011-01-19 21:21:03 UTC (rev 4131)
@@ -180,7 +180,7 @@
 
     public boolean backchain() {
 
-        return true;
+        return false;
 
     }

|
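The contract change in r4131, restated for implementers: in quads mode backchain() must report false, because the backchainer currently runs only for normal predicates in triples mode. A minimal pass-through expander under that contract is sketched below. The class name and import paths are assumptions; the three methods are exactly those shown in the IAccessPathExpander diff above, and any other members of the interface are omitted.

import com.bigdata.relation.accesspath.IAccessPath;
import com.bigdata.relation.rule.IAccessPathExpander;

public class PassThroughExpander<E> implements IAccessPathExpander<E> {

    public IAccessPath<E> getAccessPath(final IAccessPath<E> accessPath) {
        // Layer onto or otherwise override the given access path; this
        // sketch simply passes it through unchanged.
        return accessPath;
    }

    public boolean backchain() {
        // Never true after r4131: backchaining is not compatible with
        // quads. See https://sourceforge.net/apps/trac/bigdata/ticket/231
        return false;
    }

    public boolean runFirst() {
        // Do not give this expander's predicate priority in the join order.
        return false;
    }
}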
From: <mrp...@us...> - 2011-01-20 02:05:28
|
Revision: 4135 http://bigdata.svn.sourceforge.net/bigdata/?rev=4135&view=rev Author: mrpersonick Date: 2011-01-20 02:05:21 +0000 (Thu, 20 Jan 2011) Log Message: ----------- working through tck failures Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/SubqueryOp.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/CompareBOp.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl3.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSail.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/Rule2BOpUtility.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/SOpTree.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/SOpTreeBuilder.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/tck/BigdataSparqlTest.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/SameTermBOp.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java 2011-01-19 22:42:44 UTC (rev 4134) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java 2011-01-20 02:05:21 UTC (rev 4135) @@ -737,7 +737,26 @@ return sb.toString(); } + + public static String toString2(final BOp bop) { + String s = toString(bop); + s = s.replaceAll("com.bigdata.bop.controller.", ""); + s = s.replaceAll("com.bigdata.bop.join.", ""); + s = s.replaceAll("com.bigdata.bop.solutions.", ""); + s = s.replaceAll("com.bigdata.bop.rdf.filter.", ""); + s = s.replaceAll("com.bigdata.bop.", ""); + s = s.replaceAll("com.bigdata.rdf.sail.", ""); + s = s.replaceAll("com.bigdata.rdf.spo.", ""); +// s = s.replaceAll("com.bigdata.bop..", ""); +// s = s.replaceAll("com.bigdata.bop..", ""); +// s = s.replaceAll("com.bigdata.bop..", ""); +// s = s.replaceAll("com.bigdata.bop..", ""); +// s = s.replaceAll("com.bigdata.bop..", ""); + return s; + + } + private static void toString(final BOp bop, final StringBuilder sb, final int indent) { Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/SubqueryOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/SubqueryOp.java 2011-01-19 22:42:44 UTC (rev 4134) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/SubqueryOp.java 2011-01-20 02:05:21 UTC (rev 4135) @@ -158,6 +158,18 @@ return new FutureTask<Void>(new ControllerTask(this, context)); } + + public String toString() { + + final StringBuilder sb = new StringBuilder(super.toString()); + sb.append("\n{\n"); + final PipelineOp subquery = (PipelineOp) + getRequiredProperty(Annotations.SUBQUERY); + sb.append(BOpUtility.toString(subquery)); + sb.append("\n}"); + return sb.toString(); + + } /** * Evaluates the arguments of the operator as subqueries. 
The arguments are Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/CompareBOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/CompareBOp.java 2011-01-19 22:42:44 UTC (rev 4134) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/CompareBOp.java 2011-01-20 02:05:21 UTC (rev 4135) @@ -98,12 +98,18 @@ return _accept(left.compareTo(right)); } - if (!IVUtility.canNumericalCompare(left)) - throw new NotNumericalException("cannot numerical compare: " + left); + if (!IVUtility.canNumericalCompare(left) || + !IVUtility.canNumericalCompare(right)) { + if (op == CompareOp.EQ) { + return false; + } else if (op == CompareOp.NE) { + return true; + } else { + throw new NotNumericalException("cannot numerical compare: " + + left + " " + op + " " + right); + } + } - if (!IVUtility.canNumericalCompare(right)) - throw new NotNumericalException("cannot numerical compare: " + right); - return _accept(IVUtility.numericalCompare(left, right)); } Added: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/SameTermBOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/SameTermBOp.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/SameTermBOp.java 2011-01-20 02:05:21 UTC (rev 4135) @@ -0,0 +1,87 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +package com.bigdata.rdf.internal.constraints; + +import java.util.Map; + +import org.openrdf.query.algebra.Compare.CompareOp; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IValueExpression; +import com.bigdata.bop.NV; +import com.bigdata.bop.PipelineOp; +import com.bigdata.bop.constraint.BOpConstraint; +import com.bigdata.rdf.internal.IV; +import com.bigdata.rdf.internal.IVUtility; + +/** + * Compare two terms for exact equality. + */ +public class SameTermBOp extends BOpConstraint { + + /** + * + */ + private static final long serialVersionUID = 1L; + + /** + * Required shallow copy constructor. + */ + public SameTermBOp(final BOp[] values, + final Map<String, Object> annotations) { + super(values, annotations); + } + + /** + * Required deep copy constructor. 
+ */ + public SameTermBOp(final SameTermBOp op) { + super(op); + } + + public SameTermBOp(final IValueExpression<IV> left, + final IValueExpression<IV> right) { + + super(new BOp[] { left, right }, null); + + if (left == null || right == null) + throw new IllegalArgumentException(); + + } + + public boolean accept(final IBindingSet s) { + + final IV left = ((IValueExpression<IV>) get(0)).get(s); + final IV right = ((IValueExpression<IV>) get(1)).get(s); + + if (left == null || right == null) + return true; // not yet bound. + + return left.equals(right); + + } + +} Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl3.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl3.java 2011-01-19 22:42:44 UTC (rev 4134) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl3.java 2011-01-20 02:05:21 UTC (rev 4135) @@ -29,6 +29,7 @@ import org.openrdf.query.algebra.Join; import org.openrdf.query.algebra.LeftJoin; import org.openrdf.query.algebra.MathExpr; +import org.openrdf.query.algebra.MathExpr.MathOp; import org.openrdf.query.algebra.MultiProjection; import org.openrdf.query.algebra.Not; import org.openrdf.query.algebra.Or; @@ -41,15 +42,13 @@ import org.openrdf.query.algebra.Regex; import org.openrdf.query.algebra.SameTerm; import org.openrdf.query.algebra.StatementPattern; +import org.openrdf.query.algebra.StatementPattern.Scope; import org.openrdf.query.algebra.TupleExpr; import org.openrdf.query.algebra.UnaryTupleOperator; import org.openrdf.query.algebra.Union; import org.openrdf.query.algebra.ValueConstant; import org.openrdf.query.algebra.ValueExpr; import org.openrdf.query.algebra.Var; -import org.openrdf.query.algebra.Compare.CompareOp; -import org.openrdf.query.algebra.MathExpr.MathOp; -import org.openrdf.query.algebra.StatementPattern.Scope; import org.openrdf.query.algebra.evaluation.impl.EvaluationStrategyImpl; import org.openrdf.query.algebra.evaluation.iterator.FilterIterator; import org.openrdf.query.algebra.helpers.QueryModelVisitorBase; @@ -61,12 +60,12 @@ import com.bigdata.bop.IConstant; import com.bigdata.bop.IConstraint; import com.bigdata.bop.IPredicate; +import com.bigdata.bop.IPredicate.Annotations; import com.bigdata.bop.IValueExpression; import com.bigdata.bop.IVariable; import com.bigdata.bop.IVariableOrConstant; import com.bigdata.bop.NV; import com.bigdata.bop.PipelineOp; -import com.bigdata.bop.IPredicate.Annotations; import com.bigdata.bop.ap.Predicate; import com.bigdata.bop.constraint.AND; import com.bigdata.bop.constraint.BOUND; @@ -84,15 +83,16 @@ import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.internal.constraints.CompareBOp; import com.bigdata.rdf.internal.constraints.MathBOp; +import com.bigdata.rdf.internal.constraints.SameTermBOp; import com.bigdata.rdf.lexicon.LexiconRelation; import com.bigdata.rdf.model.BigdataValue; import com.bigdata.rdf.sail.BigdataSail.Options; import com.bigdata.rdf.sail.sop.SOp; import com.bigdata.rdf.sail.sop.SOp2BOpUtility; import com.bigdata.rdf.sail.sop.SOpTree; +import com.bigdata.rdf.sail.sop.SOpTree.SOpGroup; import com.bigdata.rdf.sail.sop.SOpTreeBuilder; import com.bigdata.rdf.sail.sop.UnsupportedOperatorException; -import com.bigdata.rdf.sail.sop.SOpTree.SOpGroup; import com.bigdata.rdf.spo.DefaultGraphSolutionExpander; import com.bigdata.rdf.spo.ExplicitSPOFilter; import 
com.bigdata.rdf.spo.ISPO; @@ -118,7 +118,6 @@ import com.bigdata.striterator.Dechunkerator; import com.bigdata.striterator.DistinctFilter; import com.bigdata.striterator.IChunkedOrderedIterator; -import com.bigdata.util.concurrent.Haltable; /** * Extended to rewrite Sesame {@link TupleExpr}s onto native {@link Rule}s and @@ -516,6 +515,59 @@ } + @Override + public CloseableIteration<BindingSet, QueryEvaluationException> evaluate( + final Filter filter, final BindingSet bs) + throws QueryEvaluationException { + + if (!nativeJoins) { + // Use Sesame 2 evaluation + return super.evaluate(filter, bs); + } + + if (filter.getArg() instanceof StatementPattern) { + // no need to run a query for this, a simple access path scan will do + return super.evaluate(filter, bs); + } + + if (log.isInfoEnabled()) { + log.info("evaluating top-level Filter operator"); + } + + try { + + return evaluateNatively(filter, bs); + + } catch (UnsupportedOperatorException ex) { + + if (allowSesameQueryEvaluation) { + + // Use Sesame 2 evaluation + + log.warn("could not evaluate natively, using Sesame evaluation"); + + if (log.isInfoEnabled()) { + log.info(ex.getOperator()); + } + + // turn off native joins for the remainder, we can't do + // partial execution + nativeJoins = false; + + // defer to Sesame + return super.evaluate(filter, bs); + + } else { + + // allow the query to fail + throw new UnsupportedOperatorException(ex); + + } + + } + + } + CloseableIteration<BindingSet, QueryEvaluationException> evaluateNatively(final TupleExpr tupleExpr, final BindingSet bs) throws QueryEvaluationException, UnsupportedOperatorException { @@ -706,7 +758,7 @@ queryEngine, queryHints); if (log.isInfoEnabled()) - log.info(BOpUtility.toString(query)); + log.info("\n"+BOpUtility.toString2(query)); } @@ -737,6 +789,11 @@ result = new FilterIterator(f, result, this); } } + +// System.err.println("results"); +// while (result.hasNext()) { +// System.err.println(result.next()); +// } return result; @@ -1832,10 +1889,13 @@ toValueExpression(sameTerm.getLeftArg()); final IValueExpression<IV> iv2 = toValueExpression(sameTerm.getRightArg()); - return new CompareBOp(iv1, iv2, CompareOp.EQ); + return new SameTermBOp(iv1, iv2); } - private IConstraint toConstraint(Compare compare) { + private IConstraint toConstraint(final Compare compare) { + if (!database.isInlineLiterals()) { + throw new UnsupportedOperatorException(compare); + } final IValueExpression<IV> iv1 = toValueExpression(compare.getLeftArg()); final IValueExpression<IV> iv2 = Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSail.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSail.java 2011-01-19 22:42:44 UTC (rev 4134) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSail.java 2011-01-20 02:05:21 UTC (rev 4135) @@ -382,7 +382,7 @@ public static final String NEW_EVAL_STRATEGY = BigdataSail.class.getPackage() .getName()+ ".newEvalStrategy"; - public static final String DEFAULT_NEW_EVAL_STRATEGY = "false"; + public static final String DEFAULT_NEW_EVAL_STRATEGY = "true"; /** * Option as to whether or not to allow Sesame evaluation of queries @@ -396,8 +396,6 @@ public static final String DEFAULT_ALLOW_SESAME_QUERY_EVALUATION = "false"; - - } /** Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/Rule2BOpUtility.java 
=================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/Rule2BOpUtility.java 2011-01-19 22:42:44 UTC (rev 4134) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/Rule2BOpUtility.java 2011-01-20 02:05:21 UTC (rev 4135) @@ -598,27 +598,35 @@ final Collection<IConstraint> constraints = new LinkedList<IConstraint>(); - /* - * Peek through the predicate's args to find its variables. Use - * these to attach constraints to the join based on the variables - * that make their first appearance in this tail. - */ - for (BOp arg : pred.args()) { - if (arg instanceof IVariable<?>) { - final IVariable<?> v = (IVariable<?>) arg; - /* - * We do a remove because we don't ever need to run these - * constraints again during subsequent joins once they have - * been run once at the initial appearance of the variable. - * - * @todo revisit this when we dynamically re-order running - * joins - */ - if (constraintsByVar.containsKey(v)) - constraints.addAll(constraintsByVar.remove(v)); - } +// /* +// * Peek through the predicate's args to find its variables. Use +// * these to attach constraints to the join based on the variables +// * that make their first appearance in this tail. +// */ +// for (BOp arg : pred.args()) { +// if (arg instanceof IVariable<?>) { +// final IVariable<?> v = (IVariable<?>) arg; +// /* +// * We do a remove because we don't ever need to run these +// * constraints again during subsequent joins once they have +// * been run once at the initial appearance of the variable. +// * +// * @todo revisit this when we dynamically re-order running +// * joins +// */ +// if (constraintsByVar.containsKey(v)) +// constraints.addAll(constraintsByVar.remove(v)); +// } +// } + + // just add all the constraints to the very last tail for now + if (i == (order.length-1) && rule.getConstraintCount() > 0) { + final Iterator<IConstraint> it = rule.getConstraints(); + while (it.hasNext()) { + constraints.add(it.next()); + } } - + // annotations for this join. 
                final List<NV> anns = new LinkedList<NV>();

Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/SOpTree.java
===================================================================
--- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/SOpTree.java	2011-01-19 22:42:44 UTC (rev 4134)
+++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/SOpTree.java	2011-01-20 02:05:21 UTC (rev 4135)
@@ -153,6 +153,10 @@
 			return SOpTree.this;
 		}
 		
+		public boolean isRoot() {
+			return group == 0;
+		}
+		
 	}
 	
 	public class SOpGroups implements Iterable<SOpGroup> {

Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/SOpTreeBuilder.java
===================================================================
--- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/SOpTreeBuilder.java	2011-01-19 22:42:44 UTC (rev 4134)
+++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/SOpTreeBuilder.java	2011-01-20 02:05:21 UTC (rev 4135)
@@ -234,7 +234,8 @@
 			} else if (arg instanceof Join) {
 				collectSOps(sops, (Join) arg, rslj, g, pg);
 			} else if (arg instanceof LeftJoin) {
-				collectSOps(sops, (LeftJoin) arg, rslj, groupId.incrementAndGet(), g);
+//				collectSOps(sops, (LeftJoin) arg, rslj, groupId.incrementAndGet(), g);
+				collectSOps(sops, (LeftJoin) arg, rslj, g, pg);
 			} else {
 				throw new UnsupportedOperatorException(arg);
 			}

Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/tck/BigdataSparqlTest.java
===================================================================
--- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/tck/BigdataSparqlTest.java	2011-01-19 22:42:44 UTC (rev 4134)
+++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/tck/BigdataSparqlTest.java	2011-01-20 02:05:21 UTC (rev 4135)
@@ -212,6 +212,29 @@
 
//            "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/basic/manifest#spoo-1",//BOOM
 
//            "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/graph/manifest#dawg-graph-05",
+        
+        /*
+         * working through the new query engine failures
+         */
+        
+        // the expected semantics of this test are unclear; needs investigation
+//        "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/open-world/manifest#open-eq-12"
+        
+        // the "well designed" optional pattern P = A OPT (B OPT C), where A and C share variables that do not appear in B
+//        "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/algebra/manifest#nested-opt-1"
+        
+        // where do we put the !bound(?e) constraint?
+//        "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/bound/manifest#dawg-bound-query-001"
+        
+        // "a" and "a"^^xsd:string have different term ids? also bnodes are different
+//        "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/open-world/manifest#open-eq-07"
+        
+        // unclear how to evaluate a non-optional subquery
+//        "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/algebra/manifest#filter-scope-1"
+        
+        // this uncovers an obvious bug in our SubqueryOp
+//        "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/optional-filter/manifest#dawg-optional-filter-001"
+        
     });
 
     /**

|
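Two semantic changes in r4135 deserve a closer look than the diff affords. First, SAMETERM filters are now compiled onto the new SameTermBOp, which tests exact equality of the internal values (IVs) and accepts solutions where either side is not yet bound, rather than onto CompareBOp with CompareOp.EQ. Second, CompareBOp degrades gracefully when either IV cannot be numerically compared: EQ evaluates to false and NE to true instead of raising NotNumericalException. The sketch below paraphrases only the paths visible in the diffs. The class and method names are illustrative, the final switch reconstructs what the private _accept helper presumably does with the comparison result, and a plain RuntimeException stands in for CompareBOp's NotNumericalException.

import org.openrdf.query.algebra.Compare.CompareOp;

import com.bigdata.rdf.internal.IV;
import com.bigdata.rdf.internal.IVUtility;

public class FilterSemanticsSketch {

    // Paraphrase of SameTermBOp#accept(): exact IV equality, letting
    // solutions pass when either side is not yet bound.
    static boolean sameTerm(final IV left, final IV right) {
        if (left == null || right == null)
            return true; // not yet bound: do not filter.
        return left.equals(right);
    }

    // Paraphrase of CompareBOp after r4135.
    static boolean compare(final IV left, final IV right, final CompareOp op) {
        if (!IVUtility.canNumericalCompare(left)
                || !IVUtility.canNumericalCompare(right)) {
            if (op == CompareOp.EQ)
                return false; // non-comparable operands are never EQ ...
            if (op == CompareOp.NE)
                return true;  // ... and always NE.
            // Ordering (LT, LE, GT, GE) remains an error for
            // non-numerically-comparable operands.
            throw new RuntimeException("cannot numerically compare: " + left
                    + " " + op + " " + right);
        }
        final int c = IVUtility.numericalCompare(left, right);
        switch (op) {
        case EQ: return c == 0;
        case NE: return c != 0;
        case LT: return c < 0;
        case LE: return c <= 0;
        case GT: return c > 0;
        case GE: return c >= 0;
        default: throw new IllegalArgumentException(op.toString());
        }
    }
}

Read together with the Rule2BOpUtility change above (which, for now, attaches all of a rule's constraints to the very last tail in the join order), this explains several of the TCK results this commit was chasing.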