From: <tho...@us...> - 2011-01-21 21:52:25
|
Revision: 4160 http://bigdata.svn.sourceforge.net/bigdata/?rev=4160&view=rev Author: thompsonbry Date: 2011-01-21 21:52:18 +0000 (Fri, 21 Jan 2011) Log Message: ----------- Added getSharedVars() to BOpUtility. More work on the runtime query optimizer. See https://sourceforge.net/apps/trac/bigdata/ticket/64. Javadoc on AbstractSubqueryOp and SubqueryOp. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/AbstractSubqueryOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/PartitionedJoinGroup.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/SubqueryOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestJGraph.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestPartitionedJoinGroup.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/AbstractJoinGraphTestCase.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBarData.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/NoSolutionsException.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestAll.java Removed Paths: ------------- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphWithRDF.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java 2011-01-21 16:46:41 UTC (rev 4159) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java 2011-01-21 21:52:18 UTC (rev 4160) @@ -34,6 +34,7 @@ import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.Set; import org.apache.log4j.Logger; @@ -1053,6 +1054,83 @@ return b; } - + + /** + * Return the variable references shared by tw operators. All variables + * spanned by either {@link BOp} are considered. + * + * @param p + * An operator. + * @param c + * Another operator. + * + * @param p + * A predicate. + * + * @param c + * A constraint. + * + * @return The variables in common -or- <code>null</code> iff there are no + * variables in common. + * + * @throws IllegalArgumentException + * if the two either reference is <code>null</code>. + * @throws IllegalArgumentException + * if the reference are the same. + * + * @todo unit tests. + */ + public static Set<IVariable<?>> getSharedVars(final BOp p, final BOp c) { + + if (p == null) + throw new IllegalArgumentException(); + + if (c == null) + throw new IllegalArgumentException(); + + if (p == c) + throw new IllegalArgumentException(); + + // The set of variables which are shared. + final Set<IVariable<?>> sharedVars = new LinkedHashSet<IVariable<?>>(); + + // Collect the variables appearing anywhere in [p]. + final Set<IVariable<?>> p1vars = new LinkedHashSet<IVariable<?>>(); + { + + final Iterator<IVariable<?>> itr = BOpUtility + .getSpannedVariables(p); + + while (itr.hasNext()) { + + p1vars.add(itr.next()); + + } + + } + + // Consider the variables appearing anywhere in [c]. + { + + final Iterator<IVariable<?>> itr = BOpUtility + .getSpannedVariables(c); + + while (itr.hasNext()) { + + final IVariable<?> avar = itr.next(); + + if (p1vars.contains(avar)) { + + sharedVars.add(avar); + + } + + } + + } + + return sharedVars; + + } + } - Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/AbstractSubqueryOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/AbstractSubqueryOp.java 2011-01-21 16:46:41 UTC (rev 4159) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/AbstractSubqueryOp.java 2011-01-21 21:52:18 UTC (rev 4160) @@ -84,6 +84,11 @@ * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ + * + * @todo There is relatively little difference between this class and SubqueryOp + * and we should consider converging them into a single concrete subquery + * operator with specializations for UNION and STEPS. The main difference + * is that the SubqueryOp can not run multiple subqueries. */ abstract public class AbstractSubqueryOp extends PipelineOp { Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2011-01-21 16:46:41 UTC (rev 4159) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2011-01-21 21:52:18 UTC (rev 4160) @@ -67,7 +67,6 @@ import com.bigdata.bop.join.PipelineJoin; import com.bigdata.bop.join.PipelineJoin.PipelineJoinStats; import com.bigdata.bop.rdf.join.DataSetJoin; -import com.bigdata.bop.solutions.SliceOp; import com.bigdata.relation.IRelation; import com.bigdata.relation.accesspath.BufferClosedException; import com.bigdata.relation.accesspath.IAccessPath; @@ -75,6 +74,7 @@ import com.bigdata.relation.accesspath.ThickAsynchronousIterator; import com.bigdata.striterator.Dechunkerator; import com.bigdata.striterator.IChunkedIterator; +import com.bigdata.util.concurrent.Haltable; /** * A join graph with annotations for estimated cardinality and other details in @@ -1450,13 +1450,13 @@ * * @see #getVertices() */ - public IPredicate[] getPredicates() { + public IPredicate<?>[] getPredicates() { // The vertices in the selected evaluation order. final Vertex[] vertices = getVertices(); // The predicates in the same order as the vertices. - final IPredicate[] preds = new IPredicate[vertices.length]; + final IPredicate<?>[] preds = new IPredicate[vertices.length]; for (int i = 0; i < vertices.length; i++) { @@ -1469,6 +1469,16 @@ } /** + * Return the {@link BOp} identifiers of the predicates associated with + * each vertex in path order. + */ + public int[] getVertexIds() { + + return getVertexIds(edges); + + } + + /** * Return the {@link BOp} identifiers of the predicates associated with * each vertex in path order. */ @@ -1590,10 +1600,6 @@ * join path in each round then this would help to establish a * better estimate in successive rounds.] * - * FIXME CONSTRAINT ORDERING : It is illegal to add a vertex to the - * path if any variable appearing in its CONSTRAINTS would not be - * bound. - * * FIXME CONSTRAINT ORDERING : Rather than constraints imposing an * ordering on joins, constraints need to be attached dynamically to * the first join for which their variables are known to be bound. @@ -1848,7 +1854,7 @@ * @todo unit test for a constraint using a variable which is never * bound. */ - public JGraph(final IPredicate[] v, final IConstraint[] constraints) { + public JGraph(final IPredicate<?>[] v, final IConstraint[] constraints) { if (v == null) throw new IllegalArgumentException(); @@ -1871,27 +1877,33 @@ * Identify the edges by looking for shared variables among the * predicates. * - * Note: Variables may appear in the arguments of the predicate, - * e.g., spo(?s,rdf:type,?o). + * Note: Variables appear in predicates or in constraints. Edges are + * created to represent possible joins between predicates based on + * those shared variables. There are two cases: * - * Note: Variables may ALSO appear in the CONSTRAINTS (imposed on - * the binding sets) or FILTERS (imposed either on the local or - * remote access path). For example, that a variable bound by - * another predicate must take on a value having some mathematical - * relationship to a variable bound by the predicate, e.g., BSBM Q5. - * When a variable appears in a constraint but does not appear as an - * argument to the predicate, then there is an additional - * requirement that the variable MUST become bound before the - * predicate may be evaluated (again, BSBM Q5 has this form). + * (1) When the target predicate shares a variable with the source + * predicate, then we always create an edge between those predicates + * to represent a possible join. * - * Note: If a vertex does not share ANY variables (neither in the - * arguments of the predicate nor in its constraints or filters) - * then it can be paired with any of the other vertices. However, in - * such cases we always run such vertices last as they can not - * restrict the cardinality of the rest of the join graph. Such - * vertices are therefore inserted into a separate set and appended - * to the join path once all edges having shared variables have been - * exhausted. + * (2) When the source predicate shares a variable with a constraint + * which also shares a variable with the target predicate, then we + * will also create an edge to represent a possible join. + * + * The second case handles the case where variables are transitively + * shared through a constraint, but not directly shared between the + * predicates. BSBM Q5 is an example of this case. + * + * Note: If applying these two rules fails to create any edges for + * some vertex, then it does not share ANY variables and can be + * paired with ANY of the other vertices. However, we always run + * such vertices last as they can not restrict the cumulative + * cardinality of the solutions. Such vertices are therefore + * inserted into a separate set and appended to the join path once + * all edges having shared variables have been exhausted. + * + * FIXME VERTICES WHICH SHARE VARS THROUGH A CONSTRAINT. + * + * FIXME VERTICES WITH NO SHARED VARS. */ { @@ -1917,20 +1929,14 @@ // consider a possible target vertex. final IPredicate<?> p2 = v[j]; - final Set<IVariable<?>> shared = getSharedVars(p1, p2); + final Set<IVariable<?>> shared = BOpUtility + .getSharedVars(p1, p2); if (shared != null && !shared.isEmpty()) { /* - * The source and target vertices share var(s). - * - * Note: A predicate having a variable which appears - * in a CONSTRAINT MUST NOT be added to the join - * path until that variable would be bound. - * Therefore, when selecting the vertices to be used - * to extend a join path, we must consider whether - * or not the join path would bind the variable(s) - * appearing in the CONSTRAINT. + * The source and target vertices share one or more + * variable(s). */ if (log.isDebugEnabled()) @@ -1945,8 +1951,68 @@ nmatched++; - } + } else if (constraints != null) { + /* + * The source and target vertices do not directly + * share any variable(s). However, there may be a + * constraint which shares a variable with both the + * source and target vertex. If such a constraint is + * found, then we add an edge now as that join is + * potentially constrained (less than the full + * Cartesian cross product). + * + * Note: While this identifies possible joins via a + * constraint, such joins are only legal when all + * variables used by the constraint are known to be + * bound. + * + * FIXME We have to reject edges unless there are + * variable(s) which are directly shared between the + * source and target vertex until all the variables + * spanned by the constraint which licenses the join + * have become bound. [Consider marking these edges + * directly so we know that we need to test and see + * whether or not a constraint exists which shares + * at least one variable with both vertices and that + * all variables in that constraint are bound.] + * + * FIXME Since we can attach more than one + * constraint to a vertex, we may have to ask + * whether any set of the available constraints + * shares at least one variable with the source and + * target vertices. [That is, do they have to share + * variables via the same constraint?!?] + */ + + for(IConstraint c : constraints) { + + if(BOpUtility.getSharedVars(p1, c).isEmpty()) + continue; + + if(BOpUtility.getSharedVars(p2, c).isEmpty()) + continue; + + if (log.isDebugEnabled()) + log + .debug("vertices shared variable(s) via constraint: v1=" + + p1 + + ", v2=" + + p2 + + ", c=" + c); + + tmp.add(new Edge(V[i], V[j], shared)); + + sharedEdgeVertices.add(V[i]); + + sharedEdgeVertices.add(V[j]); + + nmatched++; + + } + + } + } if (nmatched == 0 && !sharedEdgeVertices.contains(V[i])) { @@ -1972,14 +2038,24 @@ if(!unsharedEdgeVertices.isEmpty()) { /* - * FIXME This needs to be supported. We should explore and - * generate the join paths based on only those vertices - * which do share variables (and hence for which we have - * defined edges). Once the vertices which share variables - * have been exhausted, we should simply append edges for - * the vertices which do not share variables in an arbitrary - * order (they will be run last since they can not constrain - * the evaluation). + * FIXME NO SHARED VARS : RUN LAST. This needs to be + * supported. When vertices that do not share variables + * either directly or via a constraint then they should run + * last as they can not constrain the query. In this case, + * they are not considered by the runtime optimizer when + * building up the join path until all vertices which share + * variables have been exhausted. At that point, the + * remaining vertices are just appended to whatever join + * path was selected as having the lowest cumulative + * estimated cardinality. + * + * However, if there exists for a vertex which otherwise + * does not share variables a constraint which should be + * evaluated against that vertex, then that constraint + * provides the basis for a edge (aka join). In this case, + * an edge must be created for the vertex based on the + * shared variable in the constraint and its position in the + * join path will be decided by the runtime optimizer. */ throw new UnsupportedOperationException( @@ -2917,8 +2993,8 @@ final BOpIdFactory idFactory = new BOpIdFactory(); // Generate the query from the join path. - final PipelineOp queryOp = JoinGraph.getQuery(idFactory, p - .getPredicates(), getConstraints()); + final PipelineOp queryOp = PartitionedJoinGroup.getQuery(idFactory, + p.getPredicates(), getConstraints()); // Run the query, blocking until it is done. JoinGraph.runSubquery(context, queryOp); @@ -2996,318 +3072,95 @@ */ /** - * Generate a query plan from an ordered collection of predicates. + * Execute the selected join path. + * <p> + * Note: When executing the query, it is actually being executed as a + * subquery. Therefore we have to take appropriate care to ensure that the + * results are copied out of the subquery and into the parent query. See + * {@link AbstractSubqueryOp} for how this is done. * - * @param p - * The join path. + * @throws Exception * - * @return The query plan. + * @todo When we execute the query, we should clear the references to the + * samples (unless they are exact, in which case they can be used as + * is) in order to release memory associated with those samples if the + * query is long running. Samples must be held until we have + * identified the final join path since each vertex will be used by + * each maximum length join path and we use the samples from the + * vertices to re-sample the surviving join paths in each round. * - * FIXME Verify that constraints are attached correctly to the - * returned query. + * @todo If there is a slice on the outer query, then the query result may + * well be materialized by now. + * + * @todo If there are source binding sets then they need to be applied above + * (when we are sampling) and below (when we evaluate the selected + * join path). + * + * FIXME runQuery() is not working correctly. The query is being + * halted by a {@link BufferClosedException} which appears before it + * has materialized the necessary results. */ - static public PipelineOp getQuery(final BOpIdFactory idFactory, - final IPredicate[] preds, final IConstraint[] constraints) { + static private void runSubquery( + final BOpContext<IBindingSet> parentContext, + final PipelineOp queryOp) throws Exception { - if (constraints != null && constraints.length != 0) { - // FIXME Constraints must be attached to joins. - throw new UnsupportedOperationException( - "Constraints must be attached to joins!"); - } - - final PipelineJoin[] joins = new PipelineJoin[preds.length]; + final QueryEngine queryEngine = parentContext.getRunningQuery() + .getQueryEngine(); -// final PipelineOp startOp = new StartOp(new BOp[] {}, -// NV.asMap(new NV[] {// -// new NV(Predicate.Annotations.BOP_ID, idFactory -// .nextId()),// -// new NV(SliceOp.Annotations.EVALUATION_CONTEXT, -// BOpEvaluationContext.CONTROLLER),// -// })); -// -// PipelineOp lastOp = startOp; - PipelineOp lastOp = null; + /* + * Run the query. + * + * @todo pass in the source binding sets here and also when sampling the + * vertices. + */ -// final Set<IVariable> vars = new LinkedHashSet<IVariable>(); -// for(IPredicate p : preds) { -// for(BOp arg : p.args()) { -// if(arg instanceof IVariable) { -// vars.add((IVariable)arg); -// } -// } -// } - - for (int i = 0; i < preds.length; i++) { + IAsynchronousIterator<IBindingSet[]> subquerySolutionItr = null; - // The next vertex in the selected join order. - final IPredicate p = preds[i]; + final IRunningQuery runningQuery = queryEngine.eval(queryOp); - final List<NV> anns = new LinkedList<NV>(); + try { - anns.add(new NV(PipelineJoin.Annotations.PREDICATE, p)); + // Iterator visiting the subquery solutions. + subquerySolutionItr = runningQuery.iterator(); - anns.add(new NV(PipelineJoin.Annotations.BOP_ID, idFactory - .nextId())); + // Copy solutions from the subquery to the query. + final long nout = BOpUtility.copy(subquerySolutionItr, + parentContext.getSink(), null/* sink2 */, + null/* constraints */, null/* stats */); -// anns.add(new NV(PipelineJoin.Annotations.EVALUATION_CONTEXT, BOpEvaluationContext.ANY)); -// -// anns.add(new NV(PipelineJoin.Annotations.SELECT, vars.toArray(new IVariable[vars.size()]))); + System.out.println("nout=" + nout); - final PipelineJoin joinOp = new PipelineJoin( - lastOp == null ? new BOp[0] : new BOp[] { lastOp }, - anns.toArray(new NV[anns.size()])); + // verify no problems. + runningQuery.get(); - joins[i] = joinOp; + System.out.println("Future Ok"); - lastOp = joinOp; + } catch (Throwable t) { - } + if (Haltable.isTerminationByInterrupt(t)) { -// final PipelineOp queryOp = lastOp; + // normal termination. + return; - /* - * FIXME Why does wrapping with this slice appear to be - * necessary? (It is causing runtime errors when not wrapped). - * Is this a bopId collision which is not being detected? - */ - final PipelineOp queryOp = new SliceOp(new BOp[] { lastOp }, NV - .asMap(new NV[] { - new NV(JoinGraph.Annotations.BOP_ID, idFactory.nextId()), // - new NV(JoinGraph.Annotations.EVALUATION_CONTEXT, - BOpEvaluationContext.CONTROLLER) }) // - ); - - return queryOp; - - } - - /** - * Execute the selected join path. - * <p> - * Note: When executing the query, it is actually being executed as a - * subquery. Therefore we have to take appropriate care to ensure that the - * results are copied out of the subquery and into the parent query. See - * {@link AbstractSubqueryOp} for how this is done. - * - * @todo When we execute the query, we should clear the references to the - * samples (unless they are exact, in which case they can be used as - * is) in order to release memory associated with those samples if the - * query is long running. Samples must be held until we have - * identified the final join path since each vertex will be used by - * each maximum length join path and we use the samples from the - * vertices to re-sample the surviving join paths in each round. - * - * @todo If there is a slice on the outer query, then the query result may - * well be materialized by now. - * - * @todo If there are source binding sets then they need to be applied above - * (when we are sampling) and below (when we evaluate the selected - * join path). - * - * FIXME runQuery() is not working correctly. The query is being - * halted by a {@link BufferClosedException} which appears before it - * has materialized the necessary results. - */ - static public void runSubquery(final BOpContext<IBindingSet> parentContext, - final PipelineOp queryOp) { - - IAsynchronousIterator<IBindingSet[]> subquerySolutionItr = null; - - try { - - if (log.isInfoEnabled()) - log.info("Running: " + BOpUtility.toString(queryOp)); - - final PipelineOp startOp = (PipelineOp) BOpUtility - .getPipelineStart(queryOp); - - if (log.isInfoEnabled()) - log.info("StartOp: " + BOpUtility.toString(startOp)); - - // Run the query. - final UUID queryId = UUID.randomUUID(); - - final QueryEngine queryEngine = parentContext.getRunningQuery() - .getQueryEngine(); - - final IRunningQuery runningQuery = queryEngine - .eval( - queryId, - queryOp, - new LocalChunkMessage<IBindingSet>( - queryEngine, - queryId, - startOp.getId()/* startId */, - -1 /* partitionId */, - /* - * @todo pass in the source binding sets - * here and also when sampling the - * vertices. - */ - new ThickAsynchronousIterator<IBindingSet[]>( - new IBindingSet[][] { new IBindingSet[] { new HashBindingSet() } }))); - - // Iterator visiting the subquery solutions. - subquerySolutionItr = runningQuery.iterator(); - - // Copy solutions from the subquery to the query. - final long nout = BOpUtility - .copy(subquerySolutionItr, parentContext.getSink(), - null/* sink2 */, null/* constraints */, null/* stats */); - - System.out.println("nout=" + nout); - - // verify no problems. - runningQuery.get(); - - System.out.println("Future Ok"); - - } catch (Throwable t) { - - log.error(t,t); - - /* - * If a subquery fails, then propagate the error to the parent - * and rethrow the first cause error out of the subquery. - */ - throw new RuntimeException(parentContext.getRunningQuery() - .halt(t)); - - } finally { - - if (subquerySolutionItr != null) - subquerySolutionItr.close(); - - } - - } - - /** - * Return the variables in common for two {@link IPredicate}s. All variables - * spanned by either {@link IPredicate} are considered. - * <p> - * Note: Variables may appear in the predicates operands, in the - * {@link Annotations#CONSTRAINTS} associated with the - * predicate, and in the {@link IPredicate.Annotations#ACCESS_PATH_FILTER} - * or {@link IPredicate.Annotations#INDEX_LOCAL_FILTER}. - * <p> - * Note: A variable must become bound before it may be evaluated in - * {@link Annotations#CONSTRAINTS}, an - * {@link IPredicate.Annotations#ACCESS_PATH_FILTER} or an - * {@link IPredicate.Annotations#INDEX_LOCAL_FILTER}. This means that the - * {@link IPredicate}s which can bind the variable must be ordered before - * those which merely test the variable. - * - * - * @param p1 - * A predicate. - * - * @param p2 - * A different predicate. - * - * @return The variables in common -or- <code>null</code> iff there are no - * variables in common. - * - * @throws IllegalArgumentException - * if the two predicates are the same reference. - * - * @todo It should be an error if a variable appear in a test is not bound - * by any possible join path. However, note that it may not be - * possible to determine this by local examination of a join graph - * since we do not know which variables may be presented as already - * bound when the join graph is evaluated (but we can only run the - * join graph currently against static source binding sets and for - * that case this is knowable). - * - * @todo When a variable is only optionally bound and it is discovered at - * runtime that the variable is not bound when it is considered by a - * CONSTRAINT, FILTER, etc., then the SPARQL semantics are that - * evaluation should produce a 'type' error which would cause the - * solution should fail (at least within its current join group). See - * https://sourceforge.net/apps/trac/bigdata/ticket/179. - * - * @todo Unit tests, including those which verify that variables appearing - * in the constraints are reported as shared with those appearing in - * the predicates operands. - */ - static Set<IVariable<?>> getSharedVars(final IPredicate p1, final IPredicate p2) { - - // The set of variables which are shared by those predicates. - final Set<IVariable<?>> sharedVars = new LinkedHashSet<IVariable<?>>(); - - /* - * Collect the variables appearing anyway in [p1], including the - * predicate's operands and its constraints, filters, etc. - */ - final Set<IVariable<?>> p1vars = new LinkedHashSet<IVariable<?>>(); - { - - final Iterator<IVariable<?>> itr = BOpUtility - .getSpannedVariables(p1); - - while (itr.hasNext()) { - - p1vars.add(itr.next()); - } - } + // log.error(t,t); - /* - * Consider the variables appearing anyway in [p2], including the - * predicate's operands and its constraints, filters, etc. - */ - { + /* + * Propagate the error to the parent and rethrow the first cause + * error out of the subquery. + */ + throw new RuntimeException(parentContext.getRunningQuery().halt(t)); - final Iterator<IVariable<?>> itr = BOpUtility - .getSpannedVariables(p2); + } finally { - while (itr.hasNext()) { + runningQuery.cancel(true/* mayInterruptIfRunning */); - final IVariable<?> avar = itr.next(); - - if(p1vars.contains(avar)) { + if (subquerySolutionItr != null) + subquerySolutionItr.close(); - sharedVars.add(avar); - - } - - } - } - - return sharedVars; } - /** - * Exception thrown when the join graph does not have any solutions in the - * data (running the query does not produce any results). - */ - public static class NoSolutionsException extends RuntimeException - { - - /** - * - */ - private static final long serialVersionUID = 1L; - - public NoSolutionsException() { - super(); - } - - public NoSolutionsException(String message, Throwable cause) { - super(message, cause); - } - - public NoSolutionsException(String message) { - super(message); - } - - public NoSolutionsException(Throwable cause) { - super(cause); - } - - } - } Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/NoSolutionsException.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/NoSolutionsException.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/NoSolutionsException.java 2011-01-21 21:52:18 UTC (rev 4160) @@ -0,0 +1,31 @@ +package com.bigdata.bop.controller; + +/** + * Exception thrown when the join graph does not have any solutions in the + * data (running the query does not produce any results). + */ +public class NoSolutionsException extends RuntimeException +{ + + /** + * + */ + private static final long serialVersionUID = 1L; + + public NoSolutionsException() { + super(); + } + + public NoSolutionsException(String message, Throwable cause) { + super(message, cause); + } + + public NoSolutionsException(String message) { + super(message); + } + + public NoSolutionsException(Throwable cause) { + super(cause); + } + +} \ No newline at end of file Property changes on: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/NoSolutionsException.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/PartitionedJoinGroup.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/PartitionedJoinGroup.java 2011-01-21 16:46:41 UTC (rev 4159) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/PartitionedJoinGroup.java 2011-01-21 21:52:18 UTC (rev 4160) @@ -10,11 +10,17 @@ import org.apache.log4j.Logger; +import com.bigdata.bop.BOp; +import com.bigdata.bop.BOpEvaluationContext; +import com.bigdata.bop.BOpIdFactory; import com.bigdata.bop.BOpUtility; import com.bigdata.bop.IConstraint; import com.bigdata.bop.IPredicate; import com.bigdata.bop.IVariable; -import com.bigdata.bop.controller.JoinGraph.NoSolutionsException; +import com.bigdata.bop.NV; +import com.bigdata.bop.PipelineOp; +import com.bigdata.bop.join.PipelineJoin; +import com.bigdata.bop.solutions.SliceOp; /** * Class accepts a join group and partitions it into a join graph and a tail @@ -54,10 +60,6 @@ * predicate, which is meaningless in an of itself because the P is * magical.] * - * @todo write a method which returns the set of constraints which should be run - * for the last predicate in a given join path (a join path is just an - * ordered array of predicates). - * * FIXME Add a method to generate a runnable query plan from a collection * of predicates and constraints. This is a bit different for the join * graph and the optionals in the tail plan. The join graph itself should @@ -220,7 +222,54 @@ path[i] = p; } + + return getJoinGraphConstraints(path, joinGraphConstraints + .toArray(new IConstraint[joinGraphConstraints.size()]))[pathIds.length - 1]; + + } + /** + * Given a join path, return the set of constraints to be associated with + * each join in that join path. Only those constraints whose variables are + * known to be bound will be attached. + * + * @param path + * The join path. + * @param joinGraphConstraints + * The constraints to be applied to the join path (optional). + * + * @return The constraints to be paired with each element of the join path. + * + * @throws IllegalArgumentException + * if the join path is <code>null</code>. + * @throws IllegalArgumentException + * if the join path is empty. + * @throws IllegalArgumentException + * if any element of the join path is <code>null</code>. + * @throws IllegalArgumentException + * if any element of the join graph constraints is + * <code>null</code>. + * + * @todo It should be an error if a variable appear in a constraint is not + * bound by any possible join path. However, it may not be possible to + * determine this by local examination of a join graph since we do not + * know which variables may be presented as already bound when the + * join graph is evaluated (but we can only run the join graph + * currently against static source binding sets and for that case this + * is knowable). + */ + static public IConstraint[][] getJoinGraphConstraints( + final IPredicate<?>[] path, final IConstraint[] joinGraphConstraints) { + + if (path == null) + throw new IllegalArgumentException(); + + if (path.length == 0) + throw new IllegalArgumentException(); + + // the set of constraints for each predicate in the join path. + final IConstraint[][] ret = new IConstraint[path.length][]; + /* * For each predicate in the path in the given order, figure out which * constraint(s) would attach to that predicate based on which variables @@ -234,17 +283,20 @@ // the set of constraints which have been consumed. final Set<IConstraint> used = new LinkedHashSet<IConstraint>(); - // the set of constraints for the last predicate in the join path. - final List<IConstraint> ret = new LinkedList<IConstraint>(); - - for(int i = 0; i<path.length; i++) { + for (int i = 0; i < path.length; i++) { - // true iff this is the last join in the path. - final boolean lastJoin = i == path.length - 1; +// // true iff this is the last join in the path. +// final boolean lastJoin = i == path.length - 1; // a predicate in the path. final IPredicate<?> p = path[i]; + if (p == null) + throw new IllegalArgumentException(); + + // the constraints for the current predicate in the join path. + final List<IConstraint> constraints = new LinkedList<IConstraint>(); + { /* * Visit the variables used by the predicate (and bound by it @@ -263,80 +315,80 @@ } } - - // consider each constraint. - for(IConstraint c : joinGraphConstraints) { - if (used.contains(c)) { + if (joinGraphConstraints != null) { + + // consider each constraint. + for (IConstraint c : joinGraphConstraints) { + + if (c == null) + throw new IllegalArgumentException(); + + if (used.contains(c)) { + /* + * Skip constraints which were already assigned to + * predicates before this one in the join path. + */ + continue; + } + /* - * Skip constraints which were already assigned to - * predicates before this one in the join path. + * true iff all variables used by this constraint are bound + * at this point in the join path. */ - continue; - } + boolean allVarsBound = true; - /* - * true iff all variables used by this constraint are bound at - * this point in the join path. - */ - boolean allVarsBound = true; + // visit the variables used by this constraint. + final Iterator<IVariable<?>> vitr = BOpUtility + .getSpannedVariables(c); - // visit the variables used by this constraint. - final Iterator<IVariable<?>> vitr = BOpUtility - .getSpannedVariables(c); + while (vitr.hasNext()) { - while (vitr.hasNext()) { + final IVariable<?> var = vitr.next(); - final IVariable<?> var = vitr.next(); - - if(!boundVars.contains(var)) { - - allVarsBound = false; + if (!boundVars.contains(var)) { - break; + allVarsBound = false; - } + break; - } + } - if (allVarsBound) { + } - /* - * All variables have become bound for this constraint, so - * add it to the set of "used" constraints. - */ - - used.add(c); + if (allVarsBound) { - if (log.isDebugEnabled()) { - log.debug("Constraint attached at index " + i + " of " - + path.length + ", bopId=" + p.getId() - + ", constraint=" + c); - } - - if (lastJoin) { - /* - * If we are on the last join in the join path, then - * this constraint is one of the ones that we will - * return. + * All variables have become bound for this constraint, + * so add it to the set of "used" constraints. */ - - ret.add(c); - } + used.add(c); - } // if(allVarsBound) - - } // next constraint + if (log.isDebugEnabled()) { + log.debug("Constraint attached at index " + i + + " of " + path.length + ", bopId=" + + p.getId() + ", constraint=" + c); + } + + constraints.add(c); + + } // if(allVarsBound) + + } // next constraint + + } // joinGraphConstraints != null; + + // store the constraint[] for that predicate. + ret[i] = constraints.toArray(new IConstraint[constraints.size()]); } // next predicate in the join path. /* - * Return the set of constraints to be applied as of the last predicate - * in the join path. + * Return the set of constraints associated with each predicate in the + * join path. */ - return ret.toArray(new IConstraint[ret.size()]); + return ret; } @@ -589,4 +641,83 @@ } + /** + * Generate a query plan from an ordered collection of predicates. + * + * @param p + * The join path. + * + * @return The query plan. + * + * FIXME Select only those variables required by downstream + * processing or explicitly specified by the caller (in the case + * when this is a subquery, the caller has to declare which + * variables are selected and will be returned out of the subquery). + * + * FIXME For scale-out, we need to either mark the join's evaluation + * context based on whether or not the access path is local or + * remote (and whether the index is key-range distributed or hash + * partitioned). + */ + static public PipelineOp getQuery(final BOpIdFactory idFactory, + final IPredicate<?>[] preds, final IConstraint[] constraints) { + + // figure out which constraints are attached to which predicates. + final IConstraint[][] assignedConstraints = PartitionedJoinGroup + .getJoinGraphConstraints(preds, constraints); + + final PipelineJoin<?>[] joins = new PipelineJoin[preds.length]; + + PipelineOp lastOp = null; + + for (int i = 0; i < preds.length; i++) { + + // The next vertex in the selected join order. + final IPredicate<?> p = preds[i]; + + final List<NV> anns = new LinkedList<NV>(); + + anns.add(new NV(PipelineJoin.Annotations.PREDICATE, p)); + + anns.add(new NV(PipelineJoin.Annotations.BOP_ID, idFactory + .nextId())); + +// anns.add(new NV(PipelineJoin.Annotations.EVALUATION_CONTEXT, BOpEvaluationContext.ANY)); +// +// anns.add(new NV(PipelineJoin.Annotations.SELECT, vars.toArray(new IVariable[vars.size()]))); + + if (assignedConstraints[i] != null + && assignedConstraints[i].length > 0) + anns + .add(new NV(PipelineJoin.Annotations.CONSTRAINTS, + assignedConstraints[i])); + + final PipelineJoin<?> joinOp = new PipelineJoin( + lastOp == null ? new BOp[0] : new BOp[] { lastOp }, anns + .toArray(new NV[anns.size()])); + + joins[i] = joinOp; + + lastOp = joinOp; + + } + +// final PipelineOp queryOp = lastOp; + + /* + * FIXME Why does wrapping with this slice appear to be + * necessary? (It is causing runtime errors when not wrapped). + * Is this a bopId collision which is not being detected? + */ + final PipelineOp queryOp = new SliceOp(new BOp[] { lastOp }, NV + .asMap(new NV[] { + new NV(JoinGraph.Annotations.BOP_ID, idFactory.nextId()), // + new NV(JoinGraph.Annotations.EVALUATION_CONTEXT, + BOpEvaluationContext.CONTROLLER) }) // + ); + + return queryOp; + + } + } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/SubqueryOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/SubqueryOp.java 2011-01-21 16:46:41 UTC (rev 4159) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/SubqueryOp.java 2011-01-21 21:52:18 UTC (rev 4160) @@ -57,6 +57,8 @@ * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ + * + * @see AbstractSubqueryOp */ public class SubqueryOp extends PipelineOp { Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestJGraph.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestJGraph.java 2011-01-21 16:46:41 UTC (rev 4159) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestJGraph.java 2011-01-21 21:52:18 UTC (rev 4160) @@ -87,7 +87,7 @@ // fail("write test"); // } // -// // @todo also getEdgeCount() +// // and also getEdgeCount() // public void test_getEdges() { // fail("write test"); // } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestPartitionedJoinGroup.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestPartitionedJoinGroup.java 2011-01-21 16:46:41 UTC (rev 4159) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestPartitionedJoinGroup.java 2011-01-21 21:52:18 UTC (rev 4160) @@ -456,9 +456,6 @@ /** * @todo test with headPlan. - * - * @todo test logic to attach constraints to non-optional joins based on a - * given join path (not yet written). */ public void test_something() { fail("write tests"); Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/AbstractJoinGraphTestCase.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/AbstractJoinGraphTestCase.java 2011-01-21 16:46:41 UTC (rev 4159) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/AbstractJoinGraphTestCase.java 2011-01-21 21:52:18 UTC (rev 4160) @@ -43,7 +43,7 @@ import com.bigdata.bop.IConstraint; import com.bigdata.bop.IPredicate; import com.bigdata.bop.PipelineOp; -import com.bigdata.bop.controller.JoinGraph; +import com.bigdata.bop.controller.PartitionedJoinGroup; import com.bigdata.bop.controller.JoinGraph.JGraph; import com.bigdata.bop.controller.JoinGraph.Path; import com.bigdata.bop.engine.BOpStats; @@ -217,7 +217,7 @@ * JVM run using the known solutions produced by the runtime versus * static query optimizers. */ - protected void doTest(final IPredicate[] preds, + protected void doTest(final IPredicate<?>[] preds, final IConstraint[] constraints) throws Exception { if (warmUp) @@ -228,7 +228,7 @@ * Run the runtime query optimizer once (its cost is not counted * thereafter). */ - final IPredicate[] runtimePredOrder = runRuntimeQueryOptimizer( + final IPredicate<?>[] runtimePredOrder = runRuntimeQueryOptimizer( getQueryEngine(), limit, nedges, preds, constraints); long totalRuntimeTime = 0; @@ -296,9 +296,10 @@ * * @throws Exception */ - static protected IPredicate[] runRuntimeQueryOptimizer( + static protected IPredicate<?>[] runRuntimeQueryOptimizer( final QueryEngine queryEngine, final int limit, final int nedges, - final IPredicate[] preds, IConstraint[] constraints) throws Exception { + final IPredicate<?>[] preds, IConstraint[] constraints) + throws Exception { final Logger tmp = Logger.getLogger(QueryLog.class); final Level oldLevel = tmp.getEffectiveLevel(); @@ -329,8 +330,8 @@ * @return The predicates in order as recommended by the static query * optimizer. */ - static protected IPredicate[] runStaticQueryOptimizer( - final QueryEngine queryEngine, final IPredicate[] preds) { + static protected IPredicate<?>[] runStaticQueryOptimizer( + final QueryEngine queryEngine, final IPredicate<?>[] preds) { final BOpContextBase context = new BOpContextBase(queryEngine); @@ -351,7 +352,7 @@ final int[] ids = new int[order.length]; - final IPredicate[] out = new IPredicate[order.length]; + final IPredicate<?>[] out = new IPredicate[order.length]; for (int i = 0; i < order.length; i++) { @@ -374,15 +375,9 @@ * @return The elapsed query time (ms). */ private static long runQuery(final String msg, - final QueryEngine queryEngine, final IPredicate[] predOrder, + final QueryEngine queryEngine, final IPredicate<?>[] predOrder, final IConstraint[] constraints) throws Exception { - if (constraints != null && constraints.length != 0) { - // FIXME Constraints must be attached to joins. - throw new UnsupportedOperationException( - "Constraints must be attached to joins!"); - } - if (log.isInfoEnabled()) log.info("Running " + msg); @@ -400,38 +395,46 @@ } - final PipelineOp queryOp = JoinGraph.getQuery(idFactory, predOrder, - constraints); + final PipelineOp queryOp = PartitionedJoinGroup.getQuery(idFactory, + predOrder, constraints); // submit query to runtime optimizer. final IRunningQuery q = queryEngine.eval(queryOp); - // drain the query results. - long nout = 0; - long nchunks = 0; - final IAsynchronousIterator<IBindingSet[]> itr = q.iterator(); try { - while (itr.hasNext()) { - final IBindingSet[] chunk = itr.next(); - nout += chunk.length; - nchunks++; + + // drain the query results. + long nout = 0; + long nchunks = 0; + final IAsynchronousIterator<IBindingSet[]> itr = q.iterator(); + try { + while (itr.hasNext()) { + final IBindingSet[] chunk = itr.next(); + nout += chunk.length; + nchunks++; + } + } finally { + itr.close(); } - } finally { - itr.close(); - } - // check the Future for the query. - q.get(); + // check the Future for the query. + q.get(); - // show the results. - final BOpStats stats = q.getStats().get(queryOp.getId()); + // show the results. + final BOpStats stats = q.getStats().get(queryOp.getId()); - System.err.println(msg + " : ids=" + Arrays.toString(ids) - + ", elapsed=" + q.getElapsed() + ", nout=" + nout - + ", nchunks=" + nchunks + ", stats=" + stats); - - return q.getElapsed(); + System.err.println(msg + " : ids=" + Arrays.toString(ids) + + ", elapsed=" + q.getElapsed() + ", nout=" + nout + + ", nchunks=" + nchunks + ", stats=" + stats); + return q.getElapsed(); + + } finally { + + q.cancel(true/* mayInterruptIfRunning */); + + } + } /** Added: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestAll.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestAll.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestAll.java 2011-01-21 21:52:18 UTC (rev 4160) @@ -0,0 +1,72 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +package com.bigdata.bop.rdf.joinGraph; + + +import junit.framework.Test; +import junit.framework.TestCase; +import junit.framework.TestSuite; + +/** + * Aggregates test suites into increasing dependency order. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public class TestAll extends TestCase { + + /** + * + */ + public TestAll() { + + } + + /** + * @param arg0 + */ + public TestAll(String arg0) { + + super(arg0); + + } + + /** + * Returns a test that will run each of the implementation specific test + * suites in turn. + */ + public static Test suite() + { + + final TestSuite suite = new TestSuite("Runtime query optimizer"); + + suite.addTestSuite(TestJoinGraphOnLubm.class); + suite.addTestSuite(TestJoinGraphOnBarData.class); + suite.addTestSuite(TestJoinGraphOnBSBMData.class); + + return suite; + + } + +} Property changes on: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestAll.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java 2011-01-21 16:46:41 UTC (rev 4159) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java 2011-01-21 21:52:18 UTC (rev 4160) @@ -176,7 +176,7 @@ return new Journal(properties); } - + /** * BSBM Q5 * @@ -202,6 +202,18 @@ * LIMIT 5 * </pre> * + * Note: There are two predicates which bind variables (origProperty1 and + * origProperty2) that are not used by the other predicates and therefore do + * not share any variables which would form "edges" that define joins. In + * general, a join without shared variables means the cross product of the + * sources will be materialized and such joins should be run last. + * <p> + * However, in this case there are SPARQL FILTERs which (a) use those + * variables (origProperty1 and origProperty2); and (b) can constrain the + * query. This means that running the predicates without shared variables + * and applying the constraints before the tail of the plan can in fact lead + * to a more efficient join path. + * * @throws Exception */ public void test_bsbm_q5() throws Exception { Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBarData.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBarData.java 2011-01-21 16:46:41 UTC (rev 4159) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBarData.java 2011-01-21 21:52:18 UTC (rev 4160) @@ -211,7 +211,7 @@ * * @throws Exception */ - public void test_query() throws Exception { + public void test_barData_query() throws Exception { final String namespace = getNamespace(); Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java 2011-01-21 16:46:41 UTC (rev 4159) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java 2011-01-21 21:52:18 UTC (rev 4160) @@ -310,7 +310,7 @@ * * @throws Exception */ - public void test_query2() throws Exception { + public void test_LUBM_Q2() throws Exception { final String namespace = getNamespace(); @@ -449,7 +449,7 @@ * </pre> * @throws Exception */ - public void test_query8() throws Exception { + public void test_LUBM_Q8() throws Exception { final String namespace = getNamespace(); @@ -580,7 +580,7 @@ * * @throws Exception */ - public void test_query9() throws Exception { + public void test_LUBM_Q9() throws Exception { final String name... [truncated message content] |