From: <tho...@us...> - 2011-01-15 23:53:30
|
Revision: 4106 http://bigdata.svn.sourceforge.net/bigdata/?rev=4106&view=rev Author: thompsonbry Date: 2011-01-15 23:53:24 +0000 (Sat, 15 Jan 2011) Log Message: ----------- Modified JoinGraph to propagate the PipelineJoin.Annotations.CONSTRAINTS from the _predicate_ to the join. This allows us to annotate the predicate with constraints which will be imposed by the join. That makes it possible for the runtime query optimizer to generate the joins dynamically when they include IConstraint[]s. See trac issue #64 (Runtime Query Optimizer). Fixed some problems in the BSBM Q5 setup as hand coded for the runtime query optimizer. Identified a problem with the runtime query optimizer where it will never include a vertex if it does not share any join variables with the other vertices in the graph (BSBM Q5 does this). I have not fixed this yet. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2011-01-15 22:49:40 UTC (rev 4105) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2011-01-15 23:53:24 UTC (rev 4106) @@ -198,6 +198,10 @@ * approach should be able to handle queries without perfect / covering * automatically. Then experiment with carrying fewer statement indices * for quads. + * + * @todo Unit test when there are no solutions to the query. In this case there + * will be no paths identified by the optimizer and the final path length + * becomes zero. */ public class JoinGraph extends PipelineOp { @@ -1032,10 +1036,10 @@ * path). */ final int joinId = 1; - final PipelineJoin joinOp = new PipelineJoin(new BOp[] {}, // + final Map<String,Object> anns = NV.asMap(// new NV(BOp.Annotations.BOP_ID, joinId),// - new NV(PipelineJoin.Annotations.PREDICATE, vTarget.pred - .setBOpId(3)), + // @todo Why not use a factory which avoids bopIds already in use? + new NV(PipelineJoin.Annotations.PREDICATE, vTarget.pred.setBOpId(3)), // disallow parallel evaluation. new NV(PipelineJoin.Annotations.MAX_PARALLEL,0), // disable access path coalescing @@ -1056,6 +1060,12 @@ new NV(PipelineJoin.Annotations.SHARED_STATE,true), new NV(PipelineJoin.Annotations.EVALUATION_CONTEXT,BOpEvaluationContext.CONTROLLER) ); + if (vTarget.pred.getProperty(PipelineJoin.Annotations.CONSTRAINTS) != null) { + // Copy constraints from the predicate onto the join, which will apply them. + anns.put(PipelineJoin.Annotations.CONSTRAINTS, vTarget.pred + .getProperty(PipelineJoin.Annotations.CONSTRAINTS)); + } + final PipelineJoin joinOp = new PipelineJoin(new BOp[] {}, anns); final PipelineOp queryOp = joinOp; @@ -1805,8 +1815,13 @@ } /** + * Find a good join path in the data given the join graph. The join path + * is not guaranteed to be the best join path (the search performed by + * the runtime optimizer is not exhaustive) but it should always be a + * "good" join path and may often be the "best" join path. * * @param queryEngine + * The query engine. * @param limit * The limit for sampling a vertex and the initial limit for * cutoff join evaluation. @@ -1818,10 +1833,25 @@ * a join path, the starting vertex will be the vertex of * that edge having the lower cardinality. * + * @return The join path identified by the runtime query optimizer as + * the best path given the join graph and the data. + * + * @throws NoSolutionsException + * If there are no solutions for the join graph in the data + * (the query does not have any results). + * * @throws Exception + * + * @todo It is possible that this could throw a + * {@link NoSolutionsException} if the cutoff joins do not use a + * large enough sample to find a join path which produces at least + * one solution. We need to automatically increase the depth of + * search for queries where we have cardinality estimation + * underflows or punt to another method to decide the join order. */ public Path runtimeOptimizer(final QueryEngine queryEngine, - final int limit, final int nedges) throws Exception { + final int limit, final int nedges) throws Exception, + NoSolutionsException { // Setup the join graph. Path[] paths = round0(queryEngine, limit, nedges); @@ -1838,12 +1868,19 @@ int round = 1; - while (round < nvertices - 1) { + while (paths.length > 0 && round < nvertices - 1) { paths = expand(queryEngine, limit, round++, paths); } + if (paths.length == 0) { + + // There are no solutions for the join graph in the data. + throw new NoSolutionsException(); + + } + // Should be one winner. assert paths.length == 1; @@ -2257,6 +2294,10 @@ final boolean v1Found = x.contains(edgeInGraph.v1); final boolean v2Found = x.contains(edgeInGraph.v2); + if (log.isTraceEnabled()) + log.trace("Edge: " + edgeInGraph + ", v1Found=" + + v1Found + ", v2Found=" + v2Found); + if (!v1Found && !v2Found) { // Edge is not connected to this path. continue; @@ -2277,6 +2318,9 @@ if (used.contains(tVertex)) { // Vertex already used to extend this path. + if (log.isTraceEnabled()) + log.trace("Edge: " + edgeInGraph + + " - already used to extend this path."); continue; } @@ -2292,6 +2336,10 @@ // Add to the set of paths for this round. tmp.add(p); + if (log.isTraceEnabled()) + log.trace("Extended path with edge: " + edgeInGraph + + ", new path=" + p); + } } @@ -2806,6 +2854,13 @@ // // anns.add(new NV(PipelineJoin.Annotations.SELECT, vars.toArray(new IVariable[vars.size()]))); + if (p.getProperty(PipelineJoin.Annotations.CONSTRAINTS) != null) { + // Copy constraints from the predicate onto the join, which will + // apply them. + anns.add(new NV(PipelineJoin.Annotations.CONSTRAINTS, p + .getProperty(PipelineJoin.Annotations.CONSTRAINTS))); + } + final PipelineJoin joinOp = new PipelineJoin( lastOp == null ? new BOp[0] : new BOp[] { lastOp }, anns.toArray(new NV[anns.size()])); @@ -2935,4 +2990,34 @@ } + /** + * Exception thrown when the join graph does not have any solutions in the + * data (running the query does not produce any results). + */ + public static class NoSolutionsException extends RuntimeException + { + + /** + * + */ + private static final long serialVersionUID = 1L; + + public NoSolutionsException() { + super(); + } + + public NoSolutionsException(String message, Throwable cause) { + super(message, cause); + } + + public NoSolutionsException(String message) { + super(message); + } + + public NoSolutionsException(Throwable cause) { + super(cause); + } + + } + } Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java 2011-01-15 22:49:40 UTC (rev 4105) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java 2011-01-15 23:53:24 UTC (rev 4106) @@ -10,7 +10,6 @@ import org.apache.log4j.Logger; import org.openrdf.query.algebra.Compare.CompareOp; import org.openrdf.query.algebra.MathExpr.MathOp; -import org.semanticweb.yars.nx.dt.numeric.XSDInt; import com.bigdata.bop.BOp; import com.bigdata.bop.BOpContextBase; @@ -35,7 +34,7 @@ import com.bigdata.bop.fed.QueryEngineFactory; import com.bigdata.bop.join.PipelineJoin; import com.bigdata.journal.Journal; -import com.bigdata.rdf.internal.IVUtility; +import com.bigdata.rdf.internal.XSDIntIV; import com.bigdata.rdf.internal.constraints.CompareBOp; import com.bigdata.rdf.internal.constraints.MathBOp; import com.bigdata.rdf.model.BigdataURI; @@ -101,7 +100,7 @@ private AbstractTripleStore database; /** The initial sampling limit. */ - private final int limit = 100; + private final int limit = 1000; /** The #of edges considered for the initial paths. */ private final int nedges = 2; @@ -114,7 +113,7 @@ * When true, do a warm up run of the plan generated by the static query * optimizer. */ - private final boolean warmUp = false; + private final boolean warmUp = true; /** * The #of times to run each query. Use N GT ONE (1) if you want to converge @@ -153,8 +152,10 @@ /* * Use a specific file generated by some external process. */ - file = new File("/data/bsbm/bsbm_284826/bigdata-bsbm.RW.jnl"); - namespace = "BSBM_284826"; + final long pc = 284826; // BSBM 100M +// final long pc = 566496; // BSBM 200M + file = new File("/data/bsbm/bsbm_"+pc+"/bigdata-bsbm.RW.jnl"); + namespace = "BSBM_"+pc; } properties.setProperty(Journal.Options.FILE, file.toString()); @@ -260,7 +261,7 @@ .getValueFactory(); final String rdfs = "http://www.w3.org/2000/01/rdf-schema#"; - final String rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"; +// final String rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"; final String bsbm = "http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/"; // final BigdataURI rdfType = valueFactory.createURI(rdf + "type"); @@ -309,8 +310,8 @@ // The name space for the SPO relation. final String[] spoRelation = new String[] { namespace + ".spo" }; - // The name space for the Lexicon relation. - final String[] lexRelation = new String[] { namespace + ".lex" }; +// // The name space for the Lexicon relation. +// final String[] lexRelation = new String[] { namespace + ".lex" }; final long timestamp = jnl.getLastCommitTime(); @@ -334,8 +335,6 @@ * predicates). The RTO knows to look for the CONSTRAINTS on * the IPredicate and apply them to the constructed join * operator. - * - * FIXME JOinGraph needs to do this ^^^^^^^ */ new NV(PipelineJoin.Annotations.CONSTRAINTS, new IConstraint[] {// @@ -354,6 +353,7 @@ new NV(Annotations.TIMESTAMP, timestamp),// new NV(IPredicate.Annotations.RELATION_NAME, spoRelation) ); + // ?product bsbm:productFeature ?prodFeature . p2 = new SPOPredicate(new BOp[] { // product,// @@ -364,6 +364,7 @@ new NV(Annotations.TIMESTAMP, timestamp),// new NV(IPredicate.Annotations.RELATION_NAME, spoRelation) ); + // <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1092/Product53999> bsbm:productPropertyNumeric1 ?origProperty1 . p3 = new SPOPredicate(new BOp[] { // new Constant(product53999.getIV()),// @@ -374,6 +375,7 @@ new NV(Annotations.TIMESTAMP, timestamp),// new NV(IPredicate.Annotations.RELATION_NAME, spoRelation) ); + // ?product bsbm:productPropertyNumeric1 ?simProperty1 . // FILTER (?simProperty1 < (?origProperty1 + 120) && ?simProperty1 > (?origProperty1 - 120)) p4 = new SPOPredicate(new BOp[] { // @@ -389,8 +391,8 @@ new CompareBOp(new BOp[] { simProperty1, new MathBOp(origProperty1, - new Constant(new XSDInt( - "120")), + new Constant(new XSDIntIV( + 120)), MathOp.PLUS) }, NV .asMap(new NV[] { new NV( CompareBOp.Annotations.OP, @@ -398,8 +400,8 @@ new CompareBOp(new BOp[] { simProperty1, new MathBOp(origProperty1, - new Constant(new XSDInt( - "120")), + new Constant(new XSDIntIV( + 120)), MathOp.MINUS) }, NV .asMap(new NV[] { new NV( CompareBOp.Annotations.OP, @@ -407,13 +409,6 @@ })// ); - /* - * com.bigdata.rdf.internal.constraints.CompareBOp(Var,MathBOp)[ - * com.bigdata.rdf.internal.constraints.CompareBOp.op=GT], - * com.bigdata.rdf.internal.constraints.CompareBOp(Var,MathBOp)[ - * com.bigdata.rdf.internal.constraints.CompareBOp.op=LT]], - */ - // <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1092/Product53999> bsbm:productPropertyNumeric2 ?origProperty2 . p5 = new SPOPredicate(new BOp[] { // new Constant(product53999.getIV()),// @@ -440,17 +435,17 @@ new CompareBOp(new BOp[] { simProperty2, new MathBOp(origProperty2, - new Constant(new XSDInt( - "170")), + new Constant(new XSDIntIV( + 170)), MathOp.PLUS) }, NV .asMap(new NV[] { new NV( CompareBOp.Annotations.OP, CompareOp.LT) })),// new CompareBOp(new BOp[] { simProperty2, - new MathBOp(origProperty1, - new Constant(new XSDInt( - "170")), + new MathBOp(origProperty2, + new Constant(new XSDIntIV( + 170)), MathOp.MINUS) }, NV .asMap(new NV[] { new NV( CompareBOp.Annotations.OP, @@ -473,7 +468,7 @@ * @throws Exception * * @todo To actually test anything this needs to compare the results (or at - * least the #of result). We could also test for known good join + * least the #of results). We could also test for known good join * orders as generated by the runtime optimizer, but that requires a * known data set (e.g., U1 or U50) and non-random sampling. * This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |