From: <jer...@us...> - 2013-11-15 22:30:38
|
Revision: 7560 http://bigdata.svn.sourceforge.net/bigdata/?rev=7560&view=rev Author: jeremy_carroll Date: 2013-11-15 22:30:29 +0000 (Fri, 15 Nov 2013) Log Message: ----------- Improved estimated of counts for ALPPs with lower bounds of zero. The main change is in ArbitraryLengthPathNode.getEstimatedCardinality() This involved passing the ITripleStore object into the appropriate method, and so several method signatures had to have an extra argument. With this change the just added tests for trac773 all got to be much simpler, and the behavior appears improved. Modified Paths: -------------- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/ArbitraryLengthPathNode.java branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/IReorderableNode.java branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/JoinGroupNode.java branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/StatementPatternNode.java branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/StaticAnalysisBase.java branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/UnionNode.java branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/ASTStaticJoinOptimizer.java branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/StaticOptimizer.java branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/store/ITripleStore.java branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/optimizers/TestALPPinTrac773.java Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/ArbitraryLengthPathNode.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/ArbitraryLengthPathNode.java 2013-11-15 20:40:47 UTC (rev 7559) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/ArbitraryLengthPathNode.java 2013-11-15 22:30:29 UTC (rev 7560) @@ -11,6 +11,7 @@ import com.bigdata.rdf.sparql.ast.PathNode.PathMod; import com.bigdata.rdf.sparql.ast.eval.AST2BOpBase; import com.bigdata.rdf.sparql.ast.optimizers.StaticOptimizer; +import com.bigdata.rdf.store.ITripleStore; /** * A special kind of AST node that represents the SPARQL 1.1 arbitrary length @@ -220,19 +221,20 @@ } // @Override - public boolean isReorderable() { + public boolean isReorderable(ITripleStore db) { - final long estCard = getEstimatedCardinality(null); + final long estCard = getEstimatedCardinality(null, db); return estCard >= 0 && estCard < Long.MAX_VALUE; } // @Override - public long getEstimatedCardinality(StaticOptimizer opt) { + public long getEstimatedCardinality(StaticOptimizer opt, ITripleStore db) { final JoinGroupNode group = subgroup(); - + + long zeroMatchAdjustment = 0; /* * if lowerBound() is zero, and both ?s and ?o are * variables then we (notionally) match @@ -244,11 +246,21 @@ * Despite this not being implemented, the optimizer does better * knowing this correctly. */ - if (lowerBound() == 0 && left() instanceof VarNode && right() instanceof VarNode) { - return Long.MAX_VALUE; + if (lowerBound() == 0 ) { + int fixedCount = (left() instanceof VarNode ? 1 : 0) + (right() instanceof VarNode ? 1 : 0); + switch (fixedCount) { + case 0: + zeroMatchAdjustment = left().getValue().equals(right().getValue())?1:0; + break; + case 1: + zeroMatchAdjustment = 1; + break; + case 2: + zeroMatchAdjustment = db.getURICount() + db.getBNodeCount(); // this is too big when we are looking in a reduced dataset + break; + } } - /* * Only deal with singleton paths for now. * @@ -261,8 +273,11 @@ final long estCard = node.getProperty( AST2BOpBase.Annotations.ESTIMATED_CARDINALITY, Long.MAX_VALUE); + + + - return estCard; + return estCard + zeroMatchAdjustment; } Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/IReorderableNode.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/IReorderableNode.java 2013-11-15 20:40:47 UTC (rev 7559) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/IReorderableNode.java 2013-11-15 22:30:29 UTC (rev 7560) @@ -29,6 +29,7 @@ import com.bigdata.bop.BOp; import com.bigdata.rdf.sparql.ast.optimizers.StaticOptimizer; +import com.bigdata.rdf.store.ITripleStore; /** * Interface for things which can be re-ordered by the static join @@ -43,7 +44,7 @@ * by examining the type - individual instances of a particular type * may or may not be reorderable. */ - boolean isReorderable(); + boolean isReorderable(ITripleStore db); /** * Return the estimated cardinality - either the range count of a @@ -51,6 +52,6 @@ * group. * @param opt This optimizer can be used to help work out the estimate */ - long getEstimatedCardinality(StaticOptimizer opt); + long getEstimatedCardinality(StaticOptimizer opt, ITripleStore db); } Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/JoinGroupNode.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/JoinGroupNode.java 2013-11-15 20:40:47 UTC (rev 7559) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/JoinGroupNode.java 2013-11-15 22:30:29 UTC (rev 7560) @@ -9,6 +9,7 @@ import com.bigdata.bop.IVariable; import com.bigdata.rdf.internal.constraints.InBOp; import com.bigdata.rdf.sparql.ast.service.ServiceNode; +import com.bigdata.rdf.store.ITripleStore; /** * An optional or non-optional collection of query nodes that run together in @@ -348,12 +349,12 @@ } - public List<IReorderableNode> getReorderableChildren() { + public List<IReorderableNode> getReorderableChildren(ITripleStore db) { final List<IReorderableNode> nodes = getChildren(IReorderableNode.class); final Iterator<IReorderableNode> it = nodes.iterator(); while (it.hasNext()) { final IReorderableNode node = it.next(); - if (!node.isReorderable()) { + if (!node.isReorderable(db)) { it.remove(); } } Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/StatementPatternNode.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/StatementPatternNode.java 2013-11-15 20:40:47 UTC (rev 7559) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/StatementPatternNode.java 2013-11-15 22:30:29 UTC (rev 7560) @@ -23,6 +23,7 @@ import com.bigdata.rdf.spo.DistinctTermAdvancer; import com.bigdata.rdf.spo.ISPO; import com.bigdata.rdf.spo.SPOAccessPath; +import com.bigdata.rdf.store.ITripleStore; import com.bigdata.relation.rule.eval.ISolution; import com.bigdata.striterator.IKeyOrder; @@ -635,7 +636,7 @@ * @see com.bigdata.rdf.sparql.ast.IReorderableNode#isReorderable() */ @Override - public boolean isReorderable() { + public boolean isReorderable(ITripleStore db) { return !isOptional(); @@ -645,7 +646,7 @@ * @see com.bigdata.rdf.sparql.ast.IReorderableNode#getEstimatedCardinality() */ @Override - public long getEstimatedCardinality(StaticOptimizer opt) { + public long getEstimatedCardinality(StaticOptimizer opt, ITripleStore db) { return getProperty(AST2BOpBase.Annotations.ESTIMATED_CARDINALITY, -1l); Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/StaticAnalysisBase.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/StaticAnalysisBase.java 2013-11-15 20:40:47 UTC (rev 7559) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/StaticAnalysisBase.java 2013-11-15 22:30:29 UTC (rev 7560) @@ -39,6 +39,7 @@ import com.bigdata.bop.IVariable; import com.bigdata.rdf.sparql.ast.eval.IEvaluationContext; import com.bigdata.rdf.sparql.ast.ssets.ISolutionSetManager; +import com.bigdata.rdf.store.ITripleStore; /** * Base class for static analysis. @@ -471,5 +472,9 @@ return set; } + + public ITripleStore getDB() { + return evaluationContext.getAbstractTripleStore(); + } } Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/UnionNode.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/UnionNode.java 2013-11-15 20:40:47 UTC (rev 7559) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/UnionNode.java 2013-11-15 22:30:29 UTC (rev 7560) @@ -6,6 +6,7 @@ import com.bigdata.bop.BOp; import com.bigdata.rdf.sparql.ast.optimizers.StaticOptimizer; +import com.bigdata.rdf.store.ITripleStore; /** * A special kind of group {@link IGroupNode} that represents the sparql union @@ -92,22 +93,22 @@ @Override - public long getEstimatedCardinality(StaticOptimizer optimizer) { + public long getEstimatedCardinality(StaticOptimizer optimizer, ITripleStore db) { long cardinality = 0; for (JoinGroupNode child : this) { - StaticOptimizer opt = new StaticOptimizer(optimizer, child.getReorderableChildren()); + StaticOptimizer opt = new StaticOptimizer(optimizer, child.getReorderableChildren(db)); cardinality += opt.getCardinality(); } return cardinality; } @Override - public boolean isReorderable() { + public boolean isReorderable(ITripleStore db) { for (JoinGroupNode child : this) { for (IGroupMemberNode grandchild : child) { if (! (grandchild instanceof IReorderableNode)) return false; - if (! ((IReorderableNode)grandchild).isReorderable()) + if (! ((IReorderableNode)grandchild).isReorderable(db)) return false; } } Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/ASTStaticJoinOptimizer.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/ASTStaticJoinOptimizer.java 2013-11-15 20:40:47 UTC (rev 7559) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/ASTStaticJoinOptimizer.java 2013-11-15 22:30:29 UTC (rev 7560) @@ -466,7 +466,7 @@ /* * Let the optimizer handle the simple optionals too. */ - final List<IReorderableNode> nodes = joinGroup.getReorderableChildren(); + final List<IReorderableNode> nodes = joinGroup.getReorderableChildren(ctx.getAbstractTripleStore()); if (!nodes.isEmpty()) { Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/StaticOptimizer.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/StaticOptimizer.java 2013-11-15 20:40:47 UTC (rev 7559) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/StaticOptimizer.java 2013-11-15 22:30:29 UTC (rev 7560) @@ -512,8 +512,7 @@ if (rangeCount[tailIndex] == -1L) { - final long rangeCount = (long) nodes.get(tailIndex) - .getEstimatedCardinality(this); + final long rangeCount = (long) nodes.get(tailIndex).getEstimatedCardinality(this, sa.getDB()); this.rangeCount[tailIndex] = rangeCount; Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/store/ITripleStore.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/store/ITripleStore.java 2013-11-15 20:40:47 UTC (rev 7559) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/store/ITripleStore.java 2013-11-15 22:30:29 UTC (rev 7560) @@ -73,7 +73,7 @@ * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ */ -interface ITripleStore { +public interface ITripleStore { /** * The #of named graphs. Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/optimizers/TestALPPinTrac773.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/optimizers/TestALPPinTrac773.java 2013-11-15 20:40:47 UTC (rev 7559) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/optimizers/TestALPPinTrac773.java 2013-11-15 22:30:29 UTC (rev 7560) @@ -48,6 +48,9 @@ private class NotNestedHelper extends Helper { public NotNestedHelper(HelperFlag zero_or_one_to_one_or_more, String sym) { + this(zero_or_one_to_one_or_more, sym, true); + } + public NotNestedHelper(HelperFlag zero_or_one_to_one_or_more, String sym, boolean switchOrdering) { String pattern = "c" + sym; given = select( varNode(z), @@ -63,23 +66,35 @@ // we have to evaluate this one earlier in order to get the anonymous variable numbering // lined up. Really we should compare the result with expected wise to // the unimportance of the name of anonymous variables. - ArbitraryLengthPathNode alpp = arbitartyLengthPropertyPath(varNode(x), varNode(z), zero_or_one_to_one_or_more, - joinGroupNode( - statementPatternNode(leftVar(), constantNode(c), rightVar(), 3135) - ) ); - expected = select( varNode(z), + ArbitraryLengthPathNode alpp1; + ArbitraryLengthPathNode alpp2; + if (switchOrdering) { + alpp2 = alpp2(zero_or_one_to_one_or_more); + alpp1 = alpp1(zero_or_one_to_one_or_more); + } else { + alpp1 = alpp1(zero_or_one_to_one_or_more); + alpp2 = alpp2(zero_or_one_to_one_or_more); + + } + + expected = select( varNode(z), where ( - arbitartyLengthPropertyPath(varNode(x), constantNode(b), zero_or_one_to_one_or_more, - joinGroupNode( - statementPatternNode(leftVar(), constantNode(c), rightVar(), 26) - ) ), - alpp, + alpp1, + alpp2, statementPatternNode(varNode(z), constantNode(a), varNode(w), 2054), statementPatternNode(varNode(y), constantNode(c), varNode(x), 15431) ) ); varCount = 0; } + ArbitraryLengthPathNode alpp1(HelperFlag zero_or_one_to_one_or_more) { + return arbitartyLengthPropertyPath(varNode(x), constantNode(b), zero_or_one_to_one_or_more, + joinGroupNode( statementPatternNode(leftVar(), constantNode(c), rightVar(), 26) ) ); + } + ArbitraryLengthPathNode alpp2(HelperFlag zero_or_one_to_one_or_more) { + return arbitartyLengthPropertyPath(varNode(x), varNode(z), zero_or_one_to_one_or_more, + joinGroupNode( statementPatternNode(leftVar(), constantNode(c), rightVar(), 3135) ) ); + } } private class NestedHelper extends Helper { @@ -173,7 +188,7 @@ } public void testNestedPartway() { - new Helper(){{ + new NestedHelper(ZERO_OR_MORE,"*"){{ given = select( varNode(z), where ( @@ -195,26 +210,12 @@ statementPatternNode(varNode(z), constantNode(a), varNode(w), 2054) ) ); - varCount = 0; - expected = select( varNode(z), - where ( - arbitartyLengthPropertyPath(varNode(x), constantNode(b), ZERO_OR_MORE, - joinGroupNode( - statementPatternNode(leftVar(), constantNode(c), rightVar(), 26) - ) ), - statementPatternNode(varNode(y), constantNode(c), varNode(x), 15431), - arbitartyLengthPropertyPath(varNode(x), varNode(z), ZERO_OR_MORE, - joinGroupNode( - statementPatternNode(leftVar(), constantNode(c), rightVar(), 3135) - ) ), - statementPatternNode(varNode(z), constantNode(a), varNode(w), 2054) - ) ); }}.test(); } public void testNotNestedPartway() { - new Helper(){{ + new NotNestedHelper(ZERO_OR_MORE,"*", false){{ given = select( varNode(z), where ( @@ -233,66 +234,14 @@ statementPatternNode(varNode(z), constantNode(a), varNode(w), 2054) ) ); - varCount = 0; - expected = select( varNode(z), - where ( - arbitartyLengthPropertyPath(varNode(x), constantNode(b), ZERO_OR_MORE, - joinGroupNode( - statementPatternNode(leftVar(), constantNode(c), rightVar(), 26) - ) ), - statementPatternNode(varNode(y), constantNode(c), varNode(x), 15431), - arbitartyLengthPropertyPath(varNode(x), varNode(z), ZERO_OR_MORE, - joinGroupNode( - statementPatternNode(leftVar(), constantNode(c), rightVar(), 3135) - ) ), - statementPatternNode(varNode(z), constantNode(a), varNode(w), 2054) - ) ); - }}.test(); } public void testNestedStar() { - new NestedHelper(ZERO_OR_MORE,"*"){{ - // currently not correctly optimized. - // TODO: this expected result is incorrect. - - expected = select( varNode(z), - where ( - arbitartyLengthPropertyPath(varNode(x), constantNode(b), ZERO_OR_MORE, - joinGroupNode( - statementPatternNode(leftVar(), constantNode(c), rightVar(), 26) - ) ), - statementPatternNode(varNode(y), constantNode(c), varNode(x), 15431), - arbitartyLengthPropertyPath(varNode(x), varNode(z), ZERO_OR_MORE, - joinGroupNode( - statementPatternNode(leftVar(), constantNode(c), rightVar(), 3135) - ) ), - statementPatternNode(varNode(z), constantNode(a), varNode(w), 2054) - ) ); - - }}.test(); + new NestedHelper(ZERO_OR_MORE,"*").test(); } public void testNotNestedStar() { - new NotNestedHelper(ZERO_OR_MORE,"*"){{ - // currently not correctly optimized. - // TODO: this expected result is incorrect. - - ArbitraryLengthPathNode alpp = arbitartyLengthPropertyPath(varNode(x), varNode(z), ZERO_OR_MORE, - joinGroupNode( - statementPatternNode(leftVar(), constantNode(c), rightVar(), 3135) - ) ); - expected = select( varNode(z), - where ( - arbitartyLengthPropertyPath(varNode(x), constantNode(b), ZERO_OR_MORE, - joinGroupNode( - statementPatternNode(leftVar(), constantNode(c), rightVar(), 26) - ) ), - statementPatternNode(varNode(y), constantNode(c), varNode(x), 15431), - statementPatternNode(varNode(z), constantNode(a), varNode(w), 2054), - alpp - ) ); - - }}.test(); + new NotNestedHelper(ZERO_OR_MORE,"*").test(); } public void testNestedPlus() { @@ -304,47 +253,10 @@ } public void testNestedQuestionMark() { - new NestedHelper(ZERO_OR_ONE,"?"){{ - // currently not correctly optimized. - // TODO: this expected result is incorrect. - - expected = select( varNode(z), - where ( - arbitartyLengthPropertyPath(varNode(x), constantNode(b), ZERO_OR_ONE, - joinGroupNode( - statementPatternNode(leftVar(), constantNode(c), rightVar(), 26) - ) ), - statementPatternNode(varNode(y), constantNode(c), varNode(x), 15431), - arbitartyLengthPropertyPath(varNode(x), varNode(z), ZERO_OR_ONE, - joinGroupNode( - statementPatternNode(leftVar(), constantNode(c), rightVar(), 3135) - ) ), - statementPatternNode(varNode(z), constantNode(a), varNode(w), 2054) - ) ); - - }}.test(); + new NestedHelper(ZERO_OR_ONE,"?").test(); } public void testNotNestedQuestionMark() { - new NotNestedHelper(ZERO_OR_ONE,"?"){{ - // currently not correctly optimized. - // TODO: this expected result is incorrect. - - ArbitraryLengthPathNode alpp = arbitartyLengthPropertyPath(varNode(x), varNode(z), ZERO_OR_ONE, - joinGroupNode( - statementPatternNode(leftVar(), constantNode(c), rightVar(), 3135) - ) ); - expected = select( varNode(z), - where ( - arbitartyLengthPropertyPath(varNode(x), constantNode(b), ZERO_OR_ONE, - joinGroupNode( - statementPatternNode(leftVar(), constantNode(c), rightVar(), 26) - ) ), - statementPatternNode(varNode(y), constantNode(c), varNode(x), 15431), - statementPatternNode(varNode(z), constantNode(a), varNode(w), 2054), - alpp - ) ); - - }}.test(); + new NotNestedHelper(ZERO_OR_ONE,"?").test(); } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |