This list is closed; nobody may subscribe to it.
2010 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
(139) |
Aug
(94) |
Sep
(232) |
Oct
(143) |
Nov
(138) |
Dec
(55) |
---|---|---|---|---|---|---|---|---|---|---|---|---|
2011 |
Jan
(127) |
Feb
(90) |
Mar
(101) |
Apr
(74) |
May
(148) |
Jun
(241) |
Jul
(169) |
Aug
(121) |
Sep
(157) |
Oct
(199) |
Nov
(281) |
Dec
(75) |
2012 |
Jan
(107) |
Feb
(122) |
Mar
(184) |
Apr
(73) |
May
(14) |
Jun
(49) |
Jul
(26) |
Aug
(103) |
Sep
(133) |
Oct
(61) |
Nov
(51) |
Dec
(55) |
2013 |
Jan
(59) |
Feb
(72) |
Mar
(99) |
Apr
(62) |
May
(92) |
Jun
(19) |
Jul
(31) |
Aug
(138) |
Sep
(47) |
Oct
(83) |
Nov
(95) |
Dec
(111) |
2014 |
Jan
(125) |
Feb
(60) |
Mar
(119) |
Apr
(136) |
May
(270) |
Jun
(83) |
Jul
(88) |
Aug
(30) |
Sep
(47) |
Oct
(27) |
Nov
(23) |
Dec
|
2015 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
(3) |
Oct
|
Nov
|
Dec
|
2016 |
Jan
|
Feb
|
Mar
(4) |
Apr
(1) |
May
|
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
|
Dec
|
From: <mrp...@us...> - 2011-02-22 20:58:01
|
Revision: 4225 http://bigdata.svn.sourceforge.net/bigdata/?rev=4225&view=rev Author: mrpersonick Date: 2011-02-22 20:57:55 +0000 (Tue, 22 Feb 2011) Log Message: ----------- isLiteral support Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl3.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestInlineValues.java Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl3.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl3.java 2011-02-22 20:56:49 UTC (rev 4224) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl3.java 2011-02-22 20:57:55 UTC (rev 4225) @@ -21,6 +21,7 @@ import org.openrdf.model.Literal; import org.openrdf.model.URI; import org.openrdf.model.Value; +import org.openrdf.model.impl.BooleanLiteralImpl; import org.openrdf.query.BindingSet; import org.openrdf.query.Dataset; import org.openrdf.query.QueryEvaluationException; @@ -29,6 +30,7 @@ import org.openrdf.query.algebra.Compare; import org.openrdf.query.algebra.Filter; import org.openrdf.query.algebra.Group; +import org.openrdf.query.algebra.IsLiteral; import org.openrdf.query.algebra.Join; import org.openrdf.query.algebra.LeftJoin; import org.openrdf.query.algebra.MathExpr; @@ -84,10 +86,12 @@ import com.bigdata.btree.keys.IKeyBuilderFactory; import com.bigdata.rdf.internal.DummyIV; import com.bigdata.rdf.internal.IV; +import com.bigdata.rdf.internal.XSDBooleanIV; import com.bigdata.rdf.internal.constraints.AndBOp; import com.bigdata.rdf.internal.constraints.CompareBOp; import com.bigdata.rdf.internal.constraints.EBVBOp; import com.bigdata.rdf.internal.constraints.IsBoundBOp; +import com.bigdata.rdf.internal.constraints.IsLiteralBOp; import 
com.bigdata.rdf.internal.constraints.MathBOp; import com.bigdata.rdf.internal.constraints.NotBOp; import com.bigdata.rdf.internal.constraints.OrBOp; @@ -2059,6 +2063,7 @@ */ private IValueExpression<IV> toVE(final ValueExpr ve) throws UnsupportedOperatorException { + if (ve instanceof Var) { return toVE((Var) ve); } else if (ve instanceof ValueConstant) { @@ -2077,6 +2082,8 @@ return toVE((Compare) ve); } else if (ve instanceof Bound) { return toVE((Bound) ve); + } else if (ve instanceof IsLiteral) { + return toVE((IsLiteral) ve); } throw new UnsupportedOperatorException(ve); @@ -2158,6 +2165,11 @@ return new IsBoundBOp(var); } + private IValueExpression<IV> toVE(final IsLiteral isLiteral) { + final IVariable<IV> var = (IVariable<IV>) toVE(isLiteral.getArg()); + return new IsLiteralBOp(var); + } + /** * Generate a bigdata term from a Sesame term. * <p> @@ -2189,7 +2201,14 @@ * value does not exist in the lexicon. */ private IConstant<IV> toVE(final ValueConstant vc) { - final IV iv = ((BigdataValue) vc.getValue()).getIV(); + final IV iv; + final Value v = vc.getValue(); + if (v instanceof BooleanLiteralImpl) { + final BooleanLiteralImpl bl = (BooleanLiteralImpl) v; + iv = XSDBooleanIV.valueOf(bl.booleanValue()); + } else { + iv = ((BigdataValue) vc.getValue()).getIV(); + } if (iv == null) throw new UnrecognizedValueException(vc.getValue()); return new Constant<IV>(iv); Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestInlineValues.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestInlineValues.java 2011-02-22 20:56:49 UTC (rev 4224) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestInlineValues.java 2011-02-22 20:57:55 UTC (rev 4225) @@ -29,6 +29,8 @@ import java.util.Collection; import java.util.LinkedList; import java.util.Properties; + +import org.apache.log4j.Logger; import 
org.openrdf.model.Literal; import org.openrdf.model.URI; import org.openrdf.model.ValueFactory; @@ -48,6 +50,8 @@ */ public class TestInlineValues extends ProxyBigdataSailTestCase { + protected static final Logger log = Logger.getLogger(TestInlineValues.class); + @Override public Properties getProperties() { @@ -210,4 +214,127 @@ } + public void testIsLiteral() throws Exception { + + final BigdataSail sail = getSail(); + sail.initialize(); + final BigdataSailRepository repo = new BigdataSailRepository(sail); + final BigdataSailRepositoryConnection cxn = + (BigdataSailRepositoryConnection) repo.getConnection(); + cxn.setAutoCommit(false); + + try { + + final ValueFactory vf = sail.getValueFactory(); + + URI A = vf.createURI("_:A"); + URI B = vf.createURI("_:B"); + URI X = vf.createURI("_:X"); + URI AGE = vf.createURI("_:AGE"); + Literal _25 = vf.createLiteral(25); + Literal _45 = vf.createLiteral(45); + + cxn.add(A, RDF.TYPE, X); + cxn.add(B, RDF.TYPE, X); + cxn.add(A, AGE, _25); + cxn.add(B, AGE, _45); + + /* + * Note: The either flush() or commit() is required to flush the + * statement buffers to the database before executing any operations + * that go around the sail. + */ + cxn.flush();//commit(); + + if (log.isInfoEnabled()) { + log.info("\n" + sail.getDatabase().dumpStore()); + } + + { + + String query = + "select ?s ?age " + + "WHERE { " + + " ?s <"+RDF.TYPE+"> <"+X+"> . " + + " ?s <"+AGE+"> ?age . " + + " FILTER( isLiteral(?age) ) . 
" + + "}"; + + final TupleQuery tupleQuery = + cxn.prepareTupleQuery(QueryLanguage.SPARQL, query); + + if (log.isInfoEnabled()) { + final TupleQueryResult result = tupleQuery.evaluate(); + log.info("results:"); + if (!result.hasNext()) { + log.info("no results."); + } + while (result.hasNext()) { + log.info(result.next()); + } + } + + final TupleQueryResult result = tupleQuery.evaluate(); + + Collection<BindingSet> solution = new LinkedList<BindingSet>(); + solution.add(createBindingSet(new Binding[] { + new BindingImpl("s", A), + new BindingImpl("age", _25) + })); + solution.add(createBindingSet(new Binding[] { + new BindingImpl("s", B), + new BindingImpl("age", _45) + })); + + compare(result, solution); + + } + + { + + String query = + "select ?s ?age " + + "WHERE { " + + " ?s <"+RDF.TYPE+"> <"+X+"> . " + + " ?s <"+AGE+"> ?age . " + + " FILTER( isLiteral("+_25.toString()+") ) . " + + "}"; + + final TupleQuery tupleQuery = + cxn.prepareTupleQuery(QueryLanguage.SPARQL, query); + + if (log.isInfoEnabled()) { + final TupleQueryResult result = tupleQuery.evaluate(); + log.info("results:"); + if (!result.hasNext()) { + log.info("no results."); + } + while (result.hasNext()) { + log.info(result.next()); + } + } + + final TupleQueryResult result = tupleQuery.evaluate(); + + Collection<BindingSet> solution = new LinkedList<BindingSet>(); + solution.add(createBindingSet(new Binding[] { + new BindingImpl("s", A), + new BindingImpl("age", _25) + })); + solution.add(createBindingSet(new Binding[] { + new BindingImpl("s", B), + new BindingImpl("age", _45) + })); + + compare(result, solution); + + } + + } finally { + cxn.close(); + sail.__tearDownUnitTest(); + } + + } + } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <mrp...@us...> - 2011-02-22 20:56:55
|
Revision: 4224 http://bigdata.svn.sourceforge.net/bigdata/?rev=4224&view=rev Author: mrpersonick Date: 2011-02-22 20:56:49 +0000 (Tue, 22 Feb 2011) Log Message: ----------- static helper method Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDBooleanIV.java Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDBooleanIV.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDBooleanIV.java 2011-02-22 20:49:27 UTC (rev 4223) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDBooleanIV.java 2011-02-22 20:56:49 UTC (rev 4224) @@ -45,6 +45,10 @@ static public transient final XSDBooleanIV<BigdataLiteral> FALSE = new XSDBooleanIV<BigdataLiteral>(false); + static public final XSDBooleanIV valueOf(final boolean b) { + return b ? TRUE : FALSE; + } + private final boolean value; public XSDBooleanIV(final boolean value) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2011-02-22 20:49:33
|
Revision: 4223 http://bigdata.svn.sourceforge.net/bigdata/?rev=4223&view=rev Author: thompsonbry Date: 2011-02-22 20:49:27 +0000 (Tue, 22 Feb 2011) Log Message: ----------- Removed @Override for interface decl. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/EBVBOp.java Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/EBVBOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/EBVBOp.java 2011-02-22 20:37:50 UTC (rev 4222) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/EBVBOp.java 2011-02-22 20:49:27 UTC (rev 4223) @@ -120,7 +120,7 @@ * purpose is to evaluate the effective boolean value of a wrapped * expression. */ - @Override +// @Override public XSDBooleanIV get(final IBindingSet bs) { return accept(bs) ? XSDBooleanIV.TRUE : XSDBooleanIV.FALSE; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2011-02-22 20:37:56
|
Revision: 4222 http://bigdata.svn.sourceforge.net/bigdata/?rev=4222&view=rev Author: thompsonbry Date: 2011-02-22 20:37:50 +0000 (Tue, 22 Feb 2011) Log Message: ----------- Made the member field final. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/UnsupportedOperatorException.java Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/UnsupportedOperatorException.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/UnsupportedOperatorException.java 2011-02-22 20:35:57 UTC (rev 4221) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/UnsupportedOperatorException.java 2011-02-22 20:37:50 UTC (rev 4222) @@ -16,7 +16,7 @@ */ private static final long serialVersionUID = 1493443291958364334L; - private QueryModelNode operator; + final private QueryModelNode operator; /** * Wrap with another instance of this class. This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2011-02-22 20:36:03
|
Revision: 4221 http://bigdata.svn.sourceforge.net/bigdata/?rev=4221&view=rev Author: thompsonbry Date: 2011-02-22 20:35:57 +0000 (Tue, 22 Feb 2011) Log Message: ----------- Modified to report the operator which was not supported via the exception message. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/UnsupportedOperatorException.java Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/UnsupportedOperatorException.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/UnsupportedOperatorException.java 2011-02-22 20:30:14 UTC (rev 4220) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/UnsupportedOperatorException.java 2011-02-22 20:35:57 UTC (rev 4221) @@ -29,6 +29,7 @@ } public UnsupportedOperatorException(final QueryModelNode operator) { + super(""+operator); this.operator = operator; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <mrp...@us...> - 2011-02-22 20:30:20
|
Revision: 4220 http://bigdata.svn.sourceforge.net/bigdata/?rev=4220&view=rev Author: mrpersonick Date: 2011-02-22 20:30:14 +0000 (Tue, 22 Feb 2011) Log Message: ----------- removed the star join test suite Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/spo/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/spo/TestSPOStarJoin.java Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/spo/TestAll.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/spo/TestAll.java 2011-02-22 20:29:45 UTC (rev 4219) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/spo/TestAll.java 2011-02-22 20:30:14 UTC (rev 4220) @@ -93,7 +93,7 @@ suite.addTestSuite(TestDefaultGraphAccessPath.class); // star joins - suite.addTestSuite(TestSPOStarJoin.class); +// suite.addTestSuite(TestSPOStarJoin.class); // test for shard split handler for the xxxC indices. 
suite.addTestSuite(TestXXXCShardSplitHandler.class); Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/spo/TestSPOStarJoin.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/spo/TestSPOStarJoin.java 2011-02-22 20:29:45 UTC (rev 4219) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/spo/TestSPOStarJoin.java 2011-02-22 20:30:14 UTC (rev 4220) @@ -68,7 +68,7 @@ } - public void testStarJoin1() throws Exception { + private void _testStarJoin1() throws Exception { final AbstractTripleStore store = getStore(getProperties()); @@ -206,7 +206,7 @@ } - public void testStarJoin2() throws Exception { + private void _testStarJoin2() throws Exception { final AbstractTripleStore store = getStore(getProperties()); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <mrp...@us...> - 2011-02-22 20:29:57
|
Revision: 4219 http://bigdata.svn.sourceforge.net/bigdata/?rev=4219&view=rev Author: mrpersonick Date: 2011-02-22 20:29:45 +0000 (Tue, 22 Feb 2011) Log Message: ----------- refactor constraints -> value expressions Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestDeepCopy.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDBooleanIV.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/CompareBOp.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/MathBOp.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/SameTermBOp.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/internal/constraints/TestInlineConstraints.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl3.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSail.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailEvaluationStrategyImpl.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/AndBOp.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/EBVBOp.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IsBoundBOp.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IsInlineBOp.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IsLiteralBOp.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/NotBOp.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/OrBOp.java 
branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/ValueExpressionBOp.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/ValueExpressionConstraint.java Removed Paths: ------------- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IsInline.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IsLiteral.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestDeepCopy.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestDeepCopy.java 2011-02-21 22:01:49 UTC (rev 4218) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestDeepCopy.java 2011-02-22 20:29:45 UTC (rev 4219) @@ -45,10 +45,16 @@ import com.bigdata.bop.constraint.NE; import com.bigdata.bop.constraint.NEConstant; import com.bigdata.bop.constraint.OR; +import com.bigdata.rdf.internal.constraints.AndBOp; import com.bigdata.rdf.internal.constraints.CompareBOp; -import com.bigdata.rdf.internal.constraints.IsInline; -import com.bigdata.rdf.internal.constraints.IsLiteral; +import com.bigdata.rdf.internal.constraints.EBVBOp; +import com.bigdata.rdf.internal.constraints.IsBoundBOp; +import com.bigdata.rdf.internal.constraints.IsInlineBOp; +import com.bigdata.rdf.internal.constraints.IsLiteralBOp; import com.bigdata.rdf.internal.constraints.MathBOp; +import com.bigdata.rdf.internal.constraints.NotBOp; +import com.bigdata.rdf.internal.constraints.OrBOp; +import com.bigdata.rdf.internal.constraints.SameTermBOp; import com.bigdata.rdf.rules.RejectAnythingSameAsItself; import com.bigdata.rdf.spo.SPOPredicate; import com.bigdata.rdf.spo.SPOStarJoin; @@ -99,9 +105,15 @@ com.bigdata.rdf.magic.MagicPredicate.class,// // 
com.bigdata.rdf.internal.constraint CompareBOp.class,// - IsInline.class,// - IsLiteral.class,// + IsInlineBOp.class,// + IsLiteralBOp.class,// MathBOp.class,// + AndBOp.class, + EBVBOp.class, + IsBoundBOp.class, + NotBOp.class, + OrBOp.class, + SameTermBOp.class, // com.bigdata.rdf.inf RejectAnythingSameAsItself.class, Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDBooleanIV.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDBooleanIV.java 2011-02-21 22:01:49 UTC (rev 4218) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDBooleanIV.java 2011-02-22 20:29:45 UTC (rev 4219) @@ -39,11 +39,11 @@ */ private static final long serialVersionUID = 1L; - static public transient final XSDBooleanIV<BigdataLiteral> TRUE = new XSDBooleanIV<BigdataLiteral>( - true); + static public transient final XSDBooleanIV<BigdataLiteral> TRUE = + new XSDBooleanIV<BigdataLiteral>(true); - static public transient final XSDBooleanIV<BigdataLiteral> FALSE = new XSDBooleanIV<BigdataLiteral>( - false); + static public transient final XSDBooleanIV<BigdataLiteral> FALSE = + new XSDBooleanIV<BigdataLiteral>(false); private final boolean value; Added: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/AndBOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/AndBOp.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/AndBOp.java 2011-02-22 20:29:45 UTC (rev 4219) @@ -0,0 +1,114 @@ +/* + +Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... 
+ +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +*/ +package com.bigdata.rdf.internal.constraints; + +import java.util.Map; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IValueExpression; +import com.bigdata.rdf.error.SparqlTypeErrorException; +import com.bigdata.rdf.internal.IV; +import com.bigdata.rdf.internal.XSDBooleanIV; + +/** + * Imposes the constraint <code>x AND y</code>. + */ +public class AndBOp extends ValueExpressionBOp + implements IValueExpression<IV> { + + /** + * + */ + private static final long serialVersionUID = -1217715173822304819L; + + public AndBOp(final IValueExpression<IV> x, final IValueExpression<IV> y) { + + this(new BOp[] { x, y }, null/*annocations*/); + + } + + /** + * Required shallow copy constructor. + */ + public AndBOp(final BOp[] args, final Map<String, Object> anns) { + + super(args, anns); + + if (args.length != 2 || args[0] == null || args[1] == null) + throw new IllegalArgumentException(); + + } + + /** + * Required deep copy constructor. + */ + public AndBOp(final AndBOp op) { + super(op); + } + + /** + * Follows semantics from SPARQL spec - "Testing Values". 
+ * <p> + * see http://www.w3.org/TR/rdf-sparql-query/#tests section 11.2 + */ + public boolean accept(final IBindingSet bs) { + + XSDBooleanIV left, right; + + try { + left = (XSDBooleanIV) get(0).get(bs); + } catch (SparqlTypeErrorException ex) { + left = null; + } + + try { + right = (XSDBooleanIV) get(1).get(bs); + } catch (SparqlTypeErrorException ex) { + right = null; + } + + // special error handling per the SPARQL spec + if (left == null || right == null) { + // if one or the other is false, return false + if (left != null && !left.booleanValue()) + return false; + if (right != null && !right.booleanValue()) + return false; + // all other cases, throw a type error + throw new SparqlTypeErrorException(); + } + + return left.booleanValue() && right.booleanValue(); + + } + + public IV get(final IBindingSet bs) { + + return accept(bs) ? XSDBooleanIV.TRUE : XSDBooleanIV.FALSE; + + } + +} Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/CompareBOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/CompareBOp.java 2011-02-21 22:01:49 UTC (rev 4218) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/CompareBOp.java 2011-02-22 20:29:45 UTC (rev 4219) @@ -26,6 +26,7 @@ import java.util.Map; +import org.apache.log4j.Logger; import org.openrdf.query.algebra.Compare.CompareOp; import com.bigdata.bop.BOp; @@ -33,21 +34,26 @@ import com.bigdata.bop.IValueExpression; import com.bigdata.bop.NV; import com.bigdata.bop.PipelineOp; -import com.bigdata.bop.constraint.BOpConstraint; +import com.bigdata.rdf.error.SparqlTypeErrorException; import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.internal.IVUtility; +import com.bigdata.rdf.internal.XSDBooleanIV; /** * Use inline terms to perform numerical comparison operations. 
* * @see IVUtility#numericalCompare(IV, IV) */ -public class CompareBOp extends BOpConstraint { +public class CompareBOp extends ValueExpressionBOp + implements IValueExpression<IV> { - /** - * - */ - private static final long serialVersionUID = 1L; + /** + * + */ + private static final long serialVersionUID = 5661497748051783499L; + + protected static final Logger log = Logger.getLogger(CompareBOp.class); + public interface Annotations extends PipelineOp.Annotations { @@ -58,12 +64,27 @@ } + public CompareBOp(final IValueExpression<IV> left, + final IValueExpression<IV> right, final CompareOp op) { + + this(new BOp[] { left, right }, NV.asMap(new NV(Annotations.OP, op))); + + } + /** * Required shallow copy constructor. */ - public CompareBOp(final BOp[] values, - final Map<String, Object> annotations) { - super(values, annotations); + public CompareBOp(final BOp[] args, final Map<String, Object> anns) { + + super(args, anns); + + if (args.length != 2 || args[0] == null || args[1] == null + || getProperty(Annotations.OP) == null) { + + throw new IllegalArgumentException(); + + } + } /** @@ -73,32 +94,32 @@ super(op); } - public CompareBOp(final IValueExpression<IV> left, - final IValueExpression<IV> right, final CompareOp op) { - - super(new BOp[] { left, right }, NV.asMap(new NV(Annotations.OP, op))); - - if (left == null || right == null || op == null) - throw new IllegalArgumentException(); - - } - public boolean accept(final IBindingSet s) { - final IV left = ((IValueExpression<IV>) get(0)).get(s); - final IV right = ((IValueExpression<IV>) get(1)).get(s); - + final IV left = get(0).get(s); + final IV right = get(1).get(s); + + // not yet bound if (left == null || right == null) -// return true; // not yet bound. 
- return false; // no longer allow unbound values + throw new SparqlTypeErrorException(); final CompareOp op = (CompareOp) getProperty(Annotations.OP); - if (left.isTermId() && right.isTermId() && - (op == CompareOp.EQ || op == CompareOp.NE)) { - return _accept(left.compareTo(right)); + if (left.isTermId() && right.isTermId()) { + if (op == CompareOp.EQ || op == CompareOp.NE) { + return _accept(left.compareTo(right)); + } else { + if (log.isInfoEnabled()) + log.info("cannot compare: " + + left + " " + op + " " + right); + + throw new SparqlTypeErrorException(); + } } + /* + * This code is bad. + */ if (!IVUtility.canNumericalCompare(left) || !IVUtility.canNumericalCompare(right)) { if (op == CompareOp.EQ) { @@ -106,8 +127,11 @@ } else if (op == CompareOp.NE) { return true; } else { - throw new NotNumericalException("cannot numerical compare: " + if (log.isInfoEnabled()) + log.info("cannot numerical compare: " + left + " " + op + " " + right); + + throw new SparqlTypeErrorException(); } } @@ -121,7 +145,7 @@ switch(op) { case EQ: - return compare == 0; + return compare == 0; case NE: return compare != 0; case GT: @@ -137,7 +161,13 @@ } } - + + public IV get(final IBindingSet bs) { + + return accept(bs) ? XSDBooleanIV.TRUE : XSDBooleanIV.FALSE; + + } + public static class NotNumericalException extends RuntimeException { /** Added: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/EBVBOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/EBVBOp.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/EBVBOp.java 2011-02-22 20:29:45 UTC (rev 4219) @@ -0,0 +1,130 @@ +/* + +Copyright (C) SYSTAP, LLC 2006-2011. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... 
+ +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +*/ +package com.bigdata.rdf.internal.constraints; + +import java.util.Map; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IValueExpression; +import com.bigdata.rdf.error.SparqlTypeErrorException; +import com.bigdata.rdf.internal.IV; +import com.bigdata.rdf.internal.XSDBooleanIV; + +/** + * Calculates the "effective boolean value" of an IValueExpression. See the + * SPARQL spec for details. + */ +public class EBVBOp extends ValueExpressionBOp + implements IValueExpression<IV> { + + /** + * + */ + private static final long serialVersionUID = -5701967329003122236L; + + public EBVBOp(final IValueExpression<IV> x) { + + this(new BOp[] { x }, null/*Annotations*/); + + } + + /** + * Required shallow copy constructor. + */ + public EBVBOp(final BOp[] args, final Map<String, Object> anns) { + + super(args, anns); + + if (args.length != 1 || args[0] == null) + throw new IllegalArgumentException(); + + } + + /** + * Required deep copy constructor. + */ + public EBVBOp(final EBVBOp op) { + super(op); + } + + /** + * 11.2.2 Effective Boolean Value (EBV) + * + * Effective boolean value is used to calculate the arguments to the logical + * functions logical-and, logical-or, and fn:not, as well as evaluate the + * result of a FILTER expression. 
+ * + * The XQuery Effective Boolean Value rules rely on the definition of + * XPath's fn:boolean. The following rules reflect the rules for fn:boolean + * applied to the argument types present in SPARQL Queries: + * + * The EBV of any literal whose type is xsd:boolean or numeric is false if + * the lexical form is not valid for that datatype (e.g. + * "abc"^^xsd:integer). + * + * If the argument is a typed literal with a datatype of xsd:boolean, the + * EBV is the value of that argument. + * + * If the argument is a plain literal or a typed literal with a datatype of + * xsd:string, the EBV is false if the operand value has zero length; + * otherwise the EBV is true. + * + * If the argument is a numeric type or a typed literal with a datatype + * derived from a numeric type, the EBV is false if the operand value is NaN + * or is numerically equal to zero; otherwise the EBV is true. + * + * All other arguments, including unbound arguments, produce a type error. + * + * An EBV of true is represented as a typed literal with a datatype of + * xsd:boolean and a lexical value of "true"; an EBV of false is represented + * as a typed literal with a datatype of xsd:boolean and a lexical value of + * "false". + */ + public boolean accept(final IBindingSet bs) { + + final IV iv = get(0).get(bs); + + if (iv instanceof XSDBooleanIV) { + return ((XSDBooleanIV) iv).booleanValue(); + } + + throw new SparqlTypeErrorException(); + + } + + /** + * We know we can strengthen the return type on this one since its whole + * purpose is to evaluate the effective boolean value of a wrapped + * expression. + */ + @Override + public XSDBooleanIV get(final IBindingSet bs) { + + return accept(bs) ? 
XSDBooleanIV.TRUE : XSDBooleanIV.FALSE; + + } + +} Added: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IsBoundBOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IsBoundBOp.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IsBoundBOp.java 2011-02-22 20:29:45 UTC (rev 4219) @@ -0,0 +1,84 @@ +/* + +Copyright (C) SYSTAP, LLC 2006-2011. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +*/ +package com.bigdata.rdf.internal.constraints; + +import java.util.Map; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IValueExpression; +import com.bigdata.bop.IVariable; +import com.bigdata.rdf.internal.IV; +import com.bigdata.rdf.internal.XSDBooleanIV; + +/** + * Imposes the constraint <code>bound(x)</code> for the variable x. + */ +public class IsBoundBOp extends ValueExpressionBOp + implements IValueExpression<IV> { + + /** + * + */ + private static final long serialVersionUID = -7408654639183330874L; + + public IsBoundBOp(final IVariable<IV> x) { + + this(new BOp[] { x }, null/*annocations*/); + + } + + /** + * Required shallow copy constructor. 
+ */ + public IsBoundBOp(final BOp[] args, final Map<String, Object> anns) { + + super(args, anns); + + if (args.length != 1 || args[0] == null) + throw new IllegalArgumentException(); + + } + + /** + * Required deep copy constructor. + */ + public IsBoundBOp(final IsBoundBOp op) { + super(op); + } + + public boolean accept(final IBindingSet s) { + + return get(0).get(s) != null; + + } + + public IV get(final IBindingSet bs) { + + return accept(bs) ? XSDBooleanIV.TRUE : XSDBooleanIV.FALSE; + + } + +} Deleted: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IsInline.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IsInline.java 2011-02-21 22:01:49 UTC (rev 4218) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IsInline.java 2011-02-22 20:29:45 UTC (rev 4219) @@ -1,102 +0,0 @@ -/* - -Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. - -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -*/ -package com.bigdata.rdf.internal.constraints; - -import java.util.Map; - -import com.bigdata.bop.BOp; -import com.bigdata.bop.IBindingSet; -import com.bigdata.bop.IConstant; -import com.bigdata.bop.IVariable; -import com.bigdata.bop.NV; -import com.bigdata.bop.PipelineOp; -import com.bigdata.bop.constraint.BOpConstraint; -import com.bigdata.rdf.internal.IV; - -/** - * Imposes the constraint <code>isInline(x)</code>. - */ -public class IsInline extends BOpConstraint { - - /** - * - */ - private static final long serialVersionUID = 3125106876006900339L; - - public interface Annotations extends PipelineOp.Annotations { - - /** - * If true, only accept variable bindings for {@link #x} that have an - * inline internal value {@link IV}. Otherwise only accept variable bindings - * that are not inline in the statement indices. - * <p> - * @see IV#isInline() - */ - String INLINE = IsInline.class.getName() + ".inline"; - - } - - /** - * Required shallow copy constructor. - */ - public IsInline(final BOp[] values, - final Map<String, Object> annotations) { - super(values, annotations); - } - - /** - * Required deep copy constructor. - */ - public IsInline(final IsInline op) { - super(op); - } - - public IsInline(final IVariable<IV> x, final boolean inline) { - - super(new BOp[] { x }, NV.asMap(new NV(Annotations.INLINE, inline))); - - if (x == null) - throw new IllegalArgumentException(); - - } - - public boolean accept(IBindingSet s) { - - // get binding for "x". - final IConstant<IV> x = s.get((IVariable<IV>) get(0)/*x*/); - - if (x == null) - return true; // not yet bound. 
- - final IV iv = x.get(); - - final boolean inline = - (Boolean) getRequiredProperty(Annotations.INLINE); - - return iv.isInline() == inline; - - } - -} Copied: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IsInlineBOp.java (from rev 4196, branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IsInline.java) =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IsInlineBOp.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IsInlineBOp.java 2011-02-22 20:29:45 UTC (rev 4219) @@ -0,0 +1,109 @@ +/* + +Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +*/ +package com.bigdata.rdf.internal.constraints; + +import java.util.Map; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IValueExpression; +import com.bigdata.bop.IVariable; +import com.bigdata.bop.NV; +import com.bigdata.bop.PipelineOp; +import com.bigdata.rdf.error.SparqlTypeErrorException; +import com.bigdata.rdf.internal.IV; +import com.bigdata.rdf.internal.XSDBooleanIV; + +/** + * Imposes the constraint <code>isInline(x)</code>. 
+ */ +public class IsInlineBOp extends ValueExpressionBOp + implements IValueExpression<IV> { + + /** + * + */ + private static final long serialVersionUID = 3125106876006900339L; + + public interface Annotations extends PipelineOp.Annotations { + + /** + * If true, only accept variable bindings for {@link #x} that have an + * inline internal value {@link IV}. Otherwise only accept variable bindings + * that are not inline in the statement indices. + * <p> + * @see IV#isInline() + */ + String INLINE = IsInlineBOp.class.getName() + ".inline"; + + } + + public IsInlineBOp(final IVariable<IV> x, final boolean inline) { + + this(new BOp[] { x }, NV.asMap(new NV(Annotations.INLINE, inline))); + + } + + /** + * Required shallow copy constructor. + */ + public IsInlineBOp(final BOp[] args, final Map<String, Object> anns) { + + super(args, anns); + + if (args.length != 1 || args[0] == null) + throw new IllegalArgumentException(); + + } + + /** + * Required deep copy constructor. + */ + public IsInlineBOp(final IsInlineBOp op) { + super(op); + } + + public boolean accept(final IBindingSet bs) { + + final boolean inline = + (Boolean) getRequiredProperty(Annotations.INLINE); + + final IV iv = get(0).get(bs); + + // not yet bound + if (iv == null) + throw new SparqlTypeErrorException(); + + return iv.isInline() == inline; + + } + + public IV get(final IBindingSet bs) { + + return accept(bs) ? XSDBooleanIV.TRUE : XSDBooleanIV.FALSE; + + } + +} Deleted: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IsLiteral.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IsLiteral.java 2011-02-21 22:01:49 UTC (rev 4218) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IsLiteral.java 2011-02-22 20:29:45 UTC (rev 4219) @@ -1,85 +0,0 @@ -/* - -Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. 
- -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -*/ -package com.bigdata.rdf.internal.constraints; - -import java.util.Map; - -import com.bigdata.bop.BOp; -import com.bigdata.bop.IBindingSet; -import com.bigdata.bop.IConstant; -import com.bigdata.bop.IVariable; -import com.bigdata.bop.NV; -import com.bigdata.bop.constraint.BOpConstraint; -import com.bigdata.rdf.internal.IV; - -/** - * Imposes the constraint <code>isLiteral(x)</code>. - */ -public class IsLiteral extends BOpConstraint { - - /** - * - */ - private static final long serialVersionUID = 3125106876006900339L; - - /** - * Required shallow copy constructor. - */ - public IsLiteral(final BOp[] values, - final Map<String, Object> annotations) { - super(values, annotations); - } - - /** - * Required deep copy constructor. - */ - public IsLiteral(final IsLiteral op) { - super(op); - } - - public IsLiteral(final IVariable<IV> x) { - - super(new BOp[] { x }, null/*annocations*/); - - if (x == null) - throw new IllegalArgumentException(); - - } - - public boolean accept(IBindingSet s) { - - // get binding for "x". - final IConstant<IV> x = s.get((IVariable<IV>) get(0)/*x*/); - - if (x == null) - return true; // not yet bound. 
- - final IV iv = x.get(); - - return iv.isLiteral(); - - } - -} Copied: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IsLiteralBOp.java (from rev 4196, branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IsLiteral.java) =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IsLiteralBOp.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IsLiteralBOp.java 2011-02-22 20:29:45 UTC (rev 4219) @@ -0,0 +1,91 @@ +/* + +Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +*/ +package com.bigdata.rdf.internal.constraints; + +import java.util.Map; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IValueExpression; +import com.bigdata.bop.IVariable; +import com.bigdata.rdf.error.SparqlTypeErrorException; +import com.bigdata.rdf.internal.IV; +import com.bigdata.rdf.internal.XSDBooleanIV; + +/** + * Imposes the constraint <code>isLiteral(x)</code>. 
+ */ +public class IsLiteralBOp extends ValueExpressionBOp + implements IValueExpression<IV> { + + /** + * + */ + private static final long serialVersionUID = 3125106876006900339L; + + public IsLiteralBOp(final IVariable<IV> x) { + + this(new BOp[] { x }, null/*annocations*/); + + } + + /** + * Required shallow copy constructor. + */ + public IsLiteralBOp(final BOp[] args, final Map<String, Object> anns) { + + super(args, anns); + + if (args.length != 1 || args[0] == null) + throw new IllegalArgumentException(); + + } + + /** + * Required deep copy constructor. + */ + public IsLiteralBOp(final IsLiteralBOp op) { + super(op); + } + + public boolean accept(IBindingSet bs) { + + final IV iv = get(0).get(bs); + + // not yet bound + if (iv == null) + throw new SparqlTypeErrorException(); + + return iv.isLiteral(); + + } + + public IV get(final IBindingSet bs) { + + return accept(bs) ? XSDBooleanIV.TRUE : XSDBooleanIV.FALSE; + + } + +} Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/MathBOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/MathBOp.java 2011-02-21 22:01:49 UTC (rev 4218) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/MathBOp.java 2011-02-22 20:29:45 UTC (rev 4219) @@ -32,6 +32,7 @@ import com.bigdata.bop.IValueExpression; import com.bigdata.bop.ImmutableBOp; import com.bigdata.bop.NV; +import com.bigdata.rdf.error.SparqlTypeErrorException; import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.internal.IVUtility; @@ -40,7 +41,7 @@ * operation to be applied to the operands is specified by the * {@link Annotations#OP} annotation. */ -final public class MathBOp extends ImmutableBOp +final public class MathBOp extends ValueExpressionBOp implements IValueExpression<IV> { /** @@ -62,14 +63,20 @@ } /** - * Required deep copy constructor. 
* + * @param left + * The left operand. + * @param right + * The right operand. * @param op + * The annotation specifying the operation to be performed on + * those operands. */ - public MathBOp(final MathBOp op) { + public MathBOp(final IValueExpression<IV> left, + final IValueExpression<IV> right, final MathOp op) { - super(op); - + this(new BOp[] { left, right }, NV.asMap(new NV(Annotations.OP, op))); + } /** @@ -94,37 +101,35 @@ } /** + * Required deep copy constructor. * - * @param left - * The left operand. - * @param right - * The right operand. * @param op - * The annotation specifying the operation to be performed on - * those operands. */ - public MathBOp(final IValueExpression<IV> left, - final IValueExpression<IV> right, final MathOp op) { + public MathBOp(final MathBOp op) { - this(new BOp[] { left, right }, NV.asMap(new NV(Annotations.OP, op))); - + super(op); + } -// /** -// * Clone is overridden to reduce heap churn. -// */ -// final public Math clone() { -// -// return this; -// -// } + final public IV get(final IBindingSet bs) { + + final IV left = left().get(bs); + final IV right = right().get(bs); + + // not yet bound + if (left == null || right == null) + throw new SparqlTypeErrorException(); + + return IVUtility.numericalMath(left, right, op()); + } + public IValueExpression<IV> left() { - return (IValueExpression<IV>) get(0); + return get(0); } public IValueExpression<IV> right() { - return (IValueExpression<IV>) get(1); + return get(1); } public MathOp op() { @@ -156,11 +161,10 @@ final public boolean equals(final IValueExpression<IV> o) { - if(!(o instanceof MathBOp)) { + if(!(o instanceof MathBOp)) { // incomparable types. 
return false; } - return equals((MathBOp) o); } @@ -172,39 +176,18 @@ private int hash = 0; public int hashCode() { - + int h = hash; - if (h == 0) { - final int n = arity(); - for (int i = 0; i < n; i++) { - h = 31 * h + get(i).hashCode(); - } - h = 31 * h + op().hashCode(); - hash = h; - } - return h; - + } - final public IV get(final IBindingSet bindingSet) { - - final IV left = left().get(bindingSet); - final IV right = right().get(bindingSet); - - if (left == null || right == null) - return null; - - return IVUtility.numericalMath(left, right, op()); - - } - } Added: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/NotBOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/NotBOp.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/NotBOp.java 2011-02-22 20:29:45 UTC (rev 4219) @@ -0,0 +1,85 @@ +/* + +Copyright (C) SYSTAP, LLC 2006-2011. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +*/ +package com.bigdata.rdf.internal.constraints; + +import java.util.Map; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IValueExpression; +import com.bigdata.rdf.internal.IV; +import com.bigdata.rdf.internal.XSDBooleanIV; + +/** + * Imposes the constraint <code>!x</code>. + */ +public class NotBOp extends ValueExpressionBOp + implements IValueExpression<IV> { + + /** + * + */ + private static final long serialVersionUID = -5701967329003122236L; + + public NotBOp(final IValueExpression<IV> x) { + + this(new BOp[] { x }, null/*Annotations*/); + + } + + /** + * Required shallow copy constructor. + */ + public NotBOp(final BOp[] args, final Map<String, Object> anns) { + + super(args, anns); + + if (args.length != 1 || args[0] == null) + throw new IllegalArgumentException(); + + } + + /** + * Required deep copy constructor. + */ + public NotBOp(final NotBOp op) { + super(op); + } + + public boolean accept(final IBindingSet bs) { + + final XSDBooleanIV iv = (XSDBooleanIV) get(0).get(bs); + + return !iv.booleanValue(); + + } + + public IV get(final IBindingSet bs) { + + return accept(bs) ? XSDBooleanIV.TRUE : XSDBooleanIV.FALSE; + + } + +} Added: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/OrBOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/OrBOp.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/OrBOp.java 2011-02-22 20:29:45 UTC (rev 4219) @@ -0,0 +1,114 @@ +/* + +Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... 
+ +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +*/ +package com.bigdata.rdf.internal.constraints; + +import java.util.Map; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IValueExpression; +import com.bigdata.rdf.error.SparqlTypeErrorException; +import com.bigdata.rdf.internal.IV; +import com.bigdata.rdf.internal.XSDBooleanIV; + +/** + * Imposes the constraint <code>x OR y</code>. + */ +public class OrBOp extends ValueExpressionBOp + implements IValueExpression<IV> { + + /** + * + */ + private static final long serialVersionUID = 610253427197564102L; + + public OrBOp(final IValueExpression<IV> x, final IValueExpression<IV> y) { + + this(new BOp[] { x, y }, null/*annocations*/); + + } + + /** + * Required shallow copy constructor. + */ + public OrBOp(final BOp[] args, final Map<String, Object> anns) { + + super(args, anns); + + if (args.length != 2 || args[0] == null || args[1] == null) + throw new IllegalArgumentException(); + + } + + /** + * Required deep copy constructor. + */ + public OrBOp(final OrBOp op) { + super(op); + } + + /** + * Follows semantics from SPARQL spec - "Testing Values". 
+ * <p> + * see http://www.w3.org/TR/rdf-sparql-query/#tests section 11.2 + */ + public boolean accept(final IBindingSet bs) { + + XSDBooleanIV left, right; + + try { + left = (XSDBooleanIV) get(0).get(bs); + } catch (SparqlTypeErrorException ex) { + left = null; + } + + try { + right = (XSDBooleanIV) get(1).get(bs); + } catch (SparqlTypeErrorException ex) { + right = null; + } + + // special error handling per the SPARQL spec + if (left == null || right == null) { + // if one or the other is true, return true + if (left != null && left.booleanValue()) + return true; + if (right != null && right.booleanValue()) + return true; + // all other cases, throw a type error + throw new SparqlTypeErrorException(); + } + + return left.booleanValue() || right.booleanValue(); + + } + + public IV get(final IBindingSet bs) { + + return accept(bs) ? XSDBooleanIV.TRUE : XSDBooleanIV.FALSE; + + } + +} Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/SameTermBOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/SameTermBOp.java 2011-02-21 22:01:49 UTC (rev 4218) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/SameTermBOp.java 2011-02-22 20:29:45 UTC (rev 4219) @@ -26,33 +26,41 @@ import java.util.Map; -import org.openrdf.query.algebra.Compare.CompareOp; - import com.bigdata.bop.BOp; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IValueExpression; -import com.bigdata.bop.NV; -import com.bigdata.bop.PipelineOp; -import com.bigdata.bop.constraint.BOpConstraint; +import com.bigdata.rdf.error.SparqlTypeErrorException; import com.bigdata.rdf.internal.IV; -import com.bigdata.rdf.internal.IVUtility; +import com.bigdata.rdf.internal.XSDBooleanIV; /** * Compare two terms for exact equality. 
*/ -public class SameTermBOp extends BOpConstraint { +public class SameTermBOp extends ValueExpressionBOp + implements IValueExpression<IV> { /** * */ private static final long serialVersionUID = 1L; + public SameTermBOp(final IValueExpression<IV> left, + final IValueExpression<IV> right) { + + this(new BOp[] { left, right }, null); + + } + /** * Required shallow copy constructor. */ - public SameTermBOp(final BOp[] values, - final Map<String, Object> annotations) { - super(values, annotations); + public SameTermBOp(final BOp[] args, final Map<String, Object> anns) { + + super(args, anns); + + if (args.length != 2 || args[0] == null || args[1] == null) + throw new IllegalArgumentException(); + } /** @@ -62,26 +70,23 @@ super(op); } - public SameTermBOp(final IValueExpression<IV> left, - final IValueExpression<IV> right) { - - super(new BOp[] { left, right }, null); + public boolean accept(final IBindingSet bs) { - if (left == null || right == null) - throw new IllegalArgumentException(); + final IV left = get(0).get(bs); + final IV right = get(1).get(bs); - } - - public boolean accept(final IBindingSet s) { - - final IV left = ((IValueExpression<IV>) get(0)).get(s); - final IV right = ((IValueExpression<IV>) get(1)).get(s); - + // not yet bound if (left == null || right == null) - return true; // not yet bound. + throw new SparqlTypeErrorException(); return left.equals(right); } + public IV get(final IBindingSet bs) { + + return accept(bs) ? 
XSDBooleanIV.TRUE : XSDBooleanIV.FALSE; + + } + } Added: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/ValueExpressionBOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/ValueExpressionBOp.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/ValueExpressionBOp.java 2011-02-22 20:29:45 UTC (rev 4219) @@ -0,0 +1,66 @@ +/* + +Copyright (C) SYSTAP, LLC 2006-2011. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +*/ +package com.bigdata.rdf.internal.constraints; + +import java.util.Map; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.IValueExpression; +import com.bigdata.bop.ImmutableBOp; +import com.bigdata.rdf.internal.IV; + +/** + * Base class for RDF value expression BOps. Value expressions perform some + * evaluation on one or more value expressions as input and produce one + * value expression as output (boolean, numeric value, etc.) + */ +public abstract class ValueExpressionBOp extends ImmutableBOp + implements IValueExpression<IV> { + + /** + * + */ + private static final long serialVersionUID = -7068219781217676085L; + + /** + * Required shallow copy constructor. 
+ */ + public ValueExpressionBOp(final BOp[] args, final Map<String, Object> anns) { + super(args, anns); + } + + /** + * Required deep copy constructor. + */ + public ValueExpressionBOp(final ValueExpressionBOp op) { + super(op); + } + + @Override + public IValueExpression<IV> get(final int i) { + return (IValueExpression<IV>) super.get(i); + } + +} Added: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/ValueExpressionConstraint.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/ValueExpressionConstraint.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/ValueExpressionConstraint.java 2011-02-22 20:29:45 UTC (rev 4219) @@ -0,0 +1,109 @@ +/* + +Copyright (C) SYSTAP, LLC 2006-2011. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +*/ +package com.bigdata.rdf.internal.constraints; + +import java.util.Map; + +import org.apache.log4j.Logger; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IConstraint; +import com.bigdata.bop.IValueExpression; +import com.bigdata.bop.constraint.BOpConstraint; +import com.bigdata.rdf.error.SparqlTypeErrorException; +import com.bigdata.rdf.internal.IV; + +/** + * BOpConstraint that wraps a {@link EBVBOp}, which itself computes the + * effective boolean value of an IValueExpression. + */ +public class ValueExpressionConstraint extends BOpConstraint { + + /** + * + */ + private static final long serialVersionUID = -7068219781217676085L; + + protected static final Logger log = Logger.getLogger(ValueExpressionConstraint.class); + + /** + * Convenience method to generate a constraint from a value expression. + */ + public static IConstraint wrap(final IValueExpression<IV> ve) { + return new ValueExpressionConstraint(new EBVBOp(ve)); + } + + + public ValueExpressionConstraint(final EBVBOp x) { + + this(new BOp[] { x }, null/*annocations*/); + + } + + /** + * Required shallow copy constructor. + */ + public ValueExpressionConstraint(final BOp[] args, + final Map<String, Object> anns) { + + super(args, anns); + + if (args.length != 1 || args[0] == null) + throw new IllegalArgumentException(); + + } + + /** + * Required deep copy constructor. 
+ */ + public ValueExpressionConstraint(final ValueExpressionConstraint op) { + super(op); + } + + @Override + public EBVBOp get(final int i) { + return (EBVBOp) super.get(i); + } + + public boolean accept(final IBindingSet bs) { + + try { + + // evaluate the EBV operator + return get(0).get(bs).booleanValue(); + + } catch (SparqlTypeErrorException ex) { + + // trap the type error and filter out the solution + if (log.isInfoEnabled()) + log.info("discarding solution due to type error: " + bs); + return false; + + } + + } + +} Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java 2011-02-21 22:01:49 UTC (rev 4218) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java 2011-02-22 20:29:45 UTC (rev 4219) @@ -12,18 +12,22 @@ import com.bigdata.bop.Constant; import com.bigdata.bop.IConstraint; import com.bigdata.bop.IPredicate; +import com.bigdata.bop.IValueExpression; import com.bigdata.bop.IVariable; import com.bigdata.bop.NV; import com.bigdata.bop.Var; import com.bigdata.bop.IPredicate.Annotations; -import com.bigdata.bop.constraint.NEConstant; import com.bigdata.bop.engine.QueryLog; import com.bigdata.bop.joinGraph.rto.JoinGraph; import com.bigdata.journal.ITx; import com.bigdata.journal.Journal; +import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.internal.XSDIntIV; import com.bigdata.rdf.internal.constraints.CompareBOp; import com.bigdata.rdf.internal.constraints.MathBOp; +import com.bigdata.rdf.internal.constraints.NotBOp; +import com.bigdata.rdf.internal.constraints.SameTermBOp; +import com.bigdata.rdf.internal.constraints.ValueExpressionConstraint; import com.bigdata.rdf.model.BigdataURI; import com.bigdata.rdf.model.BigdataValue; import 
com.bigdata.rdf.model.BigdataValueFactory; @@ -380,15 +384,14 @@ // the vertices of the join graph (the predicates). preds = new IPredicate[] { p0, p1, p2, p3, p4, p5, p6 }; - // the constraints on the join graph. - constraints = new IConstraint[] { + final IValueExpression[] ves = new IValueExpression[] { /* * FILTER * (<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances * /dataFromProducer1092/Product53999> != ?product) */ - new NEConstant(product, new Constant(product53999.getIV())), // + new NotBOp(new SameTermBOp(product, new Constant(product53999.getIV()))), // /* * FILTER (?simProperty1 < (?origProperty1 + 120) && @@ -431,6 +434,12 @@ CompareOp.GT) })),// }; + + // the constraints on the join graph. + constraints = new IConstraint[ves.length]; + for (int i = 0; i < ves.length; i++) { + constraints[i] = ValueExpressionConstraint.wrap(ves[i]); + } } Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/internal/constraints/TestInlineConstraints.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/internal/constraints/TestInlineConstraints.java 2011-02-21 22:01:49 UTC (rev 4218) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/internal/constraints/TestInlineConstraints.java 2011-02-22 20:29:45 UTC (rev 4219) @@ -28,23 +28,39 @@ package com.bigdata.rdf.internal.constraints; import java.util.GregorianCalendar; +import java.util.LinkedList; +import java.util.List; +import java.util.Properties; +import java.util.concurrent.atomic.AtomicInteger; import javax.xml.datatype.XMLGregorianCalendar; +import org.apache.log4j.Logger; import org.openrdf.model.vocabulary.RDF; +import org.openrdf.query.QueryEvaluationException; import org.openrdf.query.algebra.Compare.CompareOp; import org.openrdf.query.algebra.MathExpr.MathOp; +import com.bigdata.bop.BOp; +import com.bigdata.bop.BOpUtility; import com.bigdata.bop.Constant; import 
com.bigdata.bop.IBindingSet; import com.bigdata.bop.IConstant; import com.bigdata.bop.IConstraint; import com.bigdata.bop.IPredicate; import com.bigdata.bop.IVariable; +import com.bigdata.bop.IVariableOrConstant; +import com.bigdata.bop.NV; +import com.bigdata.bop.PipelineOp; import com.bigdata.bop.Var; +import com.bigdata.bop.IPredicate.Annotations; +import com.bigdata.bop.engine.IRunningQuery; +import com.bigdata.bop.engine.QueryEngine; +import com.bigdata.bop.fed.QueryEngineFactory; import com.bigdata.bop.joinGraph.IEvaluationPlan; import com.bigdata.bop.joinGraph.IEvaluationPlanFactory; import com.bigdata.bop.joinGraph.fast.DefaultEvaluationPlanFactory2; +import com.bigdata.btree.IRangeQuery; import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.internal.IVUtility; import com.bigdata.rdf.model.BigdataLiteral; @@ -53,15 +69,23 @@ import com.bigdata.rdf.model.BigdataValueFactory; import com.bigdata.rdf.rio.StatementBuffer; import com.bigdata.rdf.rules.RuleContextEnum; +import com.bigdata.rdf.sail.BigdataSail; +import com.bigdata.rdf.sail.Rule2BOpUtility; +import com.bigdata.rdf.sail.sop.SOp2BOpUtility; +import com.bigdata.rdf.sail.sop.UnsupportedOperatorException; import com.bigdata.rdf.spo.SPOPredicate; import com.bigdata.rdf.store.AbstractTripleStore; import com.bigdata.rdf.store.ProxyTestCase; +import com.bigdata.relation.accesspath.ElementFilter; +import com.bigdata.relation.accesspath.IAsynchronousIterator; import com.bigdata.relation.rule.IRule; import com.bigdata.relation.rule.Rule; import com.bigdata.relation.rule.eval.ActionEnum; import com.bigdata.relation.rule.eval.IJoinNexus; import com.bigdata.relation.rule.eval.IJoinNexusFactory; import com.bigdata.relation.rule.eval.ISolution; +import com.bigdata.striterator.ChunkedWrappedIterator; +import com.bigdata.striterator.Dechunkerator; import com.bigdata.striterator.IChunkedOrderedIterator; import com.sun.org.apache.xerces.internal.jaxp.datatype.XMLGregorianCalendarImpl; @@ -71,6 +95,8 @@ */ public 
class TestInlineConstraints extends ProxyTestCase { + protected static final Logger log = Logger.getLogger(TestInlineConstraints.class); + /** * */ @@ -85,6 +111,13 @@ super(name); } + @Override + public Properties getProperties() { + final Properties props = super.getProperties(); + props.setProperty(BigdataSail.Options.INLINE_DATE_TIMES, "true"); + return props; + } + public void testGT() { // store with no owl:sameAs closure @@ -142,25 +175,27 @@ final IRule rule = new Rule("test_greater_than", null, // head new IPredicate[] { - new SPOPredicate(SPO, s, type, x), - new SPOPredicate(SPO, s, age, a) + toPredicate(db, s, type, x), + toPredicate(db, s, age, a) }, // constraints on the rule. new IConstraint[] { - new CompareBOp(a, new Constant<IV>(_35.getIV()), CompareOp.GT) - }); + ValueExpressionConstraint.wrap(new CompareBOp(a, new Constant<IV>(_35.getIV()), CompareOp.GT)) + } + ); try { int numSolutions = 0; - IChunkedOrderedIterator<ISolution> solutions = runQuery(db, rule); + final IChunkedOrderedIterator<IBindingSet> solutions = runQuery(db, rule); while (solutions.hasNext()) { - ISolution solution = solutions.next(); + final IBindingSet bs = solutions.next(); - IBindingSet bs = solution.getBindingSet(); + ... [truncated message content] |
From: <tho...@us...> - 2011-02-21 22:01:59
|
Revision: 4218 http://bigdata.svn.sourceforge.net/bigdata/?rev=4218&view=rev Author: thompsonbry Date: 2011-02-21 22:01:49 +0000 (Mon, 21 Feb 2011) Log Message: ----------- - Working on join graphs and the runtime query optimizer. - Moved JoinGraph, NoSolutionsException, and PartitionedJoinGraph into the com.bigdata.bop.joinGraph package. - Moved IEvaluationPlan, IEvaluationPlanFactory, DefaultEvaluationPlan, etc. into the com.bigdata.bop.joinGraph package. - Moved BOpUtility into the com.bigdata.bop.util package. - Added partial support for dynamically identifying edges based on constraints and for accepting unconstrained edges once there are no other vertices available to extend the join graph. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/package.html branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/AbstractJoinNexus.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/AbstractJoinNexusFactory.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/IJoinNexus.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/IJoinNexusFactory.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/IRuleState.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/IRuleStatisticsFactory.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/RuleState.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/RuleStats.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/MockJoinNexusFactory.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/filter/TestAll.java 
branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/relation/rule/eval/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/inf/OwlSameAsPropertiesExpandingIterator.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/rules/InferenceEngine.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/rules/RDFJoinNexus.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/rules/RDFJoinNexusFactory.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/AbstractTripleStore.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/AbstractJoinGraphTestCase.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBarData.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/internal/constraints/TestInlineConstraints.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/magic/TestIRIS.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/rules/AbstractRuleTestCase.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/rules/TestOptionals.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/rules/TestRuleExpansion.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/spo/TestSPORelation.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/spo/TestSPOStarJoin.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/Rule2BOpUtility.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/ 
branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/DefaultRangeCountFactory.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/FixedEvaluationPlanFactory.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/IEvaluationPlan.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/IEvaluationPlanFactory.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/IRangeCountFactory.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/NOPEvaluationPlanFactory.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/NoReorderEvaluationPlan.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/NoSolutionsException.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/PartitionedJoinGroup.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/fast/ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/fast/DefaultEvaluationPlan2.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/fast/DefaultEvaluationPlanFactory2.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/package.html branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JoinGraph.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/util/ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/joinGraph/ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/joinGraph/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/joinGraph/TestPartitionedJoinGroup.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/joinGraph/fast/ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/joinGraph/fast/TestAll.java 
branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/joinGraph/fast/TestDefaultEvaluationPlan.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/joinGraph/rto/ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/joinGraph/rto/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/joinGraph/rto/TestJGraph.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/joinGraph/rto/TestJoinGraph.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/util/ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/util/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/util/TestBOpUtility.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/util/TestBOpUtility_canJoin.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/util/TestBOpUtility_canJoinUsingConstraints.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/util/TestBOpUtility_sharedVariables.java Removed Paths: ------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/NoSolutionsException.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/PartitionedJoinGroup.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/DefaultEvaluationPlan2.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/DefaultEvaluationPlanFactory2.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/DefaultRangeCountFactory.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/FixedEvaluationPlanFactory.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/IEvaluationPlan.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/IEvaluationPlanFactory.java 
branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/IRangeCountFactory.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/NOPEvaluationPlanFactory.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/NoReorderEvaluationPlan.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestBOpUtility.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestBOpUtility_canJoin.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestBOpUtility_canJoinUsingConstraints.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestJGraph.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestJoinGraph.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestPartitionedJoinGroup.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/relation/rule/eval/TestDefaultEvaluationPlan.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java 2011-02-21 18:33:22 UTC (rev 4217) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java 2011-02-21 22:01:49 UTC (rev 4218) @@ -40,8 +40,8 @@ import org.apache.log4j.Logger; import com.bigdata.bop.BOp.Annotations; -import com.bigdata.bop.controller.PartitionedJoinGroup; import com.bigdata.bop.engine.BOpStats; +import com.bigdata.bop.joinGraph.PartitionedJoinGroup; import com.bigdata.btree.AbstractNode; import com.bigdata.relation.accesspath.IAsynchronousIterator; import com.bigdata.relation.accesspath.IBlockingBuffer; @@ -1381,6 +1381,9 @@ /* * Find the constraints that will run with each vertex of the new * join path. + * + * TODO This is a forward reference to a different package, so maybe + * move the canJoinWithConstraints() method to that package? 
*/ final IConstraint[][] constraintRunArray = PartitionedJoinGroup .getJoinGraphConstraints(newPath, constraints); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java 2011-02-21 18:33:22 UTC (rev 4217) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java 2011-02-21 22:01:49 UTC (rev 4218) @@ -34,6 +34,7 @@ import com.bigdata.bop.ap.filter.BOpFilterBase; import com.bigdata.bop.ap.filter.BOpTupleFilter; import com.bigdata.bop.ap.filter.DistinctFilter; +import com.bigdata.bop.joinGraph.IEvaluationPlan; import com.bigdata.btree.IRangeQuery; import com.bigdata.btree.ITuple; import com.bigdata.btree.ITupleCursor; @@ -47,7 +48,6 @@ import com.bigdata.relation.accesspath.IAccessPath; import com.bigdata.relation.rule.IAccessPathExpander; import com.bigdata.relation.rule.IRule; -import com.bigdata.relation.rule.eval.IEvaluationPlan; import com.bigdata.relation.rule.eval.pipeline.JoinMasterTask; import com.bigdata.service.ndx.IClientIndex; import com.bigdata.striterator.IKeyOrder; Deleted: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2011-02-21 18:33:22 UTC (rev 4217) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2011-02-21 22:01:49 UTC (rev 4218) @@ -1,3260 +0,0 @@ -/** - -Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. - -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. 
- -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -/* - * Created on Aug 16, 2010 - */ - -package com.bigdata.bop.controller; - -import java.io.Serializable; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.Comparator; -import java.util.Formatter; -import java.util.Iterator; -import java.util.LinkedHashMap; -import java.util.LinkedHashSet; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.UUID; -import java.util.concurrent.Callable; -import java.util.concurrent.FutureTask; - -import org.apache.log4j.Logger; - -import com.bigdata.bop.BOp; -import com.bigdata.bop.BOpContext; -import com.bigdata.bop.BOpContextBase; -import com.bigdata.bop.BOpEvaluationContext; -import com.bigdata.bop.BOpIdFactory; -import com.bigdata.bop.BOpUtility; -import com.bigdata.bop.IBindingSet; -import com.bigdata.bop.IConstraint; -import com.bigdata.bop.IElement; -import com.bigdata.bop.IPredicate; -import com.bigdata.bop.IVariable; -import com.bigdata.bop.NV; -import com.bigdata.bop.PipelineOp; -import com.bigdata.bop.ap.SampleIndex; -import com.bigdata.bop.ap.SampleIndex.SampleType; -import com.bigdata.bop.bindingSet.HashBindingSet; -import com.bigdata.bop.engine.IRunningQuery; -import com.bigdata.bop.engine.LocalChunkMessage; -import com.bigdata.bop.engine.QueryEngine; -import com.bigdata.bop.join.PipelineJoin; -import com.bigdata.bop.join.PipelineJoin.PipelineJoinStats; -import com.bigdata.bop.rdf.join.DataSetJoin; -import com.bigdata.relation.IRelation; -import 
com.bigdata.relation.accesspath.BufferClosedException; -import com.bigdata.relation.accesspath.IAccessPath; -import com.bigdata.relation.accesspath.IAsynchronousIterator; -import com.bigdata.relation.accesspath.ThickAsynchronousIterator; -import com.bigdata.striterator.Dechunkerator; -import com.bigdata.striterator.IChunkedIterator; -import com.bigdata.util.concurrent.Haltable; - -/** - * A join graph with annotations for estimated cardinality and other details in - * support of runtime query optimization. A join graph is a collection of - * relations and joins which connect those relations. This boils down to a - * collection of {@link IPredicate}s (selects on relations), shared variables - * (which identify joins), and {@link IConstraint}s (which limit solutions). - * Operators other than standard joins (including optional joins, sort, order - * by, etc.) must be handled downstream from the join graph in a "tail plan". - * - * @see http://arxiv.org/PS_cache/arxiv/pdf/0810/0810.4809v1.pdf, XQuery Join - * Graph Isolation. - * - * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id$ - * - * @todo Examine the overhead of the runtime optimizer. Look at ways to prune - * its costs. For example, by pruning the search, by recognizing when the - * query is simple enough to execute directly, by recognizing when we have - * already materialized the answer to the query, etc. - * - * @todo Cumulative estimated cardinality is an estimate of the work to be done. - * However, the actual cost of a join depends on whether we will use - * nested index subquery or a hash join and the cost of that operation on - * the database. There could be counter examples where the cost of the - * hash join with a range scan using the unbound variable is LT the nested - * index subquery. For those cases, we will do the same amount of IO on - * the hash join but there will still be a lower cardinality to the join - * path since we are feeding in fewer solutions to be joined. 
- * - * @todo Look at the integration with the SAIL. We decorate the joins with some - * annotations. Those will have to be correctly propagated to the "edges" - * in order for edge sampling and incremental evaluation (or final - * evaluation) to work. The {@link DataSetJoin} essentially inlines one of - * its access paths. That should really be changed into an inline access - * path and a normal join operator so we can defer some of the details - * concerning the join operator annotations until we decide on the join - * path to be executed. An inline AP really implies an inline relation, - * which in turn implies that the query is a searchable context for - * query-local resources. - * <p> - * For s/o, when the AP is remote, the join evaluation context must be ANY - * and otherwise (for s/o) it must be SHARDED. - * <p> - * Since the join graph is fed the vertices (APs), it does not have access - * to the annotated joins so we need to generated appropriately annotated - * joins when sampling an edge and when evaluation a subquery. - * <p> - * One solution would be to always use the unpartitioned views of the - * indices for the runtime query optimizer, which is how we are estimating - * the range counts of the access paths right now. [Note that the static - * query optimizer ignores named and default graphs, while the runtime - * query optimizer SHOULD pay attention to these things and exploit their - * conditional selectivity for the query plan.] - * - * @todo Handle optional join graphs by first applying the runtime optimizer to - * the main join graph and obtaining a sample for the selected join path. - * That sample will then be feed into the the optional join graph in order - * to optimize the join order within the optional join graph (a join order - * which is selective in the optional join graph is better since it will - * result in faster rejections of intermediate results and hence do less - * work). 
- * <p> - * This is very much related to accepting a collection of non-empty - * binding sets when running the join graph. However, optional join graph - * should be presented in combination with the original join graph and the - * starting paths must be constrained to have the selected join path for - * the original join graph as a prefix. With this setup, the original join - * graph has been locked in to a specific join path and the sampling of - * edges and vertices for the optional join graph can proceed normally. - * <p> - * True optionals will always be appended as part of the "tail plan" for - * any join graph and can not be optimized as each optional join must run - * regardless (as long as the intermediate solution survives the - * non-optional joins). - * - * @todo There are two cases where a join graph must be optimized against a - * specific set of inputs. In one case, it is a sample (this is how - * optimization of an optional join group proceeds per above). In the - * other case, the set of inputs is fixed and is provided instead of a - * single empty binding set as the starting condition. This second case is - * actually a bit more complicated since we can not use a random sample of - * vertices unless the do not share any variables with the initial binding - * sets. When there is a shared variable, we need to do a cutoff join of - * the edge with the initial binding sets. When there is not a shared - * variable, we can sample the vertex and then do a cutoff join. - * - * @todo When we run into a cardinality estimation underflow (the expected - * cardinality goes to zero) we could double the sample size for just - * those join paths which hit a zero estimated cardinality and re-run them - * within the round. This would imply that we keep per join path limits. - * The vertex and edge samples are already aware of the limit at which - * they were last sampled so this should not cause any problems there. 
- * <p> - * A related option would be to deepen the samples only when we are in - * danger of cardinality estimation underflow. E.g., a per-path limit. - * Resampling vertices may only make sense when we increase the limit - * since otherwise we may find a different correlation with the new sample - * but the comparison of paths using one sample base with paths using a - * different sample base in a different round does not carry forward the - * cardinality estimates from the prior round (unless we do something like - * a weighted moving average). - * - * @todo When comparing choices among join paths having fully bound tails where - * the estimated cardinality has also gone to zero, we should prefer to - * evaluate vertices in the tail with better index locality first. For - * example, if one vertex had one variable in the original plan while - * another had two variables, then solutions which reach the 2-var vertex - * could be spread out over a much wider range of the selected index than - * those which reach the 1-var vertex. [In order to support this, we would - * need a means to indicate that a fully bound access path should use an - * index specified by the query optimizer rather than the primary index - * for the relation. In addition, this suggests that we should keep bloom - * filters for more than just the SPO(C) index in scale-out.] - * - * @todo Examine behavior when we do not have perfect covering indices. This - * will mean that some vertices can not be sampled using an index and that - * estimation of their cardinality will have to await the estimation of - * the cardinality of the edge(s) leading to that vertex. Still, the - * approach should be able to handle queries without perfect / covering - * automatically. Then experiment with carrying fewer statement indices - * for quads. - * - * @todo Unit test when there are no solutions to the query. 
In this case there - * will be no paths identified by the optimizer and the final path length - * becomes zero. - */ -public class JoinGraph extends PipelineOp { - - private static final transient Logger log = Logger - .getLogger(JoinGraph.class); - - private static final long serialVersionUID = 1L; - - /** - * Known annotations. - */ - public interface Annotations extends PipelineOp.Annotations { - - /** - * The vertices of the join graph, expressed an an {@link IPredicate}[] - * (required). - */ - String VERTICES = JoinGraph.class.getName() + ".vertices"; - - /** - * The constraints on the join graph, expressed an an - * {@link IConstraint}[] (optional, defaults to no constraints). - */ - String CONSTRAINTS = JoinGraph.class.getName() + ".constraints"; - - /** - * The initial limit for cutoff sampling (default - * {@value #DEFAULT_LIMIT}). - */ - String LIMIT = JoinGraph.class.getName() + ".limit"; - - int DEFAULT_LIMIT = 100; - - /** - * The <i>nedges</i> edges of the join graph having the lowest - * cardinality will be used to generate the initial join paths (default - * {@value #DEFAULT_NEDGES}). This must be a positive integer. - */ - String NEDGES = JoinGraph.class.getName() + ".nedges"; - - int DEFAULT_NEDGES = 2; - } - - /** - * @see Annotations#VERTICES - */ - public IPredicate<?>[] getVertices() { - - return (IPredicate[]) getRequiredProperty(Annotations.VERTICES); - - } - - /** - * @see Annotations#CONSTRAINTS - */ - public IConstraint[] getConstraints() { - - return (IConstraint[]) getProperty(Annotations.CONSTRAINTS, null/* none */); - - } - - /** - * @see Annotations#LIMIT - */ - public int getLimit() { - - return getProperty(Annotations.LIMIT, Annotations.DEFAULT_LIMIT); - - } - - /** - * @see Annotations#NEDGES - */ - public int getNEdges() { - - return getProperty(Annotations.NEDGES, Annotations.DEFAULT_NEDGES); - - } - - public JoinGraph(final BOp[] args, final NV... 
anns) { - - this(args, NV.asMap(anns)); - - } - - public JoinGraph(final BOp[] args, final Map<String, Object> anns) { - - super(args, anns); - - // required property. - final IPredicate<?>[] vertices = (IPredicate[]) getProperty(Annotations.VERTICES); - - if (vertices == null) - throw new IllegalArgumentException(Annotations.VERTICES); - - if (vertices.length == 0) - throw new IllegalArgumentException(Annotations.VERTICES); - - if (getLimit() <= 0) - throw new IllegalArgumentException(Annotations.LIMIT); - - if (getNEdges() <= 0) - throw new IllegalArgumentException(Annotations.NEDGES); - - if (!isController()) - throw new IllegalArgumentException(); - - switch (getEvaluationContext()) { - case CONTROLLER: - break; - default: - throw new IllegalArgumentException(Annotations.EVALUATION_CONTEXT - + "=" + getEvaluationContext()); - } - - } - - public FutureTask<Void> eval(final BOpContext<IBindingSet> context) { - - return new FutureTask<Void>(new JoinGraphTask(context)); - - } - - /** - * A sample of a {@link Vertex} (an access path). - */ - public static class VertexSample { - - /** - * Fast range count. This will be the same for each sample taken - * (assuming a read historical view or even a time scale of query which - * is significantly faster than update). - */ - public final long rangeCount; - - /** - * The limit used to produce the {@link #sample}. - */ - public final int limit; - - /** - * When <code>true</code>, the result is not a sample but the - * materialized access path. - * - * TODO When <code>true</code>, we could run the join against the sample - * rather than the disk. This would require wrapping the sample as an - * access path. Since all exact samples will be pretty small, this is - * not likely to have any great performance benefit. - */ - public final boolean exact; - - /** - * Sample. 
- */ - final Object[] sample; - - /** - * - * @param rangeCount - * @param limit - * @param exact - * @param sample - */ - public VertexSample(final long rangeCount, final int limit, - final boolean exact, final Object[] sample) { - - if (rangeCount < 0L) - throw new IllegalArgumentException(); - - if (limit <= 0) - throw new IllegalArgumentException(); - - if (sample == null) - throw new IllegalArgumentException(); - - this.rangeCount = rangeCount; - - this.limit = limit; - - this.exact = exact; - - this.sample = sample; - - } - - public String toString() { - return "VertexSample{rangeCount=" + rangeCount + ",limit=" + limit - + ",exact=" + exact + ", sampleSize=" + sample.length + "}"; - } - - } - - /** - * A vertex of the join graph is an annotated relation (this corresponds to - * an {@link IPredicate} with additional annotations to support the adaptive - * query optimization algorithm). - * <p> - * The unique identifier for a {@link Vertex} (within a given join graph) is - * the {@link BOp.Annotations#BOP_ID} decorating its {@link IPredicate}. - * {@link #hashCode()} is defined in terms of this unique identifier so we - * can readily detect when a {@link Set} already contains a given - * {@link Vertex}. - */ - public static class Vertex implements Serializable { - - /** - * - */ - private static final long serialVersionUID = 1L; - - public final IPredicate<?> pred; - - /** - * The most recently taken sample of the {@link Vertex}. - */ - transient VertexSample sample = null; - - Vertex(final IPredicate<?> pred) { - - if (pred == null) - throw new IllegalArgumentException(); - - this.pred = pred; - - } - - public String toString() { - - return "Vertex{pred=" + pred + ",sample=" + sample + "}"; - - } - - /** - * Equals is based on a reference test. - */ - public boolean equals(Object o) { - return this == o; - } - - /** - * The hash code is just the {@link BOp.Annotations#BOP_ID} of the - * associated {@link IPredicate}. 
- */ - public int hashCode() { - return pred.getId(); - } - - /** - * Take a sample of the vertex, updating {@link #sample} as a - * side-effect. If the sample is already exact, then this is a NOP. If - * the vertex was already sampled to that limit, then this is a NOP (you - * have to raise the limit to re-sample the vertex). - * - * @param limit - * The sample cutoff. - */ - public void sample(final QueryEngine queryEngine, final int limit) { - - if (queryEngine == null) - throw new IllegalArgumentException(); - - if (limit <= 0) - throw new IllegalArgumentException(); - - final VertexSample oldSample = this.sample; - - if (oldSample != null && oldSample.exact) { - - /* - * The old sample is already the full materialization of the - * vertex. - */ - - return; - - } - - if (oldSample != null && oldSample.limit >= limit) { - - /* - * The vertex was already sampled to this limit. - */ - - return; - - } - - final BOpContextBase context = new BOpContextBase(queryEngine); - - final IRelation r = context.getRelation(pred); - - final IAccessPath ap = context.getAccessPath(r, pred); - - final long rangeCount = oldSample == null ? ap - .rangeCount(false/* exact */) : oldSample.rangeCount; - - if (rangeCount <= limit) { - - /* - * Materialize the access path. - * - * TODO This could be more efficient if we raised it onto the AP - * or if we overrode CHUNK_CAPACITY and the fully buffered - * iterator threshold such that everything was materialized as a - * single chunk. - */ - - final List<Object> tmp = new ArrayList<Object>((int) rangeCount); - - final IChunkedIterator<Object> itr = ap.iterator(); - - try { - - while (itr.hasNext()) { - - tmp.add(itr.next()); - - } - - } finally { - - itr.close(); - } - - sample = new VertexSample(rangeCount, limit, true/* exact */, - tmp.toArray(new Object[0])); - - } else { - - /* - * Materialize a random sample from the access path. 
- */ - -// final SampleType sampleType = SampleType.EVEN; - final SampleType sampleType = SampleType.RANDOM; - - final SampleIndex<?> sampleOp = new SampleIndex( - new BOp[] {}, // - NV.asMap(// - new NV(SampleIndex.Annotations.PREDICATE, pred),// - new NV(SampleIndex.Annotations.LIMIT, limit),// - new NV(SampleIndex.Annotations.SAMPLE_TYPE, sampleType.name())// - )); - - sample = new VertexSample(rangeCount, limit, false/* exact */, - sampleOp.eval(context)); - - } - - if (log.isTraceEnabled()) - log.trace("Sampled: " + sample); - - return; - - } - - } - - /** - * Type safe enumeration describes the edge condition (if any) for a - * cardinality estimate. - */ - public static enum EstimateEnum { - /** - * An estimate, but not any of the edge conditions. - */ - Normal(" "), - /** - * The cardinality estimate is exact. - */ - Exact("E"), - /** - * The cardinality estimation is a lower bound (the actual cardinality - * may be higher than the estimated value). - */ - LowerBound("L"), - /** - * Flag is set when the cardinality estimate underflowed (false zero - * (0)). - */ - Underflow("U"); - - private EstimateEnum(final String code) { - - this.code = code; - - } - - private final String code; - - public String getCode() { - - return code; - - } - - } // EstimateEnum - - /** - * A sample of an {@link Edge} (a join). - */ - public static class EdgeSample { - - /** - * The fast range count (aka cardinality) for the source vertex of the - * edge (whichever vertex has the lower cardinality). - */ - public final long rangeCount; - - /** - * <code>true</code> iff the source sample is exact (because the source - * is either a fully materialized vertex or an edge whose solutions have - * been fully materialized). - */ - public final boolean sourceSampleExact; - - /** - * The limit used to sample the edge (this is the limit on the #of - * solutions generated by the cutoff join used when this sample was - * taken). 
- */ - public final int limit; - - /** - * The #of binding sets out of the source sample vertex sample which - * were consumed. - */ - public final int inputCount; - - /** - * The #of binding sets generated before the join was cutoff. - * <p> - * Note: If the outputCount is zero then this is a good indicator that - * there is an error in the query such that the join will not select - * anything. This is not 100%, merely indicative. - */ - public final long outputCount; - - /** - * The ratio of the #of input samples consumed to the #of output samples - * generated (the join hit ratio or scale factor). - */ - public final double f; - - /** - * The estimated cardinality of the join. - */ - public final long estimatedCardinality; - - /** - * Indicates whether the estimate is exact, an upper bound, or a lower - * bound. - * - * TODO This field should be used to avoid needless re-computation of a - * join whose exact solution is already known. - */ - public final EstimateEnum estimateEnum; - - /** - * The sample of the solutions for the join path. - */ - private final IBindingSet[] sample; - - /** - * Create an object which encapsulates a sample of an edge. - * - * @param limit - * The limit used to sample the edge (this is the limit on - * the #of solutions generated by the cutoff join used when - * this sample was taken). - * @param sourceVertexSample - * The sample for source vertex of the edge (whichever vertex - * has the lower cardinality). - * @param inputCount - * The #of binding sets out of the source sample vertex - * sample which were consumed. - * @param outputCount - * The #of binding sets generated before the join was cutoff. 
- */ - EdgeSample( - // final VertexSample sourceVertexSample, - final long sourceSampleRangeCount,// - final boolean sourceSampleExact, // - final int sourceSampleLimit,// - final int limit,// - final int inputCount, // - final long outputCount,// - final double f, - final long estimatedCardinality, - final IBindingSet[] sample) { - - if (sample == null) - throw new IllegalArgumentException(); - - // this.rangeCount = sourceVertexSample.rangeCount; - this.rangeCount = sourceSampleRangeCount; - - this.sourceSampleExact = sourceSampleExact; - - this.limit = limit; - - this.inputCount = inputCount; - - this.outputCount = outputCount; - - this.f = f; - - this.estimatedCardinality = estimatedCardinality; - - if (sourceSampleExact && outputCount < limit) { - /* - * Note: If the entire source vertex is being fed into the - * cutoff join and the cutoff join outputCount is LT the limit, - * then the sample is the actual result of the join. That is, - * feeding all source solutions into the join gives fewer than - * the desired number of output solutions. - */ - estimateEnum = EstimateEnum.Exact; - } else if (inputCount == 1 && outputCount == limit) { - /* - * If the inputCount is ONE (1) and the outputCount is the - * limit, then the estimated cardinality is a lower bound as - * more than outputCount solutions might be produced by the join - * when presented with a single input solution. - */ - estimateEnum = EstimateEnum.LowerBound; - } else if (!sourceSampleExact - && inputCount == Math.min(sourceSampleLimit, rangeCount) - && outputCount == 0) { - /* - * When the source sample was not exact, the inputCount is EQ to - * the lesser of the source range count and the source sample - * limit, and the outputCount is ZERO (0), then feeding in all - * source solutions in is not sufficient to generate any output - * solutions. In this case, the estimated join hit ratio appears - * to be zero. 
However, the estimation of the join hit ratio - * actually underflowed and the real join hit ratio might be a - * small non-negative value. A real zero can only be identified - * by executing the full join. - * - * Note: An apparent join hit ratio of zero does NOT imply that - * the join will be empty (unless the source vertex sample is - * actually the fully materialized access path - this case is - * covered above). - */ - estimateEnum = EstimateEnum.Underflow; - } else { - estimateEnum = EstimateEnum.Normal; - } - - this.sample = sample; - } - - public String toString() { - return getClass().getName() // - + "{ rangeCount=" + rangeCount// - + ", sourceSampleExact=" + sourceSampleExact// - + ", limit=" + limit // - + ", inputCount=" + inputCount// - + ", outputCount=" + outputCount // - + ", f=" + f// - + ", estimatedCardinality=" + estimatedCardinality// - + ", estimateEnum=" + estimateEnum// -// + ", estimateIsLowerBound=" + estimateIsLowerBound// -// + ", estimateIsUpperBound=" + estimateIsUpperBound// -// + ", sampleIsExactSolution=" + estimateIsExact // - + "}"; - } - - }; - - /** - * An edge of the join graph is an annotated join operator. The edges of the - * join graph are undirected. Edges exist when the vertices share at least - * one variable. - * <p> - * {@link #hashCode()} is defined in terms of the unordered hash codes of - * the individual vertices. - */ - public static class Edge implements Serializable { - - /** - * - */ - private static final long serialVersionUID = 1L; - - /** - * The vertices connected by that edge. - */ - public final Vertex v1, v2; - - /** - * The set of shared variables. - */ - public final Set<IVariable<?>> shared; - - /** - * The last sample for this edge and <code>null</code> if the edge has - * not been sampled. - * <p> - * Note: This sample is only the one-step cutoff evaluation of the edge - * given a sample of its vertex having the lesser cardinality. 
It is NOT - * the cutoff sample of a join path having this edge except for the - * degenerate case where the edge is the first edge in the join path. - */ - transient EdgeSample sample = null; - - public Edge(final Vertex v1, final Vertex v2, - final Set<IVariable<?>> shared) { - if (v1 == null) - throw new IllegalArgumentException(); - if (v2 == null) - throw new IllegalArgumentException(); - if (shared == null) - throw new IllegalArgumentException(); - // Note: We need to allow edges which do not share variables -// if (shared.isEmpty()) -// throw new IllegalArgumentException(); - this.v1 = v1; - this.v2 = v2; - this.shared = shared; - } - - /** - * The edge label is formed from the {@link BOp.Annotations#BOP_ID} of - * its ordered vertices (v1,v2). - */ - public String getLabel() { - - return "(" + v1.pred.getId() + "," + v2.pred.getId() + ")"; - - } - - /** - * Note: The vertices of the edge are labeled using the - * {@link BOp.Annotations#BOP_ID} associated with the {@link IPredicate} - * for each vertex. - */ - public String toString() { - - return "Edge{ "+getLabel()+", estCard=" - + (sample == null ? "N/A" : sample.estimatedCardinality) - + ", shared=" + shared.toString() + ", sample=" + sample - + "}"; - - } - - /** - * Equality is determined by reference testing. - */ - public boolean equals(final Object o) { - - return this == o; - - } - - /** - * The hash code of an edge is the hash code of the vertex with the - * smaller hash code X 31 plus the hash code of the vertex with the - * larger hash code. This definition compensates for the arbitrary order - * in which the vertices may be expressed and also recognizes that the - * vertex hash codes are based on the bop ids, which are often small - * integers. 
- */ - public int hashCode() { - - if (hash == 0) { - - final int h1 = v1.hashCode(); - final int h2 = v2.hashCode(); - - final int h; - if (h1 < h2) { - - h = h1 * 31 + h2; - - } else { - - h = h2 * 31 + h1; - - } - - hash = h; - - } - return hash; - - } - - private int hash; - - /** - * Return the vertex with the smaller estimated cardinality. - * - * @throws IllegalStateException - * if either vertex has not been sampled. - */ - public Vertex getMinimumCardinalityVertex() { - - if (v1.sample == null) // vertex not sampled. - throw new IllegalStateException(); - - if (v2.sample == null) // vertex not sampled. - throw new IllegalStateException(); - - return (v1.sample.rangeCount < v2.sample.rangeCount) ? v1 : v2; - - } - - /** - * Return the vertex with the larger estimated cardinality (the vertex - * not returned by {@link #getMinimumCardinalityVertex()}). - * - * @throws IllegalStateException - * if either vertex has not been sampled. - */ - public Vertex getMaximumCardinalityVertex() { - - // The vertex with the minimum cardinality. - final Vertex o = getMinimumCardinalityVertex(); - - // Return the other vertex. - return (v1 == o) ? v2 : v1; - - } - - /** - * Estimate the cardinality of the edge, updating {@link #sample} as a - * side-effect. This is a NOP if the edge has already been sampled at - * that <i>limit</i>. This is a NOP if the edge sample is exact. - * - * @param context - * - * @return The new {@link EdgeSample} (this is also updated on - * {@link #sample} as a side-effect). - * - * @throws Exception - */ - public EdgeSample estimateCardinality(final QueryEngine queryEngine, - final int limit) throws Exception { - - if (limit <= 0) - throw new IllegalArgumentException(); - -// /* -// * Note: There is never a need to "re-sample" the edge. Unlike ROX, -// * we always can sample a vertex. This means that we can sample the -// * edges exactly once, during the initialization of the join graph. 
-// */ -// if (sample != null) -// throw new RuntimeException(); - - if (sample != null) { - - if (sample.limit >= limit) { - - // Already sampled at that limit. - return sample; - - } - - if (sample.estimateEnum == EstimateEnum.Exact) { - - // Sample is exact (fully materialized result). - return sample; - - } - - } - - /* - * Figure out which vertex has the smaller cardinality. The sample - * of that vertex is used since it is more representative than the - * sample of the other vertex. - */ - // vertex v, vprime - final Vertex v, vp; - if (v1.sample == null) // vertex not sampled. - throw new IllegalStateException(); - if (v2.sample == null) // vertex not sampled. - throw new IllegalStateException(); - /* - * FIXME CONSTRAINT ORDERING : If a variable only appears in a - * CONSTRAINT for one of the two vertices then that vertex must be - * evaluated second. (If the vertices both have this problem then - * the edge can not be evaluated until some other vertex causes the - * variables of either one [v1] or [v2] to become bound.) - */ - if (v1.sample.rangeCount < v2.sample.rangeCount) { - v = v1; - vp = v2; - } else { - v = v2; - vp = v1; - } - - /* - * Convert the source sample into an IBindingSet[]. - * - * TODO We might as well do this when we sample the vertex. - */ - final IBindingSet[] sourceSample = new IBindingSet[v.sample.sample.length]; - { - for (int i = 0; i < sourceSample.length; i++) { - final IBindingSet bset = new HashBindingSet(); - BOpContext.copyValues((IElement) v.sample.sample[i], - v.pred, bset); - sourceSample[i] = bset; - } - } - - // Sample the edge and save the sample on the edge as a side-effect. - this.sample = estimateCardinality(queryEngine, limit, v, vp, - v.sample.rangeCount, v.sample.exact, v.sample.limit, - sourceSample); - - return sample; - - } - - /** - * Estimate the cardinality of the edge given a sample of either a - * vertex or a join path leading up to that edge. 
- * <p> - * Note: The caller is responsible for protecting against needless - * re-sampling. - * - * @param queryEngine - * @param limit - * @param vSource - * The source vertex. - * @param vTarget - * The target vertex - * @param sourceSample - * The sample for the source vertex. When this is a one-step - * estimation of the cardinality of the edge, then this - * sample is taken from the {@link VertexSample}. When the - * edge (vSource,vTarget) extends some {@link Path}, then - * this is taken from the {@link EdgeSample} for that - * {@link Path}. - * - * @return The result of sampling that edge. - * - * @throws Exception - */ - public EdgeSample estimateCardinality(final QueryEngine queryEngine, - final int limit, final Vertex vSource, final Vertex vTarget, - final long sourceSampleRangeCount, - final boolean sourceSampleExact, - final int sourceSampleLimit, - final IBindingSet[] sourceSample) - throws Exception { - - if (limit <= 0) - throw new IllegalArgumentException(); - - /* - * Note: This sets up a cutoff pipeline join operator which makes an - * accurate estimate of the #of input solutions consumed and the #of - * output solutions generated. From that, we can directly compute - * the join hit ratio. This approach is preferred to injecting a - * "RowId" column as the estimates are taken based on internal - * counters in the join operator and the join operator knows how to - * cutoff evaluation as soon as the limit is satisfied, thus - * avoiding unnecessary effort. - * - * TODO Any constraints on the edge (other than those implied by - * shared variables) need to be annotated on the join. Constraints - * (other than range constraints which are directly coded by the - * predicate) will not reduce the effort to compute the join, but - * they can reduce the cardinality of the join and that is what we - * are trying to estimate here. - * - * TODO How can join constraints be moved around? Just attach them - * where ever a variable becomes bound? 
And when do we filter out - * variables which are not required downstream? Once we decide on a - * join path and execute it fully (rather than sampling that join - * path). - */ - final int joinId = 1; - final Map<String,Object> anns = NV.asMap(// - new NV(BOp.Annotations.BOP_ID, joinId),// - // @todo Why not use a factory which avoids bopIds already in use? - new NV(PipelineJoin.Annotations.PREDICATE, vTarget.pred.setBOpId(3)), - // disallow parallel evaluation of tasks. - new NV(PipelineOp.Annotations.MAX_PARALLEL,1), - // disallow parallel evaluation of chunks. - new NV(PipelineJoin.Annotations.MAX_PARALLEL_CHUNKS,0), - // disable access path coalescing - new NV(PipelineJoin.Annotations.COALESCE_DUPLICATE_ACCESS_PATHS,false), - // cutoff join. - new NV(PipelineJoin.Annotations.LIMIT,(long)limit), - /* - * Note: In order to have an accurate estimate of the join - * hit ratio we need to make sure that the join operator - * runs using a single PipelineJoinStats instance which will - * be visible to us when the query is cutoff. In turn, this - * implies that the join must be evaluated on the query - * controller. - * - * @todo This implies that sampling of scale-out joins must - * be done using remote access paths. - */ - new NV(PipelineJoin.Annotations.SHARED_STATE,true), - new NV(PipelineJoin.Annotations.EVALUATION_CONTEXT,BOpEvaluationContext.CONTROLLER) - ); - - final PipelineJoin joinOp = new PipelineJoin(new BOp[] {}, anns); - - final PipelineOp queryOp = joinOp; - - // run the cutoff sampling of the edge. 
- final UUID queryId = UUID.randomUUID(); - final IRunningQuery runningQuery = queryEngine.eval(queryId, - queryOp, new LocalChunkMessage<IBindingSet>(queryEngine, - queryId, joinOp.getId()/* startId */, - -1 /* partitionId */, - new ThickAsynchronousIterator<IBindingSet[]>( - new IBindingSet[][] { sourceSample }))); - - final List<IBindingSet> result = new LinkedList<IBindingSet>(); - try { - try { - IBindingSet bset = null; - // Figure out the #of source samples consumed. - final Iterator<IBindingSet> itr = new Dechunkerator<IBindingSet>( - runningQuery.iterator()); - while (itr.hasNext()) { - bset = itr.next(); - result.add(bset); - } - } finally { - // verify no problems. - runningQuery.get(); - } - } finally { - runningQuery.cancel(true/* mayInterruptIfRunning */); - } - - // The join hit ratio can be computed directly from these stats. - final PipelineJoinStats joinStats = (PipelineJoinStats) runningQuery - .getStats().get(joinId); - - if (log.isTraceEnabled()) - log.trace(joinStats.toString()); - - /* - * TODO Improve comments here. See if it is possible to isolate a - * common base class which would simplify the setup of the cutoff - * join and the computation of the sample stats. - */ - - // #of solutions in. - final int nin = (int) joinStats.inputSolutions.get(); - - // #of solutions out. - long nout = joinStats.outputSolutions.get(); - - // cumulative range count of the sampled access paths. - final long sumRangeCount = joinStats.accessPathRangeCount.get(); - - if (nin == 1 && nout == limit) { - /* - * We are getting [limit] solutions out for one solution in. In - * this case, (nout/nin) is a lower bound for the estimated - * cardinality of the edge. In fact, this condition suggests - * that the upper bound is a must better estimate of the - * cardinality of this join. Therefore, we replace [nout] with - * the sum of the range counts for the as-bound predicates - * considered by the cutoff join. 
- * - * For example, consider a join feeding a rangeCount of 16 into - * a rangeCount of 175000. With a limit of 100, we estimated the - * cardinality at 1600L (lower bound). In fact, the cardinality - * is 16*175000. This falsely low estimate can cause solutions - * which are really better to be dropped. - * - * @todo we should mark [nout] when we do this show that it - * shows up in the trace! Also, the rangeCount is sometimes - * falsely high. However, that should be corrected by random - * resampling of the vertices and paths. - */ - nout = sumRangeCount; - - } - - final double f = nout == 0 ? 0 : (nout / (double) nin); - - final long estimatedCardinality = (long) (sourceSampleRangeCount * f); - - final EdgeSample edgeSample = new EdgeSample( - sourceSampleRangeCount, // - sourceSampleExact, // @todo redundant with sourceSampleLimit - sourceSampleLimit, // - limit, // - nin,// - nout, // - f, // - estimatedCardinality, // - result.toArray(new IBindingSet[result.size()])); - - if (log.isDebugEnabled()) - log.debug(getLabel() + " : newSample=" + edgeSample); - - return edgeSample; - - } - - } - - /** - * A sequence of {@link Edge}s (aka join steps). - */ - public static class Path { - - /** - * An immutable ordered list of the edges in the (aka the sequence of - * joins represented by this path). - */ - public final List<Edge> edges; - - /** - * The sample obtained by the step-wise cutoff evaluation of the ordered - * edges of the path. - * <p> - * Note: This sample is generated one edge at a time rather than by - * attempting the cutoff evaluation of the entire join path (the latter - * approach does allow us to limit the amount of work to be done to - * satisfy the cutoff). - */ - public EdgeSample sample; - - /** - * The cumulative estimated cardinality of the path. This is zero for an - * empty path. For a path consisting of a single edge, this is the - * estimated cardinality of that edge. 
When creating a new path by - * adding an edge to an existing path, the cumulative cardinality of the - * new path is the cumulative cardinality of the existing path plus the - * estimated cardinality of the cutoff join of the new edge given the - * input sample of the existing path. - * - * @todo track this per vertex as well as the total for more interesting - * traces in showPath(Path). - */ - final public long cumulativeEstimatedCardinality; - - public String toString() { - final StringBuilder sb = new StringBuilder(); - sb.append("Path{"); - boolean first = true; - for (Edge e : edges) { - if (!first) - sb.append(","); - sb.append(e.getLabel()); - first = false; - } - sb.append(",cumEstCard=" + cumulativeEstimatedCardinality - + ",sample=" + sample + "}"); - return sb.toString(); - } - - /** - * Create an empty path. - */ - public Path() { - this.edges = Collections.emptyList(); - this.cumulativeEstimatedCardinality = 0; - this.sample = null; - } - - /** - * Create a path from a single edge. - * - * @param e - * The edge. - */ - public Path(final Edge e) { - - if (e == null) - throw new IllegalArgumentException(); - - if (e.sample == null) - throw new IllegalArgumentException("Not sampled: " + e); - - this.edges = Collections.singletonList(e); - - this.sample = e.sample; - - this.cumulativeEstimatedCardinality = e.sample.estimatedCardinality; - - } - - /** - * Constructor used by {@link #addEdge(QueryEngine, int, Edge)} - * - * @param edges - * The edges in the new path. - * @param cumulativeEstimatedCardinality - * The cumulative estimated cardinality of the new path. 
- * @param sample - * The sample from the last - */ - private Path(final List<Edge> edges, - final long cumulativeEstimatedCardinality, - final EdgeSample sample) { - - if (edges == null) - throw new IllegalArgumentException(); - - if (cumulativeEstimatedCardinality < 0) - throw new IllegalArgumentException(); - - if (sample == null) - throw new IllegalArgumentException(); - - this.edges = Collections.unmodifiableList(edges); - - this.cumulativeEstimatedCardinality = cumulativeEstimatedCardinality; - - this.sample = sample; - - } - - /** - * Return <code>true</code> iff the {@link Path} contains at least one - * {@link Edge} for that {@link Vertex}. - * - * @param v - * The vertex - * - * @return true if the vertex is already part of the path. - */ - public boolean contains(final Vertex v) { - - if (v == null) - throw new IllegalArgumentException(); - - for (Edge e : edges) { - - if (e.v1 == v || e.v2 == v) - return true; - - } - - return false; - } - - /** - * Return <code>true</code> if this path is an unordered variant of the - * given path (same vertices in any order). - * - * @param p - * Another path. - * - * @return <code>true</code> if this path is an unordered variant of the - * given path. - */ - public boolean isUnorderedVariant(final Path p) { - - if (p == null) - throw new IllegalArgumentException(); - - if (edges.size() != p.edges.size()) { - /* - * Fast rejection. This assumes that each edge after the first - * adds one distinct vertex to the path. That assumption is - * enforced by #addEdge(). - */ - return false; - } - - final Vertex[] v1 = getVertices(); - final Vertex[] v2 = p.getVertices(); - - if (v1.length != v2.length) { - - // Reject (this case is also covered by the test above). - return false; - - } - - /* - * Scan the vertices of the caller's path. If any of those vertices - * are NOT found in this path the paths are not unordered variations - * of one another. 
- */ - for (int i = 0; i < v2.length; i++) { - - final Vertex tmp = v2[i]; - - boolean found = false; - for (int j = 0; j < v1.length; j++) { - - if (v1[j] == tmp) { - found = true; - break; - } - - } - - if (!found) { - return false; - } - - } - - return true; - - } - - /** - * Return the vertices in this path (in path order). For the first edge, - * the minimum cardinality vertex is always reported first (this is - * critical for producing the correct join plan). For the remaining - * edges in the path, the unvisited is reported. - * - * @return The vertices (in path order). - * - * TODO This could be rewritten without the toArray() using a - * method which visits the vertices of a path in any order. - * - * @todo unit test for the first vertex to be reported. - */ - public Vertex[] getVertices() { - - final Set<Vertex> tmp = new LinkedHashSet<Vertex>(); - - for (Edge e : edges) { - - if (tmp.isEmpty()) { - /* - * The first edge is handled specially in order to report - * the minimum cardinality vertex first. - * - * FIXME CONSTRAINT ORDERING : A vertex can not run until - * all variables appearing in its CONSTRAINTS would be - * bound. This can cause us to use and report an ordering - * which does not place the minimum cardinality vertex 1st. - */ - tmp.add(e.getMinimumCardinalityVertex()); - tmp.add(e.getMaximumCardinalityVertex()); - - } else { - - tmp.add(e.v1); - - tmp.add(e.v2); - - } - - } - - final Vertex[] a = tmp.toArray(new Vertex[tmp.size()]); - - return a; - - } - - /** - * Return the {@link IPredicate}s associated with the vertices of the - * join path in path order. - * - * @see #getVertices() - */ - public IPredicate<?>[] getPredicates() { - - // The vertices in the selected evaluation order. - final Vertex[] vertices = getVertices(); - - // The predicates in the same order as the vertices. 
- final IPredicate<?>[] preds = new IPredicate[vertices.length]; - - for (int i = 0; i < vertices.length; i++) { - - preds[i] = vertices[i].pred; - - } - - return preds; - - } - - /** - * Return the {@link BOp} identifiers of the predicates associated with - * each vertex in path order. - */ - public int[] getVertexIds() { - - return getVertexIds(edges); - - } - - /** - * Return the {@link BOp} identifiers of the predicates associated with - * each vertex in path order. - */ - static public int[] getVertexIds(final List<Edge> edges) { - - final Set<Vertex> tmp = new LinkedHashSet<Vertex>(); - - for (Edge e : edges) { - - tmp.add(e.v1); - - tmp.add(e.v2); - - } - - final Vertex[] a = tmp.toArray(new Vertex[tmp.size()]); - - final int[] b = new int[a.length]; - - for (int i = 0; i < a.length; i++) { - - b[i] = a[i].pred.getId(); - - } - - return b; - - } - - /** - * Return <code>true</code> if this path begins with the given path. - * - * @param p - * The given path. - * - * @return <code>true</code> if this path begins with the given path. - */ - public boolean beginsWith(final Path p) { - - if (p == null) - throw new IllegalArgumentException(); - - if (p.edges.size() > edges.size()) { - // Proven false since the caller's path is lon... [truncated message content] |
From: <mrp...@us...> - 2011-02-21 18:33:28
|
Revision: 4217 http://bigdata.svn.sourceforge.net/bigdata/?rev=4217&view=rev Author: mrpersonick Date: 2011-02-21 18:33:22 +0000 (Mon, 21 Feb 2011) Log Message: ----------- fixed some problems with the change sets test case and added it to the SIDs test suite Modified Paths: -------------- trunk/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailWithSids.java trunk/bigdata-sails/src/test/com/bigdata/rdf/sail/TestChangeSets.java Modified: trunk/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailWithSids.java =================================================================== --- trunk/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailWithSids.java 2011-02-21 17:48:49 UTC (rev 4216) +++ trunk/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailWithSids.java 2011-02-21 18:33:22 UTC (rev 4217) @@ -83,6 +83,8 @@ suite.addTestSuite(TestDescribe.class); + suite.addTestSuite(TestChangeSets.class); + return suite; } Modified: trunk/bigdata-sails/src/test/com/bigdata/rdf/sail/TestChangeSets.java =================================================================== --- trunk/bigdata-sails/src/test/com/bigdata/rdf/sail/TestChangeSets.java 2011-02-21 17:48:49 UTC (rev 4216) +++ trunk/bigdata-sails/src/test/com/bigdata/rdf/sail/TestChangeSets.java 2011-02-21 18:33:22 UTC (rev 4217) @@ -52,6 +52,8 @@ import com.bigdata.striterator.ChunkedArrayIterator; /** + * Test change sets. This is meant to be run in triples with SIDs mode. 
+ * * @author <a href="mailto:mrp...@us...">Mike Personick</a> * @version $Id$ */ @@ -59,14 +61,10 @@ protected static final Logger log = Logger.getLogger(TestChangeSets.class); - public Properties getTriplesNoInference() { + public Properties getNoInferenceProps() { Properties props = super.getProperties(); - // triples with sids - props.setProperty(BigdataSail.Options.QUADS, "false"); - props.setProperty(BigdataSail.Options.STATEMENT_IDENTIFIERS, "false"); - // no inference props.setProperty(BigdataSail.Options.TRUTH_MAINTENANCE, "false"); props.setProperty(BigdataSail.Options.AXIOMS_CLASS, NoAxioms.class.getName()); @@ -78,25 +76,6 @@ } - public Properties getTriplesWithInference() { - - Properties props = super.getProperties(); - - // triples with sids - props.setProperty(BigdataSail.Options.QUADS, "false"); - props.setProperty(BigdataSail.Options.STATEMENT_IDENTIFIERS, "false"); - - // no inference - props.setProperty(BigdataSail.Options.TRUTH_MAINTENANCE, "true"); - props.setProperty(BigdataSail.Options.AXIOMS_CLASS, OwlAxioms.class.getName()); - props.setProperty(BigdataSail.Options.VOCABULARY_CLASS, RDFSVocabulary.class.getName()); - props.setProperty(BigdataSail.Options.JUSTIFY, "true"); - props.setProperty(BigdataSail.Options.TEXT_INDEX, "false"); - - return props; - - } - /** * */ @@ -112,7 +91,9 @@ public void testSimpleAdd() throws Exception { - final BigdataSail sail = getSail(getTriplesNoInference()); + final BigdataSail sail = getSail(getNoInferenceProps()); + try { + sail.initialize(); final BigdataSailRepository repo = new BigdataSailRepository(sail); final BigdataSailRepositoryConnection cxn = @@ -204,6 +185,9 @@ } finally { cxn.close(); + } + + } finally { sail.__tearDownUnitTest(); } @@ -211,8 +195,10 @@ public void testSimpleRemove() throws Exception { - final BigdataSail sail = getSail(getTriplesNoInference()); - sail.initialize(); + final BigdataSail sail = getSail(getNoInferenceProps()); + try { + + sail.initialize(); final 
BigdataSailRepository repo = new BigdataSailRepository(sail); final BigdataSailRepositoryConnection cxn = (BigdataSailRepositoryConnection) repo.getConnection(); @@ -288,6 +274,9 @@ } finally { cxn.close(); + } + + } finally { sail.__tearDownUnitTest(); } @@ -295,15 +284,10 @@ public void testSids() throws Exception { - Properties props = getTriplesNoInference(); + final BigdataSail sail = getSail(getNoInferenceProps()); + try { - if (!Boolean.valueOf(props.getProperty(BigdataSail.Options.STATEMENT_IDENTIFIERS)).booleanValue()) { - log.warn("cannot run this test without sids enabled"); - return; - } - - final BigdataSail sail = getSail(getTriplesNoInference()); - sail.initialize(); + sail.initialize(); final BigdataSailRepository repo = new BigdataSailRepository(sail); final BigdataSailRepositoryConnection cxn = (BigdataSailRepositoryConnection) repo.getConnection(); @@ -402,6 +386,9 @@ } finally { cxn.close(); + } + + } finally { sail.__tearDownUnitTest(); } @@ -409,7 +396,9 @@ public void testTMAdd() throws Exception { - final BigdataSail sail = getSail(getTriplesWithInference()); + final BigdataSail sail = getSail(); + try { + sail.initialize(); final BigdataSailRepository repo = new BigdataSailRepository(sail); final BigdataSailRepositoryConnection cxn = @@ -499,6 +488,9 @@ } finally { cxn.close(); + } + + } finally { sail.__tearDownUnitTest(); } @@ -506,7 +498,9 @@ public void testTMRetract() throws Exception { - final BigdataSail sail = getSail(getTriplesWithInference()); + final BigdataSail sail = getSail(); + try { + sail.initialize(); final BigdataSailRepository repo = new BigdataSailRepository(sail); final BigdataSailRepositoryConnection cxn = @@ -606,6 +600,9 @@ } finally { cxn.close(); + } + + } finally { sail.__tearDownUnitTest(); } @@ -613,7 +610,9 @@ public void testTMUpdate() throws Exception { - final BigdataSail sail = getSail(getTriplesWithInference()); + final BigdataSail sail = getSail(); + try { + sail.initialize(); final BigdataSailRepository 
repo = new BigdataSailRepository(sail); final BigdataSailRepositoryConnection cxn = @@ -717,6 +716,9 @@ } finally { cxn.close(); + } + + } finally { sail.__tearDownUnitTest(); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <mrp...@us...> - 2011-02-21 17:48:56
|
Revision: 4216 http://bigdata.svn.sourceforge.net/bigdata/?rev=4216&view=rev Author: mrpersonick Date: 2011-02-21 17:48:49 +0000 (Mon, 21 Feb 2011) Log Message: ----------- created a test case template for user-submitted bugs Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestTemplate.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/data.ttl Added: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestTemplate.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestTemplate.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestTemplate.java 2011-02-21 17:48:49 UTC (rev 4216) @@ -0,0 +1,258 @@ +/** +Copyright (C) SYSTAP, LLC 2011. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +package com.bigdata.rdf.sail; + +import java.util.Collection; +import java.util.LinkedList; +import java.util.Properties; + +import org.apache.log4j.Logger; +import org.openrdf.model.vocabulary.RDF; +import org.openrdf.model.vocabulary.RDFS; +import org.openrdf.query.BindingSet; +import org.openrdf.query.QueryLanguage; +import org.openrdf.query.TupleQueryResult; +import org.openrdf.repository.Repository; +import org.openrdf.repository.RepositoryConnection; +import org.openrdf.repository.sail.SailRepository; +import org.openrdf.repository.sail.SailTupleQuery; +import org.openrdf.rio.RDFFormat; +import org.openrdf.sail.Sail; +import org.openrdf.sail.memory.MemoryStore; + +import com.bigdata.rdf.axioms.NoAxioms; +import com.bigdata.rdf.store.BD; +import com.bigdata.rdf.vocab.NoVocabulary; + +/** + * Unit test template for use in submission of bugs. + * <p> + * This test case will delegate to an underlying backing store. You can + * specify this store via a JVM property as follows: + * <code>-DtestClass=com.bigdata.rdf.sail.TestBigdataSailWithQuads</code> + * <p> + * There are three possible configurations for the testClass: + * <ul> + * <li>com.bigdata.rdf.sail.TestBigdataSailWithQuads (quads mode)</li> + * <li>com.bigdata.rdf.sail.TestBigdataSailWithoutSids (triples mode)</li> + * <li>com.bigdata.rdf.sail.TestBigdataSailWithSids (SIDs mode)</li> + * </ul> + * <p> + * The default for triples and SIDs mode is for inference with truth maintenance + * to be on. If you would like to turn off inference, make sure to do so in + * {@link #getProperties()}. 
+ * + * @author <a href="mailto:mrp...@us...">Mike Personick</a> + * @version $Id$ + */ +public class TestTemplate extends QuadsTestCase { + + protected static final Logger log = Logger.getLogger(TestTemplate.class); + + /** + * Please set your database properties here, except for your journal file, + * please DO NOT SPECIFY A JOURNAL FILE. + */ + @Override + public Properties getProperties() { + + Properties props = super.getProperties(); + + /* + * For example, here is a set of five properties that turns off + * inference, truth maintenance, and the free text index. + */ + props.setProperty(BigdataSail.Options.AXIOMS_CLASS, NoAxioms.class.getName()); + props.setProperty(BigdataSail.Options.VOCABULARY_CLASS, NoVocabulary.class.getName()); + props.setProperty(BigdataSail.Options.TRUTH_MAINTENANCE, "false"); + props.setProperty(BigdataSail.Options.JUSTIFY, "false"); + props.setProperty(BigdataSail.Options.TEXT_INDEX, "false"); + + return props; + + } + + public TestTemplate() { + } + + public TestTemplate(String arg0) { + super(arg0); + } + + public void testBug() throws Exception { + + /* + * We use an in-memory Sesame store as our point of reference. This + * will supply the "correct" answer to the query (below). + */ + final Sail sesameSail = new MemoryStore(); + + /* + * The bigdata store, backed by a temporary journal file. + */ + final BigdataSail bigdataSail = getSail(); + + /* + * Data file containing the data demonstrating your bug. + */ + final String data = "data.ttl"; + final String baseURI = ""; + final RDFFormat format = RDFFormat.TURTLE; + + /* + * Query(ies) demonstrating your bug. + */ + final String query = + "prefix bd: <"+BD.NAMESPACE+"> " + + "prefix rdf: <"+RDF.NAMESPACE+"> " + + "prefix rdfs: <"+RDFS.NAMESPACE+"> " + + + "SELECT DISTINCT ?neType ?majorType ?minorType " + + "WHERE { " + + " { " + + " ?neType <http://www.w3.org/2000/01/rdf-schema#subClassOf> <http://example/class/NamedEntity> . 
" + + " FILTER(?neType != <http://example/class/NamedEntity>) " + + " } " + + " UNION " + + " { ?lookup <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example/class/Lookup> . " + + " ?lookup <http://example/prop/lookup/majorType> ?majorType . " + + " OPTIONAL { ?lookup <http://example/prop/lookup/minorType> ?minorType } " + + " } " + + "}"; + + try { + + sesameSail.initialize(); + bigdataSail.initialize(); + + final Repository sesameRepo = new SailRepository(sesameSail); + final BigdataSailRepository bigdataRepo = new BigdataSailRepository(bigdataSail); + + { // load the data into the Sesame store + + final RepositoryConnection cxn = sesameRepo.getConnection(); + try { + cxn.setAutoCommit(false); + cxn.add(getClass().getResourceAsStream(data), baseURI, format); + cxn.commit(); + } finally { + cxn.close(); + } + + } + + { // load the data into the bigdata store + + final RepositoryConnection cxn = bigdataRepo.getConnection(); + try { + cxn.setAutoCommit(false); + cxn.add(getClass().getResourceAsStream(data), baseURI, format); + cxn.commit(); + } finally { + cxn.close(); + } + + } + + final Collection<BindingSet> answer = new LinkedList<BindingSet>(); + + /* + * Here is how you manually build the answer set, but please make + * sure you answer truly is correct if you choose to do it this way. + +// answer.add(createBindingSet( +// new BindingImpl("neType", vf.createURI("http://example/class/Location")) +// )); +// answer.add(createBindingSet( +// new BindingImpl("neType", vf.createURI("http://example/class/Person")) +// )); + + */ + + /* + * Run the problem query using the Sesame store to gather the + * correct results. 
+ */ + { + final RepositoryConnection cxn = sesameRepo.getConnection(); + try { + final SailTupleQuery tupleQuery = (SailTupleQuery) + cxn.prepareTupleQuery(QueryLanguage.SPARQL, query); + tupleQuery.setIncludeInferred(false /* includeInferred */); + final TupleQueryResult result = tupleQuery.evaluate(); + + if (log.isInfoEnabled()) { + log.info("sesame results:"); + if (!result.hasNext()) { + log.info("no results."); + } + } + + while (result.hasNext()) { + final BindingSet bs = result.next(); + answer.add(bs); + if (log.isInfoEnabled()) + log.info(bs); + } + } finally { + cxn.close(); + } + } + + /* + * Run the problem query using the bigdata store and then compare + * the answer. + */ + final RepositoryConnection cxn = bigdataRepo.getReadOnlyConnection(); + try { + final SailTupleQuery tupleQuery = (SailTupleQuery) + cxn.prepareTupleQuery(QueryLanguage.SPARQL, query); + tupleQuery.setIncludeInferred(false /* includeInferred */); + + if (log.isInfoEnabled()) { + final TupleQueryResult result = tupleQuery.evaluate(); + log.info("bigdata results:"); + if (!result.hasNext()) { + log.info("no results."); + } + while (result.hasNext()) { + log.info(result.next()); + } + } + + final TupleQueryResult result = tupleQuery.evaluate(); + compare(result, answer); + + } finally { + cxn.close(); + } + + } finally { + bigdataSail.__tearDownUnitTest(); + sesameSail.shutDown(); + } + + } + +} Added: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/data.ttl =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/data.ttl (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/data.ttl 2011-02-21 17:48:49 UTC (rev 4216) @@ -0,0 +1,21 @@ +@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> . +@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> . + +<http://example/class/Location> rdfs:subClassOf <http://example/class/NamedEntity> . 
+<http://example/class/Location> rdfs:label "Location" . +<http://example/class/NamedEntity> rdfs:label "NamedEntity" . +<http://example/elem/loc/loc1> rdfs:label "label: Amsterdam" . +<http://example/elem/loc/loc1> rdf:type <http://example/class/Location> . +<http://example/elem/loc/loc1> <http://example/prop/lookupName> "amsterdam" . +<http://example/elem/loc/loc2> rdfs:label "label: Den Haag" . +<http://example/elem/loc/loc2> rdf:type <http://example/class/Location> . +<http://example/elem/loc/loc2> <http://example/prop/lookupName> "den haag" . +<http://example/elem/loc/loc3> rdfs:label "label: IJmuiden" . +<http://example/elem/loc/loc3> rdf:type <http://example/class/Location> . +<http://example/elem/loc/loc3> <http://example/prop/lookupName> "ijmuiden" . +<http://example/elem/loc/loc3> <http://example/prop/disabled> "true"^^<http://www.w3.org/2001/XMLSchema#boolean> . +<http://example/class/Person> rdfs:subClassOf <http://example/class/NamedEntity> . +<http://example/class/Person> rdfs:label "Person" . +<http://example/elem/person/puk> rdfs:label "label: Puk van de Petteflet" . +<http://example/elem/person/puk> rdf:type <http://example/class/Person> . +<http://example/elem/person/puk> <http://example/prop/lookupName> "puk van de petteflet" . This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <mrp...@us...> - 2011-02-21 17:12:15
|
Revision: 4215 http://bigdata.svn.sourceforge.net/bigdata/?rev=4215&view=rev Author: mrpersonick Date: 2011-02-21 17:12:09 +0000 (Mon, 21 Feb 2011) Log Message: ----------- better handling of unrecognized values in queries Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl3.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/SOp2BOpUtility.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailEvaluationStrategyImpl.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestNestedUnions.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/union.ttl Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl3.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl3.java 2011-02-21 14:30:25 UTC (rev 4214) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl3.java 2011-02-21 17:12:09 UTC (rev 4215) @@ -703,13 +703,14 @@ * If we encounter a value not in the lexicon, we can * still continue with the query if the value is in * either an optional tail or an optional join group (i.e. - * if it appears on the right side of a LeftJoin). + * if it appears on the right side of a LeftJoin). We can + * also continue if the value is in a UNION. * Otherwise we can stop evaluating right now. */ - if (sop.isRightSideLeftJoin()) { + if (sop.getGroup() == SOpTreeBuilder.ROOT_GROUP_ID) { + throw new UnrecognizedValueException(ex); + } else { groupsToPrune.add(sopTree.getGroup(sop.getGroup())); - } else { - throw new UnrecognizedValueException(ex); } } } @@ -720,6 +721,15 @@ * not in the lexicon. 
*/ sopTree = stb.pruneGroups(sopTree, groupsToPrune); + + /* + * If after pruning groups with unrecognized values we end up with a + * UNION with no subqueries, we can safely just return an empty + * iteration. + */ + if (SOp2BOpUtility.isEmptyUnion(sopTree.getRoot())) { + return new EmptyIteration<BindingSet, QueryEvaluationException>(); + } /* * If we have a filter in the root group (one that can be safely applied @@ -2047,9 +2057,33 @@ } private IConstraint toConstraint(Or or) { - final IConstraint right = toConstraint(or.getRightArg()); - final IConstraint left = toConstraint(or.getLeftArg()); - return new OR(left, right); + IConstraint left = null, right = null; + UnrecognizedValueException uve = null; + try { + left = toConstraint(or.getLeftArg()); + } catch (UnrecognizedValueException ex) { + uve = ex; + } + try { + right = toConstraint(or.getRightArg()); + } catch (UnrecognizedValueException ex) { + uve = ex; + } + + /* + * if both sides contain unrecognized values, then we need to throw + * the exception up. but if only one does, then we can still handle it + * since we are doing an OR. + */ + if (left == null && right == null) { + throw uve; + } + + if (left != null && right != null) { + return new OR(left, right); + } else { + return left != null ? 
left : right; + } } private IConstraint toConstraint(And and) { Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/SOp2BOpUtility.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/SOp2BOpUtility.java 2011-02-21 14:30:25 UTC (rev 4214) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/SOp2BOpUtility.java 2011-02-21 17:12:09 UTC (rev 4215) @@ -105,6 +105,18 @@ } + public static boolean isEmptyUnion(final SOpGroup sopGroup) { + + if (isUnion(sopGroup)) { + final SOpGroups children = sopGroup.getChildren(); + if (children == null || children.size() == 0) { + return true; + } + } + return false; + + } + /** * Because of the way we parse the Sesame operator tree, the single * optional tails get placed in their own singleton subgroup without any @@ -214,10 +226,10 @@ * First do the non-optional subqueries (UNIONs) */ for (SOpGroup child : children) { - if (!isUnion(child)) + if (!isUnion(child) || isEmptyUnion(child)) continue; - final PipelineOp subquery = convert( + final PipelineOp subquery = union( child, idFactory, db, queryEngine, queryHints); final boolean optional = isOptional(child); final int subqueryId = idFactory.incrementAndGet(); Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailEvaluationStrategyImpl.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailEvaluationStrategyImpl.java 2011-02-21 14:30:25 UTC (rev 4214) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailEvaluationStrategyImpl.java 2011-02-21 17:12:09 UTC (rev 4215) @@ -118,7 +118,7 @@ " ?p = <"+property1+">) " + "}"; - { // evalute it once so i can see it + { // evaluate it once so i can see it final StringWriter sw = new StringWriter(); 
final SPARQLResultsXMLWriter handler = new SPARQLResultsXMLWriter( @@ -891,6 +891,29 @@ } + public void test_join_combo_3() throws Exception { + + // define the vocabulary + + // define the graph + Graph graph = new GraphImpl(); + + // define the query + String query = + "PREFIX : <http://example/> " + + "SELECT * " + + "{ " + + " { ?x :p ?y } UNION { ?p a ?z } " + + "}"; + + // define the correct answer + Collection<BindingSet> answer = new LinkedList<BindingSet>(); + + // run the test + runQuery(graph, query, answer); + + } + public void test_two_nested_opt() throws Exception { // define the vocabulary Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestNestedUnions.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestNestedUnions.java 2011-02-21 14:30:25 UTC (rev 4214) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestNestedUnions.java 2011-02-21 17:12:09 UTC (rev 4215) @@ -39,12 +39,18 @@ import org.openrdf.query.BindingSet; import org.openrdf.query.QueryLanguage; import org.openrdf.query.TupleQueryResult; +import org.openrdf.query.algebra.Distinct; import org.openrdf.query.algebra.Projection; import org.openrdf.query.algebra.QueryRoot; import org.openrdf.query.algebra.TupleExpr; import org.openrdf.query.impl.BindingImpl; +import org.openrdf.repository.Repository; import org.openrdf.repository.RepositoryConnection; +import org.openrdf.repository.sail.SailRepository; import org.openrdf.repository.sail.SailTupleQuery; +import org.openrdf.rio.RDFFormat; +import org.openrdf.sail.Sail; +import org.openrdf.sail.memory.MemoryStore; import com.bigdata.rdf.axioms.NoAxioms; import com.bigdata.rdf.sail.sop.SOpTree; @@ -332,5 +338,109 @@ } } + + public void testForumBug() throws Exception { + +// final Sail sail = new MemoryStore(); + final Sail sail = getSail(); + + try { + + sail.initialize(); + final 
Repository repo = sail instanceof BigdataSail ? + new BigdataSailRepository((BigdataSail)sail) : + new SailRepository(sail); + final RepositoryConnection cxn = repo.getConnection(); + + try { + + final ValueFactory vf = sail.getValueFactory(); + + cxn.setAutoCommit(false); + + /* + * load the data + */ + cxn.add(getClass().getResourceAsStream("union.ttl"),"",RDFFormat.TURTLE); + + /* + * Note: Either flush() or commit() is required to flush the + * statement buffers to the database before executing any + * operations that go around the sail. + */ + cxn.commit(); + + { + + String query = + "prefix bd: <"+BD.NAMESPACE+"> " + + "prefix rdf: <"+RDF.NAMESPACE+"> " + + "prefix rdfs: <"+RDFS.NAMESPACE+"> " + + "SELECT DISTINCT ?neType ?majorType ?minorType " + + "WHERE { " + + " { " + + " ?neType <http://www.w3.org/2000/01/rdf-schema#subClassOf> <http://example/class/NamedEntity> . " + + " FILTER(?neType != <http://example/class/NamedEntity>) " + + " } " + + " UNION " + + " { ?lookup <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example/class/Lookup> . " + + " ?lookup <http://example/prop/lookup/majorType> ?majorType . 
" + + " OPTIONAL { ?lookup <http://example/prop/lookup/minorType> ?minorType } " + + " } " + + "}"; + + final SailTupleQuery tupleQuery = (SailTupleQuery) + cxn.prepareTupleQuery(QueryLanguage.SPARQL, query); + tupleQuery.setIncludeInferred(false /* includeInferred */); + + if (sail instanceof BigdataSail && log.isInfoEnabled()) { + + final BigdataSailTupleQuery bdTupleQuery = + (BigdataSailTupleQuery) tupleQuery; + final QueryRoot root = (QueryRoot) bdTupleQuery.getTupleExpr(); + final Distinct d = (Distinct) root.getArg(); + final Projection p = (Projection) d.getArg(); + final TupleExpr tupleExpr = p.getArg(); + log.info(tupleExpr); + + final SOpTreeBuilder stb = new SOpTreeBuilder(); + final SOpTree tree = stb.collectSOps(tupleExpr); + + log.info(tree); + log.info(query); + + } + + if (log.isInfoEnabled()) { + final TupleQueryResult result = tupleQuery.evaluate(); + log.info("results:"); + while (result.hasNext()) { + log.info(result.next()); + } + } + + final Collection<BindingSet> answer = new LinkedList<BindingSet>(); + answer.add(createBindingSet( + new BindingImpl("neType", vf.createURI("http://example/class/Location")) + )); + answer.add(createBindingSet( + new BindingImpl("neType", vf.createURI("http://example/class/Person")) + )); + + final TupleQueryResult result = tupleQuery.evaluate(); + compare(result, answer); + + } + + } finally { + cxn.close(); + } + } finally { + if (sail instanceof BigdataSail) + ((BigdataSail)sail).__tearDownUnitTest();//shutDown(); + } + + } + } Added: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/union.ttl =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/union.ttl (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/union.ttl 2011-02-21 17:12:09 UTC (rev 4215) @@ -0,0 +1,21 @@ +@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> . 
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> . + +<http://example/class/Location> rdfs:subClassOf <http://example/class/NamedEntity> . +<http://example/class/Location> rdfs:label "Location" . +<http://example/class/NamedEntity> rdfs:label "NamedEntity" . +<http://example/elem/loc/loc1> rdfs:label "label: Amsterdam" . +<http://example/elem/loc/loc1> rdf:type <http://example/class/Location> . +<http://example/elem/loc/loc1> <http://example/prop/lookupName> "amsterdam" . +<http://example/elem/loc/loc2> rdfs:label "label: Den Haag" . +<http://example/elem/loc/loc2> rdf:type <http://example/class/Location> . +<http://example/elem/loc/loc2> <http://example/prop/lookupName> "den haag" . +<http://example/elem/loc/loc3> rdfs:label "label: IJmuiden" . +<http://example/elem/loc/loc3> rdf:type <http://example/class/Location> . +<http://example/elem/loc/loc3> <http://example/prop/lookupName> "ijmuiden" . +<http://example/elem/loc/loc3> <http://example/prop/disabled> "true"^^<http://www.w3.org/2001/XMLSchema#boolean> . +<http://example/class/Person> rdfs:subClassOf <http://example/class/NamedEntity> . +<http://example/class/Person> rdfs:label "Person" . +<http://example/elem/person/puk> rdfs:label "label: Puk van de Petteflet" . +<http://example/elem/person/puk> rdf:type <http://example/class/Person> . +<http://example/elem/person/puk> <http://example/prop/lookupName> "puk van de petteflet" . This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2011-02-21 14:30:31
|
Revision: 4214 http://bigdata.svn.sourceforge.net/bigdata/?rev=4214&view=rev Author: thompsonbry Date: 2011-02-21 14:30:25 +0000 (Mon, 21 Feb 2011) Log Message: ----------- Wrote unit tests for BOpUtility#canJoinUsingConstraints() Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestBOpUtility_canJoinUsingConstraints.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestBOpUtility_canJoinUsingConstraints.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestBOpUtility_canJoinUsingConstraints.java 2011-02-21 00:18:36 UTC (rev 4213) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestBOpUtility_canJoinUsingConstraints.java 2011-02-21 14:30:25 UTC (rev 4214) @@ -27,7 +27,10 @@ package com.bigdata.bop; +import java.util.LinkedHashSet; import java.util.Map; +import java.util.Random; +import java.util.Set; import junit.framework.TestCase2; @@ -52,6 +55,29 @@ * <p> * Note: To avoid a dependency on the RDF model layer, this just uses String * constants for URIs and Literals. + * <h2>Analysis of BSBM Q5</h2> + * The following predicates all join on {@link #product}: + * <ul> + * <li>{@link #p0}</li> + * <li>{@link #p2}</li> + * <li>{@link #p4}</li> + * <li>{@link #p6}</li> + * </ul> + * The predicates ({@link #p3} and {@link #p5}) do not directly join with any of + * the other predicates (they do not directly share any variables). In general, + * a join without shared variables means the cross product of the sources will + * be materialized and such joins should be run last. + * <p> + * However, in this case there are two SPARQL FILTERs ({@link #c1} and + * {@link #c2}) which (a) use those variables ({@link #origProperty1} and + * {@link #origProperty2}); and (b) can constrain the query. 
This means that + * running the predicates without shared variables and applying the constraints + * before the tail of the plan can in fact lead to a more efficient join path. + * <p> + * This set of unit tests explores various join paths and verifies that the + * canJoin() and canJoinUsingConstraints() methods correctly recognize edges by + * which a join path can be extended corresponding to both static and dynamic + * analysis of the query. * * @see BOpUtility#canJoin(IPredicate, IPredicate) * @see BOpUtility#canJoinUsingConstraints(IPredicate[], IPredicate, @@ -59,7 +85,19 @@ * @see JGraph * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id$ + * @version $Id: TestBOpUtility_canJoinUsingConstraints.java 4211 2011-02-20 + * 21:20:44Z thompsonbry $ + * + * @todo These are the full plans generated by the runtime and static + * optimizers. One way to test canJoinXXX() is to run out these join plans + * and verify that they report "true" in each case. However, the critical + * bit to test are join plans where the predicates w/o the shared + * variables can be run earlier due to the FILTERs. + * + * <pre> + * test_bsbm_q5 : static [0] : : ids=[1, 2, 4, 6, 0, 3, 5] + * test_bsbm_q5 : runtime[0] : : ids=[1, 2, 0, 4, 6, 3, 5] + * </pre> */ //@SuppressWarnings("unchecked") public class TestBOpUtility_canJoinUsingConstraints extends TestCase2 { @@ -447,48 +485,222 @@ final IConstraint[] constraints = new IConstraint[] { c0, c1, c2 }; /** - * path=[p0], vertex=p2, constraints=[]. + * Unit test for one-step joins based on the {@link #product} variable. 
*/ - public void test_canJoinUsingConstraints_01() { + public void test_canJoinUsingConstraints_1step_productVar() { // share ?product - assertTrue(BOpUtility.canJoin(p0, p2)); + final IPredicate<?>[] a = new IPredicate[] { p0, p2, p4, p6 }; + for (int i = 0; i < a.length; i++) { + for (int j = i; j < a.length; j++) { + final IPredicate<?> t0 = a[i]; + final IPredicate<?> t1 = a[j]; + assertTrue(BOpUtility.canJoin(t0, t1)); + assertTrue(BOpUtility.canJoin(t1, t0)); + if (t0 != t1) { + /* + * Test join path extension, but not when the vertex used to + * extend the path is already present in the join path. + */ + assertTrue(BOpUtility.canJoinUsingConstraints(// + new IPredicate[] { t0 }, // path + t1,// vertex + new IConstraint[0]// constraints + )); + assertTrue(BOpUtility.canJoinUsingConstraints(// + new IPredicate[] { t1 }, // path + t0,// vertex + new IConstraint[0]// constraints + )); + } + } + } + + } + + /** + * Unit test for multi-step join paths based on the {@link #product} + * variable. + */ + public void test_canJoinUsingConstraints_multiStep_productVar() { + + final Random r = new Random(); + + // share ?product + final IPredicate<?>[] a = new IPredicate[] { p0, p2, p4, p6 }; + + // existing path length [1:3]. + final int existingPathLength = r.nextInt(3)+1; + + // generated pre-existing path. + final IPredicate<?>[] path = new IPredicate[existingPathLength]; + // vertex which will extend that path + final IPredicate<?> vertex; + { + // collection of predicates used so far by the path. + final Set<Integer> used = new LinkedHashSet<Integer>(); + for (int i = 0; i < path.length; i++) { + // Locate an unused predicate. + int index; + while (true) { + index = r.nextInt(a.length); + if (!used.contains(index)) { + used.add(index); + break; + } + } + // add to the path. + path[i] = a[index]; + } + // Locate an unused predicate to serve as the extension vertex. + { + // Locate an unused predicate. 
+ int index; + while (true) { + index = r.nextInt(a.length); + if (!used.contains(index)) { + used.add(index); + break; + } + } + vertex = a[index]; + } + } + + // Verify all joins in the path are legal. + for (int i = 0; i < path.length - 1; i++) { + assertTrue(BOpUtility.canJoin(path[i], path[i + 1])); + } + + // Verify the extension of the path is legal. assertTrue(BOpUtility.canJoinUsingConstraints(// - new IPredicate[] { p0 }, // path - p2,// vertex + path,// + vertex,// new IConstraint[0]// constraints )); } /** - * path=[p0], vertex=p2, constraints=[]. - * - * @todo Test w/o any constraints or with all constraints. Testing with only - * some of the constraints does not add much here (we probably do not - * need to have [c0] defined for this set of unit tests). - * - * @todo These are the full plans generated by the runtime and static - * optimizers. One way to test canJoinXXX() is to run out these join - * plans and verify that they report "true" in each case. However, the - * critical bit to test are join plans where the predicates w/o the - * shared variables can be run earlier due to the FILTERs. - * - * <pre> - * test_bsbm_q5 : static [0] : : ids=[1, 2, 4, 6, 0, 3, 5] - * test_bsbm_q5 : runtime[0] : : ids=[1, 2, 0, 4, 6, 3, 5] - * </pre> + * Unit test examines the predicates without shared variables and verifies + * (a) that joins are not permitted when the constraints are not considered; + * and (b) that joins are permitted when the constraints are considered. + * <p> + * This test is identical to {@link #test_canJoinUsingConstraints_p5_p6()} + * except that it considers the ({@link #p3} x {@link #p4}) join via the + * {@link #c1} constraint instead. 
*/ - public void test_canJoinUsingConstraints_02() { + public void test_canJoinUsingConstraints_p3_p4() { - // share ?product - assertTrue(BOpUtility.canJoin(p0, p2)); + /* + * Verify (p3,p4) join is not permitted when we do not consider the + * constraints (i.e., the join would be an unconstrained cross product + * if it were executed). + */ + assertFalse(BOpUtility.canJoin(p3, p4)); + assertFalse(BOpUtility.canJoin(p4, p3)); + assertFalse(BOpUtility.canJoinUsingConstraints(// + new IPredicate[] { p3 }, // path + p4,// vertex + new IConstraint[0]// constraints + )); + assertFalse(BOpUtility.canJoinUsingConstraints(// + new IPredicate[] { p4 }, // path + p3,// vertex + new IConstraint[0]// constraints + )); + + /* + * Verify (p3,p4) join is not permitted if we do not consider the + * constraint which provides the shared variables. + */ + assertFalse(BOpUtility.canJoinUsingConstraints(// + new IPredicate[] { p3 }, // path + p4,// vertex + new IConstraint[] { c2 }// constraints + )); + assertFalse(BOpUtility.canJoinUsingConstraints(// + new IPredicate[] { p4 }, // path + p3,// vertex + new IConstraint[] { c2 }// constraints + )); + + /* + * Verify (p3,p4) join is permitted if we consider the constraint which + * provides the shared variables. + */ assertTrue(BOpUtility.canJoinUsingConstraints(// - new IPredicate[] { p0 }, // path - p2,// vertex + new IPredicate[] { p3 }, // path + p4,// vertex + new IConstraint[] { c1 }// constraints + )); + assertTrue(BOpUtility.canJoinUsingConstraints(// + new IPredicate[] { p4 }, // path + p3,// vertex + new IConstraint[] { c1 }// constraints + )); + + } + + /** + * Unit test examines the predicates without shared variables and verifies + * (a) that joins are not permitted when the constraints are not considered; + * and (b) that joins are permitted when the constraints are considered. 
+ * <p> + * This test is identical to {@link #test_canJoinUsingConstraints_p3_p4()} + * except that it considers the ({@link #p5} x {@link #p6}) join via the + * {@link #c2} constraint instead. + */ + public void test_canJoinUsingConstraints_p5_p6() { + + /* + * Verify (p5,p6) join is not permitted when we do not consider the + * constraints (i.e., the join would be an unconstrained cross product + * if it were executed). + */ + assertFalse(BOpUtility.canJoin(p5, p6)); + assertFalse(BOpUtility.canJoin(p6, p5)); + assertFalse(BOpUtility.canJoinUsingConstraints(// + new IPredicate[] { p5 }, // path + p6,// vertex new IConstraint[0]// constraints )); + assertFalse(BOpUtility.canJoinUsingConstraints(// + new IPredicate[] { p6 }, // path + p5,// vertex + new IConstraint[0]// constraints + )); + /* + * Verify (p5,p6) join is not permitted if we do not consider the + * constraint which provides the shared variables. + */ + assertFalse(BOpUtility.canJoinUsingConstraints(// + new IPredicate[] { p5 }, // path + p6,// vertex + new IConstraint[] { c1 }// constraints + )); + assertFalse(BOpUtility.canJoinUsingConstraints(// + new IPredicate[] { p6 }, // path + p5,// vertex + new IConstraint[] { c1 }// constraints + )); + + /* + * Verify (p5,p6) join is permitted if we consider the constraint which + * provides the shared variables. + */ + assertTrue(BOpUtility.canJoinUsingConstraints(// + new IPredicate[] { p5 }, // path + p6,// vertex + new IConstraint[] { c2 }// constraints + )); + assertTrue(BOpUtility.canJoinUsingConstraints(// + new IPredicate[] { p6 }, // path + p5,// vertex + new IConstraint[] { c2 }// constraints + )); + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <mrp...@us...> - 2011-02-21 00:18:42
|
Revision: 4213 http://bigdata.svn.sourceforge.net/bigdata/?rev=4213&view=rev Author: mrpersonick Date: 2011-02-21 00:18:36 +0000 (Mon, 21 Feb 2011) Log Message: ----------- turn off stopword filter when using prefix match Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/search/DefaultAnalyzerFactory.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/search/FullTextIndex.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/search/IAnalyzerFactory.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/search/DefaultAnalyzerFactory.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/search/DefaultAnalyzerFactory.java 2011-02-20 23:39:28 UTC (rev 4212) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/search/DefaultAnalyzerFactory.java 2011-02-21 00:18:36 UTC (rev 4213) @@ -27,9 +27,12 @@ package com.bigdata.search; +import java.util.Collections; import java.util.HashMap; +import java.util.HashSet; import java.util.Locale; import java.util.Map; +import java.util.Set; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.br.BrazilianAnalyzer; @@ -69,7 +72,7 @@ } - public Analyzer getAnalyzer(final String languageCode) { + public Analyzer getAnalyzer(final String languageCode, final boolean filterStopwords) { final IKeyBuilder keyBuilder = fullTextIndex.getKeyBuilder(); @@ -86,7 +89,7 @@ .getSortKeyGenerator().getLocale(); // The analyzer for that locale. 
- Analyzer a = getAnalyzer(locale.getLanguage()); + Analyzer a = getAnalyzer(locale.getLanguage(), filterStopwords); if (a != null) return a; @@ -137,7 +140,7 @@ } - Analyzer a = ctor.newInstance(); + Analyzer a = ctor.newInstance(filterStopwords); return a; @@ -145,7 +148,7 @@ abstract private static class AnalyzerConstructor { - abstract public Analyzer newInstance(); + abstract public Analyzer newInstance(final boolean filterStopwords); } @@ -187,13 +190,17 @@ return analyzers; } - + analyzers = new HashMap<String, AnalyzerConstructor>(); + + final Set<?> emptyStopwords = Collections.EMPTY_SET; { AnalyzerConstructor a = new AnalyzerConstructor() { - public Analyzer newInstance() { - return new BrazilianAnalyzer(Version.LUCENE_CURRENT); + public Analyzer newInstance(final boolean filterStopwords) { + return filterStopwords ? + new BrazilianAnalyzer(Version.LUCENE_CURRENT) : + new BrazilianAnalyzer(Version.LUCENE_CURRENT, emptyStopwords); } }; analyzers.put("por", a); @@ -212,8 +219,8 @@ */ { AnalyzerConstructor a = new AnalyzerConstructor() { - public Analyzer newInstance() { - return new ChineseAnalyzer(); + public Analyzer newInstance(final boolean filterStopwords) { + return new ChineseAnalyzer(); } }; analyzers.put("zho", a); @@ -227,8 +234,10 @@ */ { AnalyzerConstructor a = new AnalyzerConstructor() { - public Analyzer newInstance() { - return new CJKAnalyzer(Version.LUCENE_CURRENT); + public Analyzer newInstance(final boolean filterStopwords) { + return filterStopwords ? + new CJKAnalyzer(Version.LUCENE_CURRENT) : + new CJKAnalyzer(Version.LUCENE_CURRENT, emptyStopwords); } }; // analyzers.put("zho", a); @@ -243,8 +252,10 @@ { AnalyzerConstructor a = new AnalyzerConstructor() { - public Analyzer newInstance() { - return new CzechAnalyzer(Version.LUCENE_CURRENT); + public Analyzer newInstance(final boolean filterStopwords) { + return filterStopwords ? 
+ new CzechAnalyzer(Version.LUCENE_CURRENT) : + new CzechAnalyzer(Version.LUCENE_CURRENT, emptyStopwords); } }; analyzers.put("ces",a); @@ -254,8 +265,10 @@ { AnalyzerConstructor a = new AnalyzerConstructor() { - public Analyzer newInstance() { - return new DutchAnalyzer(Version.LUCENE_CURRENT); + public Analyzer newInstance(final boolean filterStopwords) { + return filterStopwords ? + new DutchAnalyzer(Version.LUCENE_CURRENT) : + new DutchAnalyzer(Version.LUCENE_CURRENT, emptyStopwords); } }; analyzers.put("dut",a); @@ -265,8 +278,10 @@ { AnalyzerConstructor a = new AnalyzerConstructor() { - public Analyzer newInstance() { - return new FrenchAnalyzer(Version.LUCENE_CURRENT); + public Analyzer newInstance(final boolean filterStopwords) { + return filterStopwords ? + new FrenchAnalyzer(Version.LUCENE_CURRENT) : + new FrenchAnalyzer(Version.LUCENE_CURRENT, emptyStopwords); } }; analyzers.put("fra",a); @@ -280,8 +295,10 @@ */ { AnalyzerConstructor a = new AnalyzerConstructor() { - public Analyzer newInstance() { - return new GermanAnalyzer(Version.LUCENE_CURRENT); + public Analyzer newInstance(final boolean filterStopwords) { + return filterStopwords ? + new GermanAnalyzer(Version.LUCENE_CURRENT) : + new GermanAnalyzer(Version.LUCENE_CURRENT, emptyStopwords); } }; analyzers.put("deu",a); @@ -292,8 +309,10 @@ // Note: ancient greek has a different code (grc). { AnalyzerConstructor a = new AnalyzerConstructor() { - public Analyzer newInstance() { - return new GreekAnalyzer(Version.LUCENE_CURRENT); + public Analyzer newInstance(final boolean filterStopwords) { + return filterStopwords ? + new GreekAnalyzer(Version.LUCENE_CURRENT) : + new GreekAnalyzer(Version.LUCENE_CURRENT, emptyStopwords); } }; analyzers.put("gre",a); @@ -304,8 +323,10 @@ // @todo what about other Cyrillic scripts? 
{ AnalyzerConstructor a = new AnalyzerConstructor() { - public Analyzer newInstance() { - return new RussianAnalyzer(Version.LUCENE_CURRENT); + public Analyzer newInstance(final boolean filterStopwords) { + return filterStopwords ? + new RussianAnalyzer(Version.LUCENE_CURRENT) : + new RussianAnalyzer(Version.LUCENE_CURRENT, emptyStopwords); } }; analyzers.put("rus",a); @@ -314,7 +335,7 @@ { AnalyzerConstructor a = new AnalyzerConstructor() { - public Analyzer newInstance() { + public Analyzer newInstance(final boolean filterStopwords) { return new ThaiAnalyzer(Version.LUCENE_CURRENT); } }; @@ -325,8 +346,10 @@ // English { AnalyzerConstructor a = new AnalyzerConstructor() { - public Analyzer newInstance() { - return new StandardAnalyzer(Version.LUCENE_CURRENT); + public Analyzer newInstance(final boolean filterStopwords) { + return filterStopwords ? + new StandardAnalyzer(Version.LUCENE_CURRENT) : + new StandardAnalyzer(Version.LUCENE_CURRENT, emptyStopwords); } }; analyzers.put("eng", a); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/search/FullTextIndex.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/search/FullTextIndex.java 2011-02-20 23:39:28 UTC (rev 4212) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/search/FullTextIndex.java 2011-02-21 00:18:36 UTC (rev 4213) @@ -599,9 +599,9 @@ * * @return The token analyzer best suited to the indicated language family. */ - protected Analyzer getAnalyzer(final String languageCode) { + protected Analyzer getAnalyzer(final String languageCode, final boolean filterStopwords) { - return analyzerFactory.getAnalyzer(languageCode); + return analyzerFactory.getAnalyzer(languageCode, filterStopwords); } @@ -761,24 +761,11 @@ * * @todo is it using a language family specific stopword list? 
*/ - final Analyzer a = getAnalyzer(languageCode); + final Analyzer a = getAnalyzer(languageCode, filterStopwords); - TokenStream tokenStream; - if (filterStopwords) { - tokenStream = a.tokenStream(null/* @todo field? */, r); - } else { - /* - * To eliminiate stopword filtering, we simulate the tokenStream() - * operation above per the javadoc for that method, which says: - * "Constructs a StandardTokenizer filtered by a StandardFilter, - * a LowerCaseFilter and a StopFilter", eliminating the StopFilter. - */ - tokenStream = new StandardTokenizer(Version.LUCENE_CURRENT, r); - tokenStream = new StandardFilter(tokenStream); - } + TokenStream tokenStream = a.tokenStream(null/* @todo field? */, r); // force to lower case. - // might be able to move this inside the else {} block above? tokenStream = new LowerCaseFilter(tokenStream); return tokenStream; Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/search/IAnalyzerFactory.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/search/IAnalyzerFactory.java 2011-02-20 23:39:28 UTC (rev 4212) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/search/IAnalyzerFactory.java 2011-02-21 00:18:36 UTC (rev 4213) @@ -45,9 +45,11 @@ * @param languageCode * The language code or <code>null</code> to use the default * {@link Locale}. + * @param filterStopwords + * if false, return an analyzer with no stopwords * * @return The token analyzer best suited to the indicated language family. */ - Analyzer getAnalyzer(final String languageCode); + Analyzer getAnalyzer(final String languageCode, final boolean filterStopwords); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <mrp...@us...> - 2011-02-20 23:39:34
|
Revision: 4212 http://bigdata.svn.sourceforge.net/bigdata/?rev=4212&view=rev Author: mrpersonick Date: 2011-02-20 23:39:28 +0000 (Sun, 20 Feb 2011) Log Message: ----------- turn off stopword filter when using prefix match Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/search/FullTextIndex.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestSearchQuery.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/search/FullTextIndex.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/search/FullTextIndex.java 2011-02-20 21:20:44 UTC (rev 4211) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/search/FullTextIndex.java 2011-02-20 23:39:28 UTC (rev 4212) @@ -51,7 +51,10 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.LowerCaseFilter; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.standard.StandardFilter; +import org.apache.lucene.analysis.standard.StandardTokenizer; import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.util.Version; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IPredicate; @@ -661,6 +664,19 @@ } /** + * See {@link #index(TokenBuffer, long, int, String, Reader, boolean)}. + * <p> + * Uses a default filterStopwords value of true. + * + */ + public void index(final TokenBuffer buffer, final long docId, final int fieldId, + final String languageCode, final Reader r) { + + index(buffer, docId, fieldId, languageCode, r, true/* filterStopwords */); + + } + + /** * Index a field in a document. * <p> * Note: This method does NOT force a write on the indices. If the <i>buffer</i> @@ -684,11 +700,13 @@ * {@link Locale}. * @param r * A reader on the text to be indexed. 
+ * @param filterStopwords + * if true, filter stopwords from the token stream * * @see TokenBuffer#flush() */ public void index(final TokenBuffer buffer, final long docId, final int fieldId, - final String languageCode, final Reader r) { + final String languageCode, final Reader r, final boolean filterStopwords) { /* * Note: You can invoke this on a read-only index. It is only overflow @@ -701,7 +719,7 @@ int n = 0; // tokenize (note: docId,fieldId are not on the tokenStream, but the field could be). - final TokenStream tokenStream = getTokenStream(languageCode, r); + final TokenStream tokenStream = getTokenStream(languageCode, r, filterStopwords); try { while (tokenStream.incrementToken()) { TermAttribute term=tokenStream.getAttribute(TermAttribute.class); @@ -729,10 +747,14 @@ * * @param r * A reader on the text to be indexed. + * + * @param filterStopwords + * if true, filter stopwords from the token stream * * @return The extracted token stream. */ - protected TokenStream getTokenStream(final String languageCode, final Reader r) { + protected TokenStream getTokenStream(final String languageCode, + final Reader r, final boolean filterStopwords) { /* * Note: This stripping out stopwords by default. @@ -741,9 +763,22 @@ */ final Analyzer a = getAnalyzer(languageCode); - TokenStream tokenStream = a.tokenStream(null/* @todo field? */, r); + TokenStream tokenStream; + if (filterStopwords) { + tokenStream = a.tokenStream(null/* @todo field? */, r); + } else { + /* + * To eliminiate stopword filtering, we simulate the tokenStream() + * operation above per the javadoc for that method, which says: + * "Constructs a StandardTokenizer filtered by a StandardFilter, + * a LowerCaseFilter and a StopFilter", eliminating the StopFilter. + */ + tokenStream = new StandardTokenizer(Version.LUCENE_CURRENT, r); + tokenStream = new StandardFilter(tokenStream); + } // force to lower case. + // might be able to move this inside the else {} block above? 
tokenStream = new LowerCaseFilter(tokenStream); return tokenStream; @@ -1037,9 +1072,15 @@ final TokenBuffer buffer = new TokenBuffer(1, this); + /* + * If we are using prefix match (* operator) then we don't want + * to filter stopwords from the search query. + */ + final boolean filterStopwords = !prefixMatch; + index(buffer, Long.MIN_VALUE/* docId */, Integer.MIN_VALUE/* fieldId */, languageCode, - new StringReader(query)); + new StringReader(query), filterStopwords); if (buffer.size() == 0) { Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestSearchQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestSearchQuery.java 2011-02-20 21:20:44 UTC (rev 4211) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestSearchQuery.java 2011-02-20 23:39:28 UTC (rev 4212) @@ -40,6 +40,7 @@ import java.util.Set; import java.util.concurrent.TimeUnit; +import org.apache.log4j.Logger; import org.openrdf.model.BNode; import org.openrdf.model.Graph; import org.openrdf.model.Literal; @@ -99,6 +100,8 @@ */ public class TestSearchQuery extends ProxyBigdataSailTestCase { + protected static final Logger log = Logger.getLogger(TestSearchQuery.class); + public TestSearchQuery() { } @@ -708,6 +711,7 @@ final URI s5 = vf.createURI(BD.NAMESPACE+"s5"); final URI s6 = vf.createURI(BD.NAMESPACE+"s6"); final URI s7 = vf.createURI(BD.NAMESPACE+"s7"); + final URI s8 = vf.createURI(BD.NAMESPACE+"s8"); final Literal l1 = vf.createLiteral("how"); final Literal l2 = vf.createLiteral("now"); final Literal l3 = vf.createLiteral("brown"); @@ -715,6 +719,7 @@ final Literal l5 = vf.createLiteral("how now"); final Literal l6 = vf.createLiteral("brown cow"); final Literal l7 = vf.createLiteral("how now brown cow"); + final Literal l8 = vf.createLiteral("toilet"); cxn.add(s1, RDFS.LABEL, l1); cxn.add(s2, RDFS.LABEL, l2); @@ -723,6 +728,7 @@ 
cxn.add(s5, RDFS.LABEL, l5); cxn.add(s6, RDFS.LABEL, l6); cxn.add(s7, RDFS.LABEL, l7); + cxn.add(s8, RDFS.LABEL, l8); /* * Note: The either flush() or commit() is required to flush the @@ -739,6 +745,7 @@ literals.put(((BigdataValue)l5).getIV(), l5); literals.put(((BigdataValue)l6).getIV(), l6); literals.put(((BigdataValue)l7).getIV(), l7); + literals.put(((BigdataValue)l8).getIV(), l8); final Map<IV, URI> uris = new LinkedHashMap<IV, URI>(); uris.put(((BigdataValue)l1).getIV(), s1); @@ -748,6 +755,7 @@ uris.put(((BigdataValue)l5).getIV(), s5); uris.put(((BigdataValue)l6).getIV(), s6); uris.put(((BigdataValue)l7).getIV(), s7); + uris.put(((BigdataValue)l8).getIV(), s8); /**/ if (log.isInfoEnabled()) { @@ -1066,6 +1074,71 @@ } + { // prefix match using a stopword + + final String searchQuery = "to*"; + final double minRelevance = 0.0d; + + final String query = + "select ?s ?o ?score " + + "where " + + "{ " + + " ?s <"+RDFS.LABEL+"> ?o . " + + " ?o <"+BD.SEARCH+"> \""+searchQuery+"\" . " + + " ?o <"+BD.RELEVANCE+"> ?score . " + +// " ?o <"+BD.MIN_RELEVANCE+"> \""+minRelevance+"\" . " + +// " ?o <"+BD.MAX_HITS+"> \"5\" . 
" + +// " filter regex(?o, \""+searchQuery+"\") " + + "} " + + "order by desc(?score)"; + + log.info("\n"+query); + + final TupleQuery tupleQuery = + cxn.prepareTupleQuery(QueryLanguage.SPARQL, query); + tupleQuery.setIncludeInferred(true /* includeInferred */); + TupleQueryResult result = tupleQuery.evaluate(); + + int i = 0; + while (result.hasNext()) { + log.info(i++ + ": " + result.next().toString()); + } + assertTrue("wrong # of results: " + i, i == 1); + + result = tupleQuery.evaluate(); + + Collection<BindingSet> answer = new LinkedList<BindingSet>(); + + final ITextIndexer search = + sail.getDatabase().getLexiconRelation().getSearchEngine(); + final Hiterator<IHit> hits = + search.search(searchQuery, + null, // languageCode + true, // prefixMatch + minRelevance, // minCosine + 10000, // maxRank (=maxResults + 1) + 1000L, // timeout + TimeUnit.MILLISECONDS // unit + ); + + while (hits.hasNext()) { + final IHit hit = hits.next(); + final IV id = new TermId(VTE.LITERAL, hit.getDocId()); + final Literal score = vf.createLiteral(hit.getCosine()); + final URI s = uris.get(id); + final Literal o = literals.get(id); + final BindingSet bs = createBindingSet( + new BindingImpl("s", s), + new BindingImpl("o", o), + new BindingImpl("score", score)); + log.info(bs); + answer.add(bs); + } + + compare(result, answer); + + } + } finally { cxn.close(); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2011-02-20 21:20:51
|
Revision: 4211 http://bigdata.svn.sourceforge.net/bigdata/?rev=4211&view=rev Author: thompsonbry Date: 2011-02-20 21:20:44 +0000 (Sun, 20 Feb 2011) Log Message: ----------- Working on unit tests for getSharedVars(), canJoin(), and now canJoinWithConstraints(). Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestBOpUtility.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestBOpUtility_canJoin.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestBOpUtility_canJoinUsingConstraints.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java 2011-02-20 13:50:40 UTC (rev 4210) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java 2011-02-20 21:20:44 UTC (rev 4211) @@ -27,6 +27,7 @@ package com.bigdata.bop; +import java.util.Arrays; import java.util.Collections; import java.util.Iterator; import java.util.LinkedHashMap; @@ -39,6 +40,7 @@ import org.apache.log4j.Logger; import com.bigdata.bop.BOp.Annotations; +import com.bigdata.bop.controller.PartitionedJoinGroup; import com.bigdata.bop.engine.BOpStats; import com.bigdata.btree.AbstractNode; import com.bigdata.relation.accesspath.IAsynchronousIterator; @@ -1010,41 +1012,41 @@ } - /** - * Inject (or replace) an {@link Integer} "rowId" column. This does not have - * a side-effect on the source {@link IBindingSet}s. - * - * @param var - * The name of the column. - * @param start - * The starting value for the identifier. - * @param in - * The source {@link IBindingSet}s. - * - * @return The modified {@link IBindingSet}s. 
- */ - public static IBindingSet[] injectRowIdColumn(final IVariable var, - final int start, final IBindingSet[] in) { +// /** +// * Inject (or replace) an {@link Integer} "rowId" column. This does not have +// * a side-effect on the source {@link IBindingSet}s. +// * +// * @param var +// * The name of the column. +// * @param start +// * The starting value for the identifier. +// * @param in +// * The source {@link IBindingSet}s. +// * +// * @return The modified {@link IBindingSet}s. +// */ +// public static IBindingSet[] injectRowIdColumn(final IVariable<?> var, +// final int start, final IBindingSet[] in) { +// +// if (in == null) +// throw new IllegalArgumentException(); +// +// final IBindingSet[] out = new IBindingSet[in.length]; +// +// for (int i = 0; i < out.length; i++) { +// +// final IBindingSet bset = in[i].clone(); +// +// bset.set(var, new Constant<Integer>(Integer.valueOf(start + i))); +// +// out[i] = bset; +// +// } +// +// return out; +// +// } - if (in == null) - throw new IllegalArgumentException(); - - final IBindingSet[] out = new IBindingSet[in.length]; - - for (int i = 0; i < out.length; i++) { - - final IBindingSet bset = in[i].clone(); - - bset.set(var, new Constant<Integer>(Integer.valueOf(start + i))); - - out[i] = bset; - - } - - return out; - - } - /** * Return an ordered array of the bopIds associated with an ordered array of * predicates (aka a join path). @@ -1077,29 +1079,20 @@ } /** - * Return the variable references shared by tw operators. All variables - * spanned by either {@link BOp} are considered. + * Return the variable references shared by two operators. All variables + * spanned by either {@link BOp} are considered, regardless of whether they + * appear as operands or within annotations. * * @param p * An operator. * @param c * Another operator. * - * @param p - * A predicate. + * @return The variable(s) in common. This may be an empty set, but it is + * never <code>null</code>. * - * @param c - * A constraint. 
- * - * @return The variables in common -or- <code>null</code> iff there are no - * variables in common. - * * @throws IllegalArgumentException * if the two either reference is <code>null</code>. - * @throws IllegalArgumentException - * if the reference are the same. - * - * @todo unit tests. */ public static Set<IVariable<?>> getSharedVars(final BOp p, final BOp c) { @@ -1109,8 +1102,12 @@ if (c == null) throw new IllegalArgumentException(); - if (p == c) - throw new IllegalArgumentException(); + /* + * Note: This is allowed since both arguments might be the same variable + * or constant. + */ +// if (p == c) +// throw new IllegalArgumentException(); // The set of variables which are shared. final Set<IVariable<?>> sharedVars = new LinkedHashSet<IVariable<?>>(); @@ -1154,4 +1151,285 @@ } + /** + * Return <code>true</code> iff two predicates can join on the basis of at + * least one variable which is shared directly by those predicates. Only the + * operands of the predicates are considered. + * <p> + * Note: This method will only identify joins where the predicates directly + * share at least one variable. However, joins are also possible when the + * predicates share variables via one or more constraint(s). Use + * {@link #canJoinUsingConstraints(IPredicate[], IPredicate, IConstraint[])} + * to identify such joins. + * <p> + * Note: Any two predicates may join regardless of the presence of shared + * variables. However, such joins will produce the full cross product of the + * binding sets selected by each predicate. As such, they should be run last + * and this method will not return <code>true</code> for such predicates. + * <p> + * Note: This method is more efficient than {@link #getSharedVars(BOp, BOp)} + * because it does not materialize the sets of shared variables. However, it + * only considers the operands of the {@link IPredicate}s and is thus more + * restricted than {@link #getSharedVars(BOp, BOp)} as well. + * + * @param p1 + * A predicate. 
+ * @param p2 + * Another predicate. + * + * @return <code>true</code> iff the predicates share at least one variable + * as an operand. + * + * @throws IllegalArgumentException + * if the two either reference is <code>null</code>. + */ +// * @throws IllegalArgumentException +// * if the reference are the same. + static public boolean canJoin(final IPredicate<?> p1, final IPredicate<?> p2) { + + if (p1 == null) + throw new IllegalArgumentException(); + + if (p2 == null) + throw new IllegalArgumentException(); + +// if (p1 == p2) +// throw new IllegalArgumentException(); + + // iterator scanning the operands of p1. + final Iterator<IVariable<?>> itr1 = BOpUtility.getArgumentVariables(p1); + + while (itr1.hasNext()) { + + final IVariable<?> v1 = itr1.next(); + + // iterator scanning the operands of p2. + final Iterator<IVariable<?>> itr2 = BOpUtility + .getArgumentVariables(p2); + + while (itr2.hasNext()) { + + final IVariable<?> v2 = itr2.next(); + + if (v1 == v2) { + + if (log.isDebugEnabled()) + log.debug("Can join: sharedVar=" + v1 + ", p1=" + p1 + + ", p2=" + p2); + + return true; + + } + + } + + } + + if (log.isDebugEnabled()) + log.debug("No directly shared variable: p1=" + p1 + ", p2=" + p2); + + return false; + + } + + /** + * Return <code>true</code> iff a predicate may be used to extend a join + * path on the basis of at least one variable which is shared either + * directly or via one or more constraints which may be attached to the + * predicate when it is added to the join path. The join path is used to + * decide which variables are known to be bound, which in turn decides which + * constraints may be run. Unlike the case when the variable is directly + * shared between the two predicates, a join involving a constraint requires + * us to know which variables are already bound so we can know when the + * constraint may be attached. 
+ * <p> + * Note: Use {@link #canJoin(IPredicate, IPredicate)} instead to identify + * joins based on a variable which is directly shared. + * <p> + * Note: Any two predicates may join regardless of the presence of shared + * variables. However, such joins will produce the full cross product of the + * binding sets selected by each predicate. As such, they should be run last + * and this method will not return <code>true</code> for such predicates. + * + * @param path + * A join path containing at least one predicate. + * @param vertex + * A predicate which is being considered as an extension of that + * join path. + * @param constraints + * A set of zero or more constraints (optional). Constraints are + * attached dynamically once the variables which they use are + * bound. Hence, a constraint will always share a variable with + * any predicate to which it is attached. If any constraints are + * attached to the given vertex and they share a variable which + * has already been bound by the join path, then the vertex may + * join with the join path even if it does not directly bind that + * variable. + * + * @return <code>true</code> iff the vertex can join with the join path via + * a shared variable. + * + * @throws IllegalArgumentException + * if the join path is <code>null</code>. + * @throws IllegalArgumentException + * if the join path is empty. + * @throws IllegalArgumentException + * if any element in the join path is <code>null</code>. + * @throws IllegalArgumentException + * if the vertex is <code>null</code>. + * @throws IllegalArgumentException + * if the vertex is already part of the join path. + * @throws IllegalArgumentException + * if any element in the optional constraints array is + * <code>null</code>. + */ + static public boolean canJoinUsingConstraints(final IPredicate<?>[] path, + final IPredicate<?> vertex, final IConstraint[] constraints) { + + /* + * Check arguments. 
+ */ + if (path == null) + throw new IllegalArgumentException(); + if (vertex == null) + throw new IllegalArgumentException(); + // constraints MAY be null. + if (path.length == 0) + throw new IllegalArgumentException(); + { + for (IPredicate<?> p : path) { + if (p == null) + throw new IllegalArgumentException(); + if (vertex == p) + throw new IllegalArgumentException(); + } + } + + /* + * Find the set of variables which are known to be bound because they + * are referenced as operands of the predicates in the join path. + */ + final Set<IVariable<?>> knownBound = new LinkedHashSet<IVariable<?>>(); + + for (IPredicate<?> p : path) { + + final Iterator<IVariable<?>> vitr = BOpUtility + .getArgumentVariables(p); + + while (vitr.hasNext()) { + + knownBound.add(vitr.next()); + + } + + } + + /* + * + * If the given predicate directly shares a variable with any of the + * predicates in the join path, then we can return immediately. + */ + { + + final Iterator<IVariable<?>> vitr = BOpUtility + .getArgumentVariables(vertex); + + while (vitr.hasNext()) { + + final IVariable<?> var = vitr.next(); + + if(knownBound.contains(var)) { + + if (log.isDebugEnabled()) + log.debug("Can join: sharedVar=" + var + ", path=" + + Arrays.toString(path) + ", vertex=" + vertex); + + return true; + + } + + } + + } + + if(constraints == null) { + + // No opportunity for a constraint based join. + + if (log.isDebugEnabled()) + log.debug("No directly shared variable: path=" + + Arrays.toString(path) + ", vertex=" + vertex); + + return false; + + } + + /* + * Find the set of constraints which can run with the vertex given the + * join path. + */ + { + + // Extend the new join path. + final IPredicate<?>[] newPath = new IPredicate[path.length + 1]; + + System.arraycopy(path/* src */, 0/* srcPos */, newPath/* dest */, + 0/* destPos */, path.length); + + newPath[path.length] = vertex; + + /* + * Find the constraints that will run with each vertex of the new + * join path. 
+ */ + final IConstraint[][] constraintRunArray = PartitionedJoinGroup + .getJoinGraphConstraints(newPath, constraints); + + /* + * Consider only the constraints attached to the last vertex in the + * new join path. All of their variables will be bound since (by + * definition) a constraint may not run until its variables are + * bound. If any of the constraints attached to that last share any + * variables which were already known to be bound in the caller's + * join path, then the vertex can join (without of necessity being + * a full cross product join). + */ + final IConstraint[] vertexConstraints = constraintRunArray[path.length]; + + for (IConstraint c : vertexConstraints) { + + // consider all variables spanned by the constraint. + final Iterator<IVariable<?>> vitr = BOpUtility + .getSpannedVariables(c); + + while (vitr.hasNext()) { + + final IVariable<?> var = vitr.next(); + + if (knownBound.contains(var)) { + + if (log.isDebugEnabled()) + log.debug("Can join: sharedVar=" + var + ", path=" + + Arrays.toString(path) + ", vertex=" + + vertex + ", constraint=" + c); + + return true; + + } + + } + + } + + } + + if (log.isDebugEnabled()) + log.debug("No shared variable: path=" + Arrays.toString(path) + + ", vertex=" + vertex + ", constraints=" + + Arrays.toString(constraints)); + + return false; + + } + } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestAll.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestAll.java 2011-02-20 13:50:40 UTC (rev 4210) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestAll.java 2011-02-20 21:20:44 UTC (rev 4211) @@ -73,6 +73,12 @@ // counting variables, etc. suite.addTestSuite(TestBOpUtility.class); + // unit tests for allowing joins based on shared variables in preds. + suite.addTestSuite(TestBOpUtility_canJoin.class); + + // more complex logic for join paths. 
+ suite.addTestSuite(TestBOpUtility_canJoinUsingConstraints.class); + // constraint operators (EQ, NE, etc). suite.addTest(com.bigdata.bop.constraint.TestAll.suite()); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestBOpUtility.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestBOpUtility.java 2011-02-20 13:50:40 UTC (rev 4210) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestBOpUtility.java 2011-02-20 21:20:44 UTC (rev 4211) @@ -29,11 +29,13 @@ import java.util.Iterator; import java.util.Map; +import java.util.Set; import java.util.concurrent.FutureTask; import junit.framework.TestCase2; -import junit.framework.TestCase2; +import com.bigdata.bop.ap.Predicate; +import com.bigdata.bop.constraint.BOpConstraint; /** * Unit tests for {@link BOpUtility}. @@ -296,7 +298,7 @@ // .annotationOpPreOrderIterator(op2); // while (itr.hasNext()) { // final BOp t = itr.next(); -// System.out.println(i + " : " + t);// @todo uncomment +//// System.out.println(i + " : " + t); //// assertTrue("index=" + i + ", expected=" + expected[i] + ", actual=" //// + t, expected[i].equals(t)); // i++; @@ -828,5 +830,199 @@ } } + + /** + * Unit test for correct rejection of illegal arguments. + * + * @see BOpUtility#getSharedVars(BOp, BOp) + */ + public void test_getSharedVariables_correctRejection() { + + // correct rejection w/ null arg. + try { + BOpUtility.getSharedVars(Var.var("x"), null); + fail("Expecting: " + IllegalArgumentException.class); + } catch (IllegalArgumentException ex) { + if (log.isInfoEnabled()) + log.info("Ignoring expected exception: " + ex); + } + + // correct rejection w/ null arg. 
+ try { + BOpUtility.getSharedVars(null, Var.var("x")); + fail("Expecting: " + IllegalArgumentException.class); + } catch (IllegalArgumentException ex) { + if (log.isInfoEnabled()) + log.info("Ignoring expected exception: " + ex); + } + + } + + /** + * Unit test for correct identification of cases in which there are no + * shared variables. + * + * @see BOpUtility#getSharedVars(BOp, BOp) + */ + @SuppressWarnings("unchecked") + public void test_getSharedVariables_nothingShared() { + + // nothing shared. + assertTrue(BOpUtility.getSharedVars(Var.var("x"), Var.var("y")) + .isEmpty()); + + // nothing shared. + assertTrue(BOpUtility.getSharedVars(Var.var("x"), + new Constant<String>("x")).isEmpty()); + + // nothing shared. + assertTrue(BOpUtility.getSharedVars(// + Var.var("x"),// + new Predicate(new BOp[] { Var.var("y"), Var.var("z") },// + (Map) null/* annotations */)// + ).isEmpty()); + + // nothing shared. + assertTrue(BOpUtility.getSharedVars(// + Var.var("x"),// + new Predicate(new BOp[] { Var.var("y"), Var.var("z") },// + new NV("name", "value")// + )).isEmpty()); + + } + + /** + * Unit test for correct identification of cases in which there are shared + * variables. + * + * @see BOpUtility#getSharedVars(BOp, BOp) + */ + @SuppressWarnings("unchecked") + public void test_getSharedVariables_somethingShared() { + + // two variables + assertSameVariables(// + new IVariable[] { Var.var("x") }, // + BOpUtility.getSharedVars(// + Var.var("x"), // + Var.var("x")// + )); + + // variable and expression. + assertSameVariables(// + new IVariable[] { Var.var("x") }, // + BOpUtility.getSharedVars(// + Var.var("x"), // + new BOpBase(// + new BOp[] { new Constant<String>("x"), + Var.var("x") },// + null// annotations + )// + )); + + // expression and variable. 
+ assertSameVariables(// + new IVariable[] { Var.var("x") }, // + BOpUtility.getSharedVars(// + new BOpBase(// + new BOp[] { new Constant<String>("x"), + Var.var("x") },// + null// annotations + ),// + Var.var("x") // + )); + + // variable and predicate w/o annotations. + assertSameVariables(// + new IVariable[] { Var.var("x") }, // + BOpUtility.getSharedVars(// + Var.var("x"),// + new Predicate(new BOp[] { Var.var("y"), Var.var("x") },// + (Map) null/* annotations */)// + )); + + // predicate w/o annotations and variable. + assertSameVariables(// + new IVariable[] { Var.var("x") }, // + BOpUtility.getSharedVars(// + new Predicate(new BOp[] { Var.var("y"), Var.var("x") },// + (Map) null/* annotations */),// + Var.var("x")// + )); + + // variable and predicate w/ annotations (w/o var). + assertSameVariables(// + new IVariable[] { Var.var("x") }, // + BOpUtility.getSharedVars(// + Var.var("x"),// + new Predicate(new BOp[] { Var.var("x"), Var.var("z") },// + new NV("name", "value")// + ))); + + // variable and predicate w/ annotations (w/ same var). + assertSameVariables(// + new IVariable[] { Var.var("x") }, // + BOpUtility.getSharedVars(// + Var.var("x"),// + new Predicate(new BOp[] { Var.var("y"), Var.var("z") },// + new NV("name", Var.var("x"))// + ))); + + // variable and predicate w/ annotations (w/ another var). 
+ assertSameVariables(// + new IVariable[] { /*Var.var("x")*/ }, // + BOpUtility.getSharedVars(// + Var.var("x"),// + new Predicate(new BOp[] { Var.var("y"), Var.var("z") },// + new NV("name", Var.var("z"))// + ))); + + // two predicates + assertSameVariables(// + new IVariable[] { Var.var("y"), Var.var("z") }, // + BOpUtility.getSharedVars(// + new Predicate(new BOp[] { Var.var("y"), Var.var("z") },// + new NV("name", Var.var("z"))// + ), // + new Predicate(new BOp[] { Var.var("y"), Var.var("z") },// + new NV("name", Var.var("x"))// + )// + )); + + // two predicates + assertSameVariables(// + new IVariable[] { Var.var("x"), Var.var("y"), Var.var("z") }, // + BOpUtility.getSharedVars(// + new Predicate(new BOp[] { Var.var("y"), Var.var("x") },// + new NV("name", Var.var("z"))// + ), // + new Predicate(new BOp[] { Var.var("y"), Var.var("z") },// + new NV("name", Var.var("x"))// + )// + )); + + } + /** + * Test helper. + * @param expected The expected variables in any order. + * @param actual A set of variables actually reported. + */ + private static void assertSameVariables(final IVariable<?>[] expected, + final Set<IVariable<?>> actual) { + + for(IVariable<?> var : expected) { + + if(!actual.contains(var)) { + + fail("Expecting: "+var); + + } + + } + + assertEquals("size", expected.length, actual.size()); + + } + } Added: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestBOpUtility_canJoin.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestBOpUtility_canJoin.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestBOpUtility_canJoin.java 2011-02-20 21:20:44 UTC (rev 4211) @@ -0,0 +1,147 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2011. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... 
+ +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Feb 20, 2011 + */ + +package com.bigdata.bop; + +import com.bigdata.bop.ap.Predicate; + +import junit.framework.TestCase2; + +/** + * Unit tests for {@link BOpUtility#canJoin(IPredicate, IPredicate)} + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public class TestBOpUtility_canJoin extends TestCase2 { + + /** + * + */ + public TestBOpUtility_canJoin() { + } + + /** + * @param name + */ + public TestBOpUtility_canJoin(String name) { + super(name); + } + + + /** + * Correct rejection tests. + * + * @see BOpUtility#canJoin(IPredicate, IPredicate). + */ + @SuppressWarnings("unchecked") + public void test_canJoin_correctRejection() { + + final IVariable<?> x = Var.var("x"); + final IVariable<?> y = Var.var("y"); + final IVariable<?> z = Var.var("z"); + + final IPredicate<?> p1 = new Predicate(new BOp[]{x,y}); + final IPredicate<?> p2 = new Predicate(new BOp[]{y,z}); + + // correct rejection w/ null arg. + try { + BOpUtility.canJoin(null,p2); + fail("Expecting: " + IllegalArgumentException.class); + } catch (IllegalArgumentException ex) { + if (log.isInfoEnabled()) + log.info("Ignoring expected exception: " + ex); + } + + // correct rejection w/ null arg. 
+ try { + BOpUtility.canJoin(p1,null); + fail("Expecting: " + IllegalArgumentException.class); + } catch (IllegalArgumentException ex) { + if (log.isInfoEnabled()) + log.info("Ignoring expected exception: " + ex); + } + + } + + /** + * Semantics tests focused on shared variables in the operands. + * + * @see BOpUtility#canJoin(IPredicate, IPredicate) + */ + @SuppressWarnings("unchecked") + public void test_canJoin() { + + final IVariable<?> u = Var.var("u"); + final IVariable<?> x = Var.var("x"); + final IVariable<?> y = Var.var("y"); + final IVariable<?> z = Var.var("z"); + + final IPredicate<?> p1 = new Predicate(new BOp[] { x, y }); + final IPredicate<?> p2 = new Predicate(new BOp[] { y, z }); + final IPredicate<?> p3 = new Predicate(new BOp[] { u, z }); + + // share y + assertTrue(BOpUtility.canJoin(p1, p2)); + + // share z + assertTrue(BOpUtility.canJoin(p2, p3)); + + // share z + assertFalse(BOpUtility.canJoin(p1, p3)); + + // shares (x,y) with self. + assertTrue(BOpUtility.canJoin(p1, p1)); + + } + + /** + * Verify that joins are not permitted when the variables are + * only shared via an annotation. + * + * @see BOpUtility#canJoin(IPredicate, IPredicate) + */ + @SuppressWarnings("unchecked") + public void test_canJoin_annotationsAreIngored() { + + final IVariable<?> x = Var.var("x"); + final IVariable<?> y = Var.var("y"); + final IVariable<?> z = Var.var("z"); + + final IPredicate<?> p1 = new Predicate(new BOp[] { x, },// + new NV("foo", y)// + ); + final IPredicate<?> p2 = new Predicate(new BOp[] { z },// + new NV("foo", y) + ); + + // verify that the variables in the annotations are ignored. 
+ assertFalse(BOpUtility.canJoin(p1, p2)); + + } + +} Property changes on: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestBOpUtility_canJoin.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Added: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestBOpUtility_canJoinUsingConstraints.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestBOpUtility_canJoinUsingConstraints.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestBOpUtility_canJoinUsingConstraints.java 2011-02-20 21:20:44 UTC (rev 4211) @@ -0,0 +1,494 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2011. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Feb 20, 2011 + */ + +package com.bigdata.bop; + +import java.util.Map; + +import junit.framework.TestCase2; + +import com.bigdata.bop.ap.Predicate; +import com.bigdata.bop.constraint.AND; +import com.bigdata.bop.constraint.BOpConstraint; +import com.bigdata.bop.controller.JoinGraph.JGraph; + +/** + * + * This test suite is built around BSBM Q5. Each test has an existing + * join path and a new vertex to be added to the join path. 
The question is + * whether or not the vertex <em>can join</em> with the join path using one or + * more shared variable(s). This tests a method used to incrementally grow a + * join path when it is dynamically decided that an {@link IPredicate} may be + * added to the join path based on shared variables. Static analysis easily + * reports those joins which are allowed based on the variables directly given + * with two {@link IPredicate}s. The purpose of this test suite is to explore + * when joins (based on shared variables) become permissible through + * {@link IConstraint}s as the variable(s) used within those constraints become + * bound. + * <p> + * Note: To avoid a dependency on the RDF model layer, this just uses String + * constants for URIs and Literals. + * + * @see BOpUtility#canJoin(IPredicate, IPredicate) + * @see BOpUtility#canJoinUsingConstraints(IPredicate[], IPredicate, + * IConstraint[]) + * @see JGraph + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +//@SuppressWarnings("unchecked") +public class TestBOpUtility_canJoinUsingConstraints extends TestCase2 { + + /** + * + */ + public TestBOpUtility_canJoinUsingConstraints() { + } + + /** + * @param name + */ + public TestBOpUtility_canJoinUsingConstraints(String name) { + super(name); + } + + /** + * Unit tests to verify that arguments are validated. + * + * @see BOpUtility#canJoinUsingConstraints(IPredicate[], IPredicate, + * IConstraint[]) + */ + public void test_canJoinUsingConstraints_illegalArgument() { + + final IVariable<?> x = Var.var("x"); + final IVariable<?> y = Var.var("y"); + + final IPredicate<?> p1 = new Predicate(new BOp[]{x}); + + final IPredicate<?> p2 = new Predicate(new BOp[]{y}); + + // path must not be null. 
+ try { + BOpUtility.canJoinUsingConstraints(// + null, // path + p1,// vertex + new IConstraint[0]// constraints + ); + fail("Expecting: " + IllegalArgumentException.class); + } catch (IllegalArgumentException ex) { + if (log.isInfoEnabled()) + log.info("Expecting: " + IllegalArgumentException.class); + } + + // vertex must not be null. + try { + BOpUtility.canJoinUsingConstraints(// + new IPredicate[]{p1}, // path + null,// vertex + new IConstraint[0]// constraints + ); + fail("Expecting: " + IllegalArgumentException.class); + } catch (IllegalArgumentException ex) { + if (log.isInfoEnabled()) + log.info("Expecting: " + IllegalArgumentException.class); + } + + // path may not be empty. + try { + BOpUtility.canJoinUsingConstraints(// + new IPredicate[] {}, // path + p1,// vertex + new IConstraint[0]// constraints + ); + fail("Expecting: " + IllegalArgumentException.class); + } catch (IllegalArgumentException ex) { + if (log.isInfoEnabled()) + log.info("Expecting: " + IllegalArgumentException.class); + } + + // path elements may not be null. + try { + BOpUtility.canJoinUsingConstraints(// + new IPredicate[] { p2, null }, // path + p1,// vertex + new IConstraint[0]// constraints + ); + fail("Expecting: " + IllegalArgumentException.class); + } catch (IllegalArgumentException ex) { + if (log.isInfoEnabled()) + log.info("Expecting: " + IllegalArgumentException.class); + } + + // vertex must not appear in the path. + try { + BOpUtility.canJoinUsingConstraints(// + new IPredicate[] { p2, p1 }, // path + p1,// vertex + new IConstraint[0]// constraints + ); + fail("Expecting: " + IllegalArgumentException.class); + } catch (IllegalArgumentException ex) { + if (log.isInfoEnabled()) + log.info("Expecting: " + IllegalArgumentException.class); + } + + // constraint array may not contain null elements. 
+ try { + BOpUtility.canJoinUsingConstraints(// + new IPredicate[] { p2 }, // path + p1,// vertex + new IConstraint[] { // + new NEConstant(x, new Constant<Integer>(12)), // + null // + }// constraints + ); + fail("Expecting: " + IllegalArgumentException.class); + } catch (IllegalArgumentException ex) { + if (log.isInfoEnabled()) + log.info("Expecting: " + IllegalArgumentException.class); + } + + } + + // The comparison operators. + static private final int GT = 0, LT = 1;// , EQ = 2, GTE = 3, LTE = 4; + + // The math operators. + static private final int PLUS = 0, MINUS = 1; + + // Annotation for the comparison or math operator. + static private final String OP = "op"; + + /** + * A do-nothing constraint. The constraint is never evaluated. It is only + * used to test the logic which decides when two predicates can join based + * on variable(s) shared via a constraint. + */ + static private final class MyCompareOp extends BOpConstraint { + + private static final long serialVersionUID = 1L; + + /** + * Required deep copy constructor. + * + * @param op + */ + public MyCompareOp(MyCompareOp op) { + super(op); + } + + /** + * @param args + * @param annotations + */ + public MyCompareOp(BOp[] args, Map<String, Object> annotations) { + super(args, annotations); + } + + public boolean accept(IBindingSet bindingSet) { + throw new UnsupportedOperationException(); + } + + } + + /** + * A do-nothing constraint. The constraint is never evaluated. It is only + * used to test the logic which decides when two predicates can join based + * on variable(s) shared via a constraint. + */ + static private final class NEConstant extends BOpConstraint { + + private static final long serialVersionUID = 1L; + + /** + * Required deep copy constructor. 
+ * + * @param op + */ + public NEConstant(NEConstant op) { + super(op); + } + + /** + * @param args + * @param annotations + */ + public NEConstant(BOp[] args, Map<String, Object> annotations) { + super(args, annotations); + } + + public NEConstant(IVariable<?> var, IConstant<?> value) { + this(new BOp[] { var, value }, null/* annotations */); + } + + public boolean accept(IBindingSet bindingSet) { + throw new UnsupportedOperationException(); + } + + } + + /** + * A do-nothing value expression. The expression is never evaluated. It is + * only used to test the logic which decides when two predicates can join + * based on variable(s) shared via a constraint. + */ + static private final class MathBOp extends ImmutableBOp implements + IValueExpression { + + private static final long serialVersionUID = 1L; + + /** + * Required deep copy constructor. + * + * @param op + */ + public MathBOp(final MathBOp op) { + + super(op); + + } + + /** + * Required shallow copy constructor. + * + * @param args + * The operands. + * @param op + * The operation. + */ + public MathBOp(final BOp[] args, Map<String, Object> anns) { + + super(args, anns); + + if (args.length != 2 || args[0] == null || args[1] == null + || getProperty(OP) == null) { + + throw new IllegalArgumentException(); + + } + + } + + /** + * + * @param left + * The left operand. + * @param right + * The right operand. + * @param op + * The annotation specifying the operation to be performed on + * those operands. 
+ */ + public MathBOp(final IValueExpression left, + final IValueExpression right, final int op) { + + this(new BOp[] { left, right }, NV.asMap(new NV(OP, op))); + + } + + public Object get(IBindingSet bindingSet) { + throw new UnsupportedOperationException(); + } + } + + static private final String rdfs = "http://www.w3.org/2000/01/rdf-schema#"; + + static private final String bsbm = "http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/"; + + static private final String rdfsLabel = rdfs + "label"; + + static private final String productFeature = bsbm + "productFeature"; + + static private final String productPropertyNumeric1 = "productPropertyNumeric1"; + + static private final String productPropertyNumeric2 = bsbm + + "productPropertyNumeric2"; + + static private final String productInstance = "http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product22"; + + private int nextId = 0; + + final IVariable<?> product = Var.var("product"); + + final IVariable<?> productLabel = Var.var("productLabel"); + + final IVariable<?> prodFeature = Var.var("prodFeature"); + + final IVariable<?> simProperty1 = Var.var("simProperty1"); + + final IVariable<?> simProperty2 = Var.var("simProperty2"); + + final IVariable<?> origProperty1 = Var.var("origProperty1"); + + final IVariable<?> origProperty2 = Var.var("origProperty2"); + + /** ?product rdfs:label ?productLabel . */ + final private IPredicate<?> p0 = new Predicate(new BOp[] {// + product, new Constant(rdfsLabel), productLabel },// + new NV(BOp.Annotations.BOP_ID, nextId++)// + ); + + /** productInstance bsbm:productFeature ?prodFeature . */ + final private IPredicate<?> p1 = new Predicate(new BOp[] { // + new Constant(productInstance), new Constant(productFeature), + prodFeature },// + new NV(BOp.Annotations.BOP_ID, nextId++)// + ); + + /** ?product bsbm:productFeature ?prodFeature . 
*/ + final private IPredicate<?> p2 = new Predicate(new BOp[] { // + product, new Constant(productFeature), prodFeature },// + new NV(BOp.Annotations.BOP_ID, nextId++)// + ); + + /** productInstance bsbm:productPropertyNumeric1 ?origProperty1 . */ + final private IPredicate<?> p3 = new Predicate(new BOp[] { // + new Constant<String>(productInstance), + new Constant(productPropertyNumeric1), origProperty1 },// + new NV(BOp.Annotations.BOP_ID, nextId++)// + ); + + /** ?product bsbm:productPropertyNumeric1 ?simProperty1 . */ + final private IPredicate<?> p4 = new Predicate(new BOp[] { // + product, new Constant(productPropertyNumeric1), simProperty1 },// + new NV(BOp.Annotations.BOP_ID, nextId++)// + ); + + /** productInstance bsbm:productPropertyNumeric2 ?origProperty2 . */ + final private IPredicate<?> p5 = new Predicate(new BOp[] { // + new Constant(productInstance), + new Constant(productPropertyNumeric2), origProperty2 },// + new NV(BOp.Annotations.BOP_ID, nextId++)// + ); + + /** ?product bsbm:productPropertyNumeric2 ?simProperty2 . */ + final private IPredicate<?> p6 = new Predicate(new BOp[] { // + product, new Constant(productPropertyNumeric2), simProperty2 },// + new NV(BOp.Annotations.BOP_ID, nextId++)// + ); + + /** The vertices of the join graph (the predicates). */ + final IPredicate<?>[] preds = new IPredicate[] { p0, p1, p2, p3, p4, p5, p6 }; + + /** + * FILTER (productInstance != ?product) + */ + final IConstraint c0 = new NEConstant(product, new Constant<String>( + productInstance)); + + /** + * FILTER (?simProperty1 < (?origProperty1 + 120) && ?simProperty1 > + * (?origProperty1 - 120)) + * <p> + * Note: The AND in the compound filters is typically optimized out such + * that each of these is represented as its own IConstraint, but I have + * combined them for the purposes of these unit tests. 
+ */ + final IConstraint c1 = new AND(// + new MyCompareOp( + new BOp[] { + simProperty1, + new MathBOp(origProperty1, new Constant<Integer>( + 120), PLUS) }, NV.asMap(new NV[] { new NV( + OP, LT) })), // + new MyCompareOp(new BOp[] { + simProperty1, + new MathBOp(origProperty1, new Constant<Integer>(120), + MINUS) }, NV.asMap(new NV[] { new NV(OP, GT) }))// + ); + + /** + * FILTER (?simProperty2 < (?origProperty2 + 170) && ?simProperty2 > + * (?origProperty2 - 170)) + * <p> + * Note: The AND in the compound filters is typically optimized out such + * that each of these is represented as its own IConstraint, but I have + * combined them for the purposes of these unit tests. + */ + final IConstraint c2 = new AND(// + new MyCompareOp( + new BOp[] { + simProperty2, + new MathBOp(origProperty2, new Constant<Integer>( + 170), PLUS) }, NV.asMap(new NV[] { new NV( + OP, LT) })),// + new MyCompareOp(new BOp[] { + simProperty2, + new MathBOp(origProperty2, new Constant<Integer>(170), + MINUS) }, NV.asMap(new NV[] { new NV(OP, GT) }))// + ); + + /** The constraints on the join graph. */ + final IConstraint[] constraints = new IConstraint[] { c0, c1, c2 }; + + /** + * path=[p0], vertex=p2, constraints=[]. + */ + public void test_canJoinUsingConstraints_01() { + + // share ?product + assertTrue(BOpUtility.canJoin(p0, p2)); + assertTrue(BOpUtility.canJoinUsingConstraints(// + new IPredicate[] { p0 }, // path + p2,// vertex + new IConstraint[0]// constraints + )); + + } + + /** + * path=[p0], vertex=p2, constraints=[]. + * + * @todo Test w/o any constraints or with all constraints. Testing with only + * some of the constraints does not add much here (we probably do not + * need to have [c0] defined for this set of unit tests). + * + * @todo These are the full plans generated by the runtime and static + * optimizers. One way to test canJoinXXX() is to run out these join + * plans and verify that they report "true" in each case. 
However, the + * critical bit to test are join plans where the predicates w/o the + * shared variables can be run earlier due to the FILTERs. + * + * <pre> + * test_bsbm_q5 : static [0] : : ids=[1, 2, 4, 6, 0, 3, 5] + * test_bsbm_q5 : runtime[0] : : ids=[1, 2, 0, 4, 6, 3, 5] + * </pre> + */ + public void test_canJoinUsingConstraints_02() { + + // share ?product + assertTrue(BOpUtility.canJoin(p0, p2)); + assertTrue(BOpUtility.canJoinUsingConstraints(// + new IPredicate[] { p0 }, // path + p2,// vertex + new IConstraint[0]// constraints + )); + + } + +} Property changes on: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestBOpUtility_canJoinUsingConstraints.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2011-02-20 13:50:46
|
Revision: 4210 http://bigdata.svn.sourceforge.net/bigdata/?rev=4210&view=rev Author: thompsonbry Date: 2011-02-20 13:50:40 +0000 (Sun, 20 Feb 2011) Log Message: ----------- javadoc update on shared variables and constraints for the runtime optimizer. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2011-02-18 19:43:36 UTC (rev 4209) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2011-02-20 13:50:40 UTC (rev 4210) @@ -1950,36 +1950,42 @@ } /* - * Identify the edges by looking for shared variables among the - * predicates. + * Create edges to represent possible joins between predicates based + * on directly shared variables. * - * Note: Variables appear in predicates or in constraints. Edges are - * created to represent possible joins between predicates based on - * those shared variables. There are two cases: + * Note: There are really three classes of joins to be considered. * - * (1) When the target predicate shares a variable with the source - * predicate, then we always create an edge between those predicates - * to represent a possible join. + * (1) The target predicate directly shares a variable with the + * source predicate. These joins are always constrained since the + * source predicate will have bound that variable. * * (2) When the source predicate shares a variable with a constraint - * which also shares a variable with the target predicate, then we - * will also create an edge to represent a possible join. + * which also shares a variable with the target predicate. 
While the + * predicates do not directly share a variable, these joins are + * constrained by the shared variable in the constraint on the + * target predicate. BSBM Q5 is an example of this case. We do not + * create edges for such joins here. Instead, we dynamically + * determine when a constrained join is possible when extending the + * join path in each round. * - * The second case handles the case where variables are transitively - * shared through a constraint, but not directly shared between the - * predicates. BSBM Q5 is an example of this case. + * (3) Any two predicates may always be joined. However, joins which + * do not share variables either directly or indirectly will be full + * cross products. Therefore such joins are run last and we do not + * create edges for them here. * - * Note: If applying these two rules fails to create any edges for - * some vertex, then it does not share ANY variables and can be - * paired with ANY of the other vertices. However, we always run - * such vertices last as they can not restrict the cumulative - * cardinality of the solutions. Such vertices are therefore - * inserted into a separate set and appended to the join path once - * all edges having shared variables have been exhausted. + * FIXME VERTICES WHICH SHARE VARS THROUGH A CONSTRAINT : handle + * dynamically rather than enumerating since the joins are path + * dependent. * - * FIXME VERTICES WHICH SHARE VARS THROUGH A CONSTRAINT. - * - * FIXME VERTICES WITH NO SHARED VARS. + * FIXME VERTICES WITH NO SHARED VARS : handle once all other joins + * have been exhausted. [These do not need to be collected, we just + * need to run them after all the constrained joins have been run. + * Also, these joins should not be sampled by the query optimizer + * since there is no reason to choose one ordering of these vertices + * over another. 
[The only reason to sample these joins would be + * to estimate the total cumulative cardinality of the join graph, + * which might be useful when estimating the run time or resource + * demand.]] */ { @@ -3158,7 +3164,7 @@ * we are not yet handling anything except standard joins in the runtime * optimizer. */ - + /** * Execute the selected join path. * <p> This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2011-02-18 19:43:42
|
Revision: 4209 http://bigdata.svn.sourceforge.net/bigdata/?rev=4209&view=rev Author: thompsonbry Date: 2011-02-18 19:43:36 +0000 (Fri, 18 Feb 2011) Log Message: ----------- javadoc Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBarData.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java 2011-02-18 19:39:00 UTC (rev 4208) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java 2011-02-18 19:43:36 UTC (rev 4209) @@ -89,7 +89,9 @@ /** * When true, the test uses hardcoded access to an existing Journal already - * loaded with some BSBM data set. + * loaded with a larger data set (you need to run against a moderately + * large data set to assess the relative performance of the static and + * runtime query optimizers). 
*/ private static final boolean useExistingJournal = false; Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBarData.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBarData.java 2011-02-18 19:39:00 UTC (rev 4208) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBarData.java 2011-02-18 19:43:36 UTC (rev 4209) @@ -93,6 +93,12 @@ } + /** + * When true, the test uses hardcoded access to an existing Journal already + * loaded with a larger data set (you need to run against a moderately + * large data set to assess the relative performance of the static and + * runtime query optimizers). + */ private final static boolean useExistingJournal = false; protected Journal getJournal(final Properties properties) throws Exception { Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java 2011-02-18 19:39:00 UTC (rev 4208) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java 2011-02-18 19:43:36 UTC (rev 4209) @@ -134,6 +134,12 @@ } + /** + * When true, the test uses hardcoded access to an existing Journal already + * loaded with a larger data set (you need to run against a moderately + * large data set to assess the relative performance of the static and + * runtime query optimizers). + */ static private final boolean useExistingJournal = false; protected Journal getJournal(final Properties properties) throws Exception { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2011-02-18 19:39:08
|
Revision: 4208 http://bigdata.svn.sourceforge.net/bigdata/?rev=4208&view=rev Author: thompsonbry Date: 2011-02-18 19:39:00 +0000 (Fri, 18 Feb 2011) Log Message: ----------- Added support for random sampling of the standalone database B+Tree indices in support of the Runtime Query Optimizer. Modified the Advancer pattern to permit one-time initialization of the advancer. Added information about the selected join path to the JoinGraph INFO trace. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleIndex.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/filter/Advancer.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestSampleIndex.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleIndex.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleIndex.java 2011-02-17 22:58:07 UTC (rev 4207) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleIndex.java 2011-02-18 19:39:00 UTC (rev 4208) @@ -27,10 +27,16 @@ package com.bigdata.bop.ap; +import it.unimi.dsi.bits.BitVector; +import it.unimi.dsi.bits.LongArrayBitVector; +import it.unimi.dsi.fastutil.ints.IntOpenHashSet; + import java.io.Serializable; import java.util.ArrayList; +import java.util.Arrays; import java.util.Iterator; import java.util.Map; +import java.util.Random; import java.util.concurrent.Callable; import com.bigdata.bop.AbstractAccessPathOp; @@ -45,6 +51,7 @@ import com.bigdata.btree.ITupleCursor; import com.bigdata.btree.filter.Advancer; import com.bigdata.btree.view.FusedView; +import 
com.bigdata.rawstore.Bytes; import com.bigdata.relation.IRelation; import com.bigdata.relation.accesspath.AccessPath; import com.bigdata.relation.accesspath.IAccessPath; @@ -79,6 +86,27 @@ private static final long serialVersionUID = 1L; /** + * Typesafe enumeration of different kinds of index sampling strategies. + * + * @todo It is much more efficient to take clusters of samples when you can + * accept the bias. Taking a clustered sample really requires knowing + * where the leaf boundaries are in the index, e.g., using + * {@link ILeafCursor}. Taking all tuples from a few leaves in each + * sample might produce a faster estimation of the correlation when + * sampling join paths. + */ + public static enum SampleType { + /** + * Samples are taken at even space offsets. + */ + EVEN, + /** + * Sample offsets are computed randomly. + */ + RANDOM; + } + + /** * Known annotations. */ public interface Annotations extends BOp.Annotations { @@ -86,16 +114,32 @@ /** * The sample limit (default {@value #DEFAULT_LIMIT}). */ - String LIMIT = "limit"; + String LIMIT = SampleIndex.class.getName() + ".limit"; int DEFAULT_LIMIT = 100; /** + * The random number generator seed -or- ZERO (0L) for a random seed + * (default {@value #DEFAULT_SEED}). A non-zero value may be used to + * create a repeatable sample. + */ + String SEED = SampleIndex.class.getName() + ".seed"; + + long DEFAULT_SEED = 0L; + + /** * The {@link IPredicate} describing the access path to be sampled * (required). */ String PREDICATE = SampleIndex.class.getName() + ".predicate"; + + /** + * The type of sample to take (default {@value #DEFAULT_SAMPLE_TYPE)}. 
+ */ + String SAMPLE_TYPE = SampleIndex.class.getName() + ".sampleType"; + String DEFAULT_SAMPLE_TYPE = SampleType.RANDOM.name(); + } public SampleIndex(SampleIndex<E> op) { @@ -115,7 +159,20 @@ return getProperty(Annotations.LIMIT, Annotations.DEFAULT_LIMIT); } + + public long seed() { + return getProperty(Annotations.SEED, Annotations.DEFAULT_SEED); + + } + + public SampleType getSampleType() { + + return SampleType.valueOf(getProperty(Annotations.SAMPLE_TYPE, + Annotations.DEFAULT_SAMPLE_TYPE)); + + } + @SuppressWarnings("unchecked") public IPredicate<E> getPredicate() { @@ -195,7 +252,7 @@ /** Return a sample from the access path. */ public E[] call() throws Exception { - return sample(limit(), getPredicate()).getSample(); + return sample(limit(), getSampleType(), getPredicate()).getSample(); } @@ -206,7 +263,7 @@ * @return */ public AccessPathSample<E> sample(final int limit, - IPredicate<E> predicate) { + final SampleType sampleType, IPredicate<E> predicate) { final IRelation<E> relation = context.getRelation(predicate); @@ -242,10 +299,25 @@ /* * Add advancer to collect sample. */ + + final Advancer<E> advancer; + switch (sampleType) { + case EVEN: + advancer = new EvenSampleAdvancer<E>(// rangeCount, + limit, accessPath.getFromKey(), accessPath.getToKey()); + break; + case RANDOM: + advancer = new RandomSampleAdvancer<E>(// rangeCount, + seed(), limit, accessPath.getFromKey(), accessPath + .getToKey()); + break; + default: + throw new UnsupportedOperationException("SampleType=" + + sampleType); + } + predicate = ((Predicate<E>) predicate) - .addIndexLocalFilter(new SampleAdvancer<E>(//rangeCount, - limit, accessPath.getFromKey(), accessPath - .getToKey())); + .addIndexLocalFilter(advancer); return new AccessPathSample<E>(limit, context.getAccessPath( relation, predicate)); @@ -256,20 +328,21 @@ /** * An advancer pattern which is designed to take evenly distributed samples - * from an index. 
The caller specifies the #of tuples to be skipped after - * each tuple visited. That number should be computed based on the estimated - * range count of the index and the desired sample size. This can fail to - * gather the desired number of sample if additional filters are applied - * which further restrict the elements selected by the predicate. However, - * it will still faithfully represent the expected cardinality of the - * sampled access path. + * from an index. The caller specifies the #of tuples to be sampled. This + * class estimates the range count of the access path and then computes the + * #of samples to be skipped after each tuple visited. + * <p> + * Note: This can fail to gather the desired number of sample if additional + * filters are applied which further restrict the elements selected by the + * predicate. However, it will still faithfully represent the expected + * cardinality of the sampled access path (tuples tested). * * @author tho...@us... * * @param <E> * The generic type of the elements visited by that access path. */ - private static class SampleAdvancer<E> extends Advancer<E> { + private static class EvenSampleAdvancer<E> extends Advancer<E> { private static final long serialVersionUID = 1L; @@ -296,30 +369,13 @@ * @param limit * The #of samples to visit. */ - public SampleAdvancer(final int limit, final byte[] fromKey, + public EvenSampleAdvancer(final int limit, final byte[] fromKey, final byte[] toKey) { this.limit = limit; this.toKey = toKey; } - /** - * @todo This is taking evenly spaced samples. It is much more efficient - * to take clusters of samples when you can accept the bias. - * Taking a clustered sample really requires knowing where the - * leaf boundaries are in the index, e.g., using - * {@link ILeafCursor}. - * <p> - * Taking all tuples from a few leaves in each sample might - * produce a faster estimation of the correlation when sampling - * join paths. 
- * - * @todo Rather than evenly spaced samples, we should be taking a random - * sample. This could be achieved using a random initial offset - * and random increment as long as the initial offset was in the - * range of a single increment and we compute the increment such - * that N+1 intervals exist. - */ @Override protected void advance(final ITuple<E> tuple) { @@ -336,6 +392,11 @@ toIndex = toKey == null ? ndx.getEntryCount() : ndx .indexOf(toKey); + if (toIndex < 0) { + // convert insert position to index. + toIndex = -toIndex + 1; + } + final int rangeCount = (toIndex - fromIndex); skipCount = Math.max(1, rangeCount / limit); @@ -365,9 +426,125 @@ } - } // class SampleAdvancer + } // class EvenSampleAdvancer /** + * An advancer pattern which is designed to take randomly distributed + * samples from an index. The caller specifies the #of tuples to be sampled. + * This class estimates the range count of the access path and then computes + * a set of random offsets into the access path from which it will collect + * the desired #of samples. + * <p> + * Note: This can fail to gather the desired number of sample if additional + * filters are applied which further restrict the elements selected by the + * predicate. However, it will still faithfully represent the expected + * cardinality of the sampled access path (tuples tested). + * + * @author tho...@us... + * + * @param <E> + * The generic type of the elements visited by that access path. + */ + private static class RandomSampleAdvancer<E> extends Advancer<E> { + + private static final long serialVersionUID = 1L; + + /** The random number generator seed. */ + private final long seed; + + /** The desired total limit on the sample. */ + private final int limit; + + private final byte[] fromKey, toKey; + + /* + * Transient data. This gets initialized when we visit the first tuple. + */ + + /** The offset of each tuple to be sampled. */ + private transient int[] offsets; + /** The #of tuples accepted so far. 
*/ + private transient int nread = 0; + /** The inclusive lower bound of the first tuple actually visited. */ + private transient int fromIndex; + /** The exclusive upper bound of the last tuple which could be visited. */ + private transient int toIndex; + + /** + * + * @param limit + * The #of samples to visit. + */ + public RandomSampleAdvancer(final long seed, final int limit, + final byte[] fromKey, final byte[] toKey) { + + this.seed = seed; + this.limit = limit; + this.fromKey = fromKey; + this.toKey = toKey; + } + + @Override + protected boolean init() { + + final AbstractBTree ndx = (AbstractBTree) src.getIndex(); + + // inclusive lower bound. + fromIndex = fromKey == null ? 0 : ndx.indexOf(fromKey); + + if (fromIndex < 0) { + // convert insert position to index. + fromIndex = -fromIndex + 1; + } + + // exclusive upper bound. + toIndex = toKey == null ? ndx.getEntryCount() : ndx.indexOf(toKey); + + if (toIndex < 0) { + // convert insert position to index. + toIndex = -toIndex + 1; + } + + // get offsets to be sampled. + offsets = new SmartOffsetSampler().getOffsets(seed, limit, + fromIndex, toIndex); + + // Skip to the first tuple. + src.seek(ndx.keyAt(offsets[0])); + + return true; + + } + + @Override + protected void advance(final ITuple<E> tuple) { + + final AbstractBTree ndx = (AbstractBTree) src.getIndex(); + + if (nread < offsets.length - 1) { + + /* + * Skip to the next tuple. + */ + + final int nextIndex = offsets[nread]; + +// System.err.println("limit=" + limit + ", rangeCount=" +// + (toIndex - fromIndex) + ", fromIndex=" + fromIndex +// + ", toIndex=" + toIndex + ", currentIndex=" +// + currentIndex + ", nextIndex=" + nextIndex); + + src.seek(ndx.keyAt(nextIndex)); + + } + + nread++; + + } + + } // class RandomSampleAdvancer + + /** * A sample from an access path. * * @param <E> @@ -459,4 +636,355 @@ } // AccessPathSample + /** + * Interface for obtaining an array of tuple offsets to be sampled. 
+ * + * @author thompsonbry + */ + public interface IOffsetSampler { + + /** + * Return an array of tuple indices which may be used to sample a key + * range of some index. + * <p> + * Note: The caller must stop when it runs out of offsets, not when the + * limit is satisfied, as there will be fewer offsets returned when the + * half open range is smaller than the limit. + * + * @param seed + * The seed for the random number generator -or- ZERO (0L) + * for a random seed. A non-zero value may be used to create + * a repeatable sample. + * @param limit + * The maximum #of tuples to sample. + * @param fromIndex + * The inclusive lower bound. + * @param toIndex + * The exclusive upper bound. + * + * @return An array of at most <i>limit</i> offsets into the index. The + * offsets will lie in the half open range [fromIndex,toIndex). + * The elements of the array will be in ascending order. No + * offsets will be repeated. + * + * @throws IllegalArgumentException + * if <i>limit</i> is non-positive. + * @throws IllegalArgumentException + * if <i>fromIndex</i> is negative. + * @throws IllegalArgumentException + * if <i>toIndex</i> is negative. + * @throws IllegalArgumentException + * unless <i>toIndex</i> is GT <i>fromIndex</i>. + */ + int[] getOffsets(final long seed, int limit, final int fromIndex, + final int toIndex); + } + + /** + * A smart implementation which uses whichever implementation is most + * efficient for the limit and key range to be sampled. 
+ * + * @author thompsonbry + */ + public static class SmartOffsetSampler implements IOffsetSampler { + + /** + * {@inheritDoc} + */ + public int[] getOffsets(final long seed, int limit, + final int fromIndex, final int toIndex) { + + if (limit < 1) + throw new IllegalArgumentException(); + if (fromIndex < 0) + throw new IllegalArgumentException(); + if (toIndex < 0) + throw new IllegalArgumentException(); + if (toIndex <= fromIndex) + throw new IllegalArgumentException(); + + final int rangeCount = (toIndex - fromIndex); + + if (limit > rangeCount) + limit = rangeCount; + + if (limit == rangeCount) { + + // Visit everything. + return new EntireRangeOffsetSampler().getOffsets(seed, limit, + fromIndex, toIndex); + + } + + /* + * Random offsets visiting a subset of the key range using a + * selection without replacement pattern (the same tuple is never + * visited twice). + * + * FIXME When the limit approaches the range count and the range + * count is large (too large for a bit vector or acceptance set + * approach), then we are better off creating a hash set of offsets + * NOT to be visited and then just choosing (rangeCount-limit) + * offsets to reject. This will be less expensive than computing the + * acceptance set directly. However, to really benefit from the + * smaller memory profile, we would also need to wrap that with an + * iterator pattern so the smaller memory representation could be of + * use when the offset[] is applied (e.g., modify the IOffsetSampler + * interface to be an iterator with various ctor parameters rather + * than returning an array as we do today). + */ + + // FIXME BitVectorOffsetSampler is broken. + if (false && rangeCount < Bytes.kilobyte32 * 8) { + + // NB: 32k range count uses a 4k bit vector. 
+ return new BitVectorOffsetSampler().getOffsets(seed, limit, + fromIndex, toIndex); + + } + + /* + * When limit is small (or significantly smaller than the + * rangeCount), then we are much better off creating a hash set of + * the offsets which have been accepted. + * + * Good unless [limit] is very large. + */ + return new AcceptanceSetOffsetSampler().getOffsets(seed, limit, + fromIndex, toIndex); + + } + + } + + /** + * Returns all offsets in the half-open range, but may only be used when + * the limit GTE the range count. + */ + static public class EntireRangeOffsetSampler implements IOffsetSampler { + + /** + * {@inheritDoc} + * + * @throws UnsupportedOperationException + * if <i>limit!=rangeCount</i> (after adjusting for limits + * greater than the rangeCount). + */ + public int[] getOffsets(final long seed, int limit, + final int fromIndex, final int toIndex) { + + if (limit < 1) + throw new IllegalArgumentException(); + if (fromIndex < 0) + throw new IllegalArgumentException(); + if (toIndex < 0) + throw new IllegalArgumentException(); + if (toIndex <= fromIndex) + throw new IllegalArgumentException(); + + final int rangeCount = (toIndex - fromIndex); + + if (limit > rangeCount) + limit = rangeCount; + + if (limit != rangeCount) + throw new UnsupportedOperationException(); + + // offsets of tuples to visit. + final int[] offsets = new int[limit]; + + for (int i = 0; i < limit; i++) { + + offsets[i] = fromIndex + i; + + } + + return offsets; + + } + } + + /** + * Return a randomly selected ordered array of offsets in the given + * half-open range. + * <p> + * This approach is based on a bit vector. If the bit is already marked, + * then the offset has been used and we scan until we find the next free + * offset. This requires [rangeCount] bits, so it works well when the + * rangeCount of the key range is small. For example, a range count of 32k + * requires a 4kb bit vector, which is quite manageable. 
+ * + * FIXME There is something broken in this class, probably an assumption I + * have about how {@link LongArrayBitVector} works. If you enable it in the + * stress test, it will fail. + */ + static public class BitVectorOffsetSampler implements IOffsetSampler { + + public int[] getOffsets(final long seed, int limit, + final int fromIndex, final int toIndex) { + + if (limit < 1) + throw new IllegalArgumentException(); + if (fromIndex < 0) + throw new IllegalArgumentException(); + if (toIndex < 0) + throw new IllegalArgumentException(); + if (toIndex <= fromIndex) + throw new IllegalArgumentException(); + + final int rangeCount = (toIndex - fromIndex); + + if (limit > rangeCount) + limit = rangeCount; + + // offsets of tuples to visit. + final int[] offsets = new int[limit]; + + // create a cleared bit vector of the stated capacity. + final BitVector v = LongArrayBitVector.ofLength(// + rangeCount// capacity (in bits) + ); + + // Random number generator using caller's seed (if given). + final Random rnd = seed == 0L ? new Random() : new Random(seed); + + // Choose random tuple indices for the remaining tuples. + for (int i = 0; i < limit; i++) { + + /* + * Look for an unused bit starting at this index. If necessary, + * this will wrap around to zero. + */ + + // k in (0:rangeCount-1). + int k = rnd.nextInt(rangeCount); + + if (v.getBoolean((long) k)) { + // This bit is already taken. + final long nextZero = v.nextZero((long) k); + if (nextZero != -1L) { + k = (int) nextZero; + } else { + final long priorZero = v.previousZero((long) k); + if (priorZero != -1L) { + k = (int) priorZero; + } else { + // No empty bit found? + throw new AssertionError(); + } + } + } + + assert !v.getBoolean(k); + + // Set the bit. + v.add(k, true); + + assert v.getBoolean(k); + + offsets[i] = fromIndex + k; + + assert offsets[i] < toIndex; + + } + + // put them into sorted order for more efficient traversal. 
+ Arrays.sort(offsets); + + // System.err.println(Arrays.toString(offsets)); + + return offsets; + + } + + } + + /** + * An implementation based on an acceptance set of offsets which have been + * accepted. This implementation is a good choice when the limit moderate + * (~100k) and the rangeCount is significantly greater than the limit. The + * memory demand is the O(limit). + * + * @author thompsonbry + */ + static public class AcceptanceSetOffsetSampler implements IOffsetSampler { + + public int[] getOffsets(final long seed, int limit, + final int fromIndex, final int toIndex) { + + if (limit < 1) + throw new IllegalArgumentException(); + if (fromIndex < 0) + throw new IllegalArgumentException(); + if (toIndex < 0) + throw new IllegalArgumentException(); + if (toIndex <= fromIndex) + throw new IllegalArgumentException(); + + final int rangeCount = (toIndex - fromIndex); + + if (limit > rangeCount) + limit = rangeCount; + + // offsets of tuples to visit. + final int[] offsets = new int[limit]; + + // hash set of accepted offsets. + final IntOpenHashSet v = new IntOpenHashSet( + rangeCount// capacity + ); + + // Random number generator using caller's seed (if given). + final Random rnd = seed == 0L ? new Random() : new Random(seed); + + // Choose random tuple indices for the remaining tuples. + for (int i = 0; i < limit; i++) { + + /* + * Look for an unused bit starting at this index. If necessary, + * this will wrap around to zero. + */ + + // k in (0:rangeCount-1). + int k = rnd.nextInt(rangeCount); + + int round = 0; + while (v.contains(k)) { + + k++; + + if (k == rangeCount) { + // wrap around. + if (++round > 1) { + // no empty bit found? + throw new AssertionError(); + } + // reset starting index. + k = 0; + } + + } + + assert !v.contains(k); + + // Set the bit. + v.add(k); + + offsets[i] = fromIndex + k; + + assert offsets[i] < toIndex; + + } + + // put them into sorted order for more efficient traversal. 
+ Arrays.sort(offsets); + + // System.err.println(Arrays.toString(offsets)); + + return offsets; + + } + + } + } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2011-02-17 22:58:07 UTC (rev 4207) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2011-02-18 19:39:00 UTC (rev 4208) @@ -60,6 +60,7 @@ import com.bigdata.bop.NV; import com.bigdata.bop.PipelineOp; import com.bigdata.bop.ap.SampleIndex; +import com.bigdata.bop.ap.SampleIndex.SampleType; import com.bigdata.bop.bindingSet.HashBindingSet; import com.bigdata.bop.engine.IRunningQuery; import com.bigdata.bop.engine.LocalChunkMessage; @@ -534,11 +535,16 @@ * Materialize a random sample from the access path. */ - final SampleIndex sampleOp = new SampleIndex( +// final SampleType sampleType = SampleType.EVEN; + final SampleType sampleType = SampleType.RANDOM; + + final SampleIndex<?> sampleOp = new SampleIndex( new BOp[] {}, // NV.asMap(// new NV(SampleIndex.Annotations.PREDICATE, pred),// - new NV(SampleIndex.Annotations.LIMIT, limit))); + new NV(SampleIndex.Annotations.LIMIT, limit),// + new NV(SampleIndex.Annotations.SAMPLE_TYPE, sampleType.name())// + )); sample = new VertexSample(rangeCount, limit, false/* exact */, sampleOp.eval(context)); @@ -1081,7 +1087,7 @@ // @todo Why not use a factory which avoids bopIds already in use? new NV(PipelineJoin.Annotations.PREDICATE, vTarget.pred.setBOpId(3)), // disallow parallel evaluation of tasks. - new NV(PipelineJoin.Annotations.MAX_PARALLEL,1), + new NV(PipelineOp.Annotations.MAX_PARALLEL,1), // disallow parallel evaluation of chunks. new NV(PipelineJoin.Annotations.MAX_PARALLEL_CHUNKS,0), // disable access path coalescing @@ -1172,6 +1178,11 @@ * cardinality at 1600L (lower bound). 
In fact, the cardinality * is 16*175000. This falsely low estimate can cause solutions * which are really better to be dropped. + * + * @todo we should mark [nout] when we do this so that it + * shows up in the trace! Also, the rangeCount is sometimes + * falsely high. However, that should be corrected by random + * resampling of the vertices and paths. */ nout = sumRangeCount; @@ -1226,11 +1237,14 @@ /** * The cumulative estimated cardinality of the path. This is zero for an * empty path. For a path consisting of a single edge, this is the - * estimated cardinality of that edge. When creating a new path adding - * an edge to an existing path, the cumulative cardinality of the new - * path is the cumulative cardinality of the existing path plus the + * estimated cardinality of that edge. When creating a new path by + * adding an edge to an existing path, the cumulative cardinality of the + * new path is the cumulative cardinality of the existing path plus the * estimated cardinality of the cutoff join of the new edge given the * input sample of the existing path. + * + * @todo track this per vertex as well as the total for more interesting + * traces in showPath(Path). 
*/ final public long cumulativeEstimatedCardinality; @@ -1672,7 +1686,7 @@ static public String showTable(final Path[] a,final Path[] pruned) { final StringBuilder sb = new StringBuilder(); final Formatter f = new Formatter(sb); - f.format("%5s %10s%1s * %7s (%3s/%3s) = %10s%1s : %10s %10s", + f.format("%5s %10s%1s * %10s (%6s/%6s) = %10s%1s : %10s %10s", "path",// "rangeCount",// "",// sourceSampleExact @@ -1698,9 +1712,9 @@ } } if (x.sample == null) { - f.format("p[%2d] %10d%1s * %7s (%3s/%3s) = %10s%1s : %10s", i, "N/A", "", "N/A", "N/A", "N/A", "N/A", "", "N/A"); + f.format("p[%2d] %10d%1s * %10s (%6s/%6s) = %10s%1s : %10s", i, "N/A", "", "N/A", "N/A", "N/A", "N/A", "", "N/A"); } else { - f.format("p[%2d] %10d%1s * % 7.2f (%3d/%3d) = % 10d%1s : % 10d", i, + f.format("p[%2d] %10d%1s * % 10.2f (%6d/%6d) = % 10d%1s : % 10d", i, x.sample.rangeCount,// x.sample.sourceSampleExact?"E":"",// x.sample.f,// @@ -1730,6 +1744,66 @@ } /** + * Show the details of a join path, including the estimated cardinality and + * join hit ratio for each step in the path. + * + * @param p + * The join path. + */ + public static String showPath(final Path x) { + if(x == null) + throw new IllegalArgumentException(); + final StringBuilder sb = new StringBuilder(); + final Formatter f = new Formatter(sb); + { + /* + * @todo show sumEstCard for each step of the path. Only the + * estimate for the current path length is currently preserved. We + * would need to preserve the estimate for each step in the path to + * show it here. + * + * @todo show limit on EdgeSample? 
+ */ + f.format("%6s %10s%1s * %10s (%6s/%6s) = %10s%1s",// : %10s",// + "edge", + "rangeCount",// + "",// sourceSampleExact + "f",// + "out",// + "in",// + "estCard",// + ""// estimateIs(Exact|LowerBound|UpperBound) +// "sumEstCard",// + ); + int i = 0; + for (Edge e : x.edges) { + sb.append("\n"); + if (e.sample == null) { + f.format("%6s %10d%1s * %10s (%6s/%6s) = %10s%1s",// + e.getLabel(),// + "N/A", "", "N/A", "N/A", "N/A", "N/A", "", "N/A"); + } else { + f.format("%6s %10d%1s * % 10.2f (%6d/%6d) = % 10d%1s",// + e.getLabel(),// + e.sample.rangeCount,// + e.sample.sourceSampleExact ? "E" : "",// + e.sample.f,// + e.sample.outputCount,// + e.sample.inputCount,// + e.sample.estimatedCardinality,// + e.sample.estimateEnum.getCode()// +// e.cumulativeEstimatedCardinality// + ); + } +// sb.append("\nv[" + vertexIds[i] + "] " + e.toString()); + i++; + } + } + sb.append("\n"); + return sb.toString(); + } + + /** * A runtime optimizer for a join graph. The {@link JoinGraph} bears some * similarity to ROX (Runtime Optimizer for XQuery), but has several * significant differences: @@ -2148,6 +2222,18 @@ // Should be one winner. assert paths.length == 1; + if (log.isInfoEnabled()) { + + /* + * @todo It would be nice to show the plan with the filters + * attached, but that might be something that the caller does. 
+ */ + log.info("\n*** Selected join path: " + + Arrays.toString(paths[0].getVertexIds()) + "\n" + + showPath(paths[0])); + + } + return paths[0]; } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java 2011-02-17 22:58:07 UTC (rev 4207) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java 2011-02-18 19:39:00 UTC (rev 4208) @@ -204,9 +204,7 @@ * The maximum #of solutions which will be generated by the join * (default {@value #DEFAULT_LIMIT}). * - * @todo Unit tests for this feature. It currently breaks out of loops - * but does not explicitly interrupt the task. See the uses of - * {@link JoinTask#limit}. + * @todo Unit tests for this feature (it is used by the JoinGraph). */ String LIMIT = PipelineJoin.class.getName() + ".limit"; @@ -594,6 +592,8 @@ /** * An optional limit on the #of solutions to be produced. The limit is * ignored if it is {@link Long#MAX_VALUE}. + * + * @see Annotations#LIMIT */ final private long limit; @@ -808,6 +808,9 @@ // // stats.elapsed.add(System.currentTimeMillis() - begin); +// } finally { +// System.err.println(joinOp.toString()); +// System.err.println(stats.toString()); } } @@ -1624,6 +1627,12 @@ halted(); if (limit != Long.MAX_VALUE && exactOutputCount.get() > limit) { + // break query @ limit. + if (log.isInfoEnabled()) + log.info("Breaking query @ limit: limit=" + limit + + ", exactOutputCount=" + + exactOutputCount.get()); +// halt((Void) null); return null; } @@ -1713,6 +1722,12 @@ if (limit != Long.MAX_VALUE && exactOutputCount.incrementAndGet() > limit) { + // break query @ limit. 
+ if (log.isInfoEnabled()) + log.info("Breaking query @ limit: limit=" + limit + + ", exactOutputCount=" + + exactOutputCount.get()); +// halt((Void) null); break; } @@ -1927,6 +1942,12 @@ if (limit != Long.MAX_VALUE && exactOutputCount.incrementAndGet() > limit) { + // break query @ limit. + if (log.isInfoEnabled()) + log.info("Breaking query @ limit: limit=" + limit + + ", exactOutputCount=" + + exactOutputCount.get()); +// halt((Void) null); break; } @@ -2119,6 +2140,12 @@ if (limit != Long.MAX_VALUE && exactOutputCount.incrementAndGet() > limit) { + // break query @ limit. + if (log.isInfoEnabled()) + log.info("Breaking query @ limit: limit=" + limit + + ", exactOutputCount=" + + exactOutputCount.get()); +// halt((Void) null); break; } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/filter/Advancer.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/filter/Advancer.java 2011-02-17 22:58:07 UTC (rev 4207) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/filter/Advancer.java 2011-02-18 19:39:00 UTC (rev 4208) @@ -63,6 +63,18 @@ } + /** + * Hook for one-time initialization invoked before the advancer visits the + * first tuple. The default implementation simply returns <code>true</code>. + * + * @return <code>false</code> if nothing should be visited. + */ + protected boolean init() { + + return true; + + } + /** * Offers an opportunity to advance the source {@link ITupleCursor} to a * new key using {@link ITupleCursor#seek(byte[]). @@ -87,6 +99,11 @@ final private Advancer<E> filter; /** + * Used to invoke {@link Advancer#init()}. + */ + private boolean firstTime = true; + + /** * Set true iff we exceed the bounds on the {@link ITupleCursor}. For * example, if we run off the end of an index partition. 
This is used to * simulate the exhaustion of the cursor when you advance past its @@ -116,6 +133,20 @@ public boolean hasNext() { + if(firstTime) { + + if (!filter.init()) { + + exhausted = true; + + return false; + + } + + firstTime =false; + + } + if(exhausted) return false; return src.hasNext(); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestSampleIndex.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestSampleIndex.java 2011-02-17 22:58:07 UTC (rev 4207) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestSampleIndex.java 2011-02-18 19:39:00 UTC (rev 4208) @@ -41,6 +41,10 @@ import com.bigdata.bop.NV; import com.bigdata.bop.Var; import com.bigdata.bop.IPredicate.Annotations; +import com.bigdata.bop.ap.SampleIndex.AcceptanceSetOffsetSampler; +import com.bigdata.bop.ap.SampleIndex.IOffsetSampler; +import com.bigdata.bop.ap.SampleIndex.SampleType; +import com.bigdata.bop.ap.SampleIndex.SmartOffsetSampler; import com.bigdata.journal.BufferMode; import com.bigdata.journal.ITx; import com.bigdata.journal.Journal; @@ -175,8 +179,124 @@ } /** + * Stress test for {@link IOffsetSampler}s. + * + * TODO Look at the distributions of the different {@link IOffsetSampler}s. + * They should be uniform. + */ + public void test_offsetSamplers() { + + // Note: Only handles a special case! +// new GetOffsetsEntireRange(), + + final IOffsetSampler[] samplers = new IOffsetSampler[] { + new SmartOffsetSampler(), // +// new BitVectorOffsetSampler(),// + new AcceptanceSetOffsetSampler(),// +// new RejectionSetOffsetSampler(), // + }; + + final Random r = new Random(); + + final int ntrials = 1000; + + for (int trial = 0; trial < ntrials; trial++) { + + // 10% seed is 0L (which gets turned into random anyway) + final long seed = r.nextDouble() < .1 ? 0 : r.nextLong(); + + final int entryCount = r.nextInt(100000); + + // 10% fromIndex is zero. 
+ final int fromIndex = r.nextDouble() < .1 ? 0 : r + .nextInt(entryCount); + + final int remaining = entryCount - fromIndex; + + final int toIndex = r.nextDouble() < .1 ? entryCount : (fromIndex + + r.nextInt(remaining) + 1); + + final int rangeCount = toIndex - fromIndex; + + final int limit = r.nextDouble() < .1 ? r.nextInt(100) + 1 : r + .nextDouble() < .5 ? r.nextInt(entryCount) + 1 : r + .nextInt(10000) + 1; + + for (IOffsetSampler sampler : samplers) { + + try { + + final long begin = System.currentTimeMillis(); + + final int[] offsets = sampler.getOffsets(seed, limit, fromIndex, toIndex); + + final long elapsed = System.currentTimeMillis() - begin; + + if (elapsed > 1000) { + log.warn("Slow: elapsed=" + elapsed + ", class=" + + sampler.getClass() + ", seed=" + seed + + ", limit=" + limit + ", fromIndex=" + + fromIndex + ",toIndex=" + toIndex); + } + + // check the #of offsets returned. + final int noffsets = offsets.length; + assertTrue(noffsets <= limit); + if (limit > rangeCount) + assertTrue(noffsets <= rangeCount); + else + assertTrue(noffsets == limit); + + // check offsets ordered, within range, and w/o dups. + int lastOffset = -1; + for (int j = 0; j < offsets.length; j++) { + + final int offset = offsets[j]; + + if (offset < fromIndex) + fail("index=" + j + + ", offset LT fromIndex: offset=" + offset + + ", fromIndex=" + fromIndex); + + if (offset >= toIndex) + fail("index=" + j + ", offset GTE toIndex: offset=" + + offset + ", toIndex=" + toIndex); + + if (offset <= lastOffset) { + fail("index=" + j + ", lastOffset=" + lastOffset + + ", but offset=" + offset); + } + + lastOffset = offset; + + } + + } catch (Throwable t) { + + fail("sampler=" + sampler.getClass() + ", seed=" + seed + + ", limit=" + limit + ", fromIndex=" + fromIndex + + ",toIndex=" + toIndex + ", rangeCount=" + + rangeCount, t); + + } + + } + + } + + } + + /** * Unit test verifies some aspects of a sample taken from a local index * (primarily that the sample respects the limit). 
+ * + * @todo test when the range count is zero. + * + * @todo test when the inclusive lower bound of a key range is an insertion + * point (no tuple for that key). + * + * @todo test when the exclusive upper bound of a key range is an insertion + * point (no tuple for that key). */ public void test_something() { @@ -194,42 +314,59 @@ new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED)// ); - final BOpContextBase context = new BOpContextBase(null/* fed */, jnl/* indexManager */); - final int[] limits = new int[] { // 1, 9, 19, 100, 217, 900,// nrecords, nrecords + 1 }; - for (int limit : limits) { + for (SampleType sampleType : SampleType.values()) { - final SampleIndex<E> sampleOp = new SampleIndex<E>( - new BOp[0], - NV - .asMap( - // - new NV(SampleIndex.Annotations.PREDICATE, - predicate),// - new NV(SampleIndex.Annotations.LIMIT, limit)// - )); + if (log.isInfoEnabled()) + log.info("Testing: SampleType=" + sampleType); - final E[] a = sampleOp.eval(context); + for (int limit : limits) { -// System.err.println("limit=" + limit + ", nrecords=" + nrecords -// + ", nsamples=" + a.length); -// -// for (int i = 0; i < a.length && i < 10; i++) { -// System.err.println("a[" + i + "]=" + a[i]); -// } + doTest(nrecords, limit, sampleType, predicate); - final int nexpected = Math.min(nrecords, limit); + } - assertEquals("#samples (limit=" + limit + ", nrecords=" + nrecords - + ", nexpected=" + nexpected + ")", nexpected, a.length); - } } + + private void doTest(final int nrecords, final int limit, + final SampleType sampleType, final IPredicate<E> predicate) { + final BOpContextBase context = new BOpContextBase(null/* fed */, jnl/* indexManager */); + + final SampleIndex<E> sampleOp = new SampleIndex<E>( new BOp[0], // + NV.asMap(// + new NV(SampleIndex.Annotations.PREDICATE, predicate),// + new NV(SampleIndex.Annotations.LIMIT, limit),// + new NV(SampleIndex.Annotations.SAMPLE_TYPE, sampleType + .name())// + )); + + final E[] a = sampleOp.eval(context); + + if 
(log.isInfoEnabled()) { + + System.err.println("limit=" + limit + ", nrecords=" + nrecords + + ", nsamples=" + a.length + ", sampleType=" + sampleType); + + for (int i = 0; i < a.length && i < 10; i++) { + + System.err.println("a[" + i + "]=" + a[i]); + + } + + } + + final int nexpected = Math.min(nrecords, limit); + + assertEquals("#samples (limit=" + limit + ", nrecords=" + nrecords + + ", nexpected=" + nexpected + ")", nexpected, a.length); + } + } Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java 2011-02-17 22:58:07 UTC (rev 4207) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java 2011-02-18 19:39:00 UTC (rev 4208) @@ -91,7 +91,7 @@ * When true, the test uses hardcoded access to an existing Journal already * loaded with some BSBM data set. */ - private static final boolean useExistingJournal = true; + private static final boolean useExistingJournal = false; // private static final long existingPC = 284826; // BSBM 100M @@ -219,7 +219,7 @@ */ public void test_bsbm_q5() throws Exception { -// QueryLog.logTableHeader(); + QueryLog.logTableHeader(); final String namespace = getNamespace(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2011-02-17 22:58:16
|
Revision: 4207 http://bigdata.svn.sourceforge.net/bigdata/?rev=4207&view=rev Author: thompsonbry Date: 2011-02-17 22:58:07 +0000 (Thu, 17 Feb 2011) Log Message: ----------- Redo of failed commit. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/Bind.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IVariable.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/Var.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregate/AggregateBase.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregate/IAggregate.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/AbstractSubqueryOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/PartitionedJoinGroup.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/Steps.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/Union.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/AbstractRunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/ChunkedRunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/FederatedQueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/GroupByOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/MemoryGroupByOp.java 
branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/MemorySortOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/SliceOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestVar.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestSubqueryOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestUnion.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine_DistinctOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine_GroupByOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine_Slice.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine_SortOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestFederatedQueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/solutions/TestMemoryGroupByOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/solutions/TestMemorySortOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/solutions/TestSliceOp.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/COUNT.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/GROUP_CONCAT.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/MAX.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/MIN.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/SAMPLE.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/SUM.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOPredicate.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/TestBOpUtility.java 
branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/AbstractJoinGraphTestCase.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/QueryHints.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/Rule2BOpUtility.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/bench/NanoSparqlClient.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/bench/NanoSparqlServer.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/SOp2BOpUtility.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestQueryHints.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/GroupByStagedOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/GroupByUtility.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/raba/ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/solutions/TestGroupByUtility.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/bench/RunQuery.java Removed Paths: ------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngineTestAnnotations.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java 2011-02-17 13:38:48 UTC (rev 4206) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java 2011-02-17 22:58:07 UTC (rev 4207) @@ -246,6 +246,36 @@ return args[index]; } + +// /** +// * Return a new {@link BOp} where the specified argument has been replaced +// * by the given value. 
This is a copy-on-write operation. The original +// * {@link BOp} is NOT modified by this method. +// * +// * @param index +// * The index of the argument whose value will be changed. +// * @param arg +// * The new value for that argument. +// * +// * @return A new operator in which the given argument has been replaced. +// * +// * @throws IndexOutOfBoundsException +// * unless <i>index</i> is in (0:{@link #arity()}]. +// * @throws IllegalArgumentException +// * if <i>arg</i> is <code>null</code>. +// */ +// public BOp setArg(final int index,final BOp arg) { +// +// if(arg == null) +// throw new IllegalArgumentException(); +// +// final BOpBase tmp = this.clone(); +// +// tmp._set(index, arg); +// +// return tmp; +// +// } /** * Set the value of an operand. @@ -264,7 +294,7 @@ * * @todo thread safety and visibility.... */ - final protected void set(final int index, final BOp op) { + final protected void _set(final int index, final BOp op) { this.args[index] = op; Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java 2011-02-17 13:38:48 UTC (rev 4206) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java 2011-02-17 22:58:07 UTC (rev 4207) @@ -79,7 +79,8 @@ * <ol> * <li>{@link BOp.Annotations#EVALUATION_CONTEXT} is * {@link BOpEvaluationContext#CONTROLLER}</li> - * <li>{@link PipelineOp.Annotations#THREAD_SAFE} is <code>false</code></li> + * <li>{@link PipelineOp.Annotations#MAX_PARALLEL} is <code>1</code></li> + * <li>{@link PipelineOp.Annotations#PIPELINED} is <code>true</code></li> * </ol> * Under these circumstances, it is possible for the {@link IQueryClient} to * atomically decide that a specific invocation of the operator task for the @@ -90,7 +91,8 @@ * controller. 
In addition, the operator must declare that it is NOT thread * safe in order for the query engine to serialize its evaluation tasks. * - * @return + * @todo This should be a ctor parameter. We just have to update the test + * suites for the changed method signature. */ public boolean isLastInvocation() { return lastInvocation.get(); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/Bind.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/Bind.java 2011-02-17 13:38:48 UTC (rev 4206) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/Bind.java 2011-02-17 22:58:07 UTC (rev 4207) @@ -24,7 +24,7 @@ /** * @param var - * The {@link IVariable} which will be bound to result of + * The {@link IVariable} which will be bound to the result of * evaluating the associated value expression. * @param expr * The {@link IValueExpression} to be evaluated. @@ -44,23 +44,42 @@ super(args, annotations); } + /** + * Return the variable which will be bound to the result of evaluating the + * associated value expression. + */ @SuppressWarnings("unchecked") - @Override + public IVariable<E> getVar() { + + return (IVariable<E>) get(0); + + } + + /** + * Return the value expression. + */ + @SuppressWarnings("unchecked") + public IValueExpression<E> getExpr() { + + return (IValueExpression<E>) get(1); + + } + public E get(final IBindingSet bindingSet) { - - final IVariable<E> var = (IVariable<E>) get(0); - final IValueExpression<E> expr = (IValueExpression<E>) get(1); + final IVariable<E> var = getVar(); + final IValueExpression<E> expr = getExpr(); + // evaluate the value expression. E val = expr.get(bindingSet); - + // bind the variable as a side-effect. 
bindingSet.set(var, new Constant<E>(val)); - + // return the evaluated value return val; - + } } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IVariable.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IVariable.java 2011-02-17 13:38:48 UTC (rev 4206) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IVariable.java 2011-02-17 22:58:07 UTC (rev 4207) @@ -41,5 +41,10 @@ * {@link IVariableOrConstant#getName()} */ int hashCode(); + + /** + * Return <code>true</code> iff this is the special variable <code>*</code> + */ + boolean isWildcard(); } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java 2011-02-17 13:38:48 UTC (rev 4206) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java 2011-02-17 22:58:07 UTC (rev 4207) @@ -35,7 +35,7 @@ import org.apache.log4j.Logger; import com.bigdata.bop.engine.BOpStats; -import com.bigdata.bop.engine.ChunkedRunningQuery; +import com.bigdata.bop.engine.IChunkMessage; import com.bigdata.bop.engine.QueryEngine; import com.bigdata.bop.solutions.SliceOp; import com.bigdata.relation.accesspath.IAsynchronousIterator; @@ -101,24 +101,65 @@ boolean DEFAULT_SHARED_STATE = false; /** - * Annotation may be used to indicate operators which are not thread - * safe (default {@value #DEFAULT_THREAD_SAFE}). Concurrent invocations - * of the evaluation task will not be scheduled for a given shard for an - * operator which is not thread safe. - * - * @todo Unit tests for {@link ChunkedRunningQuery} to verify that it - * eventually schedules operator tasks which were deferred to - * prevent concurrent evaluation. - * - * @todo This is currently not used. 
However, it could simplify the - * logic for operators, such as SLICE, which otherwise depend on - * {@link #SHARED_STATE} to provide their own synchronization. + * This option may be used to place an optional limit on the #of + * concurrent tasks which may run for the same (bopId,shardId) for a + * given query (default {@value #DEFAULT_MAX_PARALLEL}). The query is + * guaranteed to make progress as long as this is some positive integer. + * While limiting this value can limit the concurrency with which + * certain operators are evaluated and that can have a negative effect + * on the throughput, it controls both the demand on the JVM heap and + * the #of threads consumed. + * <p> + * Note: {@link #MAX_PARALLEL} is the annotation for pipelined joins + * which has the strongest effect on performance. Changes to both + * {@link #MAX_MESSAGES_PER_TASK} and {@link #PIPELINE_QUEUE_CAPACITY} + * have less effect and performance tends to be best around a modest + * value (10) for those annotations. */ - String THREAD_SAFE = PipelineOp.class.getName() + ".threadSafe"; + String MAX_PARALLEL = PipelineOp.class.getName() + ".maxParallel"; - boolean DEFAULT_THREAD_SAFE = true; + /** + * @see #MAX_PARALLEL + */ + int DEFAULT_MAX_PARALLEL = 5; /** + * For a pipelined operator, this is the maximum number of messages that + * will be assigned to a single invocation of the evaluation task for + * that operator (default {@value #DEFAULT_MAX_MESSAGES_PER_TASK}). By + * default the {@link QueryEngine} MAY (and generally does) combine + * multiple {@link IChunkMessage}s from the work queue of an operator + * for each evaluation pass made for that operator. When ONE (1), each + * {@link IChunkMessage} will be assigned to a new evaluation task for + * the operator. The value of this annotation must be a positive + * integer. If the operator is not-pipelined, then the maximum amount of + * data to be assigned to an evaluation task is governed by + * {@link #MAX_MEMORY} instead. 
+ */ + String MAX_MESSAGES_PER_TASK = PipelineOp.class.getName() + + ".maxMessagesPerTask"; + + /** + * @see #MAX_MESSAGES_PER_TASK + */ + int DEFAULT_MAX_MESSAGES_PER_TASK = 10; + + /** + * For pipelined operators, this is the capacity of the input queue for + * that operator. Producers will block if the input queue for the target + * operator is at its capacity. This provides an important limit on the + * amount of data which can be buffered on the JVM heap during pipelined + * query evaluation. + */ + String PIPELINE_QUEUE_CAPACITY = PipelineOp.class.getName() + + ".pipelineQueueCapacity"; + + /** + * @see #PIPELINE_QUEUE_CAPACITY + */ + int DEFAULT_PIPELINE_QUEUE_CAPACITY = 10; + + /** * Annotation used to mark pipelined (aka vectored) operators. When * <code>false</code> the operator will use either "at-once" or * "blocked" evaluation depending on how it buffers its data for @@ -126,6 +167,9 @@ */ String PIPELINED = PipelineOp.class.getName() + ".pipelined"; + /** + * @see #PIPELINED + */ boolean DEFAULT_PIPELINED = true; /** @@ -159,87 +203,11 @@ */ String MAX_MEMORY = PipelineOp.class.getName() + ".maxMemory"; + /** + * @see #MAX_MEMORY + */ int DEFAULT_MAX_MEMORY = 0; -// /** -// * Annotation used to mark a set of (non-optional) joins which may be -// * freely reordered by the query optimizer in order to minimize the -// * amount of work required to compute the solutions. -// * <p> -// * Note: Optional joins MAY NOT appear within a join graph. Optional -// * joins SHOULD be evaluated as part of the "tail plan" following the -// * join graph, but before operations such as SORT, DISTINCT, etc. When -// * the query plan includes {@link #CONDITIONAL_GROUP}s, those groups -// * include a leading {@link #JOIN_GRAPH} (required joins) followed by -// * zero or more optional joins. -// */ -// String JOIN_GRAPH = PipelineOp.class.getName() + ".joinGraph"; - -// /** -// * Annotation used to mark a set of operators belonging to a conditional -// * binding group. 
Bindings within with the group will be discarded if -// * any required operator in the group fails. For example, if a binding -// * set exits via the alternative sink for a required join then any -// * conditional bindings within the group will be discarded. -// * <p> -// * Together with {@link #ALT_SINK_GROUP}, the {@link #CONDITIONAL_GROUP} -// * annotation provides the information necessary in order to decide the -// * re-entry point in the query plan when a join within an conditional -// * binding group fails. -// * <p> -// * The {@link #CONDITIONAL_GROUP} annotation controls the -// * {@link IBindingSet#push()} and {@link IBindingSet#pop(boolean)} of -// * individual solutions as they propagate through the pipeline. When a -// * pipeline starts, the {@link IBindingSet} stack contains only the top -// * level symbol table (i.e., name/value bindings). When an intermediate -// * solution enters a {@link PipelineOp} marked as belonging to a -// * {@link #CONDITIONAL_GROUP}, a new symbol table is -// * {@link IBindingSet#push() pushed} onto the stack for that solution. -// * If the solution leaves the optional join group via the default sink, -// * then the symbol table is "saved" when it is -// * {@link IBindingSet#pop(boolean) popped} off of the stack. If the -// * solution leaves the join group via the alternative sink, then the -// * symbol table is discarded when it is {@link IBindingSet#pop(boolean) -// * popped} off of the stack. This provides for conditional binding of -// * variables within the operators of the group. -// * <p> -// * The value of the {@link #CONDITIONAL_GROUP} is an {@link Integer} -// * which uniquely identifies the group within the query. -// * -// * @deprecated The binding set stack push/pop mechanisms are not -// * sufficient to support optional join groups. This -// * annotation will be removed unless it proves valuable for -// * marking the elements of a join group, in which case the -// * javadoc needs to be updated. 
-// */ -// String CONDITIONAL_GROUP = PipelineOp.class.getName() + ".conditionalGroup"; - -// /** -// * Annotation used to designate the target when a required operator -// * within an {@link #CONDITIONAL_GROUP} fails. The value of this -// * annotation must be the {@link #CONDITIONAL_GROUP} identifier -// * corresponding to the next conditional binding group in the query -// * plan. If there is no such group, then the {@link #ALT_SINK_REF} -// * should be used instead to specify the target operator in the -// * pipeline, e.g., a {@link SliceOp}. -// * <p> -// * The target {@link #CONDITIONAL_GROUP} is specified (rather than the -// * bopId of the target join) since the non-optional joins in the target -// * {@link #CONDITIONAL_GROUP} be reordered by the query optimizer. The -// * entry point for solutions redirected to the {@link #ALT_SINK_GROUP} -// * is therefore the first operator in the target -// * {@link #CONDITIONAL_GROUP}. This decouples the routing decisions from -// * the join ordering decisions. -// * -// * @see #CONDITIONAL_GROUP -// * @see #ALT_SINK_REF -// * -// * @deprecated The binding set stack push/pop mechanisms are not -// * sufficient to support optional join groups. This -// * annotation will be removed. -// */ -// String ALT_SINK_GROUP = PipelineOp.class.getName() + ".altSinkGroup"; - } /** @@ -261,13 +229,19 @@ final Map<String, Object> annotations) { super(args, annotations); - + + if (getMaxParallel() < 1) + throw new IllegalArgumentException(Annotations.MAX_PARALLEL + "=" + + getMaxParallel()); + + // @todo range check the rest of the annotations. 
+ } /** * @see BufferAnnotations#CHUNK_CAPACITY */ - public int getChunkCapacity() { + final public int getChunkCapacity() { return getProperty(Annotations.CHUNK_CAPACITY, Annotations.DEFAULT_CHUNK_CAPACITY); @@ -277,7 +251,7 @@ /** * @see BufferAnnotations#CHUNK_OF_CHUNKS_CAPACITY */ - public int getChunkOfChunksCapacity() { + final public int getChunkOfChunksCapacity() { return getProperty(Annotations.CHUNK_OF_CHUNKS_CAPACITY, Annotations.DEFAULT_CHUNK_OF_CHUNKS_CAPACITY); @@ -287,7 +261,7 @@ /** * @see BufferAnnotations#CHUNK_TIMEOUT */ - public long getChunkTimeout() { + final public long getChunkTimeout() { return getProperty(Annotations.CHUNK_TIMEOUT, Annotations.DEFAULT_CHUNK_TIMEOUT); @@ -334,31 +308,49 @@ * @see Annotations#PIPELINED * @see Annotations#MAX_MEMORY */ - public boolean isPipelined() { - return getProperty(PipelineOp.Annotations.PIPELINED, + final public boolean isPipelined() { + + return getProperty(PipelineOp.Annotations.PIPELINED, PipelineOp.Annotations.DEFAULT_PIPELINED); + } +// /** +// * Return <code>true</code> iff concurrent invocations of the operator are +// * permitted. +// * <p> +// * Note: Operators which are not thread-safe still permit concurrent +// * evaluation for <em>distinct</em> partitions. In order to ensure that all +// * invocations of the operator within a query are serialized (no more than +// * one concurrent invocation) you must also specify +// * {@link BOpEvaluationContext#CONTROLLER}. +// * +// * @see Annotations#THREAD_SAFE +// * @see BOp.Annotations#EVALUATION_CONTEXT +// */ +// public boolean isThreadSafe() { +// +// return getProperty(Annotations.THREAD_SAFE, +// Annotations.DEFAULT_THREAD_SAFE); +// +// } + /** - * Return <code>true</code> iff concurrent invocations of the operator are - * permitted. - * <p> - * Note: Operators which are not thread-safe still permit concurrent - * evaluation for <em>distinct</em> partitions. 
In order to ensure that all - * invocations of the operator within a query are serialized (no more than - * one concurrent invocation) you must also specify - * {@link BOpEvaluationContext#CONTROLLER}. + * The maximum parallelism with which tasks may be evaluated for this + * operator (this is a per-shard limit in scale-out). A value of ONE (1) + * indicates that at most ONE (1) instance of this task may be executing in + * parallel for a given shard and may be used to indicate that the operator + * evaluation task is not thread-safe. * - * @see Annotations#THREAD_SAFE - * @see BOp.Annotations#EVALUATION_CONTEXT + * @see Annotations#MAX_PARALLEL */ - public boolean isThreadSafe() { + final public int getMaxParallel() { - return getProperty(Annotations.THREAD_SAFE, - Annotations.DEFAULT_THREAD_SAFE); - - } + return getProperty(PipelineOp.Annotations.MAX_PARALLEL, + PipelineOp.Annotations.DEFAULT_MAX_PARALLEL); + } + /** * Return <code>true</code> iff {@link #newStats()} must be shared across * all invocations of {@link #eval(BOpContext)} for this operator for a @@ -366,7 +358,7 @@ * * @see Annotations#SHARED_STATE */ - public boolean isSharedState() { + final public boolean isSharedState() { return getProperty(Annotations.SHARED_STATE, Annotations.DEFAULT_SHARED_STATE); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/Var.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/Var.java 2011-02-17 13:38:48 UTC (rev 4206) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/Var.java 2011-02-17 22:58:07 UTC (rev 4207) @@ -124,6 +124,12 @@ } + public boolean isWildcard() { + + return name.length() == 1 && name.charAt(0) == '*'; + + } + // public int compareTo(IVariableOrConstant arg0) { // // // order vars before ids Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregate/AggregateBase.java 
=================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregate/AggregateBase.java 2011-02-17 13:38:48 UTC (rev 4206) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregate/AggregateBase.java 2011-02-17 22:58:07 UTC (rev 4207) @@ -4,11 +4,11 @@ import com.bigdata.bop.BOp; import com.bigdata.bop.BOpBase; +import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IValueExpression; import com.bigdata.bop.ImmutableBOp; import com.bigdata.bop.NV; import com.bigdata.bop.Var; -import com.bigdata.bop.BOp.Annotations; /** * Abstract base class for aggregate functions. @@ -17,21 +17,100 @@ * * @param <E> */ -abstract public class AggregateBase<E> extends ImmutableBOp implements IAggregate<E> { +public class AggregateBase<E> extends ImmutableBOp implements IAggregate<E> { /** * */ private static final long serialVersionUID = 1L; + /** + * A type safe enumeration of well known aggregate functions. + */ + static public enum FunctionCode { + + /** + * The count of the #of computed value expressions within the solution + * group. In combination with the special keyword DISTINCT, this is the + * #of distinct values from the computed value expression within the + * solution group. When given with the special variable <code>*</code>, + * this is the count of the #of solutions (or distinct solutions if also + * combined with DISTINCT) within the group. + */ + COUNT(0), + + /** + * The sum of the computed value expressions within the solution group. + * In combination with the special keyword DISTINCT, this is the sum of + * the distinct values from the computed value expressions within the + * solution group. + */ + SUM(1), + + /** + * The average is defined as + * <code>AVG(expr) := SUM(expr)/COUNT(expr)</code>. Note that both SUM + * and COUNT can be hash partitioned over a cluster, so it often makes + * sense to rewrite AVG(expr) internally in terms of COUNT and SUM. 
This + * may be combined with DISTINCT. + */ + AVG(2), + + /** + * MIN(expr) is the minimum observed value for the computed value + * expressions according to the ordering semantics of + * <code>ORDER BY expr ASC</code>. This may be combined with DISTINCT. + */ + MIN(3), + + /** + * MAX(expr) is the maximum observed value for the computed value + * expressions according to the ordering semantics of + * <code>ORDER BY expr ASC</code>. This may be combined with DISTINCT. + */ + MAX(4), + + /** + * The combined values of the computed value expressions as a string. + * This may be combined with DISTINCT. + */ + GROUP_CONCAT(5), + + /** + * This evaluates to an arbitrary value of the computed value + * expressions. This may be combined with DISTINCT to sample from the + * distinct computed values. While the implementation is not required to + * choose randomly among the values to be sampled, random sampling may + * prove more useful to some applications. + */ + SAMPLE(6); + + private FunctionCode(int code) { + this.code = code; + } + + final private int code; + + public int getCode() { + return code; + } + + } + public interface Annotations extends ImmutableBOp.Annotations { /** + * The aggregate function identifier ({@link FunctionCode#COUNT}, + * {@link FunctionCode#SUM}, etc). + */ + String FUNCTION_CODE = AggregateBase.class.getName() + ".functionCode"; + + /** * Optional boolean property indicates whether the aggregate applies to * the distinct within group solutions (default * {@value #DEFAULT_DISTINCT}). */ - String DISTINCT = AggregateBase.class.getName()+".distinct"; + String DISTINCT = AggregateBase.class.getName() + ".distinct"; boolean DEFAULT_DISTINCT = false; @@ -41,36 +120,40 @@ super(op); } + /** + * Core shallow copy constructor. The <i>distinct</i> option is modeled + * using {@link Annotations#DISTINCT}. The <i>expr</i> is modeled as the + * first argument for the aggregate function. 
+ * + * @param args + * @param annotations + */ public AggregateBase(BOp[] args, Map<String, Object> annotations) { super(args, annotations); - if (!isWildcardAllowed() && getExpression() == Var.var("*")) { - - /* - * Only COUNT may use the wildcard '*' variable. - */ - - throw new UnsupportedOperationException("'*' not permitted."); - - } - } /** - * + * @param functionCode + * The type safe value identifying the desired aggregate + * function. * @param distinct * <code>true</code> iff the keyword DISTINCT was used, for * example <code>COUNT(DISTINCT y)</code> * @param expr * The value expression to be computed, for example * <code>x</code> in <code>COUNT(DISTINCT x)</code> or - * <code>y+x</code> in <code>MIN(x+y)</code>. + * <code>y+x</code> in <code>MIN(x+y)</code>. Note that only + * COUNT may be used with the special variable <code>*</code>. */ - public AggregateBase(final boolean distinct, final IValueExpression<E> expr) { + public AggregateBase(final FunctionCode functionCode, + final boolean distinct, final IValueExpression<E> expr) { - this(new BOp[] { expr }, distinct ? NV.asMap(new NV( - Annotations.DISTINCT, true)) : null); + this(new BOp[] { expr }, NV.asMap(// + new NV(Annotations.FUNCTION_CODE, functionCode), // + new NV(Annotations.DISTINCT, distinct))// + ); } @@ -87,15 +170,33 @@ } + public boolean isWildcard() { + + return get(0).equals(Var.var("*")); + + } + /** - * Return <code>true</code> iff the {@link IValueExpression} may be the - * special variable <code>*</code>. The default implementation always - * returns <code>false</code>. + * Operation is not implemented by this class and must be overridden if the + * {@link AggregateBase} is to be directly evaluated. However, note that the + * computation of aggregate functions is often based on hard coded + * recognition of the appropriate function code. 
*/ - public boolean isWildcardAllowed() { + public E get(IBindingSet bset) { + throw new UnsupportedOperationException(); + } - return false; - + public AggregateBase<E> setExpression(final IValueExpression<E> newExpr) { + + if (newExpr == null) + throw new IllegalArgumentException(); + + final AggregateBase<E> tmp = (AggregateBase<E>) this.clone(); + + tmp._set(0, newExpr); + + return tmp; + } - + } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregate/IAggregate.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregate/IAggregate.java 2011-02-17 13:38:48 UTC (rev 4206) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregate/IAggregate.java 2011-02-17 22:58:07 UTC (rev 4207) @@ -7,6 +7,14 @@ * An aggregate operator, such as SUM, COUNT, MIN, MAX, etc. * * @author thompsonbry + * + * @todo In order to assign nice labels to select expressions we need to know + * (or be able to generate) the original syntactic expression, e.g., + * <code>i+j<code> or <code>SUM(i*2)+j</code>. The textual value of these + * expressions will be used as if they were variable names. Since a + * subquery could be part of a SELECT expression, this means that we need + * to be able to do this for any SPARQL query construct. I do not believe + * that openrdf currently supports this. */ public interface IAggregate<E> extends IValueExpression<E>{ @@ -29,11 +37,20 @@ * </pre> */ boolean isDistinct(); + + /** + * Return <code>true</code> iff the {@link IValueExpression} is the special + * variable <code>*</code> (but note that this is only allowed for COUNT). + */ + boolean isWildcard(); /** * Return the {@link IValueExpression} to be computed by the aggregate. For - * <code>COUNT</code> this may be the special variable <code>*</code>, which - * is interpreted to mean all variables declared in the source solutions. 
+ * example, if the aggregate function is <code>SUM(i+2)</code>, then this + * expression would be <code>i+2</code>. For <code>COUNT</code> this may be + * the special variable <code>*</code>, which is interpreted to mean all + * variables declared in the source solutions. The "DISTINCT" keyword is + * reported separately by {@link #isDistinct()}. */ IValueExpression<E> getExpression(); @@ -42,5 +59,16 @@ * internal state of the {@link IAggregate} operator). */ E get(IBindingSet bset); - + + /** + * Return a new {@link IAggregate} where the expression has been replaced by + * the given expression (copy-on-write). + * + * @param newExpr + * The new expression. + * + * @return The new {@link IAggregate}. + */ + IAggregate<E> setExpression(IValueExpression<E> newExpr); + } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java
If you fail to do - * this, then the intermediate results of the subqueries will be routed to this - * operator. This may cause unnecessary network traffic when running against the - * {@link IBigdataFederation}. It may also cause the query to block if the - * buffer capacity is limited. - * <p> * If you want to route intermediate results from other computations into * subqueries, then consider a {@link Tee} pattern instead. * <p> @@ -73,14 +66,12 @@ * * <pre> * SLICE[1]( - * UNION[2]([...],{subqueries=[a{sinkRef=1},b{sinkRef=1},c{sinkRef=1}]}) + * UNION[2]([...],{subqueries=[a,b,c]}) * ) * </pre> * * Will run the subqueries <i>a</i>, <i>b</i>, and <i>c</i> in parallel. Each - * subquery will be run once for each source {@link IBindingSet}. The output of - * those subqueries is explicitly routed to the SLICE operator using - * {@link PipelineOp.Annotations#SINK_REF} for efficiency in scale-out. + * subquery will be run once for each source {@link IBindingSet}. * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ @@ -109,19 +100,19 @@ * The maximum parallelism with which the subqueries will be evaluated * (default is unlimited). 
*/ - String MAX_PARALLEL = AbstractSubqueryOp.class.getName() - + ".maxParallel"; + String MAX_PARALLEL_SUBQUERIES = AbstractSubqueryOp.class.getName() + + ".maxParallelSubqueries"; - int DEFAULT_MAX_PARALLEL = Integer.MAX_VALUE; + int DEFAULT_MAX_PARALLEL_SUBQUERIES = Integer.MAX_VALUE; } /** - * @see Annotations#MAX_PARALLEL + * @see Annotations#MAX_PARALLEL_SUBQUERIES */ - public int getMaxParallel() { - return getProperty(Annotations.MAX_PARALLEL, - Annotations.DEFAULT_MAX_PARALLEL); + public int getMaxParallelSubqueries() { + return getProperty(Annotations.MAX_PARALLEL_SUBQUERIES, + Annotations.DEFAULT_MAX_PARALLEL_SUBQUERIES); } /** @@ -207,8 +198,8 @@ this.subqueries = (BOp[]) controllerOp .getRequiredProperty(Annotations.SUBQUERIES); - this.nparallel = controllerOp.getProperty(Annotations.MAX_PARALLEL, - Annotations.DEFAULT_MAX_PARALLEL); + this.nparallel = controllerOp.getProperty(Annotations.MAX_PARALLEL_SUBQUERIES, + Annotations.DEFAULT_MAX_PARALLEL_SUBQUERIES); this.executor = new LatchedExecutor(context.getIndexManager() .getExecutorService(), nparallel); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2011-02-17 13:38:48 UTC (rev 4206) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2011-02-17 22:58:07 UTC (rev 4207) @@ -1080,8 +1080,10 @@ new NV(BOp.Annotations.BOP_ID, joinId),// // @todo Why not use a factory which avoids bopIds already in use? new NV(PipelineJoin.Annotations.PREDICATE, vTarget.pred.setBOpId(3)), - // disallow parallel evaluation. - new NV(PipelineJoin.Annotations.MAX_PARALLEL,0), + // disallow parallel evaluation of tasks. + new NV(PipelineJoin.Annotations.MAX_PARALLEL,1), + // disallow parallel evaluation of chunks. 
+ new NV(PipelineJoin.Annotations.MAX_PARALLEL_CHUNKS,0), // disable access path coalescing new NV(PipelineJoin.Annotations.COALESCE_DUPLICATE_ACCESS_PATHS,false), // cutoff join. Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/PartitionedJoinGroup.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/PartitionedJoinGroup.java 2011-02-17 13:38:48 UTC (rev 4206) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/PartitionedJoinGroup.java 2011-02-17 22:58:07 UTC (rev 4207) @@ -713,7 +713,9 @@ .asMap(new NV[] { new NV(JoinGraph.Annotations.BOP_ID, idFactory.nextId()), // new NV(JoinGraph.Annotations.EVALUATION_CONTEXT, - BOpEvaluationContext.CONTROLLER) }) // + BOpEvaluationContext.CONTROLLER),// + new NV(PipelineOp.Annotations.SHARED_STATE,true),// + }) // ); return queryOp; Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/Steps.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/Steps.java 2011-02-17 13:38:48 UTC (rev 4206) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/Steps.java 2011-02-17 22:58:07 UTC (rev 4207) @@ -74,8 +74,8 @@ super(args, annotations); - if (getMaxParallel() != 1) - throw new IllegalArgumentException(Annotations.MAX_PARALLEL + "=" + if (getMaxParallelSubqueries() != 1) + throw new IllegalArgumentException(Annotations.MAX_PARALLEL_SUBQUERIES + "=" + getMaxParallel()); } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/Union.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/Union.java 2011-02-17 13:38:48 UTC (rev 4206) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/Union.java 
2011-02-17 22:58:07 UTC (rev 4207) @@ -42,8 +42,7 @@ * * Will run the subqueries <i>a</i>, <i>b</i>, and <i>c</i> in parallel for each * source {@link IBindingSet}. The output of those subqueries will be routed to - * the UNION operator (their parent) unless the subqueries explicitly override - * this behavior using {@link PipelineOp.Annotations#SINK_REF}. + * the UNION operator (their parent). * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/AbstractRunningQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/AbstractRunningQuery.java 2011-02-17 13:38:48 UTC (rev 4206) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/AbstractRunningQuery.java 2011-02-17 22:58:07 UTC (rev 4207) @@ -405,6 +405,33 @@ } + /** + * Return the {@link BOp} having the specified id. + * + * @param bopId + * The {@link BOp} identifier. + * + * @return The {@link BOp}. + * + * @throws IllegalArgumentException + * if there is no {@link BOp} with that identifier declared in + * this query. + */ + final public BOp getBOp(final int bopId) { + + final BOp bop = getBOpIndex().get(bopId); + + if (bop == null) { + + throw new IllegalArgumentException("Not found: id=" + bopId + + ", query=" + query); + + } + + return bop; + + } + /** * @param queryEngine * The {@link QueryEngine} on which the query is running. In @@ -620,6 +647,9 @@ try { + if(log.isInfoEnabled())//FIXME TRACE + log.info(msg.toString()); + if (runState.startOp(msg)) { /* @@ -673,10 +703,13 @@ if (!queryId.equals(msg.queryId)) throw new IllegalArgumentException(); - lock.lock(); + lock.lock(); try { + if(log.isInfoEnabled())//FIXME TRACE + log.info(msg.toString()); + // update per-operator statistics. 
final BOpStats tmp = statsMap.putIfAbsent(msg.bopId, msg.taskStats); @@ -1129,6 +1162,21 @@ } + /** + * Return the textual representation of the {@link RunState} of this query. + * <p> + * Note: Exposed for log messages in derived classes since {@link #runState} + * is private. + */ + protected String runStateString() { + lock.lock(); + try { + return runState.toString(); + } finally { + lock.unlock(); + } + } + public String toString() { final StringBuilder sb = new StringBuilder(getClass().getName()); sb.append("{queryId=" + queryId); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/ChunkedRunningQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/ChunkedRunningQuery.java 2011-02-17 13:38:48 UTC (rev 4206) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/ChunkedRunningQuery.java 2011-02-17 22:58:07 UTC (rev 4207) @@ -37,7 +37,6 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.Future; import java.util.concurrent.FutureTask; -import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.RejectedExecutionException; import java.util.concurrent.atomic.AtomicInteger; @@ -45,6 +44,7 @@ import com.bigdata.bop.BOp; import com.bigdata.bop.BOpContext; +import com.bigdata.bop.BOpEvaluationContext; import com.bigdata.bop.BOpUtility; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.NoSuchBOpException; @@ -57,6 +57,7 @@ import com.bigdata.relation.accesspath.IBlockingBuffer; import com.bigdata.relation.accesspath.IMultiSourceAsynchronousIterator; import com.bigdata.relation.accesspath.MultiSourceSequentialAsynchronousIterator; +import com.bigdata.rwstore.sector.IMemoryManager; import com.bigdata.service.IBigdataFederation; import com.bigdata.util.InnerCause; import com.bigdata.util.concurrent.Memoizer; @@ -72,13 +73,17 @@ * distribution of the shards. 
This evaluation strategy is compatible with both * the {@link Journal} (aka standalone) and the {@link IBigdataFederation} (aka * clustered or scale-out). + * <p> + * Note: The challenge with this implementation is managing the amount of data + * buffered on the JVM heap without introducing control structures which can + * result in deadlock or starvation. This has been addressed to a large extent + * by sharing a lock between this class and the per-operator input work queues + * using modified version of the JSR 166 classes. For high volume operator at + * once evaluation, we need to buffer the data on the native process heap using + * the {@link IMemoryManager}. * - * @todo The challenge with this implementation is managing the amount of data - * buffered on the JVM heap without introducing control structures which - * can result in deadlock or starvation. One way to manage this is to move - * the data off of the JVM heap onto direct ByteBuffers and then - * potentially spilling blocks to disk, e.g., using an RWStore based cache - * pattern. + * @todo {@link IMemoryManager} integration and support + * {@link PipelineOp.Annotations#MAX_MEMORY}. */ public class ChunkedRunningQuery extends AbstractRunningQuery { @@ -90,96 +95,6 @@ */ private final static Logger chunkTaskLog = Logger .getLogger(ChunkTask.class); - -// /** -// * The maximum number of operator tasks which may be concurrently executed -// * for a given (bopId,shardId). -// * -// * @see QueryEngineTestAnnotations#MAX_CONCURRENT_TASKS_PER_OPERATOR_AND_SHARD -// */ -// final private int maxConcurrentTasksPerOperatorAndShard; - -// /** -// * The maximum #of concurrent tasks for this query across all operators and -// * shards. -// * -// * Note: This is not a safe option and MUST be removed. It is possible for -// * N-1 tasks to backup with the Nth task not running due to concurrent -// * execution of some of the N-t tasks. 
-// */ -// final private int maxConcurrentTasks = 10; - - /* - * FIXME Explore the use of this semaphore to limit the maximum #of messages - * further. (Note that placing a limit on messages would allow us to buffer - * potentially many chunks. That could be solved by making LocalChunkMessage - * transparent in terms of the #of chunks or _binding_sets_ which it is - * carrying, but let's take this one step at a time). - * - * The first issue is ensuring that the query continue to make progress when - * a semaphore with a limited #of permits is introduced. This is because the - * ChunkFutureTask only attempts to schedule the next task for a given - * (bopId,shardId) but we could have failed to accept outstanding work for - * any of a number of operator/shard combinations. Likewise, the QueryEngine - * tells the RunningQuery to schedule work each time a message is dropped - * onto the QueryEngine, but the signal to execute more work is lost if the - * permits were not available immediately. - * - * One possibility would be to have a delayed retry. Another would be to - * have ChunkTaskFuture try to run *any* messages, not just messages for the - * same (bopId,shardId). - * - * Also, when scheduling work, there needs to be some bias towards the - * downstream operators in the query plan in order to ensure that they get a - * chance to clear work from upstream operators. This suggests that we might - * carry an order[] and use it to scan the work queue -- or make the work - * queue a priority heap using the order[] to place a primary sort over the - * bopIds in terms of the evaluation order and letting the shardIds fall in - * increasing shard order so we have a total order for the priority heap (a - * total order may also require a tie breaker, but I think that the priority - * heap allows ties). - * - * This concept of memory overhead and permits would be associated with the - * workload waiting on a given node for processing. 
(In scale-out, we do not - * care how much data is moving in the cluster, only how much data is - * challenging an individual machine). - * - * This emphasize again why we need to get the data off of the Java heap. - * - * The same concept should apply for chained buffers. Maybe one way to do - * this is to allocate a fixed budget to each query for the Java heap and - * the C heap and then the query blocks or goes to disk. - */ -// /** -// * The maximum number of binding sets which may be outstanding before a task -// * which is producing binding sets will block. This value may be used to -// * limit the memory demand of a query in which some operators produce -// * binding sets faster than other operators can consume them. -// * -// * @todo This could be generalized to consider the Java heap separately from -// * the native heap as we get into the use of native ByteBuffers to -// * buffer intermediate results. -// * -// * @todo This is expressed in terms of messages and not {@link IBindingSet}s -// * because the {@link LocalChunkMessage} does not self-report the #of -// * {@link IBindingSet}s (or chunks). [It should really be bytes on the -// * heap even if we can count binding sets and #s of bindings, but we -// * do not serialize all binding sets so we have to have one measure -// * for serialized and one measure for live objects.] -// */ -// final private int maxOutstandingMessageCount = 100; -// -// /** -// * A counting semaphore used to limit the #of outstanding binding set chunks -// * which may be buffered before a producer will block when trying to emit -// * another chunk. 
-// * -// * @see HandleChunkBuffer#outputChunk(IBindingSet[]) -// * @see #scheduleNext(BSBundle) -// * -// * @see #maxOutstandingMessageCount -// */ -// final private Semaphore outstandingMessageSemaphore = new Semaphore(maxOutstandingMessageCount); /** * A collection of (bopId,partitionId) keys mapped onto a collection of @@ -188,58 +103,39 @@ */ private final ConcurrentHashMap<BSBundle, ConcurrentHashMap<ChunkFutureTask,ChunkFutureTask>> operatorFutures; - /** - * A map of unbounded work queues for each (bopId,partitionId). Empty queues - * are removed from the map. - * <p> - * The map is guarded by the {@link #lock}. - */ + /** + * A map of unbounded work queues for each (bopId,partitionId). Empty queues + * are removed from the map. + * <p> + * The map is guarded by the {@link #lock}. + * + * FIXME Either this and/or {@link #operatorFutures} must be a weak value + * map in order to ensure that entries are eventually cleared in scale-out + * where the #of entries can potentially be very large since they are per + * (bopId,shardId). While these maps were initially declared as + * {@link ConcurrentHashMap} instances, if we remove entries once the + * map/queue entry is empty, this appears to open a concurrency hole which + * does not exist if we leave entries with empty map/queue values in the + * map. Changing to a weak value map should provide the necessary pruning of + * unused entries without opening up this concurrency hole. + */ private final Map<BSBundle, BlockingQueue<IChunkMessage<IBindingSet>>> operatorQueues; - -// /** -// * When running in stand alone, we can chain together the operators and have -// * much higher throughput. Each operator has an {@link BlockingBuffer} which -// * is essentially its input queue. The operator will drain its input queue -// * using {@link BlockingBuffer#iterator()}. 
-// * <p> -// * Each operator closes its {@link IBlockingBuffer} sink(s) once its own -// * source has been closed and it has finished processing that source. Since -// * multiple producers can target the same operator, we need a means to -// * ensure that the source for the target operator is not closed until each -// * producer which targets that operator has closed its corresponding sink. -// * <p> -// * In order to support this many-to-one producer/consumer pattern, we wrap -// * the input queue (a {@link BlockingBuffer}) for each operator having -// * multiple sources with a {@link MultiplexBlockingBuffer}. This class gives -// * each producer their own view on the underlying {@link BlockingBuffer}. -// * The underlying {@link BlockingBuffer} will not be closed until all -// * source(s) have closed their view of that buffer. This collection keeps -// * track of the {@link MultiplexBlockingBuffer} wrapping the -// * {@link BlockingBuffer} which is the input queue for each operator. -// * <p> -// * The input queues themselves are {@link BlockingBuffer} objects. Those -// * objects are available from this map using -// * {@link MultiplexBlockingBuffer#getBackingBuffer()}. These buffers are -// * pre-allocated by {@link #populateInputBufferMap(BOp)}. -// * {@link #startTasks(BOp)} is responsible for starting the operator tasks -// * in a "back-to-front" order. {@link #startQuery(IChunkMessage)} kicks off -// * the query and invokes {@link #startTasks(BOp)} to chain the input queues -// * and output queues together (when so chained, the output queues are skins -// * over the input queues obtained from {@link MultiplexBlockingBuffer}). -// * -// * FIXME The inputBufferMap will let us construct consumer producer chains -// * where the consumer _waits_ for all producer(s) which target the consumer -// * to close the sink associated with that consumer. 
Unlike when attaching an -// * {@link IChunkMessage} to an already running operator, the consumer will -// * NOT terminate (due to lack up input) until each running producer -// * terminating that consumer terminates. This will improve concurrency, -// * result in fewer task instances, and have better throughput than attaching -// * a chunk to an already running task. However, in scale-out we will have -// * tasks running on different nodes so we can not always chain together the -// * producer and consumer in this tightly integrated manner. -// */ -// final private ConcurrentHashMap<Integer/*operator*/, MultiplexBlockingBuffer<IBindingSet[]>/*inputQueue*/> inputBufferMap; + /** + * FIXME It appears that this is Ok based on a single unit test known to + * fail when {@link #removeMapOperatorQueueEntries} is <code>true</code>, + * but I expect that a similar concurrency problem could also exist for the + * {@link #operatorFutures} even through it does not produce a deadlock. + */ + static private final boolean removeMapOperatorFutureEntries = false; + + /** + * FIXME See operatorQueues for why removing the map entries appears to + * cause problems. This is problem is demonstrated by + * TestQueryEngine#test_query_slice_noLimit() when + * {@link PipelineOp.Annotations#PIPELINE_QUEUE_CAPACITY} is ONE (1). 
+ */ + static private final boolean removeMapOperatorQueueEntries = false; // /** // * The chunks available for immediate processing (they must have been @@ -285,286 +181,12 @@ super(queryEngine, queryId, controller, clientProxy, query); -//// combineReceivedChunks = query.getProperty( -//// QueryEngineTestAnnotations.COMBINE_RECEIVED_CHUNKS, -//// QueryEngineTestAnnotations.DEFAULT_COMBINE_RECEIVED_CHUNKS); - -// this.maxConcurrentTasksPerOperatorAndShard = 300; -// this.maxConcurrentTasksPerOperatorAndShard = query -// .getProperty( -// QueryEngineTestAnnotations.MAX_CONCURRENT_TASKS_PER_OPERATOR_AND_SHARD, -// QueryEngineTestAnnotations.DEFAULT_MAX_CONCURRENT_TASKS_PER_OPERATOR_AND_SHARD); - this.operatorFutures = new ConcurrentHashMap<BSBundle, ConcurrentHashMap<ChunkFutureTask,ChunkFutureTask>>(); this.operatorQueues = new ConcurrentHashMap<BSBundle, BlockingQueue<IChunkMessage<IBindingSet>>>(); -// /* -// * Setup the BOpStats object for each pipeline operator in the query. -// */ -// if (controller) { -// -//// runState = new RunState(this); -// -//// statsMap = new ConcurrentHashMap<Integer, BOpStats>(); -//// -//// populateStatsMap(query); -// -//// /* -//// * FIXME Review the concept of mutation queries. It used to be that -//// * queries could only either read or write. Now we have access paths -//// * which either read or write and each query could use zero or more -//// * such access paths. -//// */ -//// if (true/*!query.isMutation()*/) { -//// -//// // read-only query. -//// -//// final BOpStats queryStats = statsMap.get(query.getId()); -// -//// queryBuffer = new BlockingBufferWithStats<IBindingSet[]>(query, -//// queryStats); -//// -//// queryIterator = new QueryResultIterator<IBindingSet[]>(this, -//// queryBuffer.iterator()); -// -//// } else { -//// -//// // Note: Not used for mutation queries. -//// queryBuffer = null; -//// queryIterator = null; -// -// } -// -// } else { -// -//// runState = null; // Note: only on the query controller. 
-//// statsMap = null; // Note: only on the query controller. -//// queryBuffer = null; // Note: only on the query controller. -//// queryIterator = null; // Note: only when queryBuffer is defined. -// -// } - } -// /** -// * Take a chunk generated by some pass over an operator and make it -// * available to the target operator. How this is done depends on whether the -// * query is running against a standalone database or the scale-out database. -// * <p> -// * Note: The return value is used as part of the termination criteria for -// * the query. -// * <p> -// * The default implementation supports a standalone database. The generated -// * chunk is left on the Java heap and handed off synchronously using -// * {@link QueryEngine#acceptChunk(IChunkMessage)}. That method will queue -// * the chunk for asynchronous processing. -// * -// * @param bop -// * The operator which wrote on the sink. -// * @param sinkId -// * The identifier of the target operator. -// * @param sink -// * The intermediate results to be passed to that target operator. -// * -// * @return The #of {@link IChunkMessage} sent. This will always be ONE (1) -// * for scale-up. For scale-out, there will be at least one -// * {@link IChunkMessage} per index partition over which the -// * intermediate results were mapped. -// */ -// protected <E> int handleOutputChunk(final BOp bop, final int sinkId, -// final IBlockingBuffer<IBindingSet[]> sink) { -// -// if (bop == null) -// throw new IllegalArgumentException(); -// -// if (sink == null) -// throw new IllegalArgumentException(); -// -// if (inputBufferMap != null && inputBufferMap.get(sinkId) != null) { -// /* -// * FIXME The sink is just a wrapper for t... [truncated message content] |
From: <tho...@us...> - 2011-02-17 13:38:54
|
Revision: 4206 http://bigdata.svn.sourceforge.net/bigdata/?rev=4206&view=rev Author: thompsonbry Date: 2011-02-17 13:38:48 +0000 (Thu, 17 Feb 2011) Log Message: ----------- Added constructor variants for LinkedBlockingDeque which permit the caller to pass in their ReentrantLock. The intention is to support design patterns where an outer lock is used to guard operations both outside of the collection and within the collection. When the collection uses its own lock, this can create a deadlock if the outer class issues concurrent requests which lead to blocking operation on the collection. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/jsr166/LinkedBlockingDeque.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/jsr166/LinkedBlockingDequeTest.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/jsr166/LinkedBlockingDeque.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/jsr166/LinkedBlockingDeque.java 2011-02-17 12:56:14 UTC (rev 4205) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/jsr166/LinkedBlockingDeque.java 2011-02-17 13:38:48 UTC (rev 4206) @@ -125,13 +125,13 @@ private final int capacity; /** Main lock guarding all access */ - final ReentrantLock lock = new ReentrantLock(); + final ReentrantLock lock;// = new ReentrantLock(); /** Condition for waiting takes */ - private final Condition notEmpty = lock.newCondition(); + private final Condition notEmpty;// = lock.newCondition(); /** Condition for waiting puts */ - private final Condition notFull = lock.newCondition(); + private final Condition notFull;// = lock.newCondition(); /** * Creates a {@code LinkedBlockingDeque} with a capacity of @@ -142,14 +142,52 @@ } /** + * Creates a {@code LinkedBlockingDeque} with a capacity of + * {@link Integer#MAX_VALUE} using the caller's lock. 
+ */ + public LinkedBlockingDeque(final ReentrantLock lock) { + this(Integer.MAX_VALUE, lock); + } + + /** * Creates a {@code LinkedBlockingDeque} with the given (fixed) capacity. * * @param capacity the capacity of this deque * @throws IllegalArgumentException if {@code capacity} is less than 1 */ public LinkedBlockingDeque(int capacity) { + this(capacity, new ReentrantLock()); +// if (capacity <= 0) throw new IllegalArgumentException(); +// this.capacity = capacity; + } + + /** + * Creates a {@code LinkedBlockingDeque} with the given (fixed) capacity and + * the caller's {@link ReentrantLock} object. + * <p> + * <strong>Caution:</strong> By using the caller's lock, this constructor + * allows the caller to break the encapsulation of the synchronization and + * lock-based notification (signals). This can be used advantageously to + * create designs where an outer lock is shared by the collection which + * avoid deadlock arising from blocking operations on an inner lock while + * holding a distinct outer lock. However, the caller's decisions about its + * lock are no longer independent of the design decisions within this class + * since they share the same lock. + * + * @param capacity + * the capacity of this deque + * @param lock + * the lock object. 
+ * @throws IllegalArgumentException + * if {@code capacity} is less than 1 + */ + public LinkedBlockingDeque(final int capacity, final ReentrantLock lock) { if (capacity <= 0) throw new IllegalArgumentException(); + if (lock == null) throw new NullPointerException(); this.capacity = capacity; + this.lock = lock; + this.notEmpty = lock.newCondition(); + this.notFull = lock.newCondition(); } /** Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/jsr166/LinkedBlockingDequeTest.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/jsr166/LinkedBlockingDequeTest.java 2011-02-17 12:56:14 UTC (rev 4205) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/jsr166/LinkedBlockingDequeTest.java 2011-02-17 13:38:48 UTC (rev 4206) @@ -7,6 +7,8 @@ import junit.framework.*; import java.util.*; import java.util.concurrent.*; +import java.util.concurrent.locks.ReentrantLock; + import static java.util.concurrent.TimeUnit.MILLISECONDS; import java.io.*; @@ -326,7 +328,7 @@ */ public void testConstructor3() { try { - LinkedBlockingDeque q = new LinkedBlockingDeque(null); + LinkedBlockingDeque q = new LinkedBlockingDeque((Collection)null); shouldThrow(); } catch (NullPointerException success) {} } @@ -368,6 +370,27 @@ } /** + * Deque constructor with <code>null</code> {@link ReentrantLock} argument + * throws NPE. 
+ */ + public void testConstructor7() { + try { + LinkedBlockingDeque q = new LinkedBlockingDeque(20,null/*lock*/); + shouldThrow(); + } catch (NullPointerException success) {} + } + + /** + * Initializing from null Lock throws NPE + */ + public void testConstructor8() { + try { + LinkedBlockingDeque q = new LinkedBlockingDeque((ReentrantLock)null); + shouldThrow(); + } catch (NullPointerException success) {} + } + + /** * Deque transitions from empty to full when elements added */ public void testEmptyFull() { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2011-02-17 12:56:23
|
Revision: 4205 http://bigdata.svn.sourceforge.net/bigdata/?rev=4205&view=rev Author: thompsonbry Date: 2011-02-17 12:56:14 +0000 (Thu, 17 Feb 2011) Log Message: ----------- Initial check-in of the LinkedBlockingQueue, LinkedBlockingDeque and their test suites from the JSR 166 sources. The source files are in the public domain per the author's declaration, which is replicated below: /* * Written by Doug Lea with assistance from members of JCP JSR-166 * Expert Group and released to the public domain, as explained at * http://creativecommons.org/licenses/publicdomain */ These files are being introduced so that we may address certain patterns, such as those found in BlockingBuffer and ChunkedRunningQuery, where nested locks occur which lead to deadlock if we use a blocking operation on the queue and otherwise lead to unsatisfactory designs involving polling and retries on the queue. The files as checked in have been modified solely to resolve dependencies and (in the case of the unit tests) to comment out some unused methods which depend on Java 1.7 features. 
Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/util/concurrent/TestAll.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/jsr166/ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/jsr166/LinkedBlockingDeque.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/jsr166/LinkedBlockingQueue.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/jsr166/package.html branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/jsr166/ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/jsr166/BlockingQueueTest.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/jsr166/JSR166TestCase.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/jsr166/LinkedBlockingDequeTest.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/jsr166/LinkedBlockingQueueTest.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/jsr166/TestAll.java Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/jsr166/LinkedBlockingDeque.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/jsr166/LinkedBlockingDeque.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/jsr166/LinkedBlockingDeque.java 2011-02-17 12:56:14 UTC (rev 4205) @@ -0,0 +1,1171 @@ +/* + * Written by Doug Lea with assistance from members of JCP JSR-166 + * Expert Group and released to the public domain, as explained at + * http://creativecommons.org/licenses/publicdomain + */ + +package com.bigdata.jsr166; + +import java.util.AbstractQueue; +import java.util.Collection; +import java.util.Iterator; +import java.util.NoSuchElementException; +import java.util.concurrent.BlockingDeque; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.locks.Condition; +import java.util.concurrent.locks.ReentrantLock; + +/** + * An 
optionally-bounded {@linkplain BlockingDeque blocking deque} based on + * linked nodes. + * + * <p> The optional capacity bound constructor argument serves as a + * way to prevent excessive expansion. The capacity, if unspecified, + * is equal to {@link Integer#MAX_VALUE}. Linked nodes are + * dynamically created upon each insertion unless this would bring the + * deque above capacity. + * + * <p>Most operations run in constant time (ignoring time spent + * blocking). Exceptions include {@link #remove(Object) remove}, + * {@link #removeFirstOccurrence removeFirstOccurrence}, {@link + * #removeLastOccurrence removeLastOccurrence}, {@link #contains + * contains}, {@link #iterator iterator.remove()}, and the bulk + * operations, all of which run in linear time. + * + * <p>This class and its iterator implement all of the + * <em>optional</em> methods of the {@link Collection} and {@link + * Iterator} interfaces. + * + * <p>This class is a member of the + * <a href="{@docRoot}/../technotes/guides/collections/index.html"> + * Java Collections Framework</a>. + * + * @since 1.6 + * @author Doug Lea + * @param <E> the type of elements held in this collection + */ +public class LinkedBlockingDeque<E> + extends AbstractQueue<E> + implements BlockingDeque<E>, java.io.Serializable { + + /* + * Implemented as a simple doubly-linked list protected by a + * single lock and using conditions to manage blocking. + * + * To implement weakly consistent iterators, it appears we need to + * keep all Nodes GC-reachable from a predecessor dequeued Node. + * That would cause two problems: + * - allow a rogue Iterator to cause unbounded memory retention + * - cause cross-generational linking of old Nodes to new Nodes if + * a Node was tenured while live, which generational GCs have a + * hard time dealing with, causing repeated major collections. 
+ * However, only non-deleted Nodes need to be reachable from + * dequeued Nodes, and reachability does not necessarily have to + * be of the kind understood by the GC. We use the trick of + * linking a Node that has just been dequeued to itself. Such a + * self-link implicitly means to jump to "first" (for next links) + * or "last" (for prev links). + */ + + /* + * We have "diamond" multiple interface/abstract class inheritance + * here, and that introduces ambiguities. Often we want the + * BlockingDeque javadoc combined with the AbstractQueue + * implementation, so a lot of method specs are duplicated here. + */ + + private static final long serialVersionUID = -387911632671998426L; + + /** Doubly-linked list node class */ + static final class Node<E> { + /** + * The item, or null if this node has been removed. + */ + E item; + + /** + * One of: + * - the real predecessor Node + * - this Node, meaning the predecessor is tail + * - null, meaning there is no predecessor + */ + Node<E> prev; + + /** + * One of: + * - the real successor Node + * - this Node, meaning the successor is head + * - null, meaning there is no successor + */ + Node<E> next; + + Node(E x) { + item = x; + } + } + + /** + * Pointer to first node. + * Invariant: (first == null && last == null) || + * (first.prev == null && first.item != null) + */ + transient Node<E> first; + + /** + * Pointer to last node. 
+ * Invariant: (first == null && last == null) || + * (last.next == null && last.item != null) + */ + transient Node<E> last; + + /** Number of items in the deque */ + private transient int count; + + /** Maximum number of items in the deque */ + private final int capacity; + + /** Main lock guarding all access */ + final ReentrantLock lock = new ReentrantLock(); + + /** Condition for waiting takes */ + private final Condition notEmpty = lock.newCondition(); + + /** Condition for waiting puts */ + private final Condition notFull = lock.newCondition(); + + /** + * Creates a {@code LinkedBlockingDeque} with a capacity of + * {@link Integer#MAX_VALUE}. + */ + public LinkedBlockingDeque() { + this(Integer.MAX_VALUE); + } + + /** + * Creates a {@code LinkedBlockingDeque} with the given (fixed) capacity. + * + * @param capacity the capacity of this deque + * @throws IllegalArgumentException if {@code capacity} is less than 1 + */ + public LinkedBlockingDeque(int capacity) { + if (capacity <= 0) throw new IllegalArgumentException(); + this.capacity = capacity; + } + + /** + * Creates a {@code LinkedBlockingDeque} with a capacity of + * {@link Integer#MAX_VALUE}, initially containing the elements of + * the given collection, added in traversal order of the + * collection's iterator. + * + * @param c the collection of elements to initially contain + * @throws NullPointerException if the specified collection or any + * of its elements are null + */ + public LinkedBlockingDeque(Collection<? 
extends E> c) { + this(Integer.MAX_VALUE); + final ReentrantLock lock = this.lock; + lock.lock(); // Never contended, but necessary for visibility + try { + for (E e : c) { + if (e == null) + throw new NullPointerException(); + if (!linkLast(new Node<E>(e))) + throw new IllegalStateException("Deque full"); + } + } finally { + lock.unlock(); + } + } + + + // Basic linking and unlinking operations, called only while holding lock + + /** + * Links node as first element, or returns false if full. + */ + private boolean linkFirst(Node<E> node) { + // assert lock.isHeldByCurrentThread(); + if (count >= capacity) + return false; + Node<E> f = first; + node.next = f; + first = node; + if (last == null) + last = node; + else + f.prev = node; + ++count; + notEmpty.signal(); + return true; + } + + /** + * Links node as last element, or returns false if full. + */ + private boolean linkLast(Node<E> node) { + // assert lock.isHeldByCurrentThread(); + if (count >= capacity) + return false; + Node<E> l = last; + node.prev = l; + last = node; + if (first == null) + first = node; + else + l.next = node; + ++count; + notEmpty.signal(); + return true; + } + + /** + * Removes and returns first element, or null if empty. + */ + private E unlinkFirst() { + // assert lock.isHeldByCurrentThread(); + Node<E> f = first; + if (f == null) + return null; + Node<E> n = f.next; + E item = f.item; + f.item = null; + f.next = f; // help GC + first = n; + if (n == null) + last = null; + else + n.prev = null; + --count; + notFull.signal(); + return item; + } + + /** + * Removes and returns last element, or null if empty. + */ + private E unlinkLast() { + // assert lock.isHeldByCurrentThread(); + Node<E> l = last; + if (l == null) + return null; + Node<E> p = l.prev; + E item = l.item; + l.item = null; + l.prev = l; // help GC + last = p; + if (p == null) + first = null; + else + p.next = null; + --count; + notFull.signal(); + return item; + } + + /** + * Unlinks x. 
+ */ + void unlink(Node<E> x) { + // assert lock.isHeldByCurrentThread(); + Node<E> p = x.prev; + Node<E> n = x.next; + if (p == null) { + unlinkFirst(); + } else if (n == null) { + unlinkLast(); + } else { + p.next = n; + n.prev = p; + x.item = null; + // Don't mess with x's links. They may still be in use by + // an iterator. + --count; + notFull.signal(); + } + } + + // BlockingDeque methods + + /** + * @throws IllegalStateException {@inheritDoc} + * @throws NullPointerException {@inheritDoc} + */ + public void addFirst(E e) { + if (!offerFirst(e)) + throw new IllegalStateException("Deque full"); + } + + /** + * @throws IllegalStateException {@inheritDoc} + * @throws NullPointerException {@inheritDoc} + */ + public void addLast(E e) { + if (!offerLast(e)) + throw new IllegalStateException("Deque full"); + } + + /** + * @throws NullPointerException {@inheritDoc} + */ + public boolean offerFirst(E e) { + if (e == null) throw new NullPointerException(); + Node<E> node = new Node<E>(e); + final ReentrantLock lock = this.lock; + lock.lock(); + try { + return linkFirst(node); + } finally { + lock.unlock(); + } + } + + /** + * @throws NullPointerException {@inheritDoc} + */ + public boolean offerLast(E e) { + if (e == null) throw new NullPointerException(); + Node<E> node = new Node<E>(e); + final ReentrantLock lock = this.lock; + lock.lock(); + try { + return linkLast(node); + } finally { + lock.unlock(); + } + } + + /** + * @throws NullPointerException {@inheritDoc} + * @throws InterruptedException {@inheritDoc} + */ + public void putFirst(E e) throws InterruptedException { + if (e == null) throw new NullPointerException(); + Node<E> node = new Node<E>(e); + final ReentrantLock lock = this.lock; + lock.lock(); + try { + while (!linkFirst(node)) + notFull.await(); + } finally { + lock.unlock(); + } + } + + /** + * @throws NullPointerException {@inheritDoc} + * @throws InterruptedException {@inheritDoc} + */ + public void putLast(E e) throws InterruptedException { + if 
(e == null) throw new NullPointerException(); + Node<E> node = new Node<E>(e); + final ReentrantLock lock = this.lock; + lock.lock(); + try { + while (!linkLast(node)) + notFull.await(); + } finally { + lock.unlock(); + } + } + + /** + * @throws NullPointerException {@inheritDoc} + * @throws InterruptedException {@inheritDoc} + */ + public boolean offerFirst(E e, long timeout, TimeUnit unit) + throws InterruptedException { + if (e == null) throw new NullPointerException(); + Node<E> node = new Node<E>(e); + long nanos = unit.toNanos(timeout); + final ReentrantLock lock = this.lock; + lock.lockInterruptibly(); + try { + while (!linkFirst(node)) { + if (nanos <= 0) + return false; + nanos = notFull.awaitNanos(nanos); + } + return true; + } finally { + lock.unlock(); + } + } + + /** + * @throws NullPointerException {@inheritDoc} + * @throws InterruptedException {@inheritDoc} + */ + public boolean offerLast(E e, long timeout, TimeUnit unit) + throws InterruptedException { + if (e == null) throw new NullPointerException(); + Node<E> node = new Node<E>(e); + long nanos = unit.toNanos(timeout); + final ReentrantLock lock = this.lock; + lock.lockInterruptibly(); + try { + while (!linkLast(node)) { + if (nanos <= 0) + return false; + nanos = notFull.awaitNanos(nanos); + } + return true; + } finally { + lock.unlock(); + } + } + + /** + * @throws NoSuchElementException {@inheritDoc} + */ + public E removeFirst() { + E x = pollFirst(); + if (x == null) throw new NoSuchElementException(); + return x; + } + + /** + * @throws NoSuchElementException {@inheritDoc} + */ + public E removeLast() { + E x = pollLast(); + if (x == null) throw new NoSuchElementException(); + return x; + } + + public E pollFirst() { + final ReentrantLock lock = this.lock; + lock.lock(); + try { + return unlinkFirst(); + } finally { + lock.unlock(); + } + } + + public E pollLast() { + final ReentrantLock lock = this.lock; + lock.lock(); + try { + return unlinkLast(); + } finally { + lock.unlock(); + } + } + 
+ public E takeFirst() throws InterruptedException { + final ReentrantLock lock = this.lock; + lock.lock(); + try { + E x; + while ( (x = unlinkFirst()) == null) + notEmpty.await(); + return x; + } finally { + lock.unlock(); + } + } + + public E takeLast() throws InterruptedException { + final ReentrantLock lock = this.lock; + lock.lock(); + try { + E x; + while ( (x = unlinkLast()) == null) + notEmpty.await(); + return x; + } finally { + lock.unlock(); + } + } + + public E pollFirst(long timeout, TimeUnit unit) + throws InterruptedException { + long nanos = unit.toNanos(timeout); + final ReentrantLock lock = this.lock; + lock.lockInterruptibly(); + try { + E x; + while ( (x = unlinkFirst()) == null) { + if (nanos <= 0) + return null; + nanos = notEmpty.awaitNanos(nanos); + } + return x; + } finally { + lock.unlock(); + } + } + + public E pollLast(long timeout, TimeUnit unit) + throws InterruptedException { + long nanos = unit.toNanos(timeout); + final ReentrantLock lock = this.lock; + lock.lockInterruptibly(); + try { + E x; + while ( (x = unlinkLast()) == null) { + if (nanos <= 0) + return null; + nanos = notEmpty.awaitNanos(nanos); + } + return x; + } finally { + lock.unlock(); + } + } + + /** + * @throws NoSuchElementException {@inheritDoc} + */ + public E getFirst() { + E x = peekFirst(); + if (x == null) throw new NoSuchElementException(); + return x; + } + + /** + * @throws NoSuchElementException {@inheritDoc} + */ + public E getLast() { + E x = peekLast(); + if (x == null) throw new NoSuchElementException(); + return x; + } + + public E peekFirst() { + final ReentrantLock lock = this.lock; + lock.lock(); + try { + return (first == null) ? null : first.item; + } finally { + lock.unlock(); + } + } + + public E peekLast() { + final ReentrantLock lock = this.lock; + lock.lock(); + try { + return (last == null) ? 
null : last.item; + } finally { + lock.unlock(); + } + } + + public boolean removeFirstOccurrence(Object o) { + if (o == null) return false; + final ReentrantLock lock = this.lock; + lock.lock(); + try { + for (Node<E> p = first; p != null; p = p.next) { + if (o.equals(p.item)) { + unlink(p); + return true; + } + } + return false; + } finally { + lock.unlock(); + } + } + + public boolean removeLastOccurrence(Object o) { + if (o == null) return false; + final ReentrantLock lock = this.lock; + lock.lock(); + try { + for (Node<E> p = last; p != null; p = p.prev) { + if (o.equals(p.item)) { + unlink(p); + return true; + } + } + return false; + } finally { + lock.unlock(); + } + } + + // BlockingQueue methods + + /** + * Inserts the specified element at the end of this deque unless it would + * violate capacity restrictions. When using a capacity-restricted deque, + * it is generally preferable to use method {@link #offer(Object) offer}. + * + * <p>This method is equivalent to {@link #addLast}. + * + * @throws IllegalStateException if the element cannot be added at this + * time due to capacity restrictions + * @throws NullPointerException if the specified element is null + */ + public boolean add(E e) { + addLast(e); + return true; + } + + /** + * @throws NullPointerException if the specified element is null + */ + public boolean offer(E e) { + return offerLast(e); + } + + /** + * @throws NullPointerException {@inheritDoc} + * @throws InterruptedException {@inheritDoc} + */ + public void put(E e) throws InterruptedException { + putLast(e); + } + + /** + * @throws NullPointerException {@inheritDoc} + * @throws InterruptedException {@inheritDoc} + */ + public boolean offer(E e, long timeout, TimeUnit unit) + throws InterruptedException { + return offerLast(e, timeout, unit); + } + + /** + * Retrieves and removes the head of the queue represented by this deque. + * This method differs from {@link #poll poll} only in that it throws an + * exception if this deque is empty. 
+ * + * <p>This method is equivalent to {@link #removeFirst() removeFirst}. + * + * @return the head of the queue represented by this deque + * @throws NoSuchElementException if this deque is empty + */ + public E remove() { + return removeFirst(); + } + + public E poll() { + return pollFirst(); + } + + public E take() throws InterruptedException { + return takeFirst(); + } + + public E poll(long timeout, TimeUnit unit) throws InterruptedException { + return pollFirst(timeout, unit); + } + + /** + * Retrieves, but does not remove, the head of the queue represented by + * this deque. This method differs from {@link #peek peek} only in that + * it throws an exception if this deque is empty. + * + * <p>This method is equivalent to {@link #getFirst() getFirst}. + * + * @return the head of the queue represented by this deque + * @throws NoSuchElementException if this deque is empty + */ + public E element() { + return getFirst(); + } + + public E peek() { + return peekFirst(); + } + + /** + * Returns the number of additional elements that this deque can ideally + * (in the absence of memory or resource constraints) accept without + * blocking. This is always equal to the initial capacity of this deque + * less the current {@code size} of this deque. + * + * <p>Note that you <em>cannot</em> always tell if an attempt to insert + * an element will succeed by inspecting {@code remainingCapacity} + * because it may be the case that another thread is about to + * insert or remove an element. + */ + public int remainingCapacity() { + final ReentrantLock lock = this.lock; + lock.lock(); + try { + return capacity - count; + } finally { + lock.unlock(); + } + } + + /** + * @throws UnsupportedOperationException {@inheritDoc} + * @throws ClassCastException {@inheritDoc} + * @throws NullPointerException {@inheritDoc} + * @throws IllegalArgumentException {@inheritDoc} + */ + public int drainTo(Collection<? 
super E> c) { + return drainTo(c, Integer.MAX_VALUE); + } + + /** + * @throws UnsupportedOperationException {@inheritDoc} + * @throws ClassCastException {@inheritDoc} + * @throws NullPointerException {@inheritDoc} + * @throws IllegalArgumentException {@inheritDoc} + */ + public int drainTo(Collection<? super E> c, int maxElements) { + if (c == null) + throw new NullPointerException(); + if (c == this) + throw new IllegalArgumentException(); + final ReentrantLock lock = this.lock; + lock.lock(); + try { + int n = Math.min(maxElements, count); + for (int i = 0; i < n; i++) { + c.add(first.item); // In this order, in case add() throws. + unlinkFirst(); + } + return n; + } finally { + lock.unlock(); + } + } + + // Stack methods + + /** + * @throws IllegalStateException {@inheritDoc} + * @throws NullPointerException {@inheritDoc} + */ + public void push(E e) { + addFirst(e); + } + + /** + * @throws NoSuchElementException {@inheritDoc} + */ + public E pop() { + return removeFirst(); + } + + // Collection methods + + /** + * Removes the first occurrence of the specified element from this deque. + * If the deque does not contain the element, it is unchanged. + * More formally, removes the first element {@code e} such that + * {@code o.equals(e)} (if such an element exists). + * Returns {@code true} if this deque contained the specified element + * (or equivalently, if this deque changed as a result of the call). + * + * <p>This method is equivalent to + * {@link #removeFirstOccurrence(Object) removeFirstOccurrence}. + * + * @param o element to be removed from this deque, if present + * @return {@code true} if this deque changed as a result of the call + */ + public boolean remove(Object o) { + return removeFirstOccurrence(o); + } + + /** + * Returns the number of elements in this deque. 
+ * + * @return the number of elements in this deque + */ + public int size() { + final ReentrantLock lock = this.lock; + lock.lock(); + try { + return count; + } finally { + lock.unlock(); + } + } + + /** + * Returns {@code true} if this deque contains the specified element. + * More formally, returns {@code true} if and only if this deque contains + * at least one element {@code e} such that {@code o.equals(e)}. + * + * @param o object to be checked for containment in this deque + * @return {@code true} if this deque contains the specified element + */ + public boolean contains(Object o) { + if (o == null) return false; + final ReentrantLock lock = this.lock; + lock.lock(); + try { + for (Node<E> p = first; p != null; p = p.next) + if (o.equals(p.item)) + return true; + return false; + } finally { + lock.unlock(); + } + } + + /* + * TODO: Add support for more efficient bulk operations. + * + * We don't want to acquire the lock for every iteration, but we + * also want other threads a chance to interact with the + * collection, especially when count is close to capacity. + */ + +// /** +// * Adds all of the elements in the specified collection to this +// * queue. Attempts to addAll of a queue to itself result in +// * {@code IllegalArgumentException}. Further, the behavior of +// * this operation is undefined if the specified collection is +// * modified while the operation is in progress. +// * +// * @param c collection containing elements to be added to this queue +// * @return {@code true} if this queue changed as a result of the call +// * @throws ClassCastException {@inheritDoc} +// * @throws NullPointerException {@inheritDoc} +// * @throws IllegalArgumentException {@inheritDoc} +// * @throws IllegalStateException {@inheritDoc} +// * @see #add(Object) +// */ +// public boolean addAll(Collection<? 
extends E> c) { +// if (c == null) +// throw new NullPointerException(); +// if (c == this) +// throw new IllegalArgumentException(); +// final ReentrantLock lock = this.lock; +// lock.lock(); +// try { +// boolean modified = false; +// for (E e : c) +// if (linkLast(e)) +// modified = true; +// return modified; +// } finally { +// lock.unlock(); +// } +// } + + /** + * Returns an array containing all of the elements in this deque, in + * proper sequence (from first to last element). + * + * <p>The returned array will be "safe" in that no references to it are + * maintained by this deque. (In other words, this method must allocate + * a new array). The caller is thus free to modify the returned array. + * + * <p>This method acts as bridge between array-based and collection-based + * APIs. + * + * @return an array containing all of the elements in this deque + */ + @SuppressWarnings("unchecked") + public Object[] toArray() { + final ReentrantLock lock = this.lock; + lock.lock(); + try { + Object[] a = new Object[count]; + int k = 0; + for (Node<E> p = first; p != null; p = p.next) + a[k++] = p.item; + return a; + } finally { + lock.unlock(); + } + } + + /** + * Returns an array containing all of the elements in this deque, in + * proper sequence; the runtime type of the returned array is that of + * the specified array. If the deque fits in the specified array, it + * is returned therein. Otherwise, a new array is allocated with the + * runtime type of the specified array and the size of this deque. + * + * <p>If this deque fits in the specified array with room to spare + * (i.e., the array has more elements than this deque), the element in + * the array immediately following the end of the deque is set to + * {@code null}. + * + * <p>Like the {@link #toArray()} method, this method acts as bridge between + * array-based and collection-based APIs. 
Further, this method allows + * precise control over the runtime type of the output array, and may, + * under certain circumstances, be used to save allocation costs. + * + * <p>Suppose {@code x} is a deque known to contain only strings. + * The following code can be used to dump the deque into a newly + * allocated array of {@code String}: + * + * <pre> + * String[] y = x.toArray(new String[0]);</pre> + * + * Note that {@code toArray(new Object[0])} is identical in function to + * {@code toArray()}. + * + * @param a the array into which the elements of the deque are to + * be stored, if it is big enough; otherwise, a new array of the + * same runtime type is allocated for this purpose + * @return an array containing all of the elements in this deque + * @throws ArrayStoreException if the runtime type of the specified array + * is not a supertype of the runtime type of every element in + * this deque + * @throws NullPointerException if the specified array is null + */ + @SuppressWarnings("unchecked") + public <T> T[] toArray(T[] a) { + final ReentrantLock lock = this.lock; + lock.lock(); + try { + if (a.length < count) + a = (T[])java.lang.reflect.Array.newInstance + (a.getClass().getComponentType(), count); + + int k = 0; + for (Node<E> p = first; p != null; p = p.next) + a[k++] = (T)p.item; + if (a.length > k) + a[k] = null; + return a; + } finally { + lock.unlock(); + } + } + + public String toString() { + final ReentrantLock lock = this.lock; + lock.lock(); + try { + Node<E> p = first; + if (p == null) + return "[]"; + + StringBuilder sb = new StringBuilder(); + sb.append('['); + for (;;) { + E e = p.item; + sb.append(e == this ? "(this Collection)" : e); + p = p.next; + if (p == null) + return sb.append(']').toString(); + sb.append(',').append(' '); + } + } finally { + lock.unlock(); + } + } + + /** + * Atomically removes all of the elements from this deque. + * The deque will be empty after this call returns. 
+ */ + public void clear() { + final ReentrantLock lock = this.lock; + lock.lock(); + try { + for (Node<E> f = first; f != null; ) { + f.item = null; + Node<E> n = f.next; + f.prev = null; + f.next = null; + f = n; + } + first = last = null; + count = 0; + notFull.signalAll(); + } finally { + lock.unlock(); + } + } + + /** + * Returns an iterator over the elements in this deque in proper sequence. + * The elements will be returned in order from first (head) to last (tail). + * + * <p>The returned iterator is a "weakly consistent" iterator that + * will never throw {@link java.util.ConcurrentModificationException + * ConcurrentModificationException}, and guarantees to traverse + * elements as they existed upon construction of the iterator, and + * may (but is not guaranteed to) reflect any modifications + * subsequent to construction. + * + * @return an iterator over the elements in this deque in proper sequence + */ + public Iterator<E> iterator() { + return new Itr(); + } + + /** + * Returns an iterator over the elements in this deque in reverse + * sequential order. The elements will be returned in order from + * last (tail) to first (head). + * + * <p>The returned iterator is a "weakly consistent" iterator that + * will never throw {@link java.util.ConcurrentModificationException + * ConcurrentModificationException}, and guarantees to traverse + * elements as they existed upon construction of the iterator, and + * may (but is not guaranteed to) reflect any modifications + * subsequent to construction. 
+ * + * @return an iterator over the elements in this deque in reverse order + */ + public Iterator<E> descendingIterator() { + return new DescendingItr(); + } + + /** + * Base class for Iterators for LinkedBlockingDeque + */ + private abstract class AbstractItr implements Iterator<E> { + /** + * The next node to return in next() + */ + Node<E> next; + + /** + * nextItem holds on to item fields because once we claim that + * an element exists in hasNext(), we must return item read + * under lock (in advance()) even if it was in the process of + * being removed when hasNext() was called. + */ + E nextItem; + + /** + * Node returned by most recent call to next. Needed by remove. + * Reset to null if this element is deleted by a call to remove. + */ + private Node<E> lastRet; + + abstract Node<E> firstNode(); + abstract Node<E> nextNode(Node<E> n); + + AbstractItr() { + // set to initial position + final ReentrantLock lock = LinkedBlockingDeque.this.lock; + lock.lock(); + try { + next = firstNode(); + nextItem = (next == null) ? null : next.item; + } finally { + lock.unlock(); + } + } + + /** + * Returns the successor node of the given non-null, but + * possibly previously deleted, node. + */ + private Node<E> succ(Node<E> n) { + // Chains of deleted nodes ending in null or self-links + // are possible if multiple interior nodes are removed. + for (;;) { + Node<E> s = nextNode(n); + if (s == null) + return null; + else if (s.item != null) + return s; + else if (s == n) + return firstNode(); + else + n = s; + } + } + + /** + * Advances next. + */ + void advance() { + final ReentrantLock lock = LinkedBlockingDeque.this.lock; + lock.lock(); + try { + // assert next != null; + next = succ(next); + nextItem = (next == null) ? 
null : next.item; + } finally { + lock.unlock(); + } + } + + public boolean hasNext() { + return next != null; + } + + public E next() { + if (next == null) + throw new NoSuchElementException(); + lastRet = next; + E x = nextItem; + advance(); + return x; + } + + public void remove() { + Node<E> n = lastRet; + if (n == null) + throw new IllegalStateException(); + lastRet = null; + final ReentrantLock lock = LinkedBlockingDeque.this.lock; + lock.lock(); + try { + if (n.item != null) + unlink(n); + } finally { + lock.unlock(); + } + } + } + + /** Forward iterator */ + private class Itr extends AbstractItr { + Node<E> firstNode() { return first; } + Node<E> nextNode(Node<E> n) { return n.next; } + } + + /** Descending iterator */ + private class DescendingItr extends AbstractItr { + Node<E> firstNode() { return last; } + Node<E> nextNode(Node<E> n) { return n.prev; } + } + + /** + * Save the state of this deque to a stream (that is, serialize it). + * + * @serialData The capacity (int), followed by elements (each an + * {@code Object}) in the proper order, followed by a null + * @param s the stream + */ + private void writeObject(java.io.ObjectOutputStream s) + throws java.io.IOException { + final ReentrantLock lock = this.lock; + lock.lock(); + try { + // Write out capacity and any hidden stuff + s.defaultWriteObject(); + // Write out all elements in the proper order. + for (Node<E> p = first; p != null; p = p.next) + s.writeObject(p.item); + // Use trailing null as sentinel + s.writeObject(null); + } finally { + lock.unlock(); + } + } + + /** + * Reconstitute this deque from a stream (that is, + * deserialize it). 
+ * @param s the stream + */ + private void readObject(java.io.ObjectInputStream s) + throws java.io.IOException, ClassNotFoundException { + s.defaultReadObject(); + count = 0; + first = null; + last = null; + // Read in all elements and place in queue + for (;;) { + @SuppressWarnings("unchecked") + E item = (E)s.readObject(); + if (item == null) + break; + add(item); + } + } + +} Property changes on: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/jsr166/LinkedBlockingDeque.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/jsr166/LinkedBlockingQueue.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/jsr166/LinkedBlockingQueue.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/jsr166/LinkedBlockingQueue.java 2011-02-17 12:56:14 UTC (rev 4205) @@ -0,0 +1,883 @@ +/* + * Written by Doug Lea with assistance from members of JCP JSR-166 + * Expert Group and released to the public domain, as explained at + * http://creativecommons.org/licenses/publicdomain + */ + +package com.bigdata.jsr166; + +import java.util.AbstractQueue; +import java.util.Collection; +import java.util.Iterator; +import java.util.NoSuchElementException; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.locks.Condition; +import java.util.concurrent.locks.ReentrantLock; + +/** + * An optionally-bounded {@linkplain BlockingQueue blocking queue} based on + * linked nodes. + * This queue orders elements FIFO (first-in-first-out). + * The <em>head</em> of the queue is that element that has been on the + * queue the longest time. + * The <em>tail</em> of the queue is that element that has been on the + * queue the shortest time. 
New elements + * are inserted at the tail of the queue, and the queue retrieval + * operations obtain elements at the head of the queue. + * Linked queues typically have higher throughput than array-based queues but + * less predictable performance in most concurrent applications. + * + * <p> The optional capacity bound constructor argument serves as a + * way to prevent excessive queue expansion. The capacity, if unspecified, + * is equal to {@link Integer#MAX_VALUE}. Linked nodes are + * dynamically created upon each insertion unless this would bring the + * queue above capacity. + * + * <p>This class and its iterator implement all of the + * <em>optional</em> methods of the {@link Collection} and {@link + * Iterator} interfaces. + * + * <p>This class is a member of the + * <a href="{@docRoot}/../technotes/guides/collections/index.html"> + * Java Collections Framework</a>. + * + * @since 1.5 + * @author Doug Lea + * @param <E> the type of elements held in this collection + * + */ +public class LinkedBlockingQueue<E> extends AbstractQueue<E> + implements BlockingQueue<E>, java.io.Serializable { + private static final long serialVersionUID = -6903933977591709194L; + + /* + * A variant of the "two lock queue" algorithm. The putLock gates + * entry to put (and offer), and has an associated condition for + * waiting puts. Similarly for the takeLock. The "count" field + * that they both rely on is maintained as an atomic to avoid + * needing to get both locks in most cases. Also, to minimize need + * for puts to get takeLock and vice-versa, cascading notifies are + * used. When a put notices that it has enabled at least one take, + * it signals taker. That taker in turn signals others if more + * items have been entered since the signal. And symmetrically for + * takes signalling puts. Operations such as remove(Object) and + * iterators acquire both locks. 
+ * + * Visibility between writers and readers is provided as follows: + * + * Whenever an element is enqueued, the putLock is acquired and + * count updated. A subsequent reader guarantees visibility to the + * enqueued Node by either acquiring the putLock (via fullyLock) + * or by acquiring the takeLock, and then reading n = count.get(); + * this gives visibility to the first n items. + * + * To implement weakly consistent iterators, it appears we need to + * keep all Nodes GC-reachable from a predecessor dequeued Node. + * That would cause two problems: + * - allow a rogue Iterator to cause unbounded memory retention + * - cause cross-generational linking of old Nodes to new Nodes if + * a Node was tenured while live, which generational GCs have a + * hard time dealing with, causing repeated major collections. + * However, only non-deleted Nodes need to be reachable from + * dequeued Nodes, and reachability does not necessarily have to + * be of the kind understood by the GC. We use the trick of + * linking a Node that has just been dequeued to itself. Such a + * self-link implicitly means to advance to head.next. + */ + + /** + * Linked list node class + */ + static class Node<E> { + E item; + + /** + * One of: + * - the real successor Node + * - this Node, meaning the successor is head.next + * - null, meaning there is no successor (this is the last node) + */ + Node<E> next; + + Node(E x) { item = x; } + } + + /** The capacity bound, or Integer.MAX_VALUE if none */ + private final int capacity; + + /** Current number of elements */ + private final AtomicInteger count = new AtomicInteger(0); + + /** + * Head of linked list. + * Invariant: head.item == null + */ + private transient Node<E> head; + + /** + * Tail of linked list. 
+ * Invariant: last.next == null + */ + private transient Node<E> last; + + /** Lock held by take, poll, etc */ + private final ReentrantLock takeLock = new ReentrantLock(); + + /** Wait queue for waiting takes */ + private final Condition notEmpty = takeLock.newCondition(); + + /** Lock held by put, offer, etc */ + private final ReentrantLock putLock = new ReentrantLock(); + + /** Wait queue for waiting puts */ + private final Condition notFull = putLock.newCondition(); + + /** + * Signals a waiting take. Called only from put/offer (which do not + * otherwise ordinarily lock takeLock.) + */ + private void signalNotEmpty() { + final ReentrantLock takeLock = this.takeLock; + takeLock.lock(); + try { + notEmpty.signal(); + } finally { + takeLock.unlock(); + } + } + + /** + * Signals a waiting put. Called only from take/poll. + */ + private void signalNotFull() { + final ReentrantLock putLock = this.putLock; + putLock.lock(); + try { + notFull.signal(); + } finally { + putLock.unlock(); + } + } + + /** + * Links node at end of queue. + * + * @param node the node + */ + private void enqueue(Node<E> node) { + // assert putLock.isHeldByCurrentThread(); + // assert last.next == null; + last = last.next = node; + } + + /** + * Removes a node from head of queue. + * + * @return the node + */ + private E dequeue() { + // assert takeLock.isHeldByCurrentThread(); + // assert head.item == null; + Node<E> h = head; + Node<E> first = h.next; + h.next = h; // help GC + head = first; + E x = first.item; + first.item = null; + return x; + } + + /** + * Lock to prevent both puts and takes. + */ + void fullyLock() { + putLock.lock(); + takeLock.lock(); + } + + /** + * Unlock to allow both puts and takes. + */ + void fullyUnlock() { + takeLock.unlock(); + putLock.unlock(); + } + +// /** +// * Tells whether both locks are held by current thread. 
+// */ +// boolean isFullyLocked() { +// return (putLock.isHeldByCurrentThread() && +// takeLock.isHeldByCurrentThread()); +// } + + /** + * Creates a {@code LinkedBlockingQueue} with a capacity of + * {@link Integer#MAX_VALUE}. + */ + public LinkedBlockingQueue() { + this(Integer.MAX_VALUE); + } + + /** + * Creates a {@code LinkedBlockingQueue} with the given (fixed) capacity. + * + * @param capacity the capacity of this queue + * @throws IllegalArgumentException if {@code capacity} is not greater + * than zero + */ + public LinkedBlockingQueue(int capacity) { + if (capacity <= 0) throw new IllegalArgumentException(); + this.capacity = capacity; + last = head = new Node<E>(null); + } + + /** + * Creates a {@code LinkedBlockingQueue} with a capacity of + * {@link Integer#MAX_VALUE}, initially containing the elements of the + * given collection, + * added in traversal order of the collection's iterator. + * + * @param c the collection of elements to initially contain + * @throws NullPointerException if the specified collection or any + * of its elements are null + */ + public LinkedBlockingQueue(Collection<? extends E> c) { + this(Integer.MAX_VALUE); + final ReentrantLock putLock = this.putLock; + putLock.lock(); // Never contended, but necessary for visibility + try { + int n = 0; + for (E e : c) { + if (e == null) + throw new NullPointerException(); + if (n == capacity) + throw new IllegalStateException("Queue full"); + enqueue(new Node<E>(e)); + ++n; + } + count.set(n); + } finally { + putLock.unlock(); + } + } + + + // this doc comment is overridden to remove the reference to collections + // greater in size than Integer.MAX_VALUE + /** + * Returns the number of elements in this queue. + * + * @return the number of elements in this queue + */ + public int size() { + return count.get(); + } + + // this doc comment is a modified copy of the inherited doc comment, + // without the reference to unlimited queues. 
+ /** + * Returns the number of additional elements that this queue can ideally + * (in the absence of memory or resource constraints) accept without + * blocking. This is always equal to the initial capacity of this queue + * less the current {@code size} of this queue. + * + * <p>Note that you <em>cannot</em> always tell if an attempt to insert + * an element will succeed by inspecting {@code remainingCapacity} + * because it may be the case that another thread is about to + * insert or remove an element. + */ + public int remainingCapacity() { + return capacity - count.get(); + } + + /** + * Inserts the specified element at the tail of this queue, waiting if + * necessary for space to become available. + * + * @throws InterruptedException {@inheritDoc} + * @throws NullPointerException {@inheritDoc} + */ + public void put(E e) throws InterruptedException { + if (e == null) throw new NullPointerException(); + // Note: convention in all put/take/etc is to preset local var + // holding count negative to indicate failure unless set. + int c = -1; + Node<E> node = new Node(e); + final ReentrantLock putLock = this.putLock; + final AtomicInteger count = this.count; + putLock.lockInterruptibly(); + try { + /* + * Note that count is used in wait guard even though it is + * not protected by lock. This works because count can + * only decrease at this point (all other puts are shut + * out by lock), and we (or some other waiting put) are + * signalled if it ever changes from capacity. Similarly + * for all other uses of count in other wait guards. + */ + while (count.get() == capacity) { + notFull.await(); + } + enqueue(node); + c = count.getAndIncrement(); + if (c + 1 < capacity) + notFull.signal(); + } finally { + putLock.unlock(); + } + if (c == 0) + signalNotEmpty(); + } + + /** + * Inserts the specified element at the tail of this queue, waiting if + * necessary up to the specified wait time for space to become available. 
+ * + * @return {@code true} if successful, or {@code false} if + * the specified waiting time elapses before space is available. + * @throws InterruptedException {@inheritDoc} + * @throws NullPointerException {@inheritDoc} + */ + public boolean offer(E e, long timeout, TimeUnit unit) + throws InterruptedException { + + if (e == null) throw new NullPointerException(); + long nanos = unit.toNanos(timeout); + int c = -1; + final ReentrantLock putLock = this.putLock; + final AtomicInteger count = this.count; + putLock.lockInterruptibly(); + try { + while (count.get() == capacity) { + if (nanos <= 0) + return false; + nanos = notFull.awaitNanos(nanos); + } + enqueue(new Node<E>(e)); + c = count.getAndIncrement(); + if (c + 1 < capacity) + notFull.signal(); + } finally { + putLock.unlock(); + } + if (c == 0) + signalNotEmpty(); + return true; + } + + /** + * Inserts the specified element at the tail of this queue if it is + * possible to do so immediately without exceeding the queue's capacity, + * returning {@code true} upon success and {@code false} if this queue + * is full. + * When using a capacity-restricted queue, this method is generally + * preferable to method {@link BlockingQueue#add add}, which can fail to + * insert an element only by throwing an exception. 
+ * + * @throws NullPointerException if the specified element is null + */ + public boolean offer(E e) { + if (e == null) throw new NullPointerException(); + final AtomicInteger count = this.count; + if (count.get() == capacity) + return false; + int c = -1; + Node<E> node = new Node(e); + final ReentrantLock putLock = this.putLock; + putLock.lock(); + try { + if (count.get() < capacity) { + enqueue(node); + c = count.getAndIncrement(); + if (c + 1 < capacity) + notFull.signal(); + } + } finally { + putLock.unlock(); + } + if (c == 0) + signalNotEmpty(); + return c >= 0; + } + + + public E take() throws InterruptedException { + E x; + int c = -1; + final AtomicInteger count = this.count; + final ReentrantLock takeLock = this.takeLock; + takeLock.lockInterruptibly(); + try { + while (count.get() == 0) { + notEmpty.await(); + } + x = dequeue(); + c = count.getAndDecrement(); + if (c > 1) + notEmpty.signal(); + } finally { + takeLock.unlock(); + } + if (c == capacity) + signalNotFull(); + return x; + } + + public E poll(long timeout, TimeUnit unit) throws InterruptedException { + E x = null; + int c = -1; + long nanos = unit.toNanos(timeout); + final AtomicInteger count = this.count; + final ReentrantLock takeLock = this.takeLock; + takeLock.lockInterruptibly(); + try { + while (count.get() == 0) { + if (nanos <= 0) + return null; + nanos = notEmpty.awaitNanos(nanos); + } + x = dequeue(); + c = count.getAndDecrement(); + if (c > 1) + notEmpty.signal(); + } finally { + takeLock.unlock(); + } + if (c == capacity) + signalNotFull(); + return x; + } + + public E poll() { + final AtomicInteger count = this.count; + if (count.get() == 0) + return null; + E x = null; + int c = -1; + final ReentrantLock takeLock = this.takeLock; + takeLock.lock(); + try { + if (count.get() > 0) { + x = dequeue(); + c = count.getAndDecrement(); + if (c > 1) + notEmpty.signal(); + } + } finally { + takeLock.unlock(); + } + if (c == capacity) + signalNotFull(); + return x; + } + + public E 
peek() { + if (count.get() == 0) + return null; + final ReentrantLock takeLock = this.takeLock; + takeLock.lock(); + try { + Node<E> first = head.next; + if (first == null) + return null; + else + return first.item; + } finally { + takeLock.unlock(); + } + } + + /** + * Unlinks interior Node p with predecessor trail. + */ + void unlink(Node<E> p, Node<E> trail) { + // assert isFullyLocked(); + // p.next is not changed, to allow iterators that are + // traversing p to maintain their weak-consistency guarantee. + p.item = null; + trail.next = p.next; + if (last == p) + last = trail; + if (count.getAndDecrement() == capacity) + notFull.signal(); + } + + /** + * Removes a single instance of the specified element from this queue, + * if it is present. More formally, removes an element {@code e} such + * that {@code o.equals(e)}, if this queue contains one or more such + * elements. + * Returns {@code true} if this queue contained the specified element + * (or equivalently, if this queue changed as a result of the call). + * + * @param o element to be removed from this queue, if present + * @return {@code true} if this queue changed as a result of the call + */ + public boolean remove(Object o) { + if (o == null) return false; + fullyLock(); + try { + for (Node<E> trail = head, p = trail.next; + p != null; + trail = p, p = p.next) { + if (o.equals(p.item)) { + unlink(p, trail); + return true; + } + } + return false; + } finally { + fullyUnlock(); + } + } + + /** + * Returns {@code true} if this queue contains the specified element. + * More formally, returns {@code true} if and only if this queue contains + * at least one element {@code e} such that {@code o.equals(e)}. 
+ * + * @param o object to be checked for containment in this queue + * @return {@code true} if this queue contains the specified element + */ + public boolean contains(Object o) { + if (o == null) return false; + fullyLock(); + try { + for (Node<E> p = head.next; p != null; p = p.next) + if (o.equals(p.item)) + return true; + return false; + } finally { + fullyUnlock(); + } + } + + /** + * Returns an array containing all of the elements in this queue, in + * proper sequence. + * + * <p>The returned array will be "safe" in that no references to it are + * maintained by this queue. (In other words, this method must allocate + * a new array). The caller is thus free to modify the returned array. + * + * <p>This method acts as bridge between array-based and collection-based + * APIs. + * + * @return an array containing all of the elements in this queue + */ + public Object[] toArray() { + fullyLock(); + try { + int size = count.get(); + Object[] a = new Object[size]; + int k = 0; + for (Node<E> p = head.next; p != null; p = p.next) + a[k++] = p.item; + return a; + } finally { + fullyUnlock(); + } + } + + /** + * Returns an array containing all of the elements in this queue, in + * proper sequence; the runtime type of the returned array is that of + * the specified array. If the queue fits in the specified array, it + * is returned therein. Otherwise, a new array is allocated with the + * runtime type of the specified array and the size of this queue. + * + * <p>If this queue fits in the specified array with room to spare + * (i.e., the array has more elements than this queue), the element in + * the array immediately following the end of the queue is set to + * {@code null}. + * + * <p>Like the {@link #t... [truncated message content] |
From: <tho...@us...> - 2011-02-17 12:43:07
|
Revision: 4204 http://bigdata.svn.sourceforge.net/bigdata/?rev=4204&view=rev Author: thompsonbry Date: 2011-02-17 12:43:00 +0000 (Thu, 17 Feb 2011) Log Message: ----------- Added sample code to test https://sourceforge.net/apps/trac/bigdata/ticket/251, which reports a problem when using inlineDateTimes. Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/samples/com/bigdata/samples/CreateSailUsingInlineDateTimes.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/samples/com/bigdata/samples/CreateSailUsingInlineDateTimes.properties Added: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/samples/com/bigdata/samples/CreateSailUsingInlineDateTimes.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/samples/com/bigdata/samples/CreateSailUsingInlineDateTimes.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/samples/com/bigdata/samples/CreateSailUsingInlineDateTimes.java 2011-02-17 12:43:00 UTC (rev 4204) @@ -0,0 +1,75 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2011. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Feb 17, 2011 + */ + +package com.bigdata.samples; + +import java.util.Properties; + +import com.bigdata.rdf.sail.BigdataSail; + +/** + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public class CreateSailUsingInlineDateTimes extends SampleCode { + + static public void main(String[] args) { + + try { + + CreateSailUsingInlineDateTimes f = new CreateSailUsingInlineDateTimes(); + + final String resource = "CreateSailUsingInlineDateTimes.properties"; + + final Properties properties = f.loadProperties(resource); + + System.out.println("Read properties from resource: " + resource); + properties.list(System.out); + + final BigdataSail sail = new BigdataSail(properties); + + sail.initialize(); + + try { + + System.out.println("Sail is initialized."); + + } finally { + + sail.shutDown(); + + } + + } catch (Throwable t) { + + t.printStackTrace(System.err); + + } + + } + +} Property changes on: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/samples/com/bigdata/samples/CreateSailUsingInlineDateTimes.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Added: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/samples/com/bigdata/samples/CreateSailUsingInlineDateTimes.properties =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/samples/com/bigdata/samples/CreateSailUsingInlineDateTimes.properties (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/samples/com/bigdata/samples/CreateSailUsingInlineDateTimes.properties 2011-02-17 12:43:00 UTC (rev 4204) @@ -0,0 +1,18 @@ +com.bigdata.journal.AbstractJournal.createTempFile=true +com.bigdata.journal.AbstractJournal.deleteOnClose=true 
+com.bigdata.journal.AbstractJournal.deleteOnExit=true +# to not fail testcases that count... (NOT recommended). +com.bigdata.rdf.sail.exactSize=true +# This is full tx support, which does not have nearly the throughput of an unisolated writer combined with concurrent readers. +com.bigdata.rdf.sail.isolatableIndices=true +com.bigdata.rdf.sail.truthMaintenance=false +# This option will be going away once we finish the query engine refactor. +com.bigdata.rdf.sail.allowSesameQueryEvaluation=true +# Auto-commit is NOT recommended. +com.bigdata.rdf.sail.allowAutoCommit=true +com.bigdata.rdf.store.AbstractTripleStore.axiomsClass=com.bigdata.rdf.axioms.NoAxioms +com.bigdata.rdf.store.AbstractTripleStore.quads=true +com.bigdata.rdf.store.AbstractTripleStore.statementIdentifiers=false +com.bigdata.rdf.store.AbstractTripleStore.vocabularyClass=com.bigdata.rdf.vocab.NoVocabulary +com.bigdata.rdf.store.AbstractTripleStore.justify=false +com.bigdata.rdf.store.AbstractTripleStore.inlineDateTimes=true Property changes on: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/samples/com/bigdata/samples/CreateSailUsingInlineDateTimes.properties ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2011-02-16 12:40:29
|
Revision: 4203 http://bigdata.svn.sourceforge.net/bigdata/?rev=4203&view=rev Author: thompsonbry Date: 2011-02-16 12:40:23 +0000 (Wed, 16 Feb 2011) Log Message: ----------- Resolution for #117 (QName -> URI conversion). Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/model/BigdataValueFactoryImpl.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/model/TestFactory.java Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/model/BigdataValueFactoryImpl.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/model/BigdataValueFactoryImpl.java 2011-02-15 19:42:20 UTC (rev 4202) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/model/BigdataValueFactoryImpl.java 2011-02-16 12:40:23 UTC (rev 4203) @@ -267,9 +267,15 @@ public BigdataLiteralImpl createLiteral(final XMLGregorianCalendar arg0) { + /* + * Note: QName#toString() does not produce the right representation, + * which is why we need to go through XMLDatatypeUtil. 
+ * + * @see https://sourceforge.net/apps/trac/bigdata/ticket/117 + */ return new BigdataLiteralImpl(this, arg0.toString(), - null/* languageCode */, createURI(arg0.getXMLSchemaType() - .toString())); + null/* languageCode */, createURI(XMLDatatypeUtil.qnameToURI( + arg0.getXMLSchemaType()).stringValue())); } Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/model/TestFactory.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/model/TestFactory.java 2011-02-15 19:42:20 UTC (rev 4202) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/model/TestFactory.java 2011-02-16 12:40:23 UTC (rev 4203) @@ -96,8 +96,10 @@ 0// timezone ); - assertEquals("http://www.w3.org/2001/XMLSchema#date", vf - .createLiteral(cal).getDatatype().stringValue()); + assertEquals(// + "http://www.w3.org/2001/XMLSchema#date",// + vf.createLiteral(cal).getDatatype().stringValue()// + ); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <mrp...@us...> - 2011-02-15 19:42:26
|
Revision: 4202 http://bigdata.svn.sourceforge.net/bigdata/?rev=4202&view=rev Author: mrpersonick Date: 2011-02-15 19:42:20 +0000 (Tue, 15 Feb 2011) Log Message: ----------- added a prune groups test case Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailEvaluationStrategyImpl.java Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailEvaluationStrategyImpl.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailEvaluationStrategyImpl.java 2011-02-15 19:41:45 UTC (rev 4201) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailEvaluationStrategyImpl.java 2011-02-15 19:42:20 UTC (rev 4202) @@ -945,6 +945,59 @@ } + public void test_prune_groups() throws Exception { + + // define the vocabulary + final URI mike = new URIImpl(BD.NAMESPACE + "Mike"); + final URI jane = new URIImpl(BD.NAMESPACE + "Jane"); + final URI bryan = new URIImpl(BD.NAMESPACE + "Bryan"); + final URI person = new URIImpl(BD.NAMESPACE + "Person"); + final URI object = new URIImpl(BD.NAMESPACE + "Object"); + final Literal mikeLabel = new LiteralImpl("mike label"); + final Literal mikeComment = new LiteralImpl("mike comment"); + final Literal janeLabel = new LiteralImpl("jane label"); + final URI p1 = new URIImpl(BD.NAMESPACE + "p1"); + final URI p2 = new URIImpl(BD.NAMESPACE + "p2"); + + // define the graph + Graph graph = new GraphImpl(); + graph.add(mike, RDF.TYPE, person); + graph.add(jane, RDF.TYPE, person); + graph.add(bryan, RDF.TYPE, person); + graph.add(mike, RDF.TYPE, object); + graph.add(jane, RDF.TYPE, object); + graph.add(bryan, RDF.TYPE, object); + graph.add(mike, RDFS.LABEL, mikeLabel); + graph.add(mike, RDFS.COMMENT, mikeComment); + graph.add(jane, RDFS.LABEL, janeLabel); + + // define the query + String query = + "select * " + + 
"where { " + + " ?s <"+RDF.TYPE+"> <"+person+"> . " + + " OPTIONAL { " + + " ?s <"+p1+"> ?p1 . " + + " OPTIONAL {" + + " ?p1 <"+p2+"> ?o2 . " + + " } " + + " } " + + "}"; + + // define the correct answer + Collection<BindingSet> answer = new LinkedList<BindingSet>(); + answer.add(createBindingSet( + new BindingImpl("s", mike))); + answer.add(createBindingSet( + new BindingImpl("s", jane))); + answer.add(createBindingSet( + new BindingImpl("s", bryan))); + + // run the test + runQuery(graph, query, answer); + + } + private void runQuery(final Graph data, final String query, final Collection<BindingSet> answer) throws Exception { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <mrp...@us...> - 2011-02-15 19:41:51
|
Revision: 4201 http://bigdata.svn.sourceforge.net/bigdata/?rev=4201&view=rev Author: mrpersonick Date: 2011-02-15 19:41:45 +0000 (Tue, 15 Feb 2011) Log Message: ----------- fixed a pruneGroups bug Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/SOpTreeBuilder.java Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/SOpTreeBuilder.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/SOpTreeBuilder.java 2011-02-15 16:42:32 UTC (rev 4200) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/SOpTreeBuilder.java 2011-02-15 19:41:45 UTC (rev 4201) @@ -33,7 +33,7 @@ final Collection<SOpGroup> groupsToPrune) { final Collection<SOpGroup> children = new LinkedList<SOpGroup>(); for (SOpGroup g : groupsToPrune) - collectChildren(tree, g, groupsToPrune); + collectChildren(tree, g, children); groupsToPrune.addAll(children); final Collection<SOp> sopsToPrune = new LinkedList<SOp>(); for (SOpGroup g : groupsToPrune) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |