From: <mrp...@us...> - 2011-01-20 17:49:23
|
Revision: 4141 http://bigdata.svn.sourceforge.net/bigdata/?rev=4141&view=rev Author: mrpersonick Date: 2011-01-20 17:49:16 +0000 (Thu, 20 Jan 2011) Log Message: ----------- working through tck failures Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/SOpTreeBuilder.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/tck/BigdataSparqlTest.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java 2011-01-20 17:48:21 UTC (rev 4140) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java 2011-01-20 17:49:16 UTC (rev 4141) @@ -745,14 +745,11 @@ s = s.replaceAll("com.bigdata.bop.join.", ""); s = s.replaceAll("com.bigdata.bop.solutions.", ""); s = s.replaceAll("com.bigdata.bop.rdf.filter.", ""); + s = s.replaceAll("com.bigdata.bop.bset", ""); s = s.replaceAll("com.bigdata.bop.", ""); s = s.replaceAll("com.bigdata.rdf.sail.", ""); s = s.replaceAll("com.bigdata.rdf.spo.", ""); -// s = s.replaceAll("com.bigdata.bop..", ""); -// s = s.replaceAll("com.bigdata.bop..", ""); -// s = s.replaceAll("com.bigdata.bop..", ""); -// s = s.replaceAll("com.bigdata.bop..", ""); -// s = s.replaceAll("com.bigdata.bop..", ""); + s = s.replaceAll("com.bigdata.rdf.internal.constraints.", ""); return s; } @@ -1027,3 +1024,4 @@ } } + Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/SOpTreeBuilder.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/SOpTreeBuilder.java 2011-01-20 17:48:21 UTC (rev 4140) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/SOpTreeBuilder.java 
2011-01-20 17:49:16 UTC (rev 4141) @@ -236,6 +236,8 @@ } else if (arg instanceof LeftJoin) { // collectSOps(sops, (LeftJoin) arg, rslj, groupId.incrementAndGet(), g); collectSOps(sops, (LeftJoin) arg, rslj, g, pg); + } else if (arg instanceof SingletonSet) { + // do nothing } else { throw new UnsupportedOperatorException(arg); } Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/tck/BigdataSparqlTest.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/tck/BigdataSparqlTest.java 2011-01-20 17:48:21 UTC (rev 4140) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/tck/BigdataSparqlTest.java 2011-01-20 17:49:16 UTC (rev 4141) @@ -41,11 +41,18 @@ import junit.framework.TestSuite; import org.apache.log4j.Logger; +import org.openrdf.model.Statement; import org.openrdf.query.Dataset; +import org.openrdf.query.Query; +import org.openrdf.query.QueryLanguage; +import org.openrdf.query.TupleQuery; +import org.openrdf.query.TupleQueryResult; import org.openrdf.query.parser.sparql.ManifestTest; import org.openrdf.query.parser.sparql.SPARQLQueryTest; import org.openrdf.repository.Repository; +import org.openrdf.repository.RepositoryConnection; import org.openrdf.repository.RepositoryException; +import org.openrdf.repository.RepositoryResult; import org.openrdf.repository.dataset.DatasetRepository; import org.openrdf.repository.sail.SailRepository; import org.openrdf.sail.memory.MemoryStore; @@ -55,8 +62,8 @@ import com.bigdata.journal.BufferMode; import com.bigdata.journal.IIndexManager; import com.bigdata.rdf.sail.BigdataSail; +import com.bigdata.rdf.sail.BigdataSail.Options; import com.bigdata.rdf.sail.BigdataSailRepository; -import com.bigdata.rdf.sail.BigdataSail.Options; /** * Test harness for running the SPARQL test suites. 
@@ -173,68 +180,71 @@ */ static final Collection<String> testURIs = Arrays.asList(new String[] { -// busted with EvalStrategy1 -// "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/algebra/manifest#nested-opt-2", -// "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/algebra/manifest#filter-scope-1", -// "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/algebra/manifest#join-scope-1", -// "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/optional/manifest#dawg-optional-complex-4", - -// busted with EvalStrategy2 with LeftJoin enabled -// "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/open-world/manifest#open-eq-12", -// "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/algebra/manifest#nested-opt-1", -// "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/algebra/manifest#opt-filter-1", -// "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/algebra/manifest#opt-filter-2", -// "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/optional/manifest#dawg-optional-complex-3", -// "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/optional-filter/manifest#dawg-optional-filter-001", -// "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/optional-filter/manifest#dawg-optional-filter-004", - -// Dataset crap -// "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/i18n/manifest#normalization-1", - -// "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/dataset/manifest#dawg-dataset-01", - -// "http://www.w3.org/2001/sw/DataAccess/tests/data-r2//manifest#", -// "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/expr-builtin/manifest#dawg-str-1", -// "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/expr-builtin/manifest#dawg-str-2", -// "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/expr-builtin/manifest#dawg-datatype-1", -// "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/expr-builtin/manifest#sameTerm-simple", -// "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/expr-builtin/manifest#sameTerm-eq", -// 
"http://www.w3.org/2001/sw/DataAccess/tests/data-r2/expr-builtin/manifest#sameTerm-not-eq", -// -// "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/expr-equals/manifest#eq-graph-1", -// "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/expr-equals/manifest#eq-graph-2", - - /* - * Tests which fail with 2 data services. - */ -// "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/basic/manifest#bgp-no-match",//Ok -// "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/basic/manifest#prefix-name-1",//OK -// "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/basic/manifest#spoo-1",//BOOM - -// "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/graph/manifest#dawg-graph-05", + /* + * working through the new query engine failures: 0 errors, 19 failures + */ + /* + * Basically we are having a lot of problems with our compare + * operator, which is supposed to do fuzzy comparisons that + * sometimes requires materialized RDF values. These I feel I can + * handle on my own. + */ + + // "a" and "a"^^xsd:string have different term ids? also bnodes are different + "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/open-world/manifest#open-eq-07", + "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/open-world/manifest#open-eq-08", + "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/open-world/manifest#open-eq-10", + "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/open-world/manifest#open-eq-11", + "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/open-world/manifest#open-eq-12", + "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/open-world/manifest#open-cmp-01", + "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/open-world/manifest#open-cmp-02", + + /* + * These tests have to do with that that weird "well designed" + * optional nesting P = A OPT (B OPT C) where A and C share + * variables not in B. I think I can handle these on my own. 
+ */ + "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/algebra/manifest#nested-opt-1", + "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/algebra/manifest#join-scope-1", + /* - * working through the new query engine failures + * Everything below this point I need help with. */ - // please someone explain this shit to me -// "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/open-world/manifest#open-eq-12" + /* + * These failures have to do with nested UNIONs - we don't seem to + * be handling them correctly at all. + */ + "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/algebra/manifest#join-combo-1", + "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/algebra/manifest#join-combo-2", + "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/optional/manifest#dawg-optional-complex-2", + "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/optional/manifest#dawg-optional-complex-4", - // this is that weird "well designed" optional shit P = A OPT (B OPT C) where A and C share variables not in B -// "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/algebra/manifest#nested-opt-1" - - // where do we put the !bound(?e) constraint??? -// "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/bound/manifest#dawg-bound-query-001" + /* + * This one is truly bizarre - involving a non-optional subquuery + * plus an optional subquery. Don't even know where to start on this + * guy. + */ + "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/algebra/manifest#filter-scope-1", - // "a" and "a"^^xsd:string have different term ids? also bnodes are different -// "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/open-world/manifest#open-eq-07" + /* + * Sometimes, a filter is the entire join group, and it should not + * be able to see variables outside the group. Frankly I do not + * understand this one. 
+ */ + "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/algebra/manifest#filter-nested-2", + + /* + * These demonstrate the problem of where to put non-optional + * filters that need to be evaluated after optional tails and + * optional join groups. + */ + "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/bound/manifest#dawg-bound-query-001", + "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/optional/manifest#dawg-optional-complex-1", + "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/optional-filter/manifest#dawg-optional-filter-002", + "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/optional-filter/manifest#dawg-optional-filter-003", - // help, non-optional subquery?? wtf -// "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/algebra/manifest#filter-scope-1" - - // this uncovers an obvious bug in our SubqueryOp -// "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/optional-filter/manifest#dawg-optional-filter-001" - }); /** @@ -471,42 +481,42 @@ } -// @Override -// protected void runTest() -// throws Exception -// { -// RepositoryConnection con = getQueryConnection(dataRep); -// try { -// + @Override + protected void runTest() + throws Exception + { + RepositoryConnection con = getQueryConnection(dataRep); + try { + // log.info("database dump:"); // RepositoryResult<Statement> stmts = con.getStatements(null, null, null, false); // while (stmts.hasNext()) { // log.info(stmts.next()); // } -// log.info("dataset:\n" + dataset); -// -// String queryString = readQueryString(); -// log.info("query:\n" + getQueryString()); -// -// Query query = con.prepareQuery(QueryLanguage.SPARQL, queryString, queryFileURL); -// if (dataset != null) { -// query.setDataset(dataset); -// } -// -// if (query instanceof TupleQuery) { -// TupleQueryResult queryResult = ((TupleQuery)query).evaluate(); -// while (queryResult.hasNext()) { -// log.info("query result:\n" + queryResult.next()); -// } -// } -// -// } -// finally { -// con.close(); -// } -// -// super.runTest(); 
-// } + log.info("dataset:\n" + dataset); + + String queryString = readQueryString(); + log.info("query:\n" + getQueryString()); + + Query query = con.prepareQuery(QueryLanguage.SPARQL, queryString, queryFileURL); + if (dataset != null) { + query.setDataset(dataset); + } + + if (query instanceof TupleQuery) { + TupleQueryResult queryResult = ((TupleQuery)query).evaluate(); + while (queryResult.hasNext()) { + log.info("query result:\n" + queryResult.next()); + } + } + + } + finally { + con.close(); + } + + super.runTest(); + } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2011-01-21 00:17:25
|
Revision: 4152 http://bigdata.svn.sourceforge.net/bigdata/?rev=4152&view=rev Author: thompsonbry Date: 2011-01-21 00:17:18 +0000 (Fri, 21 Jan 2011) Log Message: ----------- Added method to return an int[] of the ids for predicates to BOpUtility. Added method to return the set of constraints attached to the last join in a join path to PartitionedJoinGroup and (a few) unit tests for that method. Modified BOpBase#toString() to write out everything if an operand is an IValueExpression for better printing of bops. Modified the QUADS mode SAIL proxy test suites to invoke the TestNestedOptionals and TestNestedUnions test suites. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/PartitionedJoinGroup.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestPartitionedJoinGroup.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/join/TestPipelineJoin.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailEmbeddedFederationWithQuads.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailWithQuads.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailWithQuadsWithoutInlining.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestNestedOptionals.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestNestedUnions.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java 2011-01-20 23:25:24 UTC (rev 4151) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java 2011-01-21 00:17:18 UTC 
(rev 4152) @@ -566,10 +566,14 @@ for (BOp t : args) { if (nwritten > 0) sb.append(','); - sb.append(t.getClass().getSimpleName()); - final Integer tid = (Integer) t.getProperty(Annotations.BOP_ID); - if (tid != null) { - sb.append("[" + tid + "]"); + if(t instanceof IValueExpression<?>) { + sb.append(t.toString()); + } else { + sb.append(t.getClass().getSimpleName()); + final Integer tid = (Integer) t.getProperty(Annotations.BOP_ID); + if (tid != null) { + sb.append("[" + tid + "]"); + } } nwritten++; } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java 2011-01-20 23:25:24 UTC (rev 4151) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java 2011-01-21 00:17:18 UTC (rev 4152) @@ -1022,6 +1022,37 @@ return out; } - + + /** + * Return an ordered array of the bopIds associated with an ordered array of + * predicates (aka a join path). + * + * @param path + * A join path. + * + * @return The ordered array of predicates for that join path. + * + * @throws IllegalArgumentException + * if the argument is <code>null</code>. + * @throws IllegalArgumentException + * if any element of the argument is <code>null</code>. + * @throws IllegalStateException + * if any {@link IPredicate} does not have a defined bopId as + * reported by {@link BOp#getId()}. 
+ */ + public static int[] getPredIds(final IPredicate<?>[] path) { + + final int[] b = new int[path.length]; + + for (int i = 0; i < path.length; i++) { + + b[i] = path[i].getId(); + + } + + return b; + + } + } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/PartitionedJoinGroup.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/PartitionedJoinGroup.java 2011-01-20 23:25:24 UTC (rev 4151) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/PartitionedJoinGroup.java 2011-01-21 00:17:18 UTC (rev 4152) @@ -8,6 +8,8 @@ import java.util.Map; import java.util.Set; +import org.apache.log4j.Logger; + import com.bigdata.bop.BOpUtility; import com.bigdata.bop.IConstraint; import com.bigdata.bop.IPredicate; @@ -65,6 +67,9 @@ */ public class PartitionedJoinGroup { + private static final transient Logger log = Logger + .getLogger(PartitionedJoinGroup.class); + /** * The set of variables bound by the non-optional predicates. */ @@ -139,6 +144,206 @@ } /** + * Return the set of constraints which should be attached to the last join + * in the given the join path. All joins in the join path must be + * non-optional joins (that is, part of either the head plan or the join + * graph). + * <p> + * The rule followed by this method is that each constraint will be attached + * to the first non-optional join at which all of its variables are known to + * be bound. It is assumed that constraints are attached to each join in the + * join path by a consistent logic, e.g., as dictated by this method. + * + * @param joinPath + * An ordered array of predicate identifiers representing a + * specific sequence of non-optional joins. + * + * @return The constraints which should be attached to the last join in the + * join path. + * + * @throws IllegalArgumentException + * if the join path is <code>null</code>. 
+ * @throws IllegalArgumentException + * if the join path is empty. + * @throws IllegalArgumentException + * if any element of the join path is <code>null</code>. + * @throws IllegalArgumentException + * if any predicate specified in the join path is not known to + * this class. + * @throws IllegalArgumentException + * if any predicate specified in the join path is optional. + * + * FIXME implement and unit tests. + * + * @todo Implement (or refactor) the logic to decide which variables need to + * be propagated and which can be dropped. This decision logic will + * need to be available to the runtime query optimizer. + * + * @todo This does not pay attention to the head plan. If there can be + * constraints on the head plan then either this should be modified + * such that it can decide where they attach or we need to have a + * method which does the same thing for the head plan. + */ + public IConstraint[] getJoinGraphConstraints(final int[] pathIds) { + + /* + * Verify arguments and resolve bopIds to predicates. + */ + if (pathIds == null) + throw new IllegalArgumentException(); + + final IPredicate<?>[] path = new IPredicate[pathIds.length]; + + for (int i = 0; i < pathIds.length; i++) { + + final int id = pathIds[i]; + + IPredicate<?> p = null; + + for (IPredicate<?> tmp : joinGraph) { + + if (tmp.getId() == id) { + + p = tmp; + + break; + + } + + } + + if (p == null) + throw new IllegalArgumentException("Not found: id=" + id); + + if (p.isOptional()) + throw new AssertionError( + "Not expecting an optional predicate: " + p); + + path[i] = p; + + } + + /* + * For each predicate in the path in the given order, figure out which + * constraint(s) would attach to that predicate based on which variables + * first become bound with that predicate. For the last predicate in the + * given join path, we return that set of constraints. + */ + + // the set of variables which are bound. 
+ final Set<IVariable<?>> boundVars = new LinkedHashSet<IVariable<?>>(); + + // the set of constraints which have been consumed. + final Set<IConstraint> used = new LinkedHashSet<IConstraint>(); + + // the set of constraints for the last predicate in the join path. + final List<IConstraint> ret = new LinkedList<IConstraint>(); + + for(int i = 0; i<path.length; i++) { + + // true iff this is the last join in the path. + final boolean lastJoin = i == path.length - 1; + + // a predicate in the path. + final IPredicate<?> p = path[i]; + + { + /* + * Visit the variables used by the predicate (and bound by it + * since it is not an optional predicate) and add them into the + * total set of variables which are bound at this point in the + * join path. + */ + final Iterator<IVariable<?>> vitr = BOpUtility + .getArgumentVariables(p); + + while (vitr.hasNext()) { + + final IVariable<?> var = vitr.next(); + + boundVars.add(var); + + } + } + + // consider each constraint. + for(IConstraint c : joinGraphConstraints) { + + if (used.contains(c)) { + /* + * Skip constraints which were already assigned to + * predicates before this one in the join path. + */ + continue; + } + + /* + * true iff all variables used by this constraint are bound at + * this point in the join path. + */ + boolean allVarsBound = true; + + // visit the variables used by this constraint. + final Iterator<IVariable<?>> vitr = BOpUtility + .getSpannedVariables(c); + + while (vitr.hasNext()) { + + final IVariable<?> var = vitr.next(); + + if(!boundVars.contains(var)) { + + allVarsBound = false; + + break; + + } + + } + + if (allVarsBound) { + + /* + * All variables have become bound for this constraint, so + * add it to the set of "used" constraints. 
+ */ + + used.add(c); + +// if (log.isDebugEnabled()) { +// log.debug +// } + System.err.println("Constraint attached at index " + i + " of " + + path.length + ", bopId=" + p.getId() + + ", constraint=" + c); + + if (lastJoin) { + + /* + * If we are on the last join in the join path, then + * this constraint is one of the ones that we will + * return. + */ + + ret.add(c); + + } + + } // if(allVarsBound) + + } // next constraint + + } // next predicate in the join path. + + /* + * Return the set of constraints to be applied as of the last predicate + * in the join path. + */ + return ret.toArray(new IConstraint[ret.size()]); + + } + + /** * The {@link IPredicate}s representing optional joins. Any * {@link IConstraint}s having variable(s) NOT bound by the required joins * will already have been attached to the last {@link IPredicate} in the Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestPartitionedJoinGroup.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestPartitionedJoinGroup.java 2011-01-20 23:25:24 UTC (rev 4151) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestPartitionedJoinGroup.java 2011-01-21 00:17:18 UTC (rev 4152) @@ -231,6 +231,40 @@ assertSameIteratorAnyOrder("joinGraphConstraints", constraints, Arrays.asList(fixture.getJoinGraphConstraints()).iterator()); + /* + * Verify the placement of each constraint for a variety of join + * paths. + */ + { +// final int[] pathIds = BOpUtility.getPredIds(new IPredicate[] { +// p0, p1, p2, p3, p4, p5 }); +// final IConstraint[] actual = fixture +// .getJoinGraphConstraints(pathIds); +// System.out.println(Arrays.toString(actual)); + + // c1 is applied when x is bound. x is bound by p0. + assertEquals(new IConstraint[] { c1 }, fixture + .getJoinGraphConstraints(new int[] { p1.getId(), + p0.getId() })); + + /* + * c1 is applied when x is bound. 
x is bound by p0. p0 is the + * last predicate in this join path, so c1 is attached to p0. + */ + assertEquals(new IConstraint[] { c1 }, fixture + .getJoinGraphConstraints(new int[] { p0.getId()})); + + /* + * c2 is applied when y is bound. y is bound by p1. p1 is the + * last predicate in this join path, p1 is the last predicate in + * this join path so c2 is attached to p1. + */ + assertEquals(new IConstraint[] { c2 }, fixture + .getJoinGraphConstraints(new int[] { p0.getId(), + p1.getId() })); + + } + // there is no tail plan. assertEquals("tailPlan", new IPredicate[] {}, fixture.getTailPlan()); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/join/TestPipelineJoin.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/join/TestPipelineJoin.java 2011-01-20 23:25:24 UTC (rev 4151) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/join/TestPipelineJoin.java 2011-01-21 00:17:18 UTC (rev 4152) @@ -68,10 +68,23 @@ /** * Unit tests for the {@link PipelineJoin} operator. * <p> - * Note: The operators to map binding sets over shards are tested independently. + * Note: The logic to map binding sets over shards is tested independently. * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ + * + * FIXME This test suite does not fully test the FILTER semantics for + * SPARQL joins. In particular, (a) optional join solutions are counted + * as successful <em>before</em> the FILTER is applied; and (b) if no + * optional join solutions exist (before filtering) for a given source + * binding set, then the FILTER is applied to the source binding set to + * decide whether or not the source solutions should be passed along. + * That is, the behavior is the same in both cases as if the FILTER as + * applied after the optional join. 
However, in fact, we apply it as + * part of the join operator in order to keep things simpler for the + * runtime query optimizer, which needs to be able to perform cutoff + * joins and which also needs to be able to reorder the predicates, + * creating the appropriate join operators as it does so. */ public class TestPipelineJoin extends TestCase2 { Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailEmbeddedFederationWithQuads.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailEmbeddedFederationWithQuads.java 2011-01-20 23:25:24 UTC (rev 4151) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailEmbeddedFederationWithQuads.java 2011-01-21 00:17:18 UTC (rev 4152) @@ -97,6 +97,10 @@ // test suite for optionals handling (left joins). suite.addTestSuite(TestOptionals.class); + suite.addTestSuite(TestNestedOptionals.class); + + suite.addTestSuite(TestNestedUnions.class); + // test of the search magic predicate suite.addTestSuite(TestSearchQuery.class); Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailWithQuads.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailWithQuads.java 2011-01-20 23:25:24 UTC (rev 4151) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailWithQuads.java 2011-01-21 00:17:18 UTC (rev 4152) @@ -79,6 +79,10 @@ // test suite for optionals handling (left joins). 
suite.addTestSuite(TestOptionals.class); + suite.addTestSuite(TestNestedOptionals.class); + + suite.addTestSuite(TestNestedUnions.class); + // test of the search magic predicate suite.addTestSuite(TestSearchQuery.class); Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailWithQuadsWithoutInlining.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailWithQuadsWithoutInlining.java 2011-01-20 23:25:24 UTC (rev 4151) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailWithQuadsWithoutInlining.java 2011-01-21 00:17:18 UTC (rev 4152) @@ -81,6 +81,10 @@ // test suite for optionals handling (left joins). suite.addTestSuite(TestOptionals.class); + suite.addTestSuite(TestNestedOptionals.class); + + suite.addTestSuite(TestNestedUnions.class); + // test of the search magic predicate suite.addTestSuite(TestSearchQuery.class); Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestNestedOptionals.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestNestedOptionals.java 2011-01-20 23:25:24 UTC (rev 4151) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestNestedOptionals.java 2011-01-21 00:17:18 UTC (rev 4152) @@ -69,7 +69,7 @@ * @author <a href="mailto:mrp...@us...">Mike Personick</a> * @version $Id$ */ -public class TestNestedOptionals extends ProxyBigdataSailTestCase { +public class TestNestedOptionals extends QuadsTestCase { /* * TODO Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestNestedUnions.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestNestedUnions.java 2011-01-20 23:25:24 UTC (rev 
4151) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestNestedUnions.java 2011-01-21 00:17:18 UTC (rev 4152) @@ -69,7 +69,7 @@ * @author <a href="mailto:mrp...@us...">Mike Personick</a> * @version $Id$ */ -public class TestNestedUnions extends ProxyBigdataSailTestCase { +public class TestNestedUnions extends QuadsTestCase { /* * TODO This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <mrp...@us...> - 2011-01-21 16:35:08
|
Revision: 4157 http://bigdata.svn.sourceforge.net/bigdata/?rev=4157&view=rev Author: mrpersonick Date: 2011-01-21 16:35:02 +0000 (Fri, 21 Jan 2011) Log Message: ----------- working through tck failures Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/SubqueryOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/Rule2BOpUtility.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/SOp2BOpUtility.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/SubqueryOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/SubqueryOp.java 2011-01-21 14:28:17 UTC (rev 4156) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/SubqueryOp.java 2011-01-21 16:35:02 UTC (rev 4157) @@ -156,17 +156,17 @@ } - public String toString() { - - final StringBuilder sb = new StringBuilder(super.toString()); - sb.append("\n{\n"); - final PipelineOp subquery = (PipelineOp) - getRequiredProperty(Annotations.SUBQUERY); - sb.append(BOpUtility.toString(subquery)); - sb.append("\n}"); - return sb.toString(); - - } +// public String toString() { +// +// final StringBuilder sb = new StringBuilder(super.toString()); +// sb.append("\n{\n"); +// final PipelineOp subquery = (PipelineOp) +// getRequiredProperty(Annotations.SUBQUERY); +// sb.append(BOpUtility.toString(subquery)); +// sb.append("\n}"); +// return sb.toString(); +// +// } /** * Evaluates the arguments of the operator as subqueries. 
The arguments are Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java 2011-01-21 14:28:17 UTC (rev 4156) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java 2011-01-21 16:35:02 UTC (rev 4157) @@ -1684,9 +1684,11 @@ final IBindingSet bs = bindingSets[bindex]; - if(!BOpUtility.isConsistent(constraints, bs)) { - // Failed by the constraint on the join. - continue; + if (constraints != null) { + if(!BOpUtility.isConsistent(constraints, bs)) { + // Failed by the constraint on the join. + continue; + } } if (log.isTraceEnabled()) Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/Rule2BOpUtility.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/Rule2BOpUtility.java 2011-01-21 14:28:17 UTC (rev 4156) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/Rule2BOpUtility.java 2011-01-21 16:35:02 UTC (rev 4157) @@ -620,12 +620,12 @@ // } // just add all the constraints to the very last tail for now -// if (i == (order.length-1) && rule.getConstraintCount() > 0) { -// final Iterator<IConstraint> it = rule.getConstraints(); -// while (it.hasNext()) { -// constraints.add(it.next()); -// } -// } + if (i == (order.length-1) && rule.getConstraintCount() > 0) { + final Iterator<IConstraint> it = rule.getConstraints(); + while (it.hasNext()) { + constraints.add(it.next()); + } + } // annotations for this join. 
final List<NV> anns = new LinkedList<NV>(); @@ -730,23 +730,23 @@ } - if (rule.getConstraintCount() > 0) { - final Iterator<IConstraint> it = rule.getConstraints(); - while (it.hasNext()) { - final IConstraint c = it.next(); - final int condId = idFactory.incrementAndGet(); - final PipelineOp condOp = applyQueryHints( - new ConditionalRoutingOp(new BOp[]{left}, - NV.asMap(new NV[]{// - new NV(BOp.Annotations.BOP_ID,condId), - new NV(ConditionalRoutingOp.Annotations.CONDITION, c), - })), queryHints); - left = condOp; - if (log.isDebugEnabled()) { - log.debug("adding conditional routing op: " + condOp); - } - } - } +// if (rule.getConstraintCount() > 0) { +// final Iterator<IConstraint> it = rule.getConstraints(); +// while (it.hasNext()) { +// final IConstraint c = it.next(); +// final int condId = idFactory.incrementAndGet(); +// final PipelineOp condOp = applyQueryHints( +// new ConditionalRoutingOp(new BOp[]{left}, +// NV.asMap(new NV[]{// +// new NV(BOp.Annotations.BOP_ID,condId), +// new NV(ConditionalRoutingOp.Annotations.CONDITION, c), +// })), queryHints); +// left = condOp; +// if (log.isDebugEnabled()) { +// log.debug("adding conditional routing op: " + condOp); +// } +// } +// } if (log.isInfoEnabled()) { // just for now while i'm debugging Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/SOp2BOpUtility.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/SOp2BOpUtility.java 2011-01-21 14:28:17 UTC (rev 4156) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/SOp2BOpUtility.java 2011-01-21 16:35:02 UTC (rev 4157) @@ -27,6 +27,7 @@ package com.bigdata.rdf.sail.sop; +import java.util.Arrays; import java.util.Collection; import java.util.HashSet; import java.util.Iterator; @@ -47,6 +48,7 @@ import com.bigdata.bop.NV; import com.bigdata.bop.PipelineOp; import com.bigdata.bop.ap.Predicate; +import 
com.bigdata.bop.bset.ConditionalRoutingOp; import com.bigdata.bop.controller.SubqueryOp; import com.bigdata.bop.controller.Union; import com.bigdata.bop.engine.QueryEngine; @@ -161,13 +163,22 @@ * join groups, and thus should be translated into ConditionalRoutingOps * for maximum efficiency. */ - final Collection<IConstraint> conditionals = + final Collection<IConstraint> preConditionals = new LinkedList<IConstraint>(); - final IRule rule = rule(join, conditionals); + /* + * These are constraints that use variables bound by subqueries, and + * thus cannot be attached to the predicates in this group. They are + * handled by ConditionalRoutingOps at the end of the group, after + * the subqueries have run. + */ + final Collection<IConstraint> postConditionals = + new LinkedList<IConstraint>(); + final IRule rule = rule(join, preConditionals, postConditionals); + PipelineOp left = Rule2BOpUtility.convert( - rule, conditionals, idFactory, db, queryEngine, queryHints); + rule, preConditionals, idFactory, db, queryEngine, queryHints); /* * Start with left=<this join group> and add a SubqueryOp for each @@ -190,9 +201,26 @@ new NV(SubqueryOp.Annotations.SUBQUERY, subquery),// new NV(SubqueryOp.Annotations.OPTIONAL,optional)// ); + if (log.isInfoEnabled()) { + log.info("adding a subquery: " + subqueryId + "\n" + left); + } } } + for (IConstraint c : postConditionals) { + final int condId = idFactory.incrementAndGet(); + final PipelineOp condOp = + new ConditionalRoutingOp(new BOp[]{left}, + NV.asMap(new NV[]{// + new NV(BOp.Annotations.BOP_ID,condId), + new NV(ConditionalRoutingOp.Annotations.CONDITION, c), + })); + left = condOp; + if (log.isDebugEnabled()) { + log.debug("adding post-conditional routing op: " + condOp); + } + } + if (!left.getEvaluationContext() .equals(BOpEvaluationContext.CONTROLLER) && !(left instanceof SubqueryOp)) { @@ -258,7 +286,8 @@ } protected static IRule rule(final SOpGroup group, - final Collection<IConstraint> conditionals) { + final 
Collection<IConstraint> preConditionals, + final Collection<IConstraint> postConditionals) { final Collection<IPredicate> preds = new LinkedList<IPredicate>(); final Collection<IConstraint> constraints = new LinkedList<IConstraint>(); @@ -266,36 +295,20 @@ /* * Gather up all the variables used by non-optional parent join groups */ - final Set<IVariable<?>> variables = new HashSet<IVariable<?>>(); + final Set<IVariable<?>> nonOptParentVars = new HashSet<IVariable<?>>(); SOpGroup parent = group; while ((parent = parent.getParent()) != null) { if (isNonOptionalJoinGroup(parent)) - collectPredicateVariables(variables, parent); + collectPredicateVariables(nonOptParentVars, parent); } + /* + * Gather up all the predicates in this group. + */ for (SOp sop : group) { final BOp bop = sop.getBOp(); if (bop instanceof IPredicate) { preds.add((IPredicate) bop); - } else if (bop instanceof IConstraint) { - final IConstraint c = (IConstraint) bop; - /* - * This constraint is a conditional if all of its variables - * appear in non-optional parent join groups - */ - final Iterator<IVariable<?>> vars = - BOpUtility.getSpannedVariables(c); - boolean conditional = true; - while (vars.hasNext()) { - final IVariable<?> v = vars.next(); - conditional &= variables.contains(v); - } - if (conditional) - conditionals.add(c); - else - constraints.add(c); - } else { - throw new IllegalArgumentException("illegal operator: " + sop); } } @@ -317,8 +330,95 @@ } } + /* + * Gather up all the variables used by predicates in this group + */ + final Set<IVariable<?>> groupVars = new HashSet<IVariable<?>>(); + for (IPredicate bop : preds) { + for (BOp arg : bop.args()) { + if (arg instanceof IVariable<?>) { + final IVariable<?> v = (IVariable<?>) arg; + groupVars.add(v); + } + } + } + + /* + * Gather up the constraints, segregating into three categories: + * -constraints: all variables used by predicates in this group + * -pre-conditionals: all variables already bound by parent group(s) + * 
-post-conditionals: some or all variables bound in subqueries + */ + for (SOp sop : group) { + final BOp bop = sop.getBOp(); + if (bop instanceof IConstraint) { + final IConstraint c = (IConstraint) bop; + + { // find the pre-conditionals + + final Iterator<IVariable<?>> constraintVars = + BOpUtility.getSpannedVariables(c); + + /* + * This constraint is a pre-conditional if all of its variables + * appear in non-optional parent join groups + */ + boolean preConditional = true; + while (constraintVars.hasNext()) { + final IVariable<?> v = constraintVars.next(); + preConditional &= nonOptParentVars.contains(v); + } + if (preConditional) { + preConditionals.add(c); + continue; + } + + } + + { // find the post-conditionals + + final Iterator<IVariable<?>> constraintVars = + BOpUtility.getSpannedVariables(c); + + /* + * This constraint is a post-conditional if not all of its + * variables appear in this join group or non-optional parent + * groups (bound by subqueries) + */ + boolean postConditional = false; + while (constraintVars.hasNext()) { + final IVariable<?> v = constraintVars.next(); + if (!nonOptParentVars.contains(v) && + !groupVars.contains(v)) { + postConditional = true; + break; + } + } + if (postConditional) { + postConditionals.add(c); + continue; + } + + } + + /* + * Neither pre nor post conditional, but a constraint on the + * predicates in this group. 
done this roundabout way for the + * benefit of the RTO + */ + constraints.add(c); + } + } + final IVariable<?>[] required = group.getTree().getRequiredVars(); + if (log.isInfoEnabled()) { + log.info("preds: " + Arrays.toString(preds.toArray())); + log.info("constraints: " + Arrays.toString(constraints.toArray())); + log.info("preConds: " + Arrays.toString(preConditionals.toArray())); + log.info("postConds: " + Arrays.toString(postConditionals.toArray())); + } + final IRule rule = new Rule( "dummy rule", null, // head This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2011-01-21 16:46:48
|
Revision: 4159 http://bigdata.svn.sourceforge.net/bigdata/?rev=4159&view=rev Author: thompsonbry Date: 2011-01-21 16:46:41 +0000 (Fri, 21 Jan 2011) Log Message: ----------- Modified UNION/STEPS and AbstractSubqueryOp to use an annotation for the subqueries rather than the arguments of the UNION/STEPS. This makes it possible to use these operators at positions other than the start of the pipeline. Bug fix to PipelineJoin where it was applying constraints for optional joins to the original solution even when the constraints was a null reference. Fix to the semantics of an optional join test in TestQueryEngine to reflect the fact that the constraint is applied on all paths through a join. Turning on the new eval strategy by default in the SAIL for CI builds. 13 sail failures in this commit. some appear to be new while some have disappeared. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/AbstractSubqueryOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/Steps.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/Union.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestUnion.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSail.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/Rule2BOpUtility.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/SOp2BOpUtility.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/AbstractSubqueryOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/AbstractSubqueryOp.java 2011-01-21 16:41:12 UTC (rev 4158) +++ 
branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/AbstractSubqueryOp.java 2011-01-21 16:46:41 UTC (rev 4159) @@ -38,7 +38,6 @@ import com.bigdata.bop.BOp; import com.bigdata.bop.BOpContext; -import com.bigdata.bop.BOpEvaluationContext; import com.bigdata.bop.BOpUtility; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.PipelineOp; @@ -74,7 +73,7 @@ * * <pre> * SLICE[1]( - * UNION[2]([a{sinkRef=1},b{sinkRef=1},c{sinkRef=1}],{}) + * UNION[2]([...],{subqueries=[a{sinkRef=1},b{sinkRef=1},c{sinkRef=1}]}) * ) * </pre> * @@ -96,6 +95,12 @@ public interface Annotations extends PipelineOp.Annotations { /** + * The ordered {@link BOp}[] of subqueries to be evaluated for each + * binding set presented (required). + */ + String SUBQUERIES = SubqueryOp.class.getName() + ".subqueries"; + + /** * The maximum parallelism with which the subqueries will be evaluated * (default is unlimited). */ @@ -132,13 +137,19 @@ super(args, annotations); - if (!getEvaluationContext().equals(BOpEvaluationContext.CONTROLLER)) - throw new IllegalArgumentException(Annotations.EVALUATION_CONTEXT - + "=" + getEvaluationContext()); +// if (!getEvaluationContext().equals(BOpEvaluationContext.CONTROLLER)) +// throw new IllegalArgumentException(Annotations.EVALUATION_CONTEXT +// + "=" + getEvaluationContext()); - if (!getProperty(Annotations.CONTROLLER, Annotations.DEFAULT_CONTROLLER)) - throw new IllegalArgumentException(Annotations.CONTROLLER); - +// if (!getProperty(Annotations.CONTROLLER, Annotations.DEFAULT_CONTROLLER)) +// throw new IllegalArgumentException(Annotations.CONTROLLER); + + // verify required annotation. + final BOp[] subqueries = (BOp[]) getRequiredProperty(Annotations.SUBQUERIES); + + if (subqueries.length == 0) + throw new IllegalArgumentException(Annotations.SUBQUERIES); + // // The id of this operator (if any). 
// final Integer thisId = (Integer)getProperty(Annotations.BOP_ID); // @@ -170,6 +181,7 @@ private static class ControllerTask implements Callable<Void> { private final AbstractSubqueryOp controllerOp; + private final BOp[] subqueries; private final BOpContext<IBindingSet> context; private final int nparallel; private final Executor executor; @@ -187,6 +199,9 @@ this.context = context; + this.subqueries = (BOp[]) controllerOp + .getRequiredProperty(Annotations.SUBQUERIES); + this.nparallel = controllerOp.getProperty(Annotations.MAX_PARALLEL, Annotations.DEFAULT_MAX_PARALLEL); @@ -244,8 +259,8 @@ try { - final CountDownLatch latch = new CountDownLatch(controllerOp - .arity()); + final CountDownLatch latch = new CountDownLatch( + subqueries.length); /* * Create FutureTasks for each subquery. The futures are not @@ -253,7 +268,7 @@ * deferring the evaluation until call() we gain the ability to * cancel all subqueries if any subquery fails. */ - for (BOp op : controllerOp.args()) { + for (BOp op : subqueries) { /* * Task runs subquery and cancels all subqueries in [tasks] Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/Steps.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/Steps.java 2011-01-21 16:41:12 UTC (rev 4158) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/Steps.java 2011-01-21 16:46:41 UTC (rev 4159) @@ -37,7 +37,7 @@ * STEPS(ops) * * <pre> - * STEPS([a,b,c],{}) + * STEPS([],{subqueries=[a,b,c]}) * </pre> * * Will run the subqueries <i>a</i>, <i>b</i>, and <i>c</i> in sequence. Each @@ -62,12 +62,11 @@ public Steps(Steps op) { super(op); } - + /** * Shallow copy constructor. * * @param args - * Two or more operators whose union is desired. 
* @param annotations */ public Steps(final BOp[] args, Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/Union.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/Union.java 2011-01-21 16:41:12 UTC (rev 4158) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/Union.java 2011-01-21 16:46:41 UTC (rev 4159) @@ -34,10 +34,10 @@ import com.bigdata.bop.PipelineOp; /** - * UNION(ops)[maxParallel(default all)] + * UNION()[maxParallel(default all); subqueries=ops] * * <pre> - * UNION([a,b,c],{}) + * UNION([],{subqueries=[a,b,c]}) * </pre> * * Will run the subqueries <i>a</i>, <i>b</i>, and <i>c</i> in parallel for each @@ -68,15 +68,14 @@ * Shallow copy constructor. * * @param args - * Two or more operators whose union is desired. * @param annotations */ public Union(final BOp[] args, final Map<String, Object> annotations) { super(args, annotations); - if (args.length < 2) - throw new IllegalArgumentException(); +// if (args.length < 2) +// throw new IllegalArgumentException(); } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestUnion.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestUnion.java 2011-01-21 16:41:12 UTC (rev 4158) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestUnion.java 2011-01-21 16:46:41 UTC (rev 4159) @@ -45,6 +45,7 @@ import com.bigdata.bop.bindingSet.ArrayBindingSet; import com.bigdata.bop.bindingSet.EmptyBindingSet; import com.bigdata.bop.bindingSet.HashBindingSet; +import com.bigdata.bop.bindingSet.ListBindingSet; import com.bigdata.bop.bset.StartOp; import com.bigdata.bop.engine.IRunningQuery; import com.bigdata.bop.engine.QueryEngine; @@ -119,7 +120,7 @@ // data to insert (in key order for convenience). 
final E[] a = {// - new E("John", "Mary"),// [0] + new E("John", "Mary"),// [0] new E("Leon", "Paul"),// [1] new E("Mary", "Paul"),// [2] new E("Paul", "Leon"),// [3] @@ -176,12 +177,14 @@ BOpEvaluationContext.CONTROLLER),// })); - final BOp unionOp = new Union(new BOp[] { startOp1, startOp2 }, NV + final BOp unionOp = new Union(new BOp[0], NV .asMap(new NV[] {// new NV(Union.Annotations.BOP_ID, unionId),// - new NV(Union.Annotations.EVALUATION_CONTEXT, - BOpEvaluationContext.CONTROLLER),// - new NV(Union.Annotations.CONTROLLER, true),// + new NV(Union.Annotations.SUBQUERIES, new BOp[] { + startOp1, startOp2 }),// +// new NV(Union.Annotations.EVALUATION_CONTEXT, +// BOpEvaluationContext.CONTROLLER),// +// new NV(Union.Annotations.CONTROLLER, true),// })); final BOp query = unionOp; @@ -203,6 +206,60 @@ } + public void test_union_consumesSource() throws Exception { + + final int startId1 = 1; + final int startId2 = 2; + final int unionId = 3; + + final BOp startOp1 = new StartOp(new BOp[] {}, NV.asMap(new NV[] {// + new NV(StartOp.Annotations.BOP_ID, startId1),// + new NV(StartOp.Annotations.EVALUATION_CONTEXT, + BOpEvaluationContext.CONTROLLER),// + })); + + final BOp startOp2 = new StartOp(new BOp[] {}, NV.asMap(new NV[] {// + new NV(StartOp.Annotations.BOP_ID, startId2),// + new NV(StartOp.Annotations.EVALUATION_CONTEXT, + BOpEvaluationContext.CONTROLLER),// + })); + + final BOp unionOp = new Union(new BOp[]{}, NV + .asMap(new NV[] {// + new NV(Union.Annotations.BOP_ID, unionId),// + new NV(Union.Annotations.SUBQUERIES, new BOp[] { + startOp1, startOp2 }) // +// new NV(Union.Annotations.EVALUATION_CONTEXT, +// BOpEvaluationContext.CONTROLLER),// +// new NV(Union.Annotations.CONTROLLER, true),// + })); + + final BOp query = unionOp; + + /* + * Create an initial non-empty binding set. 
+ */ + final IBindingSet bset = new ListBindingSet(); + bset.set(Var.var("x"), new Constant<String>("John")); + bset.set(Var.var("y"), new Constant<String>("Mary")); + + // the expected solutions. + final IBindingSet[] expected = new IBindingSet[] {// + bset, // one copy from the left side of the union. + bset, // one copy from the right side of the union. + }; + + final IRunningQuery runningQuery = queryEngine.eval(query, bset); + + // verify solutions. + TestQueryEngine.assertSameSolutionsAnyOrder(expected, + new Dechunkerator<IBindingSet>(runningQuery.iterator())); + + // Wait until the query is done. + runningQuery.get(); + + } + /** * Verifies that the UNION of two operators is computed. The operators do * not route around the UNION, so their solutions are copied to the UNION @@ -249,12 +306,14 @@ new NV(StartOp.Annotations.BINDING_SETS,bindingSets2) })); - final BOp unionOp = new Union(new BOp[] { startOp1, startOp2 }, NV + final BOp unionOp = new Union(new BOp[] {}, NV .asMap(new NV[] {// new NV(Union.Annotations.BOP_ID, unionId),// - new NV(Union.Annotations.EVALUATION_CONTEXT, - BOpEvaluationContext.CONTROLLER),// - new NV(Union.Annotations.CONTROLLER, true),// + new NV(Union.Annotations.SUBQUERIES, new BOp[] { + startOp1, startOp2 }) // +// new NV(Union.Annotations.EVALUATION_CONTEXT, +// BOpEvaluationContext.CONTROLLER),// +// new NV(Union.Annotations.CONTROLLER, true),// })); final BOp sliceOp = new SliceOp(new BOp[]{unionOp},NV.asMap( @@ -336,12 +395,14 @@ new NV(StartOp.Annotations.BINDING_SETS,bindingSets2) })); - final BOp unionOp = new Union(new BOp[] { startOp1, startOp2 }, NV + final BOp unionOp = new Union(new BOp[] {}, NV .asMap(new NV[] {// new NV(Union.Annotations.BOP_ID, unionId),// - new NV(Union.Annotations.EVALUATION_CONTEXT, - BOpEvaluationContext.CONTROLLER),// - new NV(Union.Annotations.CONTROLLER, true),// + new NV(Union.Annotations.SUBQUERIES, new BOp[] { + startOp1, startOp2 }) // +// new NV(Union.Annotations.EVALUATION_CONTEXT, +// 
BOpEvaluationContext.CONTROLLER),// +// new NV(Union.Annotations.CONTROLLER, true),// })); final BOp sliceOp = new SliceOp(new BOp[]{unionOp},NV.asMap( Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java 2011-01-21 16:41:12 UTC (rev 4158) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java 2011-01-21 16:46:41 UTC (rev 4159) @@ -1643,17 +1643,24 @@ new Constant<String>("Leon"), new Constant<String>("Paul") }// ), - // plus anything we read from the first access path which did not join. - new ArrayBindingSet(// - new IVariable[] { Var.var("x"), Var.var("y") },// - new IConstant[] { new Constant<String>("John"), - new Constant<String>("Mary") }// - ), - new ArrayBindingSet(// - new IVariable[] { Var.var("x"), Var.var("y") },// - new IConstant[] { new Constant<String>("Mary"), - new Constant<String>("Paul") }// - ) + /* + * No. The CONSTRAINT on the 2nd join [x == y] filters all + * solutions. For solutions where the optional join fails, [y] is + * not bound. Since [y] is part of the constraint on that join we DO + * NOT observe those solutions which only join on the first access + * path. + */ +// // plus anything we read from the first access path which did not join. +// new ArrayBindingSet(// +// new IVariable[] { Var.var("x"), Var.var("y") },// +// new IConstant[] { new Constant<String>("John"), +// new Constant<String>("Mary") }// +// ), +// new ArrayBindingSet(// +// new IVariable[] { Var.var("x"), Var.var("y") },// +// new IConstant[] { new Constant<String>("Mary"), +// new Constant<String>("Paul") }// +// ) }; assertSameSolutionsAnyOrder(expected, @@ -1714,7 +1721,7 @@ // verify query solution stats details. 
// assertEquals(1L, stats.chunksIn.get()); assertEquals(4L, stats.unitsIn.get()); - assertEquals(4L, stats.unitsOut.get()); + assertEquals(2L, stats.unitsOut.get()); // assertEquals(1L, stats.chunksOut.get()); } @@ -1727,8 +1734,8 @@ // verify query solution stats details. // assertEquals(2L, stats.chunksIn.get()); - assertEquals(4L, stats.unitsIn.get()); - assertEquals(4L, stats.unitsOut.get()); + assertEquals(2L, stats.unitsIn.get()); + assertEquals(2L, stats.unitsOut.get()); // assertEquals(1L, stats.chunksOut.get()); } Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSail.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSail.java 2011-01-21 16:41:12 UTC (rev 4158) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSail.java 2011-01-21 16:46:41 UTC (rev 4159) @@ -382,7 +382,7 @@ public static final String NEW_EVAL_STRATEGY = BigdataSail.class.getPackage() .getName()+ ".newEvalStrategy"; - public static final String DEFAULT_NEW_EVAL_STRATEGY = "false"; + public static final String DEFAULT_NEW_EVAL_STRATEGY = "true"; /** * Option as to whether or not to allow Sesame evaluation of queries Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/Rule2BOpUtility.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/Rule2BOpUtility.java 2011-01-21 16:41:12 UTC (rev 4158) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/Rule2BOpUtility.java 2011-01-21 16:46:41 UTC (rev 4159) @@ -65,6 +65,7 @@ import com.bigdata.bop.bindingSet.HashBindingSet; import com.bigdata.bop.bset.ConditionalRoutingOp; import com.bigdata.bop.bset.StartOp; +import com.bigdata.bop.controller.AbstractSubqueryOp; import com.bigdata.bop.controller.Steps; import 
com.bigdata.bop.controller.Union; import com.bigdata.bop.cost.ScanCostReport; @@ -1304,13 +1305,11 @@ // The bopId for the UNION or STEP. final int thisId = idFactory.incrementAndGet(); - final int arity = program.stepCount(); - - final IStep[] steps = program.toArray(); + final IStep[] steps = program.toArray(); - final BOp[] args = new BOp[arity]; + final BOp[] subqueries = new BOp[steps.length]; - for (int i = 0; i < arity; i++) { + for (int i = 0; i < steps.length; i++) { // convert the child IStep final BOpBase tmp = convert(steps[i], idFactory, db, queryEngine, @@ -1324,28 +1323,31 @@ */ // tmp = tmp.setProperty(PipelineOp.Annotations.SINK_REF, thisId); - args[i] = tmp; + subqueries[i] = tmp; } final LinkedList<NV> anns = new LinkedList<NV>(); - anns.add(new NV(Union.Annotations.BOP_ID, thisId)); + anns.add(new NV(BOp.Annotations.BOP_ID, thisId)); + + // the subqueries. + anns.add(new NV(AbstractSubqueryOp.Annotations.SUBQUERIES, subqueries)); + +// anns.add(new NV(Union.Annotations.EVALUATION_CONTEXT, +// BOpEvaluationContext.CONTROLLER)); +// +// anns.add(new NV(Union.Annotations.CONTROLLER, true)); - anns.add(new NV(Union.Annotations.EVALUATION_CONTEXT, - BOpEvaluationContext.CONTROLLER)); - - anns.add(new NV(Union.Annotations.CONTROLLER, true)); - if (!isParallel) anns.add(new NV(Union.Annotations.MAX_PARALLEL, 1)); final PipelineOp thisOp; if (isParallel) { - thisOp = new Union(args, NV + thisOp = new Union(new BOp[]{}, NV .asMap(anns.toArray(new NV[anns.size()]))); } else { - thisOp = new Steps(args, NV + thisOp = new Steps(new BOp[]{}, NV .asMap(anns.toArray(new NV[anns.size()]))); } Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/SOp2BOpUtility.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/SOp2BOpUtility.java 2011-01-21 16:41:12 UTC (rev 4158) +++ 
branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/SOp2BOpUtility.java 2011-01-21 16:46:41 UTC (rev 4159) @@ -273,12 +273,13 @@ } final LinkedList<NV> anns = new LinkedList<NV>(); - anns.add(new NV(Union.Annotations.BOP_ID, thisId)); - anns.add(new NV(Union.Annotations.EVALUATION_CONTEXT, - BOpEvaluationContext.CONTROLLER)); - anns.add(new NV(Union.Annotations.CONTROLLER, true)); + anns.add(new NV(BOp.Annotations.BOP_ID, thisId)); + anns.add(new NV(Union.Annotations.SUBQUERIES,args)); +// anns.add(new NV(Union.Annotations.EVALUATION_CONTEXT, +// BOpEvaluationContext.CONTROLLER)); +// anns.add(new NV(Union.Annotations.CONTROLLER, true)); - final Union thisOp = new Union(args, NV + final Union thisOp = new Union(new BOp[]{}, NV .asMap(anns.toArray(new NV[anns.size()]))); return thisOp; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2011-01-21 21:52:25
|
Revision: 4160 http://bigdata.svn.sourceforge.net/bigdata/?rev=4160&view=rev Author: thompsonbry Date: 2011-01-21 21:52:18 +0000 (Fri, 21 Jan 2011) Log Message: ----------- Added getSharedVars() to BOpUtility. More work on the runtime query optimizer. See https://sourceforge.net/apps/trac/bigdata/ticket/64. Javadoc on AbstractSubqueryOp and SubqueryOp. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/AbstractSubqueryOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/PartitionedJoinGroup.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/SubqueryOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestJGraph.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestPartitionedJoinGroup.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/AbstractJoinGraphTestCase.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBarData.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/NoSolutionsException.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestAll.java Removed Paths: ------------- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphWithRDF.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java =================================================================== --- 
branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java 2011-01-21 16:46:41 UTC (rev 4159) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java 2011-01-21 21:52:18 UTC (rev 4160) @@ -34,6 +34,7 @@ import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.Set; import org.apache.log4j.Logger; @@ -1053,6 +1054,83 @@ return b; } - + + /** + * Return the variable references shared by tw operators. All variables + * spanned by either {@link BOp} are considered. + * + * @param p + * An operator. + * @param c + * Another operator. + * + * @param p + * A predicate. + * + * @param c + * A constraint. + * + * @return The variables in common -or- <code>null</code> iff there are no + * variables in common. + * + * @throws IllegalArgumentException + * if the two either reference is <code>null</code>. + * @throws IllegalArgumentException + * if the reference are the same. + * + * @todo unit tests. + */ + public static Set<IVariable<?>> getSharedVars(final BOp p, final BOp c) { + + if (p == null) + throw new IllegalArgumentException(); + + if (c == null) + throw new IllegalArgumentException(); + + if (p == c) + throw new IllegalArgumentException(); + + // The set of variables which are shared. + final Set<IVariable<?>> sharedVars = new LinkedHashSet<IVariable<?>>(); + + // Collect the variables appearing anywhere in [p]. + final Set<IVariable<?>> p1vars = new LinkedHashSet<IVariable<?>>(); + { + + final Iterator<IVariable<?>> itr = BOpUtility + .getSpannedVariables(p); + + while (itr.hasNext()) { + + p1vars.add(itr.next()); + + } + + } + + // Consider the variables appearing anywhere in [c]. 
+ { + + final Iterator<IVariable<?>> itr = BOpUtility + .getSpannedVariables(c); + + while (itr.hasNext()) { + + final IVariable<?> avar = itr.next(); + + if (p1vars.contains(avar)) { + + sharedVars.add(avar); + + } + + } + + } + + return sharedVars; + + } + } - Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/AbstractSubqueryOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/AbstractSubqueryOp.java 2011-01-21 16:46:41 UTC (rev 4159) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/AbstractSubqueryOp.java 2011-01-21 21:52:18 UTC (rev 4160) @@ -84,6 +84,11 @@ * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ + * + * @todo There is relatively little difference between this class and SubqueryOp + * and we should consider converging them into a single concrete subquery + * operator with specializations for UNION and STEPS. The main difference + * is that the SubqueryOp can not run multiple subqueries. 
*/ abstract public class AbstractSubqueryOp extends PipelineOp { Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2011-01-21 16:46:41 UTC (rev 4159) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2011-01-21 21:52:18 UTC (rev 4160) @@ -67,7 +67,6 @@ import com.bigdata.bop.join.PipelineJoin; import com.bigdata.bop.join.PipelineJoin.PipelineJoinStats; import com.bigdata.bop.rdf.join.DataSetJoin; -import com.bigdata.bop.solutions.SliceOp; import com.bigdata.relation.IRelation; import com.bigdata.relation.accesspath.BufferClosedException; import com.bigdata.relation.accesspath.IAccessPath; @@ -75,6 +74,7 @@ import com.bigdata.relation.accesspath.ThickAsynchronousIterator; import com.bigdata.striterator.Dechunkerator; import com.bigdata.striterator.IChunkedIterator; +import com.bigdata.util.concurrent.Haltable; /** * A join graph with annotations for estimated cardinality and other details in @@ -1450,13 +1450,13 @@ * * @see #getVertices() */ - public IPredicate[] getPredicates() { + public IPredicate<?>[] getPredicates() { // The vertices in the selected evaluation order. final Vertex[] vertices = getVertices(); // The predicates in the same order as the vertices. - final IPredicate[] preds = new IPredicate[vertices.length]; + final IPredicate<?>[] preds = new IPredicate[vertices.length]; for (int i = 0; i < vertices.length; i++) { @@ -1469,6 +1469,16 @@ } /** + * Return the {@link BOp} identifiers of the predicates associated with + * each vertex in path order. + */ + public int[] getVertexIds() { + + return getVertexIds(edges); + + } + + /** * Return the {@link BOp} identifiers of the predicates associated with * each vertex in path order. 
*/ @@ -1590,10 +1600,6 @@ * join path in each round then this would help to establish a * better estimate in successive rounds.] * - * FIXME CONSTRAINT ORDERING : It is illegal to add a vertex to the - * path if any variable appearing in its CONSTRAINTS would not be - * bound. - * * FIXME CONSTRAINT ORDERING : Rather than constraints imposing an * ordering on joins, constraints need to be attached dynamically to * the first join for which their variables are known to be bound. @@ -1848,7 +1854,7 @@ * @todo unit test for a constraint using a variable which is never * bound. */ - public JGraph(final IPredicate[] v, final IConstraint[] constraints) { + public JGraph(final IPredicate<?>[] v, final IConstraint[] constraints) { if (v == null) throw new IllegalArgumentException(); @@ -1871,27 +1877,33 @@ * Identify the edges by looking for shared variables among the * predicates. * - * Note: Variables may appear in the arguments of the predicate, - * e.g., spo(?s,rdf:type,?o). + * Note: Variables appear in predicates or in constraints. Edges are + * created to represent possible joins between predicates based on + * those shared variables. There are two cases: * - * Note: Variables may ALSO appear in the CONSTRAINTS (imposed on - * the binding sets) or FILTERS (imposed either on the local or - * remote access path). For example, that a variable bound by - * another predicate must take on a value having some mathematical - * relationship to a variable bound by the predicate, e.g., BSBM Q5. - * When a variable appears in a constraint but does not appear as an - * argument to the predicate, then there is an additional - * requirement that the variable MUST become bound before the - * predicate may be evaluated (again, BSBM Q5 has this form). + * (1) When the target predicate shares a variable with the source + * predicate, then we always create an edge between those predicates + * to represent a possible join. 
* - * Note: If a vertex does not share ANY variables (neither in the - * arguments of the predicate nor in its constraints or filters) - * then it can be paired with any of the other vertices. However, in - * such cases we always run such vertices last as they can not - * restrict the cardinality of the rest of the join graph. Such - * vertices are therefore inserted into a separate set and appended - * to the join path once all edges having shared variables have been - * exhausted. + * (2) When the source predicate shares a variable with a constraint + * which also shares a variable with the target predicate, then we + * will also create an edge to represent a possible join. + * + * The second case handles the case where variables are transitively + * shared through a constraint, but not directly shared between the + * predicates. BSBM Q5 is an example of this case. + * + * Note: If applying these two rules fails to create any edges for + * some vertex, then it does not share ANY variables and can be + * paired with ANY of the other vertices. However, we always run + * such vertices last as they can not restrict the cumulative + * cardinality of the solutions. Such vertices are therefore + * inserted into a separate set and appended to the join path once + * all edges having shared variables have been exhausted. + * + * FIXME VERTICES WHICH SHARE VARS THROUGH A CONSTRAINT. + * + * FIXME VERTICES WITH NO SHARED VARS. */ { @@ -1917,20 +1929,14 @@ // consider a possible target vertex. final IPredicate<?> p2 = v[j]; - final Set<IVariable<?>> shared = getSharedVars(p1, p2); + final Set<IVariable<?>> shared = BOpUtility + .getSharedVars(p1, p2); if (shared != null && !shared.isEmpty()) { /* - * The source and target vertices share var(s). - * - * Note: A predicate having a variable which appears - * in a CONSTRAINT MUST NOT be added to the join - * path until that variable would be bound. 
- * Therefore, when selecting the vertices to be used - * to extend a join path, we must consider whether - * or not the join path would bind the variable(s) - * appearing in the CONSTRAINT. + * The source and target vertices share one or more + * variable(s). */ if (log.isDebugEnabled()) @@ -1945,8 +1951,68 @@ nmatched++; - } + } else if (constraints != null) { + /* + * The source and target vertices do not directly + * share any variable(s). However, there may be a + * constraint which shares a variable with both the + * source and target vertex. If such a constraint is + * found, then we add an edge now as that join is + * potentially constrained (less than the full + * Cartesian cross product). + * + * Note: While this identifies possible joins via a + * constraint, such joins are only legal when all + * variables used by the constraint are known to be + * bound. + * + * FIXME We have to reject edges unless there are + * variable(s) which are directly shared between the + * source and target vertex until all the variables + * spanned by the constraint which licenses the join + * have become bound. [Consider marking these edges + * directly so we know that we need to test and see + * whether or not a constraint exists which shares + * at least one variable with both vertices and that + * all variables in that constraint are bound.] + * + * FIXME Since we can attach more than one + * constraint to a vertex, we may have to ask + * whether any set of the available constraints + * shares at least one variable with the source and + * target vertices. [That is, do they have to share + * variables via the same constraint?!?] 
+ */ + + for(IConstraint c : constraints) { + + if(BOpUtility.getSharedVars(p1, c).isEmpty()) + continue; + + if(BOpUtility.getSharedVars(p2, c).isEmpty()) + continue; + + if (log.isDebugEnabled()) + log + .debug("vertices shared variable(s) via constraint: v1=" + + p1 + + ", v2=" + + p2 + + ", c=" + c); + + tmp.add(new Edge(V[i], V[j], shared)); + + sharedEdgeVertices.add(V[i]); + + sharedEdgeVertices.add(V[j]); + + nmatched++; + + } + + } + } if (nmatched == 0 && !sharedEdgeVertices.contains(V[i])) { @@ -1972,14 +2038,24 @@ if(!unsharedEdgeVertices.isEmpty()) { /* - * FIXME This needs to be supported. We should explore and - * generate the join paths based on only those vertices - * which do share variables (and hence for which we have - * defined edges). Once the vertices which share variables - * have been exhausted, we should simply append edges for - * the vertices which do not share variables in an arbitrary - * order (they will be run last since they can not constrain - * the evaluation). + * FIXME NO SHARED VARS : RUN LAST. This needs to be + * supported. When vertices that do not share variables + * either directly or via a constraint then they should run + * last as they can not constrain the query. In this case, + * they are not considered by the runtime optimizer when + * building up the join path until all vertices which share + * variables have been exhausted. At that point, the + * remaining vertices are just appended to whatever join + * path was selected as having the lowest cumulative + * estimated cardinality. + * + * However, if there exists for a vertex which otherwise + * does not share variables a constraint which should be + * evaluated against that vertex, then that constraint + * provides the basis for a edge (aka join). In this case, + * an edge must be created for the vertex based on the + * shared variable in the constraint and its position in the + * join path will be decided by the runtime optimizer. 
*/ throw new UnsupportedOperationException( @@ -2917,8 +2993,8 @@ final BOpIdFactory idFactory = new BOpIdFactory(); // Generate the query from the join path. - final PipelineOp queryOp = JoinGraph.getQuery(idFactory, p - .getPredicates(), getConstraints()); + final PipelineOp queryOp = PartitionedJoinGroup.getQuery(idFactory, + p.getPredicates(), getConstraints()); // Run the query, blocking until it is done. JoinGraph.runSubquery(context, queryOp); @@ -2996,318 +3072,95 @@ */ /** - * Generate a query plan from an ordered collection of predicates. + * Execute the selected join path. + * <p> + * Note: When executing the query, it is actually being executed as a + * subquery. Therefore we have to take appropriate care to ensure that the + * results are copied out of the subquery and into the parent query. See + * {@link AbstractSubqueryOp} for how this is done. * - * @param p - * The join path. + * @throws Exception * - * @return The query plan. + * @todo When we execute the query, we should clear the references to the + * samples (unless they are exact, in which case they can be used as + * is) in order to release memory associated with those samples if the + * query is long running. Samples must be held until we have + * identified the final join path since each vertex will be used by + * each maximum length join path and we use the samples from the + * vertices to re-sample the surviving join paths in each round. * - * FIXME Verify that constraints are attached correctly to the - * returned query. + * @todo If there is a slice on the outer query, then the query result may + * well be materialized by now. + * + * @todo If there are source binding sets then they need to be applied above + * (when we are sampling) and below (when we evaluate the selected + * join path). + * + * FIXME runQuery() is not working correctly. The query is being + * halted by a {@link BufferClosedException} which appears before it + * has materialized the necessary results. 
*/ - static public PipelineOp getQuery(final BOpIdFactory idFactory, - final IPredicate[] preds, final IConstraint[] constraints) { + static private void runSubquery( + final BOpContext<IBindingSet> parentContext, + final PipelineOp queryOp) throws Exception { - if (constraints != null && constraints.length != 0) { - // FIXME Constraints must be attached to joins. - throw new UnsupportedOperationException( - "Constraints must be attached to joins!"); - } - - final PipelineJoin[] joins = new PipelineJoin[preds.length]; + final QueryEngine queryEngine = parentContext.getRunningQuery() + .getQueryEngine(); -// final PipelineOp startOp = new StartOp(new BOp[] {}, -// NV.asMap(new NV[] {// -// new NV(Predicate.Annotations.BOP_ID, idFactory -// .nextId()),// -// new NV(SliceOp.Annotations.EVALUATION_CONTEXT, -// BOpEvaluationContext.CONTROLLER),// -// })); -// -// PipelineOp lastOp = startOp; - PipelineOp lastOp = null; + /* + * Run the query. + * + * @todo pass in the source binding sets here and also when sampling the + * vertices. + */ -// final Set<IVariable> vars = new LinkedHashSet<IVariable>(); -// for(IPredicate p : preds) { -// for(BOp arg : p.args()) { -// if(arg instanceof IVariable) { -// vars.add((IVariable)arg); -// } -// } -// } - - for (int i = 0; i < preds.length; i++) { + IAsynchronousIterator<IBindingSet[]> subquerySolutionItr = null; - // The next vertex in the selected join order. - final IPredicate p = preds[i]; + final IRunningQuery runningQuery = queryEngine.eval(queryOp); - final List<NV> anns = new LinkedList<NV>(); + try { - anns.add(new NV(PipelineJoin.Annotations.PREDICATE, p)); + // Iterator visiting the subquery solutions. + subquerySolutionItr = runningQuery.iterator(); - anns.add(new NV(PipelineJoin.Annotations.BOP_ID, idFactory - .nextId())); + // Copy solutions from the subquery to the query. 
+ final long nout = BOpUtility.copy(subquerySolutionItr, + parentContext.getSink(), null/* sink2 */, + null/* constraints */, null/* stats */); -// anns.add(new NV(PipelineJoin.Annotations.EVALUATION_CONTEXT, BOpEvaluationContext.ANY)); -// -// anns.add(new NV(PipelineJoin.Annotations.SELECT, vars.toArray(new IVariable[vars.size()]))); + System.out.println("nout=" + nout); - final PipelineJoin joinOp = new PipelineJoin( - lastOp == null ? new BOp[0] : new BOp[] { lastOp }, - anns.toArray(new NV[anns.size()])); + // verify no problems. + runningQuery.get(); - joins[i] = joinOp; + System.out.println("Future Ok"); - lastOp = joinOp; + } catch (Throwable t) { - } + if (Haltable.isTerminationByInterrupt(t)) { -// final PipelineOp queryOp = lastOp; + // normal termination. + return; - /* - * FIXME Why does wrapping with this slice appear to be - * necessary? (It is causing runtime errors when not wrapped). - * Is this a bopId collision which is not being detected? - */ - final PipelineOp queryOp = new SliceOp(new BOp[] { lastOp }, NV - .asMap(new NV[] { - new NV(JoinGraph.Annotations.BOP_ID, idFactory.nextId()), // - new NV(JoinGraph.Annotations.EVALUATION_CONTEXT, - BOpEvaluationContext.CONTROLLER) }) // - ); - - return queryOp; - - } - - /** - * Execute the selected join path. - * <p> - * Note: When executing the query, it is actually being executed as a - * subquery. Therefore we have to take appropriate care to ensure that the - * results are copied out of the subquery and into the parent query. See - * {@link AbstractSubqueryOp} for how this is done. - * - * @todo When we execute the query, we should clear the references to the - * samples (unless they are exact, in which case they can be used as - * is) in order to release memory associated with those samples if the - * query is long running. 
Samples must be held until we have - * identified the final join path since each vertex will be used by - * each maximum length join path and we use the samples from the - * vertices to re-sample the surviving join paths in each round. - * - * @todo If there is a slice on the outer query, then the query result may - * well be materialized by now. - * - * @todo If there are source binding sets then they need to be applied above - * (when we are sampling) and below (when we evaluate the selected - * join path). - * - * FIXME runQuery() is not working correctly. The query is being - * halted by a {@link BufferClosedException} which appears before it - * has materialized the necessary results. - */ - static public void runSubquery(final BOpContext<IBindingSet> parentContext, - final PipelineOp queryOp) { - - IAsynchronousIterator<IBindingSet[]> subquerySolutionItr = null; - - try { - - if (log.isInfoEnabled()) - log.info("Running: " + BOpUtility.toString(queryOp)); - - final PipelineOp startOp = (PipelineOp) BOpUtility - .getPipelineStart(queryOp); - - if (log.isInfoEnabled()) - log.info("StartOp: " + BOpUtility.toString(startOp)); - - // Run the query. - final UUID queryId = UUID.randomUUID(); - - final QueryEngine queryEngine = parentContext.getRunningQuery() - .getQueryEngine(); - - final IRunningQuery runningQuery = queryEngine - .eval( - queryId, - queryOp, - new LocalChunkMessage<IBindingSet>( - queryEngine, - queryId, - startOp.getId()/* startId */, - -1 /* partitionId */, - /* - * @todo pass in the source binding sets - * here and also when sampling the - * vertices. - */ - new ThickAsynchronousIterator<IBindingSet[]>( - new IBindingSet[][] { new IBindingSet[] { new HashBindingSet() } }))); - - // Iterator visiting the subquery solutions. - subquerySolutionItr = runningQuery.iterator(); - - // Copy solutions from the subquery to the query. 
- final long nout = BOpUtility - .copy(subquerySolutionItr, parentContext.getSink(), - null/* sink2 */, null/* constraints */, null/* stats */); - - System.out.println("nout=" + nout); - - // verify no problems. - runningQuery.get(); - - System.out.println("Future Ok"); - - } catch (Throwable t) { - - log.error(t,t); - - /* - * If a subquery fails, then propagate the error to the parent - * and rethrow the first cause error out of the subquery. - */ - throw new RuntimeException(parentContext.getRunningQuery() - .halt(t)); - - } finally { - - if (subquerySolutionItr != null) - subquerySolutionItr.close(); - - } - - } - - /** - * Return the variables in common for two {@link IPredicate}s. All variables - * spanned by either {@link IPredicate} are considered. - * <p> - * Note: Variables may appear in the predicates operands, in the - * {@link Annotations#CONSTRAINTS} associated with the - * predicate, and in the {@link IPredicate.Annotations#ACCESS_PATH_FILTER} - * or {@link IPredicate.Annotations#INDEX_LOCAL_FILTER}. - * <p> - * Note: A variable must become bound before it may be evaluated in - * {@link Annotations#CONSTRAINTS}, an - * {@link IPredicate.Annotations#ACCESS_PATH_FILTER} or an - * {@link IPredicate.Annotations#INDEX_LOCAL_FILTER}. This means that the - * {@link IPredicate}s which can bind the variable must be ordered before - * those which merely test the variable. - * - * - * @param p1 - * A predicate. - * - * @param p2 - * A different predicate. - * - * @return The variables in common -or- <code>null</code> iff there are no - * variables in common. - * - * @throws IllegalArgumentException - * if the two predicates are the same reference. - * - * @todo It should be an error if a variable appear in a test is not bound - * by any possible join path. 
However, note that it may not be - * possible to determine this by local examination of a join graph - * since we do not know which variables may be presented as already - * bound when the join graph is evaluated (but we can only run the - * join graph currently against static source binding sets and for - * that case this is knowable). - * - * @todo When a variable is only optionally bound and it is discovered at - * runtime that the variable is not bound when it is considered by a - * CONSTRAINT, FILTER, etc., then the SPARQL semantics are that - * evaluation should produce a 'type' error which would cause the - * solution should fail (at least within its current join group). See - * https://sourceforge.net/apps/trac/bigdata/ticket/179. - * - * @todo Unit tests, including those which verify that variables appearing - * in the constraints are reported as shared with those appearing in - * the predicates operands. - */ - static Set<IVariable<?>> getSharedVars(final IPredicate p1, final IPredicate p2) { - - // The set of variables which are shared by those predicates. - final Set<IVariable<?>> sharedVars = new LinkedHashSet<IVariable<?>>(); - - /* - * Collect the variables appearing anyway in [p1], including the - * predicate's operands and its constraints, filters, etc. - */ - final Set<IVariable<?>> p1vars = new LinkedHashSet<IVariable<?>>(); - { - - final Iterator<IVariable<?>> itr = BOpUtility - .getSpannedVariables(p1); - - while (itr.hasNext()) { - - p1vars.add(itr.next()); - } - } + // log.error(t,t); - /* - * Consider the variables appearing anyway in [p2], including the - * predicate's operands and its constraints, filters, etc. - */ - { + /* + * Propagate the error to the parent and rethrow the first cause + * error out of the subquery. 
+ */ + throw new RuntimeException(parentContext.getRunningQuery().halt(t)); - final Iterator<IVariable<?>> itr = BOpUtility - .getSpannedVariables(p2); + } finally { - while (itr.hasNext()) { + runningQuery.cancel(true/* mayInterruptIfRunning */); - final IVariable<?> avar = itr.next(); - - if(p1vars.contains(avar)) { + if (subquerySolutionItr != null) + subquerySolutionItr.close(); - sharedVars.add(avar); - - } - - } - } - - return sharedVars; } - /** - * Exception thrown when the join graph does not have any solutions in the - * data (running the query does not produce any results). - */ - public static class NoSolutionsException extends RuntimeException - { - - /** - * - */ - private static final long serialVersionUID = 1L; - - public NoSolutionsException() { - super(); - } - - public NoSolutionsException(String message, Throwable cause) { - super(message, cause); - } - - public NoSolutionsException(String message) { - super(message); - } - - public NoSolutionsException(Throwable cause) { - super(cause); - } - - } - } Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/NoSolutionsException.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/NoSolutionsException.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/NoSolutionsException.java 2011-01-21 21:52:18 UTC (rev 4160) @@ -0,0 +1,31 @@ +package com.bigdata.bop.controller; + +/** + * Exception thrown when the join graph does not have any solutions in the + * data (running the query does not produce any results). 
+ */ +public class NoSolutionsException extends RuntimeException +{ + + /** + * + */ + private static final long serialVersionUID = 1L; + + public NoSolutionsException() { + super(); + } + + public NoSolutionsException(String message, Throwable cause) { + super(message, cause); + } + + public NoSolutionsException(String message) { + super(message); + } + + public NoSolutionsException(Throwable cause) { + super(cause); + } + +} \ No newline at end of file Property changes on: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/NoSolutionsException.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/PartitionedJoinGroup.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/PartitionedJoinGroup.java 2011-01-21 16:46:41 UTC (rev 4159) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/PartitionedJoinGroup.java 2011-01-21 21:52:18 UTC (rev 4160) @@ -10,11 +10,17 @@ import org.apache.log4j.Logger; +import com.bigdata.bop.BOp; +import com.bigdata.bop.BOpEvaluationContext; +import com.bigdata.bop.BOpIdFactory; import com.bigdata.bop.BOpUtility; import com.bigdata.bop.IConstraint; import com.bigdata.bop.IPredicate; import com.bigdata.bop.IVariable; -import com.bigdata.bop.controller.JoinGraph.NoSolutionsException; +import com.bigdata.bop.NV; +import com.bigdata.bop.PipelineOp; +import com.bigdata.bop.join.PipelineJoin; +import com.bigdata.bop.solutions.SliceOp; /** * Class accepts a join group and partitions it into a join graph and a tail @@ -54,10 +60,6 @@ * predicate, which is meaningless in an of itself because the P is * magical.] 
* - * @todo write a method which returns the set of constraints which should be run - * for the last predicate in a given join path (a join path is just an - * ordered array of predicates). - * * FIXME Add a method to generate a runnable query plan from a collection * of predicates and constraints. This is a bit different for the join * graph and the optionals in the tail plan. The join graph itself should @@ -220,7 +222,54 @@ path[i] = p; } + + return getJoinGraphConstraints(path, joinGraphConstraints + .toArray(new IConstraint[joinGraphConstraints.size()]))[pathIds.length - 1]; + + } + /** + * Given a join path, return the set of constraints to be associated with + * each join in that join path. Only those constraints whose variables are + * known to be bound will be attached. + * + * @param path + * The join path. + * @param joinGraphConstraints + * The constraints to be applied to the join path (optional). + * + * @return The constraints to be paired with each element of the join path. + * + * @throws IllegalArgumentException + * if the join path is <code>null</code>. + * @throws IllegalArgumentException + * if the join path is empty. + * @throws IllegalArgumentException + * if any element of the join path is <code>null</code>. + * @throws IllegalArgumentException + * if any element of the join graph constraints is + * <code>null</code>. + * + * @todo It should be an error if a variable appear in a constraint is not + * bound by any possible join path. However, it may not be possible to + * determine this by local examination of a join graph since we do not + * know which variables may be presented as already bound when the + * join graph is evaluated (but we can only run the join graph + * currently against static source binding sets and for that case this + * is knowable). 
+ */ + static public IConstraint[][] getJoinGraphConstraints( + final IPredicate<?>[] path, final IConstraint[] joinGraphConstraints) { + + if (path == null) + throw new IllegalArgumentException(); + + if (path.length == 0) + throw new IllegalArgumentException(); + + // the set of constraints for each predicate in the join path. + final IConstraint[][] ret = new IConstraint[path.length][]; + /* * For each predicate in the path in the given order, figure out which * constraint(s) would attach to that predicate based on which variables @@ -234,17 +283,20 @@ // the set of constraints which have been consumed. final Set<IConstraint> used = new LinkedHashSet<IConstraint>(); - // the set of constraints for the last predicate in the join path. - final List<IConstraint> ret = new LinkedList<IConstraint>(); - - for(int i = 0; i<path.length; i++) { + for (int i = 0; i < path.length; i++) { - // true iff this is the last join in the path. - final boolean lastJoin = i == path.length - 1; +// // true iff this is the last join in the path. +// final boolean lastJoin = i == path.length - 1; // a predicate in the path. final IPredicate<?> p = path[i]; + if (p == null) + throw new IllegalArgumentException(); + + // the constraints for the current predicate in the join path. + final List<IConstraint> constraints = new LinkedList<IConstraint>(); + { /* * Visit the variables used by the predicate (and bound by it @@ -263,80 +315,80 @@ } } - - // consider each constraint. - for(IConstraint c : joinGraphConstraints) { - if (used.contains(c)) { + if (joinGraphConstraints != null) { + + // consider each constraint. + for (IConstraint c : joinGraphConstraints) { + + if (c == null) + throw new IllegalArgumentException(); + + if (used.contains(c)) { + /* + * Skip constraints which were already assigned to + * predicates before this one in the join path. + */ + continue; + } + /* - * Skip constraints which were already assigned to - * predicates before this one in the join path. 
+ * true iff all variables used by this constraint are bound + * at this point in the join path. */ - continue; - } + boolean allVarsBound = true; - /* - * true iff all variables used by this constraint are bound at - * this point in the join path. - */ - boolean allVarsBound = true; + // visit the variables used by this constraint. + final Iterator<IVariable<?>> vitr = BOpUtility + .getSpannedVariables(c); - // visit the variables used by this constraint. - final Iterator<IVariable<?>> vitr = BOpUtility - .getSpannedVariables(c); + while (vitr.hasNext()) { - while (vitr.hasNext()) { + final IVariable<?> var = vitr.next(); - final IVariable<?> var = vitr.next(); - - if(!boundVars.contains(var)) { - - allVarsBound = false; + if (!boundVars.contains(var)) { - break; + allVarsBound = false; - } + break; - } + } - if (allVarsBound) { + } - /* - * All variables have become bound for this constraint, so - * add it to the set of "used" constraints. - */ - - used.add(c); + if (allVarsBound) { - if (log.isDebugEnabled()) { - log.debug("Constraint attached at index " + i + " of " - + path.length + ", bopId=" + p.getId() - + ", constraint=" + c); - } - - if (lastJoin) { - /* - * If we are on the last join in the join path, then - * this constraint is one of the ones that we will - * return. + * All variables have become bound for this constraint, + * so add it to the set of "used" constraints. */ - - ret.add(c); - } + used.add(c); - } // if(allVarsBound) - - } // next constraint + if (log.isDebugEnabled()) { + log.debug("Constraint attached at index " + i + + " of " + path.length + ", bopId=" + + p.getId() + ", constraint=" + c); + } + + constraints.add(c); + + } // if(allVarsBound) + + } // next constraint + + } // joinGraphConstraints != null; + + // store the constraint[] for that predicate. + ret[i] = constraints.toArray(new IConstraint[constraints.size()]); } // next predicate in the join path. 
/* - * Return the set of constraints to be applied as of the last predicate - * in the join path. + * Return the set of constraints associated with each predicate in the + * join path. */ - return ret.toArray(new IConstraint[ret.size()]); + return ret; } @@ -589,4 +641,83 @@ } + /** + * Generate a query plan from an ordered collection of predicates. + * + * @param p + * The join path. + * + * @return The query plan. + * + * FIXME Select only those variables required by downstream + * processing or explicitly specified by the caller (in the case + * when this is a subquery, the caller has to declare which + * variables are selected and will be returned out of the subquery). + * + * FIXME For scale-out, we need to either mark the join's evaluation + * context based on whether or not the access path is local or + * remote (and whether the index is key-range distributed or hash + * partitioned). + */ + static public PipelineOp getQuery(final BOpIdFactory idFactory, + final IPredicate<?>[] preds, final IConstraint[] constraints) { + + // figure out which constraints are attached to which predicates. + final IConstraint[][] assignedConstraints = PartitionedJoinGroup + .getJoinGraphConstraints(preds, constraints); + + final PipelineJoin<?>[] joins = new PipelineJoin[preds.length]; + + PipelineOp lastOp = null; + + for (int i = 0; i < preds.length; i++) { + + // The next vertex in the selected join order. 
+ final IPredicate<?> p = preds[i]; + + final List<NV> anns = new LinkedList<NV>(); + + anns.add(new NV(PipelineJoin.Annotations.PREDICATE, p)); + + anns.add(new NV(PipelineJoin.Annotations.BOP_ID, idFactory + .nextId())); + +// anns.add(new NV(PipelineJoin.Annotations.EVALUATION_CONTEXT, BOpEvaluationContext.ANY)); +// +// anns.add(new NV(PipelineJoin.Annotations.SELECT, vars.toArray(new IVariable[vars.size()]))); + + if (assignedConstraints[i] != null + && assignedConstraints[i].length > 0) + anns + .add(new NV(PipelineJoin.Annotations.CONSTRAINTS, + assignedConstraints[i])); + + final PipelineJoin<?> joinOp = new PipelineJoin( + lastOp == null ? new BOp[0] : new BOp[] { lastOp }, anns + .toArray(new NV[anns.size()])); + + joins[i] = joinOp; + + lastOp = joinOp; + + } + +// final PipelineOp queryOp = lastOp; + + /* + * FIXME Why does wrapping with this slice appear to be + * necessary? (It is causing runtime errors when not wrapped). + * Is this a bopId collision which is not being detected? 
+ */ + final PipelineOp queryOp = new SliceOp(new BOp[] { lastOp }, NV + .asMap(new NV[] { + new NV(JoinGraph.Annotations.BOP_ID, idFactory.nextId()), // + new NV(JoinGraph.Annotations.EVALUATION_CONTEXT, + BOpEvaluationContext.CONTROLLER) }) // + ); + + return queryOp; + + } + } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/SubqueryOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/SubqueryOp.java 2011-01-21 16:46:41 UTC (rev 4159) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/SubqueryOp.java 2011-01-21 21:52:18 UTC (rev 4160) @@ -57,6 +57,8 @@ * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ + * + * @see AbstractSubqueryOp */ public class SubqueryOp extends PipelineOp { Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestJGraph.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestJGraph.java 2011-01-21 16:46:41 UTC (rev 4159) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestJGraph.java 2011-01-21 21:52:18 UTC (rev 4160) @@ -87,7 +87,7 @@ // fail("write test"); // } // -// // @todo also getEdgeCount() +// // and also getEdgeCount() // public void test_getEdges() { // fail("write test"); // } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestPartitionedJoinGroup.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestPartitionedJoinGroup.java 2011-01-21 16:46:41 UTC (rev 4159) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestPartitionedJoinGroup.java 2011-01-21 21:52:18 UTC (rev 4160) @@ -456,9 +456,6 @@ /** * @todo test with headPlan. 
- * - * @todo test logic to attach constraints to non-optional joins based on a - * given join path (not yet written). */ public void test_something() { fail("write tests"); Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/AbstractJoinGraphTestCase.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/AbstractJoinGraphTestCase.java 2011-01-21 16:46:41 UTC (rev 4159) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/AbstractJoinGraphTestCase.java 2011-01-21 21:52:18 UTC (rev 4160) @@ -43,7 +43,7 @@ import com.bigdata.bop.IConstraint; import com.bigdata.bop.IPredicate; import com.bigdata.bop.PipelineOp; -import com.bigdata.bop.controller.JoinGraph; +import com.bigdata.bop.controller.PartitionedJoinGroup; import com.bigdata.bop.controller.JoinGraph.JGraph; import com.bigdata.bop.controller.JoinGraph.Path; import com.bigdata.bop.engine.BOpStats; @@ -217,7 +217,7 @@ * JVM run using the known solutions produced by the runtime versus * static query optimizers. */ - protected void doTest(final IPredicate[] preds, + protected void doTest(final IPredicate<?>[] preds, final IConstraint[] constraints) throws Exception { if (warmUp) @@ -228,7 +228,7 @@ * Run the runtime query optimizer once (its cost is not counted * thereafter). 
*/ - final IPredicate[] runtimePredOrder = runRuntimeQueryOptimizer( + final IPredicate<?>[] runtimePredOrder = runRuntimeQueryOptimizer( getQueryEngine(), limit, nedges, preds, constraints); long totalRuntimeTime = 0; @@ -296,9 +296,10 @@ * * @throws Exception */ - static protected IPredicate[] runRuntimeQueryOptimizer( + static protected IPredicate<?>[] runRuntimeQueryOptimizer( final QueryEngine queryEngine, final int limit, final int nedges, - final IPredicate[] preds, IConstraint[] constraints) throws Exception { + final IPredicate<?>[] preds, IConstraint[] constraints) + throws Exception { final Logger tmp = Logger.getLogger(QueryLog.class); final Level oldLevel = tmp.getEffectiveLevel(); @@ -329,8 +330,8 @@ * @return The predicates in order as recommended by the static query * optimizer. */ - static protected IPredicate[] runStaticQueryOptimizer( - final QueryEngine queryEngine, final IPredicate[] preds) { + static protected IPredicate<?>[] runStaticQueryOptimizer( + final QueryEngine queryEngine, final IPredicate<?>[] preds) { final BOpContextBase context = new BOpContextBase(queryEngine); @@ -351,7 +352,7 @@ final int[] ids = new int[order.length]; - final IPredicate[] out = new IPredicate[order.length]; + final IPredicate<?>[] out = new IPredicate[order.length]; for (int i = 0; i < order.length; i++) { @@ -374,15 +375,9 @@ * @return The elapsed query time (ms). */ private static long runQuery(final String msg, - final QueryEngine queryEngine, final IPredicate[] predOrder, + final QueryEngine queryEngine, final IPredicate<?>[] predOrder, final IConstraint[] constraints) throws Exception { - if (constraints != null && constraints.length != 0) { - // FIXME Constraints must be attached to joins. 
- throw new UnsupportedOperationException( - "Constraints must be attached to joins!"); - } - if (log.isInfoEnabled()) log.info("Running " + msg); @@ -400,38 +395,46 @@ } - final PipelineOp queryOp = JoinGraph.getQuery(idFactory, predOrder, - constraints); + final PipelineOp queryOp = PartitionedJoinGroup.getQuery(idFactory, + predOrder, constraints); // submit query to runtime optimizer. final IRunningQuery q = queryEngine.eval(queryOp); - // drain the query results. - long nout = 0; - long nchunks = 0; - final IAsynchronousIterator<IBindingSet[]> itr = q.iterator(); try { - while (itr.hasNext()) { - final IBindingSet[] chunk = itr.next(); - nout += chunk.length; - nchunks++; + + // drain the query results. + long nout = 0; + long nchunks = 0; + final IAsynchronousIterator<IBindingSet[]> itr = q.iterator(); + try { + while (itr.hasNext()) { + final IBindingSet[] chunk = itr.next(); + nout += chunk.length; + nchunks++; + } + } finally { + itr.close(); } - } finally { - itr.close(); - } - // check the Future for the query. - q.get(); + // check the Future for the query. + q.get(); - // show the results. - final BOpStats stats = q.getStats().get(queryOp.getId()); + // show the results. 
+ final BOpStats stats = q.getStats().get(queryOp.getId()); - System.err.println(msg + " : ids=" + Arrays.toString(ids) - + ", elapsed=" + q.getElapsed() + ", nout=" + nout - + ", nchunks=" + nchunks + ", stats=" + stats); - - return q.getElapsed(); + System.err.println(msg + " : ids=" + Arrays.toString(ids) + + ", elapsed=" + q.getElapsed() + ", nout=" + nout + + ", nchunks=" + nchunks + ", stats=" + stats); + return q.getElapsed(); + + } finally { + + q.cancel(true/* mayInterruptIfRunning */); + + } + } /** Added: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestAll.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestAll.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestAll.java 2011-01-21 21:52:18 UTC (rev 4160) @@ -0,0 +1,72 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +package com.bigdata.bop.rdf.joinGraph; + + +import junit.framework.Test; +import junit.framework.TestCase; +import junit.framework.TestSuite; + +/** + * Aggregates test suites into increasing dependency order. 
+ * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public class TestAll extends TestCase { + + /** + * + */ + public TestAll() { + + } + + /** + * @param arg0 + */ + public TestAll(String arg0) { + + super(arg0); + + } + + /** + * Returns a test that will run each of the implementation specific test + * suites in turn. + */ + public static Test suite() + { + + final TestSuite suite = new TestSuite("Runtime query optimizer"); + + suite.addTestSuite(TestJoinGraphOnLubm.class); + suite.addTestSuite(TestJoinGraphOnBarData.class); + suite.addTestSuite(TestJoinGraphOnBSBMData.class); + + return suite; + + } + +} Property changes on: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestAll.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java 2011-01-21 16:46:41 UTC (rev 4159) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java 2011-01-21 21:52:18 UTC (rev 4160) @@ -176,7 +176,7 @@ return new Journal(properties); } - + /** * BSBM Q5 * @@ -202,6 +202,18 @@ * LIMIT 5 * </pre> * + * Note: There are two predicates which bind variables (origProperty1 and + * origProperty2) that are not used by the other predicates and therefore do + * not share any variables which would form "edges" that define joins. In + * general, a join without shared variables means the cross product of the + * sources will be materialized and such joins should be run last. 
+ * <p> + * However, in this case there are SPARQL FILTERs which (a) use those + * variables (origProperty1 and origProperty2); and (b) can constrain the + * query. This means that running the predicates without shared variables + * and applying the constraints before the tail of the plan can in fact lead + * to a more efficient join path. + * * @throws Exception */ public void test_bsbm_q5() throws Exception { Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBarData.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBarData.java 2011-01-21 16:46:41 UTC (rev 4159) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBarData.java 2011-01-21 21:52:18 UTC (rev 4160) @@ -211,7 +211,7 @@ * * @throws Exception */ - public void test_query() throws Exception { + public void test_barData_query() throws Exception { final String namespace = getNamespace(); Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java 2011-01-21 16:46:41 UTC (rev 4159) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java 2011-01-21 21:52:18 UTC (rev 4160) @@ -310,7 +310,7 @@ * * @throws Exception */ - public void test_query2() throws Exception { + public void test_LUBM_Q2() throws Exception { final String namespace = getNamespace(); @@ -449,7 +449,7 @@ * </pre> * @throws Exception */ - public void test_query8() throws Exception { + public void test_LUBM_Q8() throws Exception { final String namespace = getNamespace(); @@ -580,7 +580,7 @@ * * @throws Exception */ - public void test_query9() throws 
Exception { + public void test_LUBM_Q9() throws Exception { final String name... [truncated message content] |
From: <tho...@us...> - 2011-02-09 17:00:10
|
Revision: 4185 http://bigdata.svn.sourceforge.net/bigdata/?rev=4185&view=rev Author: thompsonbry Date: 2011-02-09 17:00:01 +0000 (Wed, 09 Feb 2011) Log Message: ----------- More work on the GROUP_BY operator. Defined various aggregate operators (MIN, MAX, SUM, COUNT, etc). They all need unit tests. The semantics of many of these operators needs to be reviewed. Defined BIND(var,expr) operator, which binds the variable to the result of evaluating the value expression as side-effect. Modified the ORDER_BY stress test to verify the ordering imposed. Modified DistinctBindingSetOp to pass the hash map as shared state (it was only distinct for each invocation). Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IValueExpression.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/DistinctBindingSetOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestDeepCopy.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine_SortOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/solutions/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/solutions/TestMemorySortOp.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/TestAll.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/Bind.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IAggregate.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/GroupByOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/MemoryGroupByOp.java 
branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestBind.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine_DistinctOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine_GroupByOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/solutions/TestMemoryGroupByOp.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/COUNT.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/GROUP_CONCAT.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/MAX.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/MIN.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/SAMPLE.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/SUM.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/aggregate/ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/aggregate/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/aggregate/TestCOUNT.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/aggregate/TestGROUP_CONCAT.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/aggregate/TestMAX.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/aggregate/TestMIN.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/aggregate/TestSAMPLE.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/aggregate/TestSUM.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java 2011-02-08 17:50:40 UTC (rev 
4184) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java 2011-02-09 17:00:01 UTC (rev 4185) @@ -27,11 +27,15 @@ */ package com.bigdata.bop; +import java.util.concurrent.atomic.AtomicBoolean; + import org.apache.log4j.Logger; import com.bigdata.bop.engine.BOpStats; import com.bigdata.bop.engine.IChunkMessage; +import com.bigdata.bop.engine.IQueryClient; import com.bigdata.bop.engine.IRunningQuery; +import com.bigdata.bop.engine.QueryEngine; import com.bigdata.relation.accesspath.IAccessPath; import com.bigdata.relation.accesspath.IAsynchronousIterator; import com.bigdata.relation.accesspath.IBlockingBuffer; @@ -59,6 +63,39 @@ private final IBlockingBuffer<E[]> sink2; + private final AtomicBoolean lastInvocation = new AtomicBoolean(false); + + /** + * Set by the {@link QueryEngine} when the criteria specified by + * {@link #isLastInvocation()} are satisfied. + */ + public void setLastInvocation() { + lastInvocation.set(true); + } + + /** + * <code>true</code> iff this is the last invocation of the operator. The + * property is only set to <code>true</code> for operators which: + * <ol> + * <li>{@link BOp.Annotations#EVALUATION_CONTEXT} is + * {@link BOpEvaluationContext#CONTROLLER}</li> + * <li>{@link PipelineOp.Annotations#THREAD_SAFE} is <code>false</code></li> + * </ol> + * Under these circumstances, it is possible for the {@link IQueryClient} to + * atomically decide that a specific invocation of the operator task for the + * query will be the last invocation for that task. This is not possible if + * the operator allows concurrent evaluation tasks. Sharded operators are + * intrinsically concurrent since they can evaluate at each shard in + * parallel. This is why the evaluation context is locked to the query + * controller. In addition, the operator must declare that it is NOT thread + * safe in order for the query engine to serialize its evaluation tasks. 
+ * + * @return + */ + public boolean isLastInvocation() { + return lastInvocation.get(); + } + /** * The interface for a running query. * <p> Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/Bind.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/Bind.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/Bind.java 2011-02-09 17:00:01 UTC (rev 4185) @@ -0,0 +1,66 @@ +package com.bigdata.bop; + +import java.util.Map; + +/** + * Operator causes a variable to be bound to the result of its evaluation as a + * side-effect. + * + * @author thompsonbry + */ +public class Bind<E> extends ImmutableBOp implements IValueExpression<E> { + + /** + * + */ + private static final long serialVersionUID = 1L; + + /** + * Required deep copy constructor. + */ + public Bind(BOpBase op) { + super(op); + } + + /** + * @param var + * The {@link IVariable} which will be bound to result of + * evaluating the associated value expression. + * @param expr + * The {@link IValueExpression} to be evaluated. + */ + public Bind(IVariable<E> var, IValueExpression<E> expr) { + + this(new BOp[] { var, expr }, null/* annotations */); + + } + + /** + * Required shallow copy constructor. + * @param args + * @param annotations + */ + public Bind(BOp[] args, Map<String, Object> annotations) { + super(args, annotations); + } + + @SuppressWarnings("unchecked") + @Override + public E get(final IBindingSet bindingSet) { + + final IVariable<E> var = (IVariable<E>) get(0); + + final IValueExpression<E> expr = (IValueExpression<E>) get(1); + + // evaluate the value expression. + E val = expr.get(bindingSet); + + // bind the variable as a side-effect. 
+ bindingSet.set(var, new Constant<E>(val)); + + // return the evaluated value + return val; + + } + +} Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IAggregate.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IAggregate.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IAggregate.java 2011-02-09 17:00:01 UTC (rev 4185) @@ -0,0 +1,16 @@ +package com.bigdata.bop; + +/** + * An aggregate operator, such as SUM, COUNT, MIN, MAX, etc. + * + * @author thompsonbry + */ +public interface IAggregate<E> extends IValueExpression<E>{ + + /** + * Return the current value of the aggregate (this has a side-effect on the + * internal state of the {@link IAggregate} operator). + */ + E get(IBindingSet bset); + +} Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IValueExpression.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IValueExpression.java 2011-02-08 17:50:40 UTC (rev 4184) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IValueExpression.java 2011-02-09 17:00:01 UTC (rev 4185) @@ -2,6 +2,11 @@ import java.io.Serializable; +/** + * An expression which may be evaluated to a value. 
+ * + * @author mrpersonick + */ public interface IValueExpression<E> extends BOp, Serializable { /** Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java 2011-02-08 17:50:40 UTC (rev 4184) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java 2011-02-09 17:00:01 UTC (rev 4185) @@ -109,6 +109,10 @@ * @todo Unit tests for {@link ChunkedRunningQuery} to verify that it * eventually schedules operator tasks which were deferred to * prevent concurrent evaluation. + * + * @todo This is currently not used. However, it could simplify the + * logic for operators, such as SLICE, which otherwise depend on + * {@link #SHARED_STATE} to provide their own synchronization. */ String THREAD_SAFE = PipelineOp.class.getName() + ".threadSafe"; @@ -334,7 +338,27 @@ return getProperty(PipelineOp.Annotations.PIPELINED, PipelineOp.Annotations.DEFAULT_PIPELINED); } - + + /** + * Return <code>true</code> iff concurrent invocations of the operator are + * permitted. + * <p> + * Note: Operators which are not thread-safe still permit concurrent + * evaluation for <em>distinct</em> partitions. In order to ensure that all + * invocations of the operator within a query are serialized (no more than + * one concurrent invocation) you must also specify + * {@link BOpEvaluationContext#CONTROLLER}. 
+ * + * @see Annotations#THREAD_SAFE + * @see BOp.Annotations#EVALUATION_CONTEXT + */ + public boolean isThreadSafe() { + + return getProperty(Annotations.THREAD_SAFE, + Annotations.DEFAULT_THREAD_SAFE); + + } + /** * Return <code>true</code> iff {@link #newStats()} must be shared across * all invocations of {@link #eval(BOpContext)} for this operator for a Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/DistinctBindingSetOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/DistinctBindingSetOp.java 2011-02-08 17:50:40 UTC (rev 4184) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/DistinctBindingSetOp.java 2011-02-09 17:00:01 UTC (rev 4185) @@ -14,13 +14,17 @@ import com.bigdata.bop.IConstant; import com.bigdata.bop.IVariable; import com.bigdata.bop.PipelineOp; -import com.bigdata.bop.bindingSet.HashBindingSet; +import com.bigdata.bop.bindingSet.ListBindingSet; import com.bigdata.bop.engine.BOpStats; import com.bigdata.relation.accesspath.IAsynchronousIterator; import com.bigdata.relation.accesspath.IBlockingBuffer; /** * A pipelined DISTINCT operator based on a hash table. + * <p> + * Note: This implementation is a pipelined operator which inspects each chunk + * of solutions as they arrive and those solutions which are distinct for each + * chunk processed. * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id: DistinctElementFilter.java 3466 2010-08-27 14:28:04Z @@ -58,8 +62,23 @@ public DistinctBindingSetOp(final BOp[] args, final Map<String, Object> annotations) { - super(args, annotations); + super(args, annotations); + switch (getEvaluationContext()) { + case CONTROLLER: + break; + default: + throw new UnsupportedOperationException( + Annotations.EVALUATION_CONTEXT + "=" + + getEvaluationContext()); + } + + // shared state is used to share the hash table. 
+ if (isSharedState()) { + throw new UnsupportedOperationException(Annotations.SHARED_STATE + + "=" + isSharedState()); + } + } /** @@ -101,6 +120,12 @@ } + public BOpStats newStats() { + + return new DistinctStats(this); + + } + public FutureTask<Void> eval(final BOpContext<IBindingSet> context) { return new FutureTask<Void>(new DistinctTask(this, context)); @@ -145,6 +170,37 @@ return true; } } + + /** + * Extends {@link BOpStats} to provide the shared state for the distinct + * solution groups across multiple invocations of the DISTINCT operator. + */ + private static class DistinctStats extends BOpStats { + + /** + * + */ + private static final long serialVersionUID = 1L; + + /** + * A concurrent map whose keys are the bindings on the specified + * variables (the keys and the values are the same since the map + * implementation does not allow <code>null</code> values). + * <p> + * Note: The map is shared state and can not be discarded or cleared + * until the last invocation!!! + */ + private final ConcurrentHashMap<Solution, Solution> map; + + public DistinctStats(final DistinctBindingSetOp op) { + + this.map = new ConcurrentHashMap<Solution, Solution>( + op.getInitialCapacity(), op.getLoadFactor(), + op.getConcurrencyLevel()); + + } + + } /** * Task executing on the node. @@ -153,12 +209,12 @@ private final BOpContext<IBindingSet> context; - /** - * A concurrent map whose keys are the bindings on the specified - * variables (the keys and the values are the same since the map - * implementation does not allow <code>null</code> values). - */ - private /*final*/ ConcurrentHashMap<Solution, Solution> map; + /** + * A concurrent map whose keys are the bindings on the specified + * variables (the keys and the values are the same since the map + * implementation does not allow <code>null</code> values). + */ + private final ConcurrentHashMap<Solution, Solution> map; /** * The variables used to impose a distinct constraint. 
@@ -178,9 +234,8 @@ if (vars.length == 0) throw new IllegalArgumentException(); - this.map = new ConcurrentHashMap<Solution, Solution>( - op.getInitialCapacity(), op.getLoadFactor(), - op.getConcurrencyLevel()); + // The map is shared state across invocations of this operator task. + this.map = ((DistinctStats) context.getStats()).map; } @@ -235,6 +290,7 @@ stats.chunksIn.increment(); stats.unitsIn.add(a.length); + // The distinct solutions accepted from this chunk. final List<IBindingSet> accepted = new LinkedList<IBindingSet>(); int naccepted = 0; @@ -243,14 +299,26 @@ // System.err.println("considering: " + bset); + /* + * Test to see if this solution is distinct from those + * already seen. + */ final IConstant<?>[] vals = accept(bset); if (vals != null) { + /* + * This is a distinct solution. Copy only the + * variables used to select distinct solutions into + * a new binding set and add that to the set of + * [accepted] binding sets which will be emitted by + * this operator. + */ + // System.err.println("accepted: " // + Arrays.toString(vals)); - final HashBindingSet tmp = new HashBindingSet(); + final ListBindingSet tmp = new ListBindingSet(); for (int i = 0; i < vars.length; i++) { @@ -268,12 +336,19 @@ if (naccepted > 0) { + /* + * At least one solution was accepted as distinct, so + * copy the selected solutions to the output of the + * operator. + */ + final IBindingSet[] b = accepted .toArray(new IBindingSet[naccepted]); // System.err.println("output: " // + Arrays.toString(b)); + // copy the distinct solutions to the output. sink.add(b); // stats.unitsOut.add(naccepted); @@ -285,6 +360,23 @@ sink.flush(); + if(context.isLastInvocation()) { + + /* + * Discard the map. + * + * Note: The map can not be discarded (or cleared) until the + * last invocation. However, we only get the benefit of the + * lastInvocation signal if the operator is single threaded + * and running on the query controller. 
That is not a + * requirement for this DISTINCT implementation, so the map + * is not going to be cleared until the query goes out of + * scope and is swept by GC. + */ + map.clear(); + + } + // done. return null; @@ -292,9 +384,6 @@ sink.close(); - // discard the map. - map = null; - } } Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/GroupByOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/GroupByOp.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/GroupByOp.java 2011-02-09 17:00:01 UTC (rev 4185) @@ -0,0 +1,111 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Sep 4, 2010 + */ + +package com.bigdata.bop.solutions; + +import java.util.Map; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.IConstraint; +import com.bigdata.bop.IValueExpression; +import com.bigdata.bop.PipelineOp; + +/** + * Base class for operators which perform aggregation operations on binding + * sets. 
+ * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id: SortOp.java 3665 2010-09-28 16:53:22Z thompsonbry $ + */ +abstract public class GroupByOp extends PipelineOp { + + /** + * + */ + private static final long serialVersionUID = 1L; + + public interface Annotations extends PipelineOp.Annotations { + + /** + * The ordered set of variables declared by {@link #COMPUTE} which are + * projected out of the group by operator. + */ + String SELECT = GroupByOp.class.getName() + ".select"; + + /** + * The ordered set of {@link IValueExpression}s which are to be + * computed. + * + * TODO This really needs to be VAR := EXPR. EXPR can only reference the + * source variables or variables declared earlier in the ordered + * collection. If an EXPR references a source variable, then it must + * wrap that source variable with an aggregation operator (SUM, COUNT, + * MIN, MAX, AVG, GROUP_CONCAT, or SAMPLE). Only source variables and + * constants may appear as operands of aggregation operators. [We need a + * BIND() operator for this, which might wind up being the same as a + * LET.] + * + * TODO Decide how we will handle AVG. + */ + String COMPUTE = GroupByOp.class.getName() + ".compute"; + + /** + * The ordered set of or one or more variables defining the aggregation + * groups (required). The variables named in this collection MUST be + * variables declared for the incoming solutions. + */ + String GROUP_BY = GroupByOp.class.getName() + ".groupBy"; + + /** + * An {@link IConstraint}[] applied to the aggregated solutions + * (optional). The {@link IConstraint}s MAY NOT include aggregation + * operators and may only reference variables declared by + * {@link #COMPUTE}. + * + * TODO Should be the BEV of an {@link IValueExpression}, which might or + * might not be an {@link IConstraint}. 
+ */ + String HAVING = GroupByOp.class.getName() + ".having"; + + } + + /** + * @param op + */ + public GroupByOp(final GroupByOp op) { + super(op); + } + + /** + * @param args + * @param annotations + */ + public GroupByOp(final BOp[] args, final Map<String, Object> annotations) { + super(args, annotations); + } + +} Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/MemoryGroupByOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/MemoryGroupByOp.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/MemoryGroupByOp.java 2011-02-09 17:00:01 UTC (rev 4185) @@ -0,0 +1,551 @@ +package com.bigdata.bop.solutions; + +import java.util.Arrays; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.concurrent.Callable; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.FutureTask; + +import org.apache.log4j.Logger; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.BOpContext; +import com.bigdata.bop.BOpUtility; +import com.bigdata.bop.ConcurrentHashMapAnnotations; +import com.bigdata.bop.IAggregate; +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IConstant; +import com.bigdata.bop.IConstraint; +import com.bigdata.bop.IValueExpression; +import com.bigdata.bop.IVariable; +import com.bigdata.bop.bindingSet.ListBindingSet; +import com.bigdata.bop.engine.BOpStats; +import com.bigdata.relation.accesspath.IAsynchronousIterator; +import com.bigdata.relation.accesspath.IBlockingBuffer; + +/** + * An in-memory GROUP_BY for binding sets. + * <p> + * Note: This implementation is a pipelined operator which aggregates each chunk + * of solutions as they arrive and outputs empty messages (containing no + * solutions) until the last chunk is consumed. 
This operator relies on + * {@link BOpContext#isLastInvocation()} in order to decide when to write its + * output solutions, which requires the operator to (a) be evaluated on the + * controller and (b) declare itself as NOT thread-safe. In addition, the + * operator must be marked as SHARED_STATE := true such that the hash table + * associated with the {@link BOpStats} is shared across multiple invocations of + * this operator for a given query. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id: DistinctElementFilter.java 3466 2010-08-27 14:28:04Z + * thompsonbry $ + * + * @todo GROUP_BY implementation which depends on an ORDER_BY operator to setup + * the correct order and then performs the aggregations in a single pass + * over the ordered data. + * + * @todo GROUP_BY implementation using an HTree suitable for use when the #of + * groups is very large. The HTree would be associated with the allocation + * context for the (queryId,bopId(,shardId))). (The shardId would be used + * iff the GROUP_BY operator was hash partitioned across the nodes.) + * + * @todo In scale-out, we can hash partition the GROUP_BY operator over the + * nodes as long as all of the aggregation functions can be combined from + * the partitions. If AVG is used, then it needs to be replaced by SUM and + * COUNT in the GROUP_BY operator and the use of the AVG in the SELECT + * needs to be rewritten as (SUM(v)/COUNT(v)). + * + * @todo As a special twist, there can also be memory burdens, even with a small + * #of groups, when the aggregated solution data is very large and a + * GROUP_CONCAT function is specified such that it combines a large #of + * input solution bindings into a big string. + * + * FIXME How should we handle DISTINCT semantics for GROUP_BY? (I think + * that we just insert a {@link DistinctBindingSetOp} before the + * GROUP_BY). + * + * FIXME How should we handle nulls (missing values) during aggregation? 
+ * (It appears that nulls and type errors are generally handled by the + * aggregate operator ignoring the detail record). + * + * FIXME All of the {@link IAggregate} operators have a side-effect. In + * order for them to have isolated side-effects for distinct groups, they + * would have to either internalize a value map for the group or each + * group would have to use a distinct instance. If the latter, then + * provide for this on the operator, e.g., newInstance(), and document + * why. + */ +public class MemoryGroupByOp extends GroupByOp { + + /** + * + */ + private static final long serialVersionUID = 1L; + + private static final transient Logger log = Logger + .getLogger(MemoryGroupByOp.class); + + public interface Annotations extends GroupByOp.Annotations, + ConcurrentHashMapAnnotations { + + } + + /** + * Required deep copy constructor. + */ + public MemoryGroupByOp(final MemoryGroupByOp op) { + super(op); + } + + /** + * Required shallow copy constructor. + */ + public MemoryGroupByOp(final BOp[] args, + final Map<String, Object> annotations) { + + super(args, annotations); + + switch (getEvaluationContext()) { + case CONTROLLER: + break; + default: + throw new UnsupportedOperationException( + Annotations.EVALUATION_CONTEXT + "=" + + getEvaluationContext()); + } + + // shared state is used to share the hash table. + if (isSharedState()) { + throw new UnsupportedOperationException(Annotations.SHARED_STATE + + "=" + isSharedState()); + } + + // single threaded required for pipelining w/ isLastInvocation() hook. + if (isThreadSafe()) { + throw new UnsupportedOperationException(Annotations.THREAD_SAFE + + "=" + isThreadSafe()); + } + + // operator is pipelined, but relies on isLastEvaluation() hook. 
+ if (!isPipelined()) { + throw new UnsupportedOperationException(Annotations.PIPELINED + "=" + + isPipelined()); + } + + } + + /** + * @see Annotations#INITIAL_CAPACITY + */ + public int getInitialCapacity() { + + return getProperty(Annotations.INITIAL_CAPACITY, + Annotations.DEFAULT_INITIAL_CAPACITY); + + } + + /** + * @see Annotations#LOAD_FACTOR + */ + public float getLoadFactor() { + + return getProperty(Annotations.LOAD_FACTOR, + Annotations.DEFAULT_LOAD_FACTOR); + + } + + /** + * @see Annotations#CONCURRENCY_LEVEL + */ + public int getConcurrencyLevel() { + + return getProperty(Annotations.CONCURRENCY_LEVEL, + Annotations.DEFAULT_CONCURRENCY_LEVEL); + + } + + public BOpStats newStats() { + + return new GroupByStats(this); + + } + + public FutureTask<Void> eval(final BOpContext<IBindingSet> context) { + + return new FutureTask<Void>(new GroupByTask(this, context)); + + } + + /** + * Wrapper used for the solution groups in the {@link ConcurrentHashMap}. + */ + private static class SolutionGroup { + + /** The precomputed hash code for {@link #vals}. */ + private final int hash; + + /** The values for the groupBy variables which define a distinct group. */ + private final IConstant<?>[] vals; + + /** + * The values for the variables which are being computed by the + * aggregation. The binding set is when the {@link SolutionGroup} is + * first constructed. + * <p> + * Note: Updates to this binding set MUST be protected by synchronizing + * on {@link SolutionGroup}. 
+ */ + private final IBindingSet aggregatedBSet; + + public String toString() { + return super.toString() + // + "{group=" + Arrays.toString(vals) + // + ",solution=" + aggregatedBSet + // + "}"; + } + + public SolutionGroup(final IConstant<?>[] vals) { + this.vals = vals; + this.hash = java.util.Arrays.hashCode(vals); + this.aggregatedBSet = new ListBindingSet(); + } + + public int hashCode() { + return hash; + } + + public boolean equals(final Object o) { + if (this == o) + return true; + if (!(o instanceof SolutionGroup)) { + return false; + } + final SolutionGroup t = (SolutionGroup) o; + if (vals.length != t.vals.length) + return false; + for (int i = 0; i < vals.length; i++) { + // @todo verify that this allows for nulls with a unit test. + if (vals[i] == t.vals[i]) + continue; + if (vals[i] == null) + return false; + if (!vals[i].equals(t.vals[i])) + return false; + } + return true; + } + + /** + * Apply the {@link IValueExpression}s to compute the updated variable + * bindings in the {@link SolutionGroup}. + * + * @param bset + * An input solution. + * @param compute + * The ordered array of {@link IValueExpression}s which + * define the aggregated variables. + */ + public void aggregate(final IBindingSet bset, + final IValueExpression<?>[] compute) { + + /* + * @todo The aggregated variables are all undefined the first time a + * source binding set is presented and need to be initialized to an + * appropriate value. + */ + + // synchronize for visibility. + synchronized(this) { + } + + throw new UnsupportedOperationException(); + + } + + } // SolutionGroup + + /** + * Extends {@link BOpStats} to provide the shared state for the solution + * groups across multiple invocations of the GROUP_BY operator. 
+ */ + private static class GroupByStats extends BOpStats { + + /** + * + */ + private static final long serialVersionUID = 1L; + + /** + * A concurrent map whose keys are the bindings on the specified + * variables (the keys and the values are the same since the map + * implementation does not allow <code>null</code> values). + * <p> + * Note: The map is shared state and can not be discarded or cleared + * until the last invocation!!! + */ + private /*final*/ ConcurrentHashMap<SolutionGroup, SolutionGroup> map; + + public GroupByStats(final MemoryGroupByOp op) { + + this.map = new ConcurrentHashMap<SolutionGroup, SolutionGroup>( + op.getInitialCapacity(), op.getLoadFactor(), + op.getConcurrencyLevel()); + + } + + } + + /** + * Task executing on the node. + */ + static private class GroupByTask implements Callable<Void> { + + private final BOpContext<IBindingSet> context; + + /** + * A concurrent map whose keys are the bindings on the specified + * variables (the keys and the values are the same since the map + * implementation does not allow <code>null</code> values). + * <p> + * Note: The map is shared state and can not be discarded or cleared + * until the last invocation!!! + */ + private final ConcurrentHashMap<SolutionGroup, SolutionGroup> map; + + /** + * The ordered array of variables which define the distinct groups to + * be aggregated. + */ + private final IVariable<?>[] groupBy; + + /** + * The {@link IValueExpression}s used to compute each of the variables + * in the aggregated solutions. + */ + private final IValueExpression<?>[] compute; + + /** + * Optional constraints applied to the aggregated solutions. + */ + private final IConstraint[] having; + + /** + * Optional set of variables to be projected out of the GROUP_BY + * operator. When <code>null</code>, all variables will be projected + * out. 
+ */ + private final IVariable<?>[] select; + + GroupByTask(final MemoryGroupByOp op, + final BOpContext<IBindingSet> context) { + + this.context = context; + + // must be non-null, and non-empty array w/o dups. + this.groupBy = (IVariable[]) op + .getRequiredProperty(GroupByOp.Annotations.GROUP_BY); + + if (groupBy == null) + throw new IllegalArgumentException(); + + if (groupBy.length == 0) + throw new IllegalArgumentException(); + + /* + * Must be non-null, and non-empty array. Any variables in the + * source solutions may only appear within aggregation operators + * such as SUM, COUNT, etc. Variables declared in [compute] may be + * referenced inside the value expressions as long as they do not + * appear within an aggregation function, but they they must be + * defined earlier in the ordered compute[]. The value expressions + * must include an assignment to the appropriate aggregate variable. + * + * FIXME This must include a LET or BIND to assign the computed + * value to the appropriate variable. + * + * FIXME verify references to unaggregated and aggregated variables. + */ + this.compute = (IValueExpression<?>[]) op + .getRequiredProperty(GroupByOp.Annotations.COMPUTE); + + if (compute == null) + throw new IllegalArgumentException(); + + if (compute.length == 0) + throw new IllegalArgumentException(); + + // may be null or empty[]. + this.having = (IConstraint[]) op + .getRequiredProperty(GroupByOp.Annotations.HAVING); + + /* + * The variables to project out of the GROUP_BY operator. This may + * be null, but not empty[]. + * + * TODO Variables may only appear once and must be distinct from the + * source variables. + */ + this.select = (IVariable[]) op + .getRequiredProperty(GroupByOp.Annotations.SELECT); + + if (select != null && select.length == 0) + throw new IllegalArgumentException(); + + // The map is shared state across invocations of this operator task. 
+ this.map = ((GroupByStats) context.getStats()).map; + + } + + /** + * Return the "row" for the groupBy variables. + * + * @param bset + * The binding set to be filtered. + * + * @return The distinct as bound values -or- <code>null</code> if the + * binding set duplicates a solution which was already accepted. + */ + private SolutionGroup accept(final IBindingSet bset) { + + final IConstant<?>[] r = new IConstant<?>[groupBy.length]; + + for (int i = 0; i < groupBy.length; i++) { + + /* + * Note: This allows null's. + * + * @todo write a unit test when some variables are not bound. + */ + r[i] = bset.get(groupBy[i]); + + } + + final SolutionGroup s = new SolutionGroup(r); + + map.putIfAbsent(s, s); + + return s; + + } + + public Void call() throws Exception { + + final BOpStats stats = context.getStats(); + + final boolean isLastInvocation = context.isLastInvocation(); + + final IAsynchronousIterator<IBindingSet[]> itr = context + .getSource(); + + final IBlockingBuffer<IBindingSet[]> sink = context.getSink(); + + try { + + /* + * Present each source solution in turn, identifying the group + * into which it falls and then applying the value expressions + * to update the aggregated variable bindings for that group. + */ + while (itr.hasNext()) { + + final IBindingSet[] a = itr.next(); + + stats.chunksIn.increment(); + stats.unitsIn.add(a.length); + + for (IBindingSet bset : a) { + + // identify the solution group. + final SolutionGroup solutionGroup = accept(bset); + + // aggregate the bindings + solutionGroup.aggregate(bset, compute); + + } + + } + + if (isLastInvocation) { + + /* + * Write aggregated solutions on the sink, applying the + * [having] filter to remove any solutions which do not + * satisfy its constraints. 
+ */ + + final List<IBindingSet> accepted = new LinkedList<IBindingSet>(); + + int naccepted = 0; + + for(SolutionGroup solutionGroup: map.values()) { + + synchronized(solutionGroup) { + + IBindingSet bset = solutionGroup.aggregatedBSet; + + // verify optional constraint(s) + if (having != null + && !BOpUtility.isConsistent(having, bset)) { + + // skip this group. + continue; + + } + + /* + * We will accept this solution group, so filter out + * any variables which are not being projected out + * of this operator. + */ + if (log.isDebugEnabled()) + log.debug("accepted: " + solutionGroup); + + // optionally strip off unnecessary variables. + bset = select == null ? bset : bset + .copy(select); + + accepted.add(bset); + + naccepted++; + + } + + } + + /* + * Output the aggregated bindings for the accepted + * solutions. + */ + if (naccepted > 0) { + + final IBindingSet[] b = accepted + .toArray(new IBindingSet[naccepted]); + + sink.add(b); + + // flush the output. + sink.flush(); + + // discard the map. + map.clear(); + + } + + } + + // done. + return null; + + } finally { + + sink.close(); + + } + + } // call() + + } // GroupByTask + +} Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestAll.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestAll.java 2011-02-08 17:50:40 UTC (rev 4184) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestAll.java 2011-02-09 17:00:01 UTC (rev 4185) @@ -79,6 +79,9 @@ // pure binding set operators. suite.addTest(com.bigdata.bop.bset.TestAll.suite()); + // bind(var,expr) + suite.addTestSuite(TestBind.class); + // index operators. 
suite.addTest(com.bigdata.bop.ndx.TestAll.suite()); Added: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestBind.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestBind.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestBind.java 2011-02-09 17:00:01 UTC (rev 4185) @@ -0,0 +1,72 @@ +/** + * + */ +package com.bigdata.bop; + +import com.bigdata.bop.bindingSet.ListBindingSet; + +import junit.framework.TestCase2; + +/** + * Unit tests for {@link Bind}. + * + * @author thompsonbry + * + * @todo Write a test where the {@link IValueExpression} given to bind is more + * complex than an {@link IVariable} or an {@link IConstant}. + */ +public class TestBind extends TestCase2 { + + /** + * + */ + public TestBind() { + } + + /** + * @param name + */ + public TestBind(String name) { + super(name); + } + + /** + * Unit test of bind(var,constant). + */ + public void test_bind_constant() { + + final IBindingSet bset = new ListBindingSet(); + + final IVariable<?> y = Var.var("y"); + + // verify bind() returns the value of the constant. + assertEquals(Integer.valueOf(12), new Bind(y, new Constant<Integer>( + Integer.valueOf(12))).get(bset)); + + // verify side-effect on the binding set. + assertEquals(new Constant<Integer>(Integer.valueOf(12)), bset.get(y)); + + } + + /** + * Unit test of bind(var,otherVar). + */ + public void test_bind_var() { + + final IBindingSet bset = new ListBindingSet(); + + final IVariable<?> x = Var.var("x"); + + final IVariable<?> y = Var.var("y"); + + bset.set(x, new Constant<Integer>(12)); + + // verify bind() returns the value of the other variable. + assertEquals(Integer.valueOf(12), new Bind(y, x).get(bset)); + + // verify side-effect on the binding set. 
+ assertEquals(new Constant<Integer>(Integer.valueOf(12)), bset.get(y)); + + } + +} Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestDeepCopy.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestDeepCopy.java 2011-02-08 17:50:40 UTC (rev 4184) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestDeepCopy.java 2011-02-09 17:00:01 UTC (rev 4185) @@ -85,6 +85,7 @@ Constant.class,// Var.class,// QuoteOp.class,// + Bind.class,// // com.bigdata.bop.constraint EQ.class,// NE.class,// Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestAll.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestAll.java 2011-02-08 17:50:40 UTC (rev 4184) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestAll.java 2011-02-09 17:00:01 UTC (rev 4185) @@ -110,17 +110,17 @@ * multiple chunks of solutions. */ - // stress test for SliceOp. + // stress test for SLICE suite.addTestSuite(TestQueryEngine_Slice.class); - // ORDER BY implementations. + // stress test for ORDER_BY suite.addTestSuite(TestQueryEngine_SortOp.class); - // @todo DISTINCT implementations. -// suite.addTestSuite(TestQueryEngine_SortOp.class); + // stress test for DISTINCT. + suite.addTestSuite(TestQueryEngine_DistinctOp.class); - // @todo GROUP BY implementations. -// suite.addTestSuite(TestQueryEngine_SortOp.class); + // stress test for GROUP_BY. 
+ suite.addTestSuite(TestQueryEngine_GroupByOp.class); return suite; Added: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine_DistinctOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine_DistinctOp.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine_DistinctOp.java 2011-02-09 17:00:01 UTC (rev 4185) @@ -0,0 +1,306 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Sep 1, 2010 + */ + +package com.bigdata.bop.engine; + +import java.util.Properties; +import java.util.Random; +import java.util.UUID; + +import junit.framework.TestCase2; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.BOpEvaluationContext; +import com.bigdata.bop.Constant; +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IConstant; +import com.bigdata.bop.IVariable; +import com.bigdata.bop.NV; +import com.bigdata.bop.PipelineOp; +import com.bigdata.bop.Var; +import com.bigdata.bop.bindingSet.ListBindingSet; +import com.bigdata.bop.bset.StartOp; +import com.bigdata.bop.solutions.ComparatorOp; +import com.bigdata.bop.solutions.ISortOrder; +import com.bigdata.bop.solutions.MemorySortOp; +import com.bigdata.bop.solutions.SliceOp; +import com.bigdata.bop.solutions.SortOrder; +import com.bigdata.journal.BufferMode; +import com.bigdata.journal.Journal; +import com.bigdata.relation.accesspath.IAsynchronousIterator; +import com.bigdata.relation.accesspath.ThickAsynchronousIterator; + +/** + * Test suite for DISTINCT solution operators when integrated with the query + * engine. This test suite is designed to examine cases where the DISTINCT + * operator will have to buffer multiple chunks of solutions before finally + * reporting the aggregated solutions. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id: TestQueryEngine2.java 3489 2010-09-01 18:27:35Z thompsonbry $ + * + * @todo Test each DISTINCT implementation here. 
+ */ +public class TestQueryEngine_DistinctOp extends TestCase2 { + + public Properties getProperties() { + + final Properties p = new Properties(super.getProperties()); + + p.setProperty(Journal.Options.BUFFER_MODE, BufferMode.Transient + .toString()); + + return p; + + } + + Journal jnl; + QueryEngine queryEngine; + + public void setUp() throws Exception { + + jnl = new Journal(getProperties()); + + queryEngine = new QueryEngine(jnl); + + queryEngine.init(); + + } + + public void tearDown() throws Exception { + + if (queryEngine != null) { + queryEngine.shutdownNow(); + queryEngine = null; + } + + if (jnl != null) { + jnl.destroy(); + jnl = null; + } + + } + + /** + * + */ + public TestQueryEngine_DistinctOp() { + } + + /** + * @param name + */ + public TestQueryEngine_DistinctOp(String name) { + super(name); + } + + public void testStressThreadSafe() throws Exception { + + for (int i = 0; i < 100; i++) { + + try { + + test_distinct_threadSafe(); + + } catch (Throwable t) { + + fail("Failed after " + i + " trials", t); + + } + + } + + } + + /** + * @todo Unit test for DISTINCT. How to judge correctness? + */ + public void test_distinct_threadSafe() throws Exception { + + final long timeout = 10000; // ms + + final int ntrials = 10000; + + final int poolSize = 10; + + doDistinctTest(10000/* maxInt */, timeout, ntrials, poolSize); + + } + + /** + * Return an {@link IAsynchronousIterator} that will read a single, chunk + * containing all of the specified {@link IBindingSet}s. + * + * @param bindingSetChunks + * the chunks of binding sets. + */ + protected ThickAsynchronousIterator<IBindingSet[]> newBindingSetIterator( + final IBindingSet[][] bindingSetChunks) { + + return new ThickAsynchronousIterator<IBindingSet[]>(bindingSetChunks); + + } + + /** + * + * @param timeout + * @param ntrials + * @param poolSize + * + * @return The #of successful trials. 
+ * + * @throws Exception + */ + protected void doDistinctTest(final int maxInt, + final long timeout, final int ntrials, final int poolSize) + throws Exception { + + fail("write test helper"); + + int ngiven = 0; + final IVariable<?> a = Var.var("a"); + final IBindingSet[][] chunks = new IBindingSet[ntrials][]; + { + final Random r = new Random(); + for (int i = 0; i < chunks.length; i++) { + // random non-zero chunk size + chunks[i] = new IBindingSet[r.nextInt(10) + 1]; + for (int j = 0; j < chunks[i].length; j++) { + final IBindingSet bset = new ListBindingSet(); + bset.set(a, new Constant<Integer>(r.nextInt(maxInt))); + chunks[i][j] = bset; + ngiven++; + } + } + } + + final int startId = 1; + final int sortId = 2; + + /* + * Note: The StartOp breaks up the initial set of chunks into multiple + * IChunkMessages, which results in multiple invocations of the SortOp. + */ + final PipelineOp startOp = new StartOp(new BOp[]{}, NV.asMap(new NV[]{// + new NV(SliceOp.Annotations.BOP_ID, startId),// + new NV(MemorySortOp.Annotations.EVALUATION_CONTEXT, + BOpEvaluationContext.CONTROLLER),// + })); + + final PipelineOp query = new MemorySortOp(new BOp[] {startOp}, NV.asMap(new NV[] {// + new NV(SliceOp.Annotations.BOP_ID, sortId),// + new NV(MemorySortOp.Annotations.COMPARATOR, + new IntegerComparatorOp( + new ISortOrder[] { new SortOrder(a, + true) })),// + new NV(MemorySortOp.Annotations.EVALUATION_CONTEXT, + BOpEvaluationContext.CONTROLLER),// + new NV(MemorySortOp.Annotations.PIPELINED, false),// + })); + + final UUID queryId = UUID.randomUUID(); + final IRunningQuery q = queryEngine.eval(queryId, query, + new LocalChunkMessage<IBindingSet>(queryEngine, queryId, + startId, -1/* partitionId */, + newBindingSetIterator(chunks))); + + // consume solutions. + int nsolutions = 0; + final IAsynchronousIterator<IBindingSet[]> itr = q.iterator(); + while (itr.hasNext()) { + nsolutions += itr.next().length; + } + + // wait for the query to terminate. 
+ q.get(); + + // Verify stats. + final BOpStats stats = (BOpStats) q.getStats().get(sortId); + if (log.isInfoEnabled()) + log.info(getClass().getName() + "." + getName() + " : " + stats); + assertNotNull(stats); + assertEquals(ngiven, nsolutions); + assertEquals(ngiven, stats.unitsIn.get()); + assertEquals(ngiven, stats.unitsOut.get()); + + } + + /** + * Helper class for comparing solution sets having variables which evaluate + * to {@link Integer} values. + */ + static private class IntegerComparatorOp extends ComparatorOp + { + + /** + * + */ + private static final long serialVersionUID = 1L; + + /** The sort order. */ + final private ISortOrder<?> [] _sors; + + public IntegerComparatorOp ( final ISortOrder<?> sors [] ) + { + super ( new BOp [] {}, NV.asMap ( new NV [] { new NV ( ComparatorOp.Annotations.ORDER, sors ) } ) ) ; + _sors = sors ; + } + + public int compare ( IBindingSet o1, IBindingSet o2 ) + { + for ( ISortOrder<?> sor : _sors ) + { + int ret = compare ( sor, o1, o2 ) ; + if ( 0 != ret ) + return ret ; + } + return 0 ; + } + + private int compare ( ISortOrder<?> sor, IBindingSet lhs, IBindingSet rhs ) + { + int compare = 0 ; + + IConstant<?> lhsv = lhs.get ( sor.getVariable () ) ; + IConstant<?> rhsv = rhs.get ( sor.getVariable () ) ; + + if ( null == lhsv && null == rhsv ) + return 0 ; + else if ( null == lhsv ) + compare = -1 ; + else if ( null == rhsv ) + compare = 1 ; + else + compare = ((Integer) lhsv.get()).compareTo(((Integer) rhsv + .get())) ; + + return compare * ( sor.isAscending () ? 
1 : -1 ) ; + } + + } + +} Added: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine_GroupByOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine_GroupByOp.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine_GroupByOp.java 2011-02-09 17:00:01 UTC (rev 4185) @@ -0,0 +1,306 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Sep 1, 2010 + */ + +package com.bigdata.bop.engine; + +import java.util.Properties; +import java.util.Random; +import java.util.UUID; + +import junit.framework.TestCase2; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.BOpEvaluationContext; +import com.bigdata.bop.Constant; +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IConstant; +import com.bigdata.bop.IVariable; +import com.bigdata.bop.NV; +import com.bigdata.bop.PipelineOp; +import com.bigdata.bop.Var; +import com.bigdata.bop.bindingSet.ListBindingSet; +import com.bigdata.bop.bset.StartOp; +import com.bigdata.bop.solutions.ComparatorOp; +import com.bigdata.bop.solutions.ISortOrder; +import com.bigdata.bop.solutions.MemorySortOp; +import com.bigdata.bop.solutions.SliceOp; +import com.bigdata.bop.solutions.SortOrder; +import com.bigdata.journal.BufferMode; +import com.bigdata.journal.Journal; +import com.bigdata.relation.accesspath.IAsynchronousIterator; +import com.bigdata.relation.accesspath.ThickAsynchronousIterator; + +/** + * Test suite for GROUP_BY operators when integrated with the query engine. This + * test suite is designed to examine cases where the GROUP_BY operator will have + * to buffer multiple chunks of solutions before finally reporting the aggregated + * solutions. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id: TestQueryEngine2.java 3489 2010-09-01 18:27:35Z thompsonbry $ + * + * @todo Test each GROUP_BY implementation here. 
+ */ +public class TestQueryEngine_GroupByOp extends TestCase2 { + + public Properties getProperties() { + + final Properties p = new Properties(super.getProperties()); + + p.setProperty(Journal.Options.BUFFER_MODE, BufferMode.Transient + .toString()); + + return p; + + } + + Journal jnl; + QueryEngine queryEngine; + + public void setUp() throws Exception { + + jnl = new Journal(getProperties()); + + queryEngine = new QueryEngine(jnl); + + queryEngine.init(); + + } + + public void tearDown() throws Exception { + + if (queryEngine != null) { + queryEngine.shutdownNow(); + queryEngine = null; + } + + if (jnl != null) { + jnl.destroy(); + jnl = null; + } + + } + + /** + * + */ + public TestQueryEngine_GroupByOp() { + } + + /** + * @param name + */ + public TestQueryEngine_GroupByOp(String name) { + super(name); + } + + public void testStressThreadSafe() throws Exception { + + for (int i = 0; i < 100; i++) { + + try { + + test_groupBy_threadSafe(); + + } catch (Throwable t) { + + fail("Failed after " + i + " trials", t); + + } + + } + + } + + /** + * @todo Unit test for GROUP BY. How to judge correctness? + */ + public void test_groupBy_threadSafe() throws Exception { + + final long timeout = 10000; // ms + + final int ntrials = 10000; + + final int poolSize = 10; + + doGroupByTest(10000/* maxInt */, timeout, ntrials, poolSize); + + } + + /** + * Return an {@link IAsynchronousIterator} that will read a single, chunk + * containing all of the specified {@link IBindingSet}s. + * + * @param bindingSetChunks + * the chunks of binding sets. + */ + protected ThickAsynchronousIterator<IBindingSet[]> newBindingSetIterator( + final IBindingSet[][] bindingSetChunks) { + + return new ThickAsynchronousIterator<IBindingSet[]>(bindingSetChunks); + + } + + /** + * + * @param timeout + * @param ntrials + * @param poolSize + * + * @return The #of successful trials. 
+ * + * @throws Exception + */ + protected void doGroupByTest(final int maxInt, + final long timeout, final int ntrials, final int poolSize) + throws Exception { + + fail("write test helper"); + + int ngiven = 0; + final IVariable<?> a = Var.var("a"); + final IBindingSet[][] chunks = new IBindingSet[ntrials][]; + { + final Random r = new Random(); + for (int i = 0; i < chunks.length; i++) { + // random non-zero chunk size + chunks[i] = new IBindingSet[r.nextInt(10) + 1]; + for (int j = 0; j < chunks[i].length; j++) { + final IBindingSet bset = new ListBindingSet(); + bset.set(a, new Constant<Integer>(r.nextInt(maxInt))); + chunks[i][j] = bset; + ngiven++; + } + } + } + + final int startId = 1; + final int sortId = 2; + + /* + * Note: The StartOp breaks up the initial set of chunks into multiple + * IChunkMessages, which results in multiple invocations of the SortOp. + */ + final PipelineOp startOp = new StartOp(new BOp[]{}, NV.asMap(new NV[]{// + new NV(SliceOp.Annotations.BOP_ID, startId),// + new NV(MemorySortOp.Annotations.EVALUATION_CONTEXT, + BOpEvaluationContext.CONTROLLER),// + })); + + final PipelineOp query = new MemorySortOp(new BOp[] {startOp}, NV.asMap(new NV[] {// + new NV(SliceOp.Annotations.BOP_ID, sortId),// + new NV(MemorySortOp.Annotations.COMPARATOR, + new IntegerComparatorOp( + new ISortOrder[... [truncated message content] |
From: <tho...@us...> - 2011-02-10 22:08:30
|
Revision: 4191 http://bigdata.svn.sourceforge.net/bigdata/?rev=4191&view=rev Author: thompsonbry Date: 2011-02-10 22:08:22 +0000 (Thu, 10 Feb 2011) Log Message: ----------- More work on GROUP_BY Working w/ MikeP on a problem with BOpUtility#getSpannedVariables(), which has an underlying problem in preOrderTraversal() which can take time apparently exponential in the depth of the operator tree! (This is a redo of a failed commit). Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/filter/BOpFilterBase.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/AbstractRunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/GroupByOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/ISortOrder.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/MemoryGroupByOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/MemorySortOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestBOpUtility.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/solutions/TestMemoryGroupByOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/solutions/TestMemorySortOp.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/COUNT.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/GROUP_CONCAT.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/MAX.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/MIN.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/SAMPLE.java 
branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/SUM.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/TestBOpUtility.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregate/ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregate/AggregateBase.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregate/IAggregate.java Removed Paths: ------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IAggregate.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOp.java 2011-02-10 19:46:37 UTC (rev 4190) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOp.java 2011-02-10 22:08:22 UTC (rev 4191) @@ -28,6 +28,7 @@ package com.bigdata.bop; import java.io.Serializable; +import java.util.Iterator; import java.util.List; import java.util.Map; @@ -71,11 +72,20 @@ * @return The argument. */ BOp get(int index); + + /** + * The operator's arguments as an unmodified list. + * + * @todo Consider deprecating since this is much less efficient than + * {@link #argIterator()}. + */ + List<BOp> args(); /** - * The operator's arguments. + * An iterator visiting the operator's arguments. The iterator does + * not support removal. (This is more efficient than #args()). */ - List<BOp> args(); + Iterator<BOp> argIterator(); /** A shallow copy of the operator's arguments. 
*/ BOp[] toArray(); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java 2011-02-10 19:46:37 UTC (rev 4190) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java 2011-02-10 22:08:22 UTC (rev 4191) @@ -35,6 +35,7 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.NoSuchElementException; import com.bigdata.bop.constraint.EQ; import com.bigdata.btree.Tuple; @@ -278,9 +279,40 @@ final public List<BOp> args() { return Collections.unmodifiableList(Arrays.asList(args)); +// return Arrays.asList(args); } + // @todo unit tests. + final public Iterator<BOp> argIterator() { + + return new ArgIterator(); + + } + + /** + * An iterator visiting the arguments which does not support removal. + */ + private class ArgIterator implements Iterator<BOp> { + + private int i = 0; + + public boolean hasNext() { + return i < args.length; + } + + public BOp next() { + if (!hasNext()) + throw new NoSuchElementException(); + return args[i++]; + } + + public void remove() { + throw new UnsupportedOperationException(); + } + + } + // shallow copy public BOp[] toArray() { Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java 2011-02-10 19:46:37 UTC (rev 4190) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java 2011-02-10 22:08:22 UTC (rev 4191) @@ -44,6 +44,7 @@ import com.bigdata.relation.accesspath.IAsynchronousIterator; import com.bigdata.relation.accesspath.IBlockingBuffer; +import cutthecrap.utils.striterators.EmptyIterator; import cutthecrap.utils.striterators.Expander; import cutthecrap.utils.striterators.Filter; import 
cutthecrap.utils.striterators.SingleValueIterator; @@ -67,7 +68,7 @@ public static Iterator<BOp> preOrderIterator(final BOp op) { return new Striterator(new SingleValueIterator(op)) - .append(preOrderIterator2(op)); + .append(preOrderIterator2(0,op)); } @@ -76,14 +77,18 @@ * NOT visit this node. */ @SuppressWarnings("unchecked") - static private Iterator<AbstractNode> preOrderIterator2(final BOp op) { + static private Iterator<AbstractNode> preOrderIterator2(final int depth, final BOp op) { /* * Iterator visits the direct children, expanding them in turn with a * recursive application of the pre-order iterator. */ + + // mild optimization when no children are present. + if (op.arity() == 0) + return EmptyIterator.DEFAULT; - return new Striterator(op.args().iterator()).addFilter(new Expander() { + return new Striterator(op.argIterator()).addFilter(new Expander() { private static final long serialVersionUID = 1L; @@ -106,11 +111,13 @@ * Visit the children (recursive pre-order traversal). */ +// System.err.println("Node["+depth+"]: "+op.getClass().getName()); + final Striterator itr = new Striterator( new SingleValueIterator(child)); - // append this node in post-order position. - itr.append(preOrderIterator2(child)); + // append this node in post-order position. + itr.append(preOrderIterator2(depth+1,child)); return itr; @@ -120,10 +127,13 @@ * The child is a leaf. */ +// System.err.println("Leaf["+depth+"]: "+op.getClass().getName()); + // Visit the leaf itself. return new SingleValueIterator(child); } + } }); @@ -153,7 +163,7 @@ * recursive application of the post-order iterator. */ - return new Striterator(op.args().iterator()).addFilter(new Expander() { + return new Striterator(op.argIterator()).addFilter(new Expander() { private static final long serialVersionUID = 1L; @@ -297,7 +307,7 @@ } }); - // append the pre-order traveral of each annotation. + // append the pre-order traversal of each annotation. 
itr.append(itr2); return itr; @@ -307,10 +317,11 @@ } - /** - * Return all variables recursively using a pre-order traversal present - * whether in the operator tree or on annotations attached to operators. - */ + /** + * Return the distinct variables recursively using a pre-order traversal + * present whether in the operator tree or on annotations attached to + * operators. + */ @SuppressWarnings("unchecked") public static Iterator<IVariable<?>> getSpannedVariables(final BOp op) { @@ -322,7 +333,7 @@ public boolean isValid(Object arg0) { return arg0 instanceof IVariable<?>; } - }); + }).makeUnique(); } @@ -337,7 +348,7 @@ @SuppressWarnings("unchecked") static public Iterator<IVariable<?>> getArgumentVariables(final BOp op) { - return new Striterator(op.args().iterator()) + return new Striterator(op.argIterator()) .addFilter(new Filter() { private static final long serialVersionUID = 1L; @@ -357,7 +368,7 @@ */ static public int getArgumentVariableCount(final BOp op) { int nvars = 0; - final Iterator<BOp> itr = op.args().iterator(); + final Iterator<BOp> itr = op.argIterator(); while(itr.hasNext()) { final BOp arg = itr.next(); if (arg instanceof IVariable<?>) @@ -504,7 +515,7 @@ if (op == null) throw new IllegalArgumentException(); - final Iterator<BOp> itr = root.args().iterator(); + final Iterator<BOp> itr = root.argIterator(); while (itr.hasNext()) { @@ -770,8 +781,12 @@ if (bop == null) return; - for (BOp arg : bop.args()) { + final Iterator<BOp> itr = bop.argIterator(); + while(itr.hasNext()) { + + final BOp arg = itr.next(); + if (!(arg instanceof IVariableOrConstant<?>)) { toString(arg, sb, indent+1); @@ -805,7 +820,7 @@ } - private static final transient String ws = " "; + private static final transient String ws = " "; // /** // * Verify that all bops from the identified <i>startId</i> to the root are Deleted: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IAggregate.java =================================================================== --- 
branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IAggregate.java 2011-02-10 19:46:37 UTC (rev 4190) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IAggregate.java 2011-02-10 22:08:22 UTC (rev 4191) @@ -1,16 +0,0 @@ -package com.bigdata.bop; - -/** - * An aggregate operator, such as SUM, COUNT, MIN, MAX, etc. - * - * @author thompsonbry - */ -public interface IAggregate<E> extends IValueExpression<E>{ - - /** - * Return the current value of the aggregate (this has a side-effect on the - * internal state of the {@link IAggregate} operator). - */ - E get(IBindingSet bset); - -} Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregate/AggregateBase.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregate/AggregateBase.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregate/AggregateBase.java 2011-02-10 22:08:22 UTC (rev 4191) @@ -0,0 +1,101 @@ +package com.bigdata.bop.aggregate; + +import java.util.Map; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.BOpBase; +import com.bigdata.bop.IValueExpression; +import com.bigdata.bop.ImmutableBOp; +import com.bigdata.bop.NV; +import com.bigdata.bop.Var; +import com.bigdata.bop.BOp.Annotations; + +/** + * Abstract base class for aggregate functions. + * + * @author thompsonbry + * + * @param <E> + */ +abstract public class AggregateBase<E> extends ImmutableBOp implements IAggregate<E> { + + /** + * + */ + private static final long serialVersionUID = 1L; + + public interface Annotations extends ImmutableBOp.Annotations { + + /** + * Optional boolean property indicates whether the aggregate applies to + * the distinct within group solutions (default + * {@value #DEFAULT_DISTINCT}). 
+ */ + String DISTINCT = AggregateBase.class.getName()+".distinct"; + + boolean DEFAULT_DISTINCT = false; + + } + + public AggregateBase(BOpBase op) { + super(op); + } + + public AggregateBase(BOp[] args, Map<String, Object> annotations) { + + super(args, annotations); + + if (!isWildcardAllowed() && getExpression() == Var.var("*")) { + + /* + * Only COUNT may use the wildcard '*' variable. + */ + + throw new UnsupportedOperationException("'*' not permitted."); + + } + + } + + /** + * + * @param distinct + * <code>true</code> iff the keyword DISTINCT was used, for + * example <code>COUNT(DISTINCT y)</code> + * @param expr + * The value expression to be computed, for example + * <code>x</code> in <code>COUNT(DISTINCT x)</code> or + * <code>y+x</code> in <code>MIN(x+y)</code>. + */ + public AggregateBase(final boolean distinct, final IValueExpression<E> expr) { + + this(new BOp[] { expr }, distinct ? NV.asMap(new NV( + Annotations.DISTINCT, true)) : null); + + } + + final public boolean isDistinct() { + + return getProperty(Annotations.DISTINCT, Annotations.DEFAULT_DISTINCT); + + } + + @SuppressWarnings("unchecked") + final public IValueExpression<E> getExpression() { + + return (IValueExpression<E>) get(0); + + } + + /** + * Return <code>true</code> iff the {@link IValueExpression} may be the + * special variable <code>*</code>. The default implementation always + * returns <code>false</code>. 
+ */ + public boolean isWildcardAllowed() { + + return false; + + } + +} Copied: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregate/IAggregate.java (from rev 4185, branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IAggregate.java) =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregate/IAggregate.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregate/IAggregate.java 2011-02-10 22:08:22 UTC (rev 4191) @@ -0,0 +1,46 @@ +package com.bigdata.bop.aggregate; + +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IValueExpression; + +/** + * An aggregate operator, such as SUM, COUNT, MIN, MAX, etc. + * + * @author thompsonbry + */ +public interface IAggregate<E> extends IValueExpression<E>{ + + /** + * <code>true</code> if the aggregate is to be applied to the distinct + * solutions within the group. E.g., + * + * <pre> + * COUNT(DISTINCT x) + * </pre> + * + * <pre> + * COUNT(DISTINCT *) + * </pre> + * + * or + * + * <pre> + * SUM(DISTINCT x) + * </pre> + */ + boolean isDistinct(); + + /** + * Return the {@link IValueExpression} to be computed by the aggregate. For + * <code>COUNT</code> this may be the special variable <code>*</code>, which + * is interpreted to mean all variables declared in the source solutions. + */ + IValueExpression<E> getExpression(); + + /** + * Return the current value of the aggregate (this has a side-effect on the + * internal state of the {@link IAggregate} operator). 
+ */ + E get(IBindingSet bset); + +} Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/filter/BOpFilterBase.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/filter/BOpFilterBase.java 2011-02-10 19:46:37 UTC (rev 4190) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/filter/BOpFilterBase.java 2011-02-10 22:08:22 UTC (rev 4191) @@ -81,8 +81,12 @@ final public Iterator filter(Iterator src, final Object context) { // wrap source with each additional filter from the filter chain. - for (BOp arg : args()) { + final Iterator<BOp> itr = argIterator(); + + while(itr.hasNext()) { + final BOp arg = itr.next(); + src = ((BOpFilterBase) arg).filter(src, context); } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/AbstractRunningQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/AbstractRunningQuery.java 2011-02-10 19:46:37 UTC (rev 4190) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/AbstractRunningQuery.java 2011-02-10 22:08:22 UTC (rev 4191) @@ -535,10 +535,17 @@ * Visit children, but not if this is a CONTROLLER operator since * its children belong to a subquery. 
*/ - for (BOp t : op.args()) { + final Iterator<BOp> itr = op.argIterator(); + + while(itr.hasNext()) { + + final BOp t = itr.next(); + // visit children (recursion) populateStatsMap(t); + } + } } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/GroupByOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/GroupByOp.java 2011-02-10 19:46:37 UTC (rev 4190) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/GroupByOp.java 2011-02-10 22:08:22 UTC (rev 4191) @@ -30,8 +30,10 @@ import java.util.Map; import com.bigdata.bop.BOp; +import com.bigdata.bop.Bind; import com.bigdata.bop.IConstraint; import com.bigdata.bop.IValueExpression; +import com.bigdata.bop.IVariable; import com.bigdata.bop.PipelineOp; /** @@ -51,14 +53,25 @@ public interface Annotations extends PipelineOp.Annotations { /** - * The ordered set of variables declared by {@link #COMPUTE} which are - * projected out of the group by operator. + * The ordered set of {@link IVariable}s which are projected out of the + * group by operator. + * <p> + * The variables named in {@link #SELECT} must either: (a) appear the + * {@link #GROUP_BY} declaration as simple {@link IVariable} s; or (b) + * be declared by {@link #COMPUTE}. */ String SELECT = GroupByOp.class.getName() + ".select"; /** * The ordered set of {@link IValueExpression}s which are to be * computed. + * <p> + * The top-level for each element of {@link #COMPUTE} must be either an + * {@link IVariable} or a {@link Bind}. When present, the {@link Bind} + * has the effect of assigning the result of an {@link IValueExpression} + * to an {@link IVariable}. Only {@link IVariable}s declared in the + * input solutions may be referenced in a {@link #COMPUTE} + * {@link IValueExpression}. * * TODO This really needs to be VAR := EXPR. 
EXPR can only reference the * source variables or variables declared earlier in the ordered Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/ISortOrder.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/ISortOrder.java 2011-02-10 19:46:37 UTC (rev 4190) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/ISortOrder.java 2011-02-10 22:08:22 UTC (rev 4191) @@ -30,6 +30,7 @@ import java.io.Serializable; +import com.bigdata.bop.IValueExpression; import com.bigdata.bop.IVariable; /** @@ -40,9 +41,19 @@ */ public interface ISortOrder<E> extends Serializable { - /** - * The variable whose values will be sorted. - */ + /** + * The variable whose values will be sorted. + * + * FIXME ORDER_BY is defined in terms of Expressions, not just Vars. Either + * this will need to be an {@link IValueExpression} which is evaluated + * during the ordering or we will have to pre-compute a hidden variable + * which can be ordered directly. Presumably BrackettedExpression provides a + * computed RDF Value while Constraint orders based on the BEV. 
+ * + * <pre> + * [23] OrderCondition ::= ( ( 'ASC' | 'DESC' ) BrackettedExpression ) | ( Constraint | Var ) + * </pre> + */ IVariable<E> getVariable(); /** Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/MemoryGroupByOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/MemoryGroupByOp.java 2011-02-10 19:46:37 UTC (rev 4190) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/MemoryGroupByOp.java 2011-02-10 22:08:22 UTC (rev 4191) @@ -14,12 +14,13 @@ import com.bigdata.bop.BOpContext; import com.bigdata.bop.BOpUtility; import com.bigdata.bop.ConcurrentHashMapAnnotations; -import com.bigdata.bop.IAggregate; +import com.bigdata.bop.Constant; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IConstant; import com.bigdata.bop.IConstraint; import com.bigdata.bop.IValueExpression; import com.bigdata.bop.IVariable; +import com.bigdata.bop.aggregate.IAggregate; import com.bigdata.bop.bindingSet.ListBindingSet; import com.bigdata.bop.engine.BOpStats; import com.bigdata.relation.accesspath.IAsynchronousIterator; @@ -62,20 +63,70 @@ * GROUP_CONCAT function is specified such that it combines a large #of * input solution bindings into a big string. * - * FIXME How should we handle DISTINCT semantics for GROUP_BY? (I think - * that we just insert a {@link DistinctBindingSetOp} before the - * GROUP_BY). + * FIXME How should we handle nulls (unbound variables) and type errors + * during aggregation? (LeeF suggests that they cause type errors which + * are propagated such that the aggregated value winds up unbound but I + * can not reconcile this with the language in the W3C draft which would + * appear to suggest that detail records are ignored if they result in + * type errors when computing the aggregate). * - * FIXME How should we handle nulls (missing values) during aggregation? 
- * (It appears that nulls and type errors are generally handled by the - * aggregate operator ignoring the detail record). - * * FIXME All of the {@link IAggregate} operators have a side-effect. In * order for them to have isolated side-effects for distinct groups, they * would have to either internalize a value map for the group or each * group would have to use a distinct instance. If the latter, then * provide for this on the operator, e.g., newInstance(), and document * why. + * + * FIXME Review all syntax/semantic: + * + * <pre> + * [17] SolutionModifier ::= GroupClause? HavingClause? OrderClause? LimitOffsetClauses? + * [18] GroupClause ::= 'GROUP' 'BY' GroupCondition+ + * [19] GroupCondition ::= ( BuiltInCall | FunctionCall | '(' Expression ( 'AS' Var )? ')' | Var ) + * [20] HavingClause ::= 'HAVING' HavingCondition+ + * [21] HavingCondition ::= Constraint + * [61] FunctionCall ::= IRIref ArgList + * [62] ArgList ::= ( NIL | '(' 'DISTINCT'? Expression ( ',' Expression )* ')' ) + * [106] BuiltInCall ::= 'STR' '(' Expression ')' .... + * </pre> + * + * FIXME The aggregate functions can have the optional keyword DISTINCT + * which forces the application to the distinct solutions within each + * group. [COUNT(DISTINCT *) appears to have some special semantics as + * well, but I can't figure out what the difference is from the use of + * DISTINCT with other aggregate operators unless it applies to the set of + * variables which are used to impose DISTINCT on the solutions within the + * group.] + * + * I've proven to my satisfaction that MySQL is behaving as per your description of the SPARQL semantics even when there are multiple columns in the aggregated solutions. It examines the distinct values within each group for the computed value of the expression within the aggregate function. 
+ +mysql> select * from test; ++------+------+------+ +| s | i | j | ++------+------+------+ +| A | 1 | 1 | +| A | 1 | 2 | +| A | 1 | 3 | +| A | 1 | 4 | ++------+------+------+ +4 rows in set (0.00 sec) + +mysql> select sum(i), sum(j), sum(distinct i), sum(distinct j), sum(i+j), sum(distinct i+j) from test; ++--------+--------+-----------------+-----------------+----------+-------------------+ +| sum(i) | sum(j) | sum(distinct i) | sum(distinct j) | sum(i+j) | sum(distinct i+j) | ++--------+--------+-----------------+-----------------+----------+-------------------+ +| 4 | 10 | 1 | 10 | 14 | 14 | ++--------+--------+-----------------+-----------------+----------+-------------------+ +1 row in set (0.00 sec) + +mysql> select sum(i), sum(j), sum(distinct i), sum(distinct j), sum(i+j), sum(distinct i+j) from test group by s; ++--------+--------+-----------------+-----------------+----------+-------------------+ +| sum(i) | sum(j) | sum(distinct i) | sum(distinct j) | sum(i+j) | sum(distinct i+j) | ++--------+--------+-----------------+-----------------+----------+-------------------+ +| 4 | 10 | 1 | 10 | 14 | 14 | ++--------+--------+-----------------+-----------------+----------+-------------------+ +1 row in set (0.00 sec) + */ public class MemoryGroupByOp extends GroupByOp { @@ -251,17 +302,17 @@ final IValueExpression<?>[] compute) { /* - * @todo The aggregated variables are all undefined the first time a - * source binding set is presented and need to be initialized to an - * appropriate value. + * FIXME The aggregate functions have side-effects so we need to use + * a distinct instance of each function for each group. */ // synchronize for visibility. synchronized(this) { + for(IValueExpression<?> expr : compute) { + System.err.println(expr.get(bset)); + } } - throw new UnsupportedOperationException(); - } } // SolutionGroup @@ -318,7 +369,7 @@ * The ordered array of variables which define the distinct groups to * be aggregated. 
*/ - private final IVariable<?>[] groupBy; + private final IValueExpression<?>[] groupBy; /** * The {@link IValueExpression}s used to compute each of the variables @@ -344,7 +395,7 @@ this.context = context; // must be non-null, and non-empty array w/o dups. - this.groupBy = (IVariable[]) op + this.groupBy = (IValueExpression<?>[]) op .getRequiredProperty(GroupByOp.Annotations.GROUP_BY); if (groupBy == null) @@ -378,7 +429,7 @@ // may be null or empty[]. this.having = (IConstraint[]) op - .getRequiredProperty(GroupByOp.Annotations.HAVING); + .getProperty(GroupByOp.Annotations.HAVING); /* * The variables to project out of the GROUP_BY operator. This may @@ -418,7 +469,8 @@ * * @todo write a unit test when some variables are not bound. */ - r[i] = bset.get(groupBy[i]); +// r[i] = bset.get(groupBy[i]); + r[i] = new Constant(groupBy[i].get(bset)); } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/MemorySortOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/MemorySortOp.java 2011-02-10 19:46:37 UTC (rev 4190) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/MemorySortOp.java 2011-02-10 22:08:22 UTC (rev 4191) @@ -10,6 +10,7 @@ import com.bigdata.bop.BOpContext; import com.bigdata.bop.BOpUtility; import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IValueExpression; import com.bigdata.bop.engine.BOpStats; import com.bigdata.relation.accesspath.IBlockingBuffer; @@ -20,6 +21,22 @@ * @version $Id: DistinctElementFilter.java 3466 2010-08-27 14:28:04Z * thompsonbry $ * + * FIXME ORDER_BY is defined in terms of Expressions, not just Vars. + * Either this will need to be an {@link IValueExpression} which is + * evaluated during the ordering or we will have to pre-compute a + * hidden variable which can be ordered directly. 
Presumably + * BrackettedExpression provides a computed RDF Value while Constraint + * orders based on the BEV. Write unit tests for those computed + * expressions. + * + * <pre> + * [22] OrderClause ::= 'ORDER' 'BY' OrderCondition+ + * [23] OrderCondition ::= ( ( 'ASC' | 'DESC' ) BrackettedExpression ) | ( Constraint | Var ) + * </pre> + * + * FIXME ORDER_BY should be written out of a CONSTRUCT or DESCRIBE + * query since it will not have any affect on the solutions. + * * @todo do an external merge sort operator. * @todo do a wordsort operator w/ ties broken by the {@link ComparatorOp} after * the main sort. Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestBOpUtility.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestBOpUtility.java 2011-02-10 19:46:37 UTC (rev 4190) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestBOpUtility.java 2011-02-10 22:08:22 UTC (rev 4191) @@ -33,8 +33,7 @@ import junit.framework.TestCase2; -import com.bigdata.bop.constraint.BOpConstraint; -import com.bigdata.bop.constraint.OR; +import junit.framework.TestCase2; /** * Unit tests for {@link BOpUtility}. 
@@ -76,10 +75,9 @@ { final BOp op1 = new BOpBase(new BOp[] { Var.var("y") }, null/* annotations */); - assertEquals(1, op1.arity()); + assertEquals(1, op1.arity()); - assertSameIterator(new Object[] { Var.var("y") }, op1.args() - .iterator()); + assertSameIterator(new Object[] { Var.var("y") }, op1.argIterator()); assertSameIterator(new Object[] { Var.var("y") }, BOpUtility .getArgumentVariables(op1)); @@ -94,7 +92,7 @@ assertEquals(2,op2.arity()); assertSameIterator(new Object[] { Var.var("x"), Var.var("y") }, op2 - .args().iterator()); + .argIterator()); assertSameIterator(new Object[] { Var.var("x"), Var.var("y") }, BOpUtility.getArgumentVariables(op2)); @@ -107,7 +105,7 @@ Var.var("y") }, null/* annotations */); assertSameIterator(new Object[] { new Constant<String>("x"), - Var.var("y") }, op3.args().iterator()); + Var.var("y") }, op3.argIterator()); assertSameIterator(new Object[] { Var.var("y") }, BOpUtility .getArgumentVariables(op3)); @@ -420,73 +418,6 @@ } /** - * Unit test for {@link BOpUtility#getSpannedVariables(BOp)}. 
- */ - public void test_getSpannedVariables2() { - - final IValueExpression<?> a = Var.var("a"); - - IConstraint bop = null; - - final int count = 100; - - for (int i = 0; i < count; i++) { - - final IConstraint c = new DummyConstraint( - new BOp[] { a, new Constant<Integer>(i) }, - null/*annotations*/); - - if (bop == null) { - bop = c; - } else { - bop = new OR(c, bop); - } - - } - - final Object[] expected = new Object[]{// - a,// - }; - - int i = 0; - final Iterator<IVariable<?>> itr = BOpUtility - .getSpannedVariables(bop); - while (itr.hasNext()) { - final BOp t = itr.next(); - System.out.println(i + " : " + t); -// assertTrue("index=" + i + ", expected=" + expected[i] + ", actual=" -// + t, expected[i].equals(t)); - i++; - } - - assertEquals(i, expected.length); - - assertSameIterator(expected, BOpUtility - .getSpannedVariables(bop)); - - } - - private static class DummyConstraint extends BOpConstraint { - - /** - * - */ - private static final long serialVersionUID = 1942393209821562541L; - - public DummyConstraint(BOp[] args, Map<String, Object> annotations) { - super(args, annotations); - } - - public DummyConstraint(BOpBase op) { - super(op); - } - - public boolean accept(IBindingSet bindingSet) { - throw new RuntimeException(); - } - } - - /** * Unit test for {@link BOpUtility#getIndex(BOp)} using valid inputs. 
*/ public void test_getIndex() { Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/solutions/TestMemoryGroupByOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/solutions/TestMemoryGroupByOp.java 2011-02-10 19:46:37 UTC (rev 4190) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/solutions/TestMemoryGroupByOp.java 2011-02-10 22:08:22 UTC (rev 4191) @@ -23,12 +23,74 @@ */ package com.bigdata.bop.solutions; +import java.util.Map; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.FutureTask; + import junit.framework.TestCase2; +import com.bigdata.bop.BOp; +import com.bigdata.bop.BOpBase; +import com.bigdata.bop.BOpContext; +import com.bigdata.bop.BOpEvaluationContext; +import com.bigdata.bop.Constant; +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IConstant; +import com.bigdata.bop.IValueExpression; +import com.bigdata.bop.IVariable; +import com.bigdata.bop.NV; +import com.bigdata.bop.PipelineOp; +import com.bigdata.bop.Var; +import com.bigdata.bop.aggregate.AggregateBase; +import com.bigdata.bop.aggregate.IAggregate; +import com.bigdata.bop.bindingSet.ArrayBindingSet; +import com.bigdata.bop.engine.BOpStats; +import com.bigdata.bop.engine.BlockingBufferWithStats; +import com.bigdata.bop.engine.IRunningQuery; +import com.bigdata.bop.engine.MockRunningQuery; +import com.bigdata.bop.engine.TestQueryEngine; +import com.bigdata.relation.accesspath.IAsynchronousIterator; +import com.bigdata.relation.accesspath.IBlockingBuffer; +import com.bigdata.relation.accesspath.ThickAsynchronousIterator; + /** * Unit tests for {@link MemoryGroupByOp}. * * @author thompsonbry + * + * @todo correct rejection tests for various kinds of illegal expressions, such + * as having forward references to variables which have not been computed. 
+ * There are actually several different ways in which this rule can be + * violated, including having forward references within the SELECT (or our + * COMPUTE). + * + * @todo correct rejection tests when the SELECT or HAVING clause references a + * variable not defined in the aggregated solution groups and not wrapped + * by an aggregate function. + * + * @todo test to verify that the evaluation of the aggregate functions within + * each group are independent (they have internal state to track the + * running value of the aggregate, but that state can not be shared across + * groups). + * + * @todo test with various kinds of type errors. + * + * @todo test with DISTINCT used within aggregate functions (this forces us to + * consider the distinct solutions within groups for those aggregate + * functions which make use of the DISTINCT keyword). + * + * @todo test COUNT(*) and COUNT(DISTINCT *) semantics. + * + * @todo test when some aggregate functions in a GROUP_BY use the DISTINCT + * keyword while others in the same GROUP_BY do not. + * + * @todo test with HAVING constraints. + * + * @todo test with multiple invocations of the operator (or do this in the + * integration stress test). + * + * @todo Is it possible to test these aggregation operators without testing at + * the SPARQL level? */ public class TestMemoryGroupByOp extends TestCase2 { @@ -43,4 +105,227 @@ fail("write tests"); } + /** + * Based on an example in the SPARQL 1.1 Working Draft. + * + * <pre> + * @prefix : <http://books.example/> . + * + * :org1 :affiliates :auth1, :auth2 . + * :auth1 :writesBook :book1, :book2 . + * :book1 :price 9 . + * :book2 :price 5 . + * :auth2 :writesBook :book3 . + * :book3 :price 7 . + * :org2 :affiliates :auth3 . + * :auth3 :writesBook :book4 . + * :book4 :price 7 . + * </pre> + * + * <pre> + * PREFIX : <http://books.example/> + * SELECT ?org, (SUM(?lprice) AS ?totalPrice) + * WHERE { + * ?org :affiliates ?auth . + * ?auth :writesBook ?book . 
+ * ?book :price ?lprice . + * } + * GROUP BY ?org + * </pre> + * + * The solutions input to the GROUP_BY are: + * + * <pre> + * ?org ?auth ?book ?lprice + * org1 auth1 book1 9 + * org1 auth1 book3 5 + * org1 auth2 book3 7 + * org2 auth3 book4 7 + * </pre> + * + * The aggregated solutions groups are: + * + * <pre> + * ?org ?totalPrice + * org1 21 + * org2 7 + * </pre> + * + * @todo Do variant with <code>HAVING (SUM(?lprice) > 10)</code>. The + * solutions are: + * <pre> + * ?org ?totalPrice + * org1 21 + * </pre> + * + * @throws ExecutionException + * @throws InterruptedException + */ + public void test_simpleGroupBy() { + + final IVariable<?> org = Var.var("org"); + final IVariable<?> auth = Var.var("auth"); + final IVariable<?> book = Var.var("book"); + final IVariable<?> lprice = Var.var("lprice"); + final IVariable<?> totalPrice = Var.var("totalPrice"); + + final IConstant<String> org1 = new Constant<String>("org1"); + final IConstant<String> org2 = new Constant<String>("org2"); + final IConstant<String> auth1 = new Constant<String>("auth1"); + final IConstant<String> auth2 = new Constant<String>("auth2"); + final IConstant<String> auth3 = new Constant<String>("auth3"); + final IConstant<String> book1 = new Constant<String>("book1"); + final IConstant<String> book2 = new Constant<String>("book2"); + final IConstant<String> book3 = new Constant<String>("book3"); + final IConstant<String> book4 = new Constant<String>("book4"); + final IConstant<Integer> price5 = new Constant<Integer>(5); + final IConstant<Integer> price7 = new Constant<Integer>(7); + final IConstant<Integer> price9 = new Constant<Integer>(9); + final IConstant<Integer> price21 = new Constant<Integer>(21); + + final int groupById = 1; + + final GroupByOp query = new MemoryGroupByOp(new BOp[] {}, NV + .asMap(new NV[] {// + new NV(BOp.Annotations.BOP_ID, groupById),// + new NV(BOp.Annotations.EVALUATION_CONTEXT, + BOpEvaluationContext.CONTROLLER),// + new NV(PipelineOp.Annotations.PIPELINED, 
true),// + new NV(PipelineOp.Annotations.THREAD_SAFE, false),// + new NV(GroupByOp.Annotations.SELECT, // + new IVariable[] { org, lprice }), // + new NV(GroupByOp.Annotations.COMPUTE,// + new IValueExpression[] { new SUMInt( + false/* distinct */, + (IValueExpression) lprice) }), // + new NV(GroupByOp.Annotations.GROUP_BY,// + new IValueExpression[] { org }) // + })); + + /* the test data: + * + * ?org ?auth ?book ?lprice + * org1 auth1 book1 9 + * org1 auth1 book3 5 + * org1 auth2 book3 7 + * org2 auth3 book4 7 + */ + final IBindingSet data [] = new IBindingSet [] + { + new ArrayBindingSet ( new IVariable<?> [] { org, auth, book, lprice }, new IConstant [] { org1, auth1, book1, price9 } ) + , new ArrayBindingSet ( new IVariable<?> [] { org, auth, book, lprice }, new IConstant [] { org1, auth1, book2, price5 } ) + , new ArrayBindingSet ( new IVariable<?> [] { org, auth, book, lprice }, new IConstant [] { org1, auth2, book3, price7 } ) + , new ArrayBindingSet ( new IVariable<?> [] { org, auth, book, lprice }, new IConstant [] { org2, auth3, book4, price7 } ) + }; + + /* the expected solutions: + * + * ?org ?totalPrice + * org1 21 + * org2 7 + */ + final IBindingSet expected [] = new IBindingSet [] + { + new ArrayBindingSet ( new IVariable<?> [] { org, totalPrice }, new IConstant [] { org1, price21 } ) + , new ArrayBindingSet ( new IVariable<?> [] { org, totalPrice }, new IConstant [] { org1, price7 } ) + } ; + + final BOpStats stats = query.newStats () ; + + final IAsynchronousIterator<IBindingSet[]> source = new ThickAsynchronousIterator<IBindingSet[]> ( new IBindingSet [][] { data } ) ; + + final IBlockingBuffer<IBindingSet[]> sink = new BlockingBufferWithStats<IBindingSet[]>(query, stats); + + final IRunningQuery runningQuery = new MockRunningQuery(null/* fed */ + , null/* indexManager */ + ); + final BOpContext<IBindingSet> context = new BOpContext<IBindingSet>( + runningQuery, -1/* partitionId */ + , stats, source, sink, null/* sink2 */ + ); + // Force the 
solutions to be emitted. + context.setLastInvocation(); + + final FutureTask<Void> ft = query.eval(context); + // Run the query. + { + final Thread t = new Thread() { + public void run() { + ft.run(); + } + }; + t.setDaemon(true); + t.start(); + } + + try { + // Check the solutions. + TestQueryEngine.assertSameSolutions(expected, sink.iterator()); + } finally { + /* Always wait for the future afterwards and test it for errors. */ + try { + ft.get(); + } catch (Throwable ex) { + log.error("Evaluation failed: " + ex, ex); + } + } + + assertEquals(1, stats.chunksIn.get()); + assertEquals(4, stats.unitsIn.get()); + assertEquals(2, stats.unitsOut.get()); + assertEquals(1, stats.chunksOut.get()); + + } + + private static class SUMInt extends AggregateBase<Integer> implements IAggregate<Integer> { + + /** + * + */ + private static final long serialVersionUID = 1L; + + public SUMInt(BOpBase op) { + super(op); + } + + public SUMInt(BOp[] args, Map<String, Object> annotations) { + super(args, annotations); + } + + public SUMInt(boolean distinct, IValueExpression<Integer> expr) { + super(distinct, expr); + } + + /** + * The running aggregate value. + * <p> + * Note: SUM() returns ZERO if there are no non-error solutions + * presented. + * <p> + * Note: This field is guarded by the monitor on the {@link SUMInt} + * instance. + */ + private transient int aggregated = 0; + + @Override + synchronized + public Integer get(final IBindingSet bindingSet) { + + final IValueExpression<Integer> var = (IValueExpression<Integer>) get(0); + + final Integer val = (Integer) var.get(bindingSet); + + if (val != null) { + + // aggregate non-null values. 
+ aggregated += val; + + } + + return Integer.valueOf(aggregated); + + } + + } + } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/solutions/TestMemorySortOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/solutions/TestMemorySortOp.java 2011-02-10 19:46:37 UTC (rev 4190) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/solutions/TestMemorySortOp.java 2011-02-10 22:08:22 UTC (rev 4191) @@ -27,6 +27,8 @@ package com.bigdata.bop.solutions; +import java.util.concurrent.FutureTask; + import junit.framework.TestCase2; import com.bigdata.bop.BOp; @@ -53,6 +55,8 @@ * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ + * + * FIXME This needs to test for computed expressions as well. */ public class TestMemorySortOp extends TestCase2 { @@ -69,7 +73,7 @@ super ( name ) ; } - public void testEval () + public void testEval () { final IVariable<?> x = Var.var ( "x" ) ; final IVariable<?> y = Var.var ( "y" ) ; @@ -154,9 +158,29 @@ , stats, source, sink, null/* sink2 */ ); - query.eval ( context ).run () ; + final FutureTask<Void> ft = query.eval(context); + // Run the query. + { + final Thread t = new Thread() { + public void run() { + ft.run(); + } + }; + t.setDaemon(true); + t.start(); + } - TestQueryEngine.assertSameSolutions ( expected, sink.iterator () ) ; + try { + // Check the solutions. + TestQueryEngine.assertSameSolutions(expected, sink.iterator()); + } finally { + /* Always wait for the future afterwards and test it for errors. 
*/ + try { + ft.get(); + } catch (Throwable ex) { + log.error("Evaluation failed: " + ex, ex); + } + } assertEquals ( 1, stats.chunksIn.get () ) ; assertEquals ( 10, stats.unitsIn.get () ) ; Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/COUNT.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/COUNT.java 2011-02-10 19:46:37 UTC (rev 4190) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/COUNT.java 2011-02-10 22:08:22 UTC (rev 4191) @@ -27,10 +27,11 @@ import com.bigdata.bop.BOp; import com.bigdata.bop.BOpBase; -import com.bigdata.bop.IAggregate; import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IValueExpression; import com.bigdata.bop.IVariable; -import com.bigdata.bop.ImmutableBOp; +import com.bigdata.bop.aggregate.AggregateBase; +import com.bigdata.bop.aggregate.IAggregate; import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.internal.XSDLongIV; @@ -39,8 +40,11 @@ * sets for the given variable. * * @author thompsonbry + * + * FIXME The application of COUNT(*) must explicitly recognize the + * special variable <code>*</code> */ -public class COUNT extends ImmutableBOp implements IAggregate<IV> { +public class COUNT extends AggregateBase<IV> implements IAggregate<IV> { /** * @@ -51,21 +55,14 @@ super(op); } - /** - * FIXME This must also accept '*' in lieu of a variable. When given a '*' - * (which could be modeled as a special variable name), we count all detail - * records without regard to their bound values. - * - * @param var - */ - public COUNT(IVariable<IV> var) { - this(new BOp[] { var }, null/* annotations */); - } - public COUNT(BOp[] args, Map<String, Object> annotations) { super(args, annotations); } + public COUNT(final boolean distinct, IValueExpression<IV> expr) { + super(distinct, expr); + } + /** * The running aggregate value. 
* <p> @@ -98,4 +95,14 @@ } + /** + * Overridden to allow <code>COUNT(*)</code>. + */ + @Override + final public boolean isWildcardAllowed() { + + return true; + + } + } Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/GROUP_CONCAT.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/GROUP_CONCAT.java 2011-02-10 19:46:37 UTC (rev 4190) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/GROUP_CONCAT.java 2011-02-10 22:08:22 UTC (rev 4191) @@ -30,11 +30,13 @@ import com.bigdata.bop.BOp; import com.bigdata.bop.BOpBase; -import com.bigdata.bop.IAggregate; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IConstant; +import com.bigdata.bop.IValueExpression; import com.bigdata.bop.IVariable; -import com.bigdata.bop.ImmutableBOp; +import com.bigdata.bop.NV; +import com.bigdata.bop.aggregate.AggregateBase; +import com.bigdata.bop.aggregate.IAggregate; /** * Operator combines the string values over the presented binding sets for the @@ -46,17 +48,32 @@ * FIXME This must only operate on variables which are known to be * materialized RDF Values. */ -public class GROUP_CONCAT extends ImmutableBOp implements IAggregate<Literal> { +public class GROUP_CONCAT extends AggregateBase<Literal> implements IAggregate<Literal> { /** * */ private static final long serialVersionUID = 1L; + public interface Annotations extends AggregateBase.Annotations { + + /** + * Required string property provides the separator used when combining + * the {@link IValueExpression} computed for each solution within the + * group. + */ + String SEPARATOR = AggregateBase.class.getName()+".separator"; + + } + public GROUP_CONCAT(BOpBase op) { super(op); } + public GROUP_CONCAT(BOp[] args, Map<String, Object> annotations) { + super(args, annotations); + } + /** * * @param var @@ -64,14 +81,14 @@ * @param sep * The separator string. 
*/ - public GROUP_CONCAT(IVariable<Literal> var, IConstant<String> sep) { - this(new BOp[] { var, sep }, null/* annotations */); + public GROUP_CONCAT(final boolean distinct, + final IValueExpression<Literal> expr, final IConstant<String> sep) { + this(new BOp[] { expr }, NV.asMap(// + new NV(Annotations.DISTINCT, distinct),// + new NV(Annotations.SEPARATOR, sep)// + )); } - public GROUP_CONCAT(BOp[] args, Map<String, Object> annotations) { - super(args, annotations); - } - /** * The running concatenation of observed bound values. * <p> Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/MAX.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/MAX.java 2011-02-10 19:46:37 UTC (rev 4190) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/MAX.java 2011-02-10 22:08:22 UTC (rev 4191) @@ -29,10 +29,11 @@ import com.bigdata.bop.BOp; import com.bigdata.bop.BOpBase; -import com.bigdata.bop.IAggregate; import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IValueExpression; import com.bigdata.bop.IVariable; -import com.bigdata.bop.ImmutableBOp; +import com.bigdata.bop.aggregate.AggregateBase; +import com.bigdata.bop.aggregate.IAggregate; import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.internal.IVUtility; @@ -45,10 +46,11 @@ * * @todo What is reported if there are no non-null observations? * - * FIXME This must handle comparisons when the value is not an IV, e.g., - * using {@link ValueComparator}. + * FIXME MIN (and MAX) are defined in terms of the ORDER_BY semantics for + * SPARQL. Therefore, this must handle comparisons when the value is not + * an IV, e.g., using {@link ValueComparator}. 
*/ -public class MAX extends ImmutableBOp implements IAggregate<IV> { +public class MAX extends AggregateBase<IV> implements IAggregate<IV> { /** * @@ -59,14 +61,14 @@ super(op); } - public MAX(IVariable<IV> var) { - this(new BOp[] { var }, null/* annotations */); - } - public MAX(BOp[] args, Map<String, Object> annotations) { super(args, annotations); } + public MAX(boolean distinct, IValueExpression<IV> expr) { + super(distinct, expr); + } + /** * The maximum observed value and initially <code>null</code>. * <p> Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/MIN.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/MIN.java 2011-02-10 19:46:37 UTC (rev 4190) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/MIN.java 2011-02-10 22:08:22 UTC (rev 4191) @@ -29,10 +29,11 @@ import com.bigdata.bop.BOp; import com.bigdata.bop.BOpBase; -import com.bigdata.bop.IAggregate; import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IValueExpression; import com.bigdata.bop.IVariable; -import com.bigdata.bop.ImmutableBOp; +import com.bigdata.bop.aggregate.AggregateBase; +import com.bigdata.bop.aggregate.IAggregate; import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.internal.IVUtility; @@ -45,10 +46,11 @@ * * @todo What is reported if there are no non-null observations? * - * FIXME This must handle comparisons when the value is not an IV, e.g., - * using {@link ValueComparator}. + * FIXME MIN (and MAX) are defined in terms of the ORDER_BY semantics for + * SPARQL. Therefore, this must handle comparisons when the value is not + * an IV, e.g., using {@link ValueComparator}. 
*/ -public class MIN extends ImmutableBOp implements IAggregate<IV> { +public class MIN extends AggregateBase<IV> implements IAggregate<IV> { /** * @@ -59,14 +61,14 @@ super(op); } - public MIN(IVariable<IV> var) { - this(new BOp[] { var }, null/* annotations */); - } - public MIN(BOp[] args, Map<String, Object> annotations) { super(args, annotations); } + public MIN(boolean distinct, IValueExpression<IV> expr) { + super(distinct, expr); + } + /** * The minimum observed value and initially <code>null</code>. * <p> Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/SAMPLE.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/SAMPLE.java 2011-02-10 19:46:37 UTC (rev 4190) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/SAMPLE.java 2011-02-10 22:08:22 UTC (rev 4191) @@ -27,10 +27,11 @@ import com.bigdata.bop.BOp; import com.bigdata.bop.BOpBase; -import com.bigdata.bop.IAggregate; import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IValueExpression; import com.bigdata.bop.IVariable; -import com.bigdata.bop.ImmutableBOp; +import com.bigdata.bop.aggregate.AggregateBase; +import com.bigdata.bop.aggregate.IAggregate; import com.bigdata.rdf.internal.IV; /** @@ -40,7 +41,7 @@ * * @author thompsonbry */ -public class SAMPLE extends ImmutableBOp implements IAggregate<IV> { +public class SAMPLE extends AggregateBase<IV> implements IAggregate<IV> { /** * @@ -51,14 +52,14 @@ super(op); } - public SAMPLE(IVariable<IV> var) { - this(new BOp[] { var }, null/* annotations */); - } - public SAMPLE(BOp[] args, Map<String, Object> annotations) { super(args, annotations); } + public SAMPLE(boolean distinct, IValueExpression<IV> expr) { + super(distinct, expr); + } + /** * The sampled value and initially <code>null</code>. 
* <p> Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/SUM.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/SUM.java 2011-02-10 19:46:37 UTC (rev 4190) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/SUM.java 2011-02-10 22:08:22 UTC (rev 4191) @@ -29,10 +29,11 @@ import com.bigdata.bop.BOp; import com.bigdata.bop.BOpBase; -import com.bigdata.bop.IAggregate; import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IValueExpression; import com.bigdata.bop.IVariable; -import com.bigdata.bop.ImmutableBOp; +import com.bigdata.bop.aggregate.AggregateBase; +import com.bigdata.bop.aggregate.IAggregate; import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.internal.IVUtility; import com.bigdata.rdf.internal.XSDLongIV; @@ -49,7 +50,7 @@ * the numeric values - perhaps we should just get rid of the option to * not inline and require people to export/import for an upgrade). */ -public class SUM extends ImmutableBOp implements IAggregate<IV> { +public class SUM extends AggregateBase<IV> implements IAggregate<IV> { /** * @@ -60,14 +61,14 @@ super(op); } - public SUM(IVariable<IV> var) { - this(new BOp[] { var }, null/* annotations */); - } - public SUM(BOp[] args, Map<String, Object> annotations) { super(args, annotations); } + public SUM(boolean distinct, IValueExpression<IV> expr) { + super(distinct, expr); + } + /** * The running aggregate value. 
* <p> Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/TestBOpUtility.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/TestBOpUtility.java 2011-02-10 19:46:37 UTC (rev 4190) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/TestBOpUtility.java 2011-02-10 22:08:22 UTC (rev 4191) @@ -39,12 +39,9 @@ import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IConstraint; import com.bigdata.bop.IValueExpression; -import com.bigdata.bop.IVariable; -import com.bigdata.bop.NV; import com.bigdata.bop.Var; import com.bigdata.bop.constraint.BOpConstraint; import com.bigdata.bop.constraint.OR; -import com.bigdata.rdf.internal.constraints.SameTermBOp; /** * Unit tests for {@link BOpUtility}. @@ -64,6 +61,19 @@ super(name); } + private void eatData(/*final int expectedLength, */final Iterator<?> itr) { + int i = 1; + while (itr.hasNext()) { + final Object t = itr.next(); +// System.err.print(i+" ");// + " : " + t); +// assertTrue("index=" + i + ", expected=" + expected[i] + ", actual=" +// + t, expected[i].equals(t)); + i++; + } +// System.err.println(""); +// assertEquals("#visited", expectedLength, i); + } + /** * Unit test for {@link BOpUtility#getSpannedVariables(BOp)}. 
*/ @@ -73,7 +83,7 @@ IConstraint bop = null; - final int count = 100; + final int count = 99; for (int i = 0; i < count; i++) { @@ -93,22 +103,20 @@ a,// }; - int i = 0; - final Iterator<IVariable<?>> itr = BOpUtility - .getSpannedVariables(bop); - while (itr.hasNext()) { - final BOp t = itr.next(); - System.out.println(i + " : " + t); -// assertTrue("index=" + i + ", expected=" + expected[i] + ", actual=" -// + t, expected[i].equals(t)); - i++; - } + System.err.println("preOrderIterator"); + eatData(BOpUtility.preOrderIterator(bop)); - assertEquals(i, expected.length); + System.err.println("preOrderIteratorWithAnnotations"); + eatData(BOpUtility.preOrderIteratorWithAnnotations(bop)); - assertSameIterator(expected, BOpUtility - .getSpannedVariables(bop)); + System.err.println("getSpannedVariables"); + eatData(BOpUtility.getSpannedVariables(bop)); + + // @todo make the returned set distinct? + // @todo verify the actual data visited. + assertSameIterator(expected, BOpUtility.getSpannedVariables(bop)); + } private static class DummyConstraint extends BOpConstraint { This was sent by the SourceForge.net col... [truncated message content] |
From: <tho...@us...> - 2011-02-17 22:58:16
|
Revision: 4207 http://bigdata.svn.sourceforge.net/bigdata/?rev=4207&view=rev Author: thompsonbry Date: 2011-02-17 22:58:07 +0000 (Thu, 17 Feb 2011) Log Message: ----------- Redo of failed commit. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/Bind.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IVariable.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/Var.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregate/AggregateBase.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregate/IAggregate.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/AbstractSubqueryOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/PartitionedJoinGroup.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/Steps.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/Union.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/AbstractRunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/ChunkedRunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/FederatedQueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/GroupByOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/MemoryGroupByOp.java 
branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/MemorySortOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/SliceOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestVar.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestSubqueryOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestUnion.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine_DistinctOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine_GroupByOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine_Slice.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine_SortOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestFederatedQueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/solutions/TestMemoryGroupByOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/solutions/TestMemorySortOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/solutions/TestSliceOp.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/COUNT.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/GROUP_CONCAT.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/MAX.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/MIN.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/SAMPLE.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/SUM.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOPredicate.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/TestBOpUtility.java 
branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/AbstractJoinGraphTestCase.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/QueryHints.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/Rule2BOpUtility.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/bench/NanoSparqlClient.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/bench/NanoSparqlServer.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/SOp2BOpUtility.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestQueryHints.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/GroupByStagedOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/GroupByUtility.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/raba/ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/solutions/TestGroupByUtility.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/bench/RunQuery.java Removed Paths: ------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngineTestAnnotations.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java 2011-02-17 13:38:48 UTC (rev 4206) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java 2011-02-17 22:58:07 UTC (rev 4207) @@ -246,6 +246,36 @@ return args[index]; } + +// /** +// * Return a new {@link BOp} where the specified argument has been replaced +// * by the given value. 
This is a copy-on-write operation. The original +// * {@link BOp} is NOT modified by this method. +// * +// * @param index +// * The index of the argument whose value will be changed. +// * @param arg +// * The new value for that argument. +// * +// * @return A new operator in which the given argument has been replaced. +// * +// * @throws IndexOutOfBoundsException +// * unless <i>index</i> is in (0:{@link #arity()}]. +// * @throws IllegalArgumentException +// * if <i>arg</i> is <code>null</code>. +// */ +// public BOp setArg(final int index,final BOp arg) { +// +// if(arg == null) +// throw new IllegalArgumentException(); +// +// final BOpBase tmp = this.clone(); +// +// tmp._set(index, arg); +// +// return tmp; +// +// } /** * Set the value of an operand. @@ -264,7 +294,7 @@ * * @todo thread safety and visibility.... */ - final protected void set(final int index, final BOp op) { + final protected void _set(final int index, final BOp op) { this.args[index] = op; Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java 2011-02-17 13:38:48 UTC (rev 4206) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java 2011-02-17 22:58:07 UTC (rev 4207) @@ -79,7 +79,8 @@ * <ol> * <li>{@link BOp.Annotations#EVALUATION_CONTEXT} is * {@link BOpEvaluationContext#CONTROLLER}</li> - * <li>{@link PipelineOp.Annotations#THREAD_SAFE} is <code>false</code></li> + * <li>{@link PipelineOp.Annotations#MAX_PARALLEL} is <code>1</code></li> + * <li>{@link PipelineOp.Annotations#PIPELINED} is <code>true</code></li> * </ol> * Under these circumstances, it is possible for the {@link IQueryClient} to * atomically decide that a specific invocation of the operator task for the @@ -90,7 +91,8 @@ * controller. 
In addition, the operator must declare that it is NOT thread * safe in order for the query engine to serialize its evaluation tasks. * - * @return + * @todo This should be a ctor parameter. We just have to update the test + * suites for the changed method signature. */ public boolean isLastInvocation() { return lastInvocation.get(); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/Bind.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/Bind.java 2011-02-17 13:38:48 UTC (rev 4206) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/Bind.java 2011-02-17 22:58:07 UTC (rev 4207) @@ -24,7 +24,7 @@ /** * @param var - * The {@link IVariable} which will be bound to result of + * The {@link IVariable} which will be bound to the result of * evaluating the associated value expression. * @param expr * The {@link IValueExpression} to be evaluated. @@ -44,23 +44,42 @@ super(args, annotations); } + /** + * Return the variable which will be bound to the result of evaluating the + * associated value expression. + */ @SuppressWarnings("unchecked") - @Override + public IVariable<E> getVar() { + + return (IVariable<E>) get(0); + + } + + /** + * Return the value expression. + */ + @SuppressWarnings("unchecked") + public IValueExpression<E> getExpr() { + + return (IValueExpression<E>) get(1); + + } + public E get(final IBindingSet bindingSet) { - - final IVariable<E> var = (IVariable<E>) get(0); - final IValueExpression<E> expr = (IValueExpression<E>) get(1); + final IVariable<E> var = getVar(); + final IValueExpression<E> expr = getExpr(); + // evaluate the value expression. E val = expr.get(bindingSet); - + // bind the variable as a side-effect. 
bindingSet.set(var, new Constant<E>(val)); - + // return the evaluated value return val; - + } } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IVariable.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IVariable.java 2011-02-17 13:38:48 UTC (rev 4206) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IVariable.java 2011-02-17 22:58:07 UTC (rev 4207) @@ -41,5 +41,10 @@ * {@link IVariableOrConstant#getName()} */ int hashCode(); + + /** + * Return <code>true</code> iff this is the special variable <code>*</code> + */ + boolean isWildcard(); } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java 2011-02-17 13:38:48 UTC (rev 4206) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java 2011-02-17 22:58:07 UTC (rev 4207) @@ -35,7 +35,7 @@ import org.apache.log4j.Logger; import com.bigdata.bop.engine.BOpStats; -import com.bigdata.bop.engine.ChunkedRunningQuery; +import com.bigdata.bop.engine.IChunkMessage; import com.bigdata.bop.engine.QueryEngine; import com.bigdata.bop.solutions.SliceOp; import com.bigdata.relation.accesspath.IAsynchronousIterator; @@ -101,24 +101,65 @@ boolean DEFAULT_SHARED_STATE = false; /** - * Annotation may be used to indicate operators which are not thread - * safe (default {@value #DEFAULT_THREAD_SAFE}). Concurrent invocations - * of the evaluation task will not be scheduled for a given shard for an - * operator which is not thread safe. - * - * @todo Unit tests for {@link ChunkedRunningQuery} to verify that it - * eventually schedules operator tasks which were deferred to - * prevent concurrent evaluation. - * - * @todo This is currently not used. 
However, it could simplify the - * logic for operators, such as SLICE, which otherwise depend on - * {@link #SHARED_STATE} to provide their own synchronization. + * This option may be used to place an optional limit on the #of + * concurrent tasks which may run for the same (bopId,shardId) for a + * given query (default {@value #DEFAULT_MAX_PARALLEL}). The query is + * guaranteed to make progress as long as this is some positive integer. + * While limiting this value can limit the concurrency with which + * certain operators are evaluated and that can have a negative effect + * on the throughput, it controls both the demand on the JVM heap and + * the #of threads consumed. + * <p> + * Note: {@link #MAX_PARALLEL} is the annotation for pipelined joins + * which has the strongest effect on performance. Changes to both + * {@link #MAX_MESSAGES_PER_TASK} and {@link #PIPELINE_QUEUE_CAPACITY} + * have less effect and performance tends to be best around a modest + * value (10) for those annotations. */ - String THREAD_SAFE = PipelineOp.class.getName() + ".threadSafe"; + String MAX_PARALLEL = PipelineOp.class.getName() + ".maxParallel"; - boolean DEFAULT_THREAD_SAFE = true; + /** + * @see #MAX_PARALLEL + */ + int DEFAULT_MAX_PARALLEL = 5; /** + * For a pipelined operator, this is the maximum number of messages that + * will be assigned to a single invocation of the evaluation task for + * that operator (default {@value #DEFAULT_MAX_MESSAGES_PER_TASK}). By + * default the {@link QueryEngine} MAY (and generally does) combine + * multiple {@link IChunkMessage}s from the work queue of an operator + * for each evaluation pass made for that operator. When ONE (1), each + * {@link IChunkMessage} will be assigned to a new evaluation task for + * the operator. The value of this annotation must be a positive + * integer. If the operator is not-pipelined, then the maximum amount of + * data to be assigned to an evaluation task is governed by + * {@link #MAX_MEMORY} instead. 
+ */ + String MAX_MESSAGES_PER_TASK = PipelineOp.class.getName() + + ".maxMessagesPerTask"; + + /** + * @see #MAX_MESSAGES_PER_TASK + */ + int DEFAULT_MAX_MESSAGES_PER_TASK = 10; + + /** + * For pipelined operators, this is the capacity of the input queue for + * that operator. Producers will block if the input queue for the target + * operator is at its capacity. This provides an important limit on the + * amount of data which can be buffered on the JVM heap during pipelined + * query evaluation. + */ + String PIPELINE_QUEUE_CAPACITY = PipelineOp.class.getName() + + ".pipelineQueueCapacity"; + + /** + * @see #PIPELINE_QUEUE_CAPACITY + */ + int DEFAULT_PIPELINE_QUEUE_CAPACITY = 10; + + /** * Annotation used to mark pipelined (aka vectored) operators. When * <code>false</code> the operator will use either "at-once" or * "blocked" evaluation depending on how it buffers its data for @@ -126,6 +167,9 @@ */ String PIPELINED = PipelineOp.class.getName() + ".pipelined"; + /** + * @see #PIPELINED + */ boolean DEFAULT_PIPELINED = true; /** @@ -159,87 +203,11 @@ */ String MAX_MEMORY = PipelineOp.class.getName() + ".maxMemory"; + /** + * @see #MAX_MEMORY + */ int DEFAULT_MAX_MEMORY = 0; -// /** -// * Annotation used to mark a set of (non-optional) joins which may be -// * freely reordered by the query optimizer in order to minimize the -// * amount of work required to compute the solutions. -// * <p> -// * Note: Optional joins MAY NOT appear within a join graph. Optional -// * joins SHOULD be evaluated as part of the "tail plan" following the -// * join graph, but before operations such as SORT, DISTINCT, etc. When -// * the query plan includes {@link #CONDITIONAL_GROUP}s, those groups -// * include a leading {@link #JOIN_GRAPH} (required joins) followed by -// * zero or more optional joins. -// */ -// String JOIN_GRAPH = PipelineOp.class.getName() + ".joinGraph"; - -// /** -// * Annotation used to mark a set of operators belonging to a conditional -// * binding group. 
Bindings within with the group will be discarded if -// * any required operator in the group fails. For example, if a binding -// * set exits via the alternative sink for a required join then any -// * conditional bindings within the group will be discarded. -// * <p> -// * Together with {@link #ALT_SINK_GROUP}, the {@link #CONDITIONAL_GROUP} -// * annotation provides the information necessary in order to decide the -// * re-entry point in the query plan when a join within an conditional -// * binding group fails. -// * <p> -// * The {@link #CONDITIONAL_GROUP} annotation controls the -// * {@link IBindingSet#push()} and {@link IBindingSet#pop(boolean)} of -// * individual solutions as they propagate through the pipeline. When a -// * pipeline starts, the {@link IBindingSet} stack contains only the top -// * level symbol table (i.e., name/value bindings). When an intermediate -// * solution enters a {@link PipelineOp} marked as belonging to a -// * {@link #CONDITIONAL_GROUP}, a new symbol table is -// * {@link IBindingSet#push() pushed} onto the stack for that solution. -// * If the solution leaves the optional join group via the default sink, -// * then the symbol table is "saved" when it is -// * {@link IBindingSet#pop(boolean) popped} off of the stack. If the -// * solution leaves the join group via the alternative sink, then the -// * symbol table is discarded when it is {@link IBindingSet#pop(boolean) -// * popped} off of the stack. This provides for conditional binding of -// * variables within the operators of the group. -// * <p> -// * The value of the {@link #CONDITIONAL_GROUP} is an {@link Integer} -// * which uniquely identifies the group within the query. -// * -// * @deprecated The binding set stack push/pop mechanisms are not -// * sufficient to support optional join groups. This -// * annotation will be removed unless it proves valuable for -// * marking the elements of a join group, in which case the -// * javadoc needs to be updated. 
-// */ -// String CONDITIONAL_GROUP = PipelineOp.class.getName() + ".conditionalGroup"; - -// /** -// * Annotation used to designate the target when a required operator -// * within an {@link #CONDITIONAL_GROUP} fails. The value of this -// * annotation must be the {@link #CONDITIONAL_GROUP} identifier -// * corresponding to the next conditional binding group in the query -// * plan. If there is no such group, then the {@link #ALT_SINK_REF} -// * should be used instead to specify the target operator in the -// * pipeline, e.g., a {@link SliceOp}. -// * <p> -// * The target {@link #CONDITIONAL_GROUP} is specified (rather than the -// * bopId of the target join) since the non-optional joins in the target -// * {@link #CONDITIONAL_GROUP} be reordered by the query optimizer. The -// * entry point for solutions redirected to the {@link #ALT_SINK_GROUP} -// * is therefore the first operator in the target -// * {@link #CONDITIONAL_GROUP}. This decouples the routing decisions from -// * the join ordering decisions. -// * -// * @see #CONDITIONAL_GROUP -// * @see #ALT_SINK_REF -// * -// * @deprecated The binding set stack push/pop mechanisms are not -// * sufficient to support optional join groups. This -// * annotation will be removed. -// */ -// String ALT_SINK_GROUP = PipelineOp.class.getName() + ".altSinkGroup"; - } /** @@ -261,13 +229,19 @@ final Map<String, Object> annotations) { super(args, annotations); - + + if (getMaxParallel() < 1) + throw new IllegalArgumentException(Annotations.MAX_PARALLEL + "=" + + getMaxParallel()); + + // @todo range check the rest of the annotations. 
+ } /** * @see BufferAnnotations#CHUNK_CAPACITY */ - public int getChunkCapacity() { + final public int getChunkCapacity() { return getProperty(Annotations.CHUNK_CAPACITY, Annotations.DEFAULT_CHUNK_CAPACITY); @@ -277,7 +251,7 @@ /** * @see BufferAnnotations#CHUNK_OF_CHUNKS_CAPACITY */ - public int getChunkOfChunksCapacity() { + final public int getChunkOfChunksCapacity() { return getProperty(Annotations.CHUNK_OF_CHUNKS_CAPACITY, Annotations.DEFAULT_CHUNK_OF_CHUNKS_CAPACITY); @@ -287,7 +261,7 @@ /** * @see BufferAnnotations#CHUNK_TIMEOUT */ - public long getChunkTimeout() { + final public long getChunkTimeout() { return getProperty(Annotations.CHUNK_TIMEOUT, Annotations.DEFAULT_CHUNK_TIMEOUT); @@ -334,31 +308,49 @@ * @see Annotations#PIPELINED * @see Annotations#MAX_MEMORY */ - public boolean isPipelined() { - return getProperty(PipelineOp.Annotations.PIPELINED, + final public boolean isPipelined() { + + return getProperty(PipelineOp.Annotations.PIPELINED, PipelineOp.Annotations.DEFAULT_PIPELINED); + } +// /** +// * Return <code>true</code> iff concurrent invocations of the operator are +// * permitted. +// * <p> +// * Note: Operators which are not thread-safe still permit concurrent +// * evaluation for <em>distinct</em> partitions. In order to ensure that all +// * invocations of the operator within a query are serialized (no more than +// * one concurrent invocation) you must also specify +// * {@link BOpEvaluationContext#CONTROLLER}. +// * +// * @see Annotations#THREAD_SAFE +// * @see BOp.Annotations#EVALUATION_CONTEXT +// */ +// public boolean isThreadSafe() { +// +// return getProperty(Annotations.THREAD_SAFE, +// Annotations.DEFAULT_THREAD_SAFE); +// +// } + /** - * Return <code>true</code> iff concurrent invocations of the operator are - * permitted. - * <p> - * Note: Operators which are not thread-safe still permit concurrent - * evaluation for <em>distinct</em> partitions. 
In order to ensure that all - * invocations of the operator within a query are serialized (no more than - * one concurrent invocation) you must also specify - * {@link BOpEvaluationContext#CONTROLLER}. + * The maximum parallelism with which tasks may be evaluated for this + * operator (this is a per-shard limit in scale-out). A value of ONE (1) + * indicates that at most ONE (1) instance of this task may be executing in + * parallel for a given shard and may be used to indicate that the operator + * evaluation task is not thread-safe. * - * @see Annotations#THREAD_SAFE - * @see BOp.Annotations#EVALUATION_CONTEXT + * @see Annotations#MAX_PARALLEL */ - public boolean isThreadSafe() { + final public int getMaxParallel() { - return getProperty(Annotations.THREAD_SAFE, - Annotations.DEFAULT_THREAD_SAFE); - - } + return getProperty(PipelineOp.Annotations.MAX_PARALLEL, + PipelineOp.Annotations.DEFAULT_MAX_PARALLEL); + } + /** * Return <code>true</code> iff {@link #newStats()} must be shared across * all invocations of {@link #eval(BOpContext)} for this operator for a @@ -366,7 +358,7 @@ * * @see Annotations#SHARED_STATE */ - public boolean isSharedState() { + final public boolean isSharedState() { return getProperty(Annotations.SHARED_STATE, Annotations.DEFAULT_SHARED_STATE); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/Var.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/Var.java 2011-02-17 13:38:48 UTC (rev 4206) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/Var.java 2011-02-17 22:58:07 UTC (rev 4207) @@ -124,6 +124,12 @@ } + public boolean isWildcard() { + + return name.length() == 1 && name.charAt(0) == '*'; + + } + // public int compareTo(IVariableOrConstant arg0) { // // // order vars before ids Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregate/AggregateBase.java 
=================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregate/AggregateBase.java 2011-02-17 13:38:48 UTC (rev 4206) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregate/AggregateBase.java 2011-02-17 22:58:07 UTC (rev 4207) @@ -4,11 +4,11 @@ import com.bigdata.bop.BOp; import com.bigdata.bop.BOpBase; +import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IValueExpression; import com.bigdata.bop.ImmutableBOp; import com.bigdata.bop.NV; import com.bigdata.bop.Var; -import com.bigdata.bop.BOp.Annotations; /** * Abstract base class for aggregate functions. @@ -17,21 +17,100 @@ * * @param <E> */ -abstract public class AggregateBase<E> extends ImmutableBOp implements IAggregate<E> { +public class AggregateBase<E> extends ImmutableBOp implements IAggregate<E> { /** * */ private static final long serialVersionUID = 1L; + /** + * A type safe enumeration of well known aggregate functions. + */ + static public enum FunctionCode { + + /** + * The count of the #of computed value expressions within the solution + * group. In combination with the special keyword DISTINCT, this is the + * #of distinct values from the computed value expression within the + * solution group. When given with the special variable <code>*</code>, + * this is the count of the #of solutions (or distinct solutions if also + * combined with DISTINCT) within the group. + */ + COUNT(0), + + /** + * The sum of the computed value expressions within the solution group. + * In combination with the special keyword DISTINCT, this is the sum of + * the distinct values from the computed value expressions within the + * solution group. + */ + SUM(1), + + /** + * The average is defined as + * <code>AVG(expr) := SUM(expr)/COUNT(expr)</code>. Note that both SUM + * and COUNT can be hash partitioned over a cluster, so it often makes + * sense to rewrite AVG(expr) internally in terms of COUNT and SUM. 
This + * may be combined with DISTINCT. + */ + AVG(2), + + /** + * MIN(expr) is the minimum observed value for the computed value + * expressions according to the ordering semantics of + * <code>ORDER BY expr ASC</code>. This may be combined with DISTINCT. + */ + MIN(3), + + /** + * MAX(expr) is the maximum observed value for the computed value + * expressions according to the ordering semantics of + * <code>ORDER BY expr ASC</code>. This may be combined with DISTINCT. + */ + MAX(4), + + /** + * The combined values of the computed value expressions as a string. + * This may be combined with DISTINCT. + */ + GROUP_CONCAT(5), + + /** + * This evaluates to an arbitrary value of the computed value + * expressions. This may be combined with DISTINCT to sample from the + * distinct computed values. While the implementation is not required to + * choose randomly among the values to be sampled, random sampling may + * prove more useful to some applications. + */ + SAMPLE(6); + + private FunctionCode(int code) { + this.code = code; + } + + final private int code; + + public int getCode() { + return code; + } + + } + public interface Annotations extends ImmutableBOp.Annotations { /** + * The aggregate function identifier ({@link FunctionCode#COUNT}, + * {@link FunctionCode#SUM}, etc). + */ + String FUNCTION_CODE = AggregateBase.class.getName() + ".functionCode"; + + /** * Optional boolean property indicates whether the aggregate applies to * the distinct within group solutions (default * {@value #DEFAULT_DISTINCT}). */ - String DISTINCT = AggregateBase.class.getName()+".distinct"; + String DISTINCT = AggregateBase.class.getName() + ".distinct"; boolean DEFAULT_DISTINCT = false; @@ -41,36 +120,40 @@ super(op); } + /** + * Core shallow copy constructor. The <i>distinct</i> option is modeled + * using {@link Annotations#DISTINCT}. The <i>expr</i> is modeled as the + * first argument for the aggregate function. 
+ * + * @param args + * @param annotations + */ public AggregateBase(BOp[] args, Map<String, Object> annotations) { super(args, annotations); - if (!isWildcardAllowed() && getExpression() == Var.var("*")) { - - /* - * Only COUNT may use the wildcard '*' variable. - */ - - throw new UnsupportedOperationException("'*' not permitted."); - - } - } /** - * + * @param functionCode + * The type safe value identifying the desired aggregate + * function. * @param distinct * <code>true</code> iff the keyword DISTINCT was used, for * example <code>COUNT(DISTINCT y)</code> * @param expr * The value expression to be computed, for example * <code>x</code> in <code>COUNT(DISTINCT x)</code> or - * <code>y+x</code> in <code>MIN(x+y)</code>. + * <code>y+x</code> in <code>MIN(x+y)</code>. Note that only + * COUNT may be used with the special variable <code>*</code>. */ - public AggregateBase(final boolean distinct, final IValueExpression<E> expr) { + public AggregateBase(final FunctionCode functionCode, + final boolean distinct, final IValueExpression<E> expr) { - this(new BOp[] { expr }, distinct ? NV.asMap(new NV( - Annotations.DISTINCT, true)) : null); + this(new BOp[] { expr }, NV.asMap(// + new NV(Annotations.FUNCTION_CODE, functionCode), // + new NV(Annotations.DISTINCT, distinct))// + ); } @@ -87,15 +170,33 @@ } + public boolean isWildcard() { + + return get(0).equals(Var.var("*")); + + } + /** - * Return <code>true</code> iff the {@link IValueExpression} may be the - * special variable <code>*</code>. The default implementation always - * returns <code>false</code>. + * Operation is not implemented by this class and must be overridden if the + * {@link AggregateBase} is to be directly evaluated. However, note that the + * computation of aggregate functions is often based on hard coded + * recognition of the appropriate function code. 
*/ - public boolean isWildcardAllowed() { + public E get(IBindingSet bset) { + throw new UnsupportedOperationException(); + } - return false; - + public AggregateBase<E> setExpression(final IValueExpression<E> newExpr) { + + if (newExpr == null) + throw new IllegalArgumentException(); + + final AggregateBase<E> tmp = (AggregateBase<E>) this.clone(); + + tmp._set(0, newExpr); + + return tmp; + } - + } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregate/IAggregate.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregate/IAggregate.java 2011-02-17 13:38:48 UTC (rev 4206) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregate/IAggregate.java 2011-02-17 22:58:07 UTC (rev 4207) @@ -7,6 +7,14 @@ * An aggregate operator, such as SUM, COUNT, MIN, MAX, etc. * * @author thompsonbry + * + * @todo In order to assign nice labels to select expressions we need to know + * (or be able to generate) the original syntactic expression, e.g., + * <code>i+j<code> or <code>SUM(i*2)+j</code>. The textual value of these + * expressions will be used as if they were variable names. Since a + * subquery could be part of a SELECT expression, this means that we need + * to be able to do this for any SPARQL query construct. I do not believe + * that openrdf currently supports this. */ public interface IAggregate<E> extends IValueExpression<E>{ @@ -29,11 +37,20 @@ * </pre> */ boolean isDistinct(); + + /** + * Return <code>true</code> iff the {@link IValueExpression} is the special + * variable <code>*</code> (but note that this is only allowed for COUNT). + */ + boolean isWildcard(); /** * Return the {@link IValueExpression} to be computed by the aggregate. For - * <code>COUNT</code> this may be the special variable <code>*</code>, which - * is interpreted to mean all variables declared in the source solutions. 
+ * example, is the aggregate function is <code>SUM(i+2)</code>, then this + * expression would be <code>i+2</code>. For <code>COUNT</code> this may be + * the special variable <code>*</code>, which is interpreted to mean all + * variables declared in the source solutions. The "DISTINCT" keyword is + * reported separately by {@link #isDistinct()}. */ IValueExpression<E> getExpression(); @@ -42,5 +59,16 @@ * internal state of the {@link IAggregate} operator). */ E get(IBindingSet bset); - + + /** + * Return a new {@link IAggregate} where the expression has been replaced by + * the given expression (copy-on-write). + * + * @param newExpr + * The new expression. + * + * @return The new {@link IAggregate}. + */ + IAggregate<E> setExpression(IValueExpression<E> newExpr); + } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java 2011-02-17 13:38:48 UTC (rev 4206) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java 2011-02-17 22:58:07 UTC (rev 4207) @@ -349,7 +349,7 @@ continue; } - tmp.set(i, val.clone()); + tmp._set(i, val.clone()); // modified = true; Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/AbstractSubqueryOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/AbstractSubqueryOp.java 2011-02-17 13:38:48 UTC (rev 4206) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/AbstractSubqueryOp.java 2011-02-17 22:58:07 UTC (rev 4207) @@ -59,13 +59,6 @@ * <p> * Note: This operator must execute on the query controller. * <p> - * The {@link PipelineOp.Annotations#SINK_REF} of each child operand should be - * overridden to specify the parent of the this operator. 
If you fail to do - * this, then the intermediate results of the subqueries will be routed to this - * operator. This may cause unnecessary network traffic when running against the - * {@link IBigdataFederation}. It may also cause the query to block if the - * buffer capacity is limited. - * <p> * If you want to route intermediate results from other computations into * subqueries, then consider a {@link Tee} pattern instead. * <p> @@ -73,14 +66,12 @@ * * <pre> * SLICE[1]( - * UNION[2]([...],{subqueries=[a{sinkRef=1},b{sinkRef=1},c{sinkRef=1}]}) + * UNION[2]([...],{subqueries=[a,b,c]}) * ) * </pre> * * Will run the subqueries <i>a</i>, <i>b</i>, and <i>c</i> in parallel. Each - * subquery will be run once for each source {@link IBindingSet}. The output of - * those subqueries is explicitly routed to the SLICE operator using - * {@link PipelineOp.Annotations#SINK_REF} for efficiency in scale-out. + * subquery will be run once for each source {@link IBindingSet}. * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ @@ -109,19 +100,19 @@ * The maximum parallelism with which the subqueries will be evaluated * (default is unlimited). 
*/ - String MAX_PARALLEL = AbstractSubqueryOp.class.getName() - + ".maxParallel"; + String MAX_PARALLEL_SUBQUERIES = AbstractSubqueryOp.class.getName() + + ".maxParallelSubqueries"; - int DEFAULT_MAX_PARALLEL = Integer.MAX_VALUE; + int DEFAULT_MAX_PARALLEL_SUBQUERIES = Integer.MAX_VALUE; } /** - * @see Annotations#MAX_PARALLEL + * @see Annotations#MAX_PARALLEL_SUBQUERIES */ - public int getMaxParallel() { - return getProperty(Annotations.MAX_PARALLEL, - Annotations.DEFAULT_MAX_PARALLEL); + public int getMaxParallelSubqueries() { + return getProperty(Annotations.MAX_PARALLEL_SUBQUERIES, + Annotations.DEFAULT_MAX_PARALLEL_SUBQUERIES); } /** @@ -207,8 +198,8 @@ this.subqueries = (BOp[]) controllerOp .getRequiredProperty(Annotations.SUBQUERIES); - this.nparallel = controllerOp.getProperty(Annotations.MAX_PARALLEL, - Annotations.DEFAULT_MAX_PARALLEL); + this.nparallel = controllerOp.getProperty(Annotations.MAX_PARALLEL_SUBQUERIES, + Annotations.DEFAULT_MAX_PARALLEL_SUBQUERIES); this.executor = new LatchedExecutor(context.getIndexManager() .getExecutorService(), nparallel); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2011-02-17 13:38:48 UTC (rev 4206) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2011-02-17 22:58:07 UTC (rev 4207) @@ -1080,8 +1080,10 @@ new NV(BOp.Annotations.BOP_ID, joinId),// // @todo Why not use a factory which avoids bopIds already in use? new NV(PipelineJoin.Annotations.PREDICATE, vTarget.pred.setBOpId(3)), - // disallow parallel evaluation. - new NV(PipelineJoin.Annotations.MAX_PARALLEL,0), + // disallow parallel evaluation of tasks. + new NV(PipelineJoin.Annotations.MAX_PARALLEL,1), + // disallow parallel evaluation of chunks. 
+ new NV(PipelineJoin.Annotations.MAX_PARALLEL_CHUNKS,0), // disable access path coalescing new NV(PipelineJoin.Annotations.COALESCE_DUPLICATE_ACCESS_PATHS,false), // cutoff join. Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/PartitionedJoinGroup.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/PartitionedJoinGroup.java 2011-02-17 13:38:48 UTC (rev 4206) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/PartitionedJoinGroup.java 2011-02-17 22:58:07 UTC (rev 4207) @@ -713,7 +713,9 @@ .asMap(new NV[] { new NV(JoinGraph.Annotations.BOP_ID, idFactory.nextId()), // new NV(JoinGraph.Annotations.EVALUATION_CONTEXT, - BOpEvaluationContext.CONTROLLER) }) // + BOpEvaluationContext.CONTROLLER),// + new NV(PipelineOp.Annotations.SHARED_STATE,true),// + }) // ); return queryOp; Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/Steps.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/Steps.java 2011-02-17 13:38:48 UTC (rev 4206) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/Steps.java 2011-02-17 22:58:07 UTC (rev 4207) @@ -74,8 +74,8 @@ super(args, annotations); - if (getMaxParallel() != 1) - throw new IllegalArgumentException(Annotations.MAX_PARALLEL + "=" + if (getMaxParallelSubqueries() != 1) + throw new IllegalArgumentException(Annotations.MAX_PARALLEL_SUBQUERIES + "=" + getMaxParallel()); } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/Union.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/Union.java 2011-02-17 13:38:48 UTC (rev 4206) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/Union.java 
2011-02-17 22:58:07 UTC (rev 4207) @@ -42,8 +42,7 @@ * * Will run the subqueries <i>a</i>, <i>b</i>, and <i>c</i> in parallel for each * source {@link IBindingSet}. The output of those subqueries will be routed to - * the UNION operator (their parent) unless the subqueries explicitly override - * this behavior using {@link PipelineOp.Annotations#SINK_REF}. + * the UNION operator (their parent). * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/AbstractRunningQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/AbstractRunningQuery.java 2011-02-17 13:38:48 UTC (rev 4206) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/AbstractRunningQuery.java 2011-02-17 22:58:07 UTC (rev 4207) @@ -405,6 +405,33 @@ } + /** + * Return the {@link BOp} having the specified id. + * + * @param bopId + * The {@link BOp} identifier. + * + * @return The {@link BOp}. + * + * @throws IllegalArgumentException + * if there is no {@link BOp} with that identifier declared in + * this query. + */ + final public BOp getBOp(final int bopId) { + + final BOp bop = getBOpIndex().get(bopId); + + if (bop == null) { + + throw new IllegalArgumentException("Not found: id=" + bopId + + ", query=" + query); + + } + + return bop; + + } + /** * @param queryEngine * The {@link QueryEngine} on which the query is running. In @@ -620,6 +647,9 @@ try { + if(log.isInfoEnabled())//FIXME TRACE + log.info(msg.toString()); + if (runState.startOp(msg)) { /* @@ -673,10 +703,13 @@ if (!queryId.equals(msg.queryId)) throw new IllegalArgumentException(); - lock.lock(); + lock.lock(); try { + if(log.isInfoEnabled())//FIXME TRACE + log.info(msg.toString()); + // update per-operator statistics. 
final BOpStats tmp = statsMap.putIfAbsent(msg.bopId, msg.taskStats); @@ -1129,6 +1162,21 @@ } + /** + * Return the textual representation of the {@link RunState} of this query. + * <p> + * Note: Exposed for log messages in derived classes since {@link #runState} + * is private. + */ + protected String runStateString() { + lock.lock(); + try { + return runState.toString(); + } finally { + lock.unlock(); + } + } + public String toString() { final StringBuilder sb = new StringBuilder(getClass().getName()); sb.append("{queryId=" + queryId); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/ChunkedRunningQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/ChunkedRunningQuery.java 2011-02-17 13:38:48 UTC (rev 4206) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/ChunkedRunningQuery.java 2011-02-17 22:58:07 UTC (rev 4207) @@ -37,7 +37,6 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.Future; import java.util.concurrent.FutureTask; -import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.RejectedExecutionException; import java.util.concurrent.atomic.AtomicInteger; @@ -45,6 +44,7 @@ import com.bigdata.bop.BOp; import com.bigdata.bop.BOpContext; +import com.bigdata.bop.BOpEvaluationContext; import com.bigdata.bop.BOpUtility; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.NoSuchBOpException; @@ -57,6 +57,7 @@ import com.bigdata.relation.accesspath.IBlockingBuffer; import com.bigdata.relation.accesspath.IMultiSourceAsynchronousIterator; import com.bigdata.relation.accesspath.MultiSourceSequentialAsynchronousIterator; +import com.bigdata.rwstore.sector.IMemoryManager; import com.bigdata.service.IBigdataFederation; import com.bigdata.util.InnerCause; import com.bigdata.util.concurrent.Memoizer; @@ -72,13 +73,17 @@ * distribution of the shards. 
This evaluation strategy is compatible with both * the {@link Journal} (aka standalone) and the {@link IBigdataFederation} (aka * clustered or scale-out). + * <p> + * Note: The challenge with this implementation is managing the amount of data + * buffered on the JVM heap without introducing control structures which can + * result in deadlock or starvation. This has been addressed to a large extent + * by sharing a lock between this class and the per-operator input work queues + * using modified version of the JSR 166 classes. For high volume operator at + * once evaluation, we need to buffer the data on the native process heap using + * the {@link IMemoryManager}. * - * @todo The challenge with this implementation is managing the amount of data - * buffered on the JVM heap without introducing control structures which - * can result in deadlock or starvation. One way to manage this is to move - * the data off of the JVM heap onto direct ByteBuffers and then - * potentially spilling blocks to disk, e.g., using an RWStore based cache - * pattern. + * @todo {@link IMemoryManager} integration and support + * {@link PipelineOp.Annotations#MAX_MEMORY}. */ public class ChunkedRunningQuery extends AbstractRunningQuery { @@ -90,96 +95,6 @@ */ private final static Logger chunkTaskLog = Logger .getLogger(ChunkTask.class); - -// /** -// * The maximum number of operator tasks which may be concurrently executed -// * for a given (bopId,shardId). -// * -// * @see QueryEngineTestAnnotations#MAX_CONCURRENT_TASKS_PER_OPERATOR_AND_SHARD -// */ -// final private int maxConcurrentTasksPerOperatorAndShard; - -// /** -// * The maximum #of concurrent tasks for this query across all operators and -// * shards. -// * -// * Note: This is not a safe option and MUST be removed. It is possible for -// * N-1 tasks to backup with the Nth task not running due to concurrent -// * execution of some of the N-t tasks. 
-// */ -// final private int maxConcurrentTasks = 10; - - /* - * FIXME Explore the use of this semaphore to limit the maximum #of messages - * further. (Note that placing a limit on messages would allow us to buffer - * potentially many chunks. That could be solved by making LocalChunkMessage - * transparent in terms of the #of chunks or _binding_sets_ which it is - * carrying, but let's take this one step at a time). - * - * The first issue is ensuring that the query continue to make progress when - * a semaphore with a limited #of permits is introduced. This is because the - * ChunkFutureTask only attempts to schedule the next task for a given - * (bopId,shardId) but we could have failed to accept outstanding work for - * any of a number of operator/shard combinations. Likewise, the QueryEngine - * tells the RunningQuery to schedule work each time a message is dropped - * onto the QueryEngine, but the signal to execute more work is lost if the - * permits were not available immediately. - * - * One possibility would be to have a delayed retry. Another would be to - * have ChunkTaskFuture try to run *any* messages, not just messages for the - * same (bopId,shardId). - * - * Also, when scheduling work, there needs to be some bias towards the - * downstream operators in the query plan in order to ensure that they get a - * chance to clear work from upstream operators. This suggests that we might - * carry an order[] and use it to scan the work queue -- or make the work - * queue a priority heap using the order[] to place a primary sort over the - * bopIds in terms of the evaluation order and letting the shardIds fall in - * increasing shard order so we have a total order for the priority heap (a - * total order may also require a tie breaker, but I think that the priority - * heap allows ties). - * - * This concept of memory overhead and permits would be associated with the - * workload waiting on a given node for processing. 
(In scale-out, we do not - * care how much data is moving in the cluster, only how much data is - * challenging an individual machine). - * - * This emphasize again why we need to get the data off of the Java heap. - * - * The same concept should apply for chained buffers. Maybe one way to do - * this is to allocate a fixed budget to each query for the Java heap and - * the C heap and then the query blocks or goes to disk. - */ -// /** -// * The maximum number of binding sets which may be outstanding before a task -// * which is producing binding sets will block. This value may be used to -// * limit the memory demand of a query in which some operators produce -// * binding sets faster than other operators can consume them. -// * -// * @todo This could be generalized to consider the Java heap separately from -// * the native heap as we get into the use of native ByteBuffers to -// * buffer intermediate results. -// * -// * @todo This is expressed in terms of messages and not {@link IBindingSet}s -// * because the {@link LocalChunkMessage} does not self-report the #of -// * {@link IBindingSet}s (or chunks). [It should really be bytes on the -// * heap even if we can count binding sets and #s of bindings, but we -// * do not serialize all binding sets so we have to have one measure -// * for serialized and one measure for live objects.] -// */ -// final private int maxOutstandingMessageCount = 100; -// -// /** -// * A counting semaphore used to limit the #of outstanding binding set chunks -// * which may be buffered before a producer will block when trying to emit -// * another chunk. 
-// * -// * @see HandleChunkBuffer#outputChunk(IBindingSet[]) -// * @see #scheduleNext(BSBundle) -// * -// * @see #maxOutstandingMessageCount -// */ -// final private Semaphore outstandingMessageSemaphore = new Semaphore(maxOutstandingMessageCount); /** * A collection of (bopId,partitionId) keys mapped onto a collection of @@ -188,58 +103,39 @@ */ private final ConcurrentHashMap<BSBundle, ConcurrentHashMap<ChunkFutureTask,ChunkFutureTask>> operatorFutures; - /** - * A map of unbounded work queues for each (bopId,partitionId). Empty queues - * are removed from the map. - * <p> - * The map is guarded by the {@link #lock}. - */ + /** + * A map of unbounded work queues for each (bopId,partitionId). Empty queues + * are removed from the map. + * <p> + * The map is guarded by the {@link #lock}. + * + * FIXME Either this and/or {@link #operatorFutures} must be a weak value + * map in order to ensure that entries are eventually cleared in scale-out + * where the #of entries can potentially be very large since they are per + * (bopId,shardId). While these maps were initially declared as + * {@link ConcurrentHashMap} instances, if we remove entries once the + * map/queue entry is empty, this appears to open a concurrency hole which + * does not exist if we leave entries with empty map/queue values in the + * map. Changing to a weak value map should provide the necessary pruning of + * unused entries without opening up this concurrency hole. + */ private final Map<BSBundle, BlockingQueue<IChunkMessage<IBindingSet>>> operatorQueues; - -// /** -// * When running in stand alone, we can chain together the operators and have -// * much higher throughput. Each operator has an {@link BlockingBuffer} which -// * is essentially its input queue. The operator will drain its input queue -// * using {@link BlockingBuffer#iterator()}. 
-// * <p> -// * Each operator closes its {@link IBlockingBuffer} sink(s) once its own -// * source has been closed and it has finished processing that source. Since -// * multiple producers can target the same operator, we need a means to -// * ensure that the source for the target operator is not closed until each -// * producer which targets that operator has closed its corresponding sink. -// * <p> -// * In order to support this many-to-one producer/consumer pattern, we wrap -// * the input queue (a {@link BlockingBuffer}) for each operator having -// * multiple sources with a {@link MultiplexBlockingBuffer}. This class gives -// * each producer their own view on the underlying {@link BlockingBuffer}. -// * The underlying {@link BlockingBuffer} will not be closed until all -// * source(s) have closed their view of that buffer. This collection keeps -// * track of the {@link MultiplexBlockingBuffer} wrapping the -// * {@link BlockingBuffer} which is the input queue for each operator. -// * <p> -// * The input queues themselves are {@link BlockingBuffer} objects. Those -// * objects are available from this map using -// * {@link MultiplexBlockingBuffer#getBackingBuffer()}. These buffers are -// * pre-allocated by {@link #populateInputBufferMap(BOp)}. -// * {@link #startTasks(BOp)} is responsible for starting the operator tasks -// * in a "back-to-front" order. {@link #startQuery(IChunkMessage)} kicks off -// * the query and invokes {@link #startTasks(BOp)} to chain the input queues -// * and output queues together (when so chained, the output queues are skins -// * over the input queues obtained from {@link MultiplexBlockingBuffer}). -// * -// * FIXME The inputBufferMap will let us construct consumer producer chains -// * where the consumer _waits_ for all producer(s) which target the consumer -// * to close the sink associated with that consumer. 
Unlike when attaching an -// * {@link IChunkMessage} to an already running operator, the consumer will -// * NOT terminate (due to lack up input) until each running producer -// * terminating that consumer terminates. This will improve concurrency, -// * result in fewer task instances, and have better throughput than attaching -// * a chunk to an already running task. However, in scale-out we will have -// * tasks running on different nodes so we can not always chain together the -// * producer and consumer in this tightly integrated manner. -// */ -// final private ConcurrentHashMap<Integer/*operator*/, MultiplexBlockingBuffer<IBindingSet[]>/*inputQueue*/> inputBufferMap; + /** + * FIXME It appears that this is Ok based on a single unit test known to + * fail when {@link #removeMapOperatorQueueEntries} is <code>true</code>, + * but I expect that a similar concurrency problem could also exist for the + * {@link #operatorFutures} even though it does not produce a deadlock. + */ + static private final boolean removeMapOperatorFutureEntries = false; + + /** + * FIXME See operatorQueues for why removing the map entries appears to + * cause problems. This problem is demonstrated by + * TestQueryEngine#test_query_slice_noLimit() when + * {@link PipelineOp.Annotations#PIPELINE_QUEUE_CAPACITY} is ONE (1). 
+ */ + static private final boolean removeMapOperatorQueueEntries = false; // /** // * The chunks available for immediate processing (they must have been @@ -285,286 +181,12 @@ super(queryEngine, queryId, controller, clientProxy, query); -//// combineReceivedChunks = query.getProperty( -//// QueryEngineTestAnnotations.COMBINE_RECEIVED_CHUNKS, -//// QueryEngineTestAnnotations.DEFAULT_COMBINE_RECEIVED_CHUNKS); - -// this.maxConcurrentTasksPerOperatorAndShard = 300; -// this.maxConcurrentTasksPerOperatorAndShard = query -// .getProperty( -// QueryEngineTestAnnotations.MAX_CONCURRENT_TASKS_PER_OPERATOR_AND_SHARD, -// QueryEngineTestAnnotations.DEFAULT_MAX_CONCURRENT_TASKS_PER_OPERATOR_AND_SHARD); - this.operatorFutures = new ConcurrentHashMap<BSBundle, ConcurrentHashMap<ChunkFutureTask,ChunkFutureTask>>(); this.operatorQueues = new ConcurrentHashMap<BSBundle, BlockingQueue<IChunkMessage<IBindingSet>>>(); -// /* -// * Setup the BOpStats object for each pipeline operator in the query. -// */ -// if (controller) { -// -//// runState = new RunState(this); -// -//// statsMap = new ConcurrentHashMap<Integer, BOpStats>(); -//// -//// populateStatsMap(query); -// -//// /* -//// * FIXME Review the concept of mutation queries. It used to be that -//// * queries could only either read or write. Now we have access paths -//// * which either read or write and each query could use zero or more -//// * such access paths. -//// */ -//// if (true/*!query.isMutation()*/) { -//// -//// // read-only query. -//// -//// final BOpStats queryStats = statsMap.get(query.getId()); -// -//// queryBuffer = new BlockingBufferWithStats<IBindingSet[]>(query, -//// queryStats); -//// -//// queryIterator = new QueryResultIterator<IBindingSet[]>(this, -//// queryBuffer.iterator()); -// -//// } else { -//// -//// // Note: Not used for mutation queries. -//// queryBuffer = null; -//// queryIterator = null; -// -// } -// -// } else { -// -//// runState = null; // Note: only on the query controller. 
-//// statsMap = null; // Note: only on the query controller. -//// queryBuffer = null; // Note: only on the query controller. -//// queryIterator = null; // Note: only when queryBuffer is defined. -// -// } - } -// /** -// * Take a chunk generated by some pass over an operator and make it -// * available to the target operator. How this is done depends on whether the -// * query is running against a standalone database or the scale-out database. -// * <p> -// * Note: The return value is used as part of the termination criteria for -// * the query. -// * <p> -// * The default implementation supports a standalone database. The generated -// * chunk is left on the Java heap and handed off synchronously using -// * {@link QueryEngine#acceptChunk(IChunkMessage)}. That method will queue -// * the chunk for asynchronous processing. -// * -// * @param bop -// * The operator which wrote on the sink. -// * @param sinkId -// * The identifier of the target operator. -// * @param sink -// * The intermediate results to be passed to that target operator. -// * -// * @return The #of {@link IChunkMessage} sent. This will always be ONE (1) -// * for scale-up. For scale-out, there will be at least one -// * {@link IChunkMessage} per index partition over which the -// * intermediate results were mapped. -// */ -// protected <E> int handleOutputChunk(final BOp bop, final int sinkId, -// final IBlockingBuffer<IBindingSet[]> sink) { -// -// if (bop == null) -// throw new IllegalArgumentException(); -// -// if (sink == null) -// throw new IllegalArgumentException(); -// -// if (inputBufferMap != null && inputBufferMap.get(sinkId) != null) { -// /* -// * FIXME The sink is just a wrapper for t... [truncated message content] |
From: <tho...@us...> - 2011-02-18 19:39:08
|
Revision: 4208 http://bigdata.svn.sourceforge.net/bigdata/?rev=4208&view=rev Author: thompsonbry Date: 2011-02-18 19:39:00 +0000 (Fri, 18 Feb 2011) Log Message: ----------- Added support for random sampling of the standalone database B+Tree indices in support of the Runtime Query Optimizer. Modified the Advancer pattern to permit one-time initialization of the advancer. Added information about the selected join path to the JoinGraph INFO trace. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleIndex.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/filter/Advancer.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestSampleIndex.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleIndex.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleIndex.java 2011-02-17 22:58:07 UTC (rev 4207) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleIndex.java 2011-02-18 19:39:00 UTC (rev 4208) @@ -27,10 +27,16 @@ package com.bigdata.bop.ap; +import it.unimi.dsi.bits.BitVector; +import it.unimi.dsi.bits.LongArrayBitVector; +import it.unimi.dsi.fastutil.ints.IntOpenHashSet; + import java.io.Serializable; import java.util.ArrayList; +import java.util.Arrays; import java.util.Iterator; import java.util.Map; +import java.util.Random; import java.util.concurrent.Callable; import com.bigdata.bop.AbstractAccessPathOp; @@ -45,6 +51,7 @@ import com.bigdata.btree.ITupleCursor; import com.bigdata.btree.filter.Advancer; import com.bigdata.btree.view.FusedView; +import 
com.bigdata.rawstore.Bytes; import com.bigdata.relation.IRelation; import com.bigdata.relation.accesspath.AccessPath; import com.bigdata.relation.accesspath.IAccessPath; @@ -79,6 +86,27 @@ private static final long serialVersionUID = 1L; /** + * Typesafe enumeration of different kinds of index sampling strategies. + * + * @todo It is much more efficient to take clusters of samples when you can + * accept the bias. Taking a clustered sample really requires knowing + * where the leaf boundaries are in the index, e.g., using + * {@link ILeafCursor}. Taking all tuples from a few leaves in each + * sample might produce a faster estimation of the correlation when + * sampling join paths. + */ + public static enum SampleType { + /** + * Samples are taken at even space offsets. + */ + EVEN, + /** + * Sample offsets are computed randomly. + */ + RANDOM; + } + + /** * Known annotations. */ public interface Annotations extends BOp.Annotations { @@ -86,16 +114,32 @@ /** * The sample limit (default {@value #DEFAULT_LIMIT}). */ - String LIMIT = "limit"; + String LIMIT = SampleIndex.class.getName() + ".limit"; int DEFAULT_LIMIT = 100; /** + * The random number generator seed -or- ZERO (0L) for a random seed + * (default {@value #DEFAULT_SEED}). A non-zero value may be used to + * create a repeatable sample. + */ + String SEED = SampleIndex.class.getName() + ".seed"; + + long DEFAULT_SEED = 0L; + + /** * The {@link IPredicate} describing the access path to be sampled * (required). */ String PREDICATE = SampleIndex.class.getName() + ".predicate"; + + /** + * The type of sample to take (default {@value #DEFAULT_SAMPLE_TYPE)}. 
+ */ + String SAMPLE_TYPE = SampleIndex.class.getName() + ".sampleType"; + String DEFAULT_SAMPLE_TYPE = SampleType.RANDOM.name(); + } public SampleIndex(SampleIndex<E> op) { @@ -115,7 +159,20 @@ return getProperty(Annotations.LIMIT, Annotations.DEFAULT_LIMIT); } + + public long seed() { + return getProperty(Annotations.SEED, Annotations.DEFAULT_SEED); + + } + + public SampleType getSampleType() { + + return SampleType.valueOf(getProperty(Annotations.SAMPLE_TYPE, + Annotations.DEFAULT_SAMPLE_TYPE)); + + } + @SuppressWarnings("unchecked") public IPredicate<E> getPredicate() { @@ -195,7 +252,7 @@ /** Return a sample from the access path. */ public E[] call() throws Exception { - return sample(limit(), getPredicate()).getSample(); + return sample(limit(), getSampleType(), getPredicate()).getSample(); } @@ -206,7 +263,7 @@ * @return */ public AccessPathSample<E> sample(final int limit, - IPredicate<E> predicate) { + final SampleType sampleType, IPredicate<E> predicate) { final IRelation<E> relation = context.getRelation(predicate); @@ -242,10 +299,25 @@ /* * Add advancer to collect sample. */ + + final Advancer<E> advancer; + switch (sampleType) { + case EVEN: + advancer = new EvenSampleAdvancer<E>(// rangeCount, + limit, accessPath.getFromKey(), accessPath.getToKey()); + break; + case RANDOM: + advancer = new RandomSampleAdvancer<E>(// rangeCount, + seed(), limit, accessPath.getFromKey(), accessPath + .getToKey()); + break; + default: + throw new UnsupportedOperationException("SampleType=" + + sampleType); + } + predicate = ((Predicate<E>) predicate) - .addIndexLocalFilter(new SampleAdvancer<E>(//rangeCount, - limit, accessPath.getFromKey(), accessPath - .getToKey())); + .addIndexLocalFilter(advancer); return new AccessPathSample<E>(limit, context.getAccessPath( relation, predicate)); @@ -256,20 +328,21 @@ /** * An advancer pattern which is designed to take evenly distributed samples - * from an index. 
The caller specifies the #of tuples to be skipped after - * each tuple visited. That number should be computed based on the estimated - * range count of the index and the desired sample size. This can fail to - * gather the desired number of sample if additional filters are applied - * which further restrict the elements selected by the predicate. However, - * it will still faithfully represent the expected cardinality of the - * sampled access path. + * from an index. The caller specifies the #of tuples to be sampled. This + * class estimates the range count of the access path and then computes the + * #of samples to be skipped after each tuple visited. + * <p> + * Note: This can fail to gather the desired number of sample if additional + * filters are applied which further restrict the elements selected by the + * predicate. However, it will still faithfully represent the expected + * cardinality of the sampled access path (tuples tested). * * @author tho...@us... * * @param <E> * The generic type of the elements visited by that access path. */ - private static class SampleAdvancer<E> extends Advancer<E> { + private static class EvenSampleAdvancer<E> extends Advancer<E> { private static final long serialVersionUID = 1L; @@ -296,30 +369,13 @@ * @param limit * The #of samples to visit. */ - public SampleAdvancer(final int limit, final byte[] fromKey, + public EvenSampleAdvancer(final int limit, final byte[] fromKey, final byte[] toKey) { this.limit = limit; this.toKey = toKey; } - /** - * @todo This is taking evenly spaced samples. It is much more efficient - * to take clusters of samples when you can accept the bias. - * Taking a clustered sample really requires knowing where the - * leaf boundaries are in the index, e.g., using - * {@link ILeafCursor}. - * <p> - * Taking all tuples from a few leaves in each sample might - * produce a faster estimation of the correlation when sampling - * join paths. 
- * - * @todo Rather than evenly spaced samples, we should be taking a random - * sample. This could be achieved using a random initial offset - * and random increment as long as the initial offset was in the - * range of a single increment and we compute the increment such - * that N+1 intervals exist. - */ @Override protected void advance(final ITuple<E> tuple) { @@ -336,6 +392,11 @@ toIndex = toKey == null ? ndx.getEntryCount() : ndx .indexOf(toKey); + if (toIndex < 0) { + // convert insert position to index. + toIndex = -toIndex + 1; + } + final int rangeCount = (toIndex - fromIndex); skipCount = Math.max(1, rangeCount / limit); @@ -365,9 +426,125 @@ } - } // class SampleAdvancer + } // class EvenSampleAdvancer /** + * An advancer pattern which is designed to take randomly distributed + * samples from an index. The caller specifies the #of tuples to be sampled. + * This class estimates the range count of the access path and then computes + * a set of random offsets into the access path from which it will collect + * the desired #of samples. + * <p> + * Note: This can fail to gather the desired number of sample if additional + * filters are applied which further restrict the elements selected by the + * predicate. However, it will still faithfully represent the expected + * cardinality of the sampled access path (tuples tested). + * + * @author tho...@us... + * + * @param <E> + * The generic type of the elements visited by that access path. + */ + private static class RandomSampleAdvancer<E> extends Advancer<E> { + + private static final long serialVersionUID = 1L; + + /** The random number generator seed. */ + private final long seed; + + /** The desired total limit on the sample. */ + private final int limit; + + private final byte[] fromKey, toKey; + + /* + * Transient data. This gets initialized when we visit the first tuple. + */ + + /** The offset of each tuple to be sampled. */ + private transient int[] offsets; + /** The #of tuples accepted so far. 
*/ + private transient int nread = 0; + /** The inclusive lower bound of the first tuple actually visited. */ + private transient int fromIndex; + /** The exclusive upper bound of the last tuple which could be visited. */ + private transient int toIndex; + + /** + * + * @param limit + * The #of samples to visit. + */ + public RandomSampleAdvancer(final long seed, final int limit, + final byte[] fromKey, final byte[] toKey) { + + this.seed = seed; + this.limit = limit; + this.fromKey = fromKey; + this.toKey = toKey; + } + + @Override + protected boolean init() { + + final AbstractBTree ndx = (AbstractBTree) src.getIndex(); + + // inclusive lower bound. + fromIndex = fromKey == null ? 0 : ndx.indexOf(fromKey); + + if (fromIndex < 0) { + // convert insert position to index. + fromIndex = -fromIndex + 1; + } + + // exclusive upper bound. + toIndex = toKey == null ? ndx.getEntryCount() : ndx.indexOf(toKey); + + if (toIndex < 0) { + // convert insert position to index. + toIndex = -toIndex + 1; + } + + // get offsets to be sampled. + offsets = new SmartOffsetSampler().getOffsets(seed, limit, + fromIndex, toIndex); + + // Skip to the first tuple. + src.seek(ndx.keyAt(offsets[0])); + + return true; + + } + + @Override + protected void advance(final ITuple<E> tuple) { + + final AbstractBTree ndx = (AbstractBTree) src.getIndex(); + + if (nread < offsets.length - 1) { + + /* + * Skip to the next tuple. + */ + + final int nextIndex = offsets[nread]; + +// System.err.println("limit=" + limit + ", rangeCount=" +// + (toIndex - fromIndex) + ", fromIndex=" + fromIndex +// + ", toIndex=" + toIndex + ", currentIndex=" +// + currentIndex + ", nextIndex=" + nextIndex); + + src.seek(ndx.keyAt(nextIndex)); + + } + + nread++; + + } + + } // class RandomSampleAdvancer + + /** * A sample from an access path. * * @param <E> @@ -459,4 +636,355 @@ } // AccessPathSample + /** + * Interface for obtaining an array of tuple offsets to be sampled. 
+ * + * @author thompsonbry + */ + public interface IOffsetSampler { + + /** + * Return an array of tuple indices which may be used to sample a key + * range of some index. + * <p> + * Note: The caller must stop when it runs out of offsets, not when the + * limit is satisfied, as there will be fewer offsets returned when the + * half open range is smaller than the limit. + * + * @param seed + * The seed for the random number generator -or- ZERO (0L) + * for a random seed. A non-zero value may be used to create + * a repeatable sample. + * @param limit + * The maximum #of tuples to sample. + * @param fromIndex + * The inclusive lower bound. + * @param toIndex + * The exclusive upper bound. + * + * @return An array of at most <i>limit</i> offsets into the index. The + * offsets will lie in the half open range (fromIndex,toIndex]. + * The elements of the array will be in ascending order. No + * offsets will be repeated. + * + * @throws IllegalArgumentException + * if <i>limit</i> is non-positive. + * @throws IllegalArgumentException + * if <i>fromIndex</i> is negative. + * @throws IllegalArgumentException + * if <i>toIndex</i> is negative. + * @throws IllegalArgumentException + * unless <i>toIndex</i> is GT <i>fromIndex</i>. + */ + int[] getOffsets(final long seed, int limit, final int fromIndex, + final int toIndex); + } + + /** + * A smart implementation which uses whichever implementation is most + * efficient for the limit and key range to be sampled. 
+ * + * @author thompsonbry + */ + public static class SmartOffsetSampler implements IOffsetSampler { + + /** + * {@inheritDoc} + */ + public int[] getOffsets(final long seed, int limit, + final int fromIndex, final int toIndex) { + + if (limit < 1) + throw new IllegalArgumentException(); + if (fromIndex < 0) + throw new IllegalArgumentException(); + if (toIndex < 0) + throw new IllegalArgumentException(); + if (toIndex <= fromIndex) + throw new IllegalArgumentException(); + + final int rangeCount = (toIndex - fromIndex); + + if (limit > rangeCount) + limit = rangeCount; + + if (limit == rangeCount) { + + // Visit everything. + return new EntireRangeOffsetSampler().getOffsets(seed, limit, + fromIndex, toIndex); + + } + + /* + * Random offsets visiting a subset of the key range using a + * selection without replacement pattern (the same tuple is never + * visited twice). + * + * FIXME When the limit approaches the range count and the range + * count is large (too large for a bit vector or acceptance set + * approach), then we are better off creating a hash set of offsets + * NOT to be visited and then just choosing (rangeCount-limit) + * offsets to reject. This will be less expensive than computing the + * acceptance set directly. However, to really benefit from the + * smaller memory profile, we would also need to wrap that with an + * iterator pattern so the smaller memory representation could be of + * use when the offset[] is applied (e.g., modify the IOffsetSampler + * interface to be an iterator with various ctor parameters rather + * than returning an array as we do today). + */ + + // FIXME BitVectorOffsetSampler is broken. + if (false && rangeCount < Bytes.kilobyte32 * 8) { + + // NB: 32k range count uses a 4k bit vector. 
+ return new BitVectorOffsetSampler().getOffsets(seed, limit, + fromIndex, toIndex); + + } + + /* + * When limit is small (or significantly smaller than the + * rangeCount), then we are much better off creating a hash set of + * the offsets which have been accepted. + * + * Good unless [limit] is very large. + */ + return new AcceptanceSetOffsetSampler().getOffsets(seed, limit, + fromIndex, toIndex); + + } + + } + + /** + * Returns all offsets in the half-open range, but may only be used when + * the limit GTE the range count. + */ + static public class EntireRangeOffsetSampler implements IOffsetSampler { + + /** + * {@inheritDoc} + * + * @throws UnsupportedOperationException + * if <i>limit!=rangeCount</i> (after adjusting for limits + * greater than the rangeCount). + */ + public int[] getOffsets(final long seed, int limit, + final int fromIndex, final int toIndex) { + + if (limit < 1) + throw new IllegalArgumentException(); + if (fromIndex < 0) + throw new IllegalArgumentException(); + if (toIndex < 0) + throw new IllegalArgumentException(); + if (toIndex <= fromIndex) + throw new IllegalArgumentException(); + + final int rangeCount = (toIndex - fromIndex); + + if (limit > rangeCount) + limit = rangeCount; + + if (limit != rangeCount) + throw new UnsupportedOperationException(); + + // offsets of tuples to visit. + final int[] offsets = new int[limit]; + + for (int i = 0; i < limit; i++) { + + offsets[i] = fromIndex + i; + + } + + return offsets; + + } + } + + /** + * Return a randomly selected ordered array of offsets in the given + * half-open range. + * <p> + * This approach is based on a bit vector. If the bit is already marked, + * then the offset has been used and we scan until we find the next free + * offset. This requires [rangeCount] bits, so it works well when the + * rangeCount of the key range is small. For example, a range count of 32k + * requires a 4kb bit vector, which is quite manageable. 
+ * + * FIXME There is something broken in this class, probably an assumption I + * have about how {@link LongArrayBitVector} works. If you enable it in the + * stress test, it will fail. + */ + static public class BitVectorOffsetSampler implements IOffsetSampler { + + public int[] getOffsets(final long seed, int limit, + final int fromIndex, final int toIndex) { + + if (limit < 1) + throw new IllegalArgumentException(); + if (fromIndex < 0) + throw new IllegalArgumentException(); + if (toIndex < 0) + throw new IllegalArgumentException(); + if (toIndex <= fromIndex) + throw new IllegalArgumentException(); + + final int rangeCount = (toIndex - fromIndex); + + if (limit > rangeCount) + limit = rangeCount; + + // offsets of tuples to visit. + final int[] offsets = new int[limit]; + + // create a cleared bit vector of the stated capacity. + final BitVector v = LongArrayBitVector.ofLength(// + rangeCount// capacity (in bits) + ); + + // Random number generator using caller's seed (if given). + final Random rnd = seed == 0L ? new Random() : new Random(seed); + + // Choose random tuple indices for the remaining tuples. + for (int i = 0; i < limit; i++) { + + /* + * Look for an unused bit starting at this index. If necessary, + * this will wrap around to zero. + */ + + // k in (0:rangeCount-1). + int k = rnd.nextInt(rangeCount); + + if (v.getBoolean((long) k)) { + // This bit is already taken. + final long nextZero = v.nextZero((long) k); + if (nextZero != -1L) { + k = (int) nextZero; + } else { + final long priorZero = v.previousZero((long) k); + if (priorZero != -1L) { + k = (int) priorZero; + } else { + // No empty bit found? + throw new AssertionError(); + } + } + } + + assert !v.getBoolean(k); + + // Set the bit. + v.add(k, true); + + assert v.getBoolean(k); + + offsets[i] = fromIndex + k; + + assert offsets[i] < toIndex; + + } + + // put them into sorted order for more efficient traversal. 
+ Arrays.sort(offsets); + + // System.err.println(Arrays.toString(offsets)); + + return offsets; + + } + + } + + /** + * An implementation based on an acceptance set of offsets which have been + * accepted. This implementation is a good choice when the limit moderate + * (~100k) and the rangeCount is significantly greater than the limit. The + * memory demand is the O(limit). + * + * @author thompsonbry + */ + static public class AcceptanceSetOffsetSampler implements IOffsetSampler { + + public int[] getOffsets(final long seed, int limit, + final int fromIndex, final int toIndex) { + + if (limit < 1) + throw new IllegalArgumentException(); + if (fromIndex < 0) + throw new IllegalArgumentException(); + if (toIndex < 0) + throw new IllegalArgumentException(); + if (toIndex <= fromIndex) + throw new IllegalArgumentException(); + + final int rangeCount = (toIndex - fromIndex); + + if (limit > rangeCount) + limit = rangeCount; + + // offsets of tuples to visit. + final int[] offsets = new int[limit]; + + // hash set of accepted offsets. + final IntOpenHashSet v = new IntOpenHashSet( + rangeCount// capacity + ); + + // Random number generator using caller's seed (if given). + final Random rnd = seed == 0L ? new Random() : new Random(seed); + + // Choose random tuple indices for the remaining tuples. + for (int i = 0; i < limit; i++) { + + /* + * Look for an unused bit starting at this index. If necessary, + * this will wrap around to zero. + */ + + // k in (0:rangeCount-1). + int k = rnd.nextInt(rangeCount); + + int round = 0; + while (v.contains(k)) { + + k++; + + if (k == rangeCount) { + // wrap around. + if (++round > 1) { + // no empty bit found? + throw new AssertionError(); + } + // reset starting index. + k = 0; + } + + } + + assert !v.contains(k); + + // Set the bit. + v.add(k); + + offsets[i] = fromIndex + k; + + assert offsets[i] < toIndex; + + } + + // put them into sorted order for more efficient traversal. 
+ Arrays.sort(offsets); + + // System.err.println(Arrays.toString(offsets)); + + return offsets; + + } + + } + } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2011-02-17 22:58:07 UTC (rev 4207) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2011-02-18 19:39:00 UTC (rev 4208) @@ -60,6 +60,7 @@ import com.bigdata.bop.NV; import com.bigdata.bop.PipelineOp; import com.bigdata.bop.ap.SampleIndex; +import com.bigdata.bop.ap.SampleIndex.SampleType; import com.bigdata.bop.bindingSet.HashBindingSet; import com.bigdata.bop.engine.IRunningQuery; import com.bigdata.bop.engine.LocalChunkMessage; @@ -534,11 +535,16 @@ * Materialize a random sample from the access path. */ - final SampleIndex sampleOp = new SampleIndex( +// final SampleType sampleType = SampleType.EVEN; + final SampleType sampleType = SampleType.RANDOM; + + final SampleIndex<?> sampleOp = new SampleIndex( new BOp[] {}, // NV.asMap(// new NV(SampleIndex.Annotations.PREDICATE, pred),// - new NV(SampleIndex.Annotations.LIMIT, limit))); + new NV(SampleIndex.Annotations.LIMIT, limit),// + new NV(SampleIndex.Annotations.SAMPLE_TYPE, sampleType.name())// + )); sample = new VertexSample(rangeCount, limit, false/* exact */, sampleOp.eval(context)); @@ -1081,7 +1087,7 @@ // @todo Why not use a factory which avoids bopIds already in use? new NV(PipelineJoin.Annotations.PREDICATE, vTarget.pred.setBOpId(3)), // disallow parallel evaluation of tasks. - new NV(PipelineJoin.Annotations.MAX_PARALLEL,1), + new NV(PipelineOp.Annotations.MAX_PARALLEL,1), // disallow parallel evaluation of chunks. new NV(PipelineJoin.Annotations.MAX_PARALLEL_CHUNKS,0), // disable access path coalescing @@ -1172,6 +1178,11 @@ * cardinality at 1600L (lower bound). 
In fact, the cardinality * is 16*175000. This falsely low estimate can cause solutions * which are really better to be dropped. + * + * @todo we should mark [nout] when we do this so that it + * shows up in the trace! Also, the rangeCount is sometimes + * falsely high. However, that should be corrected by random + * resampling of the vertices and paths. */ nout = sumRangeCount; @@ -1226,11 +1237,14 @@ /** * The cumulative estimated cardinality of the path. This is zero for an * empty path. For a path consisting of a single edge, this is the - * estimated cardinality of that edge. When creating a new path adding - * an edge to an existing path, the cumulative cardinality of the new - * path is the cumulative cardinality of the existing path plus the + * estimated cardinality of that edge. When creating a new path by + * adding an edge to an existing path, the cumulative cardinality of the + * new path is the cumulative cardinality of the existing path plus the * estimated cardinality of the cutoff join of the new edge given the * input sample of the existing path. + * + * @todo track this per vertex as well as the total for more interesting + * traces in showPath(Path). 
*/ final public long cumulativeEstimatedCardinality; @@ -1672,7 +1686,7 @@ static public String showTable(final Path[] a,final Path[] pruned) { final StringBuilder sb = new StringBuilder(); final Formatter f = new Formatter(sb); - f.format("%5s %10s%1s * %7s (%3s/%3s) = %10s%1s : %10s %10s", + f.format("%5s %10s%1s * %10s (%6s/%6s) = %10s%1s : %10s %10s", "path",// "rangeCount",// "",// sourceSampleExact @@ -1698,9 +1712,9 @@ } } if (x.sample == null) { - f.format("p[%2d] %10d%1s * %7s (%3s/%3s) = %10s%1s : %10s", i, "N/A", "", "N/A", "N/A", "N/A", "N/A", "", "N/A"); + f.format("p[%2d] %10d%1s * %10s (%6s/%6s) = %10s%1s : %10s", i, "N/A", "", "N/A", "N/A", "N/A", "N/A", "", "N/A"); } else { - f.format("p[%2d] %10d%1s * % 7.2f (%3d/%3d) = % 10d%1s : % 10d", i, + f.format("p[%2d] %10d%1s * % 10.2f (%6d/%6d) = % 10d%1s : % 10d", i, x.sample.rangeCount,// x.sample.sourceSampleExact?"E":"",// x.sample.f,// @@ -1730,6 +1744,66 @@ } /** + * Show the details of a join path, including the estimated cardinality and + * join hit ratio for each step in the path. + * + * @param p + * The join path. + */ + public static String showPath(final Path x) { + if(x == null) + throw new IllegalArgumentException(); + final StringBuilder sb = new StringBuilder(); + final Formatter f = new Formatter(sb); + { + /* + * @todo show sumEstCard for each step of the path. Only the + * estimate for the current path length is currently preserved. We + * would need to preserve the estimate for each step in the path to + * show it here. + * + * @todo show limit on EdgeSample? 
+ */ + f.format("%6s %10s%1s * %10s (%6s/%6s) = %10s%1s",// : %10s",// + "edge", + "rangeCount",// + "",// sourceSampleExact + "f",// + "out",// + "in",// + "estCard",// + ""// estimateIs(Exact|LowerBound|UpperBound) +// "sumEstCard",// + ); + int i = 0; + for (Edge e : x.edges) { + sb.append("\n"); + if (e.sample == null) { + f.format("%6s %10d%1s * %10s (%6s/%6s) = %10s%1s",// + e.getLabel(),// + "N/A", "", "N/A", "N/A", "N/A", "N/A", "", "N/A"); + } else { + f.format("%6s %10d%1s * % 10.2f (%6d/%6d) = % 10d%1s",// + e.getLabel(),// + e.sample.rangeCount,// + e.sample.sourceSampleExact ? "E" : "",// + e.sample.f,// + e.sample.outputCount,// + e.sample.inputCount,// + e.sample.estimatedCardinality,// + e.sample.estimateEnum.getCode()// +// e.cumulativeEstimatedCardinality// + ); + } +// sb.append("\nv[" + vertexIds[i] + "] " + e.toString()); + i++; + } + } + sb.append("\n"); + return sb.toString(); + } + + /** * A runtime optimizer for a join graph. The {@link JoinGraph} bears some * similarity to ROX (Runtime Optimizer for XQuery), but has several * significant differences: @@ -2148,6 +2222,18 @@ // Should be one winner. assert paths.length == 1; + if (log.isInfoEnabled()) { + + /* + * @todo It would be nice to show the plan with the filters + * attached, but that might be something that the caller does. 
+ */ + log.info("\n*** Selected join path: " + + Arrays.toString(paths[0].getVertexIds()) + "\n" + + showPath(paths[0])); + + } + return paths[0]; } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java 2011-02-17 22:58:07 UTC (rev 4207) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java 2011-02-18 19:39:00 UTC (rev 4208) @@ -204,9 +204,7 @@ * The maximum #of solutions which will be generated by the join * (default {@value #DEFAULT_LIMIT}). * - * @todo Unit tests for this feature. It currently breaks out of loops - * but does not explicitly interrupt the task. See the uses of - * {@link JoinTask#limit}. + * @todo Unit tests for this feature (it is used by the JoinGraph). */ String LIMIT = PipelineJoin.class.getName() + ".limit"; @@ -594,6 +592,8 @@ /** * An optional limit on the #of solutions to be produced. The limit is * ignored if it is {@link Long#MAX_VALUE}. + * + * @see Annotations#LIMIT */ final private long limit; @@ -808,6 +808,9 @@ // // stats.elapsed.add(System.currentTimeMillis() - begin); +// } finally { +// System.err.println(joinOp.toString()); +// System.err.println(stats.toString()); } } @@ -1624,6 +1627,12 @@ halted(); if (limit != Long.MAX_VALUE && exactOutputCount.get() > limit) { + // break query @ limit. + if (log.isInfoEnabled()) + log.info("Breaking query @ limit: limit=" + limit + + ", exactOutputCount=" + + exactOutputCount.get()); +// halt((Void) null); return null; } @@ -1713,6 +1722,12 @@ if (limit != Long.MAX_VALUE && exactOutputCount.incrementAndGet() > limit) { + // break query @ limit. 
+ if (log.isInfoEnabled()) + log.info("Breaking query @ limit: limit=" + limit + + ", exactOutputCount=" + + exactOutputCount.get()); +// halt((Void) null); break; } @@ -1927,6 +1942,12 @@ if (limit != Long.MAX_VALUE && exactOutputCount.incrementAndGet() > limit) { + // break query @ limit. + if (log.isInfoEnabled()) + log.info("Breaking query @ limit: limit=" + limit + + ", exactOutputCount=" + + exactOutputCount.get()); +// halt((Void) null); break; } @@ -2119,6 +2140,12 @@ if (limit != Long.MAX_VALUE && exactOutputCount.incrementAndGet() > limit) { + // break query @ limit. + if (log.isInfoEnabled()) + log.info("Breaking query @ limit: limit=" + limit + + ", exactOutputCount=" + + exactOutputCount.get()); +// halt((Void) null); break; } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/filter/Advancer.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/filter/Advancer.java 2011-02-17 22:58:07 UTC (rev 4207) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/filter/Advancer.java 2011-02-18 19:39:00 UTC (rev 4208) @@ -63,6 +63,18 @@ } + /** + * Hook for one-time initialization invoked before the advancer visits the + * first tuple. The default implementation simply returns <code>true</code>. + * + * @return <code>false</code> if nothing should be visited. + */ + protected boolean init() { + + return true; + + } + /** * Offers an opportunity to advance the source {@link ITupleCursor} to a * new key using {@link ITupleCursor#seek(byte[]). @@ -87,6 +99,11 @@ final private Advancer<E> filter; /** + * Used to invoke {@link Advancer#init()}. + */ + private boolean firstTime = true; + + /** * Set true iff we exceed the bounds on the {@link ITupleCursor}. For * example, if we run off the end of an index partition. 
This is used to * simulate the exhaustion of the cursor when you advance past its @@ -116,6 +133,20 @@ public boolean hasNext() { + if(firstTime) { + + if (!filter.init()) { + + exhausted = true; + + return false; + + } + + firstTime =false; + + } + if(exhausted) return false; return src.hasNext(); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestSampleIndex.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestSampleIndex.java 2011-02-17 22:58:07 UTC (rev 4207) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestSampleIndex.java 2011-02-18 19:39:00 UTC (rev 4208) @@ -41,6 +41,10 @@ import com.bigdata.bop.NV; import com.bigdata.bop.Var; import com.bigdata.bop.IPredicate.Annotations; +import com.bigdata.bop.ap.SampleIndex.AcceptanceSetOffsetSampler; +import com.bigdata.bop.ap.SampleIndex.IOffsetSampler; +import com.bigdata.bop.ap.SampleIndex.SampleType; +import com.bigdata.bop.ap.SampleIndex.SmartOffsetSampler; import com.bigdata.journal.BufferMode; import com.bigdata.journal.ITx; import com.bigdata.journal.Journal; @@ -175,8 +179,124 @@ } /** + * Stress test for {@link IOffsetSampler}s. + * + * TODO Look at the distributions of the different {@link IOffsetSampler}s. + * They should be uniform. + */ + public void test_offsetSamplers() { + + // Note: Only handles a special case! +// new GetOffsetsEntireRange(), + + final IOffsetSampler[] samplers = new IOffsetSampler[] { + new SmartOffsetSampler(), // +// new BitVectorOffsetSampler(),// + new AcceptanceSetOffsetSampler(),// +// new RejectionSetOffsetSampler(), // + }; + + final Random r = new Random(); + + final int ntrials = 1000; + + for (int trial = 0; trial < ntrials; trial++) { + + // 10% seed is 0L (which gets turned into random anyway) + final long seed = r.nextDouble() < .1 ? 0 : r.nextLong(); + + final int entryCount = r.nextInt(100000); + + // 10% fromIndex is zero. 
+ final int fromIndex = r.nextDouble() < .1 ? 0 : r + .nextInt(entryCount); + + final int remaining = entryCount - fromIndex; + + final int toIndex = r.nextDouble() < .1 ? entryCount : (fromIndex + + r.nextInt(remaining) + 1); + + final int rangeCount = toIndex - fromIndex; + + final int limit = r.nextDouble() < .1 ? r.nextInt(100) + 1 : r + .nextDouble() < .5 ? r.nextInt(entryCount) + 1 : r + .nextInt(10000) + 1; + + for (IOffsetSampler sampler : samplers) { + + try { + + final long begin = System.currentTimeMillis(); + + final int[] offsets = sampler.getOffsets(seed, limit, fromIndex, toIndex); + + final long elapsed = System.currentTimeMillis() - begin; + + if (elapsed > 1000) { + log.warn("Slow: elapsed=" + elapsed + ", class=" + + sampler.getClass() + ", seed=" + seed + + ", limit=" + limit + ", fromIndex=" + + fromIndex + ",toIndex=" + toIndex); + } + + // check the #of offsets returned. + final int noffsets = offsets.length; + assertTrue(noffsets <= limit); + if (limit > rangeCount) + assertTrue(noffsets <= rangeCount); + else + assertTrue(noffsets == limit); + + // check offsets ordered, within range, and w/o dups. + int lastOffset = -1; + for (int j = 0; j < offsets.length; j++) { + + final int offset = offsets[j]; + + if (offset < fromIndex) + fail("index=" + j + + ", offset LT fromIndex: offset=" + offset + + ", fromIndex=" + fromIndex); + + if (offset >= toIndex) + fail("index=" + j + ", offset GTE toIndex: offset=" + + offset + ", toIndex=" + toIndex); + + if (offset <= lastOffset) { + fail("index=" + j + ", lastOffset=" + lastOffset + + ", but offset=" + offset); + } + + lastOffset = offset; + + } + + } catch (Throwable t) { + + fail("sampler=" + sampler.getClass() + ", seed=" + seed + + ", limit=" + limit + ", fromIndex=" + fromIndex + + ",toIndex=" + toIndex + ", rangeCount=" + + rangeCount, t); + + } + + } + + } + + } + + /** * Unit test verifies some aspects of a sample taken from a local index * (primarily that the sample respects the limit). 
+ * + * @todo test when the range count is zero. + * + * @todo test when the inclusive lower bound of a key range is an insertion + * point (no tuple for that key). + * + * @todo test when the exclusive upper bound of a key range is an insertion + * point (no tuple for that key). */ public void test_something() { @@ -194,42 +314,59 @@ new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED)// ); - final BOpContextBase context = new BOpContextBase(null/* fed */, jnl/* indexManager */); - final int[] limits = new int[] { // 1, 9, 19, 100, 217, 900,// nrecords, nrecords + 1 }; - for (int limit : limits) { + for (SampleType sampleType : SampleType.values()) { - final SampleIndex<E> sampleOp = new SampleIndex<E>( - new BOp[0], - NV - .asMap( - // - new NV(SampleIndex.Annotations.PREDICATE, - predicate),// - new NV(SampleIndex.Annotations.LIMIT, limit)// - )); + if (log.isInfoEnabled()) + log.info("Testing: SampleType=" + sampleType); - final E[] a = sampleOp.eval(context); + for (int limit : limits) { -// System.err.println("limit=" + limit + ", nrecords=" + nrecords -// + ", nsamples=" + a.length); -// -// for (int i = 0; i < a.length && i < 10; i++) { -// System.err.println("a[" + i + "]=" + a[i]); -// } + doTest(nrecords, limit, sampleType, predicate); - final int nexpected = Math.min(nrecords, limit); + } - assertEquals("#samples (limit=" + limit + ", nrecords=" + nrecords - + ", nexpected=" + nexpected + ")", nexpected, a.length); - } } + + private void doTest(final int nrecords, final int limit, + final SampleType sampleType, final IPredicate<E> predicate) { + final BOpContextBase context = new BOpContextBase(null/* fed */, jnl/* indexManager */); + + final SampleIndex<E> sampleOp = new SampleIndex<E>( new BOp[0], // + NV.asMap(// + new NV(SampleIndex.Annotations.PREDICATE, predicate),// + new NV(SampleIndex.Annotations.LIMIT, limit),// + new NV(SampleIndex.Annotations.SAMPLE_TYPE, sampleType + .name())// + )); + + final E[] a = sampleOp.eval(context); + + if 
(log.isInfoEnabled()) { + + System.err.println("limit=" + limit + ", nrecords=" + nrecords + + ", nsamples=" + a.length + ", sampleType=" + sampleType); + + for (int i = 0; i < a.length && i < 10; i++) { + + System.err.println("a[" + i + "]=" + a[i]); + + } + + } + + final int nexpected = Math.min(nrecords, limit); + + assertEquals("#samples (limit=" + limit + ", nrecords=" + nrecords + + ", nexpected=" + nexpected + ")", nexpected, a.length); + } + } Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java 2011-02-17 22:58:07 UTC (rev 4207) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java 2011-02-18 19:39:00 UTC (rev 4208) @@ -91,7 +91,7 @@ * When true, the test uses hardcoded access to an existing Journal already * loaded with some BSBM data set. */ - private static final boolean useExistingJournal = true; + private static final boolean useExistingJournal = false; // private static final long existingPC = 284826; // BSBM 100M @@ -219,7 +219,7 @@ */ public void test_bsbm_q5() throws Exception { -// QueryLog.logTableHeader(); + QueryLog.logTableHeader(); final String namespace = getNamespace(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <mrp...@us...> - 2011-02-20 23:39:34
|
Revision: 4212 http://bigdata.svn.sourceforge.net/bigdata/?rev=4212&view=rev Author: mrpersonick Date: 2011-02-20 23:39:28 +0000 (Sun, 20 Feb 2011) Log Message: ----------- turn off stopword filter when using prefix match Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/search/FullTextIndex.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestSearchQuery.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/search/FullTextIndex.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/search/FullTextIndex.java 2011-02-20 21:20:44 UTC (rev 4211) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/search/FullTextIndex.java 2011-02-20 23:39:28 UTC (rev 4212) @@ -51,7 +51,10 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.LowerCaseFilter; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.standard.StandardFilter; +import org.apache.lucene.analysis.standard.StandardTokenizer; import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.util.Version; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IPredicate; @@ -661,6 +664,19 @@ } /** + * See {@link #index(TokenBuffer, long, int, String, Reader, boolean)}. + * <p> + * Uses a default filterStopwords value of true. + * + */ + public void index(final TokenBuffer buffer, final long docId, final int fieldId, + final String languageCode, final Reader r) { + + index(buffer, docId, fieldId, languageCode, r, true/* filterStopwords */); + + } + + /** * Index a field in a document. * <p> * Note: This method does NOT force a write on the indices. If the <i>buffer</i> @@ -684,11 +700,13 @@ * {@link Locale}. * @param r * A reader on the text to be indexed. 
+ * @param filterStopwords + * if true, filter stopwords from the token stream * * @see TokenBuffer#flush() */ public void index(final TokenBuffer buffer, final long docId, final int fieldId, - final String languageCode, final Reader r) { + final String languageCode, final Reader r, final boolean filterStopwords) { /* * Note: You can invoke this on a read-only index. It is only overflow @@ -701,7 +719,7 @@ int n = 0; // tokenize (note: docId,fieldId are not on the tokenStream, but the field could be). - final TokenStream tokenStream = getTokenStream(languageCode, r); + final TokenStream tokenStream = getTokenStream(languageCode, r, filterStopwords); try { while (tokenStream.incrementToken()) { TermAttribute term=tokenStream.getAttribute(TermAttribute.class); @@ -729,10 +747,14 @@ * * @param r * A reader on the text to be indexed. + * + * @param filterStopwords + * if true, filter stopwords from the token stream * * @return The extracted token stream. */ - protected TokenStream getTokenStream(final String languageCode, final Reader r) { + protected TokenStream getTokenStream(final String languageCode, + final Reader r, final boolean filterStopwords) { /* * Note: This stripping out stopwords by default. @@ -741,9 +763,22 @@ */ final Analyzer a = getAnalyzer(languageCode); - TokenStream tokenStream = a.tokenStream(null/* @todo field? */, r); + TokenStream tokenStream; + if (filterStopwords) { + tokenStream = a.tokenStream(null/* @todo field? */, r); + } else { + /* + * To eliminiate stopword filtering, we simulate the tokenStream() + * operation above per the javadoc for that method, which says: + * "Constructs a StandardTokenizer filtered by a StandardFilter, + * a LowerCaseFilter and a StopFilter", eliminating the StopFilter. + */ + tokenStream = new StandardTokenizer(Version.LUCENE_CURRENT, r); + tokenStream = new StandardFilter(tokenStream); + } // force to lower case. + // might be able to move this inside the else {} block above? 
tokenStream = new LowerCaseFilter(tokenStream); return tokenStream; @@ -1037,9 +1072,15 @@ final TokenBuffer buffer = new TokenBuffer(1, this); + /* + * If we are using prefix match (* operator) then we don't want + * to filter stopwords from the search query. + */ + final boolean filterStopwords = !prefixMatch; + index(buffer, Long.MIN_VALUE/* docId */, Integer.MIN_VALUE/* fieldId */, languageCode, - new StringReader(query)); + new StringReader(query), filterStopwords); if (buffer.size() == 0) { Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestSearchQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestSearchQuery.java 2011-02-20 21:20:44 UTC (rev 4211) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestSearchQuery.java 2011-02-20 23:39:28 UTC (rev 4212) @@ -40,6 +40,7 @@ import java.util.Set; import java.util.concurrent.TimeUnit; +import org.apache.log4j.Logger; import org.openrdf.model.BNode; import org.openrdf.model.Graph; import org.openrdf.model.Literal; @@ -99,6 +100,8 @@ */ public class TestSearchQuery extends ProxyBigdataSailTestCase { + protected static final Logger log = Logger.getLogger(TestSearchQuery.class); + public TestSearchQuery() { } @@ -708,6 +711,7 @@ final URI s5 = vf.createURI(BD.NAMESPACE+"s5"); final URI s6 = vf.createURI(BD.NAMESPACE+"s6"); final URI s7 = vf.createURI(BD.NAMESPACE+"s7"); + final URI s8 = vf.createURI(BD.NAMESPACE+"s8"); final Literal l1 = vf.createLiteral("how"); final Literal l2 = vf.createLiteral("now"); final Literal l3 = vf.createLiteral("brown"); @@ -715,6 +719,7 @@ final Literal l5 = vf.createLiteral("how now"); final Literal l6 = vf.createLiteral("brown cow"); final Literal l7 = vf.createLiteral("how now brown cow"); + final Literal l8 = vf.createLiteral("toilet"); cxn.add(s1, RDFS.LABEL, l1); cxn.add(s2, RDFS.LABEL, l2); @@ -723,6 +728,7 @@ 
cxn.add(s5, RDFS.LABEL, l5); cxn.add(s6, RDFS.LABEL, l6); cxn.add(s7, RDFS.LABEL, l7); + cxn.add(s8, RDFS.LABEL, l8); /* * Note: The either flush() or commit() is required to flush the @@ -739,6 +745,7 @@ literals.put(((BigdataValue)l5).getIV(), l5); literals.put(((BigdataValue)l6).getIV(), l6); literals.put(((BigdataValue)l7).getIV(), l7); + literals.put(((BigdataValue)l8).getIV(), l8); final Map<IV, URI> uris = new LinkedHashMap<IV, URI>(); uris.put(((BigdataValue)l1).getIV(), s1); @@ -748,6 +755,7 @@ uris.put(((BigdataValue)l5).getIV(), s5); uris.put(((BigdataValue)l6).getIV(), s6); uris.put(((BigdataValue)l7).getIV(), s7); + uris.put(((BigdataValue)l8).getIV(), s8); /**/ if (log.isInfoEnabled()) { @@ -1066,6 +1074,71 @@ } + { // prefix match using a stopword + + final String searchQuery = "to*"; + final double minRelevance = 0.0d; + + final String query = + "select ?s ?o ?score " + + "where " + + "{ " + + " ?s <"+RDFS.LABEL+"> ?o . " + + " ?o <"+BD.SEARCH+"> \""+searchQuery+"\" . " + + " ?o <"+BD.RELEVANCE+"> ?score . " + +// " ?o <"+BD.MIN_RELEVANCE+"> \""+minRelevance+"\" . " + +// " ?o <"+BD.MAX_HITS+"> \"5\" . 
" + +// " filter regex(?o, \""+searchQuery+"\") " + + "} " + + "order by desc(?score)"; + + log.info("\n"+query); + + final TupleQuery tupleQuery = + cxn.prepareTupleQuery(QueryLanguage.SPARQL, query); + tupleQuery.setIncludeInferred(true /* includeInferred */); + TupleQueryResult result = tupleQuery.evaluate(); + + int i = 0; + while (result.hasNext()) { + log.info(i++ + ": " + result.next().toString()); + } + assertTrue("wrong # of results: " + i, i == 1); + + result = tupleQuery.evaluate(); + + Collection<BindingSet> answer = new LinkedList<BindingSet>(); + + final ITextIndexer search = + sail.getDatabase().getLexiconRelation().getSearchEngine(); + final Hiterator<IHit> hits = + search.search(searchQuery, + null, // languageCode + true, // prefixMatch + minRelevance, // minCosine + 10000, // maxRank (=maxResults + 1) + 1000L, // timeout + TimeUnit.MILLISECONDS // unit + ); + + while (hits.hasNext()) { + final IHit hit = hits.next(); + final IV id = new TermId(VTE.LITERAL, hit.getDocId()); + final Literal score = vf.createLiteral(hit.getCosine()); + final URI s = uris.get(id); + final Literal o = literals.get(id); + final BindingSet bs = createBindingSet( + new BindingImpl("s", s), + new BindingImpl("o", o), + new BindingImpl("score", score)); + log.info(bs); + answer.add(bs); + } + + compare(result, answer); + + } + } finally { cxn.close(); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2011-02-21 22:01:59
|
Revision: 4218 http://bigdata.svn.sourceforge.net/bigdata/?rev=4218&view=rev Author: thompsonbry Date: 2011-02-21 22:01:49 +0000 (Mon, 21 Feb 2011) Log Message: ----------- - Working on join graphs and the runtime query optimizer. - Moved JoinGraph, NoSolutionsException, and PartitionedJoinGraph into the com.bigdata.bop.joinGraph package. - Moved IEvaluationPlan, IEvaluationPlanFactory, DefaultEvaluationPlan, etc. into the com.bigdata.bop.joinGraph package. - Moved BOpUtility into the com.bigdata.bop.util package. - Added partial support for dynamically identifying edges based on constraints and for accepting unconstrained edges once there are no other vertices available to extend the join graph. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/package.html branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/AbstractJoinNexus.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/AbstractJoinNexusFactory.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/IJoinNexus.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/IJoinNexusFactory.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/IRuleState.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/IRuleStatisticsFactory.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/RuleState.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/RuleStats.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/MockJoinNexusFactory.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/filter/TestAll.java 
branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/relation/rule/eval/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/inf/OwlSameAsPropertiesExpandingIterator.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/rules/InferenceEngine.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/rules/RDFJoinNexus.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/rules/RDFJoinNexusFactory.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/AbstractTripleStore.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/AbstractJoinGraphTestCase.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBarData.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/internal/constraints/TestInlineConstraints.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/magic/TestIRIS.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/rules/AbstractRuleTestCase.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/rules/TestOptionals.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/rules/TestRuleExpansion.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/spo/TestSPORelation.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/spo/TestSPOStarJoin.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/Rule2BOpUtility.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/ 
branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/DefaultRangeCountFactory.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/FixedEvaluationPlanFactory.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/IEvaluationPlan.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/IEvaluationPlanFactory.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/IRangeCountFactory.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/NOPEvaluationPlanFactory.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/NoReorderEvaluationPlan.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/NoSolutionsException.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/PartitionedJoinGroup.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/fast/ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/fast/DefaultEvaluationPlan2.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/fast/DefaultEvaluationPlanFactory2.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/package.html branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JoinGraph.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/util/ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/joinGraph/ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/joinGraph/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/joinGraph/TestPartitionedJoinGroup.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/joinGraph/fast/ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/joinGraph/fast/TestAll.java 
branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/joinGraph/fast/TestDefaultEvaluationPlan.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/joinGraph/rto/ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/joinGraph/rto/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/joinGraph/rto/TestJGraph.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/joinGraph/rto/TestJoinGraph.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/util/ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/util/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/util/TestBOpUtility.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/util/TestBOpUtility_canJoin.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/util/TestBOpUtility_canJoinUsingConstraints.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/util/TestBOpUtility_sharedVariables.java Removed Paths: ------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/NoSolutionsException.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/PartitionedJoinGroup.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/DefaultEvaluationPlan2.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/DefaultEvaluationPlanFactory2.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/DefaultRangeCountFactory.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/FixedEvaluationPlanFactory.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/IEvaluationPlan.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/IEvaluationPlanFactory.java 
branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/IRangeCountFactory.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/NOPEvaluationPlanFactory.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/NoReorderEvaluationPlan.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestBOpUtility.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestBOpUtility_canJoin.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestBOpUtility_canJoinUsingConstraints.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestJGraph.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestJoinGraph.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestPartitionedJoinGroup.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/relation/rule/eval/TestDefaultEvaluationPlan.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java 2011-02-21 18:33:22 UTC (rev 4217) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java 2011-02-21 22:01:49 UTC (rev 4218) @@ -40,8 +40,8 @@ import org.apache.log4j.Logger; import com.bigdata.bop.BOp.Annotations; -import com.bigdata.bop.controller.PartitionedJoinGroup; import com.bigdata.bop.engine.BOpStats; +import com.bigdata.bop.joinGraph.PartitionedJoinGroup; import com.bigdata.btree.AbstractNode; import com.bigdata.relation.accesspath.IAsynchronousIterator; import com.bigdata.relation.accesspath.IBlockingBuffer; @@ -1381,6 +1381,9 @@ /* * Find the constraints that will run with each vertex of the new * join path. + * + * TODO This is a forward reference to a different package, so maybe + * move the canJoinWithConstraints() method to that package? 
*/ final IConstraint[][] constraintRunArray = PartitionedJoinGroup .getJoinGraphConstraints(newPath, constraints); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java 2011-02-21 18:33:22 UTC (rev 4217) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java 2011-02-21 22:01:49 UTC (rev 4218) @@ -34,6 +34,7 @@ import com.bigdata.bop.ap.filter.BOpFilterBase; import com.bigdata.bop.ap.filter.BOpTupleFilter; import com.bigdata.bop.ap.filter.DistinctFilter; +import com.bigdata.bop.joinGraph.IEvaluationPlan; import com.bigdata.btree.IRangeQuery; import com.bigdata.btree.ITuple; import com.bigdata.btree.ITupleCursor; @@ -47,7 +48,6 @@ import com.bigdata.relation.accesspath.IAccessPath; import com.bigdata.relation.rule.IAccessPathExpander; import com.bigdata.relation.rule.IRule; -import com.bigdata.relation.rule.eval.IEvaluationPlan; import com.bigdata.relation.rule.eval.pipeline.JoinMasterTask; import com.bigdata.service.ndx.IClientIndex; import com.bigdata.striterator.IKeyOrder; Deleted: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2011-02-21 18:33:22 UTC (rev 4217) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2011-02-21 22:01:49 UTC (rev 4218) @@ -1,3260 +0,0 @@ -/** - -Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. - -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. 
- -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -/* - * Created on Aug 16, 2010 - */ - -package com.bigdata.bop.controller; - -import java.io.Serializable; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.Comparator; -import java.util.Formatter; -import java.util.Iterator; -import java.util.LinkedHashMap; -import java.util.LinkedHashSet; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.UUID; -import java.util.concurrent.Callable; -import java.util.concurrent.FutureTask; - -import org.apache.log4j.Logger; - -import com.bigdata.bop.BOp; -import com.bigdata.bop.BOpContext; -import com.bigdata.bop.BOpContextBase; -import com.bigdata.bop.BOpEvaluationContext; -import com.bigdata.bop.BOpIdFactory; -import com.bigdata.bop.BOpUtility; -import com.bigdata.bop.IBindingSet; -import com.bigdata.bop.IConstraint; -import com.bigdata.bop.IElement; -import com.bigdata.bop.IPredicate; -import com.bigdata.bop.IVariable; -import com.bigdata.bop.NV; -import com.bigdata.bop.PipelineOp; -import com.bigdata.bop.ap.SampleIndex; -import com.bigdata.bop.ap.SampleIndex.SampleType; -import com.bigdata.bop.bindingSet.HashBindingSet; -import com.bigdata.bop.engine.IRunningQuery; -import com.bigdata.bop.engine.LocalChunkMessage; -import com.bigdata.bop.engine.QueryEngine; -import com.bigdata.bop.join.PipelineJoin; -import com.bigdata.bop.join.PipelineJoin.PipelineJoinStats; -import com.bigdata.bop.rdf.join.DataSetJoin; -import com.bigdata.relation.IRelation; -import 
com.bigdata.relation.accesspath.BufferClosedException; -import com.bigdata.relation.accesspath.IAccessPath; -import com.bigdata.relation.accesspath.IAsynchronousIterator; -import com.bigdata.relation.accesspath.ThickAsynchronousIterator; -import com.bigdata.striterator.Dechunkerator; -import com.bigdata.striterator.IChunkedIterator; -import com.bigdata.util.concurrent.Haltable; - -/** - * A join graph with annotations for estimated cardinality and other details in - * support of runtime query optimization. A join graph is a collection of - * relations and joins which connect those relations. This boils down to a - * collection of {@link IPredicate}s (selects on relations), shared variables - * (which identify joins), and {@link IConstraint}s (which limit solutions). - * Operators other than standard joins (including optional joins, sort, order - * by, etc.) must be handled downstream from the join graph in a "tail plan". - * - * @see http://arxiv.org/PS_cache/arxiv/pdf/0810/0810.4809v1.pdf, XQuery Join - * Graph Isolation. - * - * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id$ - * - * @todo Examine the overhead of the runtime optimizer. Look at ways to prune - * its costs. For example, by pruning the search, by recognizing when the - * query is simple enough to execute directly, by recognizing when we have - * already materialized the answer to the query, etc. - * - * @todo Cumulative estimated cardinality is an estimate of the work to be done. - * However, the actual cost of a join depends on whether we will use - * nested index subquery or a hash join and the cost of that operation on - * the database. There could be counter examples where the cost of the - * hash join with a range scan using the unbound variable is LT the nested - * index subquery. For those cases, we will do the same amount of IO on - * the hash join but there will still be a lower cardinality to the join - * path since we are feeding in fewer solutions to be joined. 
- * - * @todo Look at the integration with the SAIL. We decorate the joins with some - * annotations. Those will have to be correctly propagated to the "edges" - * in order for edge sampling and incremental evaluation (or final - * evaluation) to work. The {@link DataSetJoin} essentially inlines one of - * its access paths. That should really be changed into an inline access - * path and a normal join operator so we can defer some of the details - * concerning the join operator annotations until we decide on the join - * path to be executed. An inline AP really implies an inline relation, - * which in turn implies that the query is a searchable context for - * query-local resources. - * <p> - * For s/o, when the AP is remote, the join evaluation context must be ANY - * and otherwise (for s/o) it must be SHARDED. - * <p> - * Since the join graph is fed the vertices (APs), it does not have access - * to the annotated joins so we need to generated appropriately annotated - * joins when sampling an edge and when evaluation a subquery. - * <p> - * One solution would be to always use the unpartitioned views of the - * indices for the runtime query optimizer, which is how we are estimating - * the range counts of the access paths right now. [Note that the static - * query optimizer ignores named and default graphs, while the runtime - * query optimizer SHOULD pay attention to these things and exploit their - * conditional selectivity for the query plan.] - * - * @todo Handle optional join graphs by first applying the runtime optimizer to - * the main join graph and obtaining a sample for the selected join path. - * That sample will then be feed into the the optional join graph in order - * to optimize the join order within the optional join graph (a join order - * which is selective in the optional join graph is better since it will - * result in faster rejections of intermediate results and hence do less - * work). 
- * <p> - * This is very much related to accepting a collection of non-empty - * binding sets when running the join graph. However, optional join graph - * should be presented in combination with the original join graph and the - * starting paths must be constrained to have the selected join path for - * the original join graph as a prefix. With this setup, the original join - * graph has been locked in to a specific join path and the sampling of - * edges and vertices for the optional join graph can proceed normally. - * <p> - * True optionals will always be appended as part of the "tail plan" for - * any join graph and can not be optimized as each optional join must run - * regardless (as long as the intermediate solution survives the - * non-optional joins). - * - * @todo There are two cases where a join graph must be optimized against a - * specific set of inputs. In one case, it is a sample (this is how - * optimization of an optional join group proceeds per above). In the - * other case, the set of inputs is fixed and is provided instead of a - * single empty binding set as the starting condition. This second case is - * actually a bit more complicated since we can not use a random sample of - * vertices unless the do not share any variables with the initial binding - * sets. When there is a shared variable, we need to do a cutoff join of - * the edge with the initial binding sets. When there is not a shared - * variable, we can sample the vertex and then do a cutoff join. - * - * @todo When we run into a cardinality estimation underflow (the expected - * cardinality goes to zero) we could double the sample size for just - * those join paths which hit a zero estimated cardinality and re-run them - * within the round. This would imply that we keep per join path limits. - * The vertex and edge samples are already aware of the limit at which - * they were last sampled so this should not cause any problems there. 
- * <p> - * A related option would be to deepen the samples only when we are in - * danger of cardinality estimation underflow. E.g., a per-path limit. - * Resampling vertices may only make sense when we increase the limit - * since otherwise we may find a different correlation with the new sample - * but the comparison of paths using one sample base with paths using a - * different sample base in a different round does not carry forward the - * cardinality estimates from the prior round (unless we do something like - * a weighted moving average). - * - * @todo When comparing choices among join paths having fully bound tails where - * the estimated cardinality has also gone to zero, we should prefer to - * evaluate vertices in the tail with better index locality first. For - * example, if one vertex had one variable in the original plan while - * another had two variables, then solutions which reach the 2-var vertex - * could be spread out over a much wider range of the selected index than - * those which reach the 1-var vertex. [In order to support this, we would - * need a means to indicate that a fully bound access path should use an - * index specified by the query optimizer rather than the primary index - * for the relation. In addition, this suggests that we should keep bloom - * filters for more than just the SPO(C) index in scale-out.] - * - * @todo Examine behavior when we do not have perfect covering indices. This - * will mean that some vertices can not be sampled using an index and that - * estimation of their cardinality will have to await the estimation of - * the cardinality of the edge(s) leading to that vertex. Still, the - * approach should be able to handle queries without perfect / covering - * automatically. Then experiment with carrying fewer statement indices - * for quads. - * - * @todo Unit test when there are no solutions to the query. 
In this case there - * will be no paths identified by the optimizer and the final path length - * becomes zero. - */ -public class JoinGraph extends PipelineOp { - - private static final transient Logger log = Logger - .getLogger(JoinGraph.class); - - private static final long serialVersionUID = 1L; - - /** - * Known annotations. - */ - public interface Annotations extends PipelineOp.Annotations { - - /** - * The vertices of the join graph, expressed an an {@link IPredicate}[] - * (required). - */ - String VERTICES = JoinGraph.class.getName() + ".vertices"; - - /** - * The constraints on the join graph, expressed an an - * {@link IConstraint}[] (optional, defaults to no constraints). - */ - String CONSTRAINTS = JoinGraph.class.getName() + ".constraints"; - - /** - * The initial limit for cutoff sampling (default - * {@value #DEFAULT_LIMIT}). - */ - String LIMIT = JoinGraph.class.getName() + ".limit"; - - int DEFAULT_LIMIT = 100; - - /** - * The <i>nedges</i> edges of the join graph having the lowest - * cardinality will be used to generate the initial join paths (default - * {@value #DEFAULT_NEDGES}). This must be a positive integer. - */ - String NEDGES = JoinGraph.class.getName() + ".nedges"; - - int DEFAULT_NEDGES = 2; - } - - /** - * @see Annotations#VERTICES - */ - public IPredicate<?>[] getVertices() { - - return (IPredicate[]) getRequiredProperty(Annotations.VERTICES); - - } - - /** - * @see Annotations#CONSTRAINTS - */ - public IConstraint[] getConstraints() { - - return (IConstraint[]) getProperty(Annotations.CONSTRAINTS, null/* none */); - - } - - /** - * @see Annotations#LIMIT - */ - public int getLimit() { - - return getProperty(Annotations.LIMIT, Annotations.DEFAULT_LIMIT); - - } - - /** - * @see Annotations#NEDGES - */ - public int getNEdges() { - - return getProperty(Annotations.NEDGES, Annotations.DEFAULT_NEDGES); - - } - - public JoinGraph(final BOp[] args, final NV... 
anns) { - - this(args, NV.asMap(anns)); - - } - - public JoinGraph(final BOp[] args, final Map<String, Object> anns) { - - super(args, anns); - - // required property. - final IPredicate<?>[] vertices = (IPredicate[]) getProperty(Annotations.VERTICES); - - if (vertices == null) - throw new IllegalArgumentException(Annotations.VERTICES); - - if (vertices.length == 0) - throw new IllegalArgumentException(Annotations.VERTICES); - - if (getLimit() <= 0) - throw new IllegalArgumentException(Annotations.LIMIT); - - if (getNEdges() <= 0) - throw new IllegalArgumentException(Annotations.NEDGES); - - if (!isController()) - throw new IllegalArgumentException(); - - switch (getEvaluationContext()) { - case CONTROLLER: - break; - default: - throw new IllegalArgumentException(Annotations.EVALUATION_CONTEXT - + "=" + getEvaluationContext()); - } - - } - - public FutureTask<Void> eval(final BOpContext<IBindingSet> context) { - - return new FutureTask<Void>(new JoinGraphTask(context)); - - } - - /** - * A sample of a {@link Vertex} (an access path). - */ - public static class VertexSample { - - /** - * Fast range count. This will be the same for each sample taken - * (assuming a read historical view or even a time scale of query which - * is significantly faster than update). - */ - public final long rangeCount; - - /** - * The limit used to produce the {@link #sample}. - */ - public final int limit; - - /** - * When <code>true</code>, the result is not a sample but the - * materialized access path. - * - * TODO When <code>true</code>, we could run the join against the sample - * rather than the disk. This would require wrapping the sample as an - * access path. Since all exact samples will be pretty small, this is - * not likely to have any great performance benefit. - */ - public final boolean exact; - - /** - * Sample. 
- */ - final Object[] sample; - - /** - * - * @param rangeCount - * @param limit - * @param exact - * @param sample - */ - public VertexSample(final long rangeCount, final int limit, - final boolean exact, final Object[] sample) { - - if (rangeCount < 0L) - throw new IllegalArgumentException(); - - if (limit <= 0) - throw new IllegalArgumentException(); - - if (sample == null) - throw new IllegalArgumentException(); - - this.rangeCount = rangeCount; - - this.limit = limit; - - this.exact = exact; - - this.sample = sample; - - } - - public String toString() { - return "VertexSample{rangeCount=" + rangeCount + ",limit=" + limit - + ",exact=" + exact + ", sampleSize=" + sample.length + "}"; - } - - } - - /** - * A vertex of the join graph is an annotated relation (this corresponds to - * an {@link IPredicate} with additional annotations to support the adaptive - * query optimization algorithm). - * <p> - * The unique identifier for a {@link Vertex} (within a given join graph) is - * the {@link BOp.Annotations#BOP_ID} decorating its {@link IPredicate}. - * {@link #hashCode()} is defined in terms of this unique identifier so we - * can readily detect when a {@link Set} already contains a given - * {@link Vertex}. - */ - public static class Vertex implements Serializable { - - /** - * - */ - private static final long serialVersionUID = 1L; - - public final IPredicate<?> pred; - - /** - * The most recently taken sample of the {@link Vertex}. - */ - transient VertexSample sample = null; - - Vertex(final IPredicate<?> pred) { - - if (pred == null) - throw new IllegalArgumentException(); - - this.pred = pred; - - } - - public String toString() { - - return "Vertex{pred=" + pred + ",sample=" + sample + "}"; - - } - - /** - * Equals is based on a reference test. - */ - public boolean equals(Object o) { - return this == o; - } - - /** - * The hash code is just the {@link BOp.Annotations#BOP_ID} of the - * associated {@link IPredicate}. 
- */ - public int hashCode() { - return pred.getId(); - } - - /** - * Take a sample of the vertex, updating {@link #sample} as a - * side-effect. If the sample is already exact, then this is a NOP. If - * the vertex was already sampled to that limit, then this is a NOP (you - * have to raise the limit to re-sample the vertex). - * - * @param limit - * The sample cutoff. - */ - public void sample(final QueryEngine queryEngine, final int limit) { - - if (queryEngine == null) - throw new IllegalArgumentException(); - - if (limit <= 0) - throw new IllegalArgumentException(); - - final VertexSample oldSample = this.sample; - - if (oldSample != null && oldSample.exact) { - - /* - * The old sample is already the full materialization of the - * vertex. - */ - - return; - - } - - if (oldSample != null && oldSample.limit >= limit) { - - /* - * The vertex was already sampled to this limit. - */ - - return; - - } - - final BOpContextBase context = new BOpContextBase(queryEngine); - - final IRelation r = context.getRelation(pred); - - final IAccessPath ap = context.getAccessPath(r, pred); - - final long rangeCount = oldSample == null ? ap - .rangeCount(false/* exact */) : oldSample.rangeCount; - - if (rangeCount <= limit) { - - /* - * Materialize the access path. - * - * TODO This could be more efficient if we raised it onto the AP - * or if we overrode CHUNK_CAPACITY and the fully buffered - * iterator threshold such that everything was materialized as a - * single chunk. - */ - - final List<Object> tmp = new ArrayList<Object>((int) rangeCount); - - final IChunkedIterator<Object> itr = ap.iterator(); - - try { - - while (itr.hasNext()) { - - tmp.add(itr.next()); - - } - - } finally { - - itr.close(); - } - - sample = new VertexSample(rangeCount, limit, true/* exact */, - tmp.toArray(new Object[0])); - - } else { - - /* - * Materialize a random sample from the access path. 
- */ - -// final SampleType sampleType = SampleType.EVEN; - final SampleType sampleType = SampleType.RANDOM; - - final SampleIndex<?> sampleOp = new SampleIndex( - new BOp[] {}, // - NV.asMap(// - new NV(SampleIndex.Annotations.PREDICATE, pred),// - new NV(SampleIndex.Annotations.LIMIT, limit),// - new NV(SampleIndex.Annotations.SAMPLE_TYPE, sampleType.name())// - )); - - sample = new VertexSample(rangeCount, limit, false/* exact */, - sampleOp.eval(context)); - - } - - if (log.isTraceEnabled()) - log.trace("Sampled: " + sample); - - return; - - } - - } - - /** - * Type safe enumeration describes the edge condition (if any) for a - * cardinality estimate. - */ - public static enum EstimateEnum { - /** - * An estimate, but not any of the edge conditions. - */ - Normal(" "), - /** - * The cardinality estimate is exact. - */ - Exact("E"), - /** - * The cardinality estimation is a lower bound (the actual cardinality - * may be higher than the estimated value). - */ - LowerBound("L"), - /** - * Flag is set when the cardinality estimate underflowed (false zero - * (0)). - */ - Underflow("U"); - - private EstimateEnum(final String code) { - - this.code = code; - - } - - private final String code; - - public String getCode() { - - return code; - - } - - } // EstimateEnum - - /** - * A sample of an {@link Edge} (a join). - */ - public static class EdgeSample { - - /** - * The fast range count (aka cardinality) for the source vertex of the - * edge (whichever vertex has the lower cardinality). - */ - public final long rangeCount; - - /** - * <code>true</code> iff the source sample is exact (because the source - * is either a fully materialized vertex or an edge whose solutions have - * been fully materialized). - */ - public final boolean sourceSampleExact; - - /** - * The limit used to sample the edge (this is the limit on the #of - * solutions generated by the cutoff join used when this sample was - * taken). 
- */ - public final int limit; - - /** - * The #of binding sets out of the source sample vertex sample which - * were consumed. - */ - public final int inputCount; - - /** - * The #of binding sets generated before the join was cutoff. - * <p> - * Note: If the outputCount is zero then this is a good indicator that - * there is an error in the query such that the join will not select - * anything. This is not 100%, merely indicative. - */ - public final long outputCount; - - /** - * The ratio of the #of input samples consumed to the #of output samples - * generated (the join hit ratio or scale factor). - */ - public final double f; - - /** - * The estimated cardinality of the join. - */ - public final long estimatedCardinality; - - /** - * Indicates whether the estimate is exact, an upper bound, or a lower - * bound. - * - * TODO This field should be used to avoid needless re-computation of a - * join whose exact solution is already known. - */ - public final EstimateEnum estimateEnum; - - /** - * The sample of the solutions for the join path. - */ - private final IBindingSet[] sample; - - /** - * Create an object which encapsulates a sample of an edge. - * - * @param limit - * The limit used to sample the edge (this is the limit on - * the #of solutions generated by the cutoff join used when - * this sample was taken). - * @param sourceVertexSample - * The sample for source vertex of the edge (whichever vertex - * has the lower cardinality). - * @param inputCount - * The #of binding sets out of the source sample vertex - * sample which were consumed. - * @param outputCount - * The #of binding sets generated before the join was cutoff. 
- */ - EdgeSample( - // final VertexSample sourceVertexSample, - final long sourceSampleRangeCount,// - final boolean sourceSampleExact, // - final int sourceSampleLimit,// - final int limit,// - final int inputCount, // - final long outputCount,// - final double f, - final long estimatedCardinality, - final IBindingSet[] sample) { - - if (sample == null) - throw new IllegalArgumentException(); - - // this.rangeCount = sourceVertexSample.rangeCount; - this.rangeCount = sourceSampleRangeCount; - - this.sourceSampleExact = sourceSampleExact; - - this.limit = limit; - - this.inputCount = inputCount; - - this.outputCount = outputCount; - - this.f = f; - - this.estimatedCardinality = estimatedCardinality; - - if (sourceSampleExact && outputCount < limit) { - /* - * Note: If the entire source vertex is being fed into the - * cutoff join and the cutoff join outputCount is LT the limit, - * then the sample is the actual result of the join. That is, - * feeding all source solutions into the join gives fewer than - * the desired number of output solutions. - */ - estimateEnum = EstimateEnum.Exact; - } else if (inputCount == 1 && outputCount == limit) { - /* - * If the inputCount is ONE (1) and the outputCount is the - * limit, then the estimated cardinality is a lower bound as - * more than outputCount solutions might be produced by the join - * when presented with a single input solution. - */ - estimateEnum = EstimateEnum.LowerBound; - } else if (!sourceSampleExact - && inputCount == Math.min(sourceSampleLimit, rangeCount) - && outputCount == 0) { - /* - * When the source sample was not exact, the inputCount is EQ to - * the lesser of the source range count and the source sample - * limit, and the outputCount is ZERO (0), then feeding in all - * source solutions in is not sufficient to generate any output - * solutions. In this case, the estimated join hit ratio appears - * to be zero. 
However, the estimation of the join hit ratio - * actually underflowed and the real join hit ratio might be a - * small non-negative value. A real zero can only be identified - * by executing the full join. - * - * Note: An apparent join hit ratio of zero does NOT imply that - * the join will be empty (unless the source vertex sample is - * actually the fully materialized access path - this case is - * covered above). - */ - estimateEnum = EstimateEnum.Underflow; - } else { - estimateEnum = EstimateEnum.Normal; - } - - this.sample = sample; - } - - public String toString() { - return getClass().getName() // - + "{ rangeCount=" + rangeCount// - + ", sourceSampleExact=" + sourceSampleExact// - + ", limit=" + limit // - + ", inputCount=" + inputCount// - + ", outputCount=" + outputCount // - + ", f=" + f// - + ", estimatedCardinality=" + estimatedCardinality// - + ", estimateEnum=" + estimateEnum// -// + ", estimateIsLowerBound=" + estimateIsLowerBound// -// + ", estimateIsUpperBound=" + estimateIsUpperBound// -// + ", sampleIsExactSolution=" + estimateIsExact // - + "}"; - } - - }; - - /** - * An edge of the join graph is an annotated join operator. The edges of the - * join graph are undirected. Edges exist when the vertices share at least - * one variable. - * <p> - * {@link #hashCode()} is defined in terms of the unordered hash codes of - * the individual vertices. - */ - public static class Edge implements Serializable { - - /** - * - */ - private static final long serialVersionUID = 1L; - - /** - * The vertices connected by that edge. - */ - public final Vertex v1, v2; - - /** - * The set of shared variables. - */ - public final Set<IVariable<?>> shared; - - /** - * The last sample for this edge and <code>null</code> if the edge has - * not been sampled. - * <p> - * Note: This sample is only the one-step cutoff evaluation of the edge - * given a sample of its vertex having the lesser cardinality. 
It is NOT - * the cutoff sample of a join path having this edge except for the - * degenerate case where the edge is the first edge in the join path. - */ - transient EdgeSample sample = null; - - public Edge(final Vertex v1, final Vertex v2, - final Set<IVariable<?>> shared) { - if (v1 == null) - throw new IllegalArgumentException(); - if (v2 == null) - throw new IllegalArgumentException(); - if (shared == null) - throw new IllegalArgumentException(); - // Note: We need to allow edges which do not share variables -// if (shared.isEmpty()) -// throw new IllegalArgumentException(); - this.v1 = v1; - this.v2 = v2; - this.shared = shared; - } - - /** - * The edge label is formed from the {@link BOp.Annotations#BOP_ID} of - * its ordered vertices (v1,v2). - */ - public String getLabel() { - - return "(" + v1.pred.getId() + "," + v2.pred.getId() + ")"; - - } - - /** - * Note: The vertices of the edge are labeled using the - * {@link BOp.Annotations#BOP_ID} associated with the {@link IPredicate} - * for each vertex. - */ - public String toString() { - - return "Edge{ "+getLabel()+", estCard=" - + (sample == null ? "N/A" : sample.estimatedCardinality) - + ", shared=" + shared.toString() + ", sample=" + sample - + "}"; - - } - - /** - * Equality is determined by reference testing. - */ - public boolean equals(final Object o) { - - return this == o; - - } - - /** - * The hash code of an edge is the hash code of the vertex with the - * smaller hash code X 31 plus the hash code of the vertex with the - * larger hash code. This definition compensates for the arbitrary order - * in which the vertices may be expressed and also recognizes that the - * vertex hash codes are based on the bop ids, which are often small - * integers. 
- */ - public int hashCode() { - - if (hash == 0) { - - final int h1 = v1.hashCode(); - final int h2 = v2.hashCode(); - - final int h; - if (h1 < h2) { - - h = h1 * 31 + h2; - - } else { - - h = h2 * 31 + h1; - - } - - hash = h; - - } - return hash; - - } - - private int hash; - - /** - * Return the vertex with the smaller estimated cardinality. - * - * @throws IllegalStateException - * if either vertex has not been sampled. - */ - public Vertex getMinimumCardinalityVertex() { - - if (v1.sample == null) // vertex not sampled. - throw new IllegalStateException(); - - if (v2.sample == null) // vertex not sampled. - throw new IllegalStateException(); - - return (v1.sample.rangeCount < v2.sample.rangeCount) ? v1 : v2; - - } - - /** - * Return the vertex with the larger estimated cardinality (the vertex - * not returned by {@link #getMinimumCardinalityVertex()}). - * - * @throws IllegalStateException - * if either vertex has not been sampled. - */ - public Vertex getMaximumCardinalityVertex() { - - // The vertex with the minimum cardinality. - final Vertex o = getMinimumCardinalityVertex(); - - // Return the other vertex. - return (v1 == o) ? v2 : v1; - - } - - /** - * Estimate the cardinality of the edge, updating {@link #sample} as a - * side-effect. This is a NOP if the edge has already been sampled at - * that <i>limit</i>. This is a NOP if the edge sample is exact. - * - * @param context - * - * @return The new {@link EdgeSample} (this is also updated on - * {@link #sample} as a side-effect). - * - * @throws Exception - */ - public EdgeSample estimateCardinality(final QueryEngine queryEngine, - final int limit) throws Exception { - - if (limit <= 0) - throw new IllegalArgumentException(); - -// /* -// * Note: There is never a need to "re-sample" the edge. Unlike ROX, -// * we always can sample a vertex. This means that we can sample the -// * edges exactly once, during the initialization of the join graph. 
-// */ -// if (sample != null) -// throw new RuntimeException(); - - if (sample != null) { - - if (sample.limit >= limit) { - - // Already sampled at that limit. - return sample; - - } - - if (sample.estimateEnum == EstimateEnum.Exact) { - - // Sample is exact (fully materialized result). - return sample; - - } - - } - - /* - * Figure out which vertex has the smaller cardinality. The sample - * of that vertex is used since it is more representative than the - * sample of the other vertex. - */ - // vertex v, vprime - final Vertex v, vp; - if (v1.sample == null) // vertex not sampled. - throw new IllegalStateException(); - if (v2.sample == null) // vertex not sampled. - throw new IllegalStateException(); - /* - * FIXME CONSTRAINT ORDERING : If a variable only appears in a - * CONSTRAINT for one of the two vertices then that vertex must be - * evaluated second. (If the vertices both have this problem then - * the edge can not be evaluated until some other vertex causes the - * variables of either one [v1] or [v2] to become bound.) - */ - if (v1.sample.rangeCount < v2.sample.rangeCount) { - v = v1; - vp = v2; - } else { - v = v2; - vp = v1; - } - - /* - * Convert the source sample into an IBindingSet[]. - * - * TODO We might as well do this when we sample the vertex. - */ - final IBindingSet[] sourceSample = new IBindingSet[v.sample.sample.length]; - { - for (int i = 0; i < sourceSample.length; i++) { - final IBindingSet bset = new HashBindingSet(); - BOpContext.copyValues((IElement) v.sample.sample[i], - v.pred, bset); - sourceSample[i] = bset; - } - } - - // Sample the edge and save the sample on the edge as a side-effect. - this.sample = estimateCardinality(queryEngine, limit, v, vp, - v.sample.rangeCount, v.sample.exact, v.sample.limit, - sourceSample); - - return sample; - - } - - /** - * Estimate the cardinality of the edge given a sample of either a - * vertex or a join path leading up to that edge. 
- * <p> - * Note: The caller is responsible for protecting against needless - * re-sampling. - * - * @param queryEngine - * @param limit - * @param vSource - * The source vertex. - * @param vTarget - * The target vertex - * @param sourceSample - * The sample for the source vertex. When this is a one-step - * estimation of the cardinality of the edge, then this - * sample is taken from the {@link VertexSample}. When the - * edge (vSource,vTarget) extends some {@link Path}, then - * this is taken from the {@link EdgeSample} for that - * {@link Path}. - * - * @return The result of sampling that edge. - * - * @throws Exception - */ - public EdgeSample estimateCardinality(final QueryEngine queryEngine, - final int limit, final Vertex vSource, final Vertex vTarget, - final long sourceSampleRangeCount, - final boolean sourceSampleExact, - final int sourceSampleLimit, - final IBindingSet[] sourceSample) - throws Exception { - - if (limit <= 0) - throw new IllegalArgumentException(); - - /* - * Note: This sets up a cutoff pipeline join operator which makes an - * accurate estimate of the #of input solutions consumed and the #of - * output solutions generated. From that, we can directly compute - * the join hit ratio. This approach is preferred to injecting a - * "RowId" column as the estimates are taken based on internal - * counters in the join operator and the join operator knows how to - * cutoff evaluation as soon as the limit is satisfied, thus - * avoiding unnecessary effort. - * - * TODO Any constraints on the edge (other than those implied by - * shared variables) need to be annotated on the join. Constraints - * (other than range constraints which are directly coded by the - * predicate) will not reduce the effort to compute the join, but - * they can reduce the cardinality of the join and that is what we - * are trying to estimate here. - * - * TODO How can join constraints be moved around? Just attach them - * where ever a variable becomes bound? 
And when do we filter out - * variables which are not required downstream? Once we decide on a - * join path and execute it fully (rather than sampling that join - * path). - */ - final int joinId = 1; - final Map<String,Object> anns = NV.asMap(// - new NV(BOp.Annotations.BOP_ID, joinId),// - // @todo Why not use a factory which avoids bopIds already in use? - new NV(PipelineJoin.Annotations.PREDICATE, vTarget.pred.setBOpId(3)), - // disallow parallel evaluation of tasks. - new NV(PipelineOp.Annotations.MAX_PARALLEL,1), - // disallow parallel evaluation of chunks. - new NV(PipelineJoin.Annotations.MAX_PARALLEL_CHUNKS,0), - // disable access path coalescing - new NV(PipelineJoin.Annotations.COALESCE_DUPLICATE_ACCESS_PATHS,false), - // cutoff join. - new NV(PipelineJoin.Annotations.LIMIT,(long)limit), - /* - * Note: In order to have an accurate estimate of the join - * hit ratio we need to make sure that the join operator - * runs using a single PipelineJoinStats instance which will - * be visible to us when the query is cutoff. In turn, this - * implies that the join must be evaluated on the query - * controller. - * - * @todo This implies that sampling of scale-out joins must - * be done using remote access paths. - */ - new NV(PipelineJoin.Annotations.SHARED_STATE,true), - new NV(PipelineJoin.Annotations.EVALUATION_CONTEXT,BOpEvaluationContext.CONTROLLER) - ); - - final PipelineJoin joinOp = new PipelineJoin(new BOp[] {}, anns); - - final PipelineOp queryOp = joinOp; - - // run the cutoff sampling of the edge. 
- final UUID queryId = UUID.randomUUID(); - final IRunningQuery runningQuery = queryEngine.eval(queryId, - queryOp, new LocalChunkMessage<IBindingSet>(queryEngine, - queryId, joinOp.getId()/* startId */, - -1 /* partitionId */, - new ThickAsynchronousIterator<IBindingSet[]>( - new IBindingSet[][] { sourceSample }))); - - final List<IBindingSet> result = new LinkedList<IBindingSet>(); - try { - try { - IBindingSet bset = null; - // Figure out the #of source samples consumed. - final Iterator<IBindingSet> itr = new Dechunkerator<IBindingSet>( - runningQuery.iterator()); - while (itr.hasNext()) { - bset = itr.next(); - result.add(bset); - } - } finally { - // verify no problems. - runningQuery.get(); - } - } finally { - runningQuery.cancel(true/* mayInterruptIfRunning */); - } - - // The join hit ratio can be computed directly from these stats. - final PipelineJoinStats joinStats = (PipelineJoinStats) runningQuery - .getStats().get(joinId); - - if (log.isTraceEnabled()) - log.trace(joinStats.toString()); - - /* - * TODO Improve comments here. See if it is possible to isolate a - * common base class which would simplify the setup of the cutoff - * join and the computation of the sample stats. - */ - - // #of solutions in. - final int nin = (int) joinStats.inputSolutions.get(); - - // #of solutions out. - long nout = joinStats.outputSolutions.get(); - - // cumulative range count of the sampled access paths. - final long sumRangeCount = joinStats.accessPathRangeCount.get(); - - if (nin == 1 && nout == limit) { - /* - * We are getting [limit] solutions out for one solution in. In - * this case, (nout/nin) is a lower bound for the estimated - * cardinality of the edge. In fact, this condition suggests - * that the upper bound is a must better estimate of the - * cardinality of this join. Therefore, we replace [nout] with - * the sum of the range counts for the as-bound predicates - * considered by the cutoff join. 
- * - * For example, consider a join feeding a rangeCount of 16 into - * a rangeCount of 175000. With a limit of 100, we estimated the - * cardinality at 1600L (lower bound). In fact, the cardinality - * is 16*175000. This falsely low estimate can cause solutions - * which are really better to be dropped. - * - * @todo we should mark [nout] when we do this show that it - * shows up in the trace! Also, the rangeCount is sometimes - * falsely high. However, that should be corrected by random - * resampling of the vertices and paths. - */ - nout = sumRangeCount; - - } - - final double f = nout == 0 ? 0 : (nout / (double) nin); - - final long estimatedCardinality = (long) (sourceSampleRangeCount * f); - - final EdgeSample edgeSample = new EdgeSample( - sourceSampleRangeCount, // - sourceSampleExact, // @todo redundant with sourceSampleLimit - sourceSampleLimit, // - limit, // - nin,// - nout, // - f, // - estimatedCardinality, // - result.toArray(new IBindingSet[result.size()])); - - if (log.isDebugEnabled()) - log.debug(getLabel() + " : newSample=" + edgeSample); - - return edgeSample; - - } - - } - - /** - * A sequence of {@link Edge}s (aka join steps). - */ - public static class Path { - - /** - * An immutable ordered list of the edges in the (aka the sequence of - * joins represented by this path). - */ - public final List<Edge> edges; - - /** - * The sample obtained by the step-wise cutoff evaluation of the ordered - * edges of the path. - * <p> - * Note: This sample is generated one edge at a time rather than by - * attempting the cutoff evaluation of the entire join path (the latter - * approach does allow us to limit the amount of work to be done to - * satisfy the cutoff). - */ - public EdgeSample sample; - - /** - * The cumulative estimated cardinality of the path. This is zero for an - * empty path. For a path consisting of a single edge, this is the - * estimated cardinality of that edge. 
When creating a new path by - * adding an edge to an existing path, the cumulative cardinality of the - * new path is the cumulative cardinality of the existing path plus the - * estimated cardinality of the cutoff join of the new edge given the - * input sample of the existing path. - * - * @todo track this per vertex as well as the total for more interesting - * traces in showPath(Path). - */ - final public long cumulativeEstimatedCardinality; - - public String toString() { - final StringBuilder sb = new StringBuilder(); - sb.append("Path{"); - boolean first = true; - for (Edge e : edges) { - if (!first) - sb.append(","); - sb.append(e.getLabel()); - first = false; - } - sb.append(",cumEstCard=" + cumulativeEstimatedCardinality - + ",sample=" + sample + "}"); - return sb.toString(); - } - - /** - * Create an empty path. - */ - public Path() { - this.edges = Collections.emptyList(); - this.cumulativeEstimatedCardinality = 0; - this.sample = null; - } - - /** - * Create a path from a single edge. - * - * @param e - * The edge. - */ - public Path(final Edge e) { - - if (e == null) - throw new IllegalArgumentException(); - - if (e.sample == null) - throw new IllegalArgumentException("Not sampled: " + e); - - this.edges = Collections.singletonList(e); - - this.sample = e.sample; - - this.cumulativeEstimatedCardinality = e.sample.estimatedCardinality; - - } - - /** - * Constructor used by {@link #addEdge(QueryEngine, int, Edge)} - * - * @param edges - * The edges in the new path. - * @param cumulativeEstimatedCardinality - * The cumulative estimated cardinality of the new path. 
- * @param sample - * The sample from the last - */ - private Path(final List<Edge> edges, - final long cumulativeEstimatedCardinality, - final EdgeSample sample) { - - if (edges == null) - throw new IllegalArgumentException(); - - if (cumulativeEstimatedCardinality < 0) - throw new IllegalArgumentException(); - - if (sample == null) - throw new IllegalArgumentException(); - - this.edges = Collections.unmodifiableList(edges); - - this.cumulativeEstimatedCardinality = cumulativeEstimatedCardinality; - - this.sample = sample; - - } - - /** - * Return <code>true</code> iff the {@link Path} contains at least one - * {@link Edge} for that {@link Vertex}. - * - * @param v - * The vertex - * - * @return true if the vertex is already part of the path. - */ - public boolean contains(final Vertex v) { - - if (v == null) - throw new IllegalArgumentException(); - - for (Edge e : edges) { - - if (e.v1 == v || e.v2 == v) - return true; - - } - - return false; - } - - /** - * Return <code>true</code> if this path is an unordered variant of the - * given path (same vertices in any order). - * - * @param p - * Another path. - * - * @return <code>true</code> if this path is an unordered variant of the - * given path. - */ - public boolean isUnorderedVariant(final Path p) { - - if (p == null) - throw new IllegalArgumentException(); - - if (edges.size() != p.edges.size()) { - /* - * Fast rejection. This assumes that each edge after the first - * adds one distinct vertex to the path. That assumption is - * enforced by #addEdge(). - */ - return false; - } - - final Vertex[] v1 = getVertices(); - final Vertex[] v2 = p.getVertices(); - - if (v1.length != v2.length) { - - // Reject (this case is also covered by the test above). - return false; - - } - - /* - * Scan the vertices of the caller's path. If any of those vertices - * are NOT found in this path the paths are not unordered variations - * of one another. 
- */ - for (int i = 0; i < v2.length; i++) { - - final Vertex tmp = v2[i]; - - boolean found = false; - for (int j = 0; j < v1.length; j++) { - - if (v1[j] == tmp) { - found = true; - break; - } - - } - - if (!found) { - return false; - } - - } - - return true; - - } - - /** - * Return the vertices in this path (in path order). For the first edge, - * the minimum cardinality vertex is always reported first (this is - * critical for producing the correct join plan). For the remaining - * edges in the path, the unvisited is reported. - * - * @return The vertices (in path order). - * - * TODO This could be rewritten without the toArray() using a - * method which visits the vertices of a path in any order. - * - * @todo unit test for the first vertex to be reported. - */ - public Vertex[] getVertices() { - - final Set<Vertex> tmp = new LinkedHashSet<Vertex>(); - - for (Edge e : edges) { - - if (tmp.isEmpty()) { - /* - * The first edge is handled specially in order to report - * the minimum cardinality vertex first. - * - * FIXME CONSTRAINT ORDERING : A vertex can not run until - * all variables appearing in its CONSTRAINTS would be - * bound. This can cause us to use and report an ordering - * which does not place the minimum cardinality vertex 1st. - */ - tmp.add(e.getMinimumCardinalityVertex()); - tmp.add(e.getMaximumCardinalityVertex()); - - } else { - - tmp.add(e.v1); - - tmp.add(e.v2); - - } - - } - - final Vertex[] a = tmp.toArray(new Vertex[tmp.size()]); - - return a; - - } - - /** - * Return the {@link IPredicate}s associated with the vertices of the - * join path in path order. - * - * @see #getVertices() - */ - public IPredicate<?>[] getPredicates() { - - // The vertices in the selected evaluation order. - final Vertex[] vertices = getVertices(); - - // The predicates in the same order as the vertices. 
- final IPredicate<?>[] preds = new IPredicate[vertices.length]; - - for (int i = 0; i < vertices.length; i++) { - - preds[i] = vertices[i].pred; - - } - - return preds; - - } - - /** - * Return the {@link BOp} identifiers of the predicates associated with - * each vertex in path order. - */ - public int[] getVertexIds() { - - return getVertexIds(edges); - - } - - /** - * Return the {@link BOp} identifiers of the predicates associated with - * each vertex in path order. - */ - static public int[] getVertexIds(final List<Edge> edges) { - - final Set<Vertex> tmp = new LinkedHashSet<Vertex>(); - - for (Edge e : edges) { - - tmp.add(e.v1); - - tmp.add(e.v2); - - } - - final Vertex[] a = tmp.toArray(new Vertex[tmp.size()]); - - final int[] b = new int[a.length]; - - for (int i = 0; i < a.length; i++) { - - b[i] = a[i].pred.getId(); - - } - - return b; - - } - - /** - * Return <code>true</code> if this path begins with the given path. - * - * @param p - * The given path. - * - * @return <code>true</code> if this path begins with the given path. - */ - public boolean beginsWith(final Path p) { - - if (p == null) - throw new IllegalArgumentException(); - - if (p.edges.size() > edges.size()) { - // Proven false since the caller's path is lon... [truncated message content] |
From: <mrp...@us...> - 2011-02-22 20:29:57
|
Revision: 4219 http://bigdata.svn.sourceforge.net/bigdata/?rev=4219&view=rev Author: mrpersonick Date: 2011-02-22 20:29:45 +0000 (Tue, 22 Feb 2011) Log Message: ----------- refactor constraints -> value expressions Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestDeepCopy.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDBooleanIV.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/CompareBOp.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/MathBOp.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/SameTermBOp.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/internal/constraints/TestInlineConstraints.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl3.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSail.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailEvaluationStrategyImpl.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/AndBOp.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/EBVBOp.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IsBoundBOp.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IsInlineBOp.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IsLiteralBOp.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/NotBOp.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/OrBOp.java 
branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/ValueExpressionBOp.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/ValueExpressionConstraint.java Removed Paths: ------------- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IsInline.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IsLiteral.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestDeepCopy.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestDeepCopy.java 2011-02-21 22:01:49 UTC (rev 4218) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestDeepCopy.java 2011-02-22 20:29:45 UTC (rev 4219) @@ -45,10 +45,16 @@ import com.bigdata.bop.constraint.NE; import com.bigdata.bop.constraint.NEConstant; import com.bigdata.bop.constraint.OR; +import com.bigdata.rdf.internal.constraints.AndBOp; import com.bigdata.rdf.internal.constraints.CompareBOp; -import com.bigdata.rdf.internal.constraints.IsInline; -import com.bigdata.rdf.internal.constraints.IsLiteral; +import com.bigdata.rdf.internal.constraints.EBVBOp; +import com.bigdata.rdf.internal.constraints.IsBoundBOp; +import com.bigdata.rdf.internal.constraints.IsInlineBOp; +import com.bigdata.rdf.internal.constraints.IsLiteralBOp; import com.bigdata.rdf.internal.constraints.MathBOp; +import com.bigdata.rdf.internal.constraints.NotBOp; +import com.bigdata.rdf.internal.constraints.OrBOp; +import com.bigdata.rdf.internal.constraints.SameTermBOp; import com.bigdata.rdf.rules.RejectAnythingSameAsItself; import com.bigdata.rdf.spo.SPOPredicate; import com.bigdata.rdf.spo.SPOStarJoin; @@ -99,9 +105,15 @@ com.bigdata.rdf.magic.MagicPredicate.class,// // 
com.bigdata.rdf.internal.constraint CompareBOp.class,// - IsInline.class,// - IsLiteral.class,// + IsInlineBOp.class,// + IsLiteralBOp.class,// MathBOp.class,// + AndBOp.class, + EBVBOp.class, + IsBoundBOp.class, + NotBOp.class, + OrBOp.class, + SameTermBOp.class, // com.bigdata.rdf.inf RejectAnythingSameAsItself.class, Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDBooleanIV.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDBooleanIV.java 2011-02-21 22:01:49 UTC (rev 4218) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDBooleanIV.java 2011-02-22 20:29:45 UTC (rev 4219) @@ -39,11 +39,11 @@ */ private static final long serialVersionUID = 1L; - static public transient final XSDBooleanIV<BigdataLiteral> TRUE = new XSDBooleanIV<BigdataLiteral>( - true); + static public transient final XSDBooleanIV<BigdataLiteral> TRUE = + new XSDBooleanIV<BigdataLiteral>(true); - static public transient final XSDBooleanIV<BigdataLiteral> FALSE = new XSDBooleanIV<BigdataLiteral>( - false); + static public transient final XSDBooleanIV<BigdataLiteral> FALSE = + new XSDBooleanIV<BigdataLiteral>(false); private final boolean value; Added: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/AndBOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/AndBOp.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/AndBOp.java 2011-02-22 20:29:45 UTC (rev 4219) @@ -0,0 +1,114 @@ +/* + +Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... 
+ +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +*/ +package com.bigdata.rdf.internal.constraints; + +import java.util.Map; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IValueExpression; +import com.bigdata.rdf.error.SparqlTypeErrorException; +import com.bigdata.rdf.internal.IV; +import com.bigdata.rdf.internal.XSDBooleanIV; + +/** + * Imposes the constraint <code>x AND y</code>. + */ +public class AndBOp extends ValueExpressionBOp + implements IValueExpression<IV> { + + /** + * + */ + private static final long serialVersionUID = -1217715173822304819L; + + public AndBOp(final IValueExpression<IV> x, final IValueExpression<IV> y) { + + this(new BOp[] { x, y }, null/*annocations*/); + + } + + /** + * Required shallow copy constructor. + */ + public AndBOp(final BOp[] args, final Map<String, Object> anns) { + + super(args, anns); + + if (args.length != 2 || args[0] == null || args[1] == null) + throw new IllegalArgumentException(); + + } + + /** + * Required deep copy constructor. + */ + public AndBOp(final AndBOp op) { + super(op); + } + + /** + * Follows semantics from SPARQL spec - "Testing Values". 
+ * <p> + * see http://www.w3.org/TR/rdf-sparql-query/#tests section 11.2 + */ + public boolean accept(final IBindingSet bs) { + + XSDBooleanIV left, right; + + try { + left = (XSDBooleanIV) get(0).get(bs); + } catch (SparqlTypeErrorException ex) { + left = null; + } + + try { + right = (XSDBooleanIV) get(1).get(bs); + } catch (SparqlTypeErrorException ex) { + right = null; + } + + // special error handling per the SPARQL spec + if (left == null || right == null) { + // if one or the other is false, return false + if (left != null && !left.booleanValue()) + return false; + if (right != null && !right.booleanValue()) + return false; + // all other cases, throw a type error + throw new SparqlTypeErrorException(); + } + + return left.booleanValue() && right.booleanValue(); + + } + + public IV get(final IBindingSet bs) { + + return accept(bs) ? XSDBooleanIV.TRUE : XSDBooleanIV.FALSE; + + } + +} Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/CompareBOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/CompareBOp.java 2011-02-21 22:01:49 UTC (rev 4218) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/CompareBOp.java 2011-02-22 20:29:45 UTC (rev 4219) @@ -26,6 +26,7 @@ import java.util.Map; +import org.apache.log4j.Logger; import org.openrdf.query.algebra.Compare.CompareOp; import com.bigdata.bop.BOp; @@ -33,21 +34,26 @@ import com.bigdata.bop.IValueExpression; import com.bigdata.bop.NV; import com.bigdata.bop.PipelineOp; -import com.bigdata.bop.constraint.BOpConstraint; +import com.bigdata.rdf.error.SparqlTypeErrorException; import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.internal.IVUtility; +import com.bigdata.rdf.internal.XSDBooleanIV; /** * Use inline terms to perform numerical comparison operations. 
* * @see IVUtility#numericalCompare(IV, IV) */ -public class CompareBOp extends BOpConstraint { +public class CompareBOp extends ValueExpressionBOp + implements IValueExpression<IV> { - /** - * - */ - private static final long serialVersionUID = 1L; + /** + * + */ + private static final long serialVersionUID = 5661497748051783499L; + + protected static final Logger log = Logger.getLogger(CompareBOp.class); + public interface Annotations extends PipelineOp.Annotations { @@ -58,12 +64,27 @@ } + public CompareBOp(final IValueExpression<IV> left, + final IValueExpression<IV> right, final CompareOp op) { + + this(new BOp[] { left, right }, NV.asMap(new NV(Annotations.OP, op))); + + } + /** * Required shallow copy constructor. */ - public CompareBOp(final BOp[] values, - final Map<String, Object> annotations) { - super(values, annotations); + public CompareBOp(final BOp[] args, final Map<String, Object> anns) { + + super(args, anns); + + if (args.length != 2 || args[0] == null || args[1] == null + || getProperty(Annotations.OP) == null) { + + throw new IllegalArgumentException(); + + } + } /** @@ -73,32 +94,32 @@ super(op); } - public CompareBOp(final IValueExpression<IV> left, - final IValueExpression<IV> right, final CompareOp op) { - - super(new BOp[] { left, right }, NV.asMap(new NV(Annotations.OP, op))); - - if (left == null || right == null || op == null) - throw new IllegalArgumentException(); - - } - public boolean accept(final IBindingSet s) { - final IV left = ((IValueExpression<IV>) get(0)).get(s); - final IV right = ((IValueExpression<IV>) get(1)).get(s); - + final IV left = get(0).get(s); + final IV right = get(1).get(s); + + // not yet bound if (left == null || right == null) -// return true; // not yet bound. 
- return false; // no longer allow unbound values + throw new SparqlTypeErrorException(); final CompareOp op = (CompareOp) getProperty(Annotations.OP); - if (left.isTermId() && right.isTermId() && - (op == CompareOp.EQ || op == CompareOp.NE)) { - return _accept(left.compareTo(right)); + if (left.isTermId() && right.isTermId()) { + if (op == CompareOp.EQ || op == CompareOp.NE) { + return _accept(left.compareTo(right)); + } else { + if (log.isInfoEnabled()) + log.info("cannot compare: " + + left + " " + op + " " + right); + + throw new SparqlTypeErrorException(); + } } + /* + * This code is bad. + */ if (!IVUtility.canNumericalCompare(left) || !IVUtility.canNumericalCompare(right)) { if (op == CompareOp.EQ) { @@ -106,8 +127,11 @@ } else if (op == CompareOp.NE) { return true; } else { - throw new NotNumericalException("cannot numerical compare: " + if (log.isInfoEnabled()) + log.info("cannot numerical compare: " + left + " " + op + " " + right); + + throw new SparqlTypeErrorException(); } } @@ -121,7 +145,7 @@ switch(op) { case EQ: - return compare == 0; + return compare == 0; case NE: return compare != 0; case GT: @@ -137,7 +161,13 @@ } } - + + public IV get(final IBindingSet bs) { + + return accept(bs) ? XSDBooleanIV.TRUE : XSDBooleanIV.FALSE; + + } + public static class NotNumericalException extends RuntimeException { /** Added: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/EBVBOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/EBVBOp.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/EBVBOp.java 2011-02-22 20:29:45 UTC (rev 4219) @@ -0,0 +1,130 @@ +/* + +Copyright (C) SYSTAP, LLC 2006-2011. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... 
+ +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +*/ +package com.bigdata.rdf.internal.constraints; + +import java.util.Map; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IValueExpression; +import com.bigdata.rdf.error.SparqlTypeErrorException; +import com.bigdata.rdf.internal.IV; +import com.bigdata.rdf.internal.XSDBooleanIV; + +/** + * Calculates the "effective boolean value" of an IValueExpression. See the + * SPARQL spec for details. + */ +public class EBVBOp extends ValueExpressionBOp + implements IValueExpression<IV> { + + /** + * + */ + private static final long serialVersionUID = -5701967329003122236L; + + public EBVBOp(final IValueExpression<IV> x) { + + this(new BOp[] { x }, null/*Annotations*/); + + } + + /** + * Required shallow copy constructor. + */ + public EBVBOp(final BOp[] args, final Map<String, Object> anns) { + + super(args, anns); + + if (args.length != 1 || args[0] == null) + throw new IllegalArgumentException(); + + } + + /** + * Required deep copy constructor. + */ + public EBVBOp(final EBVBOp op) { + super(op); + } + + /** + * 11.2.2 Effective Boolean Value (EBV) + * + * Effective boolean value is used to calculate the arguments to the logical + * functions logical-and, logical-or, and fn:not, as well as evaluate the + * result of a FILTER expression. 
+ * + * The XQuery Effective Boolean Value rules rely on the definition of + * XPath's fn:boolean. The following rules reflect the rules for fn:boolean + * applied to the argument types present in SPARQL Queries: + * + * The EBV of any literal whose type is xsd:boolean or numeric is false if + * the lexical form is not valid for that datatype (e.g. + * "abc"^^xsd:integer). + * + * If the argument is a typed literal with a datatype of xsd:boolean, the + * EBV is the value of that argument. + * + * If the argument is a plain literal or a typed literal with a datatype of + * xsd:string, the EBV is false if the operand value has zero length; + * otherwise the EBV is true. + * + * If the argument is a numeric type or a typed literal with a datatype + * derived from a numeric type, the EBV is false if the operand value is NaN + * or is numerically equal to zero; otherwise the EBV is true. + * + * All other arguments, including unbound arguments, produce a type error. + * + * An EBV of true is represented as a typed literal with a datatype of + * xsd:boolean and a lexical value of "true"; an EBV of false is represented + * as a typed literal with a datatype of xsd:boolean and a lexical value of + * "false". + */ + public boolean accept(final IBindingSet bs) { + + final IV iv = get(0).get(bs); + + if (iv instanceof XSDBooleanIV) { + return ((XSDBooleanIV) iv).booleanValue(); + } + + throw new SparqlTypeErrorException(); + + } + + /** + * We know we can strengthen the return type on this one since its whole + * purpose is to evaluate the effective boolean value of a wrapped + * expression. + */ + @Override + public XSDBooleanIV get(final IBindingSet bs) { + + return accept(bs) ? 
XSDBooleanIV.TRUE : XSDBooleanIV.FALSE; + + } + +} Added: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IsBoundBOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IsBoundBOp.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IsBoundBOp.java 2011-02-22 20:29:45 UTC (rev 4219) @@ -0,0 +1,84 @@ +/* + +Copyright (C) SYSTAP, LLC 2006-2011. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +*/ +package com.bigdata.rdf.internal.constraints; + +import java.util.Map; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IValueExpression; +import com.bigdata.bop.IVariable; +import com.bigdata.rdf.internal.IV; +import com.bigdata.rdf.internal.XSDBooleanIV; + +/** + * Imposes the constraint <code>bound(x)</code> for the variable x. + */ +public class IsBoundBOp extends ValueExpressionBOp + implements IValueExpression<IV> { + + /** + * + */ + private static final long serialVersionUID = -7408654639183330874L; + + public IsBoundBOp(final IVariable<IV> x) { + + this(new BOp[] { x }, null/*annocations*/); + + } + + /** + * Required shallow copy constructor. 
+ */ + public IsBoundBOp(final BOp[] args, final Map<String, Object> anns) { + + super(args, anns); + + if (args.length != 1 || args[0] == null) + throw new IllegalArgumentException(); + + } + + /** + * Required deep copy constructor. + */ + public IsBoundBOp(final IsBoundBOp op) { + super(op); + } + + public boolean accept(final IBindingSet s) { + + return get(0).get(s) != null; + + } + + public IV get(final IBindingSet bs) { + + return accept(bs) ? XSDBooleanIV.TRUE : XSDBooleanIV.FALSE; + + } + +} Deleted: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IsInline.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IsInline.java 2011-02-21 22:01:49 UTC (rev 4218) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IsInline.java 2011-02-22 20:29:45 UTC (rev 4219) @@ -1,102 +0,0 @@ -/* - -Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. - -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -*/ -package com.bigdata.rdf.internal.constraints; - -import java.util.Map; - -import com.bigdata.bop.BOp; -import com.bigdata.bop.IBindingSet; -import com.bigdata.bop.IConstant; -import com.bigdata.bop.IVariable; -import com.bigdata.bop.NV; -import com.bigdata.bop.PipelineOp; -import com.bigdata.bop.constraint.BOpConstraint; -import com.bigdata.rdf.internal.IV; - -/** - * Imposes the constraint <code>isInline(x)</code>. - */ -public class IsInline extends BOpConstraint { - - /** - * - */ - private static final long serialVersionUID = 3125106876006900339L; - - public interface Annotations extends PipelineOp.Annotations { - - /** - * If true, only accept variable bindings for {@link #x} that have an - * inline internal value {@link IV}. Otherwise only accept variable bindings - * that are not inline in the statement indices. - * <p> - * @see IV#isInline() - */ - String INLINE = IsInline.class.getName() + ".inline"; - - } - - /** - * Required shallow copy constructor. - */ - public IsInline(final BOp[] values, - final Map<String, Object> annotations) { - super(values, annotations); - } - - /** - * Required deep copy constructor. - */ - public IsInline(final IsInline op) { - super(op); - } - - public IsInline(final IVariable<IV> x, final boolean inline) { - - super(new BOp[] { x }, NV.asMap(new NV(Annotations.INLINE, inline))); - - if (x == null) - throw new IllegalArgumentException(); - - } - - public boolean accept(IBindingSet s) { - - // get binding for "x". - final IConstant<IV> x = s.get((IVariable<IV>) get(0)/*x*/); - - if (x == null) - return true; // not yet bound. 
- - final IV iv = x.get(); - - final boolean inline = - (Boolean) getRequiredProperty(Annotations.INLINE); - - return iv.isInline() == inline; - - } - -} Copied: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IsInlineBOp.java (from rev 4196, branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IsInline.java) =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IsInlineBOp.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IsInlineBOp.java 2011-02-22 20:29:45 UTC (rev 4219) @@ -0,0 +1,109 @@ +/* + +Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +*/ +package com.bigdata.rdf.internal.constraints; + +import java.util.Map; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IValueExpression; +import com.bigdata.bop.IVariable; +import com.bigdata.bop.NV; +import com.bigdata.bop.PipelineOp; +import com.bigdata.rdf.error.SparqlTypeErrorException; +import com.bigdata.rdf.internal.IV; +import com.bigdata.rdf.internal.XSDBooleanIV; + +/** + * Imposes the constraint <code>isInline(x)</code>. 
+ */ +public class IsInlineBOp extends ValueExpressionBOp + implements IValueExpression<IV> { + + /** + * + */ + private static final long serialVersionUID = 3125106876006900339L; + + public interface Annotations extends PipelineOp.Annotations { + + /** + * If true, only accept variable bindings for {@link #x} that have an + * inline internal value {@link IV}. Otherwise only accept variable bindings + * that are not inline in the statement indices. + * <p> + * @see IV#isInline() + */ + String INLINE = IsInlineBOp.class.getName() + ".inline"; + + } + + public IsInlineBOp(final IVariable<IV> x, final boolean inline) { + + this(new BOp[] { x }, NV.asMap(new NV(Annotations.INLINE, inline))); + + } + + /** + * Required shallow copy constructor. + */ + public IsInlineBOp(final BOp[] args, final Map<String, Object> anns) { + + super(args, anns); + + if (args.length != 1 || args[0] == null) + throw new IllegalArgumentException(); + + } + + /** + * Required deep copy constructor. + */ + public IsInlineBOp(final IsInlineBOp op) { + super(op); + } + + public boolean accept(final IBindingSet bs) { + + final boolean inline = + (Boolean) getRequiredProperty(Annotations.INLINE); + + final IV iv = get(0).get(bs); + + // not yet bound + if (iv == null) + throw new SparqlTypeErrorException(); + + return iv.isInline() == inline; + + } + + public IV get(final IBindingSet bs) { + + return accept(bs) ? XSDBooleanIV.TRUE : XSDBooleanIV.FALSE; + + } + +} Deleted: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IsLiteral.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IsLiteral.java 2011-02-21 22:01:49 UTC (rev 4218) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IsLiteral.java 2011-02-22 20:29:45 UTC (rev 4219) @@ -1,85 +0,0 @@ -/* - -Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. 
- -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -*/ -package com.bigdata.rdf.internal.constraints; - -import java.util.Map; - -import com.bigdata.bop.BOp; -import com.bigdata.bop.IBindingSet; -import com.bigdata.bop.IConstant; -import com.bigdata.bop.IVariable; -import com.bigdata.bop.NV; -import com.bigdata.bop.constraint.BOpConstraint; -import com.bigdata.rdf.internal.IV; - -/** - * Imposes the constraint <code>isLiteral(x)</code>. - */ -public class IsLiteral extends BOpConstraint { - - /** - * - */ - private static final long serialVersionUID = 3125106876006900339L; - - /** - * Required shallow copy constructor. - */ - public IsLiteral(final BOp[] values, - final Map<String, Object> annotations) { - super(values, annotations); - } - - /** - * Required deep copy constructor. - */ - public IsLiteral(final IsLiteral op) { - super(op); - } - - public IsLiteral(final IVariable<IV> x) { - - super(new BOp[] { x }, null/*annocations*/); - - if (x == null) - throw new IllegalArgumentException(); - - } - - public boolean accept(IBindingSet s) { - - // get binding for "x". - final IConstant<IV> x = s.get((IVariable<IV>) get(0)/*x*/); - - if (x == null) - return true; // not yet bound. 
- - final IV iv = x.get(); - - return iv.isLiteral(); - - } - -} Copied: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IsLiteralBOp.java (from rev 4196, branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IsLiteral.java) =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IsLiteralBOp.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IsLiteralBOp.java 2011-02-22 20:29:45 UTC (rev 4219) @@ -0,0 +1,91 @@ +/* + +Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +*/ +package com.bigdata.rdf.internal.constraints; + +import java.util.Map; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IValueExpression; +import com.bigdata.bop.IVariable; +import com.bigdata.rdf.error.SparqlTypeErrorException; +import com.bigdata.rdf.internal.IV; +import com.bigdata.rdf.internal.XSDBooleanIV; + +/** + * Imposes the constraint <code>isLiteral(x)</code>. 
+ */ +public class IsLiteralBOp extends ValueExpressionBOp + implements IValueExpression<IV> { + + /** + * + */ + private static final long serialVersionUID = 3125106876006900339L; + + public IsLiteralBOp(final IVariable<IV> x) { + + this(new BOp[] { x }, null/*annocations*/); + + } + + /** + * Required shallow copy constructor. + */ + public IsLiteralBOp(final BOp[] args, final Map<String, Object> anns) { + + super(args, anns); + + if (args.length != 1 || args[0] == null) + throw new IllegalArgumentException(); + + } + + /** + * Required deep copy constructor. + */ + public IsLiteralBOp(final IsLiteralBOp op) { + super(op); + } + + public boolean accept(IBindingSet bs) { + + final IV iv = get(0).get(bs); + + // not yet bound + if (iv == null) + throw new SparqlTypeErrorException(); + + return iv.isLiteral(); + + } + + public IV get(final IBindingSet bs) { + + return accept(bs) ? XSDBooleanIV.TRUE : XSDBooleanIV.FALSE; + + } + +} Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/MathBOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/MathBOp.java 2011-02-21 22:01:49 UTC (rev 4218) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/MathBOp.java 2011-02-22 20:29:45 UTC (rev 4219) @@ -32,6 +32,7 @@ import com.bigdata.bop.IValueExpression; import com.bigdata.bop.ImmutableBOp; import com.bigdata.bop.NV; +import com.bigdata.rdf.error.SparqlTypeErrorException; import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.internal.IVUtility; @@ -40,7 +41,7 @@ * operation to be applied to the operands is specified by the * {@link Annotations#OP} annotation. */ -final public class MathBOp extends ImmutableBOp +final public class MathBOp extends ValueExpressionBOp implements IValueExpression<IV> { /** @@ -62,14 +63,20 @@ } /** - * Required deep copy constructor. 
* + * @param left + * The left operand. + * @param right + * The right operand. * @param op + * The annotation specifying the operation to be performed on + * those operands. */ - public MathBOp(final MathBOp op) { + public MathBOp(final IValueExpression<IV> left, + final IValueExpression<IV> right, final MathOp op) { - super(op); - + this(new BOp[] { left, right }, NV.asMap(new NV(Annotations.OP, op))); + } /** @@ -94,37 +101,35 @@ } /** + * Required deep copy constructor. * - * @param left - * The left operand. - * @param right - * The right operand. * @param op - * The annotation specifying the operation to be performed on - * those operands. */ - public MathBOp(final IValueExpression<IV> left, - final IValueExpression<IV> right, final MathOp op) { + public MathBOp(final MathBOp op) { - this(new BOp[] { left, right }, NV.asMap(new NV(Annotations.OP, op))); - + super(op); + } -// /** -// * Clone is overridden to reduce heap churn. -// */ -// final public Math clone() { -// -// return this; -// -// } + final public IV get(final IBindingSet bs) { + + final IV left = left().get(bs); + final IV right = right().get(bs); + + // not yet bound + if (left == null || right == null) + throw new SparqlTypeErrorException(); + + return IVUtility.numericalMath(left, right, op()); + } + public IValueExpression<IV> left() { - return (IValueExpression<IV>) get(0); + return get(0); } public IValueExpression<IV> right() { - return (IValueExpression<IV>) get(1); + return get(1); } public MathOp op() { @@ -156,11 +161,10 @@ final public boolean equals(final IValueExpression<IV> o) { - if(!(o instanceof MathBOp)) { + if(!(o instanceof MathBOp)) { // incomparable types. 
return false; } - return equals((MathBOp) o); } @@ -172,39 +176,18 @@ private int hash = 0; public int hashCode() { - + int h = hash; - if (h == 0) { - final int n = arity(); - for (int i = 0; i < n; i++) { - h = 31 * h + get(i).hashCode(); - } - h = 31 * h + op().hashCode(); - hash = h; - } - return h; - + } - final public IV get(final IBindingSet bindingSet) { - - final IV left = left().get(bindingSet); - final IV right = right().get(bindingSet); - - if (left == null || right == null) - return null; - - return IVUtility.numericalMath(left, right, op()); - - } - } Added: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/NotBOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/NotBOp.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/NotBOp.java 2011-02-22 20:29:45 UTC (rev 4219) @@ -0,0 +1,85 @@ +/* + +Copyright (C) SYSTAP, LLC 2006-2011. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +*/ +package com.bigdata.rdf.internal.constraints; + +import java.util.Map; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IValueExpression; +import com.bigdata.rdf.internal.IV; +import com.bigdata.rdf.internal.XSDBooleanIV; + +/** + * Imposes the constraint <code>!x</code>. + */ +public class NotBOp extends ValueExpressionBOp + implements IValueExpression<IV> { + + /** + * + */ + private static final long serialVersionUID = -5701967329003122236L; + + public NotBOp(final IValueExpression<IV> x) { + + this(new BOp[] { x }, null/*Annotations*/); + + } + + /** + * Required shallow copy constructor. + */ + public NotBOp(final BOp[] args, final Map<String, Object> anns) { + + super(args, anns); + + if (args.length != 1 || args[0] == null) + throw new IllegalArgumentException(); + + } + + /** + * Required deep copy constructor. + */ + public NotBOp(final NotBOp op) { + super(op); + } + + public boolean accept(final IBindingSet bs) { + + final XSDBooleanIV iv = (XSDBooleanIV) get(0).get(bs); + + return !iv.booleanValue(); + + } + + public IV get(final IBindingSet bs) { + + return accept(bs) ? XSDBooleanIV.TRUE : XSDBooleanIV.FALSE; + + } + +} Added: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/OrBOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/OrBOp.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/OrBOp.java 2011-02-22 20:29:45 UTC (rev 4219) @@ -0,0 +1,114 @@ +/* + +Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... 
+ +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +*/ +package com.bigdata.rdf.internal.constraints; + +import java.util.Map; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IValueExpression; +import com.bigdata.rdf.error.SparqlTypeErrorException; +import com.bigdata.rdf.internal.IV; +import com.bigdata.rdf.internal.XSDBooleanIV; + +/** + * Imposes the constraint <code>x OR y</code>. + */ +public class OrBOp extends ValueExpressionBOp + implements IValueExpression<IV> { + + /** + * + */ + private static final long serialVersionUID = 610253427197564102L; + + public OrBOp(final IValueExpression<IV> x, final IValueExpression<IV> y) { + + this(new BOp[] { x, y }, null/*annocations*/); + + } + + /** + * Required shallow copy constructor. + */ + public OrBOp(final BOp[] args, final Map<String, Object> anns) { + + super(args, anns); + + if (args.length != 2 || args[0] == null || args[1] == null) + throw new IllegalArgumentException(); + + } + + /** + * Required deep copy constructor. + */ + public OrBOp(final OrBOp op) { + super(op); + } + + /** + * Follows semantics from SPARQL spec - "Testing Values". 
+ * <p> + * see http://www.w3.org/TR/rdf-sparql-query/#tests section 11.2 + */ + public boolean accept(final IBindingSet bs) { + + XSDBooleanIV left, right; + + try { + left = (XSDBooleanIV) get(0).get(bs); + } catch (SparqlTypeErrorException ex) { + left = null; + } + + try { + right = (XSDBooleanIV) get(1).get(bs); + } catch (SparqlTypeErrorException ex) { + right = null; + } + + // special error handling per the SPARQL spec + if (left == null || right == null) { + // if one or the other is true, return true + if (left != null && left.booleanValue()) + return true; + if (right != null && right.booleanValue()) + return true; + // all other cases, throw a type error + throw new SparqlTypeErrorException(); + } + + return left.booleanValue() || right.booleanValue(); + + } + + public IV get(final IBindingSet bs) { + + return accept(bs) ? XSDBooleanIV.TRUE : XSDBooleanIV.FALSE; + + } + +} Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/SameTermBOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/SameTermBOp.java 2011-02-21 22:01:49 UTC (rev 4218) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/SameTermBOp.java 2011-02-22 20:29:45 UTC (rev 4219) @@ -26,33 +26,41 @@ import java.util.Map; -import org.openrdf.query.algebra.Compare.CompareOp; - import com.bigdata.bop.BOp; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IValueExpression; -import com.bigdata.bop.NV; -import com.bigdata.bop.PipelineOp; -import com.bigdata.bop.constraint.BOpConstraint; +import com.bigdata.rdf.error.SparqlTypeErrorException; import com.bigdata.rdf.internal.IV; -import com.bigdata.rdf.internal.IVUtility; +import com.bigdata.rdf.internal.XSDBooleanIV; /** * Compare two terms for exact equality. 
*/ -public class SameTermBOp extends BOpConstraint { +public class SameTermBOp extends ValueExpressionBOp + implements IValueExpression<IV> { /** * */ private static final long serialVersionUID = 1L; + public SameTermBOp(final IValueExpression<IV> left, + final IValueExpression<IV> right) { + + this(new BOp[] { left, right }, null); + + } + /** * Required shallow copy constructor. */ - public SameTermBOp(final BOp[] values, - final Map<String, Object> annotations) { - super(values, annotations); + public SameTermBOp(final BOp[] args, final Map<String, Object> anns) { + + super(args, anns); + + if (args.length != 2 || args[0] == null || args[1] == null) + throw new IllegalArgumentException(); + } /** @@ -62,26 +70,23 @@ super(op); } - public SameTermBOp(final IValueExpression<IV> left, - final IValueExpression<IV> right) { - - super(new BOp[] { left, right }, null); + public boolean accept(final IBindingSet bs) { - if (left == null || right == null) - throw new IllegalArgumentException(); + final IV left = get(0).get(bs); + final IV right = get(1).get(bs); - } - - public boolean accept(final IBindingSet s) { - - final IV left = ((IValueExpression<IV>) get(0)).get(s); - final IV right = ((IValueExpression<IV>) get(1)).get(s); - + // not yet bound if (left == null || right == null) - return true; // not yet bound. + throw new SparqlTypeErrorException(); return left.equals(right); } + public IV get(final IBindingSet bs) { + + return accept(bs) ? 
XSDBooleanIV.TRUE : XSDBooleanIV.FALSE; + + } + } Added: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/ValueExpressionBOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/ValueExpressionBOp.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/ValueExpressionBOp.java 2011-02-22 20:29:45 UTC (rev 4219) @@ -0,0 +1,66 @@ +/* + +Copyright (C) SYSTAP, LLC 2006-2011. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +*/ +package com.bigdata.rdf.internal.constraints; + +import java.util.Map; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.IValueExpression; +import com.bigdata.bop.ImmutableBOp; +import com.bigdata.rdf.internal.IV; + +/** + * Base class for RDF value expression BOps. Value expressions perform some + * evaluation on one or more value expressions as input and produce one + * value expression as output (boolean, numeric value, etc.) + */ +public abstract class ValueExpressionBOp extends ImmutableBOp + implements IValueExpression<IV> { + + /** + * + */ + private static final long serialVersionUID = -7068219781217676085L; + + /** + * Required shallow copy constructor. 
+ */ + public ValueExpressionBOp(final BOp[] args, final Map<String, Object> anns) { + super(args, anns); + } + + /** + * Required deep copy constructor. + */ + public ValueExpressionBOp(final ValueExpressionBOp op) { + super(op); + } + + @Override + public IValueExpression<IV> get(final int i) { + return (IValueExpression<IV>) super.get(i); + } + +} Added: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/ValueExpressionConstraint.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/ValueExpressionConstraint.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/ValueExpressionConstraint.java 2011-02-22 20:29:45 UTC (rev 4219) @@ -0,0 +1,109 @@ +/* + +Copyright (C) SYSTAP, LLC 2006-2011. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +*/ +package com.bigdata.rdf.internal.constraints; + +import java.util.Map; + +import org.apache.log4j.Logger; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IConstraint; +import com.bigdata.bop.IValueExpression; +import com.bigdata.bop.constraint.BOpConstraint; +import com.bigdata.rdf.error.SparqlTypeErrorException; +import com.bigdata.rdf.internal.IV; + +/** + * BOpConstraint that wraps a {@link EBVBOp}, which itself computes the + * effective boolean value of an IValueExpression. + */ +public class ValueExpressionConstraint extends BOpConstraint { + + /** + * + */ + private static final long serialVersionUID = -7068219781217676085L; + + protected static final Logger log = Logger.getLogger(ValueExpressionConstraint.class); + + /** + * Convenience method to generate a constraint from a value expression. + */ + public static IConstraint wrap(final IValueExpression<IV> ve) { + return new ValueExpressionConstraint(new EBVBOp(ve)); + } + + + public ValueExpressionConstraint(final EBVBOp x) { + + this(new BOp[] { x }, null/*annocations*/); + + } + + /** + * Required shallow copy constructor. + */ + public ValueExpressionConstraint(final BOp[] args, + final Map<String, Object> anns) { + + super(args, anns); + + if (args.length != 1 || args[0] == null) + throw new IllegalArgumentException(); + + } + + /** + * Required deep copy constructor. 
+ */ + public ValueExpressionConstraint(final ValueExpressionConstraint op) { + super(op); + } + + @Override + public EBVBOp get(final int i) { + return (EBVBOp) super.get(i); + } + + public boolean accept(final IBindingSet bs) { + + try { + + // evaluate the EBV operator + return get(0).get(bs).booleanValue(); + + } catch (SparqlTypeErrorException ex) { + + // trap the type error and filter out the solution + if (log.isInfoEnabled()) + log.info("discarding solution due to type error: " + bs); + return false; + + } + + } + +} Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java 2011-02-21 22:01:49 UTC (rev 4218) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java 2011-02-22 20:29:45 UTC (rev 4219) @@ -12,18 +12,22 @@ import com.bigdata.bop.Constant; import com.bigdata.bop.IConstraint; import com.bigdata.bop.IPredicate; +import com.bigdata.bop.IValueExpression; import com.bigdata.bop.IVariable; import com.bigdata.bop.NV; import com.bigdata.bop.Var; import com.bigdata.bop.IPredicate.Annotations; -import com.bigdata.bop.constraint.NEConstant; import com.bigdata.bop.engine.QueryLog; import com.bigdata.bop.joinGraph.rto.JoinGraph; import com.bigdata.journal.ITx; import com.bigdata.journal.Journal; +import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.internal.XSDIntIV; import com.bigdata.rdf.internal.constraints.CompareBOp; import com.bigdata.rdf.internal.constraints.MathBOp; +import com.bigdata.rdf.internal.constraints.NotBOp; +import com.bigdata.rdf.internal.constraints.SameTermBOp; +import com.bigdata.rdf.internal.constraints.ValueExpressionConstraint; import com.bigdata.rdf.model.BigdataURI; import com.bigdata.rdf.model.BigdataValue; import 
com.bigdata.rdf.model.BigdataValueFactory; @@ -380,15 +384,14 @@ // the vertices of the join graph (the predicates). preds = new IPredicate[] { p0, p1, p2, p3, p4, p5, p6 }; - // the constraints on the join graph. - constraints = new IConstraint[] { + final IValueExpression[] ves = new IValueExpression[] { /* * FILTER * (<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances * /dataFromProducer1092/Product53999> != ?product) */ - new NEConstant(product, new Constant(product53999.getIV())), // + new NotBOp(new SameTermBOp(product, new Constant(product53999.getIV()))), // /* * FILTER (?simProperty1 < (?origProperty1 + 120) && @@ -431,6 +434,12 @@ CompareOp.GT) })),// }; + + // the constraints on the join graph. + constraints = new IConstraint[ves.length]; + for (int i = 0; i < ves.length; i++) { + constraints[i] = ValueExpressionConstraint.wrap(ves[i]); + } } Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/internal/constraints/TestInlineConstraints.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/internal/constraints/TestInlineConstraints.java 2011-02-21 22:01:49 UTC (rev 4218) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/internal/constraints/TestInlineConstraints.java 2011-02-22 20:29:45 UTC (rev 4219) @@ -28,23 +28,39 @@ package com.bigdata.rdf.internal.constraints; import java.util.GregorianCalendar; +import java.util.LinkedList; +import java.util.List; +import java.util.Properties; +import java.util.concurrent.atomic.AtomicInteger; import javax.xml.datatype.XMLGregorianCalendar; +import org.apache.log4j.Logger; import org.openrdf.model.vocabulary.RDF; +import org.openrdf.query.QueryEvaluationException; import org.openrdf.query.algebra.Compare.CompareOp; import org.openrdf.query.algebra.MathExpr.MathOp; +import com.bigdata.bop.BOp; +import com.bigdata.bop.BOpUtility; import com.bigdata.bop.Constant; import 
com.bigdata.bop.IBindingSet; import com.bigdata.bop.IConstant; import com.bigdata.bop.IConstraint; import com.bigdata.bop.IPredicate; import com.bigdata.bop.IVariable; +import com.bigdata.bop.IVariableOrConstant; +import com.bigdata.bop.NV; +import com.bigdata.bop.PipelineOp; import com.bigdata.bop.Var; +import com.bigdata.bop.IPredicate.Annotations; +import com.bigdata.bop.engine.IRunningQuery; +import com.bigdata.bop.engine.QueryEngine; +import com.bigdata.bop.fed.QueryEngineFactory; import com.bigdata.bop.joinGraph.IEvaluationPlan; import com.bigdata.bop.joinGraph.IEvaluationPlanFactory; import com.bigdata.bop.joinGraph.fast.DefaultEvaluationPlanFactory2; +import com.bigdata.btree.IRangeQuery; import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.internal.IVUtility; import com.bigdata.rdf.model.BigdataLiteral; @@ -53,15 +69,23 @@ import com.bigdata.rdf.model.BigdataValueFactory; import com.bigdata.rdf.rio.StatementBuffer; import com.bigdata.rdf.rules.RuleContextEnum; +import com.bigdata.rdf.sail.BigdataSail; +import com.bigdata.rdf.sail.Rule2BOpUtility; +import com.bigdata.rdf.sail.sop.SOp2BOpUtility; +import com.bigdata.rdf.sail.sop.UnsupportedOperatorException; import com.bigdata.rdf.spo.SPOPredicate; import com.bigdata.rdf.store.AbstractTripleStore; import com.bigdata.rdf.store.ProxyTestCase; +import com.bigdata.relation.accesspath.ElementFilter; +import com.bigdata.relation.accesspath.IAsynchronousIterator; import com.bigdata.relation.rule.IRule; import com.bigdata.relation.rule.Rule; import com.bigdata.relation.rule.eval.ActionEnum; import com.bigdata.relation.rule.eval.IJoinNexus; import com.bigdata.relation.rule.eval.IJoinNexusFactory; import com.bigdata.relation.rule.eval.ISolution; +import com.bigdata.striterator.ChunkedWrappedIterator; +import com.bigdata.striterator.Dechunkerator; import com.bigdata.striterator.IChunkedOrderedIterator; import com.sun.org.apache.xerces.internal.jaxp.datatype.XMLGregorianCalendarImpl; @@ -71,6 +95,8 @@ */ public 
class TestInlineConstraints extends ProxyTestCase { + protected static final Logger log = Logger.getLogger(TestInlineConstraints.class); + /** * */ @@ -85,6 +111,13 @@ super(name); } + @Override + public Properties getProperties() { + final Properties props = super.getProperties(); + props.setProperty(BigdataSail.Options.INLINE_DATE_TIMES, "true"); + return props; + } + public void testGT() { // store with no owl:sameAs closure @@ -142,25 +175,27 @@ final IRule rule = new Rule("test_greater_than", null, // head new IPredicate[] { - new SPOPredicate(SPO, s, type, x), - new SPOPredicate(SPO, s, age, a) + toPredicate(db, s, type, x), + toPredicate(db, s, age, a) }, // constraints on the rule. new IConstraint[] { - new CompareBOp(a, new Constant<IV>(_35.getIV()), CompareOp.GT) - }); + ValueExpressionConstraint.wrap(new CompareBOp(a, new Constant<IV>(_35.getIV()), CompareOp.GT)) + } + ); try { int numSolutions = 0; - IChunkedOrderedIterator<ISolution> solutions = runQuery(db, rule); + final IChunkedOrderedIterator<IBindingSet> solutions = runQuery(db, rule); while (solutions.hasNext()) { - ISolution solution = solutions.next(); + final IBindingSet bs = solutions.next(); - IBindingSet bs = solution.getBindingSet(); + ... [truncated message content] |
From: <tho...@us...> - 2011-02-23 14:44:26
|
Revision: 4234 http://bigdata.svn.sourceforge.net/bigdata/?rev=4234&view=rev Author: thompsonbry Date: 2011-02-23 14:44:18 +0000 (Wed, 23 Feb 2011) Log Message: ----------- Working on the runtime optimizer. - Split the JGraph class out of the JoinGraph operator. - Split out the static inner classes {Edge, Vertex, VertexSample, EdgeSample, Path, etc.} to reduce the complexity of the JGraph class file. Note that LUBM U1 has no solutions for Q2. Likewise, there are no solutions for BSBM Q5 for the 100 data set. However, it produces the same join ordering as the 100M and 200M data sets. - Added a SAMPLE_TYPE argument to the JoinGraph and modified JGraph to accept the SampleType as an argument in its constructor which is then passed through to the code which actually samples the vertices. - Modified the unit tests to use a non-random sample of the vertices by default in order to make the test results repeatable. However, it is interesting to run the tests against larger external data sets with random sampling to observe the effects of the sample bias on the estimated cost of the query! - Added a SampleBase which is extended by VertexSample and EdgeSample. - Modified SampleBase and SampleVertex to represent samples as IBindingSet[]s. Note: At some point we will need to generalize this to accept samples which are stored on the heap using the memory manager. This should be addressed when we integrate the memory manager into the query engine as the same abstraction is required in both places. In addition, the RTO will need to directly manage the allocation contexts associated with the samples so they can be released in a timely manner and no later than when the runtimeOptimization() method is done (except that exact samples could be used to execute the query once we drive the named and default graph query constructions down into the RTO by using an inline AP for the DataSetJoin). Note: This also fixes the build. 
My last commit included a reference to JGraph as a top-level class, which broken the build. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleIndex.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JoinGraph.java branches/QUADS_QUERY_BRANCH/bigdata/src/resources/logging/log4j.properties branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/joinGraph/rto/TestJGraph.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/AbstractJoinGraphTestCase.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBarData.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/Edge.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/EdgeSample.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/EstimateEnum.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/EstimatedEdgeCardinalityComparator.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/Path.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/SampleBase.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/Vertex.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/VertexSample.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleIndex.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleIndex.java 2011-02-23 
13:43:56 UTC (rev 4233) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleIndex.java 2011-02-23 14:44:18 UTC (rev 4234) @@ -96,9 +96,12 @@ * sampling join paths. */ public static enum SampleType { - /** - * Samples are taken at even space offsets. - */ + /** + * Samples are taken at even space offsets. This produces a sample + * without any random effects. Re-sampling an index having the same data + * with the same key-range and the limit will always return the same + * results. This is useful to make unit test repeatable. + */ EVEN, /** * Sample offsets are computed randomly. @@ -153,19 +156,28 @@ super(args, annotations); } - + + /** + * @see Annotations#LIMIT + */ public int limit() { return getProperty(Annotations.LIMIT, Annotations.DEFAULT_LIMIT); } + /** + * @see Annotations#SEED + */ public long seed() { return getProperty(Annotations.SEED, Annotations.DEFAULT_SEED); } - + + /** + * @see Annotations#SAMPLE_TYPE + */ public SampleType getSampleType() { return SampleType.valueOf(getProperty(Annotations.SAMPLE_TYPE, Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/Edge.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/Edge.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/Edge.java 2011-02-23 14:44:18 UTC (rev 4234) @@ -0,0 +1,556 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2011. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Feb 22, 2011 + */ +package com.bigdata.bop.joinGraph.rto; + +import java.io.Serializable; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.UUID; + +import org.apache.log4j.Logger; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.BOpEvaluationContext; +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IConstraint; +import com.bigdata.bop.IPredicate; +import com.bigdata.bop.NV; +import com.bigdata.bop.PipelineOp; +import com.bigdata.bop.engine.IRunningQuery; +import com.bigdata.bop.engine.LocalChunkMessage; +import com.bigdata.bop.engine.QueryEngine; +import com.bigdata.bop.join.PipelineJoin; +import com.bigdata.bop.join.PipelineJoin.PipelineJoinStats; +import com.bigdata.relation.accesspath.ThickAsynchronousIterator; +import com.bigdata.striterator.Dechunkerator; + +/** + * An edge of the join graph is an annotated join operator. The edges of the + * join graph are undirected. Edges exist when the vertices share at least one + * variable. + * <p> + * {@link #hashCode()} is defined in terms of the unordered hash codes of the + * individual vertices. + */ +public class Edge implements Serializable { + + private static final transient Logger log = Logger.getLogger(Edge.class); + + private static final long serialVersionUID = 1L; + + /** + * The vertices connected by that edge. + */ + public final Vertex v1, v2; + + // /** + // * The set of shared variables. + // */ + // public final Set<IVariable<?>> shared; + + /** + * The last sample for this edge and <code>null</code> if the edge has not + * been sampled. 
+ * <p> + * Note: This sample is only the one-step cutoff evaluation of the edge + * given a sample of its vertex having the lesser cardinality. It is NOT the + * cutoff sample of a join path having this edge except for the degenerate + * case where the edge is the first edge in the join path. + */ + transient EdgeSample sample = null; + + /** + * + * @param path + * The path which the edge is extending. + * @param v1 + * A vertex in that path which serves as the source of this edge. + * @param v2 + * + * FIXME EDGES : The concept of the "source" of an edge is + * actually quite misleading. This was originally an (arbitrary) + * vertex which shared a variable with the target vertex. + * However, in order to handle joins which are only indirectly + * constrained by a constraint we need to allow for a source + * vertex which does not share any variable (directly) with the + * target vertex. In addition, we also need the source path or + * the set of constraints to be attached to the edge. Finally, we + * can no longer share edges since they have to have some aspect + * of history attached. All in all, the "edge" is really just the + * last aspect of a path so what we have are ordered arrays of + * predicates and the constraints which run when each predicate + * is evaluated as part of a join. + */ + // * @param shared + public Edge(// + // final IPredicate<?>[] path, + final Vertex v1, final Vertex v2 + // , final Set<IVariable<?>> shared + ) { + if (v1 == null) + throw new IllegalArgumentException(); + if (v2 == null) + throw new IllegalArgumentException(); + // if (shared == null) + // throw new IllegalArgumentException(); + // Note: We need to allow edges which do not share variables + // if (shared.isEmpty()) + // throw new IllegalArgumentException(); + this.v1 = v1; + this.v2 = v2; + // this.shared = shared; + } + + /** + * The edge label is formed from the {@link BOp.Annotations#BOP_ID} of its + * ordered vertices (v1,v2). 
+ */ + public String getLabel() { + + return "(" + v1.pred.getId() + "," + v2.pred.getId() + ")"; + + } + + /** + * Note: The vertices of the edge are labeled using the + * {@link BOp.Annotations#BOP_ID} associated with the {@link IPredicate} for + * each vertex. + */ + public String toString() { + + return "Edge{ " + getLabel() + ", estCard=" + + (sample == null ? "N/A" : sample.estimatedCardinality) + // + ", shared=" + shared.toString() + + + ", sample=" + sample// + + "}"; + + } + + /** + * Equality is determined by reference testing. + */ + public boolean equals(final Object o) { + + return this == o; + + } + + /** + * The hash code of an edge is the hash code of the vertex with the smaller + * hash code X 31 plus the hash code of the vertex with the larger hash + * code. This definition compensates for the arbitrary order in which the + * vertices may be expressed and also recognizes that the vertex hash codes + * are based on the bop ids, which are often small integers. + */ + public int hashCode() { + + if (hash == 0) { + + final int h1 = v1.hashCode(); + final int h2 = v2.hashCode(); + + final int h; + if (h1 < h2) { + + h = h1 * 31 + h2; + + } else { + + h = h2 * 31 + h1; + + } + + hash = h; + + } + return hash; + + } + + private int hash; + + /** + * Return the vertex with the smaller estimated cardinality. + * + * @throws IllegalStateException + * if either vertex has not been sampled. + */ + public Vertex getMinimumCardinalityVertex() { + + if (v1.sample == null) // vertex not sampled. + throw new IllegalStateException(); + + if (v2.sample == null) // vertex not sampled. + throw new IllegalStateException(); + + return (v1.sample.estimatedCardinality < v2.sample.estimatedCardinality) ? v1 : v2; + + } + + /** + * Return the vertex with the larger estimated cardinality (the vertex not + * returned by {@link #getMinimumCardinalityVertex()}). + * + * @throws IllegalStateException + * if either vertex has not been sampled. 
+ */ + public Vertex getMaximumCardinalityVertex() { + + // The vertex with the minimum cardinality. + final Vertex o = getMinimumCardinalityVertex(); + + // Return the other vertex. + return (v1 == o) ? v2 : v1; + + } + + /** + * Estimate the cardinality of the edge, updating {@link #sample} as a + * side-effect. This is a NOP if the edge has already been sampled at that + * <i>limit</i>. This is a NOP if the edge sample is exact. + * + * @param queryEngine + * The query engine. + * @param limit + * The sample size. + * + * @return The new {@link EdgeSample} (this is also updated on + * {@link #sample} as a side-effect). + * + * @throws Exception + * + * FIXME This is actually using the source vertex as the source + * sample which is WRONG. + */ + public EdgeSample estimateCardinality(final QueryEngine queryEngine, + final int limit) throws Exception { + + if (limit <= 0) + throw new IllegalArgumentException(); + + // /* + // * Note: There is never a need to "re-sample" the edge. Unlike ROX, + // * we always can sample a vertex. This means that we can sample the + // * edges exactly once, during the initialization of the join graph. + // */ + // if (sample != null) + // throw new RuntimeException(); + + if (sample != null) { + + if (sample.limit >= limit) { + + // Already sampled at that limit. + return sample; + + } + + if (sample.estimateEnum == EstimateEnum.Exact) { + + // Sample is exact (fully materialized result). + return sample; + + } + + } + + /* + * Figure out which vertex has the smaller cardinality. The sample of + * that vertex is used since it is more representative than the sample + * of the other vertex. + * + * Note: If there are constraints which can run for this edge, then they + * will be attached when the edge is sampled. + */ + // vertex v, vprime + final Vertex v, vp; + if (v1.sample == null) // vertex not sampled. + throw new IllegalStateException(); + if (v2.sample == null) // vertex not sampled. 
+ throw new IllegalStateException(); + if (v1.sample.estimatedCardinality < v2.sample.estimatedCardinality) { + v = v1; + vp = v2; + } else { + v = v2; + vp = v1; + } + +// /* +// * Convert the source sample into an IBindingSet[]. +// * +// * Note: This is now done when we sample the vertex. +// */ +// final IBindingSet[] sourceSample = new IBindingSet[v.sample.sample.length]; +// { +// for (int i = 0; i < sourceSample.length; i++) { +// final IBindingSet bset = new HashBindingSet(); +// BOpContext.copyValues((IElement) v.sample.sample[i], v.pred, +// bset); +// sourceSample[i] = bset; +// } +// } + + // Sample the edge and save the sample on the edge as a side-effect. + this.sample = estimateCardinality(queryEngine, limit, v, vp, + v.sample // the source sample. +// v.sample.estimatedCardinality,// +// v.sample.estimateEnum == EstimateEnum.Exact, // +// v.sample.limit,// +// v.sample.sample// + ); + + return sample; + + } + + /** + * Estimate the cardinality of the edge given a sample of either a vertex or + * a join path leading up to that edge. + * <p> + * Note: The caller is responsible for protecting against needless + * re-sampling. + * + * @param queryEngine + * The query engine. + * @param limit + * The limit for the cutoff join. + * @param vSource + * The source vertex. + * @param vTarget + * The target vertex + * @param sourceSample + * The input sample for the cutoff join. When this is a one-step + * estimation of the cardinality of the edge, then this sample is + * taken from the {@link VertexSample}. When the edge (vSource, + * vTarget) extends some {@link Path}, then this is taken from + * the {@link EdgeSample} for that {@link Path}. + * + * @return The result of sampling that edge. + * + * @throws Exception + */ + // * @param path The path which is being extended. 
+ public EdgeSample estimateCardinality(// + final QueryEngine queryEngine,// + final int limit,// + // final IPredicate<?>[] path,// + final Vertex vSource,// + final Vertex vTarget,// + final SampleBase sourceSample// +// final long sourceEstimatedCardinality,// +// final boolean sourceSampleExact,// +// final int sourceSampleLimit,// +// final IBindingSet[] sourceSample// + ) throws Exception { + + if (limit <= 0) + throw new IllegalArgumentException(); + + /* + * Note: This sets up a cutoff pipeline join operator which makes an + * accurate estimate of the #of input solutions consumed and the #of + * output solutions generated. From that, we can directly compute the + * join hit ratio. This approach is preferred to injecting a "RowId" + * column as the estimates are taken based on internal counters in the + * join operator and the join operator knows how to cutoff evaluation as + * soon as the limit is satisfied, thus avoiding unnecessary effort. + */ + /* + * The set of constraint(s) (if any) which will be applied when we + * perform the cutoff evaluation of this edge (aka join). + * + * FIXME CONSTRAINTS - we need the join path to decide which constraints + * will be attached when we sample this edge (or at least the set of + * variables which are already known to be bound). + */ + final IConstraint[] constraints = null; + final int joinId = 1; + final Map<String, Object> anns = NV.asMap( + // + new NV(BOp.Annotations.BOP_ID, joinId),// + // @todo Why not use a factory which avoids bopIds + // already in use? + new NV(PipelineJoin.Annotations.PREDICATE, vTarget.pred.setBOpId(3)), + // Note: does not matter since not executed by the query + // controller. + // // disallow parallel evaluation of tasks + // new NV(PipelineOp.Annotations.MAX_PARALLEL,1), + // disallow parallel evaluation of chunks. 
+ new NV(PipelineJoin.Annotations.MAX_PARALLEL_CHUNKS, 0), + // disable access path coalescing + new NV( PipelineJoin.Annotations.COALESCE_DUPLICATE_ACCESS_PATHS, false), // + // pass in constraints on this join. + new NV(PipelineJoin.Annotations.CONSTRAINTS, constraints),// + // cutoff join. + new NV(PipelineJoin.Annotations.LIMIT, (long) limit), + /* + * Note: In order to have an accurate estimate of the + * join hit ratio we need to make sure that the join + * operator runs using a single PipelineJoinStats + * instance which will be visible to us when the query + * is cutoff. In turn, this implies that the join must + * be evaluated on the query controller. + * + * @todo This implies that sampling of scale-out joins + * must be done using remote access paths. + */ + new NV(PipelineJoin.Annotations.SHARED_STATE, true),// + new NV(PipelineJoin.Annotations.EVALUATION_CONTEXT, + BOpEvaluationContext.CONTROLLER)// + ); + + @SuppressWarnings("unchecked") + final PipelineJoin<?> joinOp = new PipelineJoin(new BOp[] {}, anns); + + final PipelineOp queryOp = joinOp; + + // run the cutoff sampling of the edge. + final UUID queryId = UUID.randomUUID(); + final IRunningQuery runningQuery = queryEngine.eval(queryId, queryOp, + new LocalChunkMessage<IBindingSet>(queryEngine, queryId, joinOp + .getId()/* startId */, -1 /* partitionId */, + new ThickAsynchronousIterator<IBindingSet[]>( + new IBindingSet[][] { sourceSample.sample }))); + + final List<IBindingSet> result = new LinkedList<IBindingSet>(); + try { + try { + IBindingSet bset = null; + // Figure out the #of source samples consumed. + final Iterator<IBindingSet> itr = new Dechunkerator<IBindingSet>( + runningQuery.iterator()); + while (itr.hasNext()) { + bset = itr.next(); + result.add(bset); + } + } finally { + // verify no problems. + runningQuery.get(); + } + } finally { + runningQuery.cancel(true/* mayInterruptIfRunning */); + } + + // The join hit ratio can be computed directly from these stats. 
+ final PipelineJoinStats joinStats = (PipelineJoinStats) runningQuery + .getStats().get(joinId); + + if (log.isTraceEnabled()) + log.trace(joinStats.toString()); + + // #of solutions in. + final int inputCount = (int) joinStats.inputSolutions.get(); + + // #of solutions out. + long outputCount = joinStats.outputSolutions.get(); + + // cumulative range count of the sampled access paths. + final long sumRangeCount = joinStats.accessPathRangeCount.get(); + + final EstimateEnum estimateEnum; + if (sourceSample.estimateEnum == EstimateEnum.Exact + && outputCount < limit) { + /* + * Note: If the entire source vertex is being fed into the cutoff + * join and the cutoff join outputCount is LT the limit, then the + * sample is the actual result of the join. That is, feeding all + * source solutions into the join gives fewer than the desired + * number of output solutions. + */ + estimateEnum = EstimateEnum.Exact; + } else if (inputCount == 1 && outputCount == limit) { + /* + * If the inputCount is ONE (1) and the outputCount is the limit, + * then the estimated cardinality is a lower bound as more than + * outputCount solutions might be produced by the join when + * presented with a single input solution. + * + * However, this condition suggests that the sum of the sampled + * range counts is a much better estimate of the cardinality of this + * join. + * + * For example, consider a join feeding a rangeCount of 16 into a + * rangeCount of 175000. With a limit of 100, we estimated the + * cardinality at 1600L (lower bound). In fact, the cardinality is + * 16*175000. This falsely low estimate can cause solutions which + * are really better to be dropped. + */ + // replace outputCount with the sum of the sampled range counts. 
+ outputCount = sumRangeCount; + estimateEnum = EstimateEnum.LowerBound; + } else if (!(sourceSample.estimateEnum != EstimateEnum.Exact) + && inputCount == Math.min(sourceSample.limit, + sourceSample.estimatedCardinality) && outputCount == 0) { + /* + * When the source sample was not exact, the inputCount is EQ to the + * lesser of the source range count and the source sample limit, and + * the outputCount is ZERO (0), then feeding in all source solutions + * in is not sufficient to generate any output solutions. In this + * case, the estimated join hit ratio appears to be zero. However, + * the estimation of the join hit ratio actually underflowed and the + * real join hit ratio might be a small non-negative value. A real + * zero can only be identified by executing the full join. + * + * Note: An apparent join hit ratio of zero does NOT imply that the + * join will be empty (unless the source vertex sample is actually + * the fully materialized access path - this case is covered above). + */ + estimateEnum = EstimateEnum.Underflow; + } else { + estimateEnum = EstimateEnum.Normal; + } + + final double f = outputCount == 0 ? 
0 + : (outputCount / (double) inputCount); + + final long estimatedCardinality = (long) (sourceSample.estimatedCardinality * f); + + final EdgeSample edgeSample = new EdgeSample(// +// sourceSample.estimatedCardinality, // +// sourceSample.estimateEnum, // +// sourceSample.limit, // + sourceSample,// + inputCount,// + outputCount, // + f, // + // args to SampleBase + estimatedCardinality, // + limit, // + estimateEnum,// + result.toArray(new IBindingSet[result.size()])); + + if (log.isDebugEnabled()) + log.debug(getLabel() + " : newSample=" + edgeSample); + + return edgeSample; + + } + +} Property changes on: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/Edge.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/EdgeSample.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/EdgeSample.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/EdgeSample.java 2011-02-23 14:44:18 UTC (rev 4234) @@ -0,0 +1,174 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2011. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +package com.bigdata.bop.joinGraph.rto; + +import com.bigdata.bop.IBindingSet; + +/** + * A sample of an {@link Edge} (a join). + */ +public class EdgeSample extends SampleBase { + +// private static final transient Logger log = Logger +// .getLogger(EdgeSample.class); + +// /** +// * The estimated cardinality of the cutoff join. +// */ +// public final long estimatedCardinality; +// +// /** +// * The limit used to sample the cutoff join. +// */ +// public final int limit; +// +// /** +// * Indicates whether the estimate is exact, an upper bound, or a lower +// * bound. +// */ +// public final EstimateEnum estimateEnum; +// +// /** +// * The sample of the solutions for the join path. +// */ +// final IBindingSet[] sample; + + /** + * The source sample used to compute the cutoff join. + */ + public final SampleBase sourceSample; + +// /** +// * The estimated cardinality of the source sample. +// */ +// public final long sourceEstimatedCardinality; +// +// /** +// * The sample type for the source sample. +// */ +// public final EstimateEnum sourceEstimateEnum; +// +// /** +// * Return <code>true</code> iff the source sample is exact (because the +// * source is either a fully materialized vertex or an edge whose solutions +// * have been fully materialized). +// */ +// public boolean isSourceSampleExact() { +// +// return sourceEstimateEnum == EstimateEnum.Exact; +// +// } + + /** + * The #of binding sets out of the source sample vertex sample which were + * consumed. + */ + public final int inputCount; + + /** + * The #of binding sets generated before the join was cutoff. + * <p> + * Note: If the outputCount is zero then this is a good indicator that there + * is an error in the query such that the join will not select anything. 
+ * This is not 100%, merely indicative. + */ + public final long outputCount; + + /** + * The ratio of the #of input samples consumed to the #of output samples + * generated (the join hit ratio or scale factor). + */ + public final double f; + + /** + * Create an object which encapsulates a sample of an edge. + * + * @param sourceSample + * The input sample. + * @param limit + * The limit used to sample the edge (this is the limit on the + * #of solutions generated by the cutoff join used when this + * sample was taken). + * @param inputCount + * The #of binding sets out of the source sample vertex sample + * which were consumed. + * @param outputCount + * The #of binding sets generated before the join was cutoff. + */ +// * @param sourceVertexSample +// * The sample for source vertex of the edge (whichever vertex has +// * the lower cardinality). + EdgeSample( + // final VertexSample sourceVertexSample, + final SampleBase sourceSample,// +// final long sourceEstimatedCardinality,// +// final EstimateEnum sourceEstimateEnum, // +// final int sourceSampleLimit,// + final int inputCount, // + final long outputCount,// + final double f, // + // args to SampleBase + final long estimatedCardinality,// + final int limit,// + final EstimateEnum estimateEnum,// + final IBindingSet[] sample// + ) { + + super(estimatedCardinality, limit, estimateEnum, sample); + + if(sourceSample == null) + throw new IllegalArgumentException(); + + this.sourceSample = sourceSample; + +// this.sourceEstimatedCardinality = sourceSample.estimatedCardinality; +// +// this.sourceEstimateEnum = sourceSample.estimateEnum; + + this.inputCount = inputCount; + + this.outputCount = outputCount; + + this.f = f; + + } + + @Override + protected void toString(final StringBuilder sb) { +// return getClass().getName() // +// + "{ estimatedCardinality=" + estimatedCardinality// +// + ", limit=" + limit // +// + ", estimateEnum=" + estimateEnum// + sb.append(", sourceEstimatedCardinality=" + 
sourceSample.estimatedCardinality); + sb.append(", sourceEstimateEnum=" + sourceSample.estimateEnum); + sb.append(", inputCount=" + inputCount); + sb.append(", outputCount=" + outputCount); + sb.append(", f=" + f); + // + ", estimateIsLowerBound=" + estimateIsLowerBound// + // + ", estimateIsUpperBound=" + estimateIsUpperBound// + // + ", sampleIsExactSolution=" + estimateIsExact // +// + "}"; + } + +} Property changes on: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/EdgeSample.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/EstimateEnum.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/EstimateEnum.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/EstimateEnum.java 2011-02-23 14:44:18 UTC (rev 4234) @@ -0,0 +1,68 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2011. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +package com.bigdata.bop.joinGraph.rto; + +/** + * Type safe enumeration describes the edge condition (if any) for a cardinality + * estimate. 
+ */ +public enum EstimateEnum { + /** + * An estimate, but not any of the edge conditions. + */ + Normal(" "), + /** + * The cardinality estimate is exact. + */ + Exact("E"), + /** + * The cardinality estimation is a lower bound (the actual cardinality may + * be higher than the estimated value). + * <p> + * Note: The estimated cardinality reported for a {@link #LowerBound} is the + * sum of the fast range counts for the sampled access paths. See the logic + * which handles cutoff join sampling for details on this. + */ + LowerBound("L"), + /** + * Flag is set when the cardinality estimate underflowed (false zero (0)). + */ + Underflow("U"), + ; + + private EstimateEnum(final String code) { + + this.code = code; + + } + + private final String code; + + public String getCode() { + + return code; + + } + +} Property changes on: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/EstimateEnum.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/EstimatedEdgeCardinalityComparator.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/EstimatedEdgeCardinalityComparator.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/EstimatedEdgeCardinalityComparator.java 2011-02-23 14:44:18 UTC (rev 4234) @@ -0,0 +1,62 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2011. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +package com.bigdata.bop.joinGraph.rto; + +import java.util.Comparator; + + +/** + * Places edges into order by ascending estimated cardinality. Edges which + * are not weighted are ordered to the end. + * + * TODO unit tests, including with unweighted edges. + */ +class EstimatedEdgeCardinalityComparator implements Comparator<Edge> { + + public static final transient Comparator<Edge> INSTANCE = new EstimatedEdgeCardinalityComparator(); + + // @Override + public int compare(final Edge o1, final Edge o2) { + if (o1.sample == null && o2.sample == null) { + // Neither edge is weighted. + return 0; + } + if (o1.sample == null) { + // o1 is not weighted, but o2 is. sort o1 to the end. + return 1; + } + if (o2.sample == null) { + // o2 is not weighted. sort o2 to the end. 
+ return -1; + } + final long id1 = o1.sample.estimatedCardinality; + final long id2 = o2.sample.estimatedCardinality; + if (id1 < id2) + return -1; + if (id1 > id2) + return 1; + return 0; + } + +} Property changes on: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/EstimatedEdgeCardinalityComparator.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java 2011-02-23 14:44:18 UTC (rev 4234) @@ -0,0 +1,1937 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2011. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Feb 22, 2011 + */ + +package com.bigdata.bop.joinGraph.rto; + +import java.util.Arrays; +import java.util.Collections; +import java.util.Formatter; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.LinkedHashSet; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.log4j.Logger; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.BOpUtility; +import com.bigdata.bop.IConstraint; +import com.bigdata.bop.IPredicate; +import com.bigdata.bop.IVariable; +import com.bigdata.bop.ap.SampleIndex.SampleType; +import com.bigdata.bop.engine.QueryEngine; +import com.bigdata.bop.joinGraph.NoSolutionsException; +import com.bigdata.bop.joinGraph.PartitionedJoinGroup; +import com.bigdata.bop.rdf.join.DataSetJoin; + +/** + * A runtime optimizer for a join graph. The {@link JoinGraph} bears some + * similarity to ROX (Runtime Optimizer for XQuery), but has several significant + * differences: + * <ol> + * <li> + * 1. ROX starts from the minimum cardinality edge of the minimum cardinality + * vertex. The {@link JoinGraph} starts with one or more low cardinality + * vertices.</li> + * <li> + * 2. ROX always extends the last vertex added to a given join path. The + * {@link JoinGraph} extends all vertices having unexplored edges in each + * breadth first expansion.</li> + * <li> + * 3. ROX is designed to interleave operator-at-once evaluation of join path + * segments which dominate other join path segments. 
The {@link JoinGraph} is + * designed to prune all join paths which are known to be dominated by other + * join paths for the same set of vertices in each round and iterates until a + * join path is identified which uses all vertices and has the minimum expected + * cumulative estimated cardinality. Join paths which survive pruning are + * re-sampled as necessary in order to obtain better information about edges in + * join paths which have a low estimated cardinality in order to address a + * problem with underflow of the cardinality estimates.</li> + * </ol> + * + * TODO For join graphs with a large number of vertices we may need to constrain + * the #of vertices which are explored in parallel. This could be done by only + * branching the N lowest cardinality vertices from the already connected edges. + * Since fewer vertices are being explored in parallel, paths are more likely to + * converge onto the same set of vertices at which point we can prune the + * dominated paths. + * + * TODO Compare the cumulative expected cardinality of a join path with the + * expected cost of a join path. The latter allows us to also explore + * alternative join strategies, such as the parallel subquery versus scan and + * filter decision for named graph and default graph SPARQL queries. + * + * TODO Coalescing duplicate access paths can dramatically reduce the work + * performed by a pipelined nested index subquery. (A hash join eliminates all + * duplicate access paths using a scan and filter approach.) If we will run a + * pipeline nested index subquery join, then should the runtime query optimizer + * prefer paths with duplicate access paths? + * + * TODO How can we handle things like lexicon joins. A lexicon join is only + * evaluated when the dynamic type of a variable binding indicates that the RDF + * Value must be materialized by a join against the ID2T index. Binding sets + * having inlined values can simply be routed around the join against the ID2T + * index. 
Routing around saves network IO in scale-out where otherwise we would + * route binding sets having identifiers which do not need to be materialized to + * the ID2T shards. + * + * @todo Examine the overhead of the runtime optimizer. Look at ways to prune + * its costs. For example, by pruning the search, by recognizing when the + * query is simple enough to execute directly, by recognizing when we have + * already materialized the answer to the query, etc. + * + * @todo Cumulative estimated cardinality is an estimate of the work to be done. + * However, the actual cost of a join depends on whether we will use + * nested index subquery or a hash join and the cost of that operation on + * the database. There could be counter examples where the cost of the + * hash join with a range scan using the unbound variable is LT the nested + * index subquery. For those cases, we will do the same amount of IO on + * the hash join but there will still be a lower cardinality to the join + * path since we are feeding in fewer solutions to be joined. + * + * @todo Look at the integration with the SAIL. We decorate the joins with some + * annotations. Those will have to be correctly propagated to the "edges" + * in order for edge sampling and incremental evaluation (or final + * evaluation) to work. The {@link DataSetJoin} essentially inlines one of + * its access paths. That should really be changed into an inline access + * path and a normal join operator so we can defer some of the details + * concerning the join operator annotations until we decide on the join + * path to be executed. An inline AP really implies an inline relation, + * which in turn implies that the query is a searchable context for + * query-local resources. + * <p> + * For s/o, when the AP is remote, the join evaluation context must be ANY + * and otherwise (for s/o) it must be SHARDED. 
+ * <p> + * Since the join graph is fed the vertices (APs), it does not have access + * to the annotated joins so we need to generate appropriately annotated + * joins when sampling an edge and when evaluating a subquery. + * <p> + * One solution would be to always use the unpartitioned views of the + * indices for the runtime query optimizer, which is how we are estimating + * the range counts of the access paths right now. [Note that the static + * query optimizer ignores named and default graphs, while the runtime + * query optimizer SHOULD pay attention to these things and exploit their + * conditional selectivity for the query plan.] + * + * @todo Handle optional join graphs by first applying the runtime optimizer to + * the main join graph and obtaining a sample for the selected join path. + * That sample will then be fed into the optional join graph in order + * to optimize the join order within the optional join graph (a join order + * which is selective in the optional join graph is better since it builds + * up the #of intermediate results more slowly and hence does less work). + * <p> + * This is very much related to accepting a collection of non-empty + * binding sets when running the join graph. However, optional join graph + * should be presented in combination with the original join graph and the + * starting paths must be constrained to have the selected join path for + * the original join graph as a prefix. With this setup, the original join + * graph has been locked in to a specific join path and the sampling of + * edges and vertices for the optional join graph can proceed normally. + * <p> + * True optionals will always be appended as part of the "tail plan" for + * any join graph and can not be optimized as each optional join must run + * regardless (as long as the intermediate solution survives the + * non-optional joins). + * + * @todo There are two cases where a join graph must be optimized against a + * specific set of inputs. 
In one case, it is a sample (this is how + * optimization of an optional join group proceeds per above). In the + * other case, the set of inputs is fixed and is provided instead of a + * single empty binding set as the starting condition. This second case is + * actually a bit more complicated since we can not use a random sample of + * vertices unless they do not share any variables with the initial binding + * sets. When there is a shared variable, we need to do a cutoff join of + * the edge with the initial binding sets. When there is not a shared + * variable, we can sample the vertex and then do a cutoff join. + * + * @todo When we run into a cardinality estimation underflow (the expected + * cardinality goes to zero) we could double the sample size for just + * those join paths which hit a zero estimated cardinality and re-run them + * within the round. This would imply that we keep per join path limits. + * The vertex and edge samples are already aware of the limit at which + * they were last sampled so this should not cause any problems there. + * <p> + * A related option would be to deepen the samples only when we are in + * danger of cardinality estimation underflow. E.g., a per-path limit. + * Resampling vertices may only make sense when we increase the limit + * since otherwise we may find a different correlation with the new sample + * but the comparison of paths using one sample base with paths using a + * different sample base in a different round does not carry forward the + * cardinality estimates from the prior round (unless we do something like + * a weighted moving average). + * + * @todo When comparing choices among join paths having fully bound tails where + * the estimated cardinality has also gone to zero, we should prefer to + * evaluate vertices in the tail with better index locality first. 
For + * example, if one vertex had one variable in the original plan while + * another had two variables, then solutions which reach the 2-var vertex + * could be spread out over a much wider range of the selected index than + * those which reach the 1-var vertex. [In order to support this, we would + * need a means to indicate that a fully bound access path should use an + * index specified by the query optimizer rather than the primary index + * for the relation. In addition, this suggests that we should keep bloom + * filters for more than just the SPO(C) index in scale-out.] + * + * @todo Examine behavior when we do not have perfect covering indices. This + * will mean that some vertices can not be sampled using an index and that + * estimation of their cardinality will have to await the estimation of + * the cardinality of the edge(s) leading to that vertex. Still, the + * approach should be able to handle queries without perfect / covering + * automatically. Then experiment with carrying fewer statement indices + * for quads. + * + * @todo Unit test when there are no solutions to the query. In this case there + * will be no paths identified by the optimizer and the final path length + * becomes zero. + * + * @see <a + * href="http://www-db.informatik.uni-tuebingen.de/files/research/pathfinder/publications/rox-demo.pdf"> + * ROX </a> + */ +public class JGraph { + + private static final transient Logger log = Logger.getLogger(JGraph.class); + + /** + * Vertices of the join graph. + */ + private final Vertex[] V; + + /** + * Constraints on the join graph. A constraint is applied once all + * variables referenced by a constraint are known to be bound. + */ + private final IConstraint[] C; + + /** + * The kind of samples that will be taken when we sample a {@link Vertex}. + */ + private final SampleType sampleType; + + /** + * The edges of the join graph as determined by static analysis. Each + * edge is basically a possible join. 
This array is comprised of just + * those edges which are determined by static analysis. Additional edges + * MAY be identified dynamically. Altogether there are three categories + * of edges: + * <ol> + * <li>The vertices directly share variables (join with shared + * variable(s)). These edges are identified by static analysis in the + * constructor and are represented in {@link #E}.</li> + * <li>The vertices indirectly share variables via a constraint (join + * with indirectly shared variable(s)). These edges are identified by + * dynamic analysis. Each time we expand the set of join paths under + * consideration, we examine {@link #unshared} vertices. Given the join + * path under consideration and the set of constraints, it may be that + * the vertex will indirectly share a variable via a constraint and can + * therefore participate in a constrained join.</li> + * <li>The vertices do not share variables, either directly or + * indirectly (unconstrained join). All vertices can join. However, if + * there are no shared variables then the join is unconstrained (full + * cross product). These edges are identified dynamically. In each round + * for which a constrained edge could not be identified, we consider all + * vertices in {@link #unshared} and choose an unconstrained edge.</li> + * </ol> + * + * @see BOpUtility#canJoinUsingConstraints(IPredicate[], IPredicate, IConstraint[]) + */ + private final Vertex[] unshared; + + public List<Vertex> getVertices() { + return Collections.unmodifiableList(Arrays.asList(V)); + } + + public List<Edge> getEdges() { + return Collections.unmodifiableList(Arrays.asList(E)); + } + + public String toString() { + final StringBuilder sb = new StringBuilder(); + sb.append("JoinGraph"); + sb.append("{V=["); + for (Vertex v : V) { + sb.append("\nV[" + v.pred.getId() + "]=" + v); + } + sb.append("{C=["); + for (IConstraint c : C) { + sb.append("\nC[" + c.getId() + "]=" + c); + } + sb.append("\n],unshared=["); + for (Vertex v : unshared) { + sb.append("\n" + v); + } + sb.append("],E=["); + for (Edge e : E) { + sb.append("\n" + e); + } + sb.append("\n]}"); + return sb.toString(); + } + + /** + * + * @param v + * The vertices of the join graph. These are + * {@link IPredicate}s associated with required joins. + * @param constraints + * The constraints of the join graph (optional). Since all + * joins in the join graph are required, constraints are + * dynamically attached to the first join in which all of + * their variables are bound. + * + * @throws IllegalArgumentException + * if the vertices is <code>null</code>. + * @throws IllegalArgumentException + * if the vertices is an empty array. + * @throws IllegalArgumentException + * if any element of the vertices is <code>null</code>. + * @throws IllegalArgumentException + * if any constraint uses a variable which is never bound by + * the given predicates. + * @throws IllegalArgumentException + * if <i>sampleType</i> is <code>null</code>. + * + * @todo unit test for a constraint using a variable which is never + * bound. 
+ */ + private final Vertex[] unshared; + + public List<Vertex> getVertices() { + return Collections.unmodifiableList(Arrays.asList(V)); + } + + public List<Edge> getEdges() { + return Collections.unmodifiableList(Arrays.asList(E)); + } + + public String toString() { + final StringBuilder sb = new StringBuilder(); + sb.append("JoinGraph"); + sb.append("{V=["); + for (Vertex v : V) { + sb.append("\nV[" + v.pred.getId() + "]=" + v); + } + sb.append("{C=["); + for (IConstraint c : C) { + sb.append("\nC[" + c.getId() + "]=" + c); + } + sb.append("\n],unshared=["); + for (Vertex v : unshared) { + sb.append("\n" + v); + } + sb.append("],E=["); + for (Edge e : E) { + sb.append("\n" + e); + } + sb.append("\n]}"); + return sb.toString(); + } + + /** + * + * @param v + * The vertices of the join graph. These are + * {@link IPredicate}s associated with required joins. + * @param constraints + * The constraints of the join graph (optional). Since all + * joins in the join graph are required, constraints are + * dynamically attached to the first join in which all of + * their variables are bound. + * + * @throws IllegalArgumentException + * if the vertices is <code>null</code>. + * @throws IllegalArgumentException + * if the vertices is an empty array. + * @throws IllegalArgumentException + * if any element of the vertices is <code>null</code>. + * @throws IllegalArgumentException + * if any constraint uses a variable which is never bound by + * the given predicates. + * @throws IllegalArgumentException + * if <i>sampleType</i> is <code>null</code>. + * + * @todo unit test for a constraint using a variable which is never + * bound. 
+ */ + public JGraph(final IPredicate<?>[] v, final IConstraint[] constraints, + final SampleType sampleType) { + + if (v == null) + throw new IllegalArgumentException(); + + if (v.length < 2) + throw new IllegalArgumentException(); + + V = new Vertex[v.length]; + + for (int i = 0; i < v.length; i++) { + + if (v[i] == null) + throw new IllegalArgumentException(); + + V[i] = new Vertex(v[i]); + + } + + if (constraints != null) { + C = new IConstraint[constraints.length]; + for (int i = 0; i < constraints.length; i++) { + if (constraints[i] == null) + throw new IllegalArgumentException(); + C[i] = constraints[i]; + } + } else { + // No constraints. + C = null; + } + + if (sampleType == null) + throw new IllegalArgumentException(); + + this.sampleType = sampleType; + + /* + * Create edges to represent possible joins between predicates based + * on directly shared variables - these joins can be identified by a + * static analysis of the join graph. + * + * Note: There are really three classes of joins to be considered. + * + * (1) The target predicate directly shares a variable with the + * source predicate. These joins are always constrained since the + * source predicate will have bound that variable. This is the + * static analysis of the join graph. + * + * (2) When the source predicate shares a variable with a constraint + * which also shares a variable with the target predicate. While the + * predicates do not directly share a variable, these joins are + * constrained by the shared variable in the constraint on the + * target predicate. BSBM Q5 is an example of this case. We do not + * create edges for such joins here. Instead, we dynamically + * determine when a constrained join is possible when extending the + * join path in each round. Hence, this is part of the dynamic + * analysis of the join graph. + * + * (3) Any two predicates may always be joined. 
However, joins which + * do not share variables either directly or indirectly will be full + * cross products. Therefore such joins are run last and we do not + * create edges for them here. Again, this is part of the dynamic + * analysis of the join graph. + */ + { + + // The set of identified edges for vertices which share vars. + final List<Edge> tmp = new LinkedList<Edge>(); + + // The set of vertices which share variables. + final Set<Vertex> sharedEdgeVertices = new LinkedHashSet<Vertex>(); + + // The set of vertices which do not share variables. + final List<Vertex> unsharedEdgeVertices = new LinkedList<Vertex>(); + + for (int i = 0; i < v.length; i++) { + + // consider a source vertex. + final IPredicate<?> p1 = v[i]; + + // #of vertices which share a variable with source vertex. + int nmatched = 0; + + for (int j = i + 1; j < v.length; j++) { + + // consider a possible target vertex. + final IPredicate<?> p2 = v[j]; + +// final Set<IVariable<?>> shared = BOpUtility +// .getSharedVars(p1, p2); + + if(BOpUtility.canJoin(p1, p2)) { +// if (!shared.isEmpty()) { + + /* + * The source and target vertices can join based on + * one or more shared variable(s). + */ + + if (log.isDebugEnabled()) + log.debug("vertices shared variable(s): vars=" + + BOpUtility.getSharedVars(p1, p2) + + ", v1=" + p1 + ", v2=" + p2); + + tmp.add(new Edge(V[i], V[j]));//, shared)); + + sharedEdgeVertices.add(V[i]); + + sharedEdgeVertices.add(V[j]); + + nmatched++; + + } else if (constraints != null) { + + /* + * ... [truncated message content] |
From: <tho...@us...> - 2011-02-23 21:52:57
|
Revision: 4235 http://bigdata.svn.sourceforge.net/bigdata/?rev=4235&view=rev Author: thompsonbry Date: 2011-02-23 21:52:48 +0000 (Wed, 23 Feb 2011) Log Message: ----------- More work on the runtime query optimizer. As of this commit, it is finally paying attention to constraints on the join graph. The RTO continues to produce similar solutions other than for BSBM Q5. For BSBM Q5 it now considers the constraints on the join graph. However, I have not yet investigated the impact of the constraints on the selected join plan nor considered the efficiency of the new solution which is being produced for BSBM Q5 at the 100M or 200M data scales. - Dropped the Edge class which was making it impossible to apply constraints dynamically to cutoff joins. - Added support for constraints when computing cutoff joins. BSBM Q5 is the only query which we are running on the RTO which uses constraints so the solutions for the rest of the queries should not be changed as a result of this refactor. - Done: I am seeing too much edge resampling. The problem was that the LinkedHashMap with int[] keys was not recognizing when the keys were the same data. I introduced a PathIds class for the map key which fixes this. - Note: Estimates can randomly differ whenever there are two paths which span the same vertices and have exactly the same cost. For example, when the cost for all paths spanning some set of vertices is zero. In general, any path covering the same vertices for the same cost is as good as any other. In the special case when the cost is ZERO, this is either an underflow in the estimated cardinality or a true zero. [For LUBM Q2 on U1 it is a true zero.] - Added the EdgeSample for each new path to the edgeSamples map. Also added logic to clear samples from the edgeSamples map if we need to resample them at a higher limit. - Added Path#add/2 to centralize the logic for computing the cumulative cost of a join path. 
I am looking into ways of combining in the tuples read from the access path with the solutions produced as a better estimate of the access path cost and hence the total join path cost. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/EdgeSample.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/Path.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/Vertex.java branches/QUADS_QUERY_BRANCH/bigdata/src/resources/logging/log4j.properties branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBarData.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/EstimatedCardinalityComparator.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/PathIds.java Removed Paths: ------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/Edge.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/EstimatedEdgeCardinalityComparator.java Deleted: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/Edge.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/Edge.java 2011-02-23 14:44:18 UTC (rev 4234) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/Edge.java 2011-02-23 21:52:48 UTC (rev 4235) @@ -1,556 +0,0 @@ -/** - -Copyright (C) SYSTAP, LLC 2006-2011. All rights reserved. - -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... 
- -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ -/* - * Created on Feb 22, 2011 - */ -package com.bigdata.bop.joinGraph.rto; - -import java.io.Serializable; -import java.util.Iterator; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.UUID; - -import org.apache.log4j.Logger; - -import com.bigdata.bop.BOp; -import com.bigdata.bop.BOpEvaluationContext; -import com.bigdata.bop.IBindingSet; -import com.bigdata.bop.IConstraint; -import com.bigdata.bop.IPredicate; -import com.bigdata.bop.NV; -import com.bigdata.bop.PipelineOp; -import com.bigdata.bop.engine.IRunningQuery; -import com.bigdata.bop.engine.LocalChunkMessage; -import com.bigdata.bop.engine.QueryEngine; -import com.bigdata.bop.join.PipelineJoin; -import com.bigdata.bop.join.PipelineJoin.PipelineJoinStats; -import com.bigdata.relation.accesspath.ThickAsynchronousIterator; -import com.bigdata.striterator.Dechunkerator; - -/** - * An edge of the join graph is an annotated join operator. The edges of the - * join graph are undirected. Edges exist when the vertices share at least one - * variable. - * <p> - * {@link #hashCode()} is defined in terms of the unordered hash codes of the - * individual vertices. 
- */ -public class Edge implements Serializable { - - private static final transient Logger log = Logger.getLogger(Edge.class); - - private static final long serialVersionUID = 1L; - - /** - * The vertices connected by that edge. - */ - public final Vertex v1, v2; - - // /** - // * The set of shared variables. - // */ - // public final Set<IVariable<?>> shared; - - /** - * The last sample for this edge and <code>null</code> if the edge has not - * been sampled. - * <p> - * Note: This sample is only the one-step cutoff evaluation of the edge - * given a sample of its vertex having the lesser cardinality. It is NOT the - * cutoff sample of a join path having this edge except for the degenerate - * case where the edge is the first edge in the join path. - */ - transient EdgeSample sample = null; - - /** - * - * @param path - * The path which the edge is extending. - * @param v1 - * A vertex in that path which serves as the source of this edge. - * @param v2 - * - * FIXME EDGES : The concept of the "source" of an edge is - * actually quite misleading. This was originally an (arbitrary) - * vertex which shared a variable with the target vertex. - * However, in order to handle joins which are only indirectly - * constrained by a constraint we need to allow for a source - * vertex which does not share any variable (directly) with the - * target vertex. In addition, we also need the source path or - * the set of constraints to be attached to the edge. Finally, we - * can no longer share edges since they have to have some aspect - * of history attached. All in all, the "edge" is really just the - * last aspect of a path so what we have are ordered arrays of - * predicates and the constraints which run when each predicate - * is evaluated as part of a join. 
- */ - // * @param shared - public Edge(// - // final IPredicate<?>[] path, - final Vertex v1, final Vertex v2 - // , final Set<IVariable<?>> shared - ) { - if (v1 == null) - throw new IllegalArgumentException(); - if (v2 == null) - throw new IllegalArgumentException(); - // if (shared == null) - // throw new IllegalArgumentException(); - // Note: We need to allow edges which do not share variables - // if (shared.isEmpty()) - // throw new IllegalArgumentException(); - this.v1 = v1; - this.v2 = v2; - // this.shared = shared; - } - - /** - * The edge label is formed from the {@link BOp.Annotations#BOP_ID} of its - * ordered vertices (v1,v2). - */ - public String getLabel() { - - return "(" + v1.pred.getId() + "," + v2.pred.getId() + ")"; - - } - - /** - * Note: The vertices of the edge are labeled using the - * {@link BOp.Annotations#BOP_ID} associated with the {@link IPredicate} for - * each vertex. - */ - public String toString() { - - return "Edge{ " + getLabel() + ", estCard=" - + (sample == null ? "N/A" : sample.estimatedCardinality) - // + ", shared=" + shared.toString() + - + ", sample=" + sample// - + "}"; - - } - - /** - * Equality is determined by reference testing. - */ - public boolean equals(final Object o) { - - return this == o; - - } - - /** - * The hash code of an edge is the hash code of the vertex with the smaller - * hash code X 31 plus the hash code of the vertex with the larger hash - * code. This definition compensates for the arbitrary order in which the - * vertices may be expressed and also recognizes that the vertex hash codes - * are based on the bop ids, which are often small integers. - */ - public int hashCode() { - - if (hash == 0) { - - final int h1 = v1.hashCode(); - final int h2 = v2.hashCode(); - - final int h; - if (h1 < h2) { - - h = h1 * 31 + h2; - - } else { - - h = h2 * 31 + h1; - - } - - hash = h; - - } - return hash; - - } - - private int hash; - - /** - * Return the vertex with the smaller estimated cardinality. 
- * - * @throws IllegalStateException - * if either vertex has not been sampled. - */ - public Vertex getMinimumCardinalityVertex() { - - if (v1.sample == null) // vertex not sampled. - throw new IllegalStateException(); - - if (v2.sample == null) // vertex not sampled. - throw new IllegalStateException(); - - return (v1.sample.estimatedCardinality < v2.sample.estimatedCardinality) ? v1 : v2; - - } - - /** - * Return the vertex with the larger estimated cardinality (the vertex not - * returned by {@link #getMinimumCardinalityVertex()}). - * - * @throws IllegalStateException - * if either vertex has not been sampled. - */ - public Vertex getMaximumCardinalityVertex() { - - // The vertex with the minimum cardinality. - final Vertex o = getMinimumCardinalityVertex(); - - // Return the other vertex. - return (v1 == o) ? v2 : v1; - - } - - /** - * Estimate the cardinality of the edge, updating {@link #sample} as a - * side-effect. This is a NOP if the edge has already been sampled at that - * <i>limit</i>. This is a NOP if the edge sample is exact. - * - * @param queryEngine - * The query engine. - * @param limit - * The sample size. - * - * @return The new {@link EdgeSample} (this is also updated on - * {@link #sample} as a side-effect). - * - * @throws Exception - * - * FIXME This is actually using the source vertex as the source - * sample which is WRONG. - */ - public EdgeSample estimateCardinality(final QueryEngine queryEngine, - final int limit) throws Exception { - - if (limit <= 0) - throw new IllegalArgumentException(); - - // /* - // * Note: There is never a need to "re-sample" the edge. Unlike ROX, - // * we always can sample a vertex. This means that we can sample the - // * edges exactly once, during the initialization of the join graph. - // */ - // if (sample != null) - // throw new RuntimeException(); - - if (sample != null) { - - if (sample.limit >= limit) { - - // Already sampled at that limit. 
- return sample; - - } - - if (sample.estimateEnum == EstimateEnum.Exact) { - - // Sample is exact (fully materialized result). - return sample; - - } - - } - - /* - * Figure out which vertex has the smaller cardinality. The sample of - * that vertex is used since it is more representative than the sample - * of the other vertex. - * - * Note: If there are constraints which can run for this edge, then they - * will be attached when the edge is sampled. - */ - // vertex v, vprime - final Vertex v, vp; - if (v1.sample == null) // vertex not sampled. - throw new IllegalStateException(); - if (v2.sample == null) // vertex not sampled. - throw new IllegalStateException(); - if (v1.sample.estimatedCardinality < v2.sample.estimatedCardinality) { - v = v1; - vp = v2; - } else { - v = v2; - vp = v1; - } - -// /* -// * Convert the source sample into an IBindingSet[]. -// * -// * Note: This is now done when we sample the vertex. -// */ -// final IBindingSet[] sourceSample = new IBindingSet[v.sample.sample.length]; -// { -// for (int i = 0; i < sourceSample.length; i++) { -// final IBindingSet bset = new HashBindingSet(); -// BOpContext.copyValues((IElement) v.sample.sample[i], v.pred, -// bset); -// sourceSample[i] = bset; -// } -// } - - // Sample the edge and save the sample on the edge as a side-effect. - this.sample = estimateCardinality(queryEngine, limit, v, vp, - v.sample // the source sample. -// v.sample.estimatedCardinality,// -// v.sample.estimateEnum == EstimateEnum.Exact, // -// v.sample.limit,// -// v.sample.sample// - ); - - return sample; - - } - - /** - * Estimate the cardinality of the edge given a sample of either a vertex or - * a join path leading up to that edge. - * <p> - * Note: The caller is responsible for protecting against needless - * re-sampling. - * - * @param queryEngine - * The query engine. - * @param limit - * The limit for the cutoff join. - * @param vSource - * The source vertex. 
- * @param vTarget - * The target vertex - * @param sourceSample - * The input sample for the cutoff join. When this is a one-step - * estimation of the cardinality of the edge, then this sample is - * taken from the {@link VertexSample}. When the edge (vSource, - * vTarget) extends some {@link Path}, then this is taken from - * the {@link EdgeSample} for that {@link Path}. - * - * @return The result of sampling that edge. - * - * @throws Exception - */ - // * @param path The path which is being extended. - public EdgeSample estimateCardinality(// - final QueryEngine queryEngine,// - final int limit,// - // final IPredicate<?>[] path,// - final Vertex vSource,// - final Vertex vTarget,// - final SampleBase sourceSample// -// final long sourceEstimatedCardinality,// -// final boolean sourceSampleExact,// -// final int sourceSampleLimit,// -// final IBindingSet[] sourceSample// - ) throws Exception { - - if (limit <= 0) - throw new IllegalArgumentException(); - - /* - * Note: This sets up a cutoff pipeline join operator which makes an - * accurate estimate of the #of input solutions consumed and the #of - * output solutions generated. From that, we can directly compute the - * join hit ratio. This approach is preferred to injecting a "RowId" - * column as the estimates are taken based on internal counters in the - * join operator and the join operator knows how to cutoff evaluation as - * soon as the limit is satisfied, thus avoiding unnecessary effort. - */ - /* - * The set of constraint(s) (if any) which will be applied when we - * perform the cutoff evaluation of this edge (aka join). - * - * FIXME CONSTRAINTS - we need the join path to decide which constraints - * will be attached when we sample this edge (or at least the set of - * variables which are already known to be bound). 
- */ - final IConstraint[] constraints = null; - final int joinId = 1; - final Map<String, Object> anns = NV.asMap( - // - new NV(BOp.Annotations.BOP_ID, joinId),// - // @todo Why not use a factory which avoids bopIds - // already in use? - new NV(PipelineJoin.Annotations.PREDICATE, vTarget.pred.setBOpId(3)), - // Note: does not matter since not executed by the query - // controller. - // // disallow parallel evaluation of tasks - // new NV(PipelineOp.Annotations.MAX_PARALLEL,1), - // disallow parallel evaluation of chunks. - new NV(PipelineJoin.Annotations.MAX_PARALLEL_CHUNKS, 0), - // disable access path coalescing - new NV( PipelineJoin.Annotations.COALESCE_DUPLICATE_ACCESS_PATHS, false), // - // pass in constraints on this join. - new NV(PipelineJoin.Annotations.CONSTRAINTS, constraints),// - // cutoff join. - new NV(PipelineJoin.Annotations.LIMIT, (long) limit), - /* - * Note: In order to have an accurate estimate of the - * join hit ratio we need to make sure that the join - * operator runs using a single PipelineJoinStats - * instance which will be visible to us when the query - * is cutoff. In turn, this implies that the join must - * be evaluated on the query controller. - * - * @todo This implies that sampling of scale-out joins - * must be done using remote access paths. - */ - new NV(PipelineJoin.Annotations.SHARED_STATE, true),// - new NV(PipelineJoin.Annotations.EVALUATION_CONTEXT, - BOpEvaluationContext.CONTROLLER)// - ); - - @SuppressWarnings("unchecked") - final PipelineJoin<?> joinOp = new PipelineJoin(new BOp[] {}, anns); - - final PipelineOp queryOp = joinOp; - - // run the cutoff sampling of the edge. 
- final UUID queryId = UUID.randomUUID(); - final IRunningQuery runningQuery = queryEngine.eval(queryId, queryOp, - new LocalChunkMessage<IBindingSet>(queryEngine, queryId, joinOp - .getId()/* startId */, -1 /* partitionId */, - new ThickAsynchronousIterator<IBindingSet[]>( - new IBindingSet[][] { sourceSample.sample }))); - - final List<IBindingSet> result = new LinkedList<IBindingSet>(); - try { - try { - IBindingSet bset = null; - // Figure out the #of source samples consumed. - final Iterator<IBindingSet> itr = new Dechunkerator<IBindingSet>( - runningQuery.iterator()); - while (itr.hasNext()) { - bset = itr.next(); - result.add(bset); - } - } finally { - // verify no problems. - runningQuery.get(); - } - } finally { - runningQuery.cancel(true/* mayInterruptIfRunning */); - } - - // The join hit ratio can be computed directly from these stats. - final PipelineJoinStats joinStats = (PipelineJoinStats) runningQuery - .getStats().get(joinId); - - if (log.isTraceEnabled()) - log.trace(joinStats.toString()); - - // #of solutions in. - final int inputCount = (int) joinStats.inputSolutions.get(); - - // #of solutions out. - long outputCount = joinStats.outputSolutions.get(); - - // cumulative range count of the sampled access paths. - final long sumRangeCount = joinStats.accessPathRangeCount.get(); - - final EstimateEnum estimateEnum; - if (sourceSample.estimateEnum == EstimateEnum.Exact - && outputCount < limit) { - /* - * Note: If the entire source vertex is being fed into the cutoff - * join and the cutoff join outputCount is LT the limit, then the - * sample is the actual result of the join. That is, feeding all - * source solutions into the join gives fewer than the desired - * number of output solutions. 
- */ - estimateEnum = EstimateEnum.Exact; - } else if (inputCount == 1 && outputCount == limit) { - /* - * If the inputCount is ONE (1) and the outputCount is the limit, - * then the estimated cardinality is a lower bound as more than - * outputCount solutions might be produced by the join when - * presented with a single input solution. - * - * However, this condition suggests that the sum of the sampled - * range counts is a much better estimate of the cardinality of this - * join. - * - * For example, consider a join feeding a rangeCount of 16 into a - * rangeCount of 175000. With a limit of 100, we estimated the - * cardinality at 1600L (lower bound). In fact, the cardinality is - * 16*175000. This falsely low estimate can cause solutions which - * are really better to be dropped. - */ - // replace outputCount with the sum of the sampled range counts. - outputCount = sumRangeCount; - estimateEnum = EstimateEnum.LowerBound; - } else if (!(sourceSample.estimateEnum != EstimateEnum.Exact) - && inputCount == Math.min(sourceSample.limit, - sourceSample.estimatedCardinality) && outputCount == 0) { - /* - * When the source sample was not exact, the inputCount is EQ to the - * lesser of the source range count and the source sample limit, and - * the outputCount is ZERO (0), then feeding in all source solutions - * in is not sufficient to generate any output solutions. In this - * case, the estimated join hit ratio appears to be zero. However, - * the estimation of the join hit ratio actually underflowed and the - * real join hit ratio might be a small non-negative value. A real - * zero can only be identified by executing the full join. - * - * Note: An apparent join hit ratio of zero does NOT imply that the - * join will be empty (unless the source vertex sample is actually - * the fully materialized access path - this case is covered above). 
- */ - estimateEnum = EstimateEnum.Underflow; - } else { - estimateEnum = EstimateEnum.Normal; - } - - final double f = outputCount == 0 ? 0 - : (outputCount / (double) inputCount); - - final long estimatedCardinality = (long) (sourceSample.estimatedCardinality * f); - - final EdgeSample edgeSample = new EdgeSample(// -// sourceSample.estimatedCardinality, // -// sourceSample.estimateEnum, // -// sourceSample.limit, // - sourceSample,// - inputCount,// - outputCount, // - f, // - // args to SampleBase - estimatedCardinality, // - limit, // - estimateEnum,// - result.toArray(new IBindingSet[result.size()])); - - if (log.isDebugEnabled()) - log.debug(getLabel() + " : newSample=" + edgeSample); - - return edgeSample; - - } - -} Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/EdgeSample.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/EdgeSample.java 2011-02-23 14:44:18 UTC (rev 4234) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/EdgeSample.java 2011-02-23 21:52:48 UTC (rev 4235) @@ -26,59 +26,17 @@ import com.bigdata.bop.IBindingSet; /** - * A sample of an {@link Edge} (a join). + * A sample produced by a cutoff join. */ public class EdgeSample extends SampleBase { -// private static final transient Logger log = Logger -// .getLogger(EdgeSample.class); + // private static final transient Logger log = Logger + // .getLogger(EdgeSample.class); -// /** -// * The estimated cardinality of the cutoff join. -// */ -// public final long estimatedCardinality; -// -// /** -// * The limit used to sample the cutoff join. -// */ -// public final int limit; -// -// /** -// * Indicates whether the estimate is exact, an upper bound, or a lower -// * bound. -// */ -// public final EstimateEnum estimateEnum; -// -// /** -// * The sample of the solutions for the join path. 
-// */ -// final IBindingSet[] sample; - /** * The source sample used to compute the cutoff join. */ public final SampleBase sourceSample; - -// /** -// * The estimated cardinality of the source sample. -// */ -// public final long sourceEstimatedCardinality; -// -// /** -// * The sample type for the source sample. -// */ -// public final EstimateEnum sourceEstimateEnum; -// -// /** -// * Return <code>true</code> iff the source sample is exact (because the -// * source is either a fully materialized vertex or an edge whose solutions -// * have been fully materialized). -// */ -// public boolean isSourceSampleExact() { -// -// return sourceEstimateEnum == EstimateEnum.Exact; -// -// } /** * The #of binding sets out of the source sample vertex sample which were @@ -96,6 +54,11 @@ public final long outputCount; /** + * The #of tuples read from the access path when processing the cutoff join. + */ + public final long tuplesRead; + + /** * The ratio of the #of input samples consumed to the #of output samples * generated (the join hit ratio or scale factor). */ @@ -115,60 +78,47 @@ * which were consumed. * @param outputCount * The #of binding sets generated before the join was cutoff. + * @param tuplesRead + * The #of tuples read from the access path when processing the + * cutoff join. */ -// * @param sourceVertexSample -// * The sample for source vertex of the edge (whichever vertex has -// * the lower cardinality). 
- EdgeSample( - // final VertexSample sourceVertexSample, - final SampleBase sourceSample,// -// final long sourceEstimatedCardinality,// -// final EstimateEnum sourceEstimateEnum, // -// final int sourceSampleLimit,// + EdgeSample(final SampleBase sourceSample,// final int inputCount, // final long outputCount,// + final long tuplesRead,// final double f, // // args to SampleBase final long estimatedCardinality,// final int limit,// final EstimateEnum estimateEnum,// final IBindingSet[] sample// - ) { + ) { super(estimatedCardinality, limit, estimateEnum, sample); - if(sourceSample == null) + if (sourceSample == null) throw new IllegalArgumentException(); this.sourceSample = sourceSample; -// this.sourceEstimatedCardinality = sourceSample.estimatedCardinality; -// -// this.sourceEstimateEnum = sourceSample.estimateEnum; - this.inputCount = inputCount; this.outputCount = outputCount; + this.tuplesRead = tuplesRead; + this.f = f; } @Override protected void toString(final StringBuilder sb) { -// return getClass().getName() // -// + "{ estimatedCardinality=" + estimatedCardinality// -// + ", limit=" + limit // -// + ", estimateEnum=" + estimateEnum// - sb.append(", sourceEstimatedCardinality=" + sourceSample.estimatedCardinality); + sb.append(", sourceEstimatedCardinality=" + + sourceSample.estimatedCardinality); sb.append(", sourceEstimateEnum=" + sourceSample.estimateEnum); sb.append(", inputCount=" + inputCount); sb.append(", outputCount=" + outputCount); sb.append(", f=" + f); - // + ", estimateIsLowerBound=" + estimateIsLowerBound// - // + ", estimateIsUpperBound=" + estimateIsUpperBound// - // + ", sampleIsExactSolution=" + estimateIsExact // -// + "}"; } } Copied: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/EstimatedCardinalityComparator.java (from rev 4234, branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/EstimatedEdgeCardinalityComparator.java) 
=================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/EstimatedCardinalityComparator.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/EstimatedCardinalityComparator.java 2011-02-23 21:52:48 UTC (rev 4235) @@ -0,0 +1,62 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2011. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +package com.bigdata.bop.joinGraph.rto; + +import java.util.Comparator; + + +/** + * Places edges into order by ascending estimated cardinality. Edges which + * are not weighted are ordered to the end. + * + * TODO unit tests, including with unweighted edges. + */ +class EstimatedCardinalityComparator implements Comparator<Path> { + + public static final transient Comparator<Path> INSTANCE = new EstimatedCardinalityComparator(); + + // @Override + public int compare(final Path o1, final Path o2) { + if (o1.edgeSample == null && o2.edgeSample == null) { + // Neither edge is weighted. + return 0; + } + if (o1.edgeSample == null) { + // o1 is not weighted, but o2 is. sort o1 to the end. + return 1; + } + if (o2.edgeSample == null) { + // o2 is not weighted. sort o2 to the end. 
+ return -1; + } + final long id1 = o1.edgeSample.estimatedCardinality; + final long id2 = o2.edgeSample.estimatedCardinality; + if (id1 < id2) + return -1; + if (id1 > id2) + return 1; + return 0; + } + +} Deleted: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/EstimatedEdgeCardinalityComparator.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/EstimatedEdgeCardinalityComparator.java 2011-02-23 14:44:18 UTC (rev 4234) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/EstimatedEdgeCardinalityComparator.java 2011-02-23 21:52:48 UTC (rev 4235) @@ -1,62 +0,0 @@ -/** - -Copyright (C) SYSTAP, LLC 2006-2011. All rights reserved. - -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ -package com.bigdata.bop.joinGraph.rto; - -import java.util.Comparator; - - -/** - * Places edges into order by ascending estimated cardinality. Edges which - * are not weighted are ordered to the end. - * - * TODO unit tests, including with unweighted edges. 
- */ -class EstimatedEdgeCardinalityComparator implements Comparator<Edge> { - - public static final transient Comparator<Edge> INSTANCE = new EstimatedEdgeCardinalityComparator(); - - // @Override - public int compare(final Edge o1, final Edge o2) { - if (o1.sample == null && o2.sample == null) { - // Neither edge is weighted. - return 0; - } - if (o1.sample == null) { - // o1 is not weighted, but o2 is. sort o1 to the end. - return 1; - } - if (o2.sample == null) { - // o2 is not weighted. sort o2 to the end. - return -1; - } - final long id1 = o1.sample.estimatedCardinality; - final long id2 = o2.sample.estimatedCardinality; - if (id1 < id2) - return -1; - if (id1 > id2) - return 1; - return 0; - } - -} Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java 2011-02-23 14:44:18 UTC (rev 4234) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java 2011-02-23 21:52:48 UTC (rev 4235) @@ -30,7 +30,6 @@ import java.util.Arrays; import java.util.Collections; import java.util.Formatter; -import java.util.Iterator; import java.util.LinkedHashMap; import java.util.LinkedHashSet; import java.util.LinkedList; @@ -44,11 +43,9 @@ import com.bigdata.bop.BOpUtility; import com.bigdata.bop.IConstraint; import com.bigdata.bop.IPredicate; -import com.bigdata.bop.IVariable; import com.bigdata.bop.ap.SampleIndex.SampleType; import com.bigdata.bop.engine.QueryEngine; import com.bigdata.bop.joinGraph.NoSolutionsException; -import com.bigdata.bop.joinGraph.PartitionedJoinGroup; import com.bigdata.bop.rdf.join.DataSetJoin; /** @@ -236,50 +233,50 @@ */ private final SampleType sampleType; - /** - * The edges of the join graph as determined by static analysis. Each - * edge is basically a possible join. 
This array is comprised of just - * those edges which are determined by static analysis. Additional edges - * MAY be identified dynamically. Altogether there are three categories - * of edges: - * <ol> - * <li>The vertices directly share variables (join with shared - * variable(s)). These edges are identified by static analysis in the - * constructor are are represented in {@link #E}.</li> - * <li>The vertices indirectly share variables via a constraint (join - * with indirectly shared variable(s)). These edges are identified by - * dynamic analysis. Each time we expand the set of join paths under - * consideration, we examine {@link #unshared} vertices. Given the join - * path under consideration and the set of constraints, it may be that - * the vertex will indirectly share a variable via a constraint and can - * therefore participate in a constrained join.</li> - * <li>The vertices do not share variables, either directly or - * indirectly (unconstrained join). All vertices can join. However, if - * there are no shared variables then the join is unconstrained (full - * cross product). These edges are identified dynamically. In each round - * for which a constrained edge could not be identified, we consider all - * vertices in {@link #unshared} and choose an unconstrained edge.</li> - * </ol> - * - * @see BOpUtility#canJoinUsingConstraints(IPredicate[], IPredicate, IConstraint[]) - */ - private final Edge[] E; +// /** +// * The edges of the join graph as determined by static analysis. Each +// * edge is basically a possible join. This array is comprised of just +// * those edges which are determined by static analysis. Additional edges +// * MAY be identified dynamically. Altogether there are three categories +// * of edges: +// * <ol> +// * <li>The vertices directly share variables (join with shared +// * variable(s)). 
These edges are identified by static analysis in the +// * constructor are are represented in {@link #E}.</li> +// * <li>The vertices indirectly share variables via a constraint (join +// * with indirectly shared variable(s)). These edges are identified by +// * dynamic analysis. Each time we expand the set of join paths under +// * consideration, we examine {@link #unshared} vertices. Given the join +// * path under consideration and the set of constraints, it may be that +// * the vertex will indirectly share a variable via a constraint and can +// * therefore participate in a constrained join.</li> +// * <li>The vertices do not share variables, either directly or +// * indirectly (unconstrained join). All vertices can join. However, if +// * there are no shared variables then the join is unconstrained (full +// * cross product). These edges are identified dynamically. In each round +// * for which a constrained edge could not be identified, we consider all +// * vertices in {@link #unshared} and choose an unconstrained edge.</li> +// * </ol> +// * +// * @see BOpUtility#canJoinUsingConstraints(IPredicate[], IPredicate, IConstraint[]) +// */ +// private final Edge[] E; - /** - * An unordered collection of vertices which do not share any variables - * with the other vertices in the join graph. These vertices will - * produce full cross product joins unless a constraint causes indirect - * sharing of variables with a join path. - */ - private final Vertex[] unshared; +// /** +// * An unordered collection of vertices which do not share any variables +// * with the other vertices in the join graph. These vertices will +// * produce full cross product joins unless a constraint causes indirect +// * sharing of variables with a join path. 
+// */ +// private final Vertex[] unshared; public List<Vertex> getVertices() { return Collections.unmodifiableList(Arrays.asList(V)); } - public List<Edge> getEdges() { - return Collections.unmodifiableList(Arrays.asList(E)); - } +// public List<Edge> getEdges() { +// return Collections.unmodifiableList(Arrays.asList(E)); +// } public String toString() { final StringBuilder sb = new StringBuilder(); @@ -292,14 +289,14 @@ for (IConstraint c : C) { sb.append("\nC[" + c.getId() + "]=" + c); } - sb.append("\n],unshared=["); - for (Vertex v : unshared) { - sb.append("\n" + v); - } - sb.append("],E=["); - for (Edge e : E) { - sb.append("\n" + e); - } +// sb.append("\n],unshared=["); +// for (Vertex v : unshared) { +// sb.append("\n" + v); +// } +// sb.append("],E=["); +// for (Edge e : E) { +// sb.append("\n" + e); +// } sb.append("\n]}"); return sb.toString(); } @@ -367,185 +364,160 @@ this.sampleType = sampleType; - /* - * Create edges to represent possible joins between predicates based - * on directly shared variables - these joins can be identified by a - * static analysis of the join graph. - * - * Note: There are really three classes of joins to be considered. - * - * (1) The target predicate directly shares a variable with the - * source predicate. These joins are always constrained since the - * source predicate will have bound that variable. This is the - * static analysis of the join graph. - * - * (2) When the source predicate shares a variable with a constraint - * which also shares a variable with the target predicate. While the - * predicates do not directly share a variable, these joins are - * constrained by the shared variable in the constraint on the - * target predicate. BSBM Q5 is an example of this case. We do not - * create edges for such joins here. Instead, we dynamically - * determine when a constrained join is possible when extending the - * join path in each round. Hence, this is part of the dynamic - * analysis of the join graph. 
- * - * (3) Any two predicates may always be joined. However, joins which - * do not share variables either directly or indirectly will be full - * cross products. Therefore such joins are run last and we do not - * create edges for them here. Again, this is part of the dynamic - * analysis of the join graph. - */ - { - - // The set of identified edges for vertices which share vars. - final List<Edge> tmp = new LinkedList<Edge>(); - - // The set of vertices which share variables. - final Set<Vertex> sharedEdgeVertices = new LinkedHashSet<Vertex>(); - - // The set of vertices which do not share variables. - final List<Vertex> unsharedEdgeVertices = new LinkedList<Vertex>(); - - for (int i = 0; i < v.length; i++) { - - // consider a source vertex. - final IPredicate<?> p1 = v[i]; - - // #of vertices which share a variable with source vertex. - int nmatched = 0; - - for (int j = i + 1; j < v.length; j++) { - - // consider a possible target vertex. - final IPredicate<?> p2 = v[j]; - -// final Set<IVariable<?>> shared = BOpUtility -// .getSharedVars(p1, p2); - - if(BOpUtility.canJoin(p1, p2)) { -// if (!shared.isEmpty()) { - - /* - * The source and target vertices can join based on - * one or more shared variable(s). - */ - - if (log.isDebugEnabled()) - log.debug("vertices shared variable(s): vars=" - + BOpUtility.getSharedVars(p1, p2) - + ", v1=" + p1 + ", v2=" + p2); - - tmp.add(new Edge(V[i], V[j]));//, shared)); - - sharedEdgeVertices.add(V[i]); - - sharedEdgeVertices.add(V[j]); - - nmatched++; - - } else if (constraints != null) { - - /* - * The source and target vertices do not directly - * share any variable(s). However, there may be a - * constraint which shares a variable with both the - * source and target vertex. If such a constraint is - * found, then we add an edge now as that join is - * potentially constrained (less than the full - * Cartesian cross product). 
- * - * Note: While this identifies possible joins via a - * constraint, such joins are only legal when all - * variables used by the constraint are known to be - * bound. - * - * FIXME Edges should be identified dynamically, not - * statically. Probably all edges (aka possible - * joins) should be identified dynamically given the - * history represented by a given join path and the - * set of constraints declared for the join graph. - */ - - for(IConstraint c : constraints) { - - if(BOpUtility.getSharedVars(p1, c).isEmpty()) - continue; - - if(BOpUtility.getSharedVars(p2, c).isEmpty()) - continue; - - if (log.isDebugEnabled()) - log - .debug("vertices shared variable(s) via constraint: v1=" - + p1 - + ", v2=" - + p2 - + ", c=" + c); - - tmp.add(new Edge(V[i], V[j]));//, shared)); - - sharedEdgeVertices.add(V[i]); - - sharedEdgeVertices.add(V[j]); - - nmatched++; - - } - - } - - } - - if (nmatched == 0 && !sharedEdgeVertices.contains(V[i])) { - - /* - * The source vertex does not share any variables. - */ - - log - .warn("Vertex does not share any variables: " - + V[i]); - - unsharedEdgeVertices.add(V[i]); - - } - - } // create edges - - E = tmp.toArray(new Edge[0]); - - this.unshared = unsharedEdgeVertices.toArray(new Vertex[0]); - -// if(!unsharedEdgeVertices.isEmpty()) { +// /* +// * Create edges to represent possible joins between predicates based on +// * directly shared variables - these joins can be identified by a static +// * analysis of the join graph. +// */ +// { // -// /* -// * FIXME NO SHARED VARS : RUN LAST. This needs to be -// * supported. When vertices that do not share variables -// * either directly or via a constraint then they should run -// * last as they can not constrain the query. In this case, -// * they are not considered by the runtime optimizer when -// * building up the join path until all vertices which share -// * variables have been exhausted. 
At that point, the -// * remaining vertices are just appended to whatever join -// * path was selected as having the lowest cumulative -// * estimated cardinality. -// * -// * However, if there exists for a vertex which otherwise -// * does not share variables a constraint which should be -// * evaluated against that vertex, then that constraint -// * provides the basis for a edge (aka join). In this case, -// * an edge must be created for the vertex based on the -// * shared variable in the constraint and its position in the -// * join path will be decided by the runtime optimizer. -// */ +// // The set of identified edges for vertices which share vars. +// final List<Edge> tmp = new LinkedList<Edge>(); // -// throw new UnsupportedOperationException( -// "Some predicates do not share any variables with other predicates: unshared=" -// + unsharedEdgeVertices); -// -// } - - } +// // The set of vertices which share variables. +// final Set<Vertex> sharedEdgeVertices = new LinkedHashSet<Vertex>(); +// +//// // The set of vertices which do not share variables. +//// final List<Vertex> unsharedEdgeVertices = new LinkedList<Vertex>(); +// +// for (int i = 0; i < v.length; i++) { +// +// // consider a source vertex. +// final IPredicate<?> p1 = v[i]; +// +// // #of vertices which share a variable with source vertex. +// int nmatched = 0; +// +// for (int j = i + 1; j < v.length; j++) { +// +// // consider a possible target vertex. +// final IPredicate<?> p2 = v[j]; +// +//// final Set<IVariable<?>> shared = BOpUtility +//// .getSharedVars(p1, p2); +// +// if(BOpUtility.canJoin(p1, p2)) { +//// if (!shared.isEmpty()) { +// +// /* +// * The source and target vertices can join based on +// * one or more shared variable(s). 
+// */ +// +// if (log.isDebugEnabled()) +// log.debug("vertices shared variable(s): vars=" +// + BOpUtility.getSharedVars(p1, p2) +// + ", v1=" + p1 + ", v2=" + p2); +// +// tmp.add(new Edge(V[i], V[j]));//, shared)); +// +// sharedEdgeVertices.add(V[i]); +// +// sharedEdgeVertices.add(V[j]); +// +// nmatched++; +// +// } else if (constraints != null) { +// +// /* +// * The source and target vertices do not directly +// * share any variable(s). However, there may be a +// * constraint which shares a variable with both the +// * source and target vertex. If such a constraint is +// * found, then we add an edge now as that join is +// * potentially constrained (less than the full +// * Cartesian cross product). +// * +// * Note: While this identifies possible joins via a +// * constraint, such joins are only legal when all +// * variables used by the constraint are known to be +// * bound. +// * +// * FIXME Edges should be identified dynamically, not +// * statically. Probably all edges (aka possible +// * joins) should be identified dynamically given the +// * history represented by a given join path and the +// * set of constraints declared for the join graph. +// */ +// +// for(IConstraint c : constraints) { +// +// if(BOpUtility.getSharedVars(p1, c).isEmpty()) +// continue; +// +// if(BOpUtility.getSharedVars(p2, c).isEmpty()) +// continue; +// +// if (log.isDebugEnabled()) +// log +// .debug("vertices shared variable(s) via constraint: v1=" +// + p1 +// + ", v2=" +// + p2 +// + ", c=" + c); +// +// tmp.add(new Edge(V[i], V[j]));//, shared)); +// +// sharedEdgeVertices.add(V[i]); +// +// sharedEdgeVertices.add(V[j]); +// +// nmatched++; +// +// } +// +// } +// +// } +// +// if (nmatched == 0 && !sharedEdgeVertices.contains(V[i])) { +// +// /* +// * The source vertex does not share any variables. 
+// */ +// +// log.warn("Vertex does not share any variables: " + V[i]); +// +//// unsharedEdgeVertices.add(V[i]); +// +// } +// +// } // next vertex +// +// E = tmp.toArray(new Edge[0]); +// +//// this.unshared = unsharedEdgeVertices.toArray(new Vertex[0]); +// +//// if(!unsharedEdgeVertices.isEmpty()) { +//// +//// /* +//// * FIXME NO SHARED VARS : RUN LAST. This needs to be +//// * supported. When vertices that do not share variables +//// * either directly or via a constraint then they should run +//// * last as they can not constrain the query. In this case, +//// * they are not considered by the runtime optimizer when +//// * building up the join path until all vertices which share +//// * variables have been exhausted. At that point, the +//// * remaining vertices are just appended to whatever join +//// * path was selected as having the lowest cumulative +//// * estimated cardinality. +//// * +//// * However, if there exists for a vertex which otherwise +//// * does not share variables a constraint which should be +//// * evaluated against that vertex, then that constraint +//// * provides the basis for a edge (aka join). In this case, +//// * an edge must be created for the vertex based on the +//// * shared variable in the constraint and its position in the +//// * join path will be decided by the runtime optimizer. +//// */ +//// +//// throw new UnsupportedOperationException( +//// "Some predicates do not share any variables with other predicates: unshared=" +//// + unsharedEdgeVertices); +//// +//// } +// +// } // create edges. } @@ -626,9 +598,27 @@ int round = 1; + /* + * This map is used to associate join path segments (expressed as an + * ordered array of bopIds) with edge sample to avoid redundant effort. + * + * FIXME HEAP MANAGMENT : This map holds references to the cutoff join + * samples. 
To ensure that the map has the minimum heap footprint, it + * must be scanned each time we prune the set of active paths and any + * entry which is not a prefix of an active path should be removed. + * + * TODO MEMORY MANAGER : When an entry is cleared from this map, the + * corresponding allocation in the memory manager (if any) must be + * released. The life cycle of the map needs to be bracketed by a + * try/finally in order to ensure that all allocations associated with + * the map are released no later than when we leave the lexicon scope of + * that clause. + */ + final Map<PathIds, EdgeSample> edgeSamples = new LinkedHashMap<PathIds, EdgeSample>(); + while (paths.length > 0 && round < nvertices - 1) { - paths = expand(queryEngine, limit, round++, paths); + paths = expand(queryEngine, limit, round++, paths, edgeSamples); } @@ -644,13 +634,9 @@ if (log.isInfoEnabled()) { - /* - * @todo It would be nice to show the plan with the filters - * attached, but that might be something that the caller does. - */ log.info("\n*** Selected join path: " + Arrays.toString(paths[0].getVertexIds()) + "\n" - + showPath(paths[0])); + + showPath(paths[0], edgeSamples)); } @@ -674,7 +660,7 @@ if(p == null) throw new IllegalArgumentException(); - final IPredicate[] path = p.getPredicates(); + final IPredicate<?>[] path = p.getPredicates(); if (path.length != V.length) { throw new IllegalArgumentException( @@ -712,46 +698,45 @@ * * @param nedges * The maximum #of edges to choose. + * @param paths + * The set of possible initial paths to choose from. + * + * @return Up to <i>nedges</i> minimum cardinality paths. */ - public Path[] chooseStartingPaths(final int nedges) { + public Path[] chooseStartingPaths(final int nedges, final Path[] paths) { final List<Path> tmp = new LinkedList<Path>(); - // All edges in the graph. - final Edge[] edges = getEdges().toArray(new Edge[0]); - // Sort them by ascending expected cardinality. 
- Arrays.sort(edges, 0, edges.length, - EstimatedEdgeCardinalityComparator.INSTANCE); + Arrays.sort(paths, 0, paths.length, + EstimatedCardinalityComparator.INSTANCE); // Choose the top-N edges (those with the least cardinality). - for (int i = 0; i < edges.length && i < nedges; i++) { + for (int i = 0; i < paths.length && i < nedges; i++) { - tmp.add(new Path(ed... [truncated message content] |
From: <tho...@us...> - 2011-02-24 01:04:19
|
Revision: 4236 http://bigdata.svn.sourceforge.net/bigdata/?rev=4236&view=rev Author: thompsonbry Date: 2011-02-24 01:04:12 +0000 (Thu, 24 Feb 2011) Log Message: ----------- More work on the runtime optimizer. - Fixed a problem in JGraph#expand() where it was not resampling vertices when the limit was increased. - Cleaned out dead code. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JoinGraph.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/Path.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/SampleBase.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java 2011-02-23 21:52:48 UTC (rev 4235) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java 2011-02-24 01:04:12 UTC (rev 4236) @@ -233,51 +233,10 @@ */ private final SampleType sampleType; -// /** -// * The edges of the join graph as determined by static analysis. Each -// * edge is basically a possible join. This array is comprised of just -// * those edges which are determined by static analysis. Additional edges -// * MAY be identified dynamically. Altogether there are three categories -// * of edges: -// * <ol> -// * <li>The vertices directly share variables (join with shared -// * variable(s)). These edges are identified by static analysis in the -// * constructor are are represented in {@link #E}.</li> -// * <li>The vertices indirectly share variables via a constraint (join -// * with indirectly shared variable(s)). 
These edges are identified by -// * dynamic analysis. Each time we expand the set of join paths under -// * consideration, we examine {@link #unshared} vertices. Given the join -// * path under consideration and the set of constraints, it may be that -// * the vertex will indirectly share a variable via a constraint and can -// * therefore participate in a constrained join.</li> -// * <li>The vertices do not share variables, either directly or -// * indirectly (unconstrained join). All vertices can join. However, if -// * there are no shared variables then the join is unconstrained (full -// * cross product). These edges are identified dynamically. In each round -// * for which a constrained edge could not be identified, we consider all -// * vertices in {@link #unshared} and choose an unconstrained edge.</li> -// * </ol> -// * -// * @see BOpUtility#canJoinUsingConstraints(IPredicate[], IPredicate, IConstraint[]) -// */ -// private final Edge[] E; - -// /** -// * An unordered collection of vertices which do not share any variables -// * with the other vertices in the join graph. These vertices will -// * produce full cross product joins unless a constraint causes indirect -// * sharing of variables with a join path. 
-// */ -// private final Vertex[] unshared; - public List<Vertex> getVertices() { return Collections.unmodifiableList(Arrays.asList(V)); } -// public List<Edge> getEdges() { -// return Collections.unmodifiableList(Arrays.asList(E)); -// } - public String toString() { final StringBuilder sb = new StringBuilder(); sb.append("JoinGraph"); @@ -289,14 +248,6 @@ for (IConstraint c : C) { sb.append("\nC[" + c.getId() + "]=" + c); } -// sb.append("\n],unshared=["); -// for (Vertex v : unshared) { -// sb.append("\n" + v); -// } -// sb.append("],E=["); -// for (Edge e : E) { -// sb.append("\n" + e); -// } sb.append("\n]}"); return sb.toString(); } @@ -364,161 +315,6 @@ this.sampleType = sampleType; -// /* -// * Create edges to represent possible joins between predicates based on -// * directly shared variables - these joins can be identified by a static -// * analysis of the join graph. -// */ -// { -// -// // The set of identified edges for vertices which share vars. -// final List<Edge> tmp = new LinkedList<Edge>(); -// -// // The set of vertices which share variables. -// final Set<Vertex> sharedEdgeVertices = new LinkedHashSet<Vertex>(); -// -//// // The set of vertices which do not share variables. -//// final List<Vertex> unsharedEdgeVertices = new LinkedList<Vertex>(); -// -// for (int i = 0; i < v.length; i++) { -// -// // consider a source vertex. -// final IPredicate<?> p1 = v[i]; -// -// // #of vertices which share a variable with source vertex. -// int nmatched = 0; -// -// for (int j = i + 1; j < v.length; j++) { -// -// // consider a possible target vertex. -// final IPredicate<?> p2 = v[j]; -// -//// final Set<IVariable<?>> shared = BOpUtility -//// .getSharedVars(p1, p2); -// -// if(BOpUtility.canJoin(p1, p2)) { -//// if (!shared.isEmpty()) { -// -// /* -// * The source and target vertices can join based on -// * one or more shared variable(s). 
-// */ -// -// if (log.isDebugEnabled()) -// log.debug("vertices shared variable(s): vars=" -// + BOpUtility.getSharedVars(p1, p2) -// + ", v1=" + p1 + ", v2=" + p2); -// -// tmp.add(new Edge(V[i], V[j]));//, shared)); -// -// sharedEdgeVertices.add(V[i]); -// -// sharedEdgeVertices.add(V[j]); -// -// nmatched++; -// -// } else if (constraints != null) { -// -// /* -// * The source and target vertices do not directly -// * share any variable(s). However, there may be a -// * constraint which shares a variable with both the -// * source and target vertex. If such a constraint is -// * found, then we add an edge now as that join is -// * potentially constrained (less than the full -// * Cartesian cross product). -// * -// * Note: While this identifies possible joins via a -// * constraint, such joins are only legal when all -// * variables used by the constraint are known to be -// * bound. -// * -// * FIXME Edges should be identified dynamically, not -// * statically. Probably all edges (aka possible -// * joins) should be identified dynamically given the -// * history represented by a given join path and the -// * set of constraints declared for the join graph. -// */ -// -// for(IConstraint c : constraints) { -// -// if(BOpUtility.getSharedVars(p1, c).isEmpty()) -// continue; -// -// if(BOpUtility.getSharedVars(p2, c).isEmpty()) -// continue; -// -// if (log.isDebugEnabled()) -// log -// .debug("vertices shared variable(s) via constraint: v1=" -// + p1 -// + ", v2=" -// + p2 -// + ", c=" + c); -// -// tmp.add(new Edge(V[i], V[j]));//, shared)); -// -// sharedEdgeVertices.add(V[i]); -// -// sharedEdgeVertices.add(V[j]); -// -// nmatched++; -// -// } -// -// } -// -// } -// -// if (nmatched == 0 && !sharedEdgeVertices.contains(V[i])) { -// -// /* -// * The source vertex does not share any variables. 
-// */ -// -// log.warn("Vertex does not share any variables: " + V[i]); -// -//// unsharedEdgeVertices.add(V[i]); -// -// } -// -// } // next vertex -// -// E = tmp.toArray(new Edge[0]); -// -//// this.unshared = unsharedEdgeVertices.toArray(new Vertex[0]); -// -//// if(!unsharedEdgeVertices.isEmpty()) { -//// -//// /* -//// * FIXME NO SHARED VARS : RUN LAST. This needs to be -//// * supported. When vertices that do not share variables -//// * either directly or via a constraint then they should run -//// * last as they can not constrain the query. In this case, -//// * they are not considered by the runtime optimizer when -//// * building up the join path until all vertices which share -//// * variables have been exhausted. At that point, the -//// * remaining vertices are just appended to whatever join -//// * path was selected as having the lowest cumulative -//// * estimated cardinality. -//// * -//// * However, if there exists for a vertex which otherwise -//// * does not share variables a constraint which should be -//// * evaluated against that vertex, then that constraint -//// * provides the basis for a edge (aka join). In this case, -//// * an edge must be created for the vertex based on the -//// * shared variable in the constraint and its position in the -//// * join path will be decided by the runtime optimizer. -//// */ -//// -//// throw new UnsupportedOperationException( -//// "Some predicates do not share any variables with other predicates: unshared=" -//// + unsharedEdgeVertices); -//// -//// } -// -// } // create edges. - } /** @@ -793,14 +589,6 @@ log.info("\n*** Paths @ t0\n" + JGraph.showTable(paths_t0)); /* - * Discard samples which will not be reused. - * - * @todo Do this in each round since with more than one starting vertex - * we have have starting vertices which will not be re-sampled at any - * round (assuming that all paths beginning with a given starting vertex - * get pruned). See runtimeOptimizer() for some more notes on this. 
- */ - /* * Discard samples for vertices which were not chosen as starting points * for join paths. * @@ -919,13 +707,6 @@ x.vertices[0].sample(queryEngine, limit, sampleType); -// for(Edge e : x.edges) { -// -// e.v1.sample(queryEngine, limit); -// e.v2.sample(queryEngine, limit); -// -// } - } /* @@ -946,162 +727,113 @@ for (Path x : a) { -// // The edges which we have visited in this path. -// final List<Edge> edges = new LinkedList<Edge>(); - - // The vertices which we have visited in this path. -// final Set<Vertex> vertices = new LinkedHashSet<Vertex>(); - // The cutoff join sample of the one step shorter path segment. EdgeSample priorEdgeSample = null; for (int segmentLength = 2; segmentLength <= x.vertices.length; segmentLength++) { -// for(Edge ex : x.edges) { - -// // Add edge to the visited set for this join path. -// edges.add(e); // Generate unique key for this join path segment. -// final int[] ids = Path.getVertexIds(edges); final PathIds ids = new PathIds(BOpUtility.getPredIds(x .getPathSegment(segmentLength))); + // Look for sample for this path in our cache. + EdgeSample edgeSample = edgeSamples.get(ids); + + if (edgeSample != null && edgeSample.limit < limit + && !edgeSample.isExact()) { + if (log.isDebugEnabled()) + log.debug("Will resample at higher limit: " + ids); + // Time to resample this edge. + edgeSamples.remove(ids); + edgeSample = null; + } + if (priorEdgeSample == null) { /* * This is the first edge in the path. * - * Test our local table of join path segment estimates - * to see if we have already re-sampled that edge. If - * not, then re-sample it now. + * Test our local table of join path segment estimates to + * see if we have already re-sampled that edge. If not, then + * re-sample it now. */ - + assert segmentLength == 2; - // Test sample cache. - EdgeSample edgeSample = edgeSamples.get(ids); - if (edgeSample == null) { - -// if (e.sample != null && e.sample.limit >= limit) { -// -// // The existing sample for that edge is fine. 
-// edgeSample = e.sample; -// -// } else { - /* - * Re-sample the 1st edge in the join path, updating - * the sample on the edge as a side-effect. The - * cutoff sample is based on the vertex sample for - * the minimum cardinality vertex. - */ + /* + * Re-sample the 1st edge in the join path, updating the + * sample on the edge as a side-effect. The cutoff + * sample is based on the vertex sample for the minimum + * cardinality vertex. + */ - edgeSample = Path.cutoffJoin(// - queryEngine, - limit,// - x.getPathSegment(2),// 1st edge. - C,// constraints - x.vertices[0].sample// source sample. - ); + edgeSample = Path.cutoffJoin(// + queryEngine, limit,// + x.getPathSegment(2),// 1st edge. + C,// constraints + x.vertices[0].sample// source sample. + ); -// } - // Cache the sample. if (edgeSamples.put(ids, edgeSample) != null) throw new AssertionError(); } -// // Add both vertices to the visited set. -// vertices.add(x.vertices[0]); -// vertices.add(x.vertices[1]); -// vertices.add(e.v1); -// vertices.add(e.v2); - // Save sample. It will be used to re-sample the next edge. priorEdgeSample = edgeSample; - + } else { /* * The path segment is at least 3 vertices long. */ assert ids.length() >= 3; - -// final boolean v1Found = vertices.contains(e.v1); -// // The source vertex for the new edge. -// final Vertex sVertex = v1Found ? e.v1 : e.v2; + if (edgeSample == null) { -//// // The target vertex for the new edge. -// final Vertex tVertex = x.vertices[segmentLength - 1]; -// final Vertex tVertex = v1Found ? e.v2 : e.v1; + /* + * This is some N-step edge in the path, where N is + * greater than ONE (1). The source vertex is the vertex + * which already appears in the prior edges of this join + * path. The target vertex is the next vertex which is + * visited by the join path. The sample passed in is the + * prior edge sample -- that is, the sample from the + * path segment without the target vertex. 
This is the + * sample that we just updated when we visited the prior + * edge of the path. + */ - // Look for sample for this path in our cache. - EdgeSample edgeSample = edgeSamples.get(ids); + edgeSample = Path.cutoffJoin(queryEngine,// + limit,// + x.getPathSegment(ids.length()),// + C, // constraints + priorEdgeSample// + ); - if (edgeSample != null && edgeSample.limit < limit - && !edgeSample.isExact()) { if (log.isDebugEnabled()) - log.debug("Will resample at higher limit: " + ids); - // Time to resample this edge. - edgeSamples.remove(ids); - edgeSample = null; - } - - if (edgeSample == null) { + log.debug("Resampled: " + ids + " : " + edgeSample); - /* - * This is some N-step edge in the path, where N is greater - * than ONE (1). The source vertex is the vertex which - * already appears in the prior edges of this join path. The - * target vertex is the next vertex which is visited by the - * join path. The sample passed in is the prior edge sample - * -- that is, the sample from the path segment without the - * target vertex. This is the sample that we just updated - * when we visited the prior edge of the path. - */ + if (edgeSamples.put(ids, edgeSample) != null) + throw new AssertionError(); - edgeSample = Path.cutoffJoin( - queryEngine,// - limit,// -// sVertex,// -// tVertex,// - x.getPathSegment(ids.length()),// - C, // constraints - priorEdgeSample// -// priorEdgeSample.estimatedCardinality,// -// priorEdgeSample.estimateEnum == EstimateEnum.Exact, -// priorEdgeSample.limit,// -// priorEdgeSample.sample// - ); + } - if (log.isDebugEnabled()) - log.debug("Resampled: " + ids - + " : " + edgeSample); - - if (edgeSamples.put(ids, edgeSample) != null) - throw new AssertionError(); + // Save sample. It will be used to re-sample the next edge. + priorEdgeSample = edgeSample; } - // Save sample. It will be used to re-sample the next edge. - priorEdgeSample = edgeSample; - -// // Add target vertex to the visited set. 
-// vertices.add(tVertex); - - } - } // next path prefix in Path [x] if (priorEdgeSample == null) throw new AssertionError(); - + // Save the result on the path. x.edgeSample = priorEdgeSample; - + } // next Path [x]. /* @@ -1114,7 +846,7 @@ + a.length); final List<Path> tmp = new LinkedList<Path>(); - + for (Path x : a) { /* @@ -1122,68 +854,6 @@ */ final Set<Vertex> used = new LinkedHashSet<Vertex>(); -// /* -// * First, consider the edges identified by static analysis. This -// * will create zero or more paths from the current join path. -// * -// * FIXME Change this to use canJoin() and get rid of E[] and of -// * Edges in general. -// */ -// // Check all edges in the graph. -// for (Edge edgeInGraph : E) { -// -// // Figure out which vertices are already part of this path. -// final boolean v1Found = x.contains(edgeInGraph.v1); -// final boolean v2Found = x.contains(edgeInGraph.v2); -// -// if (log.isTraceEnabled()) -// log.trace("Edge: " + edgeInGraph + ", v1Found=" -// + v1Found + ", v2Found=" + v2Found); -// -// if (!v1Found && !v2Found) { -// // Edge is not connected to this path. -// continue; -// } -// -// if (v1Found && v2Found) { -// // Edge is already present in this path. -// continue; -// } -// -// // the target vertex for the new edge. -// final Vertex tVertex = v1Found ? edgeInGraph.v2 -// : edgeInGraph.v1; -// -//// // the source vertex for the new edge. -//// final Vertex sVertex = v1Found ? edgeInGraph.v1 -//// : edgeInGraph.v2; -// -// if (used.contains(tVertex)) { -// // Vertex already used to extend this path. -// if (log.isTraceEnabled()) -// log.trace("Edge: " + edgeInGraph -// + " - already used to extend this path."); -// continue; -// } -// -// // add the new vertex to the set of used vertices. -// used.add(tVertex); -// -// // (Re-)sample vertex before we sample a new edge -// tVertex.sample(queryEngine, limit, sampleType); -// -// // Extend the path to the new vertex. 
-// final Path p = x.addEdge(queryEngine, limit, tVertex, /*edgeInGraph,*/ C); -// -// // Add to the set of paths for this round. -// tmp.add(p); -// -// if (log.isTraceEnabled()) -// log.trace("Extended path with static edge: " -// + edgeInGraph + ", new path=" + p); -// -// } - { /* @@ -1244,97 +914,6 @@ // add the new vertex to the set of used vertices. used.add(tVertex); -// // (Re-)sample vertex before we sample a new edge -// tVertex.sample(queryEngine, limit, sampleType); - -// // Analyze join path and constraints for new path. -// final PartitionedJoinGroup g; -// { -// // extract ordered preds from path to be extended -// final List<IPredicate<?>> preds = new LinkedList<IPredicate<?>>( -// Arrays.asList(x.getPredicates())); -// // append the target vertex. -// preds.add(tVertex.pred); -// // convert to an array. -// final IPredicate<?>[] newPath = preds -// .toArray(new IPredicate[preds.size()]); -// g = new PartitionedJoinGroup(newPath, C); -// } -// -// // The set of constraints which will run for this vertex. -// final IConstraint[] c = g -// .getJoinGraphConstraints(tVertex.pred.getId()); -// -// if (c == null || c.length == 0) { -// /* -// * Must not be null since the constraint(s) are what -// * license this as a constrained join. -// */ -// throw new AssertionError(); -// } - -// /* -// * Find any vertex in the path which we are extending -// * which shares at least one variable with one of the -// * constraints which will run with this edge. That will -// * be the "source" vertex for the purposes of this path -// * extension. -// */ -// Vertex vSource = null; -// { -// -// // The set of variables used by the constraints. 
-// final Set<IVariable<?>> vars = new LinkedHashSet<IVariable<?>>(); -// -// for (IConstraint aConstraint : c) { -// -// final Iterator<IVariable<?>> vitr = BOpUtility -// .getSpannedVariables(aConstraint); -// -// while (vitr.hasNext()) { -// -// vars.add(vitr.next()); -// -// } -// -// } -// -// // Find a vertex using any of those variables. -// for (Vertex aVertex : x.vertices) { -// -// if (vSource != null) { -// // Done. -// break; -// } -// -// final IPredicate<?> aPred = aVertex.pred; -// -// final Iterator<IVariable<?>> vitr = BOpUtility -// .getArgumentVariables(aPred); -// -// while (vSource == null && vitr.hasNext()) { -// -// final IVariable<?> aVar = vitr.next(); -// -// if (vars.contains(aVar)) { -// -// // Done. -// vSource = aVertex; -// -// } -// -// } // while -// -// } // for -// -// if (vSource == null) -// throw new AssertionError("No shared variables?"); -// -// } // end block - -// final Edge dynamicEdge = new Edge(/* x, */vSource, -// tVertex); - // Extend the path to the new vertex. final Path p = x.addEdge(queryEngine, limit, tVertex, /*dynamicEdge,*/ C); @@ -1356,12 +935,9 @@ if (tmp.isEmpty()) { /* - * NO edges were identified by static and dynamic analysis - * so we must consider edges which represent fully - * unconstrained joins. + * No constrained joins were identified so we must consider + * edges which represent fully unconstrained joins. */ - -// assert unshared.length != 0; assert !nothingShared.isEmpty(); @@ -1375,14 +951,6 @@ */ final Vertex tVertex = nothingShared.iterator().next(); -// /* -// * Since there are no shared variables, any vertex already -// * in the path may be used as the source for this edge. -// */ -// final Vertex vSource = x.vertices[0]; - -// final Edge dynamicEdge = new Edge(vSource, tVertex); - // Extend the path to the new vertex. 
final Path p = x.addEdge(queryEngine, limit, tVertex,/*dynamicEdge*/ C); @@ -1436,28 +1004,6 @@ return null; } -// /** -// * Return the {@link Edge} associated with the given vertices. The -// * vertices may appear in any order. -// * -// * @param v1 -// * One vertex. -// * @param v2 -// * Another vertex. -// * -// * @return The edge -or- <code>null</code> if there is no such edge in -// * the join graph. -// */ -// public Edge getEdge(Vertex v1, Vertex v2) { -// for (Edge e : E) { -// if (e.v1 == v1 && e.v2 == v2) -// return e; -// if (e.v1 == v2 && e.v2 == v1) -// return e; -// } -// return null; -// } - /** * Obtain a sample and estimated cardinality (fast range count) for each * vertex. @@ -1600,195 +1146,10 @@ } -// for (Edge e : E) { -// -// if (e.v1.sample == null || e.v2.sample == null) { -// -// /* -// * We can only estimate the cardinality of edges connecting -// * vertices for which samples were obtained. -// */ -// -// continue; -// -// } -// -// /* -// * Figure out which vertex has the smaller cardinality. The sample -// * of that vertex is used since it is more representative than the -// * sample of the other vertex. -// */ -// // vertex v, vprime -// final Vertex v, vp; -//// if (e.v1.sample == null) // vertex not sampled. -//// throw new IllegalStateException(); -//// if (e.v2.sample == null) // vertex not sampled. -//// throw new IllegalStateException(); -// if (e.v1.sample.estimatedCardinality < e.v2.sample.estimatedCardinality) { -// v = e.v1; -// vp = e.v2; -// } else { -// v = e.v2; -// vp = e.v1; -// } -// -// // The path segment -// final IPredicate<?>[] preds = new IPredicate[] { v.pred, vp.pred }; -// -// /* -// * TODO If we get rid of the static analysis to identify edges with -// * shared variables then make sure that however we sample the -// * initial edges we protect against sampling the same edge more than -// * once. 
-// */ -// -// // cutoff join of the edge (v,vp) -// final EdgeSample edgeSample = e.sample = Edge.estimateCardinality( -// queryEngine,// -// limit, // sample limit -// preds, // ordered path segment. -// C, // constraints -// v.sample // sourceSample -// ); -// -// final Path p = new Path(v, vp, edgeSample); -// -// paths.add(p); -// -// } - return paths.toArray(new Path[paths.size()]); } -// /** -// * Return the {@link Edge} having the minimum estimated cardinality out of -// * those edges whose cardinality has been estimated. -// * -// * @param visited -// * A set of vertices to be excluded from consideration -// * (optional). -// * -// * @return The minimum cardinality edge -or- <code>null</code> if there are -// * no {@link Edge}s having an estimated cardinality. -// */ -// public Edge getMinimumCardinalityEdge(final Set<Vertex> visited) { -// -// long minCard = Long.MIN_VALUE; -// Edge minEdge = null; -// -// for (Edge e : E) { -// -// if (e.sample == null) { -// -// // Edge has not been sampled. -// continue; -// -// } -// -// if (visited != null -// && (visited.contains(e.v1) || visited.contains(e.v2))) { -// -// // A vertex of that edge has already been consumed. -// continue; -// -// } -// -// final long estimatedCardinality = e.sample.estimatedCardinality; -// -// if (minEdge == null || estimatedCardinality < minCard) { -// -// minEdge = e; -// -// minCard = estimatedCardinality; -// -// } -// -// } -// -// return minEdge; -// -// } - - // /** - // * Return the {@link Edge} having the minimum estimated cardinality - // out - // * of those edges whose cardinality has been estimated. - // * - // * @return The minimum cardinality edge -or- <code>null</code> if - // there - // * are no {@link Edge}s having an estimated cardinality. 
- // */ - // public Edge getMinimumCardinalityEdge() { - // - // return getMinimumCardinalityEdge(null); - // - // } - -// /** -// * Return the #of edges in which the given vertex appears where the -// * other vertex of the edge does not appear in the set of visited -// * vertices. -// * -// * @param v -// * The vertex. -// * @param visited -// * A set of vertices to be excluded from consideration. -// * -// * @return The #of such edges. -// */ -// public int getEdgeCount(final Vertex v, final Set<Vertex> visited) { -// -// return getEdges(v, visited).size(); -// -// } -// -// /** -// * Return the edges in which the given vertex appears where the other -// * vertex of the edge does not appear in the set of visited vertices. -// * -// * @param v -// * The vertex. -// * @param visited -// * A set of vertices to be excluded from consideration -// * (optional). -// * -// * @return Those edges. -// */ -// public List<Edge> getEdges(final Vertex v, final Set<Vertex> visited) { -// -// if (v == null) -// throw new IllegalArgumentException(); -// -// if (visited != null && visited.contains(v)) -// return Collections.emptyList(); -// -// final List<Edge> tmp = new LinkedList<Edge>(); -// -// for (Edge e : E) { -// -// if (v.equals(e.v1) || v.equals(e.v2)) { -// -// if (visited != null) { -// -// if (visited.contains(e.v1)) -// continue; -// -// if (visited.contains(e.v2)) -// continue; -// -// } -// -// tmp.add(e); -// -// } -// -// } -// -// return tmp; -// -// } - /** * Prune paths which are dominated by other paths. Paths are extended in * each round. Paths from previous rounds are always pruned. 
Of the new @@ -1817,9 +1178,6 @@ if (p.vertices.length > maxPathLen) { maxPathLen = p.vertices.length; } -// if (p.edges.size() > maxPathLen) { -// maxPathLen = p.edges.size(); -// } } final StringBuilder sb = new StringBuilder(); final Formatter f = new Formatter(sb); @@ -1828,7 +1186,6 @@ final Path Pi = a[i]; if (Pi.edgeSample == null) throw new RuntimeException("Not sampled: " + Pi); -// if (Pi.edges.size() < maxPathLen) { if (Pi.vertices.length < maxPathLen) { /* * Only the most recently generated set of paths survive to @@ -1890,27 +1247,6 @@ return b; } -///** -//* Places vertices into order by the {@link BOp#getId()} associated with -//* their {@link IPredicate}. -//*/ -//private static class BOpIdComparator implements Comparator<Vertex> { -// -// private static final transient Comparator<Vertex> INSTANCE = new BOpIdComparator(); -// -//// @Override -// public int compare(final Vertex o1, final Vertex o2) { -// final int id1 = o1.pred.getId(); -// final int id2 = o2.pred.getId(); -// if (id1 < id2) -// return -1; -// if (id2 > id1) -// return 1; -// return 0; -// } -// -//} - /** * Comma delimited table showing the estimated join hit ratio, the estimated * cardinality, and the set of vertices for each of the specified join @@ -2090,9 +1426,7 @@ // e.cumulativeEstimatedCardinality// ); } -// sb.append("\nv[" + vertexIds[i] + "] " + e.toString()); } - /**/ } sb.append("\n"); return sb.toString(); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JoinGraph.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JoinGraph.java 2011-02-23 21:52:48 UTC (rev 4235) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JoinGraph.java 2011-02-24 01:04:12 UTC (rev 4236) @@ -295,11 +295,10 @@ * query is long running. 
Samples must be held until we have * identified the final join path since each vertex will be used by * each maximum length join path and we use the samples from the - * vertices to re-sample the surviving join paths in each round. + * vertices to re-sample the surviving join paths in each round. [In + * fact, the samples are not being provided to this evaluation context + * right now.] * - * @todo If there is a slice on the outer query, then the query result may - * well be materialized by now. - * * @todo If there are source binding sets then they need to be applied above * (when we are sampling) and below (when we evaluate the selected * join path). @@ -336,12 +335,12 @@ parentContext.getSink(), null/* sink2 */, null/* constraints */, null/* stats */); - System.out.println("nout=" + nout); +// System.out.println("nout=" + nout); // verify no problems. runningQuery.get(); - System.out.println("Future Ok"); +// System.out.println("Future Ok"); } catch (Throwable t) { Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/Path.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/Path.java 2011-02-23 21:52:48 UTC (rev 4235) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/Path.java 2011-02-24 01:04:12 UTC (rev 4236) @@ -81,20 +81,8 @@ private static final transient Logger log = Logger.getLogger(Path.class); -// /** -// * An immutable ordered list of the edges in the (aka the sequence of -// * joins represented by this path). -// * -// * @deprecated by with {@link #vertices}. -// */ -// final List<Edge> edges; - /** * An ordered list of the vertices in the {@link Path}. - * - * TODO Replace {@link #edges} with {@link #vertices} and declare a - * second array with the {@link VertexSample} for the initial vertex - * followed by the {@link EdgeSample} for each cutoff join in the path. 
*/ final Vertex[] vertices; @@ -150,8 +138,8 @@ final EdgeSample edgeSample) { final long total = cumulativeEstimatedCardinality + // - edgeSample.estimatedCardinality + // - edgeSample.tuplesRead// this is part of the cost too. + edgeSample.estimatedCardinality // +// + edgeSample.tuplesRead // ; return total; @@ -431,44 +419,6 @@ } -// /** -// * Return the vertices in this path (in path order). For the first edge, -// * the minimum cardinality vertex is always reported first (this is -// * critical for producing the correct join plan). For the remaining -// * edges in the path, the unvisited is reported. -// * -// * @return The vertices (in path order). -// */ -// static private Vertex[] getVertices(final List<Edge> edges) { -// -// final Set<Vertex> tmp = new LinkedHashSet<Vertex>(); -// -// for (Edge e : edges) { -// -// if (tmp.isEmpty()) { -// /* -// * The first edge is handled specially in order to report -// * the minimum cardinality vertex first. -// */ -// tmp.add(e.getMinimumCardinalityVertex()); -// tmp.add(e.getMaximumCardinalityVertex()); -// -// } else { -// -// tmp.add(e.v1); -// -// tmp.add(e.v2); -// -// } -// -// } -// -// final Vertex[] a = tmp.toArray(new Vertex[tmp.size()]); -// -// return a; -// -// } - /** * Return the predicates associated with the vertices. * @@ -492,36 +442,6 @@ } -// /** -// * Return the {@link BOp} identifiers of the predicates associated with -// * each vertex in path order. -// */ -// static int[] getVertexIds(final List<Edge> edges) { -// -// final Set<Vertex> tmp = new LinkedHashSet<Vertex>(); -// -// for (Edge e : edges) { -// -// tmp.add(e.v1); -// -// tmp.add(e.v2); -// -// } -// -// final Vertex[] a = tmp.toArray(new Vertex[tmp.size()]); -// -// final int[] b = new int[a.length]; -// -// for (int i = 0; i < a.length; i++) { -// -// b[i] = a[i].pred.getId(); -// -// } -// -// return b; -// -// } - /** * Return <code>true</code> if this path begins with the given path. 
* @@ -543,14 +463,17 @@ } for (int i = 0; i < p.vertices.length; i++) { + final Vertex vSelf = vertices[i]; + final Vertex vOther = p.vertices[i]; -// final Edge eSelf = edges.get(i); -// final Edge eOther = p.edges.get(i); -// if (eSelf != eOther) { + if (vSelf != vOther) { + return false; + } + } return true; @@ -604,20 +527,6 @@ if (vnew == null) throw new IllegalArgumentException(); -// // Figure out which vertices are already part of this path. -// final boolean v1Found = contains(e.v1); -// final boolean v2Found = contains(e.v2); -// -// if (!v1Found && !v2Found) -// throw new IllegalArgumentException( -// "Edge does not extend path: edge=" + e + ", path=" -// + this); -// -// if (v1Found && v2Found) -// throw new IllegalArgumentException( -// "Edge already present in path: edge=" + e + ", path=" -// + this); - if(contains(vnew)) throw new IllegalArgumentException( "Vertex already present in path: vnew=" + vnew + ", path=" @@ -626,12 +535,9 @@ if (this.edgeSample == null) throw new IllegalStateException(); -// // The vertex which is already part of this path. -// final Vertex sourceVertex = v1Found ? e.v1 : e.v2; + // The new vertex. + final Vertex targetVertex = vnew; - // The new vertex, which is not part of this path. - final Vertex targetVertex = vnew;//v1Found ? e.v2 : e.v1; - /* * Chain sample the edge. * @@ -679,32 +585,15 @@ final EdgeSample edgeSample2 = cutoffJoin(// queryEngine,// limit, // -// sourceVertex, // -// targetVertex,// preds2,// constraints,// this.edgeSample // the source sample. -// this.sample.estimatedCardinality, -// this.sample.estimateEnum == EstimateEnum.Exact, -// this.sample.limit,// -// this.sample.sample// the sample fed into the cutoff join. 
); { -// final List<Edge> edges = new ArrayList<Edge>( -// this.edges.size() + 1); -// -// edges.addAll(this.edges); -// -// edges.add(e); - final long cumulativeEstimatedCardinality = add( this.cumulativeEstimatedCardinality, edgeSample2); -// this.cumulativeEstimatedCardinality -// + edgeSample2.estimatedCardinality// -// + edgeSample2.tuplesRead// this is part of the cost too. -// ; // Extend the path. final Path tmp = new Path(vertices2, preds2, @@ -746,22 +635,12 @@ * * @throws Exception */ -// * @param vSource -// * The source vertex. -// * @param vTarget -// * The target vertex static public EdgeSample cutoffJoin(// final QueryEngine queryEngine,// final int limit,// final IPredicate<?>[] path,// final IConstraint[] constraints,// -// final Vertex vSource,// -// final Vertex vTarget,// final SampleBase sourceSample// -// final long sourceEstimatedCardinality,// -// final boolean sourceSampleExact,// -// final int sourceSampleLimit,// -// final IBindingSet[] sourceSample// ) throws Exception { if (path == null) @@ -826,12 +705,10 @@ * limit is satisfied, thus avoiding unnecessary effort. */ - // final int joinId = 1; final int joinId = idFactory.nextId(); - final Map<String, Object> anns = NV.asMap( - // + final Map<String, Object> anns = NV.asMap(// new NV(BOp.Annotations.BOP_ID, joinId),// - new NV(PipelineJoin.Annotations.PREDICATE, pred), + new NV(PipelineJoin.Annotations.PREDICATE, pred),// // Note: does not matter since not executed by the query // controller. 
// // disallow parallel evaluation of tasks @@ -979,9 +856,6 @@ final long estimatedCardinality = (long) (sourceSample.estimatedCardinality * f); final EdgeSample edgeSample = new EdgeSample(// -// sourceSample.estimatedCardinality, // -// sourceSample.estimateEnum, // -// sourceSample.limit, // sourceSample,// inputCount,// outputCount, // Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/SampleBase.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/SampleBase.java 2011-02-23 21:52:48 UTC (rev 4235) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/SampleBase.java 2011-02-24 01:04:12 UTC (rev 4236) @@ -59,7 +59,9 @@ * the sample as an inline access path. * * TODO This field should be used to avoid needless re-computation of a join - * whose exact solution is already known. + * whose exact solution is already known. We already do this within the + * runtime optimizer. To go further than that we need to do the partial + * evaluation of the join graph. */ public final EstimateEnum estimateEnum; Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java 2011-02-23 21:52:48 UTC (rev 4235) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java 2011-02-24 01:04:12 UTC (rev 4236) @@ -162,6 +162,8 @@ final File testDir = new File(tmpDir, "bigdata-tests"); testDir.mkdirs(); file = new File(testDir, resourceId + ".jnl"); + // uncomment to force a reload of the dataset. 
+// if(file.exists()) file.delete(); namespace = "LUBM_U1"; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2011-02-24 12:28:41
|
Revision: 4238 http://bigdata.svn.sourceforge.net/bigdata/?rev=4238&view=rev Author: thompsonbry Date: 2011-02-24 12:28:34 +0000 (Thu, 24 Feb 2011) Log Message: ----------- Improved RTO trace of cutoff joins. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/Path.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/Path.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/Path.java 2011-02-24 09:10:56 UTC (rev 4237) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/Path.java 2011-02-24 12:28:34 UTC (rev 4238) @@ -679,8 +679,9 @@ // Reservations for the bopIds used by the constraints. if (c != null) { for (IConstraint x : c) { - if (log.isDebugEnabled()) - log.debug("Attaching constraint: " + x); + if (log.isTraceEnabled()) + log.trace(Arrays.toString(BOpUtility.getPredIds(path)) + + ": constraint: " + x); final Iterator<BOp> itr = BOpUtility .preOrderIteratorWithAnnotations(x); while (itr.hasNext()) { @@ -774,7 +775,8 @@ .getStats().get(joinId); if (log.isTraceEnabled()) - log.trace(joinStats.toString()); + log.trace(Arrays.toString(BOpUtility.getPredIds(path)) + ": " + + joinStats.toString()); // #of solutions in. 
final int inputCount = (int) joinStats.inputSolutions.get(); @@ -869,7 +871,7 @@ if (log.isDebugEnabled()) log.debug(Arrays.toString(BOpUtility.getPredIds(path)) - + " : newSample=" + edgeSample); + + ": newSample=" + edgeSample); return edgeSample; Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java 2011-02-24 09:10:56 UTC (rev 4237) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java 2011-02-24 12:28:34 UTC (rev 4238) @@ -440,7 +440,7 @@ * to use a larger data set if we want to verify the optimizers join * path for a query which produces solutions in the data. */ - if (true) { + if (false) { final IPredicate<?>[] runtimeOrder = doTest(preds, null/* constraints */); /* * Verify that the runtime optimizer produced the expected join @@ -456,7 +456,7 @@ /* * Run w/ constraints. */ - if(false){ + if(true){ final IPredicate<?>[] runtimeOrder = doTest(preds, constraints); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2011-02-24 16:41:24
|
Revision: 4239 http://bigdata.svn.sourceforge.net/bigdata/?rev=4239&view=rev Author: thompsonbry Date: 2011-02-24 16:41:18 +0000 (Thu, 24 Feb 2011) Log Message: ----------- Reduced a log message from INFO to DEBUG for the initial set of vertices in the RTO. Clean up the RTO trace for (in/read/out) to present the data in a more natural order. Added some notes on the BSBM Q5 run. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java 2011-02-24 12:28:34 UTC (rev 4238) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java 2011-02-24 16:41:18 UTC (rev 4239) @@ -555,14 +555,14 @@ */ sampleAllVertices(queryEngine, limit); - if (log.isInfoEnabled()) { + if (log.isDebugEnabled()) { final StringBuilder sb = new StringBuilder(); sb.append("Vertices:\n"); for (Vertex v : V) { sb.append(v.toString()); sb.append("\n"); } - log.info(sb.toString()); + log.debug(sb.toString()); } /* @@ -1280,14 +1280,14 @@ static public String showTable(final Path[] a,final Path[] pruned) { final StringBuilder sb = new StringBuilder(); final Formatter f = new Formatter(sb); - f.format("%-6s %10s%1s * %10s (%6s/%6s/%6s) = %10s%1s : %10s %10s", + f.format("%-6s %10s%1s * %10s (%6s %6s %6s) = %10s%1s : %10s %10s", "path",// "sourceCard",// "",// sourceSampleExact "f",// - "out",// "in",// "read",// + "out",// "estCard",// "",// estimateIs(Exact|LowerBound|UpperBound) "sumEstCard",// @@ -1308,18 +1308,18 @@ } final EdgeSample edgeSample = x.edgeSample; if (edgeSample == null) { - f.format("%6d %10s%1s * %10s (%6s/%6s/%6s) = %10s%1s : %10s",// + 
f.format("%6d %10s%1s * %10s (%6s %6s %6s) = %10s%1s : %10s",// i, "N/A", "", "N/A", "N/A", "N/A", "N/A", "N/A", "", "N/A"); } else { - f.format("%6d %10d%1s * % 10.2f (%6d/%6d/%6d) = % 10d%1s : % 10d", // + f.format("%6d %10d%1s * % 10.2f (%6d %6d %6d) = % 10d%1s : % 10d", // i,// edgeSample.sourceSample.estimatedCardinality,// edgeSample.sourceSample.estimateEnum.getCode(),// edgeSample.f,// - edgeSample.outputCount,// edgeSample.inputCount,// edgeSample.tuplesRead,// + edgeSample.outputCount,// edgeSample.estimatedCardinality,// edgeSample.estimateEnum.getCode(),// x.cumulativeEstimatedCardinality// @@ -1360,14 +1360,14 @@ /* * @todo show limit on samples? */ - f.format("%6s %10s%1s * %10s (%6s/%6s/%6s) = %10s%1s : %10s",// + f.format("%6s %10s%1s * %10s (%6s %6s %6s) = %10s%1s : %10s",// "vertex", "sourceCard",// "",// sourceSampleExact "f",// - "out",// "in",// "read",// + "out",// "estCard",// "",// estimateIs(Exact|LowerBound|UpperBound) "sumEstCard"// @@ -1391,19 +1391,19 @@ } sb.append("\n"); if (sample == null) { - f.format("% 6d %10s%1s * %10s (%6s/%6s/%6s) = %10s%1s : %10s",// + f.format("% 6d %10s%1s * %10s (%6s %6s %6s) = %10s%1s : %10s",// predId,// "N/A", "", "N/A", "N/A", "N/A", "N/A", "N/A", "", "N/A"); } else if(sample instanceof VertexSample) { // Show the vertex sample for the initial vertex. - f.format("% 6d %10s%1s * %10s (%6s/%6s/%6s) = % 10d%1s : %10d",// + f.format("% 6d %10s%1s * %10s (%6s %6s %6s) = % 10d%1s : %10d",// predId,// - "NA",//sample.sourceSample.estimatedCardinality,// + "N/A",//sample.sourceSample.estimatedCardinality,// " ",//sample.sourceSample.isExact() ? "E" : "",// " ",//sample.f,// - "N/A",//sample.outputCount,// "N/A",//sample.inputCount,// "N/A",//sample.tuplesRead,// + "N/A",//sample.outputCount,// sample.estimatedCardinality,// sample.estimateEnum.getCode(),// sumEstCard// @@ -1412,14 +1412,14 @@ } else { // Show the sample for a cutoff join with the 2nd+ vertex. 
final EdgeSample edgeSample = (EdgeSample)sample; - f.format("% 6d %10d%1s * % 10.2f (%6d/%6d/%6d) = % 10d%1s : %10d",// + f.format("% 6d %10d%1s * % 10.2f (%6d %6d %6d) = % 10d%1s : %10d",// predId,// edgeSample.sourceSample.estimatedCardinality,// edgeSample.sourceSample.estimateEnum.getCode(),// edgeSample.f,// - edgeSample.outputCount,// edgeSample.inputCount,// edgeSample.tuplesRead,// + edgeSample.outputCount,// edgeSample.estimatedCardinality,// edgeSample.estimateEnum.getCode(),// sumEstCard// Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java 2011-02-24 12:28:34 UTC (rev 4238) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java 2011-02-24 16:41:18 UTC (rev 4239) @@ -97,11 +97,11 @@ * large data set to assess the relative performance of the static and * runtime query optimizers). */ - private static final boolean useExistingJournal = false; + private static final boolean useExistingJournal = true; -// private static final long existingPC = 284826; // BSBM 100M + private static final long existingPC = 284826; // BSBM 100M - private static final long existingPC = 566496; // BSBM 200M +// private static final long existingPC = 566496; // BSBM 200M private static final File existingJournal = new File("/data/bsbm/bsbm_" + existingPC + "/bigdata-bsbm.RW.jnl"); @@ -432,42 +432,81 @@ } - /* - * Run w/o constraints. - * - * Note: There are no solutions for this query against BSBM 100. The - * optimizer is only providing the fastest path to prove that. We have - * to use a larger data set if we want to verify the optimizers join - * path for a query which produces solutions in the data. - */ - if (false) { + /* + * Run w/o constraints. 
+ * + * Note: There are no solutions for this query against BSBM 100. The + * optimizer is only providing the fastest path to prove that. We have + * to use a larger data set if we want to verify the optimizers join + * path for a query which produces solutions in the data. + * + * Note: The optimizer finds the same join path for the BSBM 100, 100M, + * and 200M data sets + */ + if (true) { + /* +100M: static: ids=[1, 2, 4, 6, 0, 3, 5] + +*** round=5, limit=600: paths{in=1,considered=1,out=1} +path sourceCard * f ( out/ in/ read) = estCard : sumEstCard joinPath + 0 2400 * 1.00 ( 600/ 600/ 600) = 2400 : 9066 [ 1 2 0 4 6 3 5 ] + +*** Selected join path: [1, 2, 0, 4, 6, 3, 5] +vertex sourceCard * f ( out/ in/ read) = estCard : sumEstCard + 1 NA * ( N/A/ N/A/ N/A) = 16E : 16 + 2 16E * 150.00 ( 600/ 4/ 10921) = 2400 : 13337 + 0 2400 * 1.00 ( 600/ 600/ 600) = 2400 : 16337 + 4 2400 * 1.00 ( 600/ 600/ 600) = 2400 : 19337 + 6 2400 * 1.00 ( 600/ 600/ 600) = 2400 : 22337 + 3 2400 * 1.00 ( 600/ 600/ 600) = 2400 : 25337 + 5 N/A * N/A ( N/A/ N/A/ N/A) = N/A : N/A + +test_bsbm_q5 : Total times: static=8741, runtime=8025, delta(static-runtime)=716 + +200M: static: ids=[1, 2, 4, 6, 0, 3, 5] + +*** round=5, limit=600: paths{in=1,considered=1,out=1} +path sourceCard * f ( out/ in/ read) = estCard : sumEstCard joinPath + 0 166410 * 1.00 ( 600/ 600/ 600) = 166410 : 998460 [ 1 2 0 4 6 3 5 ] + +test_bsbm_q5 : Total times: static=8871, runtime=8107, delta(static-runtime)=764 + */ final IPredicate<?>[] runtimeOrder = doTest(preds, null/* constraints */); - /* - * Verify that the runtime optimizer produced the expected join - * path. - * - * Note: The optimizer finds the same join path for the BSBM 100, - * 100M, and 200M data sets - */ - assertEquals("runtimeOrder", new int[] { 1, 2, 0, 4, 6, 3, 5 }, - BOpUtility.getPredIds(runtimeOrder)); +// assertEquals("runtimeOrder", new int[] { 1, 2, 0, 4, 6, 3, 5 }, BOpUtility.getPredIds(runtimeOrder)); } - /* - * Run w/ constraints. 
- */ + // Run w/ constraints. if(true){ + /* +100M: static: ids=[1, 2, 4, 6, 0, 3, 5] + +*** round=5, limit=600: paths{in=4,considered=4,out=1} +path sourceCard * f ( out/ in/ read) = estCard : sumEstCard joinPath + 0 107 * 1.00 ( 27/ 27/ 27) = 107 : 2541 [ 1 2 4 3 6 5 0 ] + + test_bsbm_q5 : Total times: static=7201, runtime=3686, delta(static-runtime)=3515 +*** Selected join path: [1, 2, 4, 3, 6, 5, 0] +vertex sourceCard * f ( out/ in/ read) = estCard : sumEstCard + 1 NA * ( N/A/ N/A/ N/A) = 16E : 16 + 2 16E * 150.00 ( 600/ 4/ 10921) = 2400 : 13337 + 4 2400 * 1.00 ( 600/ 600/ 600) = 2400 : 16337 + 3 2400 * 0.16 ( 97/ 600/ 600) = 387 : 17324 + 6 387 * 1.00 ( 97/ 97/ 97) = 387 : 17808 + 5 387 * 0.28 ( 27/ 97/ 97) = 107 : 18012 + 0 107 * 1.00 ( 27/ 27/ 27) = 107 : 18146 + +200M: static: ids=[1, 2, 4, 6, 0, 3, 5] + +*** round=5, limit=600: paths{in=4,considered=4,out=1} +path sourceCard * f ( out/ in/ read) = estCard : sumEstCard joinPath + 0 1941 * 1.00 ( 7/ 7/ 7) = 1941 : 344799 [ 1 2 4 3 6 5 0 ] + +test_bsbm_q5 : Total times: static=7312, runtime=3305, delta(static-runtime)=4007 + + */ final IPredicate<?>[] runtimeOrder = doTest(preds, constraints); - - /* - * Verify that the runtime optimizer produced the expected join - * path. - * - * FIXME Figure out what the right query is. - */ - assertEquals("runtimeOrder", new int[] { 1, 2, 0, 4, 6, 3, 5 }, - BOpUtility.getPredIds(runtimeOrder)); +// assertEquals("runtimeOrder", new int[] { 1, 2, 0, 4, 6, 3, 5 }, BOpUtility.getPredIds(runtimeOrder)); } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2011-02-24 16:49:52
|
Revision: 4241 http://bigdata.svn.sourceforge.net/bigdata/?rev=4241&view=rev Author: thompsonbry Date: 2011-02-24 16:49:42 +0000 (Thu, 24 Feb 2011) Log Message: ----------- More work on the Runtime optimizer. JGraph: After each round, including the first, the logic now remove any entries from the edgeSamples map which do not correspond to a prefix for a surviving join path. This reduces the heap pressure of the RTO. SampleBase: Encapsulated the [sample] field as a private atomic reference and exposed a method to release the sampled solution set. This reduces heap pressure on the JVM and is forward looking to storing materialized samples on the native C heap using the memory manager. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/Path.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/SampleBase.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java 2011-02-24 16:42:05 UTC (rev 4240) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java 2011-02-24 16:49:42 UTC (rev 4241) @@ -30,6 +30,7 @@ import java.util.Arrays; import java.util.Collections; import java.util.Formatter; +import java.util.Iterator; import java.util.LinkedHashMap; import java.util.LinkedHashSet; import java.util.LinkedList; @@ -968,9 +969,15 @@ } // next path + /* + * Now examine the set of generated and sampled join paths. 
If any paths + * span the same vertices then they are alternatives and we can pick the + * best alternative now and prune the other alternatives for those + * vertices. + */ final Path[] paths_tp1 = tmp.toArray(new Path[tmp.size()]); - final Path[] paths_tp1_pruned = pruneJoinPaths(paths_tp1); + final Path[] paths_tp1_pruned = pruneJoinPaths(paths_tp1, edgeSamples); if (log.isDebugEnabled()) log.debug("\n*** round=" + round + ", limit=" + limit @@ -1153,22 +1160,26 @@ /** * Prune paths which are dominated by other paths. Paths are extended in * each round. Paths from previous rounds are always pruned. Of the new - * paths in each round, the following rule is applied to prune the - * search to just those paths which are known to dominate the other - * paths covering the same set of vertices: + * paths in each round, the following rule is applied to prune the search to + * just those paths which are known to dominate the other paths covering the + * same set of vertices: * <p> * If there is a path, [p] != [p1], where [p] is an unordered variant of - * [p1] (that is the vertices of p are the same as the vertices of p1), - * and the cumulative cost of [p] is LTE the cumulative cost of [p1], - * then [p] dominates (or is equivalent to) [p1] and p1 should be - * pruned. + * [p1] (that is the vertices of p are the same as the vertices of p1), and + * the cumulative cost of [p] is LTE the cumulative cost of [p1], then [p] + * dominates (or is equivalent to) [p1] and p1 should be pruned. * * @param a * A set of paths. + * @param edgeSamples + * The set of samples for path segments. Samples which are no + * longer in use after pruning will be cleared from the map and + * their materialized solution sets will be discarded. * * @return The set of paths with all dominated paths removed. */ - public Path[] pruneJoinPaths(final Path[] a) { + public Path[] pruneJoinPaths(final Path[] a, + final Map<PathIds, EdgeSample> edgeSamples) { /* * Find the length of the longest path(s). 
All shorter paths are * dropped in each round. @@ -1237,13 +1248,84 @@ } } // Pj } // Pi - final Set<Path> keep = new LinkedHashSet<Path>(); - for (Path p : a) { - if (pruned.contains(p)) - continue; - keep.add(p); + /* + * Generate a set of paths which will be retained. + */ + final Path[] b; + { + final Set<Path> keep = new LinkedHashSet<Path>(); + for (Path p : a) { + if (pruned.contains(p)) + continue; + keep.add(p); + } + // convert the retained paths to an array. + b = keep.toArray(new Path[keep.size()]); } - final Path[] b = keep.toArray(new Path[keep.size()]); + /* + * Clear any entries from the edgeSamples map which are not prefixes of + * the retained join paths. + */ + { + + final Iterator<Map.Entry<PathIds, EdgeSample>> itr = edgeSamples + .entrySet().iterator(); + + int ncleared = 0; + while (itr.hasNext()) { + + final Map.Entry<PathIds, EdgeSample> e = itr.next(); + + final PathIds ids = e.getKey(); + + // Consider the retained paths. + boolean found = false; + + for (Path p : b) { + + if (p.beginsWith(ids.ids)) { + + // This sample is still in use. + found = true; + + break; + + } + + } + + if (!found) { + + /* + * Clear sample no longer in use. + * + * Note: In fact, holding onto the sample metadata is + * relatively cheap if there was a reason to do so (it only + * effects the size of the [edgeSamples] map). It is holding + * onto the materialized solution set which puts pressure on + * the heap. + */ + if (log.isTraceEnabled()) + log.trace("Clearing sample: " + ids); + + // release the sampled solution set. + e.getValue().releaseSample(); + + // clear the entry from the array. + itr.remove(); + + ncleared++; + + } + + } + + if (ncleared > 0 && log.isDebugEnabled()) + log.debug("Cleared " + ncleared + " samples"); + + } + + // Return the set of retained paths. 
return b; } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/Path.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/Path.java 2011-02-24 16:42:05 UTC (rev 4240) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/Path.java 2011-02-24 16:49:42 UTC (rev 4241) @@ -202,7 +202,7 @@ if (edgeSample == null) throw new IllegalArgumentException(); - if (edgeSample.sample == null) + if (edgeSample.getSample() == null) throw new IllegalArgumentException(); // this.edges = Collections.singletonList(e); @@ -264,7 +264,7 @@ if (edgeSample == null) throw new IllegalArgumentException(); - if (edgeSample.sample == null) + if (edgeSample.getSample() == null) throw new IllegalArgumentException(); // this.edges = Collections.unmodifiableList(edges); @@ -457,9 +457,11 @@ if (p == null) throw new IllegalArgumentException(); - if (vertices.length > p.vertices.length) { + if (vertices.length < p.vertices.length) { + // Proven false since the caller's path is longer. return false; + } for (int i = 0; i < p.vertices.length; i++) { @@ -480,6 +482,43 @@ } /** + * Return <code>true</code> if this path begins with the given path. + * + * @param p + * The given path. + * + * @return <code>true</code> if this path begins with the given path. + * + * @todo unit tests. + */ + public boolean beginsWith(final int[] ids) { + + if (ids == null) + throw new IllegalArgumentException(); + + if (vertices.length < ids.length) { + // Proven false since the caller's path is longer. + return false; + } + + for (int i = 0; i < ids.length; i++) { + + final int idSelf = vertices[i].pred.getId(); + + final int idOther = ids[i]; + + if (idSelf != idOther) { + + return false; + + } + + } + + return true; + } + + /** * Return the first N {@link IPredicate}s in this {@link Path}. 
* * @param length @@ -658,7 +697,7 @@ if (sourceSample == null) throw new IllegalArgumentException(); - if (sourceSample.sample == null) + if (sourceSample.getSample() == null) throw new IllegalArgumentException(); // Figure out which constraints attach to each predicate. @@ -749,7 +788,7 @@ new LocalChunkMessage<IBindingSet>(queryEngine, queryId, joinOp .getId()/* startId */, -1 /* partitionId */, new ThickAsynchronousIterator<IBindingSet[]>( - new IBindingSet[][] { sourceSample.sample }))); + new IBindingSet[][] { sourceSample.getSample() }))); final List<IBindingSet> result = new LinkedList<IBindingSet>(); try { Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/SampleBase.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/SampleBase.java 2011-02-24 16:42:05 UTC (rev 4240) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/SampleBase.java 2011-02-24 16:49:42 UTC (rev 4241) @@ -27,6 +27,10 @@ package com.bigdata.bop.joinGraph.rto; +import java.util.concurrent.atomic.AtomicReference; + +import org.apache.log4j.Logger; + import com.bigdata.bop.IBindingSet; import com.bigdata.rwstore.sector.IMemoryManager; @@ -36,9 +40,22 @@ * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ + * + * TODO Large samples should be buffered on the {@link IMemoryManager} + * so they do not pose a burden on the heap. This will require us to + * manage the allocation contexts so we can release samples in a timely + * manner once they are no longer used and always release samples by + * the time the RTO is finished. [There is an additional twist if we + * have fully materialized some part of the join since we no longer + * need to evaluate that path segment. If the RTO can interleave query + * evaluation with exploration then we can take advantage of these + * materialized solutions.] 
*/ public abstract class SampleBase { + private static final transient Logger log = Logger + .getLogger(SampleBase.class); + /** * The estimated cardinality of the underlying access path (for a vertex) or * the join (for a cutoff join). @@ -46,7 +63,7 @@ public final long estimatedCardinality; /** - * The limit used to produce the {@link #sample}. + * The limit used to produce the {@link #getSample() sample}. */ public final int limit; @@ -77,20 +94,39 @@ /** * Sample. + */ + private final AtomicReference<IBindingSet[]> sampleRef = new AtomicReference<IBindingSet[]>(); + + /** + * The sampled solution set. * - * TODO Large samples should be buffered on the {@link IMemoryManager} so - * they do not pose a burden on the heap. This will require us to manage the - * allocation contexts so we can release samples in a timely manner once - * they are no longer used and always release samples by the time the RTO is - * finished. [There is an additional twist if we have fully materialized - * some part of the join since we no longer need to evaluate that path - * segment. If the RTO can interleave query evaluation with exploration - * then we can take advantage of these materialized solutions.] + * @return The sampled solution set -or- <code>null</code> if it has been + * released. */ - final IBindingSet[] sample; + IBindingSet[] getSample() { + + return sampleRef.get(); + + } /** + * Release the sampled solution set. * + * TODO MEMORY MANAGER : release. + */ + void releaseSample() { + + if (sampleRef.getAndSet(null) != null) { + + if (log.isTraceEnabled()) + log.trace("Released sample: " + this); + + } + + } + + /** + * * @param estimatedCardinality * The estimated cardinality. 
* @param limit @@ -126,7 +162,7 @@ this.estimateEnum = estimateEnum; - this.sample = sample; + this.sampleRef.set(sample); } @@ -147,7 +183,10 @@ sb.append("{estimatedCardinality=" + estimatedCardinality); sb.append(",limit=" + limit); sb.append(",estimateEnum=" + estimateEnum); - sb.append(",sampleSize=" + sample.length); + { + final IBindingSet[] tmp = sampleRef.get(); + sb.append(",sampleSize=" + (tmp != null ? tmp.length : "N/A")); + } toString(sb); // allow extension sb.append("}"); return sb.toString(); Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java 2011-02-24 16:42:05 UTC (rev 4240) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java 2011-02-24 16:49:42 UTC (rev 4241) @@ -24,10 +24,12 @@ import com.bigdata.journal.Journal; import com.bigdata.rdf.internal.XSDIntIV; import com.bigdata.rdf.internal.constraints.CompareBOp; +import com.bigdata.rdf.internal.constraints.Constraint; +import com.bigdata.rdf.internal.constraints.IsBoundBOp; import com.bigdata.rdf.internal.constraints.MathBOp; import com.bigdata.rdf.internal.constraints.NotBOp; import com.bigdata.rdf.internal.constraints.SameTermBOp; -import com.bigdata.rdf.internal.constraints.Constraint; +import com.bigdata.rdf.model.BigdataLiteral; import com.bigdata.rdf.model.BigdataURI; import com.bigdata.rdf.model.BigdataValue; import com.bigdata.rdf.model.BigdataValueFactory; @@ -432,19 +434,19 @@ } - /* - * Run w/o constraints. + /* + * Run w/o constraints. + * + * Note: There are no solutions for this query against BSBM 100. The + * optimizer is only providing the fastest path to prove that. 
We have + * to use a larger data set if we want to verify the optimizers join + * path for a query which produces solutions in the data. * - * Note: There are no solutions for this query against BSBM 100. The - * optimizer is only providing the fastest path to prove that. We have - * to use a larger data set if we want to verify the optimizers join - * path for a query which produces solutions in the data. - * * Note: The optimizer finds the same join path for the BSBM 100, 100M, * and 200M data sets - */ + */ if (true) { - /* + /* 100M: static: ids=[1, 2, 4, 6, 0, 3, 5] *** round=5, limit=600: paths{in=1,considered=1,out=1} @@ -470,14 +472,15 @@ 0 166410 * 1.00 ( 600/ 600/ 600) = 166410 : 998460 [ 1 2 0 4 6 3 5 ] test_bsbm_q5 : Total times: static=8871, runtime=8107, delta(static-runtime)=764 - */ + */ final IPredicate<?>[] runtimeOrder = doTest(preds, null/* constraints */); -// assertEquals("runtimeOrder", new int[] { 1, 2, 0, 4, 6, 3, 5 }, BOpUtility.getPredIds(runtimeOrder)); + assertEquals("runtimeOrder", new int[] { 1, 2, 0, 4, 6, 3, 5 }, + BOpUtility.getPredIds(runtimeOrder)); } // Run w/ constraints. 
if(true){ - /* + /* 100M: static: ids=[1, 2, 4, 6, 0, 3, 5] *** round=5, limit=600: paths{in=4,considered=4,out=1} @@ -503,12 +506,233 @@ 0 1941 * 1.00 ( 7/ 7/ 7) = 1941 : 344799 [ 1 2 4 3 6 5 0 ] test_bsbm_q5 : Total times: static=7312, runtime=3305, delta(static-runtime)=4007 - - */ + + */ final IPredicate<?>[] runtimeOrder = doTest(preds, constraints); -// assertEquals("runtimeOrder", new int[] { 1, 2, 0, 4, 6, 3, 5 }, BOpUtility.getPredIds(runtimeOrder)); + assertEquals("runtimeOrder", new int[] { 1, 2, 4, 3, 6, 5, 0 }, + BOpUtility.getPredIds(runtimeOrder)); } } + /** + * BSBM Q3 + * + * <pre> + * PREFIX bsbm-inst: <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/> + * PREFIX bsbm: <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/> + * PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> + * PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + * + * SELECT ?product ?label + * WHERE { + * ?product rdfs:label ?label . + * ?product a %ProductType% . + * ?product bsbm:productFeature %ProductFeature1% . + * ?product bsbm:productPropertyNumeric1 ?p1 . + * FILTER ( ?p1 > %x% ) + * ?product bsbm:productPropertyNumeric3 ?p3 . + * FILTER (?p3 < %y% ) + * OPTIONAL { + * ?product bsbm:productFeature %ProductFeature2% . + * ?product rdfs:label ?testVar } + * FILTER (!bound(?testVar)) + * } + * ORDER BY ?label + * LIMIT 10 + * </pre> + */ + public void test_bsbm_q3() throws Exception { + + fail("This test needs instance data for BSBM 100 and 100M"); + + QueryLog.logTableHeader(); + + final String namespace = getNamespace(); + + final AbstractTripleStore database = getDatabase(namespace); + + /* + * Resolve terms against the lexicon. 
+ */ + final BigdataValueFactory valueFactory = database.getLexiconRelation() + .getValueFactory(); + + final String rdfs = "http://www.w3.org/2000/01/rdf-schema#"; + final String rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"; + final String bsbm = "http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/"; + final String bsbmInst ="http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/"; + + final BigdataURI rdfType = valueFactory.createURI(rdf + "type"); + + final BigdataURI rdfsLabel = valueFactory.createURI(rdfs + "label"); + + final BigdataURI productFeature = valueFactory.createURI(bsbm + + "productFeature"); + + final BigdataURI productPropertyNumeric1 = valueFactory.createURI(bsbm + + "productPropertyNumeric1"); + + final BigdataURI productPropertyNumeric3 = valueFactory.createURI(bsbm + + "productPropertyNumeric3"); + + // FIXME parameters + final BigdataURI productType = valueFactory.createURI(productInstance); + final BigdataURI productFeature1 = valueFactory.createURI(productInstance); + final BigdataURI productFeature2 = valueFactory.createURI(productInstance); + final BigdataLiteral x = valueFactory.createLiteral(productInstance); + final BigdataLiteral y = valueFactory.createLiteral(productInstance); + + final BigdataValue[] terms = new BigdataValue[] { rdfType, rdfsLabel, + productFeature, productPropertyNumeric1, + productPropertyNumeric3, productType, productFeature1, + productFeature2, x, y }; + + // resolve terms. 
+ database.getLexiconRelation() + .addTerms(terms, terms.length, true/* readOnly */); + + { + for (BigdataValue tmp : terms) { + System.out.println(tmp + " : " + tmp.getIV()); + if (tmp.getIV() == null) + throw new RuntimeException("Not defined: " + tmp); + } + } + + final IConstraint[] constraints; + final IPredicate[] preds; + final IPredicate p0, p1, p2, p3, p4, p5, p6; + final IConstraint c0, c1, c2; + { + final IVariable product = Var.var("product"); + final IVariable label = Var.var("label"); + final IVariable p1Var = Var.var("p1"); + final IVariable p3Var = Var.var("p3"); + final IVariable testVar = Var.var("testVar"); + + // The name space for the SPO relation. + final String[] spoRelation = new String[] { namespace + ".spo" }; + +// // The name space for the Lexicon relation. +// final String[] lexRelation = new String[] { namespace + ".lex" }; + + final long timestamp = database.getIndexManager().getLastCommitTime(); + + int nextId = 0; + +// ?product rdfs:label ?label . + p0 = new SPOPredicate(new BOp[] {// + product, + new Constant(rdfsLabel.getIV()), + label// + },// + new NV(BOp.Annotations.BOP_ID, nextId++),// + new NV(Annotations.TIMESTAMP, timestamp),// + new NV(IPredicate.Annotations.RELATION_NAME, spoRelation)// + ); + + // ?product a %ProductType% . + p1 = new SPOPredicate(new BOp[] {// + product, + new Constant(rdfType.getIV()), + new Constant(productType.getIV())// + },// + new NV(BOp.Annotations.BOP_ID, nextId++),// + new NV(Annotations.TIMESTAMP, timestamp),// + new NV(IPredicate.Annotations.RELATION_NAME, spoRelation)// + ); + + // ?product bsbm:productFeature %ProductFeature1% . + p2 = new SPOPredicate(new BOp[] {// + product, + new Constant(productFeature.getIV()), + new Constant(productFeature1.getIV())// + },// + new NV(BOp.Annotations.BOP_ID, nextId++),// + new NV(Annotations.TIMESTAMP, timestamp),// + new NV(IPredicate.Annotations.RELATION_NAME, spoRelation)// + ); + + // ?product bsbm:productPropertyNumeric1 ?p1 . 
+ p3 = new SPOPredicate(new BOp[] {// + product, + new Constant(productPropertyNumeric1.getIV()), + p1Var// + },// + new NV(BOp.Annotations.BOP_ID, nextId++),// + new NV(Annotations.TIMESTAMP, timestamp),// + new NV(IPredicate.Annotations.RELATION_NAME, spoRelation)// + ); + + // ?product bsbm:productPropertyNumeric3 ?p3 . + p4 = new SPOPredicate(new BOp[] {// + product, + new Constant(productPropertyNumeric3.getIV()), + p3Var// + },// + new NV(BOp.Annotations.BOP_ID, nextId++),// + new NV(Annotations.TIMESTAMP, timestamp),// + new NV(IPredicate.Annotations.RELATION_NAME, spoRelation)// + ); + + /* + * FIXME (p5,p6) below in an optional join group! + */ + + // ?product bsbm:productFeature %ProductFeature2% . + p5 = new SPOPredicate(new BOp[] {// + product, + new Constant(productFeature.getIV()), + new Constant(productFeature2.getIV()), + },// + new NV(BOp.Annotations.BOP_ID, nextId++),// + new NV(Annotations.TIMESTAMP, timestamp),// + new NV(IPredicate.Annotations.RELATION_NAME, spoRelation)// + ); + + // ?product rdfs:label ?testVar } + p6 = new SPOPredicate(new BOp[] {// + product, + new Constant(rdfsLabel.getIV()), + testVar, + },// + new NV(BOp.Annotations.BOP_ID, nextId++),// + new NV(Annotations.TIMESTAMP, timestamp),// + new NV(IPredicate.Annotations.RELATION_NAME, spoRelation)// + ); + + // the vertices of the join graph (the predicates). + preds = new IPredicate[] { p0, p1, p2, p3, p4, p5, p6 }; + + // FILTER ( ?p1 > %x% ) + c0 = Constraint.wrap(new CompareBOp(new BOp[] { p1Var, + new Constant(x.getIV()) }, NV.asMap(new NV[] { new NV( + CompareBOp.Annotations.OP, CompareOp.GT) }))); + + // FILTER (?p3 < %y% ) + c1 = Constraint.wrap(new CompareBOp(new BOp[] { p3Var, + new Constant(y.getIV()) }, NV.asMap(new NV[] { new NV( + CompareBOp.Annotations.OP, CompareOp.LT) }))); + + // FILTER (!bound(?testVar)) + c2 = Constraint.wrap(new NotBOp(new IsBoundBOp(testVar))); + + // the constraints on the join graph. 
+ constraints = new IConstraint[] { c0, c1, c2 }; + + } + + /* + * Run the join graph w/ its constraints (?p1>%x% and ?p3<%y%), but not + * the optional join group nor its constraint (!bound(?testVar)). + * + * FIXME The optional join group is part of the tail plan and can not be + * fed into the RTO right now. + */ + final IPredicate<?>[] runtimeOrder = doTest(preds, new IConstraint[] { + c0, c1 }); + + } + } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2011-02-24 20:33:37
|
Revision: 4246 http://bigdata.svn.sourceforge.net/bigdata/?rev=4246&view=rev Author: thompsonbry Date: 2011-02-24 20:33:29 +0000 (Thu, 24 Feb 2011) Log Message: ----------- - Moved canJoin() and canJoinUsingConstraints() to the PartitionedJoinGroup utility and their test suites to the com.bigdata.bop.joinGraph package. - Modified Rule2BOpUtility in preparation for deciding constraint attachments dynamically. However, I still need to modify PartitionedJoinGroup to (a) accept a set of variables known to be bound on entry. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/PartitionedJoinGroup.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/joinGraph/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/joinGraph/TestPartitionedJoinGroup.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/util/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/Rule2BOpUtility.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/joinGraph/TestPartitionedJoinGroup_canJoin.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/joinGraph/TestPartitionedJoinGroup_canJoinUsingConstraints.java Removed Paths: ------------- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/util/TestBOpUtility_canJoin.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/util/TestBOpUtility_canJoinUsingConstraints.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java 2011-02-24 17:49:44 UTC (rev 4245) +++ 
branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java 2011-02-24 20:33:29 UTC (rev 4246) @@ -27,7 +27,6 @@ package com.bigdata.bop; -import java.util.Arrays; import java.util.Collections; import java.util.Iterator; import java.util.LinkedHashMap; @@ -41,7 +40,6 @@ import com.bigdata.bop.BOp.Annotations; import com.bigdata.bop.engine.BOpStats; -import com.bigdata.bop.joinGraph.PartitionedJoinGroup; import com.bigdata.btree.AbstractNode; import com.bigdata.relation.accesspath.IAsynchronousIterator; import com.bigdata.relation.accesspath.IBlockingBuffer; @@ -60,7 +58,8 @@ */ public class BOpUtility { - private static final Logger log = Logger.getLogger(BOpUtility.class); + private static transient final Logger log = Logger + .getLogger(BOpUtility.class); /** * Pre-order recursive visitation of the operator tree (arguments only, no @@ -1151,288 +1150,4 @@ } - /** - * Return <code>true</code> iff two predicates can join on the basis of at - * least one variable which is shared directly by those predicates. Only the - * operands of the predicates are considered. - * <p> - * Note: This method will only identify joins where the predicates directly - * share at least one variable. However, joins are also possible when the - * predicates share variables via one or more constraint(s). Use - * {@link #canJoinUsingConstraints(IPredicate[], IPredicate, IConstraint[])} - * to identify such joins. - * <p> - * Note: Any two predicates may join regardless of the presence of shared - * variables. However, such joins will produce the full cross product of the - * binding sets selected by each predicate. As such, they should be run last - * and this method will not return <code>true</code> for such predicates. - * <p> - * Note: This method is more efficient than {@link #getSharedVars(BOp, BOp)} - * because it does not materialize the sets of shared variables. 
However, it - * only considers the operands of the {@link IPredicate}s and is thus more - * restricted than {@link #getSharedVars(BOp, BOp)} as well. - * - * @param p1 - * A predicate. - * @param p2 - * Another predicate. - * - * @return <code>true</code> iff the predicates share at least one variable - * as an operand. - * - * @throws IllegalArgumentException - * if the two either reference is <code>null</code>. - */ -// * @throws IllegalArgumentException -// * if the reference are the same. - static public boolean canJoin(final IPredicate<?> p1, final IPredicate<?> p2) { - - if (p1 == null) - throw new IllegalArgumentException(); - - if (p2 == null) - throw new IllegalArgumentException(); - -// if (p1 == p2) -// throw new IllegalArgumentException(); - - // iterator scanning the operands of p1. - final Iterator<IVariable<?>> itr1 = BOpUtility.getArgumentVariables(p1); - - while (itr1.hasNext()) { - - final IVariable<?> v1 = itr1.next(); - - // iterator scanning the operands of p2. - final Iterator<IVariable<?>> itr2 = BOpUtility - .getArgumentVariables(p2); - - while (itr2.hasNext()) { - - final IVariable<?> v2 = itr2.next(); - - if (v1 == v2) { - - if (log.isDebugEnabled()) - log.debug("Can join: sharedVar=" + v1 + ", p1=" + p1 - + ", p2=" + p2); - - return true; - - } - - } - - } - - if (log.isDebugEnabled()) - log.debug("No directly shared variable: p1=" + p1 + ", p2=" + p2); - - return false; - - } - - /** - * Return <code>true</code> iff a predicate may be used to extend a join - * path on the basis of at least one variable which is shared either - * directly or via one or more constraints which may be attached to the - * predicate when it is added to the join path. The join path is used to - * decide which variables are known to be bound, which in turn decides which - * constraints may be run. 
Unlike the case when the variable is directly - * shared between the two predicates, a join involving a constraint requires - * us to know which variables are already bound so we can know when the - * constraint may be attached. - * <p> - * Note: Use {@link #canJoin(IPredicate, IPredicate)} instead to identify - * joins based on a variable which is directly shared. - * <p> - * Note: Any two predicates may join regardless of the presence of shared - * variables. However, such joins will produce the full cross product of the - * binding sets selected by each predicate. As such, they should be run last - * and this method will not return <code>true</code> for such predicates. - * - * @param path - * A join path containing at least one predicate. - * @param vertex - * A predicate which is being considered as an extension of that - * join path. - * @param constraints - * A set of zero or more constraints (optional). Constraints are - * attached dynamically once the variables which they use are - * bound. Hence, a constraint will always share a variable with - * any predicate to which it is attached. If any constraints are - * attached to the given vertex and they share a variable which - * has already been bound by the join path, then the vertex may - * join with the join path even if it does not directly bind that - * variable. - * - * @return <code>true</code> iff the vertex can join with the join path via - * a shared variable. - * - * @throws IllegalArgumentException - * if the join path is <code>null</code>. - * @throws IllegalArgumentException - * if the join path is empty. - * @throws IllegalArgumentException - * if any element in the join path is <code>null</code>. - * @throws IllegalArgumentException - * if the vertex is <code>null</code>. - * @throws IllegalArgumentException - * if the vertex is already part of the join path. - * @throws IllegalArgumentException - * if any element in the optional constraints array is - * <code>null</code>. 
- */ - static public boolean canJoinUsingConstraints(final IPredicate<?>[] path, - final IPredicate<?> vertex, final IConstraint[] constraints) { - - /* - * Check arguments. - */ - if (path == null) - throw new IllegalArgumentException(); - if (vertex == null) - throw new IllegalArgumentException(); - // constraints MAY be null. - if (path.length == 0) - throw new IllegalArgumentException(); - { - for (IPredicate<?> p : path) { - if (p == null) - throw new IllegalArgumentException(); - if (vertex == p) - throw new IllegalArgumentException(); - } - } - - /* - * Find the set of variables which are known to be bound because they - * are referenced as operands of the predicates in the join path. - */ - final Set<IVariable<?>> knownBound = new LinkedHashSet<IVariable<?>>(); - - for (IPredicate<?> p : path) { - - final Iterator<IVariable<?>> vitr = BOpUtility - .getArgumentVariables(p); - - while (vitr.hasNext()) { - - knownBound.add(vitr.next()); - - } - - } - - /* - * - * If the given predicate directly shares a variable with any of the - * predicates in the join path, then we can return immediately. - */ - { - - final Iterator<IVariable<?>> vitr = BOpUtility - .getArgumentVariables(vertex); - - while (vitr.hasNext()) { - - final IVariable<?> var = vitr.next(); - - if(knownBound.contains(var)) { - - if (log.isDebugEnabled()) - log.debug("Can join: sharedVar=" + var + ", path=" - + Arrays.toString(path) + ", vertex=" + vertex); - - return true; - - } - - } - - } - - if(constraints == null) { - - // No opportunity for a constraint based join. - - if (log.isDebugEnabled()) - log.debug("No directly shared variable: path=" - + Arrays.toString(path) + ", vertex=" + vertex); - - return false; - - } - - /* - * Find the set of constraints which can run with the vertex given the - * join path. - */ - { - - // Extend the new join path. 
- final IPredicate<?>[] newPath = new IPredicate[path.length + 1]; - - System.arraycopy(path/* src */, 0/* srcPos */, newPath/* dest */, - 0/* destPos */, path.length); - - newPath[path.length] = vertex; - - /* - * Find the constraints that will run with each vertex of the new - * join path. - * - * TODO This is a forward reference to a different package, so maybe - * move the canJoinWithConstraints() method to that package? - */ - final IConstraint[][] constraintRunArray = PartitionedJoinGroup - .getJoinGraphConstraints(newPath, constraints); - - /* - * Consider only the constraints attached to the last vertex in the - * new join path. All of their variables will be bound since (by - * definition) a constraint may not run until its variables are - * bound. If any of the constraints attached to that last share any - * variables which were already known to be bound in the caller's - * join path, then the vertex can join (without of necessity being - * a full cross product join). - */ - final IConstraint[] vertexConstraints = constraintRunArray[path.length]; - - for (IConstraint c : vertexConstraints) { - - // consider all variables spanned by the constraint. 
- final Iterator<IVariable<?>> vitr = BOpUtility - .getSpannedVariables(c); - - while (vitr.hasNext()) { - - final IVariable<?> var = vitr.next(); - - if (knownBound.contains(var)) { - - if (log.isDebugEnabled()) - log.debug("Can join: sharedVar=" + var + ", path=" - + Arrays.toString(path) + ", vertex=" - + vertex + ", constraint=" + c); - - return true; - - } - - } - - } - - } - - if (log.isDebugEnabled()) - log.debug("No shared variable: path=" + Arrays.toString(path) - + ", vertex=" + vertex + ", constraints=" - + Arrays.toString(constraints)); - - return false; - - } - } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/PartitionedJoinGroup.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/PartitionedJoinGroup.java 2011-02-24 17:49:44 UTC (rev 4245) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/PartitionedJoinGroup.java 2011-02-24 20:33:29 UTC (rev 4246) @@ -1,5 +1,6 @@ package com.bigdata.bop.joinGraph; +import java.util.Arrays; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.LinkedHashSet; @@ -100,6 +101,8 @@ * A map indicating which constraints are run for which predicate in the * tail plan. The keys are the bopIds of the predicates in the tail plan. * The values are the sets of constraints to run for that tail. + * + * @todo This assumes that the tail plan is not reordered. */ private final Map<Integer/* predId */, List<IConstraint>> tailPlanConstraintMap = new LinkedHashMap<Integer, List<IConstraint>>(); @@ -442,6 +445,285 @@ } /** + * Return <code>true</code> iff two predicates can join on the basis of at + * least one variable which is shared directly by those predicates. Only the + * operands of the predicates are considered. + * <p> + * Note: This method will only identify joins where the predicates directly + * share at least one variable. 
However, joins are also possible when the + * predicates share variables via one or more constraint(s). Use + * {@link canJoinUsingConstraints} to identify such joins. + * <p> + * Note: Any two predicates may join regardless of the presence of shared + * variables. However, such joins will produce the full cross product of the + * binding sets selected by each predicate. As such, they should be run last + * and this method will not return <code>true</code> for such predicates. + * <p> + * Note: This method is more efficient than + * {@link BOpUtility#getSharedVars(BOp, BOp)} because it does not + * materialize the sets of shared variables. However, it only considers the + * operands of the {@link IPredicate}s and is thus more restricted than + * {@link BOpUtility#getSharedVars(BOp, BOp)} as well. + * + * @param p1 + * A predicate. + * @param p2 + * Another predicate. + * + * @return <code>true</code> iff the predicates share at least one variable + * as an operand. + * + * @throws IllegalArgumentException + * if the two either reference is <code>null</code>. + */ + static public boolean canJoin(final IPredicate<?> p1, final IPredicate<?> p2) { + + if (p1 == null) + throw new IllegalArgumentException(); + + if (p2 == null) + throw new IllegalArgumentException(); + + // iterator scanning the operands of p1. + final Iterator<IVariable<?>> itr1 = BOpUtility.getArgumentVariables(p1); + + while (itr1.hasNext()) { + + final IVariable<?> v1 = itr1.next(); + + // iterator scanning the operands of p2. 
+ final Iterator<IVariable<?>> itr2 = BOpUtility + .getArgumentVariables(p2); + + while (itr2.hasNext()) { + + final IVariable<?> v2 = itr2.next(); + + if (v1 == v2) { + + if (log.isDebugEnabled()) + log.debug("Can join: sharedVar=" + v1 + ", p1=" + p1 + + ", p2=" + p2); + + return true; + + } + + } + + } + + if (log.isDebugEnabled()) + log.debug("No directly shared variable: p1=" + p1 + ", p2=" + p2); + + return false; + + } + + /** + * Return <code>true</code> iff a predicate may be used to extend a join + * path on the basis of at least one variable which is shared either + * directly or via one or more constraints which may be attached to the + * predicate when it is added to the join path. The join path is used to + * decide which variables are known to be bound, which in turn decides which + * constraints may be run. Unlike the case when the variable is directly + * shared between the two predicates, a join involving a constraint requires + * us to know which variables are already bound so we can know when the + * constraint may be attached. + * <p> + * Note: Use {@link PartitionedJoinGroup#canJoin(IPredicate, IPredicate)} + * instead to identify joins based on a variable which is directly shared. + * <p> + * Note: Any two predicates may join regardless of the presence of shared + * variables. However, such joins will produce the full cross product of the + * binding sets selected by each predicate. As such, they should be run last + * and this method will not return <code>true</code> for such predicates. + * + * @param path + * A join path containing at least one predicate. + * @param vertex + * A predicate which is being considered as an extension of that + * join path. + * @param constraints + * A set of zero or more constraints (optional). Constraints are + * attached dynamically once the variables which they use are + * bound. Hence, a constraint will always share a variable with + * any predicate to which it is attached. 
If any constraints are + * attached to the given vertex and they share a variable which + * has already been bound by the join path, then the vertex may + * join with the join path even if it does not directly bind that + * variable. + * + * @return <code>true</code> iff the vertex can join with the join path via + * a shared variable. + * + * @throws IllegalArgumentException + * if the join path is <code>null</code>. + * @throws IllegalArgumentException + * if the join path is empty. + * @throws IllegalArgumentException + * if any element in the join path is <code>null</code>. + * @throws IllegalArgumentException + * if the vertex is <code>null</code>. + * @throws IllegalArgumentException + * if the vertex is already part of the join path. + * @throws IllegalArgumentException + * if any element in the optional constraints array is + * <code>null</code>. + */ + static public boolean canJoinUsingConstraints(final IPredicate<?>[] path, + final IPredicate<?> vertex, final IConstraint[] constraints) { + + /* + * Check arguments. + */ + if (path == null) + throw new IllegalArgumentException(); + if (vertex == null) + throw new IllegalArgumentException(); + // constraints MAY be null. + if (path.length == 0) + throw new IllegalArgumentException(); + { + for (IPredicate<?> p : path) { + if (p == null) + throw new IllegalArgumentException(); + if (vertex == p) + throw new IllegalArgumentException(); + } + } + + /* + * Find the set of variables which are known to be bound because they + * are referenced as operands of the predicates in the join path. + */ + final Set<IVariable<?>> knownBound = new LinkedHashSet<IVariable<?>>(); + + for (IPredicate<?> p : path) { + + final Iterator<IVariable<?>> vitr = BOpUtility + .getArgumentVariables(p); + + while (vitr.hasNext()) { + + knownBound.add(vitr.next()); + + } + + } + + /* + * + * If the given predicate directly shares a variable with any of the + * predicates in the join path, then we can return immediately. 
+ */ + { + + final Iterator<IVariable<?>> vitr = BOpUtility + .getArgumentVariables(vertex); + + while (vitr.hasNext()) { + + final IVariable<?> var = vitr.next(); + + if (knownBound.contains(var)) { + + if (log.isDebugEnabled()) + log.debug("Can join: sharedVar=" + var + ", path=" + + Arrays.toString(path) + ", vertex=" + vertex); + + return true; + + } + + } + + } + + if (constraints == null) { + + // No opportunity for a constraint based join. + + if (log.isDebugEnabled()) + log.debug("No directly shared variable: path=" + + Arrays.toString(path) + ", vertex=" + vertex); + + return false; + + } + + /* + * Find the set of constraints which can run with the vertex given the + * join path. + */ + { + + // Extend the new join path. + final IPredicate<?>[] newPath = new IPredicate[path.length + 1]; + + System.arraycopy(path/* src */, 0/* srcPos */, newPath/* dest */, + 0/* destPos */, path.length); + + newPath[path.length] = vertex; + + /* + * Find the constraints that will run with each vertex of the new + * join path. + * + * TODO This is a forward reference to a different package, so maybe + * move the canJoinWithConstraints() method to that package? + */ + final IConstraint[][] constraintRunArray = getJoinGraphConstraints( + newPath, constraints); + + /* + * Consider only the constraints attached to the last vertex in the + * new join path. All of their variables will be bound since (by + * definition) a constraint may not run until its variables are + * bound. If any of the constraints attached to that last share any + * variables which were already known to be bound in the caller's + * join path, then the vertex can join (without of necessity being a + * full cross product join). + */ + final IConstraint[] vertexConstraints = constraintRunArray[path.length]; + + for (IConstraint c : vertexConstraints) { + + // consider all variables spanned by the constraint. 
+ final Iterator<IVariable<?>> vitr = BOpUtility + .getSpannedVariables(c); + + while (vitr.hasNext()) { + + final IVariable<?> var = vitr.next(); + + if (knownBound.contains(var)) { + + if (log.isDebugEnabled()) + log.debug("Can join: sharedVar=" + var + ", path=" + + Arrays.toString(path) + ", vertex=" + + vertex + ", constraint=" + c); + + return true; + + } + + } + + } + + } + + if (log.isDebugEnabled()) + log.debug("No shared variable: path=" + Arrays.toString(path) + + ", vertex=" + vertex + ", constraints=" + + Arrays.toString(constraints)); + + return false; + + } + + /** * Analyze a set of {@link IPredicate}s representing "runFirst", optional * joins, and non-optional joins which may be freely reordered together with * a collection of {@link IConstraint}s and partition them into a join graph @@ -450,6 +732,8 @@ * be used to formulate a complete query when combined with a desired join * ordering. * + * @param knownBound + * A set of variables which are known to be bound on entry. * @param sourcePreds * The predicates. * @param constraints @@ -467,6 +751,7 @@ * <code>null</code>. 
*/ public PartitionedJoinGroup(// +// final Set<IVariable<?>> knownBound, final IPredicate<?>[] sourcePreds,// IConstraint[] constraints) { Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java 2011-02-24 17:49:44 UTC (rev 4245) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java 2011-02-24 20:33:29 UTC (rev 4246) @@ -47,6 +47,7 @@ import com.bigdata.bop.ap.SampleIndex.SampleType; import com.bigdata.bop.engine.QueryEngine; import com.bigdata.bop.joinGraph.NoSolutionsException; +import com.bigdata.bop.joinGraph.PartitionedJoinGroup; import com.bigdata.bop.rdf.join.DataSetJoin; /** @@ -894,7 +895,7 @@ continue; } - if (!BOpUtility.canJoinUsingConstraints(// + if (!PartitionedJoinGroup.canJoinUsingConstraints(// x.getPredicates(),// path tVertex.pred,// vertex C// constraints @@ -1113,7 +1114,7 @@ vp = v1; } - if (!BOpUtility.canJoinUsingConstraints( + if (!PartitionedJoinGroup.canJoinUsingConstraints( new IPredicate[] { v.pred }, vp.pred, C)) { /* Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/joinGraph/TestAll.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/joinGraph/TestAll.java 2011-02-24 17:49:44 UTC (rev 4245) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/joinGraph/TestAll.java 2011-02-24 20:33:29 UTC (rev 4246) @@ -62,6 +62,12 @@ suite.addTestSuite(TestPartitionedJoinGroup.class); + // unit tests for allowing joins based on shared variables in preds. + suite.addTestSuite(TestPartitionedJoinGroup_canJoin.class); + + // more complex logic for join paths. + suite.addTestSuite(TestPartitionedJoinGroup_canJoinUsingConstraints.class); + // static query optimizer test suite. 
suite.addTest(com.bigdata.bop.joinGraph.fast.TestAll.suite()); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/joinGraph/TestPartitionedJoinGroup.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/joinGraph/TestPartitionedJoinGroup.java 2011-02-24 17:49:44 UTC (rev 4245) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/joinGraph/TestPartitionedJoinGroup.java 2011-02-24 20:33:29 UTC (rev 4246) @@ -49,6 +49,11 @@ * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ + * + * FIXME Add test to ensure that constraints are run regardless as of the last + * join even if their variables are not known to be bound. Also, modify the + * constructor to accept a set of variables which are known to be bound on + * entry into the join group. */ public class TestPartitionedJoinGroup extends TestCase2 { @@ -493,7 +498,7 @@ final int nrange = expected.length; - java.util.Map range = new java.util.HashMap(); + final java.util.Map range = new java.util.HashMap(); for (int j = 0; j < nrange; j++) { @@ -513,7 +518,7 @@ } - Object actualObject = actual.next(); + final Object actualObject = actual.next(); if (range.remove(actualObject) == null) { Copied: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/joinGraph/TestPartitionedJoinGroup_canJoin.java (from rev 4218, branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/util/TestBOpUtility_canJoin.java) =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/joinGraph/TestPartitionedJoinGroup_canJoin.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/joinGraph/TestPartitionedJoinGroup_canJoin.java 2011-02-24 20:33:29 UTC (rev 4246) @@ -0,0 +1,153 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2011. All rights reserved. 
+ +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Feb 20, 2011 + */ + +package com.bigdata.bop.joinGraph; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.IPredicate; +import com.bigdata.bop.IVariable; +import com.bigdata.bop.NV; +import com.bigdata.bop.Var; +import com.bigdata.bop.ap.Predicate; +import com.bigdata.bop.joinGraph.PartitionedJoinGroup; + +import junit.framework.TestCase2; + +/** + * Unit tests for {@link PartitionedJoinGroup#canJoin(IPredicate, IPredicate)} + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public class TestPartitionedJoinGroup_canJoin extends TestCase2 { + + /** + * + */ + public TestPartitionedJoinGroup_canJoin() { + } + + /** + * @param name + */ + public TestPartitionedJoinGroup_canJoin(String name) { + super(name); + } + + + /** + * Correct rejection tests. + * + * @see BOpUtility#canJoin(IPredicate, IPredicate). + */ + @SuppressWarnings("unchecked") + public void test_canJoin_correctRejection() { + + final IVariable<?> x = Var.var("x"); + final IVariable<?> y = Var.var("y"); + final IVariable<?> z = Var.var("z"); + + final IPredicate<?> p1 = new Predicate(new BOp[]{x,y}); + final IPredicate<?> p2 = new Predicate(new BOp[]{y,z}); + + // correct rejection w/ null arg. 
+ try { + PartitionedJoinGroup.canJoin(null,p2); + fail("Expecting: " + IllegalArgumentException.class); + } catch (IllegalArgumentException ex) { + if (log.isInfoEnabled()) + log.info("Ignoring expected exception: " + ex); + } + + // correct rejection w/ null arg. + try { + PartitionedJoinGroup.canJoin(p1,null); + fail("Expecting: " + IllegalArgumentException.class); + } catch (IllegalArgumentException ex) { + if (log.isInfoEnabled()) + log.info("Ignoring expected exception: " + ex); + } + + } + + /** + * Semantics tests focused on shared variables in the operands. + * + * @see PartitionedJoinGroup#canJoin(IPredicate, IPredicate) + */ + @SuppressWarnings("unchecked") + public void test_canJoin() { + + final IVariable<?> u = Var.var("u"); + final IVariable<?> x = Var.var("x"); + final IVariable<?> y = Var.var("y"); + final IVariable<?> z = Var.var("z"); + + final IPredicate<?> p1 = new Predicate(new BOp[] { x, y }); + final IPredicate<?> p2 = new Predicate(new BOp[] { y, z }); + final IPredicate<?> p3 = new Predicate(new BOp[] { u, z }); + + // share y + assertTrue(PartitionedJoinGroup.canJoin(p1, p2)); + + // share z + assertTrue(PartitionedJoinGroup.canJoin(p2, p3)); + + // share z + assertFalse(PartitionedJoinGroup.canJoin(p1, p3)); + + // shares (x,y) with self. + assertTrue(PartitionedJoinGroup.canJoin(p1, p1)); + + } + + /** + * Verify that joins are not permitted when the variables are + * only shared via an annotation. + * + * @see PartitionedJoinGroup#canJoin(IPredicate, IPredicate) + */ + @SuppressWarnings("unchecked") + public void test_canJoin_annotationsAreIngored() { + + final IVariable<?> x = Var.var("x"); + final IVariable<?> y = Var.var("y"); + final IVariable<?> z = Var.var("z"); + + final IPredicate<?> p1 = new Predicate(new BOp[] { x, },// + new NV("foo", y)// + ); + final IPredicate<?> p2 = new Predicate(new BOp[] { z },// + new NV("foo", y) + ); + + // verify that the variables in the annotations are ignored. 
+ assertFalse(PartitionedJoinGroup.canJoin(p1, p2)); + + } + +} Copied: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/joinGraph/TestPartitionedJoinGroup_canJoinUsingConstraints.java (from rev 4233, branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/util/TestBOpUtility_canJoinUsingConstraints.java) =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/joinGraph/TestPartitionedJoinGroup_canJoinUsingConstraints.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/joinGraph/TestPartitionedJoinGroup_canJoinUsingConstraints.java 2011-02-24 20:33:29 UTC (rev 4246) @@ -0,0 +1,719 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2011. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Feb 20, 2011 + */ + +package com.bigdata.bop.joinGraph; + +import java.util.LinkedHashSet; +import java.util.Map; +import java.util.Random; +import java.util.Set; + +import junit.framework.TestCase2; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.Constant; +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IConstant; +import com.bigdata.bop.IConstraint; +import com.bigdata.bop.IPredicate; +import com.bigdata.bop.IValueExpression; +import com.bigdata.bop.IVariable; +import com.bigdata.bop.ImmutableBOp; +import com.bigdata.bop.NV; +import com.bigdata.bop.Var; +import com.bigdata.bop.ap.Predicate; +import com.bigdata.bop.constraint.AND; +import com.bigdata.bop.constraint.BooleanValueExpression; +import com.bigdata.bop.constraint.Constraint; +import com.bigdata.bop.joinGraph.PartitionedJoinGroup; +import com.bigdata.bop.joinGraph.rto.JGraph; + +/** + * + * This test suite is built around around BSBM Q5. Each test has an existing + * join path and a new vertex to be added to the join path. The question is + * whether or not the vertex <em>can join</em> with the join path using one or + * more shared variable(s). This tests a method used to incrementally grow a + * join path when it is dynamically decided that an {@link IPredicate} may be + * added to the join path based on shared variables. Static analysis easily + * reports those joins which are allowed based on the variables directly given + * with two {@link IPredicate}s. The purpose of this test suite is to explore + * when joins (based on shared variables) become permissible through + * {@link IConstraint}s as the variable(s) used within those constraints become + * bound. 
+ * <p> + * Note: To avoid a dependency on the RDF model layer, this just uses String + * constants for URIs and Literals. + * <h2>Analysis of BSBM Q5</h2> + * The following predicates all join on {@link #product}: + * <ul> + * <li>{@link #p0}</li> + * <li>{@link #p2}</li> + * <li>{@link #p4}</li> + * <li>{@link #p5}</li> + * </ul> + * The predicates ({@link #p3} and {@link #p5}) do not directly join with any of + * the other predicates (they do not directly share any variables). In general, + * a join without shared variables means the cross product of the sources will + * be materialized and such joins should be run last. + * <p> + * However, in this case there are two SPARQL FILTERs ({@link #c1} and + * {@link #c2}) which (a) use those variables ({@link #origProperty1} and + * {@link #origProperty2}); and (b) can constrain the query. This means that + * running the predicates without shared variables and applying the constraints + * before the tail of the plan can in fact lead to a more efficient join path. + * <p> + * This set of unit tests explores various join paths and verifies that the + * canJoin() and canJoinUsingConstraints() methods correctly recognize edges by + * which a join path can be extended corresponding to both static and dynamic + * analysis of the query. + * + * @see PartitionedJoinGroup#canJoin(IPredicate, IPredicate) + * @see PartitionedJoinGroup#canJoinUsingConstraints(IPredicate[], IPredicate, + * IConstraint[]) + * @see JGraph + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id: TestBOpUtility_canJoinUsingConstraints.java 4211 2011-02-20 + * 21:20:44Z thompsonbry $ + * + * @todo These are the full plans generated by the runtime and static + * optimizers. One way to test canJoinXXX() is to run out these join plans + * and verify that they report "true" in each case. However, the critical + * bit to test are join plans where the predicates w/o the shared + * variables can be run earlier due to the FILTERs. 
+ * + * <pre> + * test_bsbm_q5 : static [0] : : ids=[1, 2, 4, 6, 0, 3, 5] + * test_bsbm_q5 : runtime[0] : : ids=[1, 2, 0, 4, 6, 3, 5] + * </pre> + */ +//@SuppressWarnings("unchecked") +public class TestPartitionedJoinGroup_canJoinUsingConstraints extends TestCase2 { + + /** + * + */ + public TestPartitionedJoinGroup_canJoinUsingConstraints() { + } + + /** + * @param name + */ + public TestPartitionedJoinGroup_canJoinUsingConstraints(String name) { + super(name); + } + + /** + * Unit tests to verify that arguments are validated. + * + * @see PartitionedJoinGroup#canJoinUsingConstraints(IPredicate[], IPredicate, + * IConstraint[]) + */ + public void test_canJoinUsingConstraints_illegalArgument() { + + final IVariable<?> x = Var.var("x"); + final IVariable<?> y = Var.var("y"); + + final IPredicate<?> p1 = new Predicate(new BOp[]{x}); + + final IPredicate<?> p2 = new Predicate(new BOp[]{y}); + + // path must not be null. + try { + PartitionedJoinGroup.canJoinUsingConstraints(// + null, // path + p1,// vertex + new IConstraint[0]// constraints + ); + fail("Expecting: " + IllegalArgumentException.class); + } catch (IllegalArgumentException ex) { + if (log.isInfoEnabled()) + log.info("Expecting: " + IllegalArgumentException.class); + } + + // vertex must not be null. + try { + PartitionedJoinGroup.canJoinUsingConstraints(// + new IPredicate[]{p1}, // path + null,// vertex + new IConstraint[0]// constraints + ); + fail("Expecting: " + IllegalArgumentException.class); + } catch (IllegalArgumentException ex) { + if (log.isInfoEnabled()) + log.info("Expecting: " + IllegalArgumentException.class); + } + + // path may not be empty. 
+ try { + PartitionedJoinGroup.canJoinUsingConstraints(// + new IPredicate[] {}, // path + p1,// vertex + new IConstraint[0]// constraints + ); + fail("Expecting: " + IllegalArgumentException.class); + } catch (IllegalArgumentException ex) { + if (log.isInfoEnabled()) + log.info("Expecting: " + IllegalArgumentException.class); + } + + // path elements may not be null. + try { + PartitionedJoinGroup.canJoinUsingConstraints(// + new IPredicate[] { p2, null }, // path + p1,// vertex + new IConstraint[0]// constraints + ); + fail("Expecting: " + IllegalArgumentException.class); + } catch (IllegalArgumentException ex) { + if (log.isInfoEnabled()) + log.info("Expecting: " + IllegalArgumentException.class); + } + + // vertex must not appear in the path. + try { + PartitionedJoinGroup.canJoinUsingConstraints(// + new IPredicate[] { p2, p1 }, // path + p1,// vertex + new IConstraint[0]// constraints + ); + fail("Expecting: " + IllegalArgumentException.class); + } catch (IllegalArgumentException ex) { + if (log.isInfoEnabled()) + log.info("Expecting: " + IllegalArgumentException.class); + } + + // constraint array may not contain null elements. + try { + PartitionedJoinGroup.canJoinUsingConstraints(// + new IPredicate[] { p2 }, // path + p1,// vertex + new IConstraint[] { // + Constraint.wrap(new NEConstant(x, new Constant<Integer>(12))), // + null // + }// constraints + ); + fail("Expecting: " + IllegalArgumentException.class); + } catch (IllegalArgumentException ex) { + if (log.isInfoEnabled()) + log.info("Expecting: " + IllegalArgumentException.class); + } + + } + + // The comparison operators. + static private final int GT = 0, LT = 1;// , EQ = 2, GTE = 3, LTE = 4; + + // The math operators. + static private final int PLUS = 0, MINUS = 1; + + // Annotation for the comparison or math operator. + static private final String OP = "op"; + + /** + * A do-nothing constraint. The constraint is never evaluated. 
It is only + * used to test the logic which decides when two predicates can join based + * on variable(s) shared via a constraint. + */ + static private final class MyCompareOp extends BooleanValueExpression { + + private static final long serialVersionUID = 1L; + + /** + * Required deep copy constructor. + * + * @param op + */ + public MyCompareOp(MyCompareOp op) { + super(op); + } + + /** + * @param args + * @param annotations + */ + public MyCompareOp(BOp[] args, Map<String, Object> annotations) { + super(args, annotations); + } + + public Boolean get(IBindingSet bindingSet) { + throw new UnsupportedOperationException(); + } + + } + + /** + * A do-nothing constraint. The constraint is never evaluated. It is only + * used to test the logic which decides when two predicates can join based + * on variable(s) shared via a constraint. + */ + static private final class NEConstant extends BooleanValueExpression { + + private static final long serialVersionUID = 1L; + + /** + * Required deep copy constructor. + * + * @param op + */ + public NEConstant(NEConstant op) { + super(op); + } + + /** + * @param args + * @param annotations + */ + public NEConstant(BOp[] args, Map<String, Object> annotations) { + super(args, annotations); + } + + public NEConstant(IVariable<?> var, IConstant<?> value) { + this(new BOp[] { var, value }, null/* annotations */); + } + + public Boolean get(IBindingSet bindingSet) { + throw new UnsupportedOperationException(); + } + + } + + /** + * A do-nothing value expression. The expression is never evaluated. It is + * only used to test the logic which decides when two predicates can join + * based on variable(s) shared via a constraint. + */ + static private final class MathBOp extends ImmutableBOp implements + IValueExpression { + + private static final long serialVersionUID = 1L; + + /** + * Required deep copy constructor. 
+ * + * @param op + */ + public MathBOp(final MathBOp op) { + + super(op); + + } + + /** + * Required shallow copy constructor. + * + * @param args + * The operands. + * @param op + * The operation. + */ + public MathBOp(final BOp[] args, Map<String, Object> anns) { + + super(args, anns); + + if (args.length != 2 || args[0] == null || args[1] == null + || getProperty(OP) == null) { + + throw new IllegalArgumentException(); + + } + + } + + /** + * + * @param left + * The left operand. + * @param right + * The right operand. + * @param op + * The annotation specifying the operation to be performed on + * those operands. + */ + public MathBOp(final IValueExpression left, + final IValueExpression right, final int op) { + + this(new BOp[] { left, right }, NV.asMap(new NV(OP, op))); + + } + + public Object get(IBindingSet bindingSet) { + throw new UnsupportedOperationException(); + } + } + + static private final String rdfs = "http://www.w3.org/2000/01/rdf-schema#"; + + static private final String bsbm = "http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/"; + + static private final String rdfsLabel = rdfs + "label"; + + static private final String productFeature = bsbm + "productFeature"; + + static private final String productPropertyNumeric1 = "productPropertyNumeric1"; + + static private final String productPropertyNumeric2 = bsbm + + "productPropertyNumeric2"; + + static private final String productInstance = "http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product22"; + + private int nextId = 0; + + final IVariable<?> product = Var.var("product"); + + final IVariable<?> productLabel = Var.var("productLabel"); + + final IVariable<?> prodFeature = Var.var("prodFeature"); + + final IVariable<?> simProperty1 = Var.var("simProperty1"); + + final IVariable<?> simProperty2 = Var.var("simProperty2"); + + final IVariable<?> origProperty1 = Var.var("origProperty1"); + + final IVariable<?> origProperty2 = Var.var("origProperty2"); + + /** 
?product rdfs:label ?productLabel . */ + final private IPredicate<?> p0 = new Predicate(new BOp[] {// + product, new Constant(rdfsLabel), productLabel },// + new NV(BOp.Annotations.BOP_ID, nextId++)// + ); + + /** productInstance bsbm:productFeature ?prodFeature . */ + final private IPredicate<?> p1 = new Predicate(new BOp[] { // + new Constant(productInstance), new Constant(productFeature), + prodFeature },// + new NV(BOp.Annotations.BOP_ID, nextId++)// + ); + + /** ?product bsbm:productFeature ?prodFeature . */ + final private IPredicate<?> p2 = new Predicate(new BOp[] { // + product, new Constant(productFeature), prodFeature },// + new NV(BOp.Annotations.BOP_ID, nextId++)// + ); + + /** productInstance bsbm:productPropertyNumeric1 ?origProperty1 . */ + final private IPredicate<?> p3 = new Predicate(new BOp[] { // + new Constant<String>(productInstance), + new Constant(productPropertyNumeric1), origProperty1 },// + new NV(BOp.Annotations.BOP_ID, nextId++)// + ); + + /** ?product bsbm:productPropertyNumeric1 ?simProperty1 . */ + final private IPredicate<?> p4 = new Predicate(new BOp[] { // + product, new Constant(productPropertyNumeric1), simProperty1 },// + new NV(BOp.Annotations.BOP_ID, nextId++)// + ); + + /** productInstance bsbm:productPropertyNumeric2 ?origProperty2 . */ + final private IPredicate<?> p5 = new Predicate(new BOp[] { // + new Constant(productInstance), + new Constant(productPropertyNumeric2), origProperty2 },// + new NV(BOp.Annotations.BOP_ID, nextId++)// + ); + + /** ?product bsbm:productPropertyNumeric2 ?simProperty2 . */ + final private IPredicate<?> p6 = new Predicate(new BOp[] { // + product, new Constant(productPropertyNumeric2), simProperty2 },// + new NV(BOp.Annotations.BOP_ID, nextId++)// + ); + + /** The vertices of the join graph (the predicates). 
*/ + final IPredicate<?>[] preds = new IPredicate[] { p0, p1, p2, p3, p4, p5, p6 }; + + /** + * FILTER (productInstance != ?product) + */ + final IConstraint c0 = Constraint.wrap(new NEConstant(product, new Constant<String>( + productInstance))); + + /** + * FILTER (?simProperty1 < (?origProperty1 + 120) && ?simProperty1 > + * (?origProperty1 - 120)) + * <p> + * Note: The AND in the compound filters is typically optimized out such + * that each of these is represented as its own IConstraint, but I have + * combined them for the purposes of these unit tests. + */ + final IConstraint c1 = Constraint.wrap(new AND(// + new MyCompareOp( + new BOp[] { + simProperty1, + new MathBOp(origProperty1, new Constant<Integer>( + 120), PLUS) }, NV.asMap(new NV[] { new NV( + OP, LT) })), // + new MyCompareOp(new BOp[] { + simProperty1, + new MathBOp(origProperty1, new Constant<Integer>(120), + MINUS) }, NV.asMap(new NV[] { new NV(OP, GT) }))// + )); + + /** + * FILTER (?simProperty2 < (?origProperty2 + 170) && ?simProperty2 > + * (?origProperty2 - 170)) + * <p> + * Note: The AND in the compound filters is typically optimized out such + * that each of these is represented as its own IConstraint, but I have + * combined them for the purposes of these unit tests. + */ + final IConstraint c2 = Constraint.wrap(new AND(// + new MyCompareOp( + new BOp[] { + simProperty2, + new MathBOp(origProperty2, new Constant<Integer>( + 170), PLUS) }, NV.asMap(new NV[] { new NV( + OP, LT) })),// + new MyCompareOp(new BOp[] { + simProperty2, + new MathBOp(origProperty2, new Constant<Integer>(170), + MINUS) }, NV.asMap(new NV[] { new NV(OP, GT) }))// + )); + + /** The constraints on the join graph. */ + final IConstraint[] constraints = new IConstraint[] { c0, c1, c2 }; + + /** + * Unit test for one-step joins based on the {@link #product} variable. 
+ */ + public void test_canJoinUsingConstraints_1step_productVar() { + + // share ?product + final IPredicate<?>[] a = new IPredicate[] { p0, p2, p4, p6 }; + for (int i = 0; i < a.length; i++) { + for (int j = i; j < a.length; j++) { + final IPredicate<?> t0 = a[i]; + final IPredicate<?> t1 = a[j]; + assertTrue(PartitionedJoinGroup.canJoin(t0, t1)); + assertTrue(PartitionedJoinGroup.canJoin(t1, t0)); + if (t0 != t1) { + /* + * Test join path extension, but not when the vertex used to + * extend the path is already present in the join path. + */ + assertTrue(PartitionedJoinGroup.canJoinUsingConstraints(// + new IPredicate[] { t0 }, // path + t1,// vertex + new IConstraint[0]// constraints + )); + assertTrue(PartitionedJoinGroup.canJoinUsingConstraints(// + new IPredicate[] { t1 }, // path + t0,// vertex + new IConstraint[0]// constraints + )); + } + } + } + + } + + /** + * Unit test for multi-step join paths based on the {@link #product} + * variable. + */ + public void test_canJoinUsingConstraints_multiStep_productVar() { + + final Random r = new Random(); + + // share ?product + final IPredicate<?>[] a = new IPredicate[] { p0, p2, p4, p6 }; + + // existing path length [1:3]. + final int existingPathLength = r.nextInt(3)+1; + + // generated pre-existing path. + final IPredicate<?>[] path = new IPredicate[existingPathLength]; + // vertex which will extend that path + final IPredicate<?> vertex; + { + // collection of predicates used so far by the path. + final Set<Integer> used = new LinkedHashSet<Integer>(); + for (int i = 0; i < path.length; i++) { + // Locate an unused predicate. + int index; + while (true) { + index = r.nextInt(a.length); + if (!used.contains(index)) { + used.add(index); + break; + } + } + // add to the path. + path[i] = a[index]; + } + // Locate an unused predicate to serve as the extension vertex. + { + // Locate an unused predicate. 
+ int index; + while (true) { + index = r.nextInt(a.length); + if (!used.contains(index)) { + used.add(index); + break; + } + } + vertex = a[index]; + } + } + + // Verify all joins in the path are legal. + for (int i = 0; i < path.length - 1; i++) { + assertTrue(PartitionedJoinGroup.canJoin(path[i], path[i + 1])); + } + + // Verify the extension of the path is legal. + assertTrue(PartitionedJoinGroup.canJoinUsingConstraints(// + path,// + vertex,// + new IConstraint[0]// constraints + )); + + } + + /** + * Unit test examines the predicates without shared variables and verifies + * (a) that joins are not permitted when the constraints are not considered; + * and (b) that joins are permitted when the constraints are considered. + * <p> + * This test is identical to {@link #test_canJoinUsingConstraints_p5_p6()()} + * except that it considers the ({@link #p3} x {@link #p4}) join via the + * {@link #c1} constraint instead. + */ + public void test_canJoinUsingConstraints_p3_p4() { + + /* + * Verify (p3,p4) join is not permitted when we do not consider the + * constraints (i.e., the join would be an unconstrained cross product + * if it were executed). + */ + assertFalse(PartitionedJoinGroup.canJoin(p3, p4)); + assertFalse(PartitionedJoinGroup.canJoin(p4, p3)); + assertFalse(PartitionedJoinGroup.canJoinUsingConstraints(// + new IPredicate[] { p3 }, // path + p4,// vertex + new IConstraint[0]// constraints + )); + assertFalse(PartitionedJoinGroup.canJoinUsingConstraints(// + new IPredicate[] { p4 }, // path + p3,// vertex + new IConstraint[0]// constraints + )); + + /* + * Verify (p3,p4) join is not permitted if we do not consider the + * constraint which provides the shared variables. 
+ */ + assertFalse(PartitionedJoinGroup.canJoinUsingConstraints(// + new IPredicate[] { p3 }, // path + p4,// vertex + new IConstraint[] { c2 }// constraints + )); + assertFalse(PartitionedJoinGroup.canJoinUsingConstraints(// + new IPredicate[] { p4 }, // path + p3,// vertex + new IConstraint[] { c2 }// constraints + )); + + /* + * Verify (p3,p4) join is permitted if we consider the constraint which + * provides the shared variables. + */ + assertTrue(PartitionedJoinGroup.canJoinUsingConstraints(// + new IPredicate[] { p3 }, // path + p4,// vertex + new IConstraint[] { c1 }// constraints + )); + assertTrue(PartitionedJoinGroup.canJoinUsingConstraints(// + new IPredicate[] { p4 }, // path + p3,// vertex + new IConstraint[] { c1 }// constraints + )); + + } + + /** + * Unit test examines the predicates without shared variables and verifies + * (a) that joins are not permitted when the constraints are not considered; + * and (b) that joins are permitted wh... [truncated message content] |
From: <mrp...@us...> - 2011-02-24 23:11:24
|
Revision: 4248 http://bigdata.svn.sourceforge.net/bigdata/?rev=4248&view=rev Author: mrpersonick Date: 2011-02-24 23:11:18 +0000 (Thu, 24 Feb 2011) Log Message: ----------- fixed constraint attachment Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/PartitionedJoinGroup.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/Rule2BOpUtility.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/SOp2BOpUtility.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/SOpTreeBuilder.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/PartitionedJoinGroup.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/PartitionedJoinGroup.java 2011-02-24 23:10:42 UTC (rev 4247) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/PartitionedJoinGroup.java 2011-02-24 23:11:18 UTC (rev 4248) @@ -217,6 +217,13 @@ } + static public IConstraint[][] getJoinGraphConstraints( + final IPredicate<?>[] path, final IConstraint[] joinGraphConstraints) { + + return getJoinGraphConstraints(path, joinGraphConstraints, null); + + } + /** * Given a join path, return the set of constraints to be associated with * each join in that join path. Only those constraints whose variables are @@ -226,6 +233,9 @@ * The join path. * @param joinGraphConstraints * The constraints to be applied to the join path (optional). + * @param knownBoundVars + * Variables that are known to be bound as inputs to this + * join graph (parent queries). * * @return The constraints to be paired with each element of the join path. * @@ -250,7 +260,9 @@ * FIXME Unit tests. 
*/ static public IConstraint[][] getJoinGraphConstraints( - final IPredicate<?>[] path, final IConstraint[] joinGraphConstraints) { + final IPredicate<?>[] path, + final IConstraint[] joinGraphConstraints, + final IVariable<?>[] knownBoundVars) { if (path == null) throw new IllegalArgumentException(); @@ -260,7 +272,16 @@ // the set of constraints for each predicate in the join path. final IConstraint[][] ret = new IConstraint[path.length][]; + + // the set of variables which are bound. + final Set<IVariable<?>> boundVars = new LinkedHashSet<IVariable<?>>(); + // add the already known bound vars + if (knownBoundVars != null) { + for (IVariable<?> v : knownBoundVars) + boundVars.add(v); + } + /* * For each predicate in the path in the given order, figure out which * constraint(s) would attach to that predicate based on which variables @@ -268,9 +289,6 @@ * given join path, we return that set of constraints. */ - // the set of variables which are bound. - final Set<IVariable<?>> boundVars = new LinkedHashSet<IVariable<?>>(); - // the set of constraints which have been consumed. 
final Set<IConstraint> used = new LinkedHashSet<IConstraint>(); Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/Rule2BOpUtility.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/Rule2BOpUtility.java 2011-02-24 23:10:42 UTC (rev 4247) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/Rule2BOpUtility.java 2011-02-24 23:11:18 UTC (rev 4248) @@ -70,6 +70,7 @@ import com.bigdata.bop.engine.QueryEngine; import com.bigdata.bop.join.PipelineJoin; import com.bigdata.bop.joinGraph.IRangeCountFactory; +import com.bigdata.bop.joinGraph.PartitionedJoinGroup; import com.bigdata.bop.joinGraph.fast.DefaultEvaluationPlan2; import com.bigdata.bop.rdf.filter.StripContextFilter; import com.bigdata.bop.rdf.join.DataSetJoin; @@ -327,13 +328,16 @@ final AtomicInteger idFactory, final AbstractTripleStore db, final QueryEngine queryEngine, final Properties queryHints) { - return convert(rule, null/* conditionals */, idFactory, db, queryEngine, - queryHints); + return convert(rule, + null/* conditionals */, + null/* known bound variables */, + idFactory, db, queryEngine, queryHints); } public static PipelineOp convert(final IRule<?> rule, final Collection<IConstraint> conditionals, + final Set<IVariable<?>> knownBound, final AtomicInteger idFactory, final AbstractTripleStore db, final QueryEngine queryEngine, final Properties queryHints) { @@ -514,43 +518,6 @@ // final IVariable<?>[][] selectVars = RuleState // .computeRequiredVarsForEachTail(rule, order); - /* - * Map the constraints from the variables they use. This way, we can - * properly attach constraints to only the first tail in which the - * variable appears. This way we only run the appropriate constraint - * once, instead of for every tail. 
- */ -// final Map<IVariable<?>, Collection<IConstraint>> constraintsByVar = -// new HashMap<IVariable<?>, Collection<IConstraint>>(); -// for (int i = 0; i < rule.getConstraintCount(); i++) { -// final IConstraint c = rule.getConstraint(i); -// -// if (log.isDebugEnabled()) { -// log.debug(c); -// } -// -// final Set<IVariable<?>> uniqueVars = new HashSet<IVariable<?>>(); -// final Iterator<IVariable<?>> vars = BOpUtility.getSpannedVariables(c); -// while (vars.hasNext()) { -// final IVariable<?> v = vars.next(); -// uniqueVars.add(v); -// } -// -// for (IVariable<?> v : uniqueVars) { -// -// if (log.isDebugEnabled()) { -// log.debug(v); -// } -// -// Collection<IConstraint> constraints = constraintsByVar.get(v); -// if (constraints == null) { -// constraints = new LinkedList<IConstraint>(); -// constraintsByVar.put(v, constraints); -// } -// constraints.add(c); -// } -// } - PipelineOp left = startOp; if (conditionals != null) { // @todo lift into CONDITION on SubqueryOp @@ -602,102 +569,40 @@ } -// /* -// * Analyze the predicates and constraints to decide which constraints -// * will run with which predicates. @todo does not accept known bound -// * variables yet and does not report on the constraint attachment for -// * optional joins using the same assignedConstraint[] (which makes the -// * integration a bit more complicated). -// */ -// final IConstraint[][] assignedConstraints; -//// final PartitionedJoinGroup g; -// { -// // Extract IConstraint[] from the rule. -// final IConstraint[] constraints = new IConstraint[rule.getConstraintCount()]; -// for(int i=0; i<constraints.length; i++) { -// constraints[i] = rule.getConstraint(i); -// } -// -//// // Analyze the join graph. -//// g = new PartitionedJoinGroup(preds, constraints); -// -// // figure out which constraints are attached to which predicates. 
-// assignedConstraints = PartitionedJoinGroup.getJoinGraphConstraints( -// preds, constraints); -// } + /* + * Analyze the predicates and constraints to decide which constraints + * will run with which predicates. @todo does not handle optionals + * correctly, but we do not pass optionals in to Rule2BOpUtility + * from SOp2BOpUtility anymore so ok for now + */ + final IConstraint[][] assignedConstraints; + { + // Extract IConstraint[] from the rule. + final IConstraint[] constraints = new IConstraint[rule.getConstraintCount()]; + for(int i=0; i<constraints.length; i++) { + constraints[i] = rule.getConstraint(i); + } + + // figure out which constraints are attached to which predicates. + assignedConstraints = PartitionedJoinGroup.getJoinGraphConstraints( + preds, constraints, + knownBound.toArray(new IVariable<?>[knownBound.size()])); + } /* * */ - for (int i = 0; i < order.length; i++) { + for (int i = 0; i < preds.length; i++) { // assign a bop id to the predicate final Predicate<?> pred = (Predicate<?>) preds[i]; - // @todo Life will be simple once assignedConstraints is ready. -// left = join(queryEngine, left, pred,// -// Arrays.asList(assignedConstraints[i]), // -// context, idFactory, queryHints); + left = join(queryEngine, left, pred,// + Arrays.asList(assignedConstraints[i]), // + context, idFactory, queryHints); - /* - * Collect all the constraints for this predicate based on which - * variables make their first appearance in this tail - */ - final Collection<IConstraint> constraints = - new LinkedList<IConstraint>(); - -// /* -// * Peek through the predicate's args to find its variables. Use -// * these to attach constraints to the join based on the variables -// * that make their first appearance in this tail. 
-// */ -// for (BOp arg : pred.args()) { -// if (arg instanceof IVariable<?>) { -// final IVariable<?> v = (IVariable<?>) arg; -// /* -// * We do a remove because we don't ever need to run these -// * constraints again during subsequent joins once they have -// * been run once at the initial appearance of the variable. -// * -// * @todo revisit this when we dynamically re-order running -// * joins -// */ -// if (constraintsByVar.containsKey(v)) -// constraints.addAll(constraintsByVar.remove(v)); -// } -// } - - // just add all the constraints to the very last tail for now - if (i == (order.length-1) && rule.getConstraintCount() > 0) { - final Iterator<IConstraint> it = rule.getConstraints(); - while (it.hasNext()) { - constraints.add(it.next()); - } - } - - left = join(queryEngine, left, pred, constraints, context, - idFactory, queryHints); - } -// if (rule.getConstraintCount() > 0) { -// final Iterator<IConstraint> it = rule.getConstraints(); -// while (it.hasNext()) { -// final IConstraint c = it.next(); -// final int condId = idFactory.incrementAndGet(); -// final PipelineOp condOp = applyQueryHints( -// new ConditionalRoutingOp(new BOp[]{left}, -// NV.asMap(new NV[]{// -// new NV(BOp.Annotations.BOP_ID,condId), -// new NV(ConditionalRoutingOp.Annotations.CONDITION, c), -// })), queryHints); -// left = condOp; -// if (log.isDebugEnabled()) { -// log.debug("adding conditional routing op: " + condOp); -// } -// } -// } - if (log.isInfoEnabled()) { // just for now while i'm debugging log.info("rule=" + rule + ":::query=" Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/SOp2BOpUtility.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/SOp2BOpUtility.java 2011-02-24 23:10:42 UTC (rev 4247) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/SOp2BOpUtility.java 2011-02-24 23:11:18 UTC (rev 4248) @@ 
-189,10 +189,11 @@ final Collection<IConstraint> postConditionals = new LinkedList<IConstraint>(); - final IRule rule = rule(join, preConditionals, postConditionals); + PipelineOp left = rule2BOp(join, preConditionals, postConditionals, + idFactory, db, queryEngine, queryHints); - PipelineOp left = Rule2BOpUtility.convert( - rule, preConditionals, idFactory, db, queryEngine, queryHints); +// PipelineOp left = Rule2BOpUtility.convert( +// rule, preConditionals, idFactory, db, queryEngine, queryHints); /* * Start with left=<this join group> and add a SubqueryOp for each @@ -229,14 +230,14 @@ if (!isUnion(child) || isEmptyUnion(child)) continue; + final boolean optional = isOptional(child); final PipelineOp subquery = union( child, idFactory, db, queryEngine, queryHints); - final boolean optional = isOptional(child); final int subqueryId = idFactory.incrementAndGet(); left = new SubqueryOp(new BOp[]{left}, new NV(Predicate.Annotations.BOP_ID, subqueryId),// new NV(SubqueryOp.Annotations.SUBQUERY, subquery),// - new NV(SubqueryOp.Annotations.OPTIONAL,optional)// + new NV(SubqueryOp.Annotations.OPTIONAL, optional)// ); if (log.isInfoEnabled()) { log.info("adding a subquery: " + subqueryId + "\n" + left); @@ -358,9 +359,11 @@ } - protected static IRule rule(final SOpGroup group, + protected static PipelineOp rule2BOp(final SOpGroup group, final Collection<IConstraint> preConditionals, - final Collection<IConstraint> postConditionals) { + final Collection<IConstraint> postConditionals, + final AtomicInteger idFactory, final AbstractTripleStore db, + final QueryEngine queryEngine, final Properties queryHints) { final Collection<IPredicate> preds = new LinkedList<IPredicate>(); final Collection<IConstraint> constraints = new LinkedList<IConstraint>(); @@ -504,7 +507,11 @@ null/* constants */, null/* taskFactory */, required); - return rule; + final PipelineOp left = Rule2BOpUtility.convert( + rule, preConditionals, nonOptParentVars, + idFactory, db, queryEngine, 
queryHints); + + return left; } Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/SOpTreeBuilder.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/SOpTreeBuilder.java 2011-02-24 23:10:42 UTC (rev 4247) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/SOpTreeBuilder.java 2011-02-24 23:11:18 UTC (rev 4248) @@ -172,6 +172,8 @@ collectSOps(sops, (Join) left, rslj, g, pg); } else if (left instanceof LeftJoin) { collectSOps(sops, (LeftJoin) left, rslj, groupId.incrementAndGet(), g); + } else if (left instanceof Union) { + collectSOps(sops, (Union) left, rslj, groupId.incrementAndGet(), g); } else { throw new UnsupportedOperatorException(left); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2011-02-25 21:20:10
|
Revision: 4251 http://bigdata.svn.sourceforge.net/bigdata/?rev=4251&view=rev Author: thompsonbry Date: 2011-02-25 21:20:03 +0000 (Fri, 25 Feb 2011) Log Message: ----------- Added a pathIsComplete boolean to PartitionedJoinGroup. When true, all constraints are attached no later than the last predicate. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/PartitionedJoinGroup.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/joinGraph/TestPartitionedJoinGroup.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/Rule2BOpUtility.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/PartitionedJoinGroup.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/PartitionedJoinGroup.java 2011-02-25 21:18:01 UTC (rev 4250) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/PartitionedJoinGroup.java 2011-02-25 21:20:03 UTC (rev 4251) @@ -134,46 +134,52 @@ .toArray(new IConstraint[joinGraphConstraints.size()]); } - /** - * Return the set of constraints which should be attached to the last join - * in the given the join path. All joins in the join path must be - * non-optional joins (that is, part of either the head plan or the join - * graph). - * <p> - * The rule followed by this method is that each constraint will be attached - * to the first non-optional join at which all of its variables are known to - * be bound. It is assumed that constraints are attached to each join in the - * join path by a consistent logic, e.g., as dictated by this method. - * - * @param joinPath - * An ordered array of predicate identifiers representing a - * specific sequence of non-optional joins. - * - * @return The constraints which should be attached to the last join in the - * join path. 
- * - * @throws IllegalArgumentException - * if the join path is <code>null</code>. - * @throws IllegalArgumentException - * if the join path is empty. - * @throws IllegalArgumentException - * if any element of the join path is <code>null</code>. - * @throws IllegalArgumentException - * if any predicate specified in the join path is not known to - * this class. - * @throws IllegalArgumentException - * if any predicate specified in the join path is optional. - * - * @todo Implement (or refactor) the logic to decide which variables need to - * be propagated and which can be dropped. This decision logic will - * need to be available to the runtime query optimizer. - * - * @todo This does not pay attention to the head plan. If there can be - * constraints on the head plan then either this should be modified - * such that it can decide where they attach or we need to have a - * method which does the same thing for the head plan. - */ - public IConstraint[] getJoinGraphConstraints(final int[] pathIds) { + /** + * Return the set of constraints which should be attached to the last join + * in the given the join path. All joins in the join path must be + * non-optional joins (that is, part of either the head plan or the join + * graph). + * <p> + * The rule followed by this method is that each constraint will be attached + * to the first non-optional join at which all of its variables are known to + * be bound. It is assumed that constraints are attached to each join in the + * join path by a consistent logic, e.g., as dictated by this method. + * + * @param joinPath + * An ordered array of predicate identifiers representing a + * specific sequence of non-optional joins. + * @param pathIsComplete + * <code>true</code> iff the <i>path</i> represents a complete + * join path. When <code>true</code>, any constraints which have + * not already been attached will be attached to the last + * predicate in the join path. 
+ * + * @return The constraints which should be attached to the last join in the + * join path. + * + * @throws IllegalArgumentException + * if the join path is <code>null</code>. + * @throws IllegalArgumentException + * if the join path is empty. + * @throws IllegalArgumentException + * if any element of the join path is <code>null</code>. + * @throws IllegalArgumentException + * if any predicate specified in the join path is not known to + * this class. + * @throws IllegalArgumentException + * if any predicate specified in the join path is optional. + * + * @todo Implement (or refactor) the logic to decide which variables need to + * be propagated and which can be dropped. This decision logic will + * need to be available to the runtime query optimizer. + * + * @todo This does not pay attention to the head plan. If there can be + * constraints on the head plan then either this should be modified + * such that it can decide where they attach or we need to have a + * method which does the same thing for the head plan. + */ + public IConstraint[] getJoinGraphConstraints(final int[] pathIds, + final boolean pathIsComplete) { /* * Verify arguments and resolve bopIds to predicates. 
@@ -212,57 +218,59 @@ } - return getJoinGraphConstraints(path, joinGraphConstraints - .toArray(new IConstraint[joinGraphConstraints.size()]))[pathIds.length - 1]; + final IConstraint[] constraints = joinGraphConstraints + .toArray(new IConstraint[joinGraphConstraints.size()]); + + final IConstraint[][] attachedConstraints = getJoinGraphConstraints( + path, constraints, null/* knownBound */, pathIsComplete); + return attachedConstraints[pathIds.length - 1]; + } - static public IConstraint[][] getJoinGraphConstraints( - final IPredicate<?>[] path, final IConstraint[] joinGraphConstraints) { +// static public IConstraint[][] getJoinGraphConstraints( +// final IPredicate<?>[] path, final IConstraint[] joinGraphConstraints) { +// +// return getJoinGraphConstraints(path, joinGraphConstraints, null/*knownBound*/); +// +// } - return getJoinGraphConstraints(path, joinGraphConstraints, null); - - } - - /** - * Given a join path, return the set of constraints to be associated with - * each join in that join path. Only those constraints whose variables are - * known to be bound will be attached. - * - * @param path - * The join path. - * @param joinGraphConstraints - * The constraints to be applied to the join path (optional). - * @param knownBoundVars - * Variables that are known to be bound as inputs to this - * join graph (parent queries). - * - * @return The constraints to be paired with each element of the join path. - * - * @throws IllegalArgumentException - * if the join path is <code>null</code>. - * @throws IllegalArgumentException - * if the join path is empty. - * @throws IllegalArgumentException - * if any element of the join path is <code>null</code>. - * @throws IllegalArgumentException - * if any element of the join graph constraints is - * <code>null</code>. - * - * @todo It should be an error if a variable appear in a constraint is not - * bound by any possible join path. 
However, it may not be possible to - * determine this by local examination of a join graph since we do not - * know which variables may be presented as already bound when the - * join graph is evaluated (but we can only run the join graph - * currently against static source binding sets and for that case this - * is knowable). - * - * FIXME Unit tests. - */ + /** + * Given a join path, return the set of constraints to be associated with + * each join in that join path. Only those constraints whose variables are + * known to be bound will be attached. + * + * @param path + * The join path. + * @param joinGraphConstraints + * The constraints to be applied to the join path (optional). + * @param knownBoundVars + * Variables that are known to be bound as inputs to this join + * graph (parent queries). + * @param pathIsComplete + * <code>true</code> iff the <i>path</i> represents a complete + * join path. When <code>true</code>, any constraints which have + * not already been attached will be attached to the last predicate + * in the join path. + * + * @return The constraints to be paired with each element of the join path. + * + * @throws IllegalArgumentException + * if the join path is <code>null</code>. + * @throws IllegalArgumentException + * if the join path is empty. + * @throws IllegalArgumentException + * if any element of the join path is <code>null</code>. + * @throws IllegalArgumentException + * if any element of the join graph constraints is + * <code>null</code>. 
+ */ static public IConstraint[][] getJoinGraphConstraints( - final IPredicate<?>[] path, - final IConstraint[] joinGraphConstraints, - final IVariable<?>[] knownBoundVars) { + final IPredicate<?>[] path,// + final IConstraint[] joinGraphConstraints,// + final IVariable<?>[] knownBoundVars,// + final boolean pathIsComplete// + ) { if (path == null) throw new IllegalArgumentException(); @@ -343,7 +351,7 @@ boolean attach = false; - if (i == path.length-1) { + if (pathIsComplete && i == path.length - 1) { // attach all unused constraints to last predicate attach = true; @@ -700,12 +708,11 @@ /* * Find the constraints that will run with each vertex of the new * join path. - * - * TODO This is a forward reference to a different package, so maybe - * move the canJoinWithConstraints() method to that package? */ final IConstraint[][] constraintRunArray = getJoinGraphConstraints( - newPath, constraints); + newPath, constraints, null/*knownBound*/, + true/*pathIsComplete*/ + ); /* * Consider only the constraints attached to the last vertex in the @@ -997,7 +1004,8 @@ // figure out which constraints are attached to which predicates. final IConstraint[][] assignedConstraints = PartitionedJoinGroup - .getJoinGraphConstraints(preds, constraints); + .getJoinGraphConstraints(preds, constraints, null/*knownBound*/, + true/*pathIsComplete*/); final PipelineJoin<?>[] joins = new PipelineJoin[preds.length]; Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/joinGraph/TestPartitionedJoinGroup.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/joinGraph/TestPartitionedJoinGroup.java 2011-02-25 21:18:01 UTC (rev 4250) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/joinGraph/TestPartitionedJoinGroup.java 2011-02-25 21:20:03 UTC (rev 4251) @@ -249,25 +249,32 @@ // System.out.println(Arrays.toString(actual)); // c1 is applied when x is bound. x is bound by p0. 
- assertEquals(new IConstraint[] { c1 }, fixture - .getJoinGraphConstraints(new int[] { p1.getId(), - p0.getId() })); + assertEquals(new IConstraint[] { c1 }, fixture + .getJoinGraphConstraints(// + new int[] { p1.getId(), p0.getId() },// + false// pathIsComplete + )); /* * c1 is applied when x is bound. x is bound by p0. p0 is the * last predicate in this join path, so c1 is attached to p0. */ assertEquals(new IConstraint[] { c1 }, fixture - .getJoinGraphConstraints(new int[] { p0.getId()})); + .getJoinGraphConstraints(// + new int[] { p0.getId()},// + false//pathIsComplete + )); - /* - * c2 is applied when y is bound. y is bound by p1. p1 is the - * last predicate in this join path, p1 is the last predicate in - * this join path so c2 is attached to p1. - */ - assertEquals(new IConstraint[] { c2 }, fixture - .getJoinGraphConstraints(new int[] { p0.getId(), - p1.getId() })); + /* + * c2 is applied when y is bound. y is bound by p1. p1 is the + * last predicate in this join path, p1 is the last predicate in + * this join path so c2 is attached to p1. + */ + assertEquals(new IConstraint[] { c2 }, fixture + .getJoinGraphConstraints(// + new int[] { p0.getId(), p1.getId() },// + false// pathIsComplete + )); } Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/Rule2BOpUtility.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/Rule2BOpUtility.java 2011-02-25 21:18:01 UTC (rev 4250) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/Rule2BOpUtility.java 2011-02-25 21:20:03 UTC (rev 4251) @@ -586,7 +586,9 @@ // figure out which constraints are attached to which predicates. 
assignedConstraints = PartitionedJoinGroup.getJoinGraphConstraints( preds, constraints, - knownBound.toArray(new IVariable<?>[knownBound.size()])); + knownBound.toArray(new IVariable<?>[knownBound.size()]), + true// pathIsComplete + ); } /* This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2011-02-27 21:14:11
|
Revision: 4256 http://bigdata.svn.sourceforge.net/bigdata/?rev=4256&view=rev Author: thompsonbry Date: 2011-02-27 21:14:03 +0000 (Sun, 27 Feb 2011) Log Message: ----------- Progress on the runtime optimizer. - Tracking more statistics in the JGraph trace. - Tracking the expected #of tuples read as well as the expected cardinality. This lets us look at a proxy for IO costs, but real IO estimates are trickier. - Made the limit dynamic on a per-path basis and responsive when there is cardinality estimate underflow. - Never prune a path if the cardinality estimate has underflowed. Instead, increase the sample limit more quickly. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/EdgeSample.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/EstimatedCardinalityComparator.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/Path.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/SampleBase.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/Vertex.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/VertexSample.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/AbstractJoinGraphTestCase.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBarData.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/EdgeSample.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/EdgeSample.java 2011-02-27 21:11:23 UTC (rev 4255) +++ 
branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/EdgeSample.java 2011-02-27 21:14:03 UTC (rev 4256) @@ -45,6 +45,11 @@ public final int inputCount; /** + * The #of tuples read from the access path when processing the cutoff join. + */ + public final long tuplesRead; + + /** * The #of binding sets generated before the join was cutoff. * <p> * Note: If the outputCount is zero then this is a good indicator that there @@ -53,10 +58,11 @@ */ public final long outputCount; - /** - * The #of tuples read from the access path when processing the cutoff join. - */ - public final long tuplesRead; + /** + * The adjusted cardinality estimate for the cutoff join (this is + * {@link #outputCount} as adjusted for a variety of edge conditions). + */ + public final long adjCard; /** * The ratio of the #of input samples consumed to the #of output samples @@ -64,37 +70,71 @@ */ public final double f; - /** - * Create an object which encapsulates a sample of an edge. - * - * @param sourceSample - * The input sample. - * @param limit - * The limit used to sample the edge (this is the limit on the - * #of solutions generated by the cutoff join used when this - * sample was taken). - * @param inputCount - * The #of binding sets out of the source sample vertex sample - * which were consumed. - * @param outputCount - * The #of binding sets generated before the join was cutoff. - * @param tuplesRead - * The #of tuples read from the access path when processing the - * cutoff join. - */ + /** + * The sum of the fast range count for each access path tested. + * <p> + * Note: We use pipeline joins to sample cutoff joins so there will be one + * access path read for each solution in. However, a hash join could be used + * when the operator is fully executed. The hash join will have one access + * path on which we read for all input solutions and the range count of the + * access path will be larger since the access path will be less + * constrained. 
+ */ + public final long sumRangeCount; + + /** + * Estimated tuples read if the operator were fully executed. This is in + * contrast to {@link SampleBase#estCard}, which is the estimated output + * cardinality if the operator were fully executed. + * + * TODO The actual IOs depend on the join type (hash join versus pipeline + * join) and whether or not the file has index order (segment versus + * journal). A hash join will read once on the AP. A pipeline join will read + * once per input solution. A key-range read on a segment uses multi-block + * IO while a key-range read on a journal uses random IO. Also, remote + * access path reads are more expensive than sharded or hash partitioned + * access path reads in scale-out. + */ + public final long estRead; + + /** + * Create an object which encapsulates a sample of an edge. + * + * @param sourceSample + * The input sample. + * @param limit + * The limit used to sample the edge (this is the limit on the + * #of solutions generated by the cutoff join used when this + * sample was taken). + * @param inputCount + * The #of binding sets out of the source sample vertex sample + * which were consumed. + * @param tuplesRead + * The #of tuples read from the access path when processing the + * cutoff join. + * @param outputCount + * The #of binding sets generated before the join was cutoff. + * @param adjustedCard + * The adjusted cardinality estimate for the cutoff join (this is + * <i>outputCount</i> as adjusted for a variety of edge + * conditions). 
+ */ EdgeSample(final SampleBase sourceSample,// final int inputCount, // + final long tuplesRead,// + final long sumRangeCount,// final long outputCount,// - final long tuplesRead,// + final long adjustedCard,// final double f, // // args to SampleBase - final long estimatedCardinality,// + final long estCard,// + final long estRead,// final int limit,// final EstimateEnum estimateEnum,// final IBindingSet[] sample// ) { - super(estimatedCardinality, limit, estimateEnum, sample); + super(estCard, limit, estimateEnum, sample); if (sourceSample == null) throw new IllegalArgumentException(); @@ -103,22 +143,31 @@ this.inputCount = inputCount; + this.tuplesRead = tuplesRead; + + this.sumRangeCount = sumRangeCount; + this.outputCount = outputCount; + + this.adjCard = adjustedCard; - this.tuplesRead = tuplesRead; + this.f = f; - this.f = f; + this.estRead = estRead; } @Override protected void toString(final StringBuilder sb) { - sb.append(", sourceEstimatedCardinality=" - + sourceSample.estimatedCardinality); - sb.append(", sourceEstimateEnum=" + sourceSample.estimateEnum); - sb.append(", inputCount=" + inputCount); - sb.append(", outputCount=" + outputCount); - sb.append(", f=" + f); - } + sb.append(", sourceEstCard=" + sourceSample.estCard); + sb.append(", sourceEstimateEnum=" + sourceSample.estimateEnum); + sb.append(", inputCount=" + inputCount); + sb.append(", tuplesRead=" + tuplesRead); + sb.append(", sumRangeCount=" + sumRangeCount); + sb.append(", outputCount=" + outputCount); + sb.append(", adjustedCard=" + adjCard); + sb.append(", f=" + f); + sb.append(", estRead=" + estRead); + } } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/EstimatedCardinalityComparator.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/EstimatedCardinalityComparator.java 2011-02-27 21:11:23 UTC (rev 4255) +++ 
branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/EstimatedCardinalityComparator.java 2011-02-27 21:14:03 UTC (rev 4256) @@ -50,8 +50,8 @@ // o2 is not weighted. sort o2 to the end. return -1; } - final long id1 = o1.edgeSample.estimatedCardinality; - final long id2 = o2.edgeSample.estimatedCardinality; + final long id1 = o1.edgeSample.estCard; + final long id2 = o2.edgeSample.estCard; if (id1 < id2) return -1; if (id1 > id2) Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java 2011-02-27 21:11:23 UTC (rev 4255) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java 2011-02-27 21:14:03 UTC (rev 4256) @@ -83,9 +83,9 @@ * dominated paths. * * TODO Compare the cumulative expected cardinality of a join path with the - * expected cost of a join path. The latter allows us to also explore - * alternative join strategies, such as the parallel subquery versus scan and - * filter decision for named graph and default graph SPARQL queries. + * tuples read of a join path. The latter allows us to also explore alternative + * join strategies, such as the parallel subquery versus scan and filter + * decision for named graph and default graph SPARQL queries. * * TODO Coalescing duplicate access paths can dramatically reduce the work * performed by a pipelined nested index subquery. (A hash join eliminates all @@ -93,14 +93,6 @@ * pipeline nested index subquery join, then should the runtime query optimizer * prefer paths with duplicate access paths? * - * TODO How can we handle things like lexicon joins. A lexicon join is is only - * evaluated when the dynamic type of a variable binding indicates that the RDF - * Value must be materialized by a join against the ID2T index. 
Binding sets - * having inlined values can simply be routed around the join against the ID2T - * index. Routing around saves network IO in scale-out where otherwise we would - * route binding sets having identifiers which do not need to be materialized to - * the ID2T shards. - * * @todo Examine the overhead of the runtime optimizer. Look at ways to prune * its costs. For example, by pruning the search, by recognizing when the * query is simple enough to execute directly, by recognizing when we have @@ -217,8 +209,10 @@ */ public class JGraph { - private static final transient Logger log = Logger.getLogger(JGraph.class); + private static final String NA = "N/A"; + private static final transient Logger log = Logger.getLogger(JGraph.class); + /** * Vertices of the join graph. */ @@ -254,32 +248,37 @@ return sb.toString(); } - /** - * - * @param v - * The vertices of the join graph. These are - * {@link IPredicate}s associated with required joins. - * @param constraints - * The constraints of the join graph (optional). Since all - * joins in the join graph are required, constraints are - * dynamically attached to the first join in which all of - * their variables are bound. - * - * @throws IllegalArgumentException - * if the vertices is <code>null</code>. - * @throws IllegalArgumentException - * if the vertices is an empty array. - * @throws IllegalArgumentException - * if any element of the vertices is <code>null</code>. - * @throws IllegalArgumentException - * if any constraint uses a variable which is never bound by - * the given predicates. - * @throws IllegalArgumentException - * if <i>sampleType</i> is <code>null</code>. - * - * @todo unit test for a constraint using a variable which is never - * bound. - */ + /** + * + * @param v + * The vertices of the join graph. These are {@link IPredicate}s + * associated with required joins. + * @param constraints + * The constraints of the join graph (optional). 
Since all joins + * in the join graph are required, constraints are dynamically + * attached to the first join in which all of their variables are + * bound. + * + * @throws IllegalArgumentException + * if the vertices is <code>null</code>. + * @throws IllegalArgumentException + * if the vertices is an empty array. + * @throws IllegalArgumentException + * if any element of the vertices is <code>null</code>. + * @throws IllegalArgumentException + * if any constraint uses a variable which is never bound by the + * given predicates. + * @throws IllegalArgumentException + * if <i>sampleType</i> is <code>null</code>. + * + * @todo unit test for a constraint using a variable which is never bound. + * the constraint should be attached at the last vertex in the join + * path. this will cause the query to fail unless the variable was + * already bound, e.g., by a parent query or in the solutions pumped + * into the {@link JoinGraph} operator. + * + * @todo unit test when the join graph has a single vertex. + */ public JGraph(final IPredicate<?>[] v, final IConstraint[] constraints, final SampleType sampleType) { @@ -557,30 +556,35 @@ */ sampleAllVertices(queryEngine, limit); - if (log.isDebugEnabled()) { - final StringBuilder sb = new StringBuilder(); - sb.append("Vertices:\n"); - for (Vertex v : V) { - sb.append(v.toString()); - sb.append("\n"); - } - log.debug(sb.toString()); - } + if (log.isInfoEnabled()) { + final StringBuilder sb = new StringBuilder(); + sb.append("Sampled vertices:\n"); + for (Vertex v : V) { + if (v.sample != null) { + sb.append("id="+v.pred.getId()+" : "); + sb.append(v.sample.toString()); + sb.append("\n"); + } + } + log.info(sb.toString()); + } /* * Estimate the cardinality for each edge. 
*/ final Path[] a = estimateInitialEdgeWeights(queryEngine, limit); - if (log.isDebugEnabled()) { - final StringBuilder sb = new StringBuilder(); - sb.append("All possible initial paths:\n"); - for (Path x : a) { - sb.append(x.toString()); - sb.append("\n"); - } - log.debug(sb.toString()); - } +// if (log.isDebugEnabled()) { +// final StringBuilder sb = new StringBuilder(); +// sb.append("All possible initial paths:\n"); +// for (Path x : a) { +// sb.append(x.toString()); +// sb.append("\n"); +// } +// log.debug(sb.toString()); +// } + if (log.isInfoEnabled()) + log.info("\n*** Initial Paths\n" + JGraph.showTable(a)); /* * Choose the initial set of paths. @@ -681,12 +685,11 @@ if (a.length == 0) throw new IllegalArgumentException(); - // increment the limit by itself in each round. - final int limit = (round + 1) * limitIn; +// // increment the limit by itself in each round. +// final int limit = (round + 1) * limitIn; if (log.isDebugEnabled()) - log.debug("round=" + round + ", limit=" + limit - + ", #paths(in)=" + a.length); + log.debug("round=" + round + ", #paths(in)=" + a.length); /* * Re-sample the vertices which are the initial vertex of any of the @@ -703,10 +706,12 @@ * a NOP if the vertex has been fully materialized. */ if (log.isDebugEnabled()) - log.debug("Re-sampling in-use vertices: limit=" + limit); + log.debug("Re-sampling in-use vertices."); for (Path x : a) { + final int limit = x.getNewLimit(limitIn); + x.vertices[0].sample(queryEngine, limit, sampleType); } @@ -725,11 +730,22 @@ * a given path prefix more than once per round. */ if (log.isDebugEnabled()) - log.debug("Re-sampling in-use path segments: limit=" + limit); + log.debug("Re-sampling in-use path segments."); for (Path x : a) { - // The cutoff join sample of the one step shorter path segment. + /* + * Get the new sample limit for the path. 
+ * + * TODO We only need to increase the sample limit starting at the + * vertex where we have a cardinality underflow or variability in + * the cardinality estimate. This is increasing the limit in each + * round of expansion, which means that we are reading more data + * than we really need to read. + */ + final int limit = x.getNewLimit(limitIn); + + // The cutoff join sample of the one step shorter path segment. EdgeSample priorEdgeSample = null; for (int segmentLength = 2; segmentLength <= x.vertices.length; segmentLength++) { @@ -775,6 +791,7 @@ queryEngine, limit,// x.getPathSegment(2),// 1st edge. C,// constraints + V.length == 2,// pathIsComplete x.vertices[0].sample// source sample. ); @@ -812,6 +829,7 @@ limit,// x.getPathSegment(ids.length()),// C, // constraints + V.length == ids.length(), // pathIsComplete priorEdgeSample// ); @@ -844,14 +862,19 @@ */ if (log.isDebugEnabled()) - log.debug("Expanding paths: limit=" + limit + ", #paths(in)=" - + a.length); + log.debug("Expanding paths: #paths(in)=" + a.length); final List<Path> tmp = new LinkedList<Path>(); for (Path x : a) { - /* + /* + * We already increased the sample limit for the path in the loop + * above. + */ + final int limit = x.edgeSample.limit; + + /* * The set of vertices used to expand this path in this round. */ final Set<Vertex> used = new LinkedHashSet<Vertex>(); @@ -916,9 +939,10 @@ // add the new vertex to the set of used vertices. used.add(tVertex); - // Extend the path to the new vertex. - final Path p = x.addEdge(queryEngine, limit, - tVertex, /*dynamicEdge,*/ C); + // Extend the path to the new vertex. + final Path p = x + .addEdge(queryEngine, limit, tVertex, /* dynamicEdge, */ + C, x.getVertexCount() + 1 == V.length/* pathIsComplete */); // Add to the set of paths for this round. tmp.add(p); @@ -954,8 +978,9 @@ final Vertex tVertex = nothingShared.iterator().next(); // Extend the path to the new vertex. 
- final Path p = x.addEdge(queryEngine, limit, - tVertex,/*dynamicEdge*/ C); + final Path p = x + .addEdge(queryEngine, limit, tVertex,/* dynamicEdge */ + C, x.getVertexCount() + 1 == V.length/* pathIsComplete */); // Add to the set of paths for this round. tmp.add(p); @@ -981,12 +1006,12 @@ final Path[] paths_tp1_pruned = pruneJoinPaths(paths_tp1, edgeSamples); if (log.isDebugEnabled()) - log.debug("\n*** round=" + round + ", limit=" + limit + log.debug("\n*** round=" + round + " : generated paths\n" + JGraph.showTable(paths_tp1, paths_tp1_pruned)); if (log.isInfoEnabled()) - log.info("\n*** round=" + round + ", limit=" + limit + log.info("\n*** round=" + round + ": paths{in=" + a.length + ",considered=" + paths_tp1.length + ",out=" + paths_tp1_pruned.length + "}\n" + JGraph.showTable(paths_tp1_pruned)); @@ -1012,15 +1037,31 @@ return null; } - /** - * Obtain a sample and estimated cardinality (fast range count) for each - * vertex. - * - * @param queryEngine - * The query engine. - * @param limit - * The sample size. - */ + /** + * Obtain a sample and estimated cardinality (fast range count) for each + * vertex. + * + * @param queryEngine + * The query engine. + * @param limit + * The sample size. + * + * TODO Only sample vertices with an index. + * + * TODO Consider other cases where we can avoid sampling a vertex + * or an initial edge. + * <p> + * Be careful about rejecting high cardinality vertices here as + * they can lead to good solutions (see the "bar" data set + * example). + * <p> + * BSBM Q5 provides a counter example where (unless we translate + * it into a key-range constraint on an index) some vertices do + * not share a variable directly and hence will materialize the + * full cross product before filtering which is *really* + * expensive. 
+ * + */ public void sampleAllVertices(final QueryEngine queryEngine, final int limit) { for (Vertex v : V) { @@ -1068,7 +1109,9 @@ * create a join path with a single edge (v,vp) using the sample * obtained from the cutoff join. */ - + + final boolean pathIsComplete = 2 == V.length; + for (int i = 0; i < V.length; i++) { final Vertex v1 = V[i]; @@ -1106,7 +1149,7 @@ */ final Vertex v, vp; - if (v1.sample.estimatedCardinality < v2.sample.estimatedCardinality) { + if (v1.sample.estCard < v2.sample.estCard) { v = v1; vp = v2; } else { @@ -1143,6 +1186,7 @@ limit, // sample limit preds, // ordered path segment. C, // constraints + pathIsComplete,// v.sample // sourceSample ); @@ -1181,6 +1225,7 @@ */ public Path[] pruneJoinPaths(final Path[] a, final Map<PathIds, EdgeSample> edgeSamples) { + final boolean neverPruneUnderflow = true; /* * Find the length of the longest path(s). All shorter paths are * dropped in each round. @@ -1198,7 +1243,12 @@ final Path Pi = a[i]; if (Pi.edgeSample == null) throw new RuntimeException("Not sampled: " + Pi); - if (Pi.vertices.length < maxPathLen) { + if (neverPruneUnderflow + && Pi.edgeSample.estimateEnum == EstimateEnum.Underflow) { + // Do not prune if path has cardinality underflow. + continue; + } + if (Pi.vertices.length < maxPathLen) { /* * Only the most recently generated set of paths survive to * the next round. @@ -1214,16 +1264,21 @@ final Path Pj = a[j]; if (Pj.edgeSample == null) throw new RuntimeException("Not sampled: " + Pj); - if (pruned.contains(Pj)) + if (neverPruneUnderflow + && Pj.edgeSample.estimateEnum == EstimateEnum.Underflow) { + // Do not prune if path has cardinality underflow. + continue; + } + if (pruned.contains(Pj)) continue; final boolean isPiSuperSet = Pi.isUnorderedVariant(Pj); if (!isPiSuperSet) { // Can not directly compare these join paths. 
continue; } - final long costPi = Pi.cumulativeEstimatedCardinality; - final long costPj = Pj.cumulativeEstimatedCardinality; - final boolean lte = costPi <= costPj; + final long costPi = Pi.sumEstCard; + final long costPj = Pj.sumEstCard; + final boolean lte = costPi <= costPj; List<Integer> prunedByThisPath = null; if (lte) { prunedByThisPath = new LinkedList<Integer>(); @@ -1363,17 +1418,24 @@ static public String showTable(final Path[] a,final Path[] pruned) { final StringBuilder sb = new StringBuilder(); final Formatter f = new Formatter(sb); - f.format("%-6s %10s%1s * %10s (%6s %6s %6s) = %10s%1s : %10s %10s", + f.format("%-4s %10s%1s * %10s (%8s %8s %8s %8s %8s %8s) = %10s %10s%1s : %10s %10s %10s", "path",// - "sourceCard",// + "srcCard",// "",// sourceSampleExact "f",// + // ( "in",// - "read",// + "sumRgCt",//sumRangeCount + "tplsRead",// "out",// + "limit",// + "adjCard",// + // ) = + "estRead",// "estCard",// "",// estimateIs(Exact|LowerBound|UpperBound) - "sumEstCard",// + "sumEstRead",// sumEstimatedTuplesRead + "sumEstCard",// sumEstimatedCardinality "joinPath\n" ); for (int i = 0; i < a.length; i++) { @@ -1391,21 +1453,27 @@ } final EdgeSample edgeSample = x.edgeSample; if (edgeSample == null) { - f.format("%6d %10s%1s * %10s (%6s %6s %6s) = %10s%1s : %10s",// - i, "N/A", "", "N/A", "N/A", "N/A", "N/A", "N/A", "", - "N/A"); + f.format("%4d %10s%1s * %10s (%8s %8s %8s %8s %8s %8s) = %10s %10s%1s : %10s %10s",// + i, NA, "", NA, NA, NA, NA, NA, NA, NA, NA, NA, "", NA, + NA); } else { - f.format("%6d %10d%1s * % 10.2f (%6d %6d %6d) = % 10d%1s : % 10d", // + f.format("%4d %10d%1s * % 10.2f (%8d %8d %8d %8d %8d %8d) = %10d % 10d%1s : % 10d % 10d", // i,// - edgeSample.sourceSample.estimatedCardinality,// + edgeSample.sourceSample.estCard,// edgeSample.sourceSample.estimateEnum.getCode(),// edgeSample.f,// edgeSample.inputCount,// + edgeSample.sumRangeCount,// edgeSample.tuplesRead,// edgeSample.outputCount,// - edgeSample.estimatedCardinality,// + 
edgeSample.limit,// + edgeSample.adjCard,// + // = + edgeSample.estRead,// + edgeSample.estCard,// edgeSample.estimateEnum.getCode(),// - x.cumulativeEstimatedCardinality// + x.sumEstRead,// + x.sumEstCard// ); } sb.append(" ["); @@ -1443,70 +1511,103 @@ /* * @todo show limit on samples? */ - f.format("%6s %10s%1s * %10s (%6s %6s %6s) = %10s%1s : %10s",// - "vertex", - "sourceCard",// + f.format("%4s %10s%1s * %10s (%8s %8s %8s %8s %8s %8s) = %10s %10s%1s : %10s %10s",// %10s %10s",// + "vert", + "srcCard",// "",// sourceSampleExact "f",// + // ( "in",// - "read",// + "sumRgCt",// sumRangeCount + "tplsRead",// tuplesRead "out",// + "limit",// + "adjCard",// + // ) = + "estRead",// "estCard",// "",// estimateIs(Exact|LowerBound|UpperBound) + "sumEstRead",// "sumEstCard"// ); - long sumEstCard = 0; + long sumEstRead = 0; // sum(estRead), where estRead := tuplesRead*f + long sumEstCard = 0; // sum(estCard) +// double sumEstCost = 0; // sum(f(estCard,estRead)) for (int i = 0; i < x.vertices.length; i++) { final int[] ids = BOpUtility .getPredIds(x.getPathSegment(i + 1)); final int predId = x.vertices[i].pred.getId(); final SampleBase sample; - if(i==0) { - sample = x.vertices[i].sample; - } else { - // edge sample from the caller's map. - sample = edgeSamples.get(new PathIds(ids)); - } - if (sample != null) { - sumEstCard += sample.estimatedCardinality; - if (sample instanceof EdgeSample) - sumEstCard += ((EdgeSample) sample).tuplesRead; - } + if (i == 0) { + sample = x.vertices[i].sample; + if (sample != null) { + sumEstRead = sample.estCard; // dbls as estRead for vtx + sumEstCard = sample.estCard; + } + } else { + // edge sample from the caller's map. 
+ sample = edgeSamples.get(new PathIds(ids)); + if (sample != null) { + sumEstRead+= ((EdgeSample) sample).estRead; + sumEstCard += ((EdgeSample) sample).estCard; + } + } sb.append("\n"); if (sample == null) { - f.format("% 6d %10s%1s * %10s (%6s %6s %6s) = %10s%1s : %10s",// + f.format("% 4d %10s%1s * %10s (%8s %8s %8s %8s %8s %8s) = %10s %10s%1s : %10s %10s",// %10s %10s",// predId,// - "N/A", "", "N/A", "N/A", "N/A", "N/A", "N/A", "", "N/A"); + NA, "", NA, NA, NA, NA, NA, NA, NA, NA, NA, "", NA, NA);//,NA,NA); } else if(sample instanceof VertexSample) { - // Show the vertex sample for the initial vertex. - f.format("% 6d %10s%1s * %10s (%6s %6s %6s) = % 10d%1s : %10d",// + /* + * Show the vertex sample for the initial vertex. + * + * Note: we do not store all fields for a vertex sample + * which are stored for an edge sample because so many of + * the values are redundant for a vertex sample. Therefore, + * this sets up local variables which are equivalent to the + * various edge sample columns that we will display. + */ + final long sumRangeCount = sample.estCard; + final long estRead = sample.estCard; + final long tuplesRead = Math.min(sample.estCard, sample.limit); + final long outputCount = Math.min(sample.estCard, sample.limit); + final long adjCard = Math.min(sample.estCard, sample.limit); + f.format("% 4d %10s%1s * %10s (%8s %8s %8s %8s %8s %8s) = % 10d % 10d%1s : %10d %10d",// %10d %10s",// predId,// - "N/A",//sample.sourceSample.estimatedCardinality,// - " ",//sample.sourceSample.isExact() ? 
"E" : "",// + " ",//srcSample.estCard + " ",//srcSample.estimateEnum " ",//sample.f,// - "N/A",//sample.inputCount,// - "N/A",//sample.tuplesRead,// - "N/A",//sample.outputCount,// - sample.estimatedCardinality,// + " ",//sample.inputCount, + sumRangeCount,// + tuplesRead,// + outputCount,// + sample.limit,// limit + adjCard,// adjustedCard + estRead,// estRead + sample.estCard,// estCard sample.estimateEnum.getCode(),// + sumEstRead,// sumEstCard// -// e.cumulativeEstimatedCardinality// ); } else { // Show the sample for a cutoff join with the 2nd+ vertex. final EdgeSample edgeSample = (EdgeSample)sample; - f.format("% 6d %10d%1s * % 10.2f (%6d %6d %6d) = % 10d%1s : %10d",// + f.format("% 4d %10d%1s * % 10.2f (%8d %8d %8d %8d %8d %8d) = % 10d % 10d%1s : %10d %10d",// %10d %10",// predId,// - edgeSample.sourceSample.estimatedCardinality,// + edgeSample.sourceSample.estCard,// edgeSample.sourceSample.estimateEnum.getCode(),// edgeSample.f,// edgeSample.inputCount,// + edgeSample.sumRangeCount,// edgeSample.tuplesRead,// edgeSample.outputCount,// - edgeSample.estimatedCardinality,// + edgeSample.limit,// + edgeSample.adjCard,// + edgeSample.estRead,// + edgeSample.estCard,// edgeSample.estimateEnum.getCode(),// + sumEstRead,// sumEstCard// -// e.cumulativeEstimatedCardinality// ); } } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/Path.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/Path.java 2011-02-27 21:11:23 UTC (rev 4255) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/Path.java 2011-02-27 21:14:03 UTC (rev 4256) @@ -91,19 +91,43 @@ * is computed by the constructor and cached as it is used repeatedly. */ private final IPredicate<?>[] preds; + + /** + * The sample obtained by the step-wise cutoff evaluation of the ordered + * edges of the path. 
+ * <p> + * Note: This sample is generated one edge at a time rather than by + * attempting the cutoff evaluation of the entire join path (the latter + * approach does allow us to limit the amount of work to be done to satisfy + * the cutoff). + * <p> + * Note: This is updated when we resample the path prior to expanding the + * path with another vertex. + */ + EdgeSample edgeSample;// TODO rename pathSample? + + /** + * Examine the path. If there is a cardinality underflow, then boost the + * sampling limit. Otherwise, increase the sample by the caller's value. + * + * @param limitIn + * The default increment for the sample limit. + * + * @return The limit to use when resampling this path. + */ + public int getNewLimit(final int limitIn) { + + if (edgeSample.estimateEnum == EstimateEnum.Underflow) { + + return edgeSample.limit * 2; + + } + + return edgeSample.limit + limitIn; + + } /** - * The sample obtained by the step-wise cutoff evaluation of the ordered - * edges of the path. - * <p> - * Note: This sample is generated one edge at a time rather than by - * attempting the cutoff evaluation of the entire join path (the latter - * approach does allow us to limit the amount of work to be done to - * satisfy the cutoff). - */ - EdgeSample edgeSample; - - /** * The cumulative estimated cardinality of the path. This is zero for an * empty path. For a path consisting of a single edge, this is the estimated * cardinality of that edge. When creating a new path by adding an edge to @@ -118,29 +142,36 @@ * vertex in path order. The EdgeSamples are maintained in a map * managed by JGraph during optimization. */ - final public long cumulativeEstimatedCardinality; + final public long sumEstCard; + /** + * The cumulative estimated #of tuples that would be read for this path if + * it were to be fully executed (sum(tuplesRead*f) for each step in the + * path). + */ + final public long sumEstRead; + + /** + * The expected cost of this join path if it were to be fully executed. 
This + * is a function of {@link #sumEstCard} and {@link #sumEstRead}. The + * former reflects the #of intermediate solutions generated. The latter + * reflects the #of tuples read from the disk. These two measures are + * tracked separately and then combined into the {@link #sumEstCost}. + */ + final public double sumEstCost; + /** - * Combine the cumulative estimated cost of the source path with the cost of - * the edge sample and return the cumulative estimated cost of the new path. + * Combine the cumulative expected cardinality and the cumulative expected + * tuples read to produce an overall measure of the expected cost of the + * join path if it were to be fully executed. * - * @param cumulativeEstimatedCardinality - * The cumulative estimated cost of the source path. - * @param edgeSample - * The cost estimate for the cutoff join required to extend the - * source path to the new path. - * @return The cumulative estimated cost of the new path. - * - * FIXME Figure out how to properly combine/weight the #of tuples - * read and the #of solutions produced! + * @return The cumulative estimated cost of the join path. 
*/ - static private long add(final long cumulativeEstimatedCardinality, - final EdgeSample edgeSample) { + private static double getTotalCost(final Path p) { - final long total = cumulativeEstimatedCardinality + // - edgeSample.estimatedCardinality // -// + edgeSample.tuplesRead // - ; + final long total = p.sumEstCard + // + p.sumEstRead// + ; return total; @@ -162,7 +193,7 @@ // sb.append(e.getLabel()); // first = false; // } - sb.append("],cumEstCard=" + cumulativeEstimatedCardinality + sb.append("],cumEstCard=" + sumEstCard + ",sample=" + edgeSample + "}"); return sb.toString(); } @@ -205,48 +236,52 @@ if (edgeSample.getSample() == null) throw new IllegalArgumentException(); -// this.edges = Collections.singletonList(e); + this.vertices = new Vertex[]{v0,v1}; - this.vertices = new Vertex[]{v0,v1};//getVertices(edges); - this.preds = getPredicates(vertices); this.edgeSample = edgeSample; - this.cumulativeEstimatedCardinality = add(0L/*cumulativeEstimatedCardinality*/,edgeSample); -// edgeSample.estimatedCardinality +// -// edgeSample.tuplesRead// this is part of the cost too. -// ; + /* + * The estimated cardinality of the cutoff join of (v0,v1). + */ + this.sumEstCard = edgeSample.estCard; -// this.cumulativeEstimatedCardinality = // -// edgeSample.estimatedCardinality +// -// edgeSample.tuplesRead// this is part of the cost too. -// ; + /* + * The expected #of tuples read for the full join of (v0,v1). This is + * everything which could be visited for [v0] plus the #of tuples read + * from [v1] during the cutoff join times the (adjusted) join hit ratio. + */ + this.sumEstRead = v0.sample.estCard + edgeSample.estRead; + this.sumEstCost = getTotalCost(this); + } - /** - * Private constructor used when we extend a path. - * - * @param vertices - * The ordered array of vertices in the new path. The last entry - * in this array is the vertex which was used to extend the path. 
- * @param preds - * The ordered array of predicates in the new path (correlated - * with the vertices and passed in since it is already computed - * by the caller). - * @param cumulativeEstimatedCardinality - * The cumulative estimated cardinality of the new path. - * @param edgeSample - * The sample from the cutoff join of the last vertex added to - * this path. - */ + /** + * Private constructor used when we extend a path. + * + * @param vertices + * The ordered array of vertices in the new path. The last entry + * in this array is the vertex which was used to extend the path. + * @param preds + * The ordered array of predicates in the new path (correlated + * with the vertices and passed in since it is already computed + * by the caller). + * @param edgeSample + * The sample from the cutoff join of the last vertex added to + * this path. + * @param sumEstCard + * The cumulative estimated cardinality of the new path. + * @param sumEstRead + * The cumulative estimated tuples read of the new path. 
+ */ private Path(// final Vertex[] vertices,// final IPredicate<?>[] preds,// -// final List<Edge> edges,// - final long cumulativeEstimatedCardinality,// - final EdgeSample edgeSample// + final EdgeSample edgeSample,// + final long sumEstCard,// + final long sumEstRead// ) { if (vertices == null) @@ -258,7 +293,7 @@ if (vertices.length != preds.length) throw new IllegalArgumentException(); - if (cumulativeEstimatedCardinality < 0) + if (sumEstCard < 0) throw new IllegalArgumentException(); if (edgeSample == null) @@ -267,15 +302,17 @@ if (edgeSample.getSample() == null) throw new IllegalArgumentException(); -// this.edges = Collections.unmodifiableList(edges); + this.vertices = vertices; - this.vertices = vertices; - - this.preds = preds; - - this.cumulativeEstimatedCardinality = cumulativeEstimatedCardinality; + this.preds = preds; - this.edgeSample = edgeSample; + this.edgeSample = edgeSample; + + this.sumEstCard = sumEstCard; + + this.sumEstRead = sumEstRead; + + this.sumEstCost = getTotalCost(this); } @@ -552,6 +589,9 @@ * The new vertex. * @param constraints * The join graph constraints (if any). + * @param pathIsComplete + * <code>true</code> iff all vertices in the join graph are + * incorporated into this path. * * @return The new path. The materialized sample for the new path is the * sample obtained by the cutoff join for the edge added to the @@ -560,7 +600,8 @@ * @throws Exception */ public Path addEdge(final QueryEngine queryEngine, final int limit, - final Vertex vnew, final IConstraint[] constraints) + final Vertex vnew, final IConstraint[] constraints, + final boolean pathIsComplete) throws Exception { if (vnew == null) @@ -626,59 +667,63 @@ limit, // preds2,// constraints,// + pathIsComplete,// this.edgeSample // the source sample. ); - { - final long cumulativeEstimatedCardinality = add( - this.cumulativeEstimatedCardinality, edgeSample2); + // Extend the path. 
+ final Path tmp = new Path(// + vertices2,// + preds2,// + edgeSample2,// + this.sumEstCard + edgeSample2.estCard,// sumEstCard + this.sumEstRead + edgeSample2.estRead // sumEstRead + ); - // Extend the path. - final Path tmp = new Path(vertices2, preds2, - cumulativeEstimatedCardinality, edgeSample2); + return tmp; - return tmp; - - } - } - /** - * Cutoff join of the last vertex in the join path. - * <p> - * <strong>The caller is responsible for protecting against needless - * re-sampling.</strong> This includes cases where a sample already exists - * at the desired sample limit and cases where the sample is already exact. - * - * @param queryEngine - * The query engine. - * @param limit - * The limit for the cutoff join. - * @param path - * The path segment, which must include the target vertex as the - * last component of the path segment. - * @param constraints - * The constraints declared for the join graph (if any). The - * appropriate constraints will be applied based on the variables - * which are known to be bound as of the cutoff join for the last - * vertex in the path segment. - * @param sourceSample - * The input sample for the cutoff join. When this is a one-step - * estimation of the cardinality of the edge, then this sample is - * taken from the {@link VertexSample}. When the edge (vSource, - * vTarget) extends some {@link Path}, then this is taken from - * the {@link EdgeSample} for that {@link Path}. - * - * @return The result of sampling that edge. - * - * @throws Exception - */ + /** + * Cutoff join of the last vertex in the join path. + * <p> + * <strong>The caller is responsible for protecting against needless + * re-sampling.</strong> This includes cases where a sample already exists + * at the desired sample limit and cases where the sample is already exact. + * + * @param queryEngine + * The query engine. + * @param limit + * The limit for the cutoff join. 
+ * @param path + * The path segment, which must include the target vertex as the + * last component of the path segment. + * @param constraints + * The constraints declared for the join graph (if any). The + * appropriate constraints will be applied based on the variables + * which are known to be bound as of the cutoff join for the last + * vertex in the path segment. + * @param pathIsComplete + * <code>true</code> iff all vertices in the join graph are + * incorporated into this path. + * @param sourceSample + * The input sample for the cutoff join. When this is a one-step + * estimation of the cardinality of the edge, then this sample is + * taken from the {@link VertexSample}. When the edge (vSource, + * vTarget) extends some {@link Path}, then this is taken from + * the {@link EdgeSample} for that {@link Path}. + * + * @return The result of sampling that edge. + * + * @throws Exception + */ static public EdgeSample cutoffJoin(// final QueryEngine queryEngine,// final int limit,// final IPredicate<?>[] path,// final IConstraint[] constraints,// + final boolean pathIsComplete,// final SampleBase sourceSample// ) throws Exception { @@ -702,8 +747,8 @@ // Figure out which constraints attach to each predicate. final IConstraint[][] constraintAttachmentArray = PartitionedJoinGroup - .getJoinGraphConstraints(path, constraints,null/*knownVariables*/, - false/*FIXME pathIsComplete*/); + .getJoinGraphConstraints(path, constraints, null/*knownBound*/, + pathIsComplete); // The constraint(s) (if any) for this join. final IConstraint[] c = constraintAttachmentArray[path.length - 1]; @@ -793,6 +838,7 @@ final List<IBindingSet> result = new LinkedList<IBindingSet>(); try { + int nresults = 0; try { IBindingSet bset = null; // Figure out the #of source samples consumed. @@ -801,10 +847,11 @@ while (itr.hasNext()) { bset = itr.next(); result.add(bset); + nresults++; // TODO break out if cutoff join over produces! } } finally { // verify no problems. 
- runningQuery.get(); + runningQuery.get(); // TODO CANCEL query once limit is satisfied THEN check the future for errors. } } finally { runningQuery.cancel(true/* mayInterruptIfRunning */); @@ -822,8 +869,11 @@ final int inputCount = (int) joinStats.inputSolutions.get(); // #of solutions out. - long outputCount = joinStats.outputSolutions.get(); + final long outputCount = joinStats.outputSolutions.get(); + // #of solutions out as adjusted for various edge conditions. + final long adjustedCard; + // cumulative range count of the sampled access paths. final long sumRangeCount = joinStats.accessPathRangeCount.get(); @@ -838,6 +888,7 @@ * number of output solutions. */ estimateEnum = EstimateEnum.Exact; + adjustedCard = outputCount; } else if (inputCount == 1 && outputCount == limit) { /* * If the inputCount is ONE (1) and the outputCount is the limit, @@ -856,11 +907,11 @@ * are really better to be dropped. */ // replace outputCount with the sum of the sampled range counts. - outputCount = sumRangeCount; + adjustedCard = sumRangeCount; estimateEnum = EstimateEnum.LowerBound; } else if ((sourceSample.estimateEnum != EstimateEnum.Exact) - && inputCount == Math.min(sourceSample.limit, - sourceSample.estimatedCardinality) && outputCount == 0) { + /*&& inputCount == Math.min(sourceSample.limit, + sourceSample.estimatedCardinality) */ && outputCount == 0) { /* * When the source sample was not exact, the inputCount is EQ to the * lesser of the source range count and the source sample limit, and @@ -874,10 +925,16 @@ * Note: An apparent join hit ratio of zero does NOT imply that the * join will be empty (unless the source vertex sample is actually * the fully materialized access path - this case is covered above). 
+ * + * path sourceCard * f ( in read out limit adjCard) = estCard : sumEstCard joinPath + * 15 4800L * 0.00 ( 200 200 0 300 0) = 0 : 3633 [ 3 1 6 5 ] + */ estimateEnum = EstimateEnum.Underflow; + adjustedCard = outputCount; } else { estimateEnum = EstimateEnum.Normal; + adjustedCard = outputCount; } /* @@ -891,20 +948,43 @@ * read. */ final long tuplesRead = joinStats.accessPathUnitsIn.get(); - - final double f = outputCount == 0 ? 0 - : (outputCount / (double) inputCount); - final long estimatedCardinality = (long) (sourceSample.estimatedCardinality * f); + /* + * Compute the hit-join ratio based on the adjusted cardinality + * estimate. + */ + final double f = adjustedCard == 0 ? 0 + : (adjustedCard / (double) inputCount); +// final double f = outputCount == 0 ? 0 +// : (outputCount / (double) inputCount); + // estimated output cardinality of fully executed operator. + final long estCard = (long) (sourceSample.estCard * f); + + /* + * estimated tuples read for fully executed operator + * + * TODO The actual IOs depend on the join type (hash join versus + * pipeline join) and whether or not the file has index order (segment + * versus journal). A hash join will read once on the AP. A pipeline + * join will read once per input solution. A key-range read on a segment + * uses multi-block IO while a key-range read on a journal uses random + * IO. Also, remote access path reads are more expensive than sharded + * or hash partitioned access path reads in scale-out. + */ + final long estRead = (long) (sumRangeCount * f); + final EdgeSample edgeSample = new EdgeSample(// sourceSample,// inputCount,// + tuplesRead,// + sumRangeCount,// outputCount, // - tuplesRead,// + adjustedCard,// f, // // args to SampleBase - estimatedCardinality, // + estCard, // estimated output cardinality if fully executed. + estRead, // estimated tuples read if fully executed. 
limit, // estimateEnum,// result.toArray(new IBindingSet[result.size()])); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/SampleBase.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/SampleBase.java 2011-02-27 21:11:23 UTC (rev 4255) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/SampleBase.java 2011-02-27 21:14:03 UTC (rev 4256) @@ -56,11 +56,17 @@ private static final transient Logger log = Logger .getLogger(SampleBase.class); - /** - * The estimated cardinality of the underlying access path (for a vertex) or - * the join (for a cutoff join). - */ - public final long estimatedCardinality; + /** + * The total estimated cardinality of the underlying access path (for a + * vertex) or the join path segment (for a cutoff join). + * + * TODO When using a non-perfect index, the estimated cardinality is only + * part of the cost. The #of tuples scanned is also important. Even when + * scanning and filtering in key order this could trigger random IOs unless + * the file has index order (an IndexSegment file has index order but a + * BTree on a journal does not). + */ + public final long estCard; /** * The limit used to produce the {@link #getSample() sample}. 
@@ -156,7 +162,7 @@ if (sample == null) throw new IllegalArgumentException(); - this.estimatedCardinality = estimatedCardinality; + this.estCard = estimatedCardinality; this.limit = limit; @@ -180,7 +186,7 @@ public String toString() { final StringBuilder sb = new StringBuilder(); sb.append(getClass().getSimpleName()); - sb.append("{estimatedCardinality=" + estimatedCardinality); + sb.append("{estCard=" + estCard); sb.append(",limit=" + limit); sb.append(",estimateEnum=" + estimateEnum); { Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/Vertex.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/Vertex.java 2011-02-27 21:11:23 UTC (rev 4255) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/Vertex.java 2011-02-27 21:14:03 UTC (rev 4256) @@ -153,7 +153,7 @@ final IAccessPath ap = context.getAccessPath(r, pred); final long rangeCount = oldSample == null ? ap - .rangeCount(false/* exact */) : oldSample.estimatedCardinality; + .rangeCount(false/* exact */) : oldSample.estCard; if (rangeCount <= limit) { Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/VertexSample.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/VertexSample.java 2011-02-27 21:11:23 UTC (rev 4255) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/VertexSample.java 2011-02-27 21:14:03 UTC (rev 4256) @@ -37,7 +37,7 @@ * historical view or even a time scale of query which is significantly * faster than update). * - * @param estimatedCardinality + * @param estCard * The estimated cardinality. * @param limit * The cutoff limit used to make that cardinality estimate. @@ -49,10 +49,10 @@ * @param sample * The sample. 
*/ - public VertexSample(final long estimatedCardinality, final int limit, + public VertexSample(final long estCard, final int limit, final EstimateEnum estimateEnum, final IBindingSet[] sample) { - super(estimatedCardinality, limit, estimateEnum, sample); + super(estCard, limit, estimateEnum, sample); switch (estimateEnum) { case Normal: Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/AbstractJoinGraphTestCase.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/AbstractJoinGraphTestCase.java 2011-02-27 21:11:23 UTC (rev 4255) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/AbstractJoinGraphTestCase.java 2011-02-27 21:14:03 UTC (rev 4256) @@ -39,6 +39,7 @@ import com.bigdata.bop.BOpContextBase; import com.bigdata.bop.BOpIdFactory; +import com.bigdata.bop.BOpUtility; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IConstraint; import com.bigdata.bop.IPredicate; @@ -89,8 +90,17 @@ /** The initial sampling limit. */ static private final int limit = 100; - - /** The #of edges considered for the initial paths. */ + + /** + * The #of edges considered for the initial paths. + * + * FIXME We need to consider all of the low cardinality vertices, e.g., BSBM + * Q5 has 3 such vertices. Also, we should not consider vertices when + * looking for the initial edges which are relatively unconstrained (e.g., + * 1-bound). This could be handled by sampling the top-N vertices in reverse + * rank order of their cardinality and any with a cardinality LT 10x the + * initial sample limit. 
+ */ static private final int nedges = 2; static private final SampleType sampleType = SampleType.EVEN; @@ -240,9 +250,14 @@ final IPredicate<?>[] runtimePredOrder = runRuntimeQueryOptimizer( getQueryEngine(), limit, nedges, sampleType, preds, constraints); + long totalGivenTime = 0; long totalRuntimeTime = 0; long totalStaticTime = 0; + long givenSolutions = 0; + long runtimeSolutions = 0; + long staticSolutions = ... [truncated message content] |
From: <tho...@us...> - 2011-02-28 16:26:45
|
Revision: 4260 http://bigdata.svn.sourceforge.net/bigdata/?rev=4260&view=rev Author: thompsonbry Date: 2011-02-28 16:26:37 +0000 (Mon, 28 Feb 2011) Log Message: ----------- Some more work on the RTO. - added SELECTed variables to the test cases. - added [distinct] flag and run DISTINCT when specified to the test cases. - more fiddling with the estCard, estRead, and estCost. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/PartitionedJoinGroup.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JoinGraph.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/Path.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/joinGraph/TestPartitionedJoinGroup_canJoinUsingConstraints.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/AbstractJoinGraphTestCase.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBarData.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/PartitionedJoinGroup.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/PartitionedJoinGroup.java 2011-02-28 16:24:53 UTC (rev 4259) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/PartitionedJoinGroup.java 2011-02-28 16:26:37 UTC (rev 4260) @@ -23,6 +23,7 @@ import com.bigdata.bop.PipelineOp; import com.bigdata.bop.join.PipelineJoin; import com.bigdata.bop.joinGraph.rto.JoinGraph; +import com.bigdata.bop.solutions.DistinctBindingSetOp; import 
com.bigdata.bop.solutions.SliceOp; /** @@ -956,58 +957,87 @@ } - /** - * Generate a query plan from an ordered collection of predicates. - * - * @param p - * The join path. - * - * @return The query plan. - * - * FIXME Select only those variables required by downstream - * processing or explicitly specified by the caller (in the case - * when this is a subquery, the caller has to declare which - * variables are selected and will be returned out of the subquery). - * - * FIXME For scale-out, we need to either mark the join's evaluation - * context based on whether or not the access path is local or - * remote (and whether the index is key-range distributed or hash - * partitioned). - * - * FIXME Add a method to generate a runnable query plan from the - * collection of predicates and constraints on the - * {@link PartitionedJoinGroup} together with an ordering over the - * join graph. This is a bit different for the join graph and the - * optionals in the tail plan. The join graph itself should either - * be a {@link JoinGraph} operator which gets evaluated at run time - * or reordered by whichever optimizer is selected for the query - * (query hints). - * - * @todo The order of the {@link IPredicate}s in the tail plan is currently - * unchanged from their given order (optional joins without - * constraints can not reduce the selectivity of the query). However, - * it could be worthwhile to run optionals with constraints before - * those without constraints since the constraints can reduce the - * selectivity of the query. If we do this, then we need to reorder - * the optionals based on the partial order imposed what variables - * they MIGHT bind (which are not bound by the join graph). - * - * @todo multiple runFirst predicates can be evaluated in parallel unless - * they have shared variables. When there are no shared variables, - * construct a TEE pattern such that evaluation proceeds in parallel. 
- * When there are shared variables, the runFirst predicates must be - * ordered based on those shared variables (at which point, it is - * probably an error to flag them as runFirst). - */ - static public PipelineOp getQuery(final BOpIdFactory idFactory, - final IPredicate<?>[] preds, final IConstraint[] constraints) { + /** + * Generate a query plan from an ordered collection of predicates. + * + * @param distinct + * <code>true</code> iff only the distinct solutions are desired. + * @param selected + * The variable(s) to be projected out of the join graph. + * @param preds + * The join path which will be used to execute the join graph. + * @param constraints + * The constraints on the join graph. + * + * @return The query plan. + * + * FIXME Select only those variables required by downstream + * processing or explicitly specified by the caller (in the case + * when this is a subquery, the caller has to declare which + * variables are selected and will be returned out of the subquery). + * + * FIXME For scale-out, we need to either mark the join's evaluation + * context based on whether or not the access path is local or + * remote (and whether the index is key-range distributed or hash + * partitioned). + * + * FIXME Add a method to generate a runnable query plan from the + * collection of predicates and constraints on the + * {@link PartitionedJoinGroup} together with an ordering over the + * join graph. This is a bit different for the join graph and the + * optionals in the tail plan. The join graph itself should either + * be a {@link JoinGraph} operator which gets evaluated at run time + * or reordered by whichever optimizer is selected for the query + * (query hints). + * + * @todo The order of the {@link IPredicate}s in the tail plan is currently + * unchanged from their given order (optional joins without + * constraints can not reduce the selectivity of the query). 
However, + * it could be worthwhile to run optionals with constraints before + * those without constraints since the constraints can reduce the + * selectivity of the query. If we do this, then we need to reorder + * the optionals based on the partial order imposed what variables + * they MIGHT bind (which are not bound by the join graph). + * + * @todo multiple runFirst predicates can be evaluated in parallel unless + * they have shared variables. When there are no shared variables, + * construct a TEE pattern such that evaluation proceeds in parallel. + * When there are shared variables, the runFirst predicates must be + * ordered based on those shared variables (at which point, it is + * probably an error to flag them as runFirst). + */ + static public PipelineOp getQuery(final BOpIdFactory idFactory, + final boolean distinct, final IVariable<?>[] selected, + final IPredicate<?>[] preds, final IConstraint[] constraints) { + /* + * Reserve ids used by the join graph or its constraints. + */ + { + for (IPredicate<?> p : preds) { + idFactory.reserve(p.getId()); + } + if (constraints != null) { + for (IConstraint c : constraints) { + final Iterator<BOp> itr = BOpUtility + .preOrderIteratorWithAnnotations(c); + while (itr.hasNext()) { + final BOp y = itr.next(); + final Integer anId = (Integer) y + .getProperty(BOp.Annotations.BOP_ID); + if (anId != null) + idFactory.reserve(anId.intValue()); + } + } + } + } + // figure out which constraints are attached to which predicates. final IConstraint[][] assignedConstraints = PartitionedJoinGroup .getJoinGraphConstraints(preds, constraints, null/*knownBound*/, true/*pathIsComplete*/); - final PipelineJoin<?>[] joins = new PipelineJoin[preds.length]; +// final PipelineJoin<?>[] joins = new PipelineJoin[preds.length]; PipelineOp lastOp = null; @@ -1016,6 +1046,7 @@ // The next vertex in the selected join order. final IPredicate<?> p = preds[i]; + // Annotations for this join. 
final List<NV> anns = new LinkedList<NV>(); anns.add(new NV(PipelineJoin.Annotations.PREDICATE, p)); @@ -1027,23 +1058,35 @@ // // anns.add(new NV(PipelineJoin.Annotations.SELECT, vars.toArray(new IVariable[vars.size()]))); - if (assignedConstraints[i] != null - && assignedConstraints[i].length > 0) - anns - .add(new NV(PipelineJoin.Annotations.CONSTRAINTS, - assignedConstraints[i])); + if (assignedConstraints[i] != null + && assignedConstraints[i].length > 0) { + // attach constraints to this join. + anns.add(new NV(PipelineJoin.Annotations.CONSTRAINTS, + assignedConstraints[i])); + } - final PipelineJoin<?> joinOp = new PipelineJoin( - lastOp == null ? new BOp[0] : new BOp[] { lastOp }, anns - .toArray(new NV[anns.size()])); + final PipelineJoin<?> joinOp = new PipelineJoin(// + lastOp == null ? new BOp[0] : new BOp[] { lastOp }, // + anns.toArray(new NV[anns.size()])// + ); - joins[i] = joinOp; - lastOp = joinOp; } -// final PipelineOp queryOp = lastOp; + if (distinct) { + lastOp = new DistinctBindingSetOp(new BOp[] { lastOp }, NV + .asMap(new NV[] { + new NV(PipelineOp.Annotations.BOP_ID, idFactory + .nextId()), // + new NV(PipelineOp.Annotations.EVALUATION_CONTEXT, + BOpEvaluationContext.CONTROLLER),// + new NV(PipelineOp.Annotations.SHARED_STATE, true),// + new NV(DistinctBindingSetOp.Annotations.VARIABLES, + selected),// + })// + ); + } /* * FIXME Why does wrapping with this slice appear to be @@ -1052,7 +1095,7 @@ * * [This should perhaps be moved into the caller.] 
*/ - final PipelineOp queryOp = new SliceOp(new BOp[] { lastOp }, NV + lastOp = new SliceOp(new BOp[] { lastOp }, NV .asMap(new NV[] { new NV(JoinGraph.Annotations.BOP_ID, idFactory.nextId()), // new NV(JoinGraph.Annotations.EVALUATION_CONTEXT, @@ -1061,9 +1104,7 @@ }) // ); -// final PipelineOp queryOp = lastOp; - - return queryOp; + return lastOp; } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java 2011-02-28 16:24:53 UTC (rev 4259) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java 2011-02-28 16:26:37 UTC (rev 4260) @@ -37,6 +37,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.concurrent.atomic.AtomicInteger; import org.apache.log4j.Logger; @@ -415,6 +416,27 @@ while (paths.length > 0 && round < nvertices - 1) { + /* + * Resample the paths. + * + * Note: Since the vertex samples are random, it is possible for the + * #of paths with cardinality estimate underflow to jump up and down + * due to the sample which is making its way through each path in + * each round. + */ + int nunderflow; + + while ((nunderflow = resamplePaths(queryEngine, limit, round, + paths, edgeSamples)) > 0) { + + log.warn("resampling in round=" + round + " : " + nunderflow + + " paths have cardinality estimate underflow."); + + } + + /* + * Extend the paths by one vertex. + */ paths = expand(queryEngine, limit, round++, paths, edgeSamples); } @@ -426,8 +448,11 @@ } - // Should be one winner. - assert paths.length == 1; + // Should be one winner. + if (paths.length != 1) { + throw new AssertionError("Expected one path but have " + + paths.length + " paths."); + } if (log.isInfoEnabled()) { @@ -629,48 +654,30 @@ } - /** - * Do one breadth first expansion. 
In each breadth first expansion we extend - * each of the active join paths by one vertex for each remaining vertex - * which enjoys a constrained join with that join path. In the event that - * there are no remaining constrained joins, we will extend the join path - * using an unconstrained join if one exists. In all, there are three - * classes of joins to be considered: - * <ol> - * <li>The target predicate directly shares a variable with the source join - * path. Such joins are always constrained since the source predicate will - * have bound that variable.</li> - * <li>The target predicate indirectly shares a variable with the source - * join path via a constraint can run for the target predicate and which - * shares a variable with the source join path. These joins are indirectly - * constrained by the shared variable in the constraint. BSBM Q5 is an - * example of this case.</li> - * <li>Any predicates may always be join to an existing join path. However, - * joins which do not share variables either directly or indirectly will be - * full cross products. Therefore such joins are added to the join path only - * after all constrained joins have been consumed.</li> - * </ol> - * - * @param queryEngine - * The query engine. - * @param limitIn - * The limit (this is automatically multiplied by the round to - * increase the sample size in each round). - * @param round - * The round number in [1:n]. - * @param a - * The set of paths from the previous round. For the first round, - * this is formed from the initial set of edges to consider. - * @param edgeSamples - * A map used to associate join path segments (expressed as an - * ordered array of bopIds) with {@link EdgeSample}s to avoid - * redundant effort. - * - * @return The set of paths which survived pruning in this round. 
- * - * @throws Exception - */ - public Path[] expand(final QueryEngine queryEngine, int limitIn, + /** + * Resample the initial vertices for the specified join paths and then + * resample the cutoff join for each given join path in path order. + * + * @param queryEngine + * The query engine. + * @param limitIn + * The original limit. + * @param round + * The round number in [1:n]. + * @param a + * The set of paths from the previous round. For the first round, + * this is formed from the initial set of edges to consider. + * @param edgeSamples + * A map used to associate join path segments (expressed as an + * ordered array of bopIds) with {@link EdgeSample}s to avoid + * redundant effort. + * + * @return The number of join paths which are experiencing cardinality + * estimate underflow. + * + * @throws Exception + */ + public int resamplePaths(final QueryEngine queryEngine, int limitIn, final int round, final Path[] a, final Map<PathIds, EdgeSample> edgeSamples) throws Exception { @@ -684,38 +691,60 @@ throw new IllegalArgumentException(); if (a.length == 0) throw new IllegalArgumentException(); - -// // increment the limit by itself in each round. -// final int limit = (round + 1) * limitIn; + + /* + * Re-sample the vertices which are the initial vertex of any of the + * existing paths. + * + * Note: We do not need to resample vertices unless they are the first + * vertex in some path. E.g., the initial vertices from which we start. + * The inputs to an EdgeSample are always either the sample of an + * initial vertex or the sample of a prior cutoff join in the join + * path's own history. + * + * Note: A request to re-sample a vertex is a NOP unless the limit has + * been increased since the last time the vertex was sampled. It is also + * a NOP if the vertex has been fully materialized. 
+ * + * Note: Before resampling the vertices, decide what the maximum limit + * will be for each vertex by examining the paths using that vertex, + * their current sample limit (TODO there should be a distinct limit for + * the vertex and for each cutoff join), and whether or not each path + * experiences a cardinality estimate underflow. + */ + { - if (log.isDebugEnabled()) - log.debug("round=" + round + ", #paths(in)=" + a.length); + if (log.isDebugEnabled()) + log.debug("Re-sampling in-use vertices."); - /* - * Re-sample the vertices which are the initial vertex of any of the - * existing paths. - * - * Note: We do not need to resample vertices unless they are the first - * vertex in some path. E.g., the initial vertices from which we start. - * The inputs to an EdgeSample are always either the sample of an - * initial vertex or the sample of a prior cutoff join in the join - * path's own history. - * - * Note: A request to re-sample a vertex is a NOP unless the limit has - * been increased since the last time the vertex was sampled. It is also - * a NOP if the vertex has been fully materialized. - */ - if (log.isDebugEnabled()) - log.debug("Re-sampling in-use vertices."); + final Map<Vertex, AtomicInteger/* limit */> vertexLimit = new LinkedHashMap<Vertex, AtomicInteger>(); - for (Path x : a) { + for (Path x : a) { - final int limit = x.getNewLimit(limitIn); + final int limit = x.getNewLimit(limitIn); - x.vertices[0].sample(queryEngine, limit, sampleType); + final Vertex v = x.vertices[0]; - } + AtomicInteger theLimit = vertexLimit.get(v); + if (theLimit == null) { + vertexLimit.put(v, theLimit = new AtomicInteger()); + } + theLimit.set(limit); + } + + for (Path x : a) { + + final Vertex v = x.vertices[0]; + + final int limit = vertexLimit.get(v).intValue(); + + v.sample(queryEngine, limit, sampleType); + + } + + } + /* * Re-sample the cutoff join for each edge in each of the existing * paths using the newly re-sampled vertices. 
@@ -732,6 +761,7 @@ if (log.isDebugEnabled()) log.debug("Re-sampling in-use path segments."); + int nunderflow = 0; for (Path x : a) { /* @@ -759,8 +789,8 @@ if (edgeSample != null && edgeSample.limit < limit && !edgeSample.isExact()) { - if (log.isDebugEnabled()) - log.debug("Will resample at higher limit: " + ids); + if (log.isTraceEnabled()) + log.trace("Will resample at higher limit: " + ids); // Time to resample this edge. edgeSamples.remove(ids); edgeSample = null; @@ -833,8 +863,8 @@ priorEdgeSample// ); - if (log.isDebugEnabled()) - log.debug("Resampled: " + ids + " : " + edgeSample); + if (log.isTraceEnabled()) + log.trace("Resampled: " + ids + " : " + edgeSample); if (edgeSamples.put(ids, edgeSample) != null) throw new AssertionError(); @@ -853,9 +883,80 @@ // Save the result on the path. x.edgeSample = priorEdgeSample; + + if (x.edgeSample.estimateEnum == EstimateEnum.Underflow) { + if (log.isDebugEnabled()) + log.debug("Cardinality underflow: " + x); + nunderflow++; + } } // next Path [x]. + return nunderflow; + + } + + /** + * Do one breadth first expansion. In each breadth first expansion we extend + * each of the active join paths by one vertex for each remaining vertex + * which enjoys a constrained join with that join path. In the event that + * there are no remaining constrained joins, we will extend the join path + * using an unconstrained join if one exists. In all, there are three + * classes of joins to be considered: + * <ol> + * <li>The target predicate directly shares a variable with the source join + * path. Such joins are always constrained since the source predicate will + * have bound that variable.</li> + * <li>The target predicate indirectly shares a variable with the source + * join path via a constraint can run for the target predicate and which + * shares a variable with the source join path. These joins are indirectly + * constrained by the shared variable in the constraint. 
BSBM Q5 is an + * example of this case.</li> + * <li>Any predicates may always be join to an existing join path. However, + * joins which do not share variables either directly or indirectly will be + * full cross products. Therefore such joins are added to the join path only + * after all constrained joins have been consumed.</li> + * </ol> + * + * @param queryEngine + * The query engine. + * @param limitIn + * The original limit. + * @param round + * The round number in [1:n]. + * @param a + * The set of paths from the previous round. For the first round, + * this is formed from the initial set of edges to consider. + * @param edgeSamples + * A map used to associate join path segments (expressed as an + * ordered array of bopIds) with {@link EdgeSample}s to avoid + * redundant effort. + * + * @return The set of paths which survived pruning in this round. + * + * @throws Exception + */ + public Path[] expand(final QueryEngine queryEngine, int limitIn, + final int round, final Path[] a, + final Map<PathIds, EdgeSample> edgeSamples) throws Exception { + + if (queryEngine == null) + throw new IllegalArgumentException(); + if (limitIn <= 0) + throw new IllegalArgumentException(); + if (round <= 0) + throw new IllegalArgumentException(); + if (a == null) + throw new IllegalArgumentException(); + if (a.length == 0) + throw new IllegalArgumentException(); + +// // increment the limit by itself in each round. +// final int limit = (round + 1) * limitIn; + + if (log.isDebugEnabled()) + log.debug("round=" + round + ", #paths(in)=" + a.length); + /* * Expand each path one step from each vertex which branches to an * unused vertex. @@ -1005,12 +1106,13 @@ final Path[] paths_tp1_pruned = pruneJoinPaths(paths_tp1, edgeSamples); - if (log.isDebugEnabled()) - log.debug("\n*** round=" + round - + " : generated paths\n" - + JGraph.showTable(paths_tp1, paths_tp1_pruned)); + if (log.isDebugEnabled()) // shows which paths were pruned. 
+ log.info("\n*** round=" + round + ": paths{in=" + a.length + + ",considered=" + paths_tp1.length + ",out=" + + paths_tp1_pruned.length + "}\n" + + JGraph.showTable(paths_tp1, paths_tp1_pruned)); - if (log.isInfoEnabled()) + if (log.isInfoEnabled()) // only shows the surviving paths. log.info("\n*** round=" + round + ": paths{in=" + a.length + ",considered=" + paths_tp1.length + ",out=" + paths_tp1_pruned.length @@ -1243,11 +1345,6 @@ final Path Pi = a[i]; if (Pi.edgeSample == null) throw new RuntimeException("Not sampled: " + Pi); - if (neverPruneUnderflow - && Pi.edgeSample.estimateEnum == EstimateEnum.Underflow) { - // Do not prune if path has cardinality underflow. - continue; - } if (Pi.vertices.length < maxPathLen) { /* * Only the most recently generated set of paths survive to @@ -1256,28 +1353,32 @@ pruned.add(Pi); continue; } - if (pruned.contains(Pi)) + if (pruned.contains(Pi)) { + // already pruned. continue; + } + if (neverPruneUnderflow + && Pi.edgeSample.estimateEnum == EstimateEnum.Underflow) { + // Do not use path to prune if path has cardinality underflow. + continue; + } for (int j = 0; j < a.length; j++) { if (i == j) continue; final Path Pj = a[j]; if (Pj.edgeSample == null) throw new RuntimeException("Not sampled: " + Pj); - if (neverPruneUnderflow - && Pj.edgeSample.estimateEnum == EstimateEnum.Underflow) { - // Do not prune if path has cardinality underflow. - continue; + if (pruned.contains(Pj)) { + // already pruned. + continue; } - if (pruned.contains(Pj)) - continue; final boolean isPiSuperSet = Pi.isUnorderedVariant(Pj); if (!isPiSuperSet) { // Can not directly compare these join paths. 
continue; } - final long costPi = Pi.sumEstCard; - final long costPj = Pj.sumEstCard; + final long costPi = Pi.sumEstCost; + final long costPj = Pj.sumEstCost; final boolean lte = costPi <= costPj; List<Integer> prunedByThisPath = null; if (lte) { @@ -1418,7 +1519,7 @@ static public String showTable(final Path[] a,final Path[] pruned) { final StringBuilder sb = new StringBuilder(); final Formatter f = new Formatter(sb); - f.format("%-4s %10s%1s * %10s (%8s %8s %8s %8s %8s %8s) = %10s %10s%1s : %10s %10s %10s", + f.format("%-4s %10s%1s * %10s (%8s %8s %8s %8s %8s %8s) = %10s %10s%1s : %10s %10s %10s %10s", "path",// "srcCard",// "",// sourceSampleExact @@ -1434,8 +1535,9 @@ "estRead",// "estCard",// "",// estimateIs(Exact|LowerBound|UpperBound) - "sumEstRead",// sumEstimatedTuplesRead - "sumEstCard",// sumEstimatedCardinality + "sumEstRead",// + "sumEstCard",// + "sumEstCost",// "joinPath\n" ); for (int i = 0; i < a.length; i++) { @@ -1453,11 +1555,11 @@ } final EdgeSample edgeSample = x.edgeSample; if (edgeSample == null) { - f.format("%4d %10s%1s * %10s (%8s %8s %8s %8s %8s %8s) = %10s %10s%1s : %10s %10s",// - i, NA, "", NA, NA, NA, NA, NA, NA, NA, NA, NA, "", NA, + f.format("%4d %10s%1s * %10s (%8s %8s %8s %8s %8s %8s) = %10s %10s%1s : %10s %10s %10s",// + i, NA, "", NA, NA, NA, NA, NA, NA, NA, NA, NA, "", NA, NA, NA); } else { - f.format("%4d %10d%1s * % 10.2f (%8d %8d %8d %8d %8d %8d) = %10d % 10d%1s : % 10d % 10d", // + f.format("%4d %10d%1s * % 10.2f (%8d %8d %8d %8d %8d %8d) = %10d % 10d%1s : % 10d % 10d % 10d", // i,// edgeSample.sourceSample.estCard,// edgeSample.sourceSample.estimateEnum.getCode(),// @@ -1473,7 +1575,8 @@ edgeSample.estCard,// edgeSample.estimateEnum.getCode(),// x.sumEstRead,// - x.sumEstCard// + x.sumEstCard,// + x.sumEstCost ); } sb.append(" ["); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JoinGraph.java =================================================================== --- 
branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JoinGraph.java 2011-02-28 16:24:53 UTC (rev 4259) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JoinGraph.java 2011-02-28 16:26:37 UTC (rev 4260) @@ -38,6 +38,7 @@ import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IConstraint; import com.bigdata.bop.IPredicate; +import com.bigdata.bop.IVariable; import com.bigdata.bop.NV; import com.bigdata.bop.PipelineOp; import com.bigdata.bop.ap.SampleIndex; @@ -79,6 +80,11 @@ */ public interface Annotations extends PipelineOp.Annotations { + /** + * The variables which are projected out of the join graph. + */ + String SELECTED = JoinGraph.class.getName() + ".selected"; + /** * The vertices of the join graph, expressed an an {@link IPredicate}[] * (required). @@ -120,6 +126,15 @@ } /** + * @see Annotations#SELECTED + */ + public IVariable<?>[] getSelected() { + + return (IVariable[]) getRequiredProperty(Annotations.SELECTED); + + } + + /** * @see Annotations#VERTICES */ public IPredicate<?>[] getVertices() { @@ -176,6 +191,15 @@ super(args, anns); // required property. + final IVariable<?>[] selected = (IVariable[]) getProperty(Annotations.SELECTED); + + if (selected == null) + throw new IllegalArgumentException(Annotations.SELECTED); + + if (selected.length == 0) + throw new IllegalArgumentException(Annotations.SELECTED); + + // required property. final IPredicate<?>[] vertices = (IPredicate[]) getProperty(Annotations.VERTICES); if (vertices == null) @@ -253,6 +277,12 @@ } + /** + * {@inheritDoc} + * + * + * TODO where to handle DISTINCT, ORDER BY, GROUP BY for join graph? + */ public Void call() throws Exception { // Create the join graph. @@ -266,9 +296,10 @@ // Factory avoids reuse of bopIds assigned to the predicates. final BOpIdFactory idFactory = new BOpIdFactory(); - // Generate the query from the join path. 
- final PipelineOp queryOp = PartitionedJoinGroup.getQuery(idFactory, - p.getPredicates(), getConstraints()); + // Generate the query from the join path. + final PipelineOp queryOp = PartitionedJoinGroup.getQuery(idFactory, + false/* distinct */, getSelected(), p.getPredicates(), + getConstraints()); // Run the query, blocking until it is done. JoinGraph.runSubquery(context, queryOp); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/Path.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/Path.java 2011-02-28 16:24:53 UTC (rev 4259) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/Path.java 2011-02-28 16:26:37 UTC (rev 4260) @@ -158,21 +158,26 @@ * reflects the #of tuples read from the disk. These two measures are * tracked separately and then combined into the {@link #sumEstCost}. */ - final public double sumEstCost; - - /** - * Combine the cumulative expected cardinality and the cumulative expected - * tuples read to produce an overall measure of the expected cost of the - * join path if it were to be fully executed. - * - * @return The cumulative estimated cost of the join path. - */ - private static double getTotalCost(final Path p) { + final public long sumEstCost; - final long total = p.sumEstCard + // - p.sumEstRead// - ; + /** + * Combine the cumulative expected cardinality and the cumulative expected + * tuples read to produce an overall measure of the expected cost of the + * join path if it were to be fully executed. + * + * @return The cumulative estimated cost of the join path. + * + * TODO Compute this incrementally as estCost using estRead and + * estCard and then take the running sum as sumEstCost and update + * the JGraph trace appropriately. 
+ */ + private static long getCost(final long sumEstRead, final long sumEstCard) { + final long total; +// total = sumEstCard + sumEstRead; // intermediate results + IO. +// total = sumEstRead; // just IO + total = sumEstCard; // just intermediate results. + return total; } @@ -193,8 +198,12 @@ // sb.append(e.getLabel()); // first = false; // } - sb.append("],cumEstCard=" + sumEstCard - + ",sample=" + edgeSample + "}"); + sb.append("]"); + sb.append(",sumEstRead=" + sumEstRead); + sb.append(",sumEstCard=" + sumEstCard); + sb.append(",sumEstCost=" + sumEstCost); + sb.append(",sample=" + edgeSample); + sb.append("}"); return sb.toString(); } @@ -243,18 +252,18 @@ this.edgeSample = edgeSample; /* - * The estimated cardinality of the cutoff join of (v0,v1). - */ - this.sumEstCard = edgeSample.estCard; - - /* * The expected #of tuples read for the full join of (v0,v1). This is * everything which could be visited for [v0] plus the #of tuples read * from [v1] during the cutoff join times the (adjusted) join hit ratio. */ this.sumEstRead = v0.sample.estCard + edgeSample.estRead; - this.sumEstCost = getTotalCost(this); + /* + * The estimated cardinality of the cutoff join of (v0,v1). + */ + this.sumEstCard = edgeSample.estCard; + + this.sumEstCost = getCost(this.sumEstRead, this.sumEstCard); } @@ -312,7 +321,7 @@ this.sumEstRead = sumEstRead; - this.sumEstCost = getTotalCost(this); + this.sumEstCost = getCost(this.sumEstRead, this.sumEstCard); } @@ -618,29 +627,27 @@ // The new vertex. final Vertex targetVertex = vnew; - /* - * Chain sample the edge. - * - * Note: ROX uses the intermediate result I(p) for the existing path as - * the input when sampling the edge. The corresponding concept for us is - * the sample for this Path, which will have all variable bindings - * produced so far. In order to estimate the cardinality of the new join - * path we have to do a one step cutoff evaluation of the new Edge, - * given the sample available on the current Path. 
- * - * FIXME It is possible for the resulting edge sample to be empty (no - * solutions). Unless the sample also happens to be exact, this is an - * indication that the estimated cardinality has underflowed. We track - * the estimated cumulative cardinality, so this does not make the join - * path an immediate winner, but it does mean that we can not probe - * further on that join path as we lack any intermediate solutions to - * feed into the downstream joins. To resolve that, we have to increase - * the sample limit (unless the path is the winner, in which case we can - * fully execute the join path segment and materialize the results and - * use those to probe further, but this will require the use of the - * memory manager to keep the materialized intermediate results off of - * the Java heap). - */ + /* + * Chain sample the edge. + * + * Note: ROX uses the intermediate result I(p) for the existing path as + * the input when sampling the edge. The corresponding concept for us is + * the sample for this Path, which will have all variable bindings + * produced so far. In order to estimate the cardinality of the new join + * path we have to do a one step cutoff evaluation of the new Edge, + * given the sample available on the current Path. + * + * Note: It is possible for the resulting edge sample to be empty (no + * solutions). Unless the sample also happens to be exact, this is an + * indication that the estimated cardinality has underflowed. We track + * the estimated cumulative cardinality, so this does not make the join + * path an immediate winner, but it does mean that we can not probe + * further on that join path as we lack any intermediate solutions to + * feed into the downstream joins. To resolve that, we have to increase + * the sample limit (unless the path is the winner, in which case we can + * fully execute the join path segment and materialize the results and + * use those to probe further). 
+ */ // Ordered array of all predicates including the target vertex. final IPredicate<?>[] preds2; @@ -846,15 +853,22 @@ runningQuery.iterator()); while (itr.hasNext()) { bset = itr.next(); - result.add(bset); - nresults++; // TODO break out if cutoff join over produces! + result.add(bset); + if (nresults++ >= limit) { + // Break out if cutoff join over produces! + break; + } } } finally { - // verify no problems. - runningQuery.get(); // TODO CANCEL query once limit is satisfied THEN check the future for errors. + // ensure terminated regardless. + runningQuery.cancel(true/* mayInterruptIfRunning */); } - } finally { - runningQuery.cancel(true/* mayInterruptIfRunning */); + } finally { + // verify no problems. + if (runningQuery.getCause() != null) { + // wrap throwable from abnormal termination. + throw new RuntimeException(runningQuery.getCause()); + } } // The join hit ratio can be computed directly from these stats. Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/joinGraph/TestPartitionedJoinGroup_canJoinUsingConstraints.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/joinGraph/TestPartitionedJoinGroup_canJoinUsingConstraints.java 2011-02-28 16:24:53 UTC (rev 4259) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/joinGraph/TestPartitionedJoinGroup_canJoinUsingConstraints.java 2011-02-28 16:26:37 UTC (rev 4260) @@ -809,6 +809,49 @@ } /** + * <code>[5 6 0 2 1 4 3]</code>. + * + * FIXME The above join path produces a false ZERO result for the query and + * all of the join path segments below produce a false exact ZERO (0E) + * cardinality estimate. Figure out why. The final path chosen could have + * been any of the one step extensions of a path with a false 0E cardinality + * estimate. 
+ * + * <pre> + * INFO : 3529 main com.bigdata.bop.joinGraph.rto.JGraph.expand(JGraph.java:1116): + * ** round=4: paths{in=14,considered=26,out=6} + * path srcCard * f ( in sumRgCt tplsRead out limit adjCard) = estRead estCard : sumEstRead sumEstCard sumEstCost joinPath + * 0 0E * 0.00 ( 0 0 0 0 200 0) = 0 0E : 1 0 0 [ 5 6 0 2 1 4 ] + * 1 0E * 0.00 ( 0 0 0 0 200 0) = 0 0E : 1 0 0 [ 5 6 0 2 4 3 ] + * 2 0E * 0.00 ( 0 0 0 0 200 0) = 0 0E : 1 0 0 [ 5 6 0 4 1 3 ] + * 3 0E * 0.00 ( 0 0 0 0 200 0) = 0 0E : 1 0 0 [ 5 6 2 1 4 3 ] + * 4 208 * 1.00 ( 26 26 26 26 400 26) = 26 208 : 16576 1447 1447 [ 5 3 1 2 4 0 ] + * 5 0E * 0.00 ( 0 0 0 0 200 0) = 0 0E : 2 1 1 [ 5 3 6 0 1 2 ] + * </pre> + */ + public void test_attachConstraints_BSBM_Q5_path04() { + + final IPredicate<?>[] path = { p5, p6, p0, p2, p1, p4, p3 }; + + final IConstraint[][] actual = PartitionedJoinGroup + .getJoinGraphConstraints(path, constraints, + null/* knownBoundVars */, true/* pathIsComplete */); + + final Set<IConstraint>[] expected = new Set[] { // + NA, // p5 + asSet(new IConstraint[] { c0, c2 }), // p6 + NA, // p0 + NA, // p2 + NA, // p1 + NA, // p4 + C1, // p3 + }; + + assertSameConstraints(expected, actual); + + } + + /** * Verifies that the right set of constraints is attached at each of the * vertices of a join path. Comparison of {@link IConstraint} instances is * by reference. 
Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/AbstractJoinGraphTestCase.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/AbstractJoinGraphTestCase.java 2011-02-28 16:24:53 UTC (rev 4259) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/AbstractJoinGraphTestCase.java 2011-02-28 16:26:37 UTC (rev 4260) @@ -43,6 +43,7 @@ import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IConstraint; import com.bigdata.bop.IPredicate; +import com.bigdata.bop.IVariable; import com.bigdata.bop.PipelineOp; import com.bigdata.bop.ap.SampleIndex.SampleType; import com.bigdata.bop.engine.BOpStats; @@ -82,12 +83,8 @@ private Journal jnl; -// protected AbstractTripleStore database; + protected QueryEngine queryEngine; -// private String namespace; - - private QueryEngine queryEngine; - /** The initial sampling limit. */ static private final int limit = 100; @@ -236,12 +233,14 @@ * JVM run using the known solutions produced by the runtime versus * static query optimizers. 
*/ - protected IPredicate<?>[] doTest(final IPredicate<?>[] preds, - final IConstraint[] constraints) throws Exception { + protected IPredicate<?>[] doTest(final boolean distinct, + final IVariable<?>[] selected, final IPredicate<?>[] preds, + final IConstraint[] constraints) throws Exception { if (warmUp) - runQuery("Warmup", queryEngine, runStaticQueryOptimizer( - getQueryEngine(), preds), constraints); + runQuery("Warmup", queryEngine, distinct, selected, + runStaticQueryOptimizer(getQueryEngine(), preds), + constraints); /* * Run the runtime query optimizer once (its cost is not counted @@ -269,8 +268,8 @@ if (runGivenOrder) { final long begin = System.currentTimeMillis(); - final BOpStats stats = runQuery(GIVEN, queryEngine, preds, - constraints); + final BOpStats stats = runQuery(GIVEN, queryEngine, distinct, + selected, preds, constraints); final long nout = stats.unitsOut.get(); if (i == 0) givenSolutions = nout; @@ -285,9 +284,9 @@ if (runStaticQueryOptimizer) { final long begin = System.currentTimeMillis(); - final BOpStats stats = runQuery(STATIC, queryEngine, - runStaticQueryOptimizer(getQueryEngine(), preds), - constraints); + final BOpStats stats = runQuery(STATIC, queryEngine, distinct, + selected, runStaticQueryOptimizer(getQueryEngine(), + preds), constraints); final long nout = stats.unitsOut.get(); if (i == 0) staticSolutions = nout; @@ -311,8 +310,8 @@ // Evaluate the query using the selected join order. final long begin = System.currentTimeMillis(); - final BOpStats stats = runQuery(RUNTIME, queryEngine, - runtimePredOrder, constraints); + final BOpStats stats = runQuery(RUNTIME, queryEngine, distinct, + selected, runtimePredOrder, constraints); final long nout = stats.unitsOut.get(); if (i == 0) runtimeSolutions = nout; @@ -428,14 +427,15 @@ } - /** - * Run a query joining a set of {@link IPredicate}s in the given join order. - * - * @return The stats for the last operator in the pipeline. 
- */ - private static BOpStats runQuery(final String msg, - final QueryEngine queryEngine, final IPredicate<?>[] predOrder, - final IConstraint[] constraints) throws Exception { + /** + * Run a query joining a set of {@link IPredicate}s in the given join order. + * + * @return The stats for the last operator in the pipeline. + */ + protected static BOpStats runQuery(final String msg, + final QueryEngine queryEngine, final boolean distinct, + final IVariable<?>[] selected, final IPredicate<?>[] predOrder, + final IConstraint[] constraints) throws Exception { if (log.isInfoEnabled()) log.info("Running " + msg); @@ -455,18 +455,16 @@ } final PipelineOp queryOp = PartitionedJoinGroup.getQuery(idFactory, - predOrder, constraints); + distinct, selected, predOrder, constraints); System.out.println(BOpUtility.toString(queryOp)); - // submit query to runtime optimizer. + // run the query, counting results and chunks. + long nout = 0; + long nchunks = 0; final IRunningQuery q = queryEngine.eval(queryOp); - try { - // drain the query results. - long nout = 0; - long nchunks = 0; final IAsynchronousIterator<IBindingSet[]> itr = q.iterator(); try { while (itr.hasNext()) { @@ -477,25 +475,26 @@ } finally { itr.close(); } + } finally { + // ensure terminated. + q.cancel(true/* mayInterruptIfRunning */); + } - // check the Future for the query. - q.get(); + // Check the Future for the query. + if (q.getCause() != null) { + // Wrap Throwable from abnormal termination. + throw new RuntimeException(q.getCause()); + } - // show the results. - final BOpStats stats = q.getStats().get(queryOp.getId()); + // show the results. 
+ final BOpStats stats = q.getStats().get(queryOp.getId()); - System.err.println(msg + " : ids=" + Arrays.toString(ids) - + ", elapsed=" + q.getElapsed() + ", nout=" + nout - + ", nchunks=" + nchunks + ", stats=" + stats); + System.err.println(msg + " : ids=" + Arrays.toString(ids) + + ", elapsed=" + q.getElapsed() + ", nout=" + nout + + ", nchunks=" + nchunks + ", stats=" + stats); - return stats; + return stats; - } finally { - - q.cancel(true/* mayInterruptIfRunning */); - - } - } /** Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java 2011-02-28 16:24:53 UTC (rev 4259) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java 2011-02-28 16:26:37 UTC (rev 4260) @@ -18,6 +18,7 @@ import com.bigdata.bop.NV; import com.bigdata.bop.Var; import com.bigdata.bop.IPredicate.Annotations; +import com.bigdata.bop.engine.QueryEngine; import com.bigdata.bop.engine.QueryLog; import com.bigdata.bop.joinGraph.rto.JoinGraph; import com.bigdata.journal.ITx; @@ -273,6 +274,8 @@ } } + final boolean distinct = true; + final IVariable<?>[] selected; final IConstraint[] constraints; final IPredicate[] preds; final IPredicate p0, p1, p2, p3, p4, p5, p6; @@ -285,6 +288,8 @@ final IVariable origProperty1 = Var.var("origProperty1"); final IVariable origProperty2 = Var.var("origProperty2"); + selected = new IVariable[] { product, productLabel }; + // The name space for the SPO relation. 
final String[] spoRelation = new String[] { namespace + ".spo" }; @@ -473,9 +478,10 @@ test_bsbm_q5 : Total times: static=8871, runtime=8107, delta(static-runtime)=764 */ - final IPredicate<?>[] runtimeOrder = doTest(preds, null/* constraints */); - assertEquals("runtimeOrder", new int[] { 1, 2, 0, 4, 6, 3, 5 }, - BOpUtility.getPredIds(runtimeOrder)); + final IPredicate<?>[] runtimeOrder = doTest(distinct, selected, + preds, null/* constraints */); + assertEquals("runtimeOrder", new int[] { 1, 2, 0, 4, 6, 3, 5 }, + BOpUtility.getPredIds(runtimeOrder)); } // Run w/ constraints. @@ -508,11 +514,32 @@ test_bsbm_q5 : Total times: static=7312, runtime=3305, delta(static-runtime)=4007 */ - final IPredicate<?>[] runtimeOrder = doTest(preds, constraints); - assertEquals("runtimeOrder", new int[] { 1, 2, 4, 3, 6, 5, 0 }, - BOpUtility.getPredIds(runtimeOrder)); + final IPredicate<?>[] runtimeOrder = doTest(distinct, selected, + preds, constraints); + /* + * FIXME The RTO produces join paths on some runs which appear to + * have no solutions. I've written a unit test for constraint + * attachment for the case below, but the constraints appear to be + * attached correctly. I've also run the "bad" join path directly + * (see below) and it finds the correct #of solutions. This is + * pretty weird. + */ + // [5, 3, 1, 2, 4, 6, 0] - Ok and faster. + // [5, 3, 1, 2, 4, 6, 0] - Ok and faster (8828 vs 3621) + // [5, 6, 0, 2, 1, 4, 3] - no results!!! + // [5, 6, 0, 2, 1, 4, 3] - again, no results. + assertEquals("runtimeOrder", new int[] { 1, 2, 4, 3, 6, 5, 0 }, + BOpUtility.getPredIds(runtimeOrder)); } + if(false){ + // Run some fixed order. 
+// final IPredicate<?>[] path = { p5, p6, p0, p2, p1, p4, p3 }; + final IPredicate<?>[] path = { p5, p3, p1, p2, p4, p6, p0 }; + runQuery("FIXED ORDER", queryEngine, distinct, selected, path, + constraints); + } + } /** @@ -600,6 +627,8 @@ } } + final boolean distinct = false; + final IVariable<?>[] selected; final IConstraint[] constraints; final IPredicate[] preds; final IPredicate p0, p1, p2, p3, p4, p5, p6; @@ -611,6 +640,8 @@ final IVariable p3Var = Var.var("p3"); final IVariable testVar = Var.var("testVar"); + selected = new IVariable[]{product,label}; + // The name space for the SPO relation. final String[] spoRelation = new String[] { namespace + ".spo" }; @@ -730,8 +761,8 @@ * FIXME The optional join group is part of the tail plan and can not be * fed into the RTO right now. */ - final IPredicate<?>[] runtimeOrder = doTest(preds, new IConstraint[] { - c0, c1 }); + final IPredicate<?>[] runtimeOrder = doTest(distinct, selected, preds, + new IConstraint[] { c0, c1 }); } Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBarData.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBarData.java 2011-02-28 16:24:53 UTC (rev 4259) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBarData.java 2011-02-28 16:26:37 UTC (rev 4260) @@ -265,15 +265,16 @@ } } - final IPredicate[] preds; - final IPredicate p0, p1, p2, p3, p4, p5; + final IVariable<?>[] selected; + final IPredicate<?>[] preds; + final IPredicate<?> p0, p1, p2, p3, p4, p5; { // a, value, d, b, f - final IVariable<?> a = Var.var("a"); - final IVariable<?> value = Var.var("value"); - final IVariable<?> d = Var.var("d"); - final IVariable<?> b = Var.var("b"); - final IVariable<?> f = Var.var("f"); + final IVariable<?> a = Var.var("a"); // ?item + final IVariable<?> value = Var.var("value"); 
// ?value + final IVariable<?> d = Var.var("d"); // ?type + final IVariable<?> b = Var.var("b"); // ?order + final IVariable<?> f = Var.var("f"); // ?employeeNum final IVariable<?> g0 = Var.var("g0"); final IVariable<?> g1 = Var.var("g1"); @@ -281,13 +282,14 @@ final IVariable<?> g3 = Var.var("g3"); final IVariable<?> g4 = Var.var("g4"); final IVariable<?> g5 = Var.var("g5"); - + selected = new IVariable[]{f,d}; + // The name space for the SPO relation. final String[] spoRelation = new String[] { namespace + ".spo" }; - // The name space for the Lexicon relation. - final String[] lexRelation = new String[] { namespace + ".lex" }; +// // The name space for the Lexicon relation. +// final String[] lexRelation = new String[] { namespace + ".lex" }; final long timestamp = database.getIndexManager().getLastCommitTime(); @@ -348,7 +350,10 @@ } - final IPredicate<?>[] runtimeOrder = doTest(preds, null/* constraints */); + // TODO Should use GROUP BY with SELECT expression rather than DISTINCT. + final boolean distinct = true; + final IPredicate<?>[] runtimeOrder = doTest(distinct, selected, preds, + null/* constraints */); { /* @@ -360,8 +365,10 @@ */ // after the refactor. - final int[] expected = new int[]{0, 1, 2, 3, 4, 5}; - + final int[] expected; + expected = new int[] { 0, 1, 2, 3, 4, 5 }; // estCard +// expected = new int[] { 0, 1, 2, 3, 4, 5 }; // estRead + // before the refactor. 
// final int[] expected = new int[] { 0, 1, 3, 2, 4, 5 }; Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java 2011-02-28 16:24:53 UTC (rev 4259) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java 2011-02-28 16:26:37 UTC (rev 4260) @@ -368,13 +368,17 @@ } } - final IPredicate[] preds; - final IPredicate p0, p1, p2, p3, p4, p5; + final boolean distinct = false; + final IVariable<?>[] selected; + final IPredicate<?>[] preds; + final IPredicate<?> p0, p1, p2, p3, p4, p5; { final IVariable<?> x = Var.var("x"); final IVariable<?> y = Var.var("y"); final IVariable<?> z = Var.var("z"); + selected = new IVariable[] { x, y, z }; + // The name space for the SPO relation. final String[] relation = new String[] { namespace + ".spo" }; @@ -437,29 +441,34 @@ preds = new IPredicate[] { p0, p1, p2, p3, p4, p5 }; } - final IPredicate<?>[] runtimeOrder = doTest(preds, null/* constraints */); + final IPredicate<?>[] runtimeOrder = doTest(distinct, selected, preds, + null/* constraints */); - if(!useExistingJournal) { - /* - * Verify that the runtime optimizer produced the expected join - * path. - * - * Note: There are no solutions for this query against U1. The - * optimizer is only providing the fastest path to prove that. We - * have to use a larger data set if we want to verify the optimizers - * join path for a query which produces solutions in the data. 
- */ - + final int[] expected; + if(useExistingJournal) { + // after refactor on U50 + expected = new int[] {2, 4, 5, 3, 0, 1}; // based on estCard +// expected = new int[] {1, 4, 5, 3, 0, 2}; // based on estRead + } else { // order produced after refactor - final int[] expected = new int[] { 4, 5, 0, 1, 2, 3 }; + expected = new int[] { 4, 5, 0, 1, 2, 3 }; // order produced before refactor. -// final int[] expected = new int[] { 4, 5, 0, 3, 1, 2 }; +// expected = new int[] { 4, 5, 0, 3, 1, 2 }; + } - assertEquals("runtimeOrder", expected, BOpUtility - .getPredIds(runtimeOrder)); - } + /* + * Verify that the runtime optimizer produced the expected join path. + * + * Note: There are no solutions for this query against U1. The optimizer + * is only providing the fastest path to prove that. We have to use a + * larger data set if we want to verify the optimizers join path for a + * query which produces solutions in the data. + */ + assertEquals("runtimeOrder", expected, BOpUtility + .getPredIds(runtimeOrder)); + } // LUBM_Q2 /** @@ -527,13 +536,17 @@ } } - final IPredicate[] preds; - final IPredicate p0, p1, p2, p3, p4; + final boolean distinct = false; + final IVariable<?>[] selected; + final IPredicate<?>[] preds; + final IPredicate<?> p0, p1, p2, p3, p4; { final IVariable<?> x = Var.var("x"); final IVariable<?> y = Var.var("y"); final IVariable<?> z = Var.var("z"); + selected = new IVariable[]{x,y,z}; + // The name space for the SPO relation. final String[] relation = new String[] { namespace + ".spo" }; @@ -588,17 +601,18 @@ preds = new IPredicate[] { p0, p1, p2, p3, p4 }; } - final IPredicate<?>[] runtimeOrder = doTest(preds, null/* constraints */); + final IPredicate<?>[] runtimeOrder = doTest(distinct, selected, preds, + null/* constraints */); - if (!useExistingJournal) { - /* - * Verify that the runtime optimizer produced the expected join - * path. 
- */ - assertEquals("runtimeOrder", new int[] { 3, 0, 2, 1, 4 }, - BOpUtility.getPredIds(runtimeOrder)); - } - + /* + * Verify that the runtime optimizer produced the expected join path. + */ + final int[] expected; + expected = new int[] { 3, 0, 2, 1, 4 };// estCard +// expected = new int[] { 3, 0, 2, 1, 4 };// estRead + assertEquals("runtimeOrder", expected, BOpUtility + .getPredIds(runtimeOrder)); + } // LUBM Q8 /** @@ -667,13 +681,17 @@ } } - final IPredicate[] preds; - final IPredicate p0, p1, p2, p3, p4, p5; + final boolean distinct = false; + final IVariable<?>[] selected; + final IPredicate<?>[] preds; + final IPredicate<?> p0, p1, p2, p3, p4, p5; { final IVariable<?> x = Var.var("x"); final IVariable<?> y = Var.var("y"); final IVariable<?> z = Var.var("z"); + selected = new IVariable[] { x, y, z }; + // The name space for the SPO relation. final String[] relation = new String[] { namespace + ".spo" }; @@ -736,23 +754,26 @@ preds = new IPredicate[] { p0, p1, p2, p3, p4, p5 }; } - final IPredicate<?>[] runtimeOrder = doTest(preds, null/* constraints */); + final IPredicate<?>[] runtimeOrder = doTest(distinct, selected, preds, + null/* constraints */); - if (!useExistingJournal) { - /* - * Verify that the runtime optimiz... [truncated message content] |
From: <tho...@us...> - 2011-03-01 01:05:41
|
Revision: 4261 http://bigdata.svn.sourceforge.net/bigdata/?rev=4261&view=rev Author: thompsonbry Date: 2011-03-01 01:05:33 +0000 (Tue, 01 Mar 2011) Log Message: ----------- Commit of partial support for rotating a key-range constraint onto an access path from/to key based on a path by MikeP. The logic in the SAIL which recognizes and lifts the key-range constraint onto the predicate is disabled in this commit. Look at ~817 of BigdataSailEvaluationStrategy3 to enable this behavior. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/BooleanValueExpression.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/Constraint.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/striterator/AbstractKeyOrder.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/SUM.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/IVUtility.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/CompareBOp.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/Constraint.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/MathBOp.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/ValueExpressionBOp.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOKeyOrder.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOPredicate.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java 
branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/internal/constraints/TestInlineConstraints.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl3.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/QueryHints.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/QueryOptimizerEnum.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/Rule2BOpUtility.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/bench/NanoSparqlClient.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/SOp2BOpUtility.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/sop/SOpTree.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/Range.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/RangeBOp.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestRangeBOp.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java 2011-02-28 16:26:37 UTC (rev 4260) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java 2011-03-01 01:05:33 UTC (rev 4261) @@ -195,9 +195,12 @@ */ public BOpBase(final BOpBase op) { // deep copy the arguments. - args = deepCopy(op.args); +// args = deepCopy(op.args); // deep copy the annotations. - annotations = deepCopy(op.annotations); +// annotations = deepCopy(op.annotations); + // Note: only shallow copy is required to achieve immutable semantics! 
+ args = Arrays.copyOf(op.args, op.args.length); + annotations = new LinkedHashMap<String, Object>(op.annotations); } // /** Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java 2011-02-28 16:26:37 UTC (rev 4260) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java 2011-03-01 01:05:33 UTC (rev 4261) @@ -678,6 +678,12 @@ * @return The newly annotated {@link IPredicate}. */ public IPredicate<E> setBOpId(int bopId); + + /** + * Return a copy of this predicate with a different {@link IVariableOrConstant} + * for the arg specified by the supplied index parameter. + */ + public IPredicate<E> setArg(int index, IVariableOrConstant arg); /** * Return <code>true</code> iff this operator is an access path which writes Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java 2011-02-28 16:26:37 UTC (rev 4260) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java 2011-03-01 01:05:33 UTC (rev 4261) @@ -419,6 +419,16 @@ } + public Predicate<E> setArg(final int index, final IVariableOrConstant arg) { + + final Predicate<E> tmp = this.clone(); + + tmp._set(index, arg); + + return tmp; + + } + /** * Add an {@link Annotations#INDEX_LOCAL_FILTER}. When there is a filter for * the named property, the filters are combined. 
Otherwise the filter is Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/BooleanValueExpression.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/BooleanValueExpression.java 2011-02-28 16:26:37 UTC (rev 4260) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/BooleanValueExpression.java 2011-03-01 01:05:33 UTC (rev 4261) @@ -27,16 +27,16 @@ import java.util.Map; import com.bigdata.bop.BOp; +import com.bigdata.bop.BOpBase; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IValueExpression; -import com.bigdata.bop.ImmutableBOp; /** * Base class for boolean value expression BOps. Value expressions perform some * evaluation on one or more value expressions as input and produce one * boolean as output. */ -public abstract class BooleanValueExpression extends ImmutableBOp +public abstract class BooleanValueExpression extends BOpBase implements IValueExpression<Boolean> { /** Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/Constraint.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/Constraint.java 2011-02-28 16:26:37 UTC (rev 4260) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/Constraint.java 2011-03-01 01:05:33 UTC (rev 4261) @@ -29,14 +29,14 @@ import org.apache.log4j.Logger; import com.bigdata.bop.BOp; +import com.bigdata.bop.BOpBase; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IConstraint; -import com.bigdata.bop.ImmutableBOp; /** * BOpConstraint that wraps a {@link BooleanValueExpression}. 
*/ -public class Constraint extends ImmutableBOp implements IConstraint { +public class Constraint extends BOpBase implements IConstraint { /** * Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java 2011-02-28 16:26:37 UTC (rev 4260) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java 2011-03-01 01:05:33 UTC (rev 4261) @@ -1593,9 +1593,11 @@ this.accessPath = context.getAccessPath(relation, predicate); - if (log.isDebugEnabled()) - log.debug("joinOp=" + joinOp + ", #bindingSets=" + n - + ", accessPath=" + accessPath); + if (log.isDebugEnabled()) { + log.debug("joinOp=" + joinOp); + log.debug("#bindingSets=" + n); + log.debug("accessPath=" + accessPath); + } // convert to array for thread-safe traversal. this.bindingSets = bindingSets.toArray(new IBindingSet[n]); @@ -1644,7 +1646,11 @@ // range count of the as-bound access path (should be cached). final long rangeCount = accessPath .rangeCount(false/* exact */); - + + if (log.isDebugEnabled()) { + log.debug("range count: " + rangeCount); + } + stats.accessPathCount.increment(); stats.accessPathRangeCount.add(rangeCount); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/striterator/AbstractKeyOrder.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/striterator/AbstractKeyOrder.java 2011-02-28 16:26:37 UTC (rev 4260) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/striterator/AbstractKeyOrder.java 2011-03-01 01:05:33 UTC (rev 4261) @@ -72,7 +72,7 @@ } - final public byte[] getFromKey(final IKeyBuilder keyBuilder, + public byte[] getFromKey(final IKeyBuilder keyBuilder, final IPredicate<E> predicate) { keyBuilder.reset(); @@ -99,17 +99,43 @@ } - return noneBound ? 
null : keyBuilder.getKey(); + final byte[] key = noneBound ? null : keyBuilder.getKey(); + return key; + } - final public byte[] getToKey(final IKeyBuilder keyBuilder, + public byte[] getToKey(final IKeyBuilder keyBuilder, final IPredicate<E> predicate) { - final byte[] from = getFromKey(keyBuilder, predicate); + keyBuilder.reset(); - return from == null ? null : SuccessorUtil.successor(from); + final int keyArity = getKeyArity(); // use the key's "arity". + boolean noneBound = true; + + for (int i = 0; i < keyArity; i++) { + + final IVariableOrConstant<?> term = predicate.get(getKeyOrder(i)); + + // Note: term MAY be null for the context position. + if (term == null || term.isVar()) + break; + + /* + * Note: If you need to override the default IKeyBuilder behavior do + * it in the invoked method. + */ + appendKeyComponent(keyBuilder, i, term.get()); + + noneBound = false; + + } + + final byte[] key = noneBound ? null : keyBuilder.getKey(); + + return key == null ? null : SuccessorUtil.successor(key); + } /** Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/SUM.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/SUM.java 2011-02-28 16:26:37 UTC (rev 4260) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/SUM.java 2011-03-01 01:05:33 UTC (rev 4261) @@ -25,8 +25,6 @@ import java.util.Map; -import org.openrdf.query.algebra.MathExpr.MathOp; - import com.bigdata.bop.BOp; import com.bigdata.bop.BOpBase; import com.bigdata.bop.IBindingSet; @@ -34,10 +32,10 @@ import com.bigdata.bop.IVariable; import com.bigdata.bop.aggregate.AggregateBase; import com.bigdata.bop.aggregate.IAggregate; -import com.bigdata.bop.aggregate.AggregateBase.FunctionCode; import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.internal.IVUtility; import com.bigdata.rdf.internal.XSDLongIV; +import 
com.bigdata.rdf.internal.constraints.MathBOp.MathOp; import com.bigdata.rdf.model.BigdataLiteral; /** Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/IVUtility.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/IVUtility.java 2011-02-28 16:26:37 UTC (rev 4260) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/IVUtility.java 2011-03-01 01:05:33 UTC (rev 4261) @@ -32,11 +32,10 @@ import java.util.ArrayList; import java.util.UUID; -import org.openrdf.query.algebra.MathExpr.MathOp; - import com.bigdata.btree.keys.IKeyBuilder; import com.bigdata.btree.keys.KeyBuilder; import com.bigdata.rawstore.Bytes; +import com.bigdata.rdf.internal.constraints.MathBOp.MathOp; import com.bigdata.rdf.model.BigdataBNode; import com.bigdata.rdf.model.BigdataLiteral; @@ -235,6 +234,10 @@ return new XSDDecimalIV(left.multiply(right)); case DIVIDE: return new XSDDecimalIV(left.divide(right)); + case MIN: + return new XSDDecimalIV(left.compareTo(right) < 0 ? left : right); + case MAX: + return new XSDDecimalIV(left.compareTo(right) > 0 ? left : right); default: throw new UnsupportedOperationException(); } @@ -253,6 +256,10 @@ return new XSDIntegerIV(left.multiply(right)); case DIVIDE: return new XSDIntegerIV(left.divide(right)); + case MIN: + return new XSDIntegerIV(left.compareTo(right) < 0 ? left : right); + case MAX: + return new XSDIntegerIV(left.compareTo(right) > 0 ? 
left : right); default: throw new UnsupportedOperationException(); } @@ -271,6 +278,10 @@ return new XSDFloatIV(left*right); case DIVIDE: return new XSDFloatIV(left/right); + case MIN: + return new XSDFloatIV(Math.min(left,right)); + case MAX: + return new XSDFloatIV(Math.max(left,right)); default: throw new UnsupportedOperationException(); } @@ -289,6 +300,10 @@ return new XSDDoubleIV(left*right); case DIVIDE: return new XSDDoubleIV(left/right); + case MIN: + return new XSDDoubleIV(Math.min(left,right)); + case MAX: + return new XSDDoubleIV(Math.max(left,right)); default: throw new UnsupportedOperationException(); } @@ -307,6 +322,10 @@ return new XSDIntIV(left*right); case DIVIDE: return new XSDIntIV(left/right); + case MIN: + return new XSDIntIV(Math.min(left,right)); + case MAX: + return new XSDIntIV(Math.max(left,right)); default: throw new UnsupportedOperationException(); } @@ -325,6 +344,10 @@ return new XSDLongIV(left*right); case DIVIDE: return new XSDLongIV(left/right); + case MIN: + return new XSDLongIV(Math.min(left,right)); + case MAX: + return new XSDLongIV(Math.max(left,right)); default: throw new UnsupportedOperationException(); } Added: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/Range.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/Range.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/Range.java 2011-03-01 01:05:33 UTC (rev 4261) @@ -0,0 +1,73 @@ +/** + +Copyright (C) SYSTAP, LLC 2011. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +package com.bigdata.rdf.internal; + +import java.io.Serializable; + +/** + * Represents a numerical range of IVs - a lower bound and an upper bound. + * Useful for constraining predicates to a particular range of values for the + * object. + */ +public class Range implements Serializable { + + /** + * + */ + private static final long serialVersionUID = -706615195901299026L; + + private final IV from, to; + + /** + * Construct a numerical range using two IVs. The range includes the from + * and to value (>= from && <= to). Non-inclusive from and to must be + * accomplished using a filter. The from must be less than or equal to the + * to. 
+ */ + public Range(final IV from, final IV to) { + + if (!from.isNumeric()) + throw new IllegalArgumentException("not numeric: " + from); + if (!to.isNumeric()) + throw new IllegalArgumentException("not numeric: " + to); + + final int compare = IVUtility.numericalCompare(from, to); + if (compare > 0) + throw new IllegalArgumentException("invalid range: " + from+">"+to); + + this.from = from; + this.to = to; + + } + + public IV from() { + return from; + } + + public IV to() { + return to; + } + +} Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/CompareBOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/CompareBOp.java 2011-02-28 16:26:37 UTC (rev 4260) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/CompareBOp.java 2011-03-01 01:05:33 UTC (rev 4261) @@ -93,6 +93,10 @@ public CompareBOp(final CompareBOp op) { super(op); } + + public CompareOp op() { + return (CompareOp) getRequiredProperty(Annotations.OP); + } public boolean accept(final IBindingSet s) { @@ -103,7 +107,7 @@ if (left == null || right == null) throw new SparqlTypeErrorException(); - final CompareOp op = (CompareOp) getProperty(Annotations.OP); + final CompareOp op = op(); if (left.isTermId() && right.isTermId()) { if (op == CompareOp.EQ || op == CompareOp.NE) { Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/Constraint.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/Constraint.java 2011-02-28 16:26:37 UTC (rev 4260) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/Constraint.java 2011-03-01 01:05:33 UTC (rev 4261) @@ -86,6 +86,10 @@ return (EBVBOp) super.get(i); } + public IValueExpression<IV> 
getValueExpression() { + return get(0).get(0); + } + public boolean accept(final IBindingSet bs) { try { Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/MathBOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/MathBOp.java 2011-02-28 16:26:37 UTC (rev 4260) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/MathBOp.java 2011-03-01 01:05:33 UTC (rev 4261) @@ -62,6 +62,25 @@ } + public enum MathOp { + PLUS, + MINUS, + MULTIPLY, + DIVIDE, + MIN, + MAX; + + public static MathOp valueOf(org.openrdf.query.algebra.MathExpr.MathOp op) { + switch(op) { + case PLUS: return MathOp.PLUS; + case MINUS: return MathOp.MINUS; + case MULTIPLY: return MathOp.MULTIPLY; + case DIVIDE: return MathOp.DIVIDE; + } + throw new IllegalArgumentException(); + } + } + /** * * @param left @@ -189,5 +208,5 @@ return h; } - + } Added: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/RangeBOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/RangeBOp.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/RangeBOp.java 2011-03-01 01:05:33 UTC (rev 4261) @@ -0,0 +1,245 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2011. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +package com.bigdata.rdf.internal.constraints; + +import java.util.Map; + +import org.apache.log4j.Logger; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.BOpBase; +import com.bigdata.bop.Constant; +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IConstant; +import com.bigdata.bop.IValueExpression; +import com.bigdata.bop.IVariable; +import com.bigdata.bop.IVariableOrConstant; +import com.bigdata.bop.ImmutableBOp; +import com.bigdata.bop.NV; +import com.bigdata.rdf.error.SparqlTypeErrorException; +import com.bigdata.rdf.internal.IV; +import com.bigdata.rdf.internal.Range; + +final public class RangeBOp extends BOpBase + implements IVariable<Range> { + + /** + * + */ + private static final long serialVersionUID = 3368581489737593349L; + +// private static final Logger log = Logger.getLogger(RangeBOp.class); + + + public interface Annotations extends ImmutableBOp.Annotations { + + String VAR = RangeBOp.class.getName() + ".var"; + + String FROM = RangeBOp.class.getName() + ".from"; + + String TO = RangeBOp.class.getName() + ".to"; + + } + + @SuppressWarnings("rawtypes") + public RangeBOp(final IVariable<IV> var, + final IValueExpression<IV> from, + final IValueExpression<IV> to) { + + this(NOARGS, + NV.asMap(new NV(Annotations.VAR, var), + new NV(Annotations.FROM, from), + new NV(Annotations.TO, to))); + + } + + /** + * Required shallow copy constructor. + */ + public RangeBOp(final BOp[] args, Map<String,Object> anns) { + + super(args,anns); + + if (getProperty(Annotations.VAR) == null + || getProperty(Annotations.FROM) == null + || getProperty(Annotations.TO) == null) { + + throw new IllegalArgumentException(); + + } + + } + + /** + * Required deep copy constructor. 
+ */ + public RangeBOp(final RangeBOp op) { + + super(op); + + } + + public IVariable<IV> var() { + return (IVariable<IV>) getRequiredProperty(Annotations.VAR); + } + + public IValueExpression<IV> from() { + return (IValueExpression<IV>) getRequiredProperty(Annotations.FROM); + } + + public IValueExpression<IV> to() { + return (IValueExpression<IV>) getRequiredProperty(Annotations.TO); + } + + final public Range get(final IBindingSet bs) { + +// log.debug("getting the asBound value"); + + final IV from = from().get(bs); + final IV to = to().get(bs); + +// log.debug("from: " + from); +// log.debug("to: " + to); + + // sort of like Var.get(), which returns null when the variable + // is not yet bound + if (from == null || to == null) + return null; + + try { + // let Range ctor() do the type checks and valid range checks + return new Range(from, to); + } catch (IllegalArgumentException ex) { + // log the reason the range is invalid +// if (log.isInfoEnabled()) +// log.info("dropping solution: " + ex.getMessage()); + // drop the solution + throw new SparqlTypeErrorException(); + } + + } + + final public RangeBOp asBound(final IBindingSet bs) { + + final RangeBOp asBound = (RangeBOp) this.clone(); + +// log.debug("getting the asBound value"); + + final IV from = from().get(bs); + final IV to = to().get(bs); + +// log.debug("from: " + from); +// log.debug("to: " + to); + + // sort of like Var.get(), which returns null when the variable + // is not yet bound + if (from == null || to == null) + return asBound; + + asBound._setProperty(Annotations.FROM, new Constant(from)); + asBound._setProperty(Annotations.TO, new Constant(to)); + + return asBound; + + } + + final public boolean isFullyBound() { + + return from() instanceof IConstant && to() instanceof IConstant; + + } + + @Override + public boolean isVar() { + return true; + } + + @Override + public boolean isConstant() { + return false; + } + + @Override + public Range get() { +// log.debug("somebody tried to get me"); 
+ + return null; + } + + @Override + public String getName() { + return var().getName(); + } + + @Override + public boolean isWildcard() { + return false; + } + + + final public boolean equals(final IVariableOrConstant op) { + + if (op == null) + return false; + + if (this == op) + return true; + + if (op instanceof IVariable<?>) { + + return var().getName().equals(((IVariable<?>) op).getName()); + + } + + return false; + + } + + final private boolean _equals(final RangeBOp op) { + + return var().equals(op.var()) + && from().equals(op.from()) + && to().equals(op.to()); + + } + + /** + * Caches the hash code. + */ + private int hash = 0; + public int hashCode() { +// +// int h = hash; +// if (h == 0) { +// h = 31 * h + var().hashCode(); +// h = 31 * h + from().hashCode(); +// h = 31 * h + to().hashCode(); +// hash = h; +// } +// return h; +// + return var().hashCode(); + } + +} Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/ValueExpressionBOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/ValueExpressionBOp.java 2011-02-28 16:26:37 UTC (rev 4260) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/ValueExpressionBOp.java 2011-03-01 01:05:33 UTC (rev 4261) @@ -27,8 +27,8 @@ import java.util.Map; import com.bigdata.bop.BOp; +import com.bigdata.bop.BOpBase; import com.bigdata.bop.IValueExpression; -import com.bigdata.bop.ImmutableBOp; import com.bigdata.rdf.internal.IV; /** @@ -36,7 +36,7 @@ * evaluation on one or more value expressions as input and produce one * value expression as output (boolean, numeric value, etc.) 
*/ -public abstract class ValueExpressionBOp extends ImmutableBOp +public abstract class ValueExpressionBOp extends BOpBase implements IValueExpression<IV> { /** Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOKeyOrder.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOKeyOrder.java 2011-02-28 16:26:37 UTC (rev 4260) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOKeyOrder.java 2011-03-01 01:05:33 UTC (rev 4261) @@ -35,14 +35,19 @@ import java.util.Iterator; import java.util.NoSuchElementException; +import org.apache.log4j.Logger; + +import com.bigdata.bop.IConstant; import com.bigdata.bop.IPredicate; import com.bigdata.bop.IVariableOrConstant; import com.bigdata.btree.keys.IKeyBuilder; +import com.bigdata.btree.keys.SuccessorUtil; import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.internal.IVUtility; +import com.bigdata.rdf.internal.Range; +import com.bigdata.rdf.internal.constraints.RangeBOp; import com.bigdata.rdf.model.StatementEnum; import com.bigdata.striterator.AbstractKeyOrder; -import com.bigdata.striterator.IKeyOrder; /** * Represents the key order used by an index for a triple relation. @@ -65,6 +70,8 @@ */ private static final long serialVersionUID = 87501920529732159L; + private static Logger log = Logger.getLogger(SPOKeyOrder.class); + /* * Note: these constants make it possible to use switch(index()) constructs. */ @@ -466,11 +473,92 @@ // // } - @Override + public byte[] getFromKey(final IKeyBuilder keyBuilder, + final IPredicate<ISPO> predicate) { + + keyBuilder.reset(); + + final int keyArity = getKeyArity(); // use the key's "arity". 
+ + boolean noneBound = true; + + final RangeBOp range = (RangeBOp) + predicate.getProperty(SPOPredicate.Annotations.RANGE); + + for (int i = 0; i < keyArity; i++) { + + final int index = getKeyOrder(i); + + final IVariableOrConstant<?> term = predicate.get(index); + + if (term == null || term.isVar()) { + if (index == 2 && range != null && range.isFullyBound()) { + final IConstant<IV> c = (IConstant<IV>) range.from(); + appendKeyComponent(keyBuilder, i, c.get()); + noneBound = false; + } else { + break; + } + } else { + appendKeyComponent(keyBuilder, i, term.get()); + noneBound = false; + } + + } + + final byte[] key = noneBound ? null : keyBuilder.getKey(); + + return key; + + } + + public byte[] getToKey(final IKeyBuilder keyBuilder, + final IPredicate<ISPO> predicate) { + + keyBuilder.reset(); + + final int keyArity = getKeyArity(); // use the key's "arity". + + boolean noneBound = true; + + final RangeBOp range = (RangeBOp) + predicate.getProperty(SPOPredicate.Annotations.RANGE); + + for (int i = 0; i < keyArity; i++) { + + final int index = getKeyOrder(i); + + final IVariableOrConstant<?> term = predicate.get(index); + + // Note: term MAY be null for the context position. + if (term == null || term.isVar()) { + if (index == 2 && range != null && range.isFullyBound()) { + final IConstant<IV> c = (IConstant<IV>) range.to(); + appendKeyComponent(keyBuilder, i, c.get()); + noneBound = false; + } else { + break; + } + } else { + appendKeyComponent(keyBuilder, i, term.get()); + noneBound = false; + } + + } + + final byte[] key = noneBound ? null : keyBuilder.getKey(); + + return key == null ? 
null : SuccessorUtil.successor(key); + + } + + protected void appendKeyComponent(final IKeyBuilder keyBuilder, final int index, final Object keyComponent) { ((IV) keyComponent).encode(keyBuilder); + +// log.debug("appending key component: " + keyComponent); } @@ -672,32 +760,34 @@ static public SPOKeyOrder getKeyOrder(final IPredicate<ISPO> predicate, final int keyArity) { - final Object s = predicate.get(0).isVar() ? null : predicate - .get(0).get(); + final RangeBOp range = (RangeBOp) + predicate.getProperty(SPOPredicate.Annotations.RANGE); + + final boolean rangeIsBound = range != null && range.isFullyBound(); + + final boolean s = !predicate.get(0).isVar(); - final Object p = predicate.get(1).isVar() ? null : predicate - .get(1).get(); + final boolean p = !predicate.get(1).isVar(); - final Object o = predicate.get(2).isVar() ? null : predicate - .get(2).get(); + final boolean o = !predicate.get(2).isVar() || rangeIsBound; if (keyArity == 3) { // Note: Context is ignored! - if (s != null && p != null && o != null) { + if (s && p && o) { return SPO; - } else if (s != null && p != null) { + } else if (s && p) { return SPO; - } else if (s != null && o != null) { + } else if (s && o) { return OSP; - } else if (p != null && o != null) { + } else if (p && o) { return POS; - } else if (s != null) { + } else if (s) { return SPO; - } else if (p != null) { + } else if (p) { return POS; - } else if (o != null) { + } else if (o) { return OSP; } else { return SPO; @@ -708,39 +798,31 @@ @SuppressWarnings("unchecked") final IVariableOrConstant<IV> t = predicate.get(3); - final IV c = t == null ? null : (t.isVar() ? 
null : t.get()); + final boolean c = t != null && !t.isVar(); - /* - * if ((s == null && p == null && o == null && c == null) || (s != - * null && p == null && o == null && c == null) || (s != null && p - * != null && o == null && c == null) || (s != null && p != null && - * o != null && c == null) || (s != null && p != null && o != null - * && c != null)) { return SPOKeyOrder.SPOC; } - */ - - if ((s == null && p != null && o == null && c == null) - || (s == null && p != null && o != null && c == null) - || (s == null && p != null && o != null && c != null)) { + if ((!s && p && !o && !c) + || (!s && p && o && !c) + || (!s && p && o && c)) { return POCS; } - if ((s == null && p == null && o != null && c == null) - || (s == null && p == null && o != null && c != null) - || (s != null && p == null && o != null && c != null)) { + if ((!s && !p && o && !c) + || (!s && !p && o && c) + || (s && !p && o && c)) { return OCSP; } - if ((s == null && p == null && o == null && c != null) - || (s != null && p == null && o == null && c != null) - || (s != null && p != null && o == null && c != null)) { + if ((!s && !p && !o && c) + || (s && !p && !o && c) + || (s && p && !o && c)) { return CSPO; } - if ((s == null && p != null && o == null && c != null)) { + if ((!s && p && !o && c)) { return PCSO; } - if ((s != null && p == null && o != null && c == null)) { + if ((s && !p && o && !c)) { return SOPC; } Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOPredicate.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOPredicate.java 2011-02-28 16:26:37 UTC (rev 4260) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOPredicate.java 2011-03-01 01:05:33 UTC (rev 4261) @@ -26,12 +26,15 @@ import java.util.Map; import com.bigdata.bop.BOp; +import com.bigdata.bop.Constant; import com.bigdata.bop.IBindingSet; import 
com.bigdata.bop.IConstant; import com.bigdata.bop.IVariableOrConstant; import com.bigdata.bop.NV; import com.bigdata.bop.ap.Predicate; import com.bigdata.rdf.internal.IV; +import com.bigdata.rdf.internal.Range; +import com.bigdata.rdf.internal.constraints.RangeBOp; import com.bigdata.relation.rule.IAccessPathExpander; /** @@ -49,10 +52,17 @@ public class SPOPredicate extends Predicate<ISPO> { /** - * - */ - private static final long serialVersionUID = 1L; + * + */ + private static final long serialVersionUID = 3517916629931687107L; + public interface Annotations extends Predicate.Annotations { + + String RANGE = SPOPredicate.class.getName() + ".range"; + + } + + /** * Variable argument version of the shallow copy constructor. */ @@ -275,9 +285,9 @@ } @SuppressWarnings("unchecked") - final public IVariableOrConstant<IV> o() { + final public IVariableOrConstant o() { - return (IVariableOrConstant<IV>) get(2/* o */); + return (IVariableOrConstant) get(2/* o */); } @@ -287,6 +297,12 @@ return (IVariableOrConstant<IV>) get(3/* c */); } + + final public RangeBOp range() { + + return (RangeBOp) getProperty(Annotations.RANGE); + + } /** * Strengthened return type. 
@@ -296,8 +312,23 @@ @Override public SPOPredicate asBound(final IBindingSet bindingSet) { - return (SPOPredicate) super.asBound(bindingSet); + if (bindingSet == null) + throw new IllegalArgumentException(); + final SPOPredicate tmp = (SPOPredicate) super.asBound(bindingSet); + + final RangeBOp rangeBOp = range(); + + // we don't have a range bop for ?o + if (rangeBOp == null) + return tmp; + + final RangeBOp asBound = rangeBOp.asBound(bindingSet); + + tmp._setProperty(Annotations.RANGE, asBound); + + return tmp; + } } Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java 2011-02-28 16:26:37 UTC (rev 4260) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java 2011-03-01 01:05:33 UTC (rev 4261) @@ -5,7 +5,6 @@ import java.util.UUID; import org.openrdf.query.algebra.Compare.CompareOp; -import org.openrdf.query.algebra.MathExpr.MathOp; import org.openrdf.rio.RDFFormat; import com.bigdata.bop.BOp; @@ -18,7 +17,6 @@ import com.bigdata.bop.NV; import com.bigdata.bop.Var; import com.bigdata.bop.IPredicate.Annotations; -import com.bigdata.bop.engine.QueryEngine; import com.bigdata.bop.engine.QueryLog; import com.bigdata.bop.joinGraph.rto.JoinGraph; import com.bigdata.journal.ITx; @@ -30,6 +28,7 @@ import com.bigdata.rdf.internal.constraints.MathBOp; import com.bigdata.rdf.internal.constraints.NotBOp; import com.bigdata.rdf.internal.constraints.SameTermBOp; +import com.bigdata.rdf.internal.constraints.MathBOp.MathOp; import com.bigdata.rdf.model.BigdataLiteral; import com.bigdata.rdf.model.BigdataURI; import com.bigdata.rdf.model.BigdataValue; Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/internal/constraints/TestInlineConstraints.java 
=================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/internal/constraints/TestInlineConstraints.java 2011-02-28 16:26:37 UTC (rev 4260) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/internal/constraints/TestInlineConstraints.java 2011-03-01 01:05:33 UTC (rev 4261) @@ -39,7 +39,6 @@ import org.openrdf.model.vocabulary.RDF; import org.openrdf.query.QueryEvaluationException; import org.openrdf.query.algebra.Compare.CompareOp; -import org.openrdf.query.algebra.MathExpr.MathOp; import com.bigdata.bop.BOp; import com.bigdata.bop.BOpUtility; @@ -48,46 +47,36 @@ import com.bigdata.bop.IConstant; import com.bigdata.bop.IConstraint; import com.bigdata.bop.IPredicate; +import com.bigdata.bop.IPredicate.Annotations; import com.bigdata.bop.IValueExpression; import com.bigdata.bop.IVariable; import com.bigdata.bop.IVariableOrConstant; import com.bigdata.bop.NV; import com.bigdata.bop.PipelineOp; import com.bigdata.bop.Var; -import com.bigdata.bop.IPredicate.Annotations; import com.bigdata.bop.bindingSet.HashBindingSet; import com.bigdata.bop.engine.IRunningQuery; import com.bigdata.bop.engine.QueryEngine; import com.bigdata.bop.fed.QueryEngineFactory; -import com.bigdata.bop.joinGraph.IEvaluationPlan; -import com.bigdata.bop.joinGraph.IEvaluationPlanFactory; -import com.bigdata.bop.joinGraph.fast.DefaultEvaluationPlanFactory2; import com.bigdata.btree.IRangeQuery; import com.bigdata.rdf.error.SparqlTypeErrorException; import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.internal.IVUtility; import com.bigdata.rdf.internal.XSDBooleanIV; +import com.bigdata.rdf.internal.constraints.MathBOp.MathOp; import com.bigdata.rdf.model.BigdataLiteral; import com.bigdata.rdf.model.BigdataURI; import com.bigdata.rdf.model.BigdataValue; import com.bigdata.rdf.model.BigdataValueFactory; import com.bigdata.rdf.rio.StatementBuffer; -import com.bigdata.rdf.rules.RuleContextEnum; import 
com.bigdata.rdf.sail.BigdataSail; import com.bigdata.rdf.sail.Rule2BOpUtility; -import com.bigdata.rdf.sail.sop.SOp2BOpUtility; -import com.bigdata.rdf.sail.sop.UnsupportedOperatorException; import com.bigdata.rdf.spo.SPOPredicate; import com.bigdata.rdf.store.AbstractTripleStore; import com.bigdata.rdf.store.ProxyTestCase; -import com.bigdata.relation.accesspath.ElementFilter; import com.bigdata.relation.accesspath.IAsynchronousIterator; import com.bigdata.relation.rule.IRule; import com.bigdata.relation.rule.Rule; -import com.bigdata.relation.rule.eval.ActionEnum; -import com.bigdata.relation.rule.eval.IJoinNexus; -import com.bigdata.relation.rule.eval.IJoinNexusFactory; -import com.bigdata.relation.rule.eval.ISolution; import com.bigdata.striterator.ChunkedWrappedIterator; import com.bigdata.striterator.Dechunkerator; import com.bigdata.striterator.IChunkedOrderedIterator; Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl3.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl3.java 2011-02-28 16:26:37 UTC (rev 4260) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl3.java 2011-03-01 01:05:33 UTC (rev 4261) @@ -28,6 +28,7 @@ import org.openrdf.query.algebra.And; import org.openrdf.query.algebra.Bound; import org.openrdf.query.algebra.Compare; +import org.openrdf.query.algebra.Compare.CompareOp; import org.openrdf.query.algebra.Filter; import org.openrdf.query.algebra.Group; import org.openrdf.query.algebra.IsBNode; @@ -37,7 +38,6 @@ import org.openrdf.query.algebra.Join; import org.openrdf.query.algebra.LeftJoin; import org.openrdf.query.algebra.MathExpr; -import org.openrdf.query.algebra.MathExpr.MathOp; import org.openrdf.query.algebra.MultiProjection; import org.openrdf.query.algebra.Not; import 
org.openrdf.query.algebra.Or; @@ -51,7 +51,6 @@ import org.openrdf.query.algebra.SameTerm; import org.openrdf.query.algebra.StatementPattern; import org.openrdf.query.algebra.StatementPattern.Scope; -import org.openrdf.query.algebra.Str; import org.openrdf.query.algebra.TupleExpr; import org.openrdf.query.algebra.UnaryTupleOperator; import org.openrdf.query.algebra.Union; @@ -85,6 +84,10 @@ import com.bigdata.rdf.internal.DummyIV; import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.internal.XSDBooleanIV; +import com.bigdata.rdf.internal.XSDDecimalIV; +import com.bigdata.rdf.internal.XSDDoubleIV; +import com.bigdata.rdf.internal.XSDIntIV; +import com.bigdata.rdf.internal.XSDIntegerIV; import com.bigdata.rdf.internal.constraints.AndBOp; import com.bigdata.rdf.internal.constraints.CompareBOp; import com.bigdata.rdf.internal.constraints.Constraint; @@ -94,8 +97,10 @@ import com.bigdata.rdf.internal.constraints.IsLiteralBOp; import com.bigdata.rdf.internal.constraints.IsURIBOp; import com.bigdata.rdf.internal.constraints.MathBOp; +import com.bigdata.rdf.internal.constraints.MathBOp.MathOp; import com.bigdata.rdf.internal.constraints.NotBOp; import com.bigdata.rdf.internal.constraints.OrBOp; +import com.bigdata.rdf.internal.constraints.RangeBOp; import com.bigdata.rdf.internal.constraints.SameTermBOp; import com.bigdata.rdf.lexicon.LexiconRelation; import com.bigdata.rdf.model.BigdataValue; @@ -809,6 +814,17 @@ */ attachNamedGraphsFilterToSearches(sopTree); + if (false) { + /* + * Look for numerical filters that can be rotated inside predicates + */ + final Iterator<SOpGroup> groups = sopTree.groups(); + while (groups.hasNext()) { + final SOpGroup g = groups.next(); + attachRangeBOps(g); + } + } + /* * Gather variables required by Sesame outside of the query * evaluation (projection and global sesame filters). 
@@ -939,7 +955,7 @@ final IChunkedOrderedIterator<IBindingSet> it2 = new ChunkedWrappedIterator<IBindingSet>( new Dechunkerator<IBindingSet>(it1)); - + // Materialize IVs as RDF Values. final CloseableIteration<BindingSet, QueryEvaluationException> result = // Monitor IRunningQuery and cancel if Sesame iterator is closed. @@ -954,315 +970,6 @@ } -// /** -// * This is the method that will attempt to take a top-level join or left -// * join and turn it into a native bigdata rule. The Sesame operators Join -// * and LeftJoin share only the common base class BinaryTupleOperator, but -// * other BinaryTupleOperators are not supported by this method. Other -// * specific types of BinaryTupleOperators will cause this method to throw -// * an exception. -// * <p> -// * This method will also turn a single top-level StatementPattern into a -// * rule with one predicate in it. -// * <p> -// * Note: As a pre-condition, the {@link Value}s in the query expression -// * MUST have been rewritten as {@link BigdataValue}s and their term -// * identifiers MUST have been resolved. Any term identifier that remains -// * {@link IRawTripleStore#NULL} is an indication that there is no entry for -// * that {@link Value} in the database. Since the JOINs are required (vs -// * OPTIONALs), that means that there is no solution for the JOINs and an -// * {@link EmptyIteration} is returned rather than evaluating the query. 
-// * -// * @param join -// * @return native bigdata rule -// * @throws UnsupportedOperatorException -// * this exception will be thrown if the Sesame join contains any -// * SPARQL language constructs that cannot be converted into -// * the bigdata native rule model -// * @throws QueryEvaluationException -// */ -// private IRule createNativeQueryOld(final TupleExpr join) -// throws UnsupportedOperatorException, -// QueryEvaluationException { -// -// if (!(join instanceof StatementPattern || -// join instanceof Join || join instanceof LeftJoin || -// join instanceof Filter)) { -// throw new AssertionError( -// "only StatementPattern, Join, and LeftJoin supported"); -// } -// -// // flattened collection of statement patterns nested within this join, -// // along with whether or not each one is optional -// final Map<StatementPattern, Boolean> stmtPatterns = -// new LinkedHashMap<StatementPattern, Boolean>(); -// // flattened collection of filters nested within this join -// final Collection<Filter> filters = new LinkedList<Filter>(); -// -// // will throw EncounteredUnknownTupleExprException if the join -// // contains something we don't handle yet -//// collectStatementPatterns(join, stmtPatterns, filters); -// -// if (false) { -// for (Map.Entry<StatementPattern, Boolean> entry : -// stmtPatterns.entrySet()) { -// log.debug(entry.getKey() + ", optional=" + entry.getValue()); -// } -// for (Filter filter : filters) { -// log.debug(filter.getCondition()); -// } -// } -// -// // generate tails -// Collection<IPredicate> tails = new LinkedList<IPredicate>(); -// // keep a list of free text searches for later to solve a named graphs -// // problem -// final Map<IPredicate, StatementPattern> searches = -// new HashMap<IPredicate, StatementPattern>(); -// for (Map.Entry<StatementPattern, Boolean> entry : stmtPatterns -// .entrySet()) { -// StatementPattern sp = entry.getKey(); -// boolean optional = entry.getValue(); -// IPredicate tail = toPredicate(sp, optional); -// 
// encountered a value not in the database lexicon -// if (tail == null) { -// if (log.isDebugEnabled()) { -// log.debug("could not generate tail for: " + sp); -// } -// if (optional) { -// // for optionals, just skip the tail -// continue; -// } else { -// // for non-optionals, skip the entire rule -// return null; -// } -// } -// if (tail.getAccessPathExpander() instanceof FreeTextSearchExpander) { -// searches.put(tail, sp); -// } -// tails.add(tail); -// } -// -// /* -// * When in quads mode, we need to go through the free text searches and -// * make sure that they are properly filtered for the dataset where -// * needed. Joins will take care of this, so we only need to add a filter -// * when a search variable does not appear in any other tails that are -// * non-optional. -// * -// * @todo Bryan seems to think this can be fixed with a DISTINCT JOIN -// * mechanism in the rule evaluation. -// */ -// if (database.isQuads() && dataset != null) { -// for (IPredicate search : searches.keySet()) { -// final Set<URI> graphs; -// StatementPattern sp = searches.get(search); -// switch (sp.getScope()) { -// case DEFAULT_CONTEXTS: { -// /* -// * Query against the RDF merge of zero or more source -// * graphs. -// */ -// graphs = dataset.getDefaultGraphs(); -// break; -// } -// case NAMED_CONTEXTS: { -// /* -// * Query against zero or more named graphs. -// */ -// graphs = dataset.getNamedGraphs(); -// break; -// } -// default: -// throw new AssertionError(); -// } -// if (graphs == null) { -// continue; -// } -// // why would we use a constant with a free text search??? 
-// if (search.get(0).isConstant()) { -// throw new AssertionError(); -// } -// // get ahold of the search variable -// com.bigdata.bop.Var searchVar = -// (com.bigdata.bop.Var) search.get(0); -// if (log.isDebugEnabled()) { -// log.debug(searchVar); -// } -// // start by assuming it needs filtering, guilty until proven -// // innocent -// boolean needsFilter = true; -// // check the other tails one by one -// for (IPredicate<ISPO> tail : tails) { -// IAccessPathExpander<ISPO> expander = -// tail.getAccessPathExpander(); -// // only concerned with non-optional tails that are not -// // themselves magic searches -// if (expander instanceof FreeTextSearchExpander -// || tail.isOptional()) { -// continue; -// } -// // see if the search variable appears in this tail -// boolean appears = false; -// for (int i = 0; i < tail.arity(); i++) { -// IVariableOrConstant term = tail.get(i); -// if (log.isDebugEnabled()) { -// log.debug(term); -// } -// if (term.equals(searchVar)) { -// appears = true; -// break; -// } -// } -// // if it appears, we don't need a filter -// if (appears) { -// needsFilter = false; -// break; -// } -// } -// // if it needs a filter, add it to the expander -// if (needsFilter) { -// if (log.isDebugEnabled()) { -// log.debug("needs filter: " + searchVar); -// } -// FreeTextSearchExpander expander = (FreeTextSearchExpander) -// search.getAccessPathExpander(); -// expander.addNamedGraphsFilter(graphs); -// } -// } -// } -// -// // generate constraints -// final Collection<IConstraint> constraints = -// new LinkedList<IConstraint>(); -// final Iterator<Filter> filterIt = filters.iterator(); -// while (filterIt.hasNext()) { -// final Filter filter = filterIt.next(); -// final IConstraint constraint = toConstraint(filter.getCondition()); -// if (constraint != null) { -// // remove if we are able to generate a native constraint for it -// if (log.isDebugEnabled()) { -// log.debug("able to generate a constraint: " + constraint); -// } -// filterIt.remove(); 
-// constraints.add(constraint); -// } -// } -// -// /* -// * FIXME Native slice, DISTINCT, etc. are all commented out for now. -// * Except for ORDER_BY, support exists for all of these features in the -// * native rules, but we need to separate the rewrite of the tupleExpr -// * and its evaluation in order to properly handle this stuff. -// */ -// IQueryOptions queryOptions = QueryOptions.NONE; -// // if (slice) { -// // if (!distinct && !union) { -// // final ISlice slice = new Slice(offset, limit); -// // queryOptions = new QueryOptions(false/* distinct */, -// // true/* stable */, null/* orderBy */, slice); -// // } -// // } else { -// // if (distinct && !union) { -// // queryOptions = QueryOptions.DISTINCT; -// // } -// // } -// -//// if (log.isDebugEnabled()) { -//// for (IPredicate<ISPO> tail : tails) { -//// IAccessPathExpander<ISPO> expander = tail.getAccessPathExpander(); -//// if (expander != null) { -//// IAccessPath<ISPO> accessPath = database.getSPORelation() -//// .getAccessPath(tail); -//// accessPath = expander.getAccessPath(accessPath); -//// IChunkedOrderedIterator<ISPO> it = accessPath.iterator(); -//// while (it.hasNext()) { -//// log.debug(it.next().toString(database)); -//// } -//// } -//// } -//// } -// -// /* -// * Collect a set of variables required beyond just the join (i.e. -// * aggregation, projection, filters, etc.) 
-// */ -// Set<String> required = new HashSet<String>(); -// -// try { -// -// QueryModelNode p = join; -// while (true) { -// p = p.getParentNode(); -// if (log.isDebugEnabled()) { -// log.debug(p.getClass()); -// } -// if (p instanceof UnaryTupleOperator) { -// required.addAll(collectVariables((UnaryTupleOperator) p)); -// } -// if (p instanceof QueryRoot) { -// break; -// } -// } -// -// if (filters.size() > 0) { -// for (Filter filter : filters) { -// required.addAll(collectVariables((UnaryTupleOperator) filter)); -// } -// } -// -// } catch (Exception ex) { -// throw new QueryEvaluationException(ex); -// } -// -// IVariable[] requiredVars = new IVariable[required.size()]; -// int i = 0; -// for (String v : required) { -// requiredVars[i++] = com.bigdata.bop.Var.var(v); -// } -// -// if (log.isDebugEnabled()) { -// log.debug("required binding names: " + Arrays.toString(requiredVars)); -// } -// -//// if (starJoins) { // database.isQuads() == false) { -//// if (log.isDebugEnabled()) { -//// log.debug("generating star joins"); -//// } -//// tails = generateStarJoins(tails); -//// } -// -// // generate native rule -// IRule rule = new Rule("nativeJoin", -// // @todo should serialize the query string here for the logs. -// null, // head -// tails.toArray(new IPredicate[tails.size()]), queryOptions, -// // constraints on the rule. -// constraints.size() > 0 ? 
constraints -// .toArray(new IConstraint[constraints.size()]) : null, -// null/* constants */, null/* taskFactory */, requiredVars); -// -// if (BigdataStatics.debug) { -// System.err.println(join.toString()); -// System.err.println(rule.toString()); -// } -// -// // we have filters that we could not translate natively -// if (filters.size() > 0) { -// if (log.isDebugEnabled()) { -// log.debug("could not translate " + filters.size() -// + " filters into native constraints:"); -// for (Filter filter : filters) { -// log.debug("\n" + filter.getCondition()); -// } -// } -// // use the basic filter iterator for remaining filters -//// rule = new ProxyRuleWithSesameFilters(rule, filters); -// } -// -// return rule; -// -// } private void attachNamedGraphsFilterToSearches(final SOpTree sopTree) { @@ -1370,7 +1077,125 @@ } } + + protected void attachRangeBOps(final SOpGroup g) { + final Map<IVariable,Collection<IValueExpression>> lowerBounds = + new LinkedHashMap<IVariable,Collection<IValueExpression>>(); + final Map<IVariable,Collection<IValueExpression>> upperBounds = + new LinkedHashMap<IVariable,Collection<IValueExpression>>(); + + for (SOp sop : g) { + final BOp bop = sop.getBOp(); + if (!(bop instanceof Constraint)) { + continue; + } + final Constraint c = (Constraint) bop; + if (!(c.getValueExpression() instanceof Com... [truncated message content] |
From: <tho...@us...> - 2011-03-02 19:15:30
|
Revision: 4262 http://bigdata.svn.sourceforge.net/bigdata/?rev=4262&view=rev Author: thompsonbry Date: 2011-03-02 19:15:21 +0000 (Wed, 02 Mar 2011) Log Message: ----------- Some optimizations of heap churn (the bop deep copy semantics are now shallow copy semantics, which is sufficient to maintain their immutable contract). Created a canonicalizing factory and applied it to the BigdataValueFactoryImpl, which is already shared for a given namespace. I am still looking at how to apply this to the LexiconRelation to increase sharing of the term cache. Added private loggers for the NanoHTTP class hierarchy. Added test w/o shared variables to TestPipelineJoin. Interned various strings which are used as annotation names. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/Constant.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IConstraint.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregate/AggregateBase.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleIndex.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bset/ConditionalRoutingOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bset/CopyOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/INConstraint.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/AbstractSubqueryOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/SubqueryOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java 
branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/PartitionedJoinGroup.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JoinGraph.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/mutation/InsertOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/ComparatorOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/DistinctBindingSetOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/GroupByOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/SliceOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/SortOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/IndexMetadata.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/counters/httpd/CounterSetHTTPD.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/counters/httpd/CounterSetHTTPDServer.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/util/httpd/AbstractHTTPD.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/util/httpd/NanoHTTPD.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/PipelineDelayOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/join/TestPipelineJoin.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/GROUP_CONCAT.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/bop/rdf/join/DataSetJoin.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/CompareBOp.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IsInlineBOp.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/MathBOp.java 
branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/RangeBOp.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/model/BigdataValueFactoryImpl.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOPredicate.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/AbstractTripleStore.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl3.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/bench/NanoSparqlServer.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/util/CanonicalFactory.java Property Changed: ---------------- branches/QUADS_QUERY_BRANCH/bigdata-perf/ Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOp.java 2011-03-01 01:05:33 UTC (rev 4261) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOp.java 2011-03-02 19:15:21 UTC (rev 4262) @@ -226,10 +226,13 @@ // * @return <code>true</code> if all arguments and annotations are the same. // */ // boolean sameData(final BOp o); - - /** - * Interface declaring well known annotations. - */ + + /** + * Interface declaring well known annotations. + * <p> + * Note: Annotation names should be {@link String#intern() interned} in + * order to avoid having duplicate values for those strings on the heap. + */ public interface Annotations { /** @@ -238,7 +241,7 @@ * identifier for the {@link BOp} within the context of its owning * query. */ - String BOP_ID = BOp.class.getName() + ".bopId"; + String BOP_ID = (BOp.class.getName() + ".bopId").intern(); /** * The timeout for the operator evaluation (milliseconds). @@ -253,7 +256,7 @@ * be interpreted with respect to the time when the query began to * execute. 
*/ - String TIMEOUT = BOp.class.getName() + ".timeout"; + String TIMEOUT = (BOp.class.getName() + ".timeout").intern(); /** * The default timeout for operator evaluation. @@ -266,7 +269,7 @@ * * @see BOpEvaluationContext */ - String EVALUATION_CONTEXT = BOp.class.getName() + ".evaluationContext"; + String EVALUATION_CONTEXT = (BOp.class.getName() + ".evaluationContext").intern(); BOpEvaluationContext DEFAULT_EVALUATION_CONTEXT = BOpEvaluationContext.ANY; @@ -280,7 +283,7 @@ * * @see BOp#isController() */ - String CONTROLLER = BOp.class.getName()+".controller"; + String CONTROLLER = (BOp.class.getName()+".controller").intern(); boolean DEFAULT_CONTROLLER = false; Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java 2011-03-01 01:05:33 UTC (rev 4261) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java 2011-03-02 19:15:21 UTC (rev 4262) @@ -85,30 +85,30 @@ * not. A "copy on write" map might be better. */ static protected final transient Map<String,Object> NOANNS = Collections.emptyMap(); - - /** - * The argument values - <strong>direct access to this field is - * discouraged</strong> - the field is protected to support - * <em>mutation</em> APIs and should not be relied on for other purposes. - * <p> - * Note: This field is reported out as a {@link List} so we can make it - * thread safe and, if desired, immutable. However, it is internally a - * simple array and exposed to subclasses so they can implement mutation - * operations which return deep copies in which the argument values have - * been modified. 
- * <p> - * If we allow mutation of the arguments then caching of the arguments (or - * annotations) by classes such as {@link EQ} will cause {@link #clone()} to - * fail because (a) it will do a field-by-field copy on the concrete - * implementation class; and (b) it will not consistently update the cached - * references. In order to "fix" this problem, any classes which cache - * arguments or annotations would have to explicitly overrides - * {@link #clone()} in order to set those fields based on the arguments on - * the cloned {@link BOpBase} class. - * <p> - * Note: This must be at least "effectively" final per the effectively - * immutable contract for {@link BOp}s. - */ + + /** + * The argument values - <strong>direct access to this field is + * discouraged</strong> - the field is protected to support + * <em>mutation</em> APIs and should not be relied on for other purposes. + * <p> + * Note: This field is reported out as a {@link List} so we can make it + * thread safe and, if desired, immutable. However, it is internally a + * simple array. Subclasses can implement mutation operations which return + * deep copies in which the argument values have been modified using + * {@link #_set(int, BOp)}. + * <p> + * If we allowed mutation of the arguments (outside of the object creation + * pattern) then caching of the arguments (or annotations) by classes such + * as {@link EQ} will cause {@link #clone()} to fail because (a) it will do + * a field-by-field copy on the concrete implementation class; and (b) it + * will not consistently update the cached references. In order to "fix" + * this problem, any classes which cache arguments or annotations would have + * to explicitly overrides {@link #clone()} in order to set those fields + * based on the arguments on the cloned {@link BOpBase} class. + * <p> + * Note: This must be at least "effectively" final per the effectively + * immutable contract for {@link BOp}s. 
+ */ private final BOp[] args; /** @@ -118,7 +118,14 @@ * immutable contract for {@link BOp}s. */ private final Map<String,Object> annotations; - + + /** + * The default initial capacity used for an empty annotation map -- empty + * maps use the minimum initial capacity to avoid waste since we create a + * large number of {@link BOp}s during query evaluation. + */ + static private transient final int DEFAULT_INITIAL_CAPACITY = 2; + /** * Check the operator argument. * @@ -199,7 +206,12 @@ // deep copy the annotations. // annotations = deepCopy(op.annotations); // Note: only shallow copy is required to achieve immutable semantics! - args = Arrays.copyOf(op.args, op.args.length); + if (op.args == NOARGS || op.args.length == 0) { + // fast path for zero arity operators. + args = NOARGS; + } else { + args = Arrays.copyOf(op.args, op.args.length); + } annotations = new LinkedHashMap<String, Object>(op.annotations); } @@ -232,12 +244,57 @@ checkArgs(args); this.args = args; - - this.annotations = (annotations == null ? new LinkedHashMap<String, Object>() - : annotations); - + + this.annotations = (annotations == null ? new LinkedHashMap<String, Object>( + DEFAULT_INITIAL_CAPACITY) + : annotations); + } + /* + * Note: This will not work since the keys provide the strong references to + * the values.... For this purpose we need to use a ConcurrentHashMap with + * an access policy which did not rely on weak references to clear its + * entries. + */ +// static private Map<String, Object> internMap(final Map<String, Object> anns) { +// final int initialCapacity = (int) (anns.size() / .75f/* loadFactor */) + 1; +// final Map<String, Object> t = new LinkedHashMap<String, Object>( +// initialCapacity); +// for(Map.Entry<String,Object> e : t.entrySet()) { +// final String k = intern(e.getKey()); +// t.put(k, e.getValue()); +// } +// return t; +// } +// +// /** +// * Intern the string within a canonicalizing hash map using weak values. 
+// * @param s +// * @return +// */ +// static private String intern(final String s) { +// +// final String t = termCache.putIfAbsent(s, s); +// +// if (t != null) +// return t; +// +// return s; +// +// } +// +// /** +// * A canonicalizing hash map using weak values. Entries will be cleared from +// * the map once their values are no longer referenced. +// * ConcurrentWeakValueCacheWithBatchedUpdates +// */ +// static private transient final ConcurrentWeakValueCacheWithBatchedUpdates<String,String> termCache = new ConcurrentWeakValueCacheWithBatchedUpdates<String,String>(// +// 1000, // queueCapacity +// .75f, // loadFactor (.75 is the default) +// 16 // concurrency level (16 is the default) +// ); + final public Map<String, Object> annotations() { return Collections.unmodifiableMap(annotations); @@ -375,10 +432,10 @@ * before returning control to the caller. This would result in less * heap churn. */ - static protected BOp[] deepCopy(final BOp[] a) { - if (a == NOARGS) { - // fast path for zero arity operators. - return a; + static protected BOp[] deepCopy(final BOp[] a) { + if (a == NOARGS || a.length == 0) { + // fast path for zero arity operators. + return NOARGS; } final BOp[] t = new BOp[a.length]; for (int i = 0; i < a.length; i++) { Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/Constant.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/Constant.java 2011-03-01 01:05:33 UTC (rev 4261) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/Constant.java 2011-03-02 19:15:21 UTC (rev 4262) @@ -44,7 +44,7 @@ * The {@link IVariable} which is bound to that constant value * (optional). 
*/ - String VAR = Constant.class.getName() + ".var"; + String VAR = (Constant.class.getName() + ".var").intern(); } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IConstraint.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IConstraint.java 2011-03-01 01:05:33 UTC (rev 4261) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IConstraint.java 2011-03-02 19:15:21 UTC (rev 4262) @@ -52,4 +52,9 @@ */ public boolean accept(IBindingSet bindingSet); + /** + * A zero length empty {@link IConstraint} array. + */ + public IConstraint[] EMPTY = new IConstraint[0]; + } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java 2011-03-01 01:05:33 UTC (rev 4261) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java 2011-03-02 19:15:21 UTC (rev 4262) @@ -84,7 +84,7 @@ * @see https://sourceforge.net/apps/trac/bigdata/ticket/180 (Migrate * the RDFS inference and truth maintenance logic to BOPs) */ - String RELATION_NAME = IPredicate.class.getName() + ".relationName"; + String RELATION_NAME = (IPredicate.class.getName() + ".relationName").intern(); // /** // * The {@link IKeyOrder} which will be used to read on the relation. @@ -99,7 +99,7 @@ /** * <code>true</code> iff the predicate has SPARQL optional semantics. */ - String OPTIONAL = IPredicate.class.getName() + ".optional"; + String OPTIONAL = (IPredicate.class.getName() + ".optional").intern(); // /** // * Constraints on the elements read from the relation. 
@@ -139,7 +139,7 @@ * * @see IRangeQuery#rangeIterator(byte[], byte[], int, int, IFilter) */ - String INDEX_LOCAL_FILTER = IPredicate.class.getName() + ".indexLocalFilter"; + String INDEX_LOCAL_FILTER = (IPredicate.class.getName() + ".indexLocalFilter").intern(); /** * An optional {@link BOpFilterBase} to be applied to the elements of @@ -156,7 +156,7 @@ * one another. You can chain {@link FilterBase} filters together as * well. */ - String ACCESS_PATH_FILTER = IPredicate.class.getName() + ".accessPathFilter"; + String ACCESS_PATH_FILTER = (IPredicate.class.getName() + ".accessPathFilter").intern(); /** * Access path expander pattern. This allows you to wrap or replace the @@ -185,13 +185,13 @@ * * @see IAccessPathExpander */ - String ACCESS_PATH_EXPANDER = IPredicate.class.getName() + ".accessPathExpander"; + String ACCESS_PATH_EXPANDER = (IPredicate.class.getName() + ".accessPathExpander").intern(); /** * The partition identifier -or- <code>-1</code> if the predicate does * not address a specific shard. */ - String PARTITION_ID = IPredicate.class.getName() + ".partitionId"; + String PARTITION_ID = (IPredicate.class.getName() + ".partitionId").intern(); int DEFAULT_PARTITION_ID = -1; @@ -233,7 +233,7 @@ * * @see BOpEvaluationContext */ - String REMOTE_ACCESS_PATH = IPredicate.class.getName() + ".remoteAccessPath"; + String REMOTE_ACCESS_PATH = (IPredicate.class.getName() + ".remoteAccessPath").intern(); boolean DEFAULT_REMOTE_ACCESS_PATH = true; @@ -245,8 +245,8 @@ * * @see #DEFAULT_FULLY_BUFFERED_READ_THRESHOLD */ - String FULLY_BUFFERED_READ_THRESHOLD = IPredicate.class.getName() - + ".fullyBufferedReadThreshold"; + String FULLY_BUFFERED_READ_THRESHOLD = (IPredicate.class.getName() + + ".fullyBufferedReadThreshold").intern(); /** * Default for {@link #FULLY_BUFFERED_READ_THRESHOLD}. 
@@ -277,7 +277,7 @@ * * @see #DEFAULT_FLAGS */ - String FLAGS = IPredicate.class.getName() + ".flags"; + String FLAGS = (IPredicate.class.getName() + ".flags").intern(); /** * The default flags will visit the keys and values of the non-deleted @@ -302,7 +302,7 @@ * * @see #TIMESTAMP */ - String MUTATION = IPredicate.class.getName() + ".mutation"; + String MUTATION = (IPredicate.class.getName() + ".mutation").intern(); boolean DEFAULT_MUTATION = false; @@ -312,7 +312,7 @@ * * @see #MUTATION */ - String TIMESTAMP = IPredicate.class.getName() + ".timestamp"; + String TIMESTAMP = (IPredicate.class.getName() + ".timestamp").intern(); // /** // * An optional {@link IConstraint}[] which places restrictions on the Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java 2011-03-01 01:05:33 UTC (rev 4261) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java 2011-03-02 19:15:21 UTC (rev 4262) @@ -71,7 +71,7 @@ * the ancestor in the operator tree which serves as the default sink * for binding sets (optional, default is the parent). */ - String SINK_REF = PipelineOp.class.getName() + ".sinkRef"; + String SINK_REF = (PipelineOp.class.getName() + ".sinkRef").intern(); /** * The value of the annotation is the {@link BOp.Annotations#BOP_ID} of @@ -80,8 +80,8 @@ * * @see #ALT_SINK_GROUP */ - String ALT_SINK_REF = PipelineOp.class.getName() - + ".altSinkRef"; + String ALT_SINK_REF = (PipelineOp.class.getName() + ".altSinkRef") + .intern(); /** * The value reported by {@link PipelineOp#isSharedState()} (default @@ -96,7 +96,8 @@ * When <code>true</code>, the {@link QueryEngine} will impose the * necessary constraints when the operator is evaluated. 
*/ - String SHARED_STATE = PipelineOp.class.getName() + ".sharedState"; + String SHARED_STATE = (PipelineOp.class.getName() + ".sharedState") + .intern(); boolean DEFAULT_SHARED_STATE = false; @@ -116,7 +117,7 @@ * have less effect and performance tends to be best around a modest * value (10) for those annotations. */ - String MAX_PARALLEL = PipelineOp.class.getName() + ".maxParallel"; + String MAX_PARALLEL = (PipelineOp.class.getName() + ".maxParallel").intern(); /** * @see #MAX_PARALLEL @@ -136,8 +137,8 @@ * data to be assigned to an evaluation task is governed by * {@link #MAX_MEMORY} instead. */ - String MAX_MESSAGES_PER_TASK = PipelineOp.class.getName() - + ".maxMessagesPerTask"; + String MAX_MESSAGES_PER_TASK = (PipelineOp.class.getName() + + ".maxMessagesPerTask").intern(); /** * @see #MAX_MESSAGES_PER_TASK @@ -151,8 +152,8 @@ * amount of data which can be buffered on the JVM heap during pipelined * query evaluation. */ - String PIPELINE_QUEUE_CAPACITY = PipelineOp.class.getName() - + ".pipelineQueueCapacity"; + String PIPELINE_QUEUE_CAPACITY = (PipelineOp.class.getName() + + ".pipelineQueueCapacity").intern(); /** * @see #PIPELINE_QUEUE_CAPACITY @@ -165,7 +166,7 @@ * "blocked" evaluation depending on how it buffers its data for * evaluation. */ - String PIPELINED = PipelineOp.class.getName() + ".pipelined"; + String PIPELINED = (PipelineOp.class.getName() + ".pipelined").intern(); /** * @see #PIPELINED @@ -201,7 +202,7 @@ * semantics. Such operators MUST throw an exception if the value of * this annotation could result in multiple evaluation passes. 
*/ - String MAX_MEMORY = PipelineOp.class.getName() + ".maxMemory"; + String MAX_MEMORY = (PipelineOp.class.getName() + ".maxMemory").intern(); /** * @see #MAX_MEMORY Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregate/AggregateBase.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregate/AggregateBase.java 2011-03-01 01:05:33 UTC (rev 4261) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregate/AggregateBase.java 2011-03-02 19:15:21 UTC (rev 4262) @@ -103,14 +103,14 @@ * The aggregate function identifier ({@link FunctionCode#COUNT}, * {@link FunctionCode#SUM}, etc). */ - String FUNCTION_CODE = AggregateBase.class.getName() + ".functionCode"; + String FUNCTION_CODE = (AggregateBase.class.getName() + ".functionCode").intern(); /** * Optional boolean property indicates whether the aggregate applies to * the distinct within group solutions (default * {@value #DEFAULT_DISTINCT}). 
*/ - String DISTINCT = AggregateBase.class.getName() + ".distinct"; + String DISTINCT = (AggregateBase.class.getName() + ".distinct").intern(); boolean DEFAULT_DISTINCT = false; Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java 2011-03-01 01:05:33 UTC (rev 4261) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java 2011-03-02 19:15:21 UTC (rev 4262) @@ -35,7 +35,6 @@ import com.bigdata.bop.Constant; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IConstant; -import com.bigdata.bop.IConstraint; import com.bigdata.bop.IElement; import com.bigdata.bop.IPredicate; import com.bigdata.bop.IVariable; Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleIndex.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleIndex.java 2011-03-01 01:05:33 UTC (rev 4261) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleIndex.java 2011-03-02 19:15:21 UTC (rev 4262) @@ -117,7 +117,7 @@ /** * The sample limit (default {@value #DEFAULT_LIMIT}). */ - String LIMIT = SampleIndex.class.getName() + ".limit"; + String LIMIT = (SampleIndex.class.getName() + ".limit").intern(); int DEFAULT_LIMIT = 100; @@ -126,7 +126,7 @@ * (default {@value #DEFAULT_SEED}). A non-zero value may be used to * create a repeatable sample. */ - String SEED = SampleIndex.class.getName() + ".seed"; + String SEED = (SampleIndex.class.getName() + ".seed").intern(); long DEFAULT_SEED = 0L; @@ -134,12 +134,12 @@ * The {@link IPredicate} describing the access path to be sampled * (required). 
*/ - String PREDICATE = SampleIndex.class.getName() + ".predicate"; + String PREDICATE = (SampleIndex.class.getName() + ".predicate").intern(); /** * The type of sample to take (default {@value #DEFAULT_SAMPLE_TYPE)}. */ - String SAMPLE_TYPE = SampleIndex.class.getName() + ".sampleType"; + String SAMPLE_TYPE = (SampleIndex.class.getName() + ".sampleType").intern(); String DEFAULT_SAMPLE_TYPE = SampleType.RANDOM.name(); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bset/ConditionalRoutingOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bset/ConditionalRoutingOp.java 2011-03-01 01:05:33 UTC (rev 4261) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bset/ConditionalRoutingOp.java 2011-03-02 19:15:21 UTC (rev 4262) @@ -69,7 +69,7 @@ * When the condition is not satisfied, the binding set is routed to the * alternative sink. */ - String CONDITION = ConditionalRoutingOp.class.getName() + ".condition"; + String CONDITION = (ConditionalRoutingOp.class.getName() + ".condition").intern(); } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bset/CopyOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bset/CopyOp.java 2011-03-01 01:05:33 UTC (rev 4261) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bset/CopyOp.java 2011-03-02 19:15:21 UTC (rev 4262) @@ -68,13 +68,13 @@ * An optional {@link IConstraint}[] which places restrictions on the * legal patterns in the variable bindings. */ - String CONSTRAINTS = CopyOp.class.getName() + ".constraints"; + String CONSTRAINTS = (CopyOp.class.getName() + ".constraints").intern(); /** * An optional {@link IBindingSet}[] to be used <strong>instead</strong> * of the default source. 
*/ - String BINDING_SETS = CopyOp.class.getName() + ".bindingSets"; + String BINDING_SETS = (CopyOp.class.getName() + ".bindingSets").intern(); } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/INConstraint.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/INConstraint.java 2011-03-01 01:05:33 UTC (rev 4261) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/INConstraint.java 2011-03-02 19:15:21 UTC (rev 4262) @@ -59,7 +59,7 @@ /** * The variable against which the constraint is applied. */ - String VARIABLE = INConstraint.class.getName() + ".variable"; + String VARIABLE = (INConstraint.class.getName() + ".variable").intern(); /** * The set of allowed values for that variable. @@ -67,7 +67,7 @@ * @todo allow large sets to be specified by reference to a resource * which is then materialized on demand during evaluation. */ - String SET = INConstraint.class.getName() + ".set"; + String SET = (INConstraint.class.getName() + ".set").intern(); } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/AbstractSubqueryOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/AbstractSubqueryOp.java 2011-03-01 01:05:33 UTC (rev 4261) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/AbstractSubqueryOp.java 2011-03-02 19:15:21 UTC (rev 4262) @@ -90,20 +90,21 @@ public interface Annotations extends PipelineOp.Annotations { - /** - * The ordered {@link BOp}[] of subqueries to be evaluated for each - * binding set presented (required). - */ - String SUBQUERIES = SubqueryOp.class.getName() + ".subqueries"; + /** + * The ordered {@link BOp}[] of subqueries to be evaluated for each + * binding set presented (required). 
+ */ + String SUBQUERIES = (AbstractSubqueryOp.class.getName() + ".subqueries") + .intern(); - /** - * The maximum parallelism with which the subqueries will be evaluated - * (default is unlimited). - */ - String MAX_PARALLEL_SUBQUERIES = AbstractSubqueryOp.class.getName() - + ".maxParallelSubqueries"; + /** + * The maximum parallelism with which the subqueries will be evaluated + * (default is unlimited). + */ + String MAX_PARALLEL_SUBQUERIES = (AbstractSubqueryOp.class.getName() + ".maxParallelSubqueries") + .intern(); - int DEFAULT_MAX_PARALLEL_SUBQUERIES = Integer.MAX_VALUE; + int DEFAULT_MAX_PARALLEL_SUBQUERIES = Integer.MAX_VALUE; } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/SubqueryOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/SubqueryOp.java 2011-03-01 01:05:33 UTC (rev 4261) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/SubqueryOp.java 2011-03-02 19:15:21 UTC (rev 4262) @@ -74,14 +74,14 @@ * {@link SubqueryOp} (required). This should be a * {@link PipelineOp}. */ - String SUBQUERY = SubqueryOp.class.getName() + ".subquery"; + String SUBQUERY = (SubqueryOp.class.getName() + ".subquery").intern(); /** * When <code>true</code> the subquery has optional semantics (if the * subquery fails, the original binding set will be passed along to the * downstream sink anyway) (default {@value #DEFAULT_OPTIONAL}). 
*/ - String OPTIONAL = SubqueryOp.class.getName() + ".optional"; + String OPTIONAL = (SubqueryOp.class.getName() + ".optional").intern(); boolean DEFAULT_OPTIONAL = false; Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java 2011-03-01 01:05:33 UTC (rev 4261) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java 2011-03-02 19:15:21 UTC (rev 4262) @@ -228,8 +228,8 @@ * {@link QueryEngine#newRunningQuery(QueryEngine, UUID, boolean, IQueryClient, PipelineOp)} * in which case they might not support this option. */ - String RUNNING_QUERY_CLASS = QueryEngine.class.getName() - + ".runningQueryClass"; + String RUNNING_QUERY_CLASS = (QueryEngine.class.getName() + + ".runningQueryClass").intern(); // String DEFAULT_RUNNING_QUERY_CLASS = StandaloneChainedRunningQuery.class.getName(); String DEFAULT_RUNNING_QUERY_CLASS = ChunkedRunningQuery.class.getName(); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java 2011-03-01 01:05:33 UTC (rev 4261) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java 2011-03-02 19:15:21 UTC (rev 4262) @@ -121,14 +121,14 @@ * The {@link IPredicate} which is used to generate the * {@link IAccessPath}s during the join. */ - String PREDICATE = PipelineJoin.class.getName() + ".predicate"; + String PREDICATE = (PipelineJoin.class.getName() + ".predicate").intern(); /** * An optional {@link IVariable}[] identifying the variables to be * retained in the {@link IBindingSet}s written out by the operator. All * variables are retained unless this annotation is specified. 
*/ - String SELECT = PipelineJoin.class.getName() + ".select"; + String SELECT = (PipelineJoin.class.getName() + ".select").intern(); // /** // * Marks the join as "optional" in the SPARQL sense. Binding sets which @@ -149,7 +149,7 @@ * An {@link IConstraint}[] which places restrictions on the legal * patterns in the variable bindings (optional). */ - String CONSTRAINTS = PipelineJoin.class.getName() + ".constraints"; + String CONSTRAINTS = (PipelineJoin.class.getName() + ".constraints").intern(); /** * The maximum parallelism with which the pipeline will consume the @@ -175,7 +175,7 @@ * this option might well go away which would allow us to simplify * the PipelineJoin implementation. */ - String MAX_PARALLEL_CHUNKS = PipelineJoin.class.getName() + ".maxParallelChunks"; + String MAX_PARALLEL_CHUNKS = (PipelineJoin.class.getName() + ".maxParallelChunks").intern(); int DEFAULT_MAX_PARALLEL_CHUNKS = 0; @@ -195,8 +195,8 @@ * * @todo unit tests when (en|dis)abled. */ - String COALESCE_DUPLICATE_ACCESS_PATHS = PipelineJoin.class.getName() - + ".coalesceDuplicateAccessPaths"; + String COALESCE_DUPLICATE_ACCESS_PATHS = (PipelineJoin.class.getName() + + ".coalesceDuplicateAccessPaths").intern(); boolean DEFAULT_COALESCE_DUPLICATE_ACCESS_PATHS = true; @@ -206,7 +206,7 @@ * * @todo Unit tests for this feature (it is used by the JoinGraph). 
*/ - String LIMIT = PipelineJoin.class.getName() + ".limit"; + String LIMIT = (PipelineJoin.class.getName() + ".limit").intern(); long DEFAULT_LIMIT = Long.MAX_VALUE; Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/PartitionedJoinGroup.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/PartitionedJoinGroup.java 2011-03-01 01:05:33 UTC (rev 4261) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/PartitionedJoinGroup.java 2011-03-02 19:15:21 UTC (rev 4262) @@ -801,7 +801,7 @@ if (constraints == null) { // replace with an empty array. - constraints = new IConstraint[0]; + constraints = IConstraint.EMPTY; } /* Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JoinGraph.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JoinGraph.java 2011-03-01 01:05:33 UTC (rev 4261) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JoinGraph.java 2011-03-02 19:15:21 UTC (rev 4262) @@ -83,25 +83,25 @@ /** * The variables which are projected out of the join graph. */ - String SELECTED = JoinGraph.class.getName() + ".selected"; + String SELECTED = (JoinGraph.class.getName() + ".selected").intern(); /** * The vertices of the join graph, expressed an an {@link IPredicate}[] * (required). */ - String VERTICES = JoinGraph.class.getName() + ".vertices"; + String VERTICES = (JoinGraph.class.getName() + ".vertices").intern(); /** * The constraints on the join graph, expressed an an * {@link IConstraint}[] (optional, defaults to no constraints). */ - String CONSTRAINTS = JoinGraph.class.getName() + ".constraints"; + String CONSTRAINTS = (JoinGraph.class.getName() + ".constraints").intern(); /** * The initial limit for cutoff sampling (default * {@value #DEFAULT_LIMIT}). 
*/ - String LIMIT = JoinGraph.class.getName() + ".limit"; + String LIMIT = (JoinGraph.class.getName() + ".limit").intern(); int DEFAULT_LIMIT = 100; @@ -110,7 +110,7 @@ * cardinality will be used to generate the initial join paths (default * {@value #DEFAULT_NEDGES}). This must be a positive integer. */ - String NEDGES = JoinGraph.class.getName() + ".nedges"; + String NEDGES = (JoinGraph.class.getName() + ".nedges").intern(); int DEFAULT_NEDGES = 2; @@ -119,7 +119,7 @@ * * @see SampleIndex.SampleType */ - String SAMPLE_TYPE = JoinGraph.class.getName() + ".sampleType"; + String SAMPLE_TYPE = (JoinGraph.class.getName() + ".sampleType").intern(); String DEFAULT_SAMPLE_TYPE = SampleType.RANDOM.name(); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/mutation/InsertOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/mutation/InsertOp.java 2011-03-01 01:05:33 UTC (rev 4261) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/mutation/InsertOp.java 2011-03-02 19:15:21 UTC (rev 4262) @@ -78,17 +78,17 @@ * @see IPredicate#asBound(IBindingSet) * @see IRelation#newElement(java.util.List, IBindingSet) */ - String SELECTED = InsertOp.class.getName() + ".selected"; + String SELECTED = (InsertOp.class.getName() + ".selected").intern(); /** * The namespace of the relation to which the index belongs. */ - String RELATION = InsertOp.class.getName() + ".relation"; + String RELATION = (InsertOp.class.getName() + ".relation").intern(); /** * The {@link IKeyOrder} for the index. 
*/ - String KEY_ORDER = InsertOp.class.getName() + ".keyOrder"; + String KEY_ORDER = (InsertOp.class.getName() + ".keyOrder").intern(); } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/ComparatorOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/ComparatorOp.java 2011-03-01 01:05:33 UTC (rev 4261) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/ComparatorOp.java 2011-03-02 19:15:21 UTC (rev 4262) @@ -55,7 +55,7 @@ * will be imposed and the order (ascending or descending) for each * variable. */ - String ORDER = ComparatorOp.class.getName() + ".order"; + String ORDER = (ComparatorOp.class.getName() + ".order").intern(); } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/DistinctBindingSetOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/DistinctBindingSetOp.java 2011-03-01 01:05:33 UTC (rev 4261) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/DistinctBindingSetOp.java 2011-03-02 19:15:21 UTC (rev 4262) @@ -51,7 +51,7 @@ * Binding sets with distinct values for the specified variables will be * passed on. 
*/ - String VARIABLES = DistinctBindingSetOp.class.getName() + ".variables"; + String VARIABLES = (DistinctBindingSetOp.class.getName() + ".variables").intern(); } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/GroupByOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/GroupByOp.java 2011-03-01 01:05:33 UTC (rev 4261) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/GroupByOp.java 2011-03-02 19:15:21 UTC (rev 4262) @@ -59,7 +59,7 @@ * {@link #GROUP_BY} declaration as simple {@link IVariable} s; or (b) * be declared by {@link #COMPUTE}. */ - String SELECT = GroupByOp.class.getName() + ".select"; + String SELECT = (GroupByOp.class.getName() + ".select").intern(); // /** // * The ordered set of {@link IValueExpression}s which are to be @@ -90,7 +90,7 @@ * the aggregation groups (required). Variables references will be * resolved against the incoming solutions. */ - String GROUP_BY = GroupByOp.class.getName() + ".groupBy"; + String GROUP_BY = (GroupByOp.class.getName() + ".groupBy").intern(); /** * An {@link IConstraint}[] applied to the aggregated solutions @@ -99,7 +99,7 @@ * TODO Should be the BEV of an {@link IValueExpression}, which might or * might not be an {@link IConstraint}. */ - String HAVING = GroupByOp.class.getName() + ".having"; + String HAVING = (GroupByOp.class.getName() + ".having").intern(); } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/SliceOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/SliceOp.java 2011-03-01 01:05:33 UTC (rev 4261) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/SliceOp.java 2011-03-02 19:15:21 UTC (rev 4262) @@ -88,7 +88,7 @@ /** * The first solution to be returned to the caller (origin ZERO). 
*/ - String OFFSET = SliceOp.class.getName() + ".offset"; + String OFFSET = (SliceOp.class.getName() + ".offset").intern(); long DEFAULT_OFFSET = 0L; @@ -96,7 +96,7 @@ * The maximum #of solutions to be returned to the caller (default is * all). */ - String LIMIT = SliceOp.class.getName() + ".limit"; + String LIMIT = (SliceOp.class.getName() + ".limit").intern(); /** * A value of {@link Long#MAX_VALUE} is used to indicate that there is Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/SortOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/SortOp.java 2011-03-01 01:05:33 UTC (rev 4261) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/SortOp.java 2011-03-02 19:15:21 UTC (rev 4262) @@ -55,7 +55,7 @@ * * @see ComparatorOp */ - String COMPARATOR = MemorySortOp.class.getName() + ".comparator"; + String COMPARATOR = (MemorySortOp.class.getName() + ".comparator").intern(); } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/IndexMetadata.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/IndexMetadata.java 2011-03-01 01:05:33 UTC (rev 4261) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/IndexMetadata.java 2011-03-02 19:15:21 UTC (rev 4262) @@ -316,9 +316,9 @@ * * @see #DEFAULT_BLOOM_FILTER */ - String BLOOM_FILTER = com.bigdata.btree.BTree.class.getPackage() + String BLOOM_FILTER = (com.bigdata.btree.BTree.class.getPackage() .getName() - + ".bloomFilter"; + + ".bloomFilter").intern(); String DEFAULT_BLOOM_FILTER = "false"; @@ -379,9 +379,9 @@ * application is performing sustained writes on the index (hundreds of * thousands to millions of records). 
*/ - String WRITE_RETENTION_QUEUE_CAPACITY = com.bigdata.btree.AbstractBTree.class + String WRITE_RETENTION_QUEUE_CAPACITY = (com.bigdata.btree.AbstractBTree.class .getPackage().getName() - + ".writeRetentionQueue.capacity"; + + ".writeRetentionQueue.capacity").intern(); /** * The #of entries on the write retention queue that will be scanned for @@ -392,9 +392,9 @@ * incremental writes occur iff the {@link AbstractNode#referenceCount} * is zero and the node or leaf is dirty. */ - String WRITE_RETENTION_QUEUE_SCAN = com.bigdata.btree.AbstractBTree.class + String WRITE_RETENTION_QUEUE_SCAN = (com.bigdata.btree.AbstractBTree.class .getPackage().getName() - + ".writeRetentionQueue.scan"; + + ".writeRetentionQueue.scan").intern(); String DEFAULT_WRITE_RETENTION_QUEUE_CAPACITY = "500";// was 500 @@ -408,17 +408,17 @@ * * FIXME {@link KeyBuilder} configuration support is not finished. */ - String KEY_BUILDER_FACTORY = com.bigdata.btree.AbstractBTree.class + String KEY_BUILDER_FACTORY = (com.bigdata.btree.AbstractBTree.class .getPackage().getName() - + "keyBuilderFactory"; + + "keyBuilderFactory").intern(); /** * Override the {@link IRabaCoder} used for the keys in the nodes of a * B+Tree (the default is a {@link FrontCodedRabaCoder} instance). 
*/ - String NODE_KEYS_CODER = com.bigdata.btree.AbstractBTree.class + String NODE_KEYS_CODER = (com.bigdata.btree.AbstractBTree.class .getPackage().getName() - + "nodeKeysCoder"; + + "nodeKeysCoder").intern(); /** * Override the {@link IRabaCoder} used for the keys of leaves in @@ -426,9 +426,9 @@ * * @see DefaultTupleSerializer#setLeafKeysCoder(IRabaCoder) */ - String LEAF_KEYS_CODER = com.bigdata.btree.AbstractBTree.class + String LEAF_KEYS_CODER = (com.bigdata.btree.AbstractBTree.class .getPackage().getName() - + ".leafKeysCoder"; + + ".leafKeysCoder").intern(); /** * Override the {@link IRabaCoder} used for the values of leaves in @@ -436,9 +436,9 @@ * * @see DefaultTupleSerializer#setLeafValuesCoder(IRabaCoder) */ - String LEAF_VALUES_CODER = com.bigdata.btree.AbstractBTree.class + String LEAF_VALUES_CODER = (com.bigdata.btree.AbstractBTree.class .getPackage().getName() - + ".leafValuesCoder"; + + ".leafValuesCoder").intern(); // /** // * Option determines whether or not per-child locks are used by @@ -492,7 +492,7 @@ * also need to override the {@link Checkpoint} class - for * example the {@link MetadataIndex} does this. */ - String BTREE_CLASS_NAME = BTree.class.getName()+".className"; + String BTREE_CLASS_NAME = (BTree.class.getName()+".className").intern(); /** * The name of an optional property whose value specifies the branching @@ -501,7 +501,7 @@ * @see #DEFAULT_BTREE_BRANCHING_FACTOR * @see #INDEX_SEGMENT_BRANCHING_FACTOR */ - String BTREE_BRANCHING_FACTOR = BTree.class.getName()+".branchingFactor"; + String BTREE_BRANCHING_FACTOR = (BTree.class.getName()+".branchingFactor").intern(); /** * The default branching factor for a mutable {@link BTree}. @@ -595,8 +595,8 @@ * * FIXME Record level compression support is not finished. 
*/ - String BTREE_RECORD_COMPRESSOR_FACTORY = BTree.class.getName() - + ".recordCompressorFactory"; + String BTREE_RECORD_COMPRESSOR_FACTORY = (BTree.class.getName() + + ".recordCompressorFactory").intern(); /** * @@ -614,9 +614,9 @@ * The name of the property whose value specifies the branching factory * for an immutable {@link IndexSegment}. */ - String INDEX_SEGMENT_BRANCHING_FACTOR = IndexSegment.class + String INDEX_SEGMENT_BRANCHING_FACTOR = (IndexSegment.class .getName() - + ".branchingFactor"; + + ".branchingFactor").intern(); /** * The default branching factor for an {@link IndexSegment}. @@ -646,8 +646,8 @@ * @todo should be on by default? (but verify that the unit tests do * not run out of memory when it is enabled by default). */ - String INDEX_SEGMENT_BUFFER_NODES = IndexSegment.class.getName() - + ".bufferNodes"; + String INDEX_SEGMENT_BUFFER_NODES = (IndexSegment.class.getName() + + ".bufferNodes").intern(); /** * @see #INDEX_SEGMENT_BUFFER_NODES @@ -711,8 +711,8 @@ * * FIXME Record level compression support is not finished. */ - String INDEX_SEGMENT_RECORD_COMPRESSOR_FACTORY = IndexSegment.class.getName() - + ".recordCompressorFactory"; + String INDEX_SEGMENT_RECORD_COMPRESSOR_FACTORY = (IndexSegment.class.getName() + + ".recordCompressorFactory").intern(); /** * @@ -796,9 +796,9 @@ * {@link AbstractSubtask} sink handling writes for the associated index * partition. */ - String MASTER_QUEUE_CAPACITY = AsynchronousIndexWriteConfiguration.class + String MASTER_QUEUE_CAPACITY = (AsynchronousIndexWriteConfiguration.class .getName() - + ".masterQueueCapacity"; + + ".masterQueueCapacity").intern(); String DEFAULT_MASTER_QUEUE_CAPACITY = "5000"; @@ -806,9 +806,9 @@ * The desired size of the chunks that the master will draw from its * queue. 
*/ - String MASTER_CHUNK_SIZE = AsynchronousIndexWriteConfiguration.class + String MASTER_CHUNK_SIZE = (AsynchronousIndexWriteConfiguration.class .getName() - + ".masterChunkSize"; + + ".masterChunkSize").intern(); String DEFAULT_MASTER_CHUNK_SIZE = "10000"; @@ -816,9 +816,9 @@ * The time in nanoseconds that the master will combine smaller chunks * so that it can satisfy the desired <i>masterChunkSize</i>. */ - String MASTER_CHUNK_TIMEOUT_NANOS = AsynchronousIndexWriteConfiguration.class + String MASTER_CHUNK_TIMEOUT_NANOS = (AsynchronousIndexWriteConfiguration.class .getName() - + ".masterChunkTimeoutNanos"; + + ".masterChunkTimeoutNanos").intern(); String DEFAULT_MASTER_CHUNK_TIMEOUT_NANOS = "" + TimeUnit.MILLISECONDS.toNanos(50); @@ -830,9 +830,9 @@ * the sink remains responsible rather than blocking inside of the * {@link IAsynchronousIterator} for long periods of time. */ - String SINK_POLL_TIMEOUT_NANOS = AsynchronousIndexWriteConfiguration.class + String SINK_POLL_TIMEOUT_NANOS = (AsynchronousIndexWriteConfiguration.class .getName() - + ".sinkPollTimeoutNanos"; + + ".sinkPollTimeoutNanos").intern(); String DEFAULT_SINK_POLL_TIMEOUT_NANOS = "" + TimeUnit.MILLISECONDS.toNanos(50); @@ -840,9 +840,9 @@ /** * The capacity of the internal queue for the per-sink output buffer. */ - String SINK_QUEUE_CAPACITY = AsynchronousIndexWriteConfiguration.class + String SINK_QUEUE_CAPACITY = (AsynchronousIndexWriteConfiguration.class .getName() - + ".sinkQueueCapacity"; + + ".sinkQueueCapacity").intern(); String DEFAULT_SINK_QUEUE_CAPACITY = "5000"; @@ -850,9 +850,9 @@ * The desired size of the chunks written that will be written by the * {@link AbstractSubtask sink}. */ - String SINK_CHUNK_SIZE = AsynchronousIndexWriteConfiguration.class + String SINK_CHUNK_SIZE = (AsynchronousIndexWriteConfiguration.class .getName() - + ".sinkChunkSize"; + + ".sinkChunkSize").intern(); String DEFAULT_SINK_CHUNK_SIZE = "10000"; @@ -865,9 +865,9 @@ * the index partition. 
This makes it much easier to adjust the * performance since you simply adjust the {@link #SINK_CHUNK_SIZE}. */ - String SINK_CHUNK_TIMEOUT_NANOS = AsynchronousIndexWriteConfiguration.class + String SINK_CHUNK_TIMEOUT_NANOS = (AsynchronousIndexWriteConfiguration.class .getName() - + ".sinkChunkTimeoutNanos"; + + ".sinkChunkTimeoutNanos").intern(); String DEFAULT_SINK_CHUNK_TIMEOUT_NANOS = "" + Long.MAX_VALUE; @@ -890,9 +890,9 @@ * sink is writing. */ // GTE chunkTimeout - String SINK_IDLE_TIMEOUT_NANOS = AsynchronousIndexWriteConfiguration.class + String SINK_IDLE_TIMEOUT_NANOS = (AsynchronousIndexWriteConfiguration.class .getName() - + ".sinkIdleTimeoutNanos"; + + ".sinkIdleTimeoutNanos").intern(); String DEFAULT_SINK_IDLE_TIMEOUT_NANOS = "" + Long.MAX_VALUE; @@ -916,9 +916,9 @@ * * @see OverflowManager.Options#SCATTER_SPLIT_ENABLED */ - String SCATTER_SPLIT_ENABLED = ScatterSplitConfiguration.class + String SCATTER_SPLIT_ENABLED = (ScatterSplitConfiguration.class .getName() - + ".enabled"; + + ".enabled").intern(); String DEFAULT_SCATTER_SPLIT_ENABLED = "true"; @@ -935,9 +935,9 @@ * performed. The allowable range is therefore constrained to * <code>(0.1 : 1.0)</code>. */ - String SCATTER_SPLIT_PERCENT_OF_SPLIT_THRESHOLD = ScatterSplitConfiguration.class + String SCATTER_SPLIT_PERCENT_OF_SPLIT_THRESHOLD = (ScatterSplitConfiguration.class .getName() - + ".percentOfSplitThreshold"; + + ".percentOfSplitThreshold").intern(); String DEFAULT_SCATTER_SPLIT_PERCENT_OF_SPLIT_THRESHOLD = ".25"; @@ -946,9 +946,9 @@ * to use all discovered data services (default * {@value #DEFAULT_SCATTER_SPLIT_DATA_SERVICE_COUNT}). 
*/ - String SCATTER_SPLIT_DATA_SERVICE_COUNT = ScatterSplitConfiguration.class + String SCATTER_SPLIT_DATA_SERVICE_COUNT = (ScatterSplitConfiguration.class .getName() - + ".dataServiceCount"; + + ".dataServiceCount").intern(); String DEFAULT_SCATTER_SPLIT_DATA_SERVICE_COUNT = "0"; @@ -980,9 +980,9 @@ * asynchronous index writes in order to obtain high throughput with * sustained index writes. */ - String SCATTER_SPLIT_INDEX_PARTITION_COUNT = ScatterSplitConfiguration.class + String SCATTER_SPLIT_INDEX_PARTITION_COUNT = (ScatterSplitConfiguration.class .getName() - + ".indexPartitionCount"; + + ".indexPartitionCount").intern(); String DEFAULT_SCATTER_SPLIT_INDEX_PARTITION_COUNT = "0"; Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/counters/httpd/CounterSetHTTPD.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/counters/httpd/CounterSetHTTPD.java 2011-03-01 01:05:33 UTC (rev 4261) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/counters/httpd/CounterSetHTTPD.java 2011-03-02 19:15:21 UTC (rev 4262) @@ -12,6 +12,8 @@ import java.util.Properties; import java.util.Vector; +import org.apache.log4j.Logger; + import com.bigdata.counters.CounterSet; import com.bigdata.counters.query.CounterSetSelector; import com.bigdata.counters.query.ICounterSelector; @@ -30,6 +32,8 @@ */ public class CounterSetHTTPD extends AbstractHTTPD { + static private final Logger log = Logger.getLogger(CounterSetHTTPD.class); + /** * The {@link CounterSet} exposed by this service. 
*/ Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/counters/httpd/CounterSetHTTPDServer.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/counters/httpd/CounterSetHTTPDServer.java 2011-03-01 01:05:33 UTC (rev 4261) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/counters/httpd/CounterSetHTTPDServer.java 2011-03-02 19:15:21 UTC (rev 4262) @@ -140,7 +140,7 @@ Logger.getLogger(XHTMLRenderer.class).setLevel(level); // set logging level on the service. - NanoHTTPD.log.setLevel(level); + Logger.getLogger(NanoHTTPD.class).setLevel(level); } else if (arg.equals("-events")) { Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/util/CanonicalFactory.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/util/CanonicalFactory.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/util/CanonicalFactory.java 2011-03-02 19:15:21 UTC (rev 4262) @@ -0,0 +1,128 @@ +package com.bigdata.util; + +import com.bigdata.cache.ConcurrentWeakValueCache; + +/** + * A pattern for a canonicalizing factory based on a map with weak values. + * + * @param <KEY> + * @param <VAL> + * @param <STATE> + * + * @author thompsonbry + */ +abstract public class CanonicalFactory<KEY, VAL, STATE> { + + /** + * Canonicalizing mapping. + */ +// private WeakValueCache<KEY, VAL> cache; + private ConcurrentWeakValueCache<KEY,VAL> cache; + + /** + * + * @param queueCapacity + * The capacity of the backing hard reference queue. This places + * a lower bound on the #of instances which will be retained by + * the factory. + */ + public CanonicalFactory(final int queueCapacity) { + +// cache = new WeakValueCache<KEY, VAL>(new LRUCache<KEY, VAL>(queueCapacity)); + cache = new ConcurrentWeakValueCache<KEY, VAL>(queueCapacity); + + } + + /** + * Canonical factory pattern. 
+ * + * @param key + * The key. + * @param state + * Additional state from the caller which will be passed through + * to {@link #newInstance(Object, Object)} when creating a new + * instance (optional). + * + * @return The instance paired with that key. + * + * @throws IllegalArgumentException + * if the key is <code>null</code>. + */ + public VAL getInstance(final KEY key, final STATE state) { + + if (key == null) + throw new IllegalArgumentException(); + + // check first w/o lock. + VAL val = cache.get(key); + + if (val != null) { + /* + * Fast code path if entry exists for that key. This amortizes the + * lock costs by relying on the striped locks of the CHM to provide + * less lock contention. + */ + return val; + } + + // obtain lock + synchronized (cache) { + + // check with lock held + val = cache.get(key); + + if (val == null) { + + // create an instance + val = newInstance(key,state); + + // pair that instance with the key in the map. +// cache.put(key, val, true/* dirty */); + cache.put(key, val); + + } + + return val; + + } + + } + + /** + * Remove an entry from the cache. + * <p> + * Note: It is sometimes necessary to clear a cache entry. For example, if a + * pers... [truncated message content] |