From: <tho...@us...> - 2011-01-02 21:40:08
|
Revision: 4045 http://bigdata.svn.sourceforge.net/bigdata/?rev=4045&view=rev Author: thompsonbry Date: 2011-01-02 21:40:00 +0000 (Sun, 02 Jan 2011) Log Message: ----------- Hooked up the query hints into the sail. Moved the declaration of the query hints namespace into a QueryHints interface in the sail package. Added a query hint to select the join optimizer (Static, Runtime, None). The Runtime query optimizer can only be used for plains triples without optionals right now. Adding support for optionals is easy enough (MikeP is signed up for this). Added support for quads and scale-out is more tricky since we place various annotations on the joins (not just the predicates) when configuring for named or default graph queries (e.g., when to use a REMOTE access path). Since the runtime query optimizer works directly with the IPredicates, the annotations for the joins probably need to be inferred directly from the annotations for the predicates in order for this to be compatible with the JoinGraph optimizer. Also, the DataSetJoin needs to be replaced by a standard join against an inline access path comprising the default or named graph data set. This requires some changes to the AccessPath class. PipelineJoin lacked a deep copy constructor. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/SliceOp.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/BD.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSail.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSailBooleanQuery.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSailGraphQuery.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSailRepositoryConnection.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSailTupleQuery.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/Rule2BOpUtility.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestQueryHints.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/QueryHints.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/QueryOptimizerEnum.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java 2011-01-02 00:41:31 UTC (rev 4044) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java 2011-01-02 21:40:00 UTC (rev 4045) @@ -360,8 +360,31 @@ if (!annotations.containsKey(name)) return defaultValue; - return (T) annotations.get(name); + final Object val = annotations.get(name); + if (defaultValue != null && val.getClass() != defaultValue.getClass()) { + + /* + * Attempt to convert to the correct target type. + */ + + if (defaultValue.getClass() == Integer.class) { + return (T) Integer.valueOf("" + val); + } + if (defaultValue.getClass() == Long.class) { + return (T) Long.valueOf("" + val); + } + if (defaultValue.getClass() == Float.class) { + return (T) Float.valueOf("" + val); + } + if (defaultValue.getClass() == Double.class) { + return (T) Double.valueOf("" + val); + } + + } + + return (T) val; + } // @SuppressWarnings("unchecked") Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2011-01-02 00:41:31 UTC (rev 4044) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2011-01-02 21:40:00 UTC (rev 4045) @@ -1416,7 +1416,7 @@ return preds; } - + /** * Return the {@link BOp} identifiers of the predicates associated with * each vertex in path order. @@ -1851,6 +1851,55 @@ } + /** + * Return a permutation vector which may be used to reorder the given + * {@link IPredicate}[] into the evaluation order selected by the + * runtime query optimizer. + * + * @throws IllegalArgumentException + * if the argument is <code>null</code>. + * @throws IllegalArgumentException + * if the given {@link Path} does not cover all vertices in + * the join graph. + */ + public int[] getOrder(final Path p) { + + if(p == null) + throw new IllegalArgumentException(); + + final IPredicate[] path = p.getPredicates(); + + if (path.length != V.length) { + throw new IllegalArgumentException( + "Wrong path length: #vertices=" + V.length + + ", but path.length=" + path.length); + } + + final int[] order = new int[V.length]; + + for (int i = 0; i < order.length; i++) { + + boolean found = false; + for (int j = 0; j < order.length; j++) { + + if (path[i].getId() == V[j].pred.getId()) { + order[i] = j; + found = true; + break; + } + + } + + if (!found) + throw new RuntimeException("No such vertex: id=" + + path[i].getId()); + + } + + return order; + + } + /** * Choose the starting vertices. * Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java 2011-01-02 00:41:31 UTC (rev 4044) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java 2011-01-02 21:40:00 UTC (rev 4045) @@ -378,6 +378,15 @@ } /** + * Deep copy constructor. + * + * @param op + */ + public PipelineJoin(final PipelineJoin<E> op) { + super(op); + } + + /** * Shallow copy vararg constructor. * * @param args @@ -637,6 +646,9 @@ this.predicate = joinOp.getPredicate(); this.constraints = joinOp.constraints(); this.maxParallel = joinOp.getMaxParallel(); + if (maxParallel < 0) + throw new IllegalArgumentException(Annotations.MAX_PARALLEL + + "=" + maxParallel); if (maxParallel > 0) { // shared service. service = new LatchedExecutor(context.getIndexManager() Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/SliceOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/SliceOp.java 2011-01-02 00:41:31 UTC (rev 4044) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/SliceOp.java 2011-01-02 21:40:00 UTC (rev 4045) @@ -110,7 +110,7 @@ * Deep Copy constructor. * @param op */ - public SliceOp(SliceOp op) { + public SliceOp(final SliceOp op) { super(op); @@ -122,7 +122,7 @@ * @param args * @param annotations */ - public SliceOp(BOp[] args, Map<String, Object> annotations) { + public SliceOp(final BOp[] args, final Map<String, Object> annotations) { super(args, annotations); Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/BD.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/BD.java 2011-01-02 00:41:31 UTC (rev 4044) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/BD.java 2011-01-02 21:40:00 UTC (rev 4045) @@ -52,6 +52,7 @@ import org.openrdf.model.Value; import org.openrdf.model.impl.URIImpl; + /** * A vocabulary for bigdata specific extensions. * @@ -64,15 +65,6 @@ * The namespace used for bigdata specific extensions. */ String NAMESPACE = "http://www.bigdata.com/rdf#"; - - /** - * The namespace prefix used in SPARQL queries to signify query hints. You - * can embed query hints into a SPARQL query as follows: - * <code> - * PREFIX BIGDATA_QUERY_HINTS: <http://www.bigdata.com/queryHints#com.bigdata.relation.rule.eval.DefaultRuleTaskFactory.nestedSubquery=true&com.bigdata.fullScanTreshold=1000> - * </code> - */ - String QUERY_HINTS_NAMESPACE = "BIGDATA_QUERY_HINTS"; /** * The name of a per-statement attribute whose value is recognized in Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java 2011-01-02 00:41:31 UTC (rev 4044) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java 2011-01-02 21:40:00 UTC (rev 4045) @@ -230,7 +230,7 @@ final Properties properties = getProperties(); final File file; - if (false) { + if (true) { /* * Use a persistent file that is generated once and then reused by * each test run. @@ -899,6 +899,8 @@ // System.err.println(getName() + " : runtime optimizer join order " // + Arrays.toString(Path.getVertexIds(p.edges))); + System.err.println(getName() + " : order[]=" + Arrays.toString(g.getOrder(p))); + return p.getPredicates(); } finally { Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl.java 2011-01-02 00:41:31 UTC (rev 4044) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl.java 2011-01-02 21:40:00 UTC (rev 4045) @@ -366,7 +366,7 @@ * This is the top-level method called by the SAIL to evaluate a query. * The TupleExpr parameter here is guaranteed to be the root of the operator * tree for the query. Query hints are parsed by the SAIL from the - * namespaces in the original query. See {@link BD#QUERY_HINTS_NAMESPACE}. + * namespaces in the original query. See {@link QueryHints#NAMESPACE}. */ public CloseableIteration<BindingSet, QueryEvaluationException> evaluate( TupleExpr expr, BindingSet bindings, Properties queryHints) @@ -1673,7 +1673,7 @@ final QueryEngine queryEngine = tripleSource.getSail().getQueryEngine(); - final int startId = 1; +// final int startId = 1; final PipelineOp query; { @@ -1686,7 +1686,7 @@ // Convert the step to a bigdata operator tree. query = Rule2BOpUtility.convert(step, idFactory, database, - queryEngine); + queryEngine, queryHints); if (log.isInfoEnabled()) log.info(query); Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSail.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSail.java 2011-01-02 00:41:31 UTC (rev 4044) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSail.java 2011-01-02 21:40:00 UTC (rev 4045) @@ -3117,7 +3117,7 @@ * {@link Options#QUERY_TIME_EXPANDER}, but not on a per-query basis. * <p> * QueryHints are a set of properties that are parsed from a SPARQL - * query. See {@link BD#QUERY_HINTS_NAMESPACE} for more information. + * query. See {@link QueryHints#NAMESPACE} for more information. * * @todo The [bindings] are supposed to be inputs to the query * evaluation, but I am still not quite clear what the role of the Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSailBooleanQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSailBooleanQuery.java 2011-01-02 00:41:31 UTC (rev 4044) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSailBooleanQuery.java 2011-01-02 21:40:00 UTC (rev 4045) @@ -12,14 +12,13 @@ import org.openrdf.sail.SailConnection; import org.openrdf.sail.SailException; import com.bigdata.rdf.sail.BigdataSail.BigdataSailConnection; -import com.bigdata.rdf.store.BD; public class BigdataSailBooleanQuery extends SailBooleanQuery implements BigdataSailQuery { /** * Query hints are embedded in query strings as namespaces. - * See {@link BD#QUERY_HINTS_NAMESPACE} for more information. + * See {@link QueryHints#NAMESPACE} for more information. */ private final Properties queryHints; @@ -32,7 +31,7 @@ /** * Overriden to use query hints from SPARQL queries. Query hints are * embedded in query strings as namespaces. - * See {@link BD#QUERY_HINTS_NAMESPACE} for more information. + * See {@link QueryHints#NAMESPACE} for more information. */ @Override public boolean evaluate() throws QueryEvaluationException { Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSailGraphQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSailGraphQuery.java 2011-01-02 00:41:31 UTC (rev 4044) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSailGraphQuery.java 2011-01-02 21:40:00 UTC (rev 4045) @@ -42,14 +42,13 @@ import org.openrdf.repository.sail.SailRepositoryConnection; import org.openrdf.sail.SailException; import com.bigdata.rdf.sail.BigdataSail.BigdataSailConnection; -import com.bigdata.rdf.store.BD; public class BigdataSailGraphQuery extends SailGraphQuery implements BigdataSailQuery { /** * Query hints are embedded in query strings as namespaces. - * See {@link BD#QUERY_HINTS_NAMESPACE} for more information. + * See {@link QueryHints#NAMESPACE} for more information. */ private final Properties queryHints; @@ -222,7 +221,7 @@ /** * Overriden to use query hints from SPARQL queries. Query hints are * embedded in query strings as namespaces. - * See {@link BD#QUERY_HINTS_NAMESPACE} for more information. + * See {@link QueryHints#NAMESPACE} for more information. */ @Override public GraphQueryResult evaluate() throws QueryEvaluationException { Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSailRepositoryConnection.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSailRepositoryConnection.java 2011-01-02 00:41:31 UTC (rev 4044) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSailRepositoryConnection.java 2011-01-02 21:40:00 UTC (rev 4045) @@ -29,7 +29,6 @@ import com.bigdata.rdf.sail.sparql.PrefixDeclProcessor; import com.bigdata.rdf.sail.sparql.StringEscapesProcessor; import com.bigdata.rdf.store.AbstractTripleStore; -import com.bigdata.rdf.store.BD; public class BigdataSailRepositoryConnection extends SailRepositoryConnection { @@ -52,7 +51,7 @@ * <p> * Overridden to capture query hints from SPARQL queries. Query hints are * embedded in query strings as namespaces. - * See {@link BD#QUERY_HINTS_NAMESPACE} for more information. + * See {@link QueryHints#NAMESPACE} for more information. */ @Override public SailGraphQuery prepareGraphQuery(final QueryLanguage ql, @@ -72,7 +71,7 @@ * <p> * Overridden to capture query hints from SPARQL queries. Query hints are * embedded in query strings as namespaces. - * See {@link BD#QUERY_HINTS_NAMESPACE} for more information. + * See {@link QueryHints#NAMESPACE} for more information. */ @Override public SailTupleQuery prepareTupleQuery(final QueryLanguage ql, @@ -89,7 +88,7 @@ * <p> * Overridden to capture query hints from SPARQL queries. Query hints are * embedded in query strings as namespaces. See - * {@link BD#QUERY_HINTS_NAMESPACE} for more information. + * {@link QueryHints#NAMESPACE} for more information. */ @Override public SailBooleanQuery prepareBooleanQuery(final QueryLanguage ql, @@ -106,7 +105,7 @@ * <p> * Overridden to capture query hints from SPARQL queries. Query hints are * embedded in query strings as namespaces. - * See {@link BD#QUERY_HINTS_NAMESPACE} for more information. + * See {@link QueryHints#NAMESPACE} for more information. */ @Override public SailQuery prepareQuery(final QueryLanguage ql, final String qs, @@ -251,7 +250,7 @@ /** * Parse query hints from a query string. Query hints are embedded in the * query string via special namespaces. - * See {@link BD#QUERY_HINTS_NAMESPACE} for more information. + * See {@link QueryHints#NAMESPACE} for more information. */ private Properties parseQueryHints(QueryLanguage ql, String queryString, String baseURI) @@ -270,7 +269,7 @@ for (Map.Entry<String, String> prefix : prefixes.entrySet()) { // if we see one that matches the magic namespace, try // to parse it - if (prefix.getKey().equalsIgnoreCase(BD.QUERY_HINTS_NAMESPACE)) { + if (prefix.getKey().equalsIgnoreCase(QueryHints.NAMESPACE)) { String hints = prefix.getValue(); // has to have a # and it can't be at the end int i = hints.indexOf('#'); Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSailTupleQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSailTupleQuery.java 2011-01-02 00:41:31 UTC (rev 4044) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSailTupleQuery.java 2011-01-02 21:40:00 UTC (rev 4045) @@ -14,14 +14,13 @@ import org.openrdf.sail.SailConnection; import org.openrdf.sail.SailException; import com.bigdata.rdf.sail.BigdataSail.BigdataSailConnection; -import com.bigdata.rdf.store.BD; public class BigdataSailTupleQuery extends SailTupleQuery implements BigdataSailQuery { /** * Query hints are embedded in query strings as namespaces. - * See {@link BD#QUERY_HINTS_NAMESPACE} for more information. + * See {@link QueryHints#NAMESPACE} for more information. */ private final Properties queryHints; @@ -34,7 +33,7 @@ /** * Overriden to use query hints from SPARQL queries. Query hints are * embedded in query strings as namespaces. - * See {@link BD#QUERY_HINTS_NAMESPACE} for more information. + * See {@link QueryHints#NAMESPACE} for more information. */ @Override public TupleQueryResult evaluate() throws QueryEvaluationException { Added: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/QueryHints.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/QueryHints.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/QueryHints.java 2011-01-02 21:40:00 UTC (rev 4045) @@ -0,0 +1,66 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2011. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Jan 2, 2011 + */ + +package com.bigdata.rdf.sail; + +import com.bigdata.bop.BOp; + +/** + * Query hint directives understood by a bigdata SPARQL end point. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public interface QueryHints { + + /** + * The namespace prefix used in SPARQL queries to signify query hints. You + * can embed query hints into a SPARQL query as follows: + * + * <pre> + * PREFIX BIGDATA_QUERY_HINTS: <http://www.bigdata.com/queryHints#name1=value1&name2=value2> + * </pre> + * + * where <i>name</i> is the name of a query hint and <i>value</i> is the + * value associated with that query hint. Multiple query hints can be + * specified (as shown in this example) using a <code>&</code> character + * to separate each name=value pair. + * <p> + * Query hints are either directives understood by the SPARQL end point or + * {@link BOp.Annotations}. A list of the known directives is declared by + * this interface. + */ + String NAMESPACE = "BIGDATA_QUERY_HINTS"; + + /** + * Specify the query optimizer. + * + * @see QueryOptimizerEnum + */ + String OPTIMIZER = QueryHints.class.getName() + ".optimizer"; + +} Property changes on: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/QueryHints.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Added: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/QueryOptimizerEnum.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/QueryOptimizerEnum.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/QueryOptimizerEnum.java 2011-01-02 21:40:00 UTC (rev 4045) @@ -0,0 +1,69 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2011. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Jan 2, 2011 + */ +package com.bigdata.rdf.sail; + +/** + * The known query optimizers. + * + * @see QueryHints#OPTIMIZER + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public enum QueryOptimizerEnum { + /** + * The query optimizer is disabled. The joins in the query will be evaluated + * in the order in which they are given. This may be used to compensate when + * the static query optimizer produces an inefficient join ordering. + */ + None, + /** + * A query optimizer based on a static analysis of the query which relies on + * fast range counts for the basic graph patterns to estimate the + * cardinality of the different access paths. This optimizer is fast but it + * can fail to order joins correctly as the error in the estimated + * cardinality of joins can grow exponentially in the number of joins in the + * query. + */ + Static, + /** + * A runtime query optimizer based on sampling. The runtime query optimizer + * samples each of the access paths and each of the joins and builds out + * join paths in a breadth first manner until it finds a join ordering which + * is known to dominate the other possible join orderings. The runtime query + * optimizer takes into account the actual cardinality and correlation in + * the query and the data selected by that query. The runtime query + * optimizer can have slightly more overhead than the static query + * optimizer, but it never produces a bad join ordering and often identifies + * the <em>best</em> join ordering. For cases where the <code>static</code> + * query optimizer produces a bad join ordering, the runtime query optimizer + * can find join orderings which are orders of magnitude more efficient (10x + * or 100x). For long running joins, this can translates into a savings of + * minutes or hours. + */ + Runtime; +} Property changes on: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/QueryOptimizerEnum.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/Rule2BOpUtility.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/Rule2BOpUtility.java 2011-01-02 00:41:31 UTC (rev 4044) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/Rule2BOpUtility.java 2011-01-02 21:40:00 UTC (rev 4045) @@ -30,12 +30,14 @@ import java.io.Serializable; import java.util.Arrays; import java.util.Collection; +import java.util.Enumeration; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.Properties; import java.util.Set; import java.util.concurrent.atomic.AtomicInteger; @@ -64,6 +66,8 @@ import com.bigdata.bop.bset.StartOp; import com.bigdata.bop.controller.Steps; import com.bigdata.bop.controller.Union; +import com.bigdata.bop.controller.JoinGraph.JGraph; +import com.bigdata.bop.controller.JoinGraph.Path; import com.bigdata.bop.cost.ScanCostReport; import com.bigdata.bop.cost.SubqueryCostReport; import com.bigdata.bop.engine.QueryEngine; @@ -243,13 +247,13 @@ */ public static PipelineOp convert(final IStep step, final AtomicInteger idFactory, final AbstractTripleStore db, - final QueryEngine queryEngine) { + final QueryEngine queryEngine, final Properties queryHints) { if (step instanceof IRule<?>) { // Convert the step to a bigdata operator tree. PipelineOp tmp = convert((IRule<?>) step, idFactory, db, - queryEngine); + queryEngine, queryHints); if (!tmp.getEvaluationContext().equals( BOpEvaluationContext.CONTROLLER)) { @@ -265,15 +269,55 @@ } - return tmp; + return applyQueryHints(tmp, queryHints); } - return convert((IProgram) step, idFactory, db, queryEngine); + return convert((IProgram) step, idFactory, db, queryEngine, queryHints); } /** + * Apply any query hints to the operator as annotations of that operator. + * + * @param op + * The operator. + * @param queryHints + * The query hints. + * + * @return A copy of that operator to which the query hints (if any) have + * been applied. If there are no query hints then the original + * operator is returned. + * + * @todo It would be nice if this would only apply those query hints to an + * operator which are known to be annotations understood by that + * operator. This information is basically available from the inner + * Annotation interface for a given operator class, but that is not + * really all that accessible. + */ + private static PipelineOp applyQueryHints(PipelineOp op, + Properties queryHints) { + + final Enumeration<?> pnames = queryHints.propertyNames(); + + while (pnames.hasMoreElements()) { + + final String name = (String) pnames.nextElement(); + + final String value = queryHints.getProperty(name); + + if (log.isInfoEnabled()) + log.info("Query hint: [" + name + "=" + value + "]"); + + op = (PipelineOp) op.setProperty(name, value); + + } + + return op; + + } + + /** * Convert a rule into an operator tree. * * @param rule @@ -282,52 +326,164 @@ */ public static PipelineOp convert(final IRule<?> rule, final AtomicInteger idFactory, final AbstractTripleStore db, - final QueryEngine queryEngine) { + final QueryEngine queryEngine, final Properties queryHints) { // // true iff the database is in quads mode. // final boolean isQuadsQuery = db.isQuads(); - final PipelineOp startOp = new StartOp(new BOp[] {}, + final PipelineOp startOp = applyQueryHints(new StartOp(new BOp[] {}, NV.asMap(new NV[] {// new NV(Predicate.Annotations.BOP_ID, idFactory .incrementAndGet()),// new NV(SliceOp.Annotations.EVALUATION_CONTEXT, BOpEvaluationContext.CONTROLLER),// - })); - + })),queryHints); + /* * First put the tails in the correct order based on the logic in * DefaultEvaluationPlan2. + * + * @todo Consider making order[] disappear such that all of the arrays + * (preds[], cardinality[], keyOrder[]) are indexed directly by the + * array index rather than by order[i]. Alternatively, make sure that + * the runtime query optimizer reports the permutation array (order[]) + * so we can maintain information about the relationship between the + * given joins and the evaluation order. */ final BOpContextBase context = new BOpContextBase(queryEngine); - final DefaultEvaluationPlan2 plan = new DefaultEvaluationPlan2( - new IRangeCountFactory() { + + final QueryOptimizerEnum optimizer = QueryOptimizerEnum + .valueOf(queryHints.getProperty(QueryHints.OPTIMIZER, + QueryOptimizerEnum.Static.toString())); - public long rangeCount(final IPredicate pred) { - return context.getRelation(pred).getAccessPath(pred) - .rangeCount(false); + // The evaluation plan order. + final int[] order; + // The estimated cardinality of each tail (if the optimizer provides it) + final long[] cardinality; + // The index assigned to each tail of the rule by static analysis. + final IKeyOrder[] keyOrder; + + switch(optimizer) { + case None: { + /* + * Do not run the join optimizer. + * + * @todo Do we need to move any of the joins to the front, e.g., + * magic search, or should everything just be left the way it is? + */ + order = new int[rule.getTailCount()]; + for (int i = 0; i < order.length; i++) { + order[i] = i; } + cardinality = null; + keyOrder = null; + break; + } + case Static: { + /* + * Static query optimizer. + */ + final DefaultEvaluationPlan2 plan = new DefaultEvaluationPlan2( + new IRangeCountFactory() { + + public long rangeCount(final IPredicate pred) { + return context.getRelation(pred) + .getAccessPath(pred).rangeCount(false); + } + + }, rule); + + order = plan.getOrder(); + + /* + * The index assigned to each tail of the rule by static analysis + * (this is often not the index which is actually used when we + * evaluate a given predicate since we always choose the best index + * and that can depend on whether or not we are binding the context + * position for a default or named graph query. When optional joins + * are involved, some variables may not become bound for some + * solutions. A different index will often be chosen for access + * paths using the unbound variable. + */ + + // the #of variables in each tail of the rule (set by side-effect). + final int[] nvars = new int[rule.getTailCount()]; + + cardinality = new long[rule.getTailCount()]; + for (int i = 0; i < cardinality.length; i++) { + cardinality[i] = plan.cardinality(i); + } + + keyOrder = computeKeyOrderForEachTail(rule, context, order, nvars); + + break; + + } + case Runtime: { + /* + * The runtime query optimizer. + * + * FIXME MikeP: I have modified the JoinGraph so that it can report + * the permutation order. However, the code here needs to isolate + * the join graph rather than running against all predicates in the + * tail. As it is, it will reorder optionals. + * + * FIXME We can not optimize quads here using the runtime query + * optimizer since we have not yet generated the full query plan. In + * order to get the runtime query optimizer working for quads we + * need to replace the DataSetJoin with a PipelineJoin against an + * inline "relation" containing the named or default graphs IVs. The + * runtime query optimizer does not accept the JOIN operators so the + * annotations which are being applied there will be lost which is + * another problem, especially in scale-out. Both of these issues + * need to be resolved before quads can be used with the runtime + * query optimizer. + * + * @todo In fact, we should be able to write in a JoinGraph operator + * which optimizes the join graph and then evaluates it rather than + * explicitly doing the optimization and evaluation steps here. + * + * @todo Make sure that a summary of the information collected by + * the runtime query optimizer is attached as an annotation to the + * query. + * + * @todo query hints for [limit] and [nedges]. + */ - }, rule); - - // evaluation plan order. - final int[] order = plan.getOrder(); - - // the #of variables in each tail of the rule. - final int[] nvars = new int[rule.getTailCount()]; + // The initial sampling limit. + final int limit = 100; - /* - * The index assigned to each tail of the rule by static analysis (this - * is often not the index which is actually used when we evaluate a - * given predicate since we always choose the best index and that can - * depend on whether or not we are binding the context position for a - * default or named graph query. When optional joins are involved, some - * variables may not become bound for some solutions. A different index - * will often be chosen for access paths using the unbound variable. - */ - final IKeyOrder[] keyOrder = computeKeyOrderForEachTail(rule, context, - order, nvars); + // The #of edges considered for the initial paths. + final int nedges = 2; + // isolate/extract the join graph. + final IPredicate[] preds = new IPredicate[rule.getTailCount()]; + for (int i = 0; i < preds.length; i++) { + preds[i] = rule.getTail(i); + } + + final JGraph g = new JGraph(preds); + + final Path p; + try { + p = g.runtimeOptimizer(queryEngine, limit, nedges); + } catch (Exception e) { + throw new RuntimeException(e); + } + + // the permutation order. + order = g.getOrder(p); + + keyOrder = null; + + cardinality = null; + + break; + } + default: + throw new AssertionError("Unknown option: " + optimizer); + } + // the variables to be retained for each join. final IVariable<?>[][] selectVars = RuleState .computeRequiredVarsForEachTail(rule, order); @@ -379,15 +535,22 @@ Predicate<?> pred = (Predicate<?>) rule.getTail(order[i]).setBOpId( idFactory.incrementAndGet()); - // decorate the predicate with the assigned index. -// pred = pred.setKeyOrder(keyOrder[order[i]]); - pred = (Predicate<?>) pred.setProperty(Annotations.ORIGINAL_INDEX, - keyOrder[order[i]]); + /* + * Decorate the predicate with the assigned index (this is purely + * informative). + */ + if (keyOrder != null && keyOrder[order[i]] != null) { + // pred = pred.setKeyOrder(keyOrder[order[i]]); + pred = (Predicate<?>) pred.setProperty( + Annotations.ORIGINAL_INDEX, keyOrder[order[i]]); + } // decorate the predicate with the cardinality estimate. - pred = (Predicate<?>) pred.setProperty( - Annotations.ESTIMATED_CARDINALITY, plan - .cardinality(order[i])); + if (cardinality != null) { + pred = (Predicate<?>) pred.setProperty( + Annotations.ESTIMATED_CARDINALITY, + cardinality[order[i]]); + } /* * Collect all the constraints for this predicate based on which @@ -468,11 +631,11 @@ switch (scope) { case NAMED_CONTEXTS: left = namedGraphJoin(queryEngine, context, idFactory, - left, anns, pred, dataset); + left, anns, pred, dataset, queryHints); break; case DEFAULT_CONTEXTS: left = defaultGraphJoin(queryEngine, context, idFactory, - left, anns, pred, dataset); + left, anns, pred, dataset, queryHints); break; default: throw new AssertionError(); @@ -494,10 +657,10 @@ BOpEvaluationContext.ANY)); anns.add(new NV(PipelineJoin.Annotations.PREDICATE,pred)); - - left = new PipelineJoin(new BOp[] { left }, anns - .toArray(new NV[anns.size()])); + left = applyQueryHints(new PipelineJoin(new BOp[] { left }, + anns.toArray(new NV[anns.size()])), queryHints); + } } else { @@ -506,7 +669,7 @@ * Triples or provenance mode. */ - left = triplesModeJoin(queryEngine, left, anns, pred); + left = triplesModeJoin(queryEngine, left, anns, pred, queryHints); } @@ -533,7 +696,8 @@ * @return The join operator. */ private static PipelineOp triplesModeJoin(final QueryEngine queryEngine, - final PipelineOp left, final List<NV> anns, Predicate<?> pred) { + final PipelineOp left, final List<NV> anns, Predicate<?> pred, + final Properties queryHints) { final boolean scaleOut = queryEngine.isScaleOut(); if (scaleOut) { @@ -551,8 +715,8 @@ anns.add(new NV(PipelineJoin.Annotations.PREDICATE,pred)); - return new PipelineJoin(new BOp[] { left }, anns - .toArray(new NV[anns.size()])); + return applyQueryHints(new PipelineJoin(new BOp[] { left }, anns + .toArray(new NV[anns.size()])), queryHints); } @@ -578,7 +742,7 @@ private static PipelineOp namedGraphJoin(final QueryEngine queryEngine, final BOpContextBase context, final AtomicInteger idFactory, final PipelineOp left, final List<NV> anns, Predicate<?> pred, - final Dataset dataset) { + final Dataset dataset, final Properties queryHints) { final boolean scaleOut = queryEngine.isScaleOut(); if (scaleOut) { @@ -603,8 +767,8 @@ anns.add(new NV(PipelineJoin.Annotations.PREDICATE,pred)); - return new PipelineJoin(new BOp[] { left }, anns - .toArray(new NV[anns.size()])); + return applyQueryHints(new PipelineJoin(new BOp[] { left }, anns + .toArray(new NV[anns.size()])), queryHints); } @@ -616,8 +780,8 @@ anns.add(new NV(PipelineJoin.Annotations.PREDICATE,pred)); - return new PipelineJoin(new BOp[] { left }, anns - .toArray(new NV[anns.size()])); + return applyQueryHints(new PipelineJoin(new BOp[] { left }, anns + .toArray(new NV[anns.size()])), queryHints); } @@ -646,8 +810,8 @@ anns.add(new NV(PipelineJoin.Annotations.PREDICATE,pred)); - return new PipelineJoin(new BOp[] { left }, anns - .toArray(new NV[anns.size()])); + return applyQueryHints(new PipelineJoin(new BOp[] { left }, anns + .toArray(new NV[anns.size()])), queryHints); } @@ -662,8 +826,8 @@ anns.add(new NV(PipelineJoin.Annotations.PREDICATE,pred)); - return new PipelineJoin(new BOp[] { left }, anns - .toArray(new NV[anns.size()])); + return applyQueryHints(new PipelineJoin(new BOp[] { left }, anns + .toArray(new NV[anns.size()])), queryHints); } @@ -714,8 +878,8 @@ anns.add(new NV(PipelineJoin.Annotations.PREDICATE,pred)); - return new PipelineJoin(new BOp[] { left }, anns - .toArray(new NV[anns.size()])); + return applyQueryHints(new PipelineJoin(new BOp[] { left }, anns + .toArray(new NV[anns.size()])), queryHints); } else { @@ -762,8 +926,8 @@ anns.add(new NV(PipelineJoin.Annotations.PREDICATE,pred)); - return new PipelineJoin(new BOp[] { dataSetJoin }, anns - .toArray(new NV[anns.size()])); + return applyQueryHints(new PipelineJoin(new BOp[] { dataSetJoin }, + anns.toArray(new NV[anns.size()])), queryHints); } @@ -786,7 +950,7 @@ private static PipelineOp defaultGraphJoin(final QueryEngine queryEngine, final BOpContextBase context, final AtomicInteger idFactory, final PipelineOp left, final List<NV> anns, Predicate<?> pred, - final Dataset dataset) { + final Dataset dataset, final Properties queryHints) { /* * @todo raise this into the caller and do one per rule rather than once @@ -813,8 +977,8 @@ anns.add(new NV(PipelineJoin.Annotations.PREDICATE,pred)); - return new PipelineJoin(new BOp[] { left }, anns - .toArray(new NV[anns.size()])); + return applyQueryHints(new PipelineJoin(new BOp[] { left }, anns + .toArray(new NV[anns.size()])), queryHints); } @@ -842,8 +1006,8 @@ anns.add(new NV(PipelineJoin.Annotations.PREDICATE, pred)); - return new PipelineJoin(new BOp[] { left }, anns - .toArray(new NV[anns.size()])); + return applyQueryHints(new PipelineJoin(new BOp[] { left }, anns + .toArray(new NV[anns.size()])), queryHints); } @@ -911,8 +1075,8 @@ // // } // -// return new PipelineJoin(new BOp[] { left, pred }, anns -// .toArray(new NV[anns.size()])); +// return applyQueryHints(new PipelineJoin(new BOp[] { left, pred }, anns +// .toArray(new NV[anns.size()])),queryHints); // // } @@ -987,8 +1151,8 @@ anns.add(new NV(PipelineJoin.Annotations.PREDICATE, pred)); - return new PipelineJoin(new BOp[] { left }, anns - .toArray(new NV[anns.size()])); + return applyQueryHints(new PipelineJoin(new BOp[] { left }, anns + .toArray(new NV[anns.size()])),queryHints); } else { @@ -1037,8 +1201,8 @@ anns.add(new NV(PipelineJoin.Annotations.PREDICATE,pred)); - return new PipelineJoin(new BOp[] { left }, anns - .toArray(new NV[anns.size()])); + return applyQueryHints(new PipelineJoin(new BOp[] { left }, anns + .toArray(new NV[anns.size()])),queryHints); } @@ -1059,7 +1223,7 @@ */ public static PipelineOp convert(final IProgram program, final AtomicInteger idFactory, final AbstractTripleStore db, - final QueryEngine queryEngine) { + final QueryEngine queryEngine, final Properties queryHints) { // When parallel, the program is translated to a UNION. Else STEPS. final boolean isParallel = program.isParallel(); @@ -1076,7 +1240,8 @@ for (int i = 0; i < arity; i++) { // convert the child IStep - BOpBase tmp = convert(steps[i], idFactory, db, queryEngine); + final BOpBase tmp = convert(steps[i], idFactory, db, queryEngine, + queryHints); /* * @todo Route binding sets around the UNION/STEPS operator. We need Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestQueryHints.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestQueryHints.java 2011-01-02 00:41:31 UTC (rev 4044) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestQueryHints.java 2011-01-02 21:40:00 UTC (rev 4045) @@ -28,22 +28,17 @@ import java.util.Collection; import java.util.LinkedList; -import org.openrdf.model.BNode; -import org.openrdf.model.Literal; -import org.openrdf.model.Resource; + import org.openrdf.model.URI; -import org.openrdf.model.impl.BNodeImpl; -import org.openrdf.model.impl.LiteralImpl; import org.openrdf.model.impl.URIImpl; -import org.openrdf.model.vocabulary.RDF; -import org.openrdf.model.vocabulary.RDFS; import org.openrdf.query.BindingSet; import org.openrdf.query.QueryLanguage; import org.openrdf.query.TupleQuery; import org.openrdf.query.TupleQueryResult; import org.openrdf.query.impl.BindingImpl; -import com.bigdata.rdf.store.BD; +import com.bigdata.bop.join.PipelineJoin; + /** * Unit tests the query hints aspect of the {@link BigdataSail} implementation. * @@ -68,7 +63,11 @@ /** * Tests adding query hints in SPARQL. * - * @throws Exception + * @throws Exception + * + * @todo Unfortunately, this does not really _test_ anything since the query + * should be answered correctly regardless of the query hint(s) + * specified. */ public void testQueryHints() throws Exception { @@ -102,20 +101,22 @@ { - String query = - "PREFIX "+BD.QUERY_HINTS_NAMESPACE+": " + - " <http://www.bigdata.com/queryOption#com.bigdata.relation.rule.eval.DefaultRuleTaskFactory.nestedSubquery=true&com.bigdata.fullScanTreshold=1000> " + - "SELECT * " + - "WHERE { " + - " <"+a+"> ?p ?o " + - "}"; - + final String query = "PREFIX " + QueryHints.NAMESPACE + + ": " + "<http://www.bigdata.com/queryOption#" + // + PipelineJoin.Annotations.MAX_PARALLEL + "=-5" // + + "&" + "com.bigdata.fullScanTreshold=1000" // + + ">\n"// + + "SELECT * " + // + "WHERE { " + // + " <" + a + "> ?p ?o " + // + "}"; + final TupleQuery tupleQuery = cxn.prepareTupleQuery(QueryLanguage.SPARQL, query); tupleQuery.setIncludeInferred(true /* includeInferred */); - TupleQueryResult result = tupleQuery.evaluate(); + final TupleQueryResult result = tupleQuery.evaluate(); - Collection<BindingSet> answer = new LinkedList<BindingSet>(); + final Collection<BindingSet> answer = new LinkedList<BindingSet>(); answer.add(createBindingSet( new BindingImpl("p", b), new BindingImpl("o", c) This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |