From: <mrp...@us...> - 2010-08-19 20:55:40
Revision: 3449 http://bigdata.svn.sourceforge.net/bigdata/?rev=3449&view=rev Author: mrpersonick Date: 2010-08-19 20:55:32 +0000 (Thu, 19 Aug 2010) Log Message: ----------- renamed the evaluation strategy class Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSail.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl.java Removed Paths: ------------- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl2.java Copied: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl.java (from rev 3405, branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl2.java) =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl.java 2010-08-19 20:55:32 UTC (rev 3449) @@ -0,0 +1,2078 @@ +package com.bigdata.rdf.sail; + +import info.aduna.iteration.CloseableIteration; +import info.aduna.iteration.EmptyIteration; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.Set; +import java.util.concurrent.TimeUnit; +import org.apache.log4j.Logger; +import org.openrdf.model.Literal; +import org.openrdf.model.URI; +import org.openrdf.model.Value; +import org.openrdf.query.BindingSet; +import org.openrdf.query.Dataset; +import org.openrdf.query.QueryEvaluationException; +import org.openrdf.query.algebra.Compare; +import org.openrdf.query.algebra.Filter; +import org.openrdf.query.algebra.Group; +import org.openrdf.query.algebra.Join; +import org.openrdf.query.algebra.LeftJoin; +import org.openrdf.query.algebra.MultiProjection; +import org.openrdf.query.algebra.Or; +import org.openrdf.query.algebra.Order; +import org.openrdf.query.algebra.Projection; +import org.openrdf.query.algebra.ProjectionElem; +import org.openrdf.query.algebra.ProjectionElemList; +import org.openrdf.query.algebra.QueryModelNode; +import org.openrdf.query.algebra.QueryRoot; +import org.openrdf.query.algebra.SameTerm; +import org.openrdf.query.algebra.StatementPattern; +import org.openrdf.query.algebra.TupleExpr; +import org.openrdf.query.algebra.UnaryTupleOperator; +import org.openrdf.query.algebra.Union; +import org.openrdf.query.algebra.ValueConstant; +import org.openrdf.query.algebra.ValueExpr; +import org.openrdf.query.algebra.Var; +import org.openrdf.query.algebra.Compare.CompareOp; +import org.openrdf.query.algebra.StatementPattern.Scope; +import org.openrdf.query.algebra.evaluation.impl.EvaluationStrategyImpl; +import org.openrdf.query.algebra.evaluation.iterator.FilterIterator; +import org.openrdf.query.algebra.helpers.QueryModelVisitorBase; +import com.bigdata.BigdataStatics; +import com.bigdata.bop.Constant; +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IConstraint; +import com.bigdata.bop.IPredicate; +import com.bigdata.bop.IVariable; +import com.bigdata.bop.IVariableOrConstant; +import com.bigdata.bop.ap.Predicate; +import com.bigdata.bop.constraint.EQ; +import 
com.bigdata.bop.constraint.EQConstant; +import com.bigdata.bop.constraint.IN; +import com.bigdata.bop.constraint.NE; +import com.bigdata.bop.constraint.NEConstant; +import com.bigdata.bop.constraint.OR; +import com.bigdata.btree.keys.IKeyBuilderFactory; +import com.bigdata.rdf.internal.DummyIV; +import com.bigdata.rdf.internal.IV; +import com.bigdata.rdf.internal.IVUtility; +import com.bigdata.rdf.internal.constraints.InlineEQ; +import com.bigdata.rdf.internal.constraints.InlineGE; +import com.bigdata.rdf.internal.constraints.InlineGT; +import com.bigdata.rdf.internal.constraints.InlineLE; +import com.bigdata.rdf.internal.constraints.InlineLT; +import com.bigdata.rdf.internal.constraints.InlineNE; +import com.bigdata.rdf.lexicon.LexiconRelation; +import com.bigdata.rdf.model.BigdataValue; +import com.bigdata.rdf.rules.RuleContextEnum; +import com.bigdata.rdf.sail.BigdataSail.Options; +import com.bigdata.rdf.spo.DefaultGraphSolutionExpander; +import com.bigdata.rdf.spo.ExplicitSPOFilter; +import com.bigdata.rdf.spo.ISPO; +import com.bigdata.rdf.spo.NamedGraphSolutionExpander; +import com.bigdata.rdf.spo.SPOPredicate; +import com.bigdata.rdf.spo.SPOStarJoin; +import com.bigdata.rdf.store.AbstractTripleStore; +import com.bigdata.rdf.store.BD; +import com.bigdata.rdf.store.BigdataSolutionResolverator; +import com.bigdata.rdf.store.IRawTripleStore; +import com.bigdata.relation.accesspath.IAccessPath; +import com.bigdata.relation.accesspath.IBuffer; +import com.bigdata.relation.accesspath.IElementFilter; +import com.bigdata.relation.rule.IProgram; +import com.bigdata.relation.rule.IQueryOptions; +import com.bigdata.relation.rule.IRule; +import com.bigdata.relation.rule.ISolutionExpander; +import com.bigdata.relation.rule.ISortOrder; +import com.bigdata.relation.rule.IStep; +import com.bigdata.relation.rule.Program; +import com.bigdata.relation.rule.QueryOptions; +import com.bigdata.relation.rule.Rule; +import com.bigdata.relation.rule.eval.ActionEnum; +import com.bigdata.relation.rule.eval.DefaultEvaluationPlanFactory2; +import com.bigdata.relation.rule.eval.IEvaluationPlanFactory; +import com.bigdata.relation.rule.eval.IJoinNexus; +import com.bigdata.relation.rule.eval.IJoinNexusFactory; +import com.bigdata.relation.rule.eval.IRuleTaskFactory; +import com.bigdata.relation.rule.eval.ISolution; +import com.bigdata.relation.rule.eval.NestedSubqueryWithJoinThreadsTask; +import com.bigdata.relation.rule.eval.RuleStats; +import com.bigdata.search.FullTextIndex; +import com.bigdata.search.IHit; +import com.bigdata.striterator.DistinctFilter; +import com.bigdata.striterator.IChunkedOrderedIterator; + +/** + * Extended to rewrite Sesame {@link TupleExpr}s onto native {@link Rule}s and + * to evaluate magic predicates for full text search, etc. Query evaluation can + * proceed either by Sesame 2 evaluation or, if {@link Options#NATIVE_JOINS} is + * enabled, then by translation of Sesame 2 query expressions into native + * {@link IRule}s and native evaluation of those {@link IRule}s. + * + * <h2>Query options</h2> + * The following summarizes how various high-level query language feature are + * mapped onto native {@link IRule}s. + * <dl> + * <dt>DISTINCT</dt> + * <dd>{@link IQueryOptions#isDistinct()}, which is realized using + * {@link DistinctFilter}.</dd> + * <dt>ORDER BY</dt> + * <dd>{@link IQueryOptions#getOrderBy()} is effected by a custom + * {@link IKeyBuilderFactory} which generates sort keys that capture the desired + * sort order from the bindings in an {@link ISolution}. 
Unless DISTINCT is + * also specified, the generated sort keys are made unique by appending a one up + * long integer to the key - this prevents sort keys that otherwise compare as + * equals from dropping solutions. Note that the SORT is actually imposed by the + * {@link DistinctFilter} using an {@link IKeyBuilderFactory} assembled from the + * ORDER BY constraints. + * + * FIXME BryanT - implement the {@link IKeyBuilderFactory}. + * + * FIXME MikeP - assemble the {@link ISortOrder}[] from the query and set on + * the {@link IQueryOptions}.</dd> + * <dt>OFFSET and LIMIT</dt> + * <dd> + * <p> + * {@link IQueryOptions#getSlice()}, which is effected as a conditional in + * {@link NestedSubqueryWithJoinThreadsTask} based on the + * {@link RuleStats#solutionCount}. Query {@link ISolution}s are counted as + * they are generated, but they are only entered into the {@link ISolution} + * {@link IBuffer} when the solutionCount is GE the OFFSET and LT the LIMIT. + * Query evaluation halts once the LIMIT is reached. + * </p> + * <p> + * Note that when DISTINCT and either LIMIT and/or OFFSET are specified + * together, then the LIMIT and OFFSET <strong>MUST</strong> be applied after + * the solutions have been generated since we may have to generate more than + * LIMIT solutions in order to have LIMIT <em>DISTINCT</em> solutions. We + * handle this for now by NOT translating the LIMIT and OFFSET onto the + * {@link IRule} and instead let Sesame close the iterator once it has enough + * solutions. + * </p> + * <p> + * Note that LIMIT and SLICE requires an evaluation plan that provides stable + * results. For a simple query this is achieved by setting + * {@link IQueryOptions#isStable()} to <code>true</code>. + * <p> + * For a UNION query, you must also set {@link IProgram#isParallel()} to + * <code>false</code> to prevent parallelized execution of the {@link IRule}s + * in the {@link IProgram}. + * </p> + * </dd> + * <dt>UNION</dt> + * <dd>A UNION is translated into an {@link IProgram} consisting of one + * {@link IRule} for each clause in the UNION. + * + * FIXME MikeP - implement.</dd> + * </dl> + * <h2>Filters</h2> + * The following provides a summary of how various kinds of FILTER are handled. + * A filter that is not explicitly handled is left untranslated and will be + * applied by Sesame against the generated {@link ISolution}s. + * <p> + * Whenever possible, a FILTER is translated into an {@link IConstraint} on an + * {@link IPredicate} in the generated native {@link IRule}. Some filters are + * essentially JOINs against the {@link LexiconRelation}. Those can be handled + * either as JOINs (generating an additional {@link IPredicate} in the + * {@link IRule}) or as an {@link IN} constraint, where the inclusion set is + * pre-populated by some operation on the {@link LexiconRelation}. + * <dl> + * <dt>EQ</dt> + * <dd>Translated into an {@link EQ} constraint on an {@link IPredicate}.</dd> + * <dt>NE</dt> + * <dd>Translated into an {@link NE} constraint on an {@link IPredicate}.</dd> + * <dt>IN</dt> + * <dd>Translated into an {@link IN} constraint on an {@link IPredicate}.</dd> + * <dt>OR</dt> + * <dd>Translated into an {@link OR} constraint on an {@link IPredicate}.</dd> + * <dt></dt> + * <dd></dd> + * </dl> + * <h2>Magic predicates</h2> + * <p> + * {@link BD#SEARCH} is the only magic predicate at this time. 
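From the client side the magic predicate is just another statement pattern. A minimal usage sketch follows; the bd: prefix URI and the repository plumbing are assumptions for illustration and are not taken from this change set:

    import org.openrdf.query.QueryLanguage;
    import org.openrdf.query.TupleQuery;
    import org.openrdf.query.TupleQueryResult;
    import org.openrdf.repository.RepositoryConnection;

    public class SearchExample {
        /** Find literals matching "mike" and the statements that carry them. */
        public static void search(final RepositoryConnection cxn) throws Exception {
            final String query =
                "PREFIX bd: <http://www.bigdata.com/rdf/search#>\n" +
                "SELECT ?s ?o WHERE {\n" +
                // the object position MUST be a bound constant (a Literal)
                "  ?o bd:search \"mike\" .\n" +
                "  ?s ?p ?o .\n" +
                "}";
            final TupleQuery tq = cxn.prepareTupleQuery(QueryLanguage.SPARQL, query);
            final TupleQueryResult result = tq.evaluate();
            try {
                while (result.hasNext()) {
                    System.out.println(result.next());
                }
            } finally {
                result.close();
            }
        }
    }

Here the subject variable of the search pattern is fed by the free text index (via the FreeTextSearchExpander used below) rather than by a statement index scan.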
When the object + * position is bound to a constant, the magic predicate is evaluated once and + * the result is used to generate a set of term identifiers that are matches for + * the token(s) extracted from the {@link Literal} in the object position. Those + * term identifiers are then used to populate an {@link IN} constraint. The + * object position in the {@link BD#SEARCH} MUST be bound to a constant. + * </p> + * + * FIXME We are not in fact rewriting the query operation at all, simply + * choosing a different evaluation path as we go. The rewrite should really be + * isolated from the execution, e.g., in its own class. That more correct + * approach is more than I want to get into right now as we will have to define + * variants on the various operators that let us model the native rule system + * directly, e.g., an n-ary IProgram, n-ary IRule operator, an IPredicate + * operator, etc. Then we can handle evaluation using their model with anything + * re-written to our custom operators being caught by our custom evaluate() + * methods and everything else running their default methods. Definitely the + * right approach, and much easier to write unit tests. + * + * @todo REGEX : if there is a "ˆ" literal followed by a wildcard + * AND there are no flags which would cause problems (case-folding, etc) + * then the REGEX can be rewritten as a prefix scan on the lexicon, which + * is very efficient, and converted to an IN filter. When the set size is + * huge we should rewrite it as another tail in the query instead. + * <p> + * Otherwise, regex filters are left outside of the rule. We can't + * optimize that until we generate rules that perform JOINs across the + * lexicon and the spo relations (which we could do, in which case it + * becomes a constraint on that join). + * <p> + * We don't have any indices that are designed to optimize regex scans, + * but we could process a regex scan as a parallel iterator scan against + * the lexicon. + * + * @todo Roll more kinds of filters into the native {@link IRule}s as + * {@link IConstraint}s on {@link IPredicate}s. + * <p> + * isURI(), etc. can be evaluated by testing a bit flag on the term + * identifier, which is very efficient. + * <p> + * + * @todo Verify handling of datatype operations. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id: BigdataEvaluationStrategyImpl.java 2272 2009-11-04 02:10:19Z + * mrpersonick $ + */ +public class BigdataEvaluationStrategyImpl extends EvaluationStrategyImpl { + + /** + * Logger. + */ + protected static final Logger log = + Logger.getLogger(BigdataEvaluationStrategyImpl.class); + +// protected static final boolean INFO = log.isInfoEnabled(); +// +// protected static final boolean DEBUG = log.isDebugEnabled(); + + protected final BigdataTripleSource tripleSource; + + protected final Dataset dataset; + + private final AbstractTripleStore database; + + private final boolean nativeJoins; + + private final boolean starJoins; + + private final boolean inlineTerms; + + // private boolean slice = false, distinct = false, union = false; + // + // // Note: defaults are illegal values. + // private long offset = -1L, limit = 0L; + // /** + // * @param tripleSource + // */ + // public BigdataEvaluationStrategyImpl(final BigdataTripleSource + // tripleSource) { + // + // this(tripleSource, null/* dataset */, false WHY FALSE? 
/* nativeJoins + // */); + // + // } + /** + * @param tripleSource + * @param dataset + */ + public BigdataEvaluationStrategyImpl( + final BigdataTripleSource tripleSource, final Dataset dataset, + final boolean nativeJoins, final boolean starJoins, + final boolean inlineTerms) { + + super(tripleSource, dataset); + + this.tripleSource = tripleSource; + this.dataset = dataset; + this.database = tripleSource.getDatabase(); + this.nativeJoins = nativeJoins; + this.starJoins = starJoins; + this.inlineTerms = inlineTerms; + + } + + // @Override + // public CloseableIteration<BindingSet, QueryEvaluationException> evaluate( + // org.openrdf.query.algebra.Slice slice, BindingSet bindings) + // throws QueryEvaluationException { + // /* + // * Note: Sesame has somewhat different semantics for offset and limit. + // * They are [int]s. -1 is used to indicate the the offset or limit was + // * not specified. you use hasFoo() to see if there is an offset or a + // * limit and then assign the value. For bigdata, the NOP offset is 0L + // * and the NOP limit is Long.MAX_VALUE. + // * + // * Note: We can't process the offset natively unless we remove the slice + // * from the Sesame operator tree. If we did then we would skip over the + // * first OFFSET solutions and Sesame would skip over the first OFFSET + // * solutions that we passed on, essentially doubling the offset. + // * + // * FIXME native rule slices work, but they can not be applied if there + // * is a non-native filter outside of the join. This code could be + // * modified to test for that using tuplExpr.visit(...), but really we + // * just need to do a proper rewrite of the query expressions that is + // * distinct from their evaluation! + // */ + // //// if (!slice.hasOffset()) { + // // this.slice = true; + // // this.offset = slice.hasOffset() ? slice.getOffset() : 0L; + // // this.limit = slice.hasLimit() ? slice.getLimit() : Long.MAX_VALUE; + // //// return evaluate(slice.getArg(), bindings); + // //// } + // return super.evaluate(slice, bindings); + // } + // + // @Override + // public CloseableIteration<BindingSet, QueryEvaluationException> evaluate( + // Union union, BindingSet bindings) throws QueryEvaluationException { + // this.union = true; + // return super.evaluate(union, bindings); + // } + // + // @Override + // public CloseableIteration<BindingSet, QueryEvaluationException> evaluate( + // Distinct distinct, BindingSet bindings) + // throws QueryEvaluationException { + // this.distinct = true; + // return super.evaluate(distinct, bindings); + // } + + /** + * A set of properties that act as query hints for the join nexus. + */ + private Properties queryHints; + + /** + * This is the top-level method called by the SAIL to evaluate a query. + * The TupleExpr parameter here is guaranteed to be the root of the operator + * tree for the query. Query hints are parsed by the SAIL from the + * namespaces in the original query. See {@link BD#QUERY_HINTS_NAMESPACE}. + */ + public CloseableIteration<BindingSet, QueryEvaluationException> evaluate( + TupleExpr expr, BindingSet bindings, Properties queryHints) + throws QueryEvaluationException { + + // spit out the whole operator tree + if (log.isInfoEnabled()) { + log.info("operator tree:\n" + expr); + } + + this.queryHints = queryHints; + + if (log.isInfoEnabled()) { + log.info("queryHints:\n" + queryHints); + } + + return super.evaluate(expr, bindings); + + } + + + + /** + * Eventually we will want to translate the entire operator tree into a + * native bigdata program. 
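That operator tree can be inspected without any bigdata machinery at all; a freestanding sketch using only the Sesame 2 parser (the query string and URIs are invented for illustration):

    import org.openrdf.query.QueryLanguage;
    import org.openrdf.query.parser.ParsedQuery;
    import org.openrdf.query.parser.QueryParserUtil;

    public class OperatorTreeDemo {
        public static void main(final String[] args) throws Exception {
            final ParsedQuery pq = QueryParserUtil.parseQuery(
                    QueryLanguage.SPARQL,
                    "SELECT ?s ?v WHERE { " +
                    "  ?s <http://example.org/p> ?o . " +
                    "  ?o <http://example.org/q> ?v . }",
                    null /* baseURI */);
            // Prints a tree rooted at a Projection with a Join of two
            // StatementPatterns beneath it -- the shape handled natively here.
            System.out.println(pq.getTupleExpr());
        }
    }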
For now this is just a means of inspecting it. + */ + @Override + public CloseableIteration<BindingSet, QueryEvaluationException> evaluate( + TupleExpr expr, BindingSet bindings) + throws QueryEvaluationException { + + if (log.isDebugEnabled()) { + log.debug("tuple expr:\n" + expr); + } + + return super.evaluate(expr, bindings); + + } + + /** + * Translate top-level UNIONs into native bigdata programs for execution. + * This will attempt to look down the operator tree from this point and turn + * the Sesame operators into a set of native rules within a single program. + * <p> + * FIXME A Union is a BinaryTupleOperator composed of two expressions. This + * native evaluation only handles the special case where the left and right + * args are one of: {Join, LeftJoin, StatementPattern, Union}. It's + * possible that the left or right arg is something other than one of those + * operators, in which case we punt to the Sesame evaluation, which + * degrades performance. + * <p> + * FIXME Also, even if the left or right arg is one of the cases we handle, + * it's possible that the translation of that arg into a native rule will + * fail because of an unsupported SPARQL language feature, such as an + * embedded UNION or an unsupported filter type. + */ + @Override + public CloseableIteration<BindingSet, QueryEvaluationException> evaluate( + Union union, BindingSet bindings) throws QueryEvaluationException { + + if (!nativeJoins) { + // Use Sesame 2 evaluation + return super.evaluate(union, bindings); + } + + if (log.isDebugEnabled()) { + log.debug("union:\n" + union); + } + + /* + * FIXME Another deficiency in the native rule model. We can only handle + * top-level UNIONs for now. + */ + QueryModelNode operator = union; + while ((operator = operator.getParentNode()) != null) { + if (operator instanceof LeftJoin || operator instanceof Join) { + // Use Sesame 2 evaluation + + if (log.isInfoEnabled()) { + log.info("could not evaluate natively, punting to Sesame"); + } + if (log.isDebugEnabled()) { + log.debug(operator); + } + + return super.evaluate(union, bindings); + } + } + + + try { + + IStep query = createNativeQuery(union); + + if (query == null) { + return new EmptyIteration<BindingSet, QueryEvaluationException>(); + } + + return execute(query); + + } catch (UnknownOperatorException ex) { + + // Use Sesame 2 evaluation + + if (log.isInfoEnabled()) { + log.info("could not evaluate natively, punting to Sesame"); + } + if (log.isDebugEnabled()) { + log.debug(ex.getOperator()); + } + + return super.evaluate(union, bindings); + + } + + } + + /** + * Override evaluation of StatementPatterns to recognize magic search + * predicate. + @Override + public CloseableIteration<BindingSet, QueryEvaluationException> evaluate( + final StatementPattern sp, final BindingSet bindings) + throws QueryEvaluationException { + + // no check against the nativeJoins property here because we are simply + // using the native execution model to take care of magic searches. + + if (log.isDebugEnabled()) { + log.debug("evaluating statement pattern:\n" + sp); + } + + IStep query = createNativeQuery(sp); + + if (query == null) { + return new EmptyIteration<BindingSet, QueryEvaluationException>(); + } + + return execute(query); + + } + */ + + /** + * Translate top-level JOINs into native bigdata programs for execution. + * This will attempt to look down the operator tree from this point and turn + * the Sesame operators into a native rule. 
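Concretely, a "native rule" here has one tail predicate per statement pattern and no head. The sketch below mirrors the Rule constructor call that createNativeQuery(...) issues further down; the helper class is ours and the tail construction is elided:

    import java.util.List;

    import com.bigdata.bop.IPredicate;
    import com.bigdata.bop.IVariable;
    import com.bigdata.relation.rule.IRule;
    import com.bigdata.relation.rule.QueryOptions;
    import com.bigdata.relation.rule.Rule;

    public class NativeRuleShape {
        /**
         * One tail per statement pattern, no head (it is a query, not an
         * inference rule), plus the variables that must survive the join
         * for projection, filters, etc.
         */
        public static IRule shape(final List<IPredicate> tails,
                final IVariable[] requiredVars) {
            return new Rule("nativeJoin",
                    null, // head
                    tails.toArray(new IPredicate[tails.size()]),
                    QueryOptions.NONE,
                    null, // constraints
                    null, // constants
                    null, // taskFactory
                    requiredVars);
        }
    }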
+ * <p> + * FIXME It's possible that the translation of the left or right arg into a + * native rule will fail because of an unsupported SPARQL language feature, + * such as an embedded UNION or an unsupported filter type. + */ + @Override + public CloseableIteration<BindingSet, QueryEvaluationException> evaluate( + Join join, BindingSet bindings) throws QueryEvaluationException { + + if (!nativeJoins) { + // Use Sesame 2 evaluation + return super.evaluate(join, bindings); + } + + if (log.isDebugEnabled()) { + log.debug("join:\n" + join); + } + + /* + * FIXME Another deficiency in the native rule model. If we are doing + * a join that is nested inside an optional, we don't have the + * appropriate variable bindings to arrive at the correct answer. + * Example: + * select * + * { + * :x1 :p ?v . + * OPTIONAL { :x3 :q ?w } + * OPTIONAL { :x3 :q ?w . :x2 :p ?v } + * } + * + * 1. LeftJoin + * 2. LeftJoin + * 3. StatementPattern + * 4. StatementPattern + * 5. Join + * 6. StatementPattern + * 7. StatementPattern + * + * (1) punts, because the right arg is a Join and we can't mark an + * entire Join as optional. Then, (5) makes it here, to the evaluate + * method. But we can't evaluate it in isolation, we need to pump + * the bindings in from the stuff above it. + */ + QueryModelNode operator = join; + while ((operator = operator.getParentNode()) != null) { + if (operator instanceof LeftJoin) { + + // Use Sesame 2 evaluation + + if (log.isInfoEnabled()) { + log.info("could not evaluate natively, punting to Sesame"); + } + if (log.isDebugEnabled()) { + log.debug(operator); + } + + return super.evaluate(join, bindings); + } + } + + try { + + IStep query = createNativeQuery(join); + + if (query == null) { + return new EmptyIteration<BindingSet, QueryEvaluationException>(); + } + + return execute(query); + + } catch (UnknownOperatorException ex) { + + // Use Sesame 2 evaluation + + if (log.isInfoEnabled()) { + log.info("could not evaluate natively, punting to Sesame"); + } + if (log.isDebugEnabled()) { + log.debug(ex.getOperator()); + } + + return super.evaluate(join, bindings); + + } + + } + + /** + * Translate top-level LEFTJOINs into native bigdata programs for execution. + * This will attempt to look down the operator tree from this point and turn + * the Sesame operators into a native rule. + * <p> + * FIXME It's possible that the translation of the left or right arg into a + * native rule will fail because of an unsupported SPARQL language feature, + * such as an embedded UNION or an unsupported filter type. + */ + @Override + public CloseableIteration<BindingSet, QueryEvaluationException> evaluate( + LeftJoin join, BindingSet bindings) throws QueryEvaluationException { + + if (!nativeJoins) { + // Use Sesame 2 evaluation + return super.evaluate(join, bindings); + } + + if (log.isDebugEnabled()) { + log.debug("left join:\n" + join); + } + + /* + * FIXME Another deficiency in the native rule model. If we are doing + * a left join that is nested inside an optional, we don't have the + * appropriate variable bindings to arrive at the correct answer. + * Example: + * SELECT * + * { + * :x1 :p ?v . + * OPTIONAL + * { + * :x3 :q ?w . + * OPTIONAL { :x2 :p ?v } + * } + * } + * + * 1. LeftJoin + * 2. StatementPattern + * 3. LeftJoin + * 4. StatementPattern + * 5. StatementPattern + * + * (1) punts, because the right arg is a LeftJoin and we can't mark an + * entire Join as optional. Then, (3) makes it here, to the evaluate + * method. 
But we can't evaluate it in isolation, we need to pump + * the bindings in from the LeftJoin above it. + */ + QueryModelNode operator = join; + while ((operator = operator.getParentNode()) != null) { + if (operator instanceof LeftJoin) { + + // Use Sesame 2 evaluation + + if (log.isInfoEnabled()) { + log.info("could not evaluate natively, punting to Sesame"); + } + if (log.isDebugEnabled()) { + log.debug(operator); + } + + return super.evaluate(join, bindings); + } + } + + try { + + IStep query = createNativeQuery(join); + + if (query == null) { + return new EmptyIteration<BindingSet, QueryEvaluationException>(); + } + + return execute(query); + + } catch (UnknownOperatorException ex) { + + // Use Sesame 2 evaluation + + if (log.isInfoEnabled()) { + log.info("could not evaluate natively, punting to Sesame"); + } + if (log.isDebugEnabled()) { + log.debug(ex.getOperator()); + } + + return super.evaluate(join, bindings); + + } + + } + + /** + * This is the method that will attempt to take a top-level join or left + * join and turn it into a native bigdata rule. The Sesame operators Join + * and LeftJoin share only the common base class BinaryTupleOperator, but + * other BinaryTupleOperators are not supported by this method. Other + * specific types of BinaryTupleOperators will cause this method to throw + * an exception. + * <p> + * This method will also turn a single top-level StatementPattern into a + * rule with one predicate in it. + * <p> + * Note: As a pre-condition, the {@link Value}s in the query expression + * MUST have been rewritten as {@link BigdataValue}s and their term + * identifiers MUST have been resolved. Any term identifier that remains + * {@link IRawTripleStore#NULL} is an indication that there is no entry for + * that {@link Value} in the database. Since the JOINs are required (vs + * OPTIONALs), that means that there is no solution for the JOINs and an + * {@link EmptyIteration} is returned rather than evaluating the query. 
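That short-circuit in miniature (a sketch of the stated precondition only; the class and names are illustrative, with an unresolved term modeled as a null entry):

    import info.aduna.iteration.CloseableIteration;
    import info.aduna.iteration.EmptyIteration;
    import org.openrdf.query.BindingSet;
    import org.openrdf.query.QueryEvaluationException;

    public class LexiconShortCircuit {
        /**
         * If a required constant has no lexicon entry, no statement can
         * match it, so the query provably has no solutions and we can
         * answer with a typed empty iteration immediately.
         */
        public static CloseableIteration<BindingSet, QueryEvaluationException>
                shortCircuit(final Object[] resolvedTerms) {
            for (Object iv : resolvedTerms) {
                if (iv == null) {
                    return new EmptyIteration<BindingSet, QueryEvaluationException>();
                }
            }
            return null; // all terms known: proceed with real evaluation (elided)
        }
    }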
+ * + * @param join + * @return native bigdata rule + * @throws UnknownOperatorException + * this exception will be thrown if the Sesame join contains any + * SPARQL language constructs that cannot be converted into + * the bigdata native rule model + * @throws QueryEvaluationException + */ + private IRule createNativeQuery(final TupleExpr join) + throws UnknownOperatorException, + QueryEvaluationException { + + if (!(join instanceof StatementPattern || + join instanceof Join || join instanceof LeftJoin || + join instanceof Filter)) { + throw new AssertionError( + "only StatementPattern, Join, and LeftJoin supported"); + } + + // flattened collection of statement patterns nested within this join, + // along with whether or not each one is optional + final Map<StatementPattern, Boolean> stmtPatterns = + new LinkedHashMap<StatementPattern, Boolean>(); + // flattened collection of filters nested within this join + final Collection<Filter> filters = new LinkedList<Filter>(); + + // will throw EncounteredUnknownTupleExprException if the join + // contains something we don't handle yet + collectStatementPatterns(join, stmtPatterns, filters); + + if (false) { + for (Map.Entry<StatementPattern, Boolean> entry : + stmtPatterns.entrySet()) { + log.debug(entry.getKey() + ", optional=" + entry.getValue()); + } + for (Filter filter : filters) { + log.debug(filter.getCondition()); + } + } + + // generate tails + Collection<IPredicate> tails = new LinkedList<IPredicate>(); + // keep a list of free text searches for later to solve a named graphs + // problem + final Map<IPredicate, StatementPattern> searches = + new HashMap<IPredicate, StatementPattern>(); + for (Map.Entry<StatementPattern, Boolean> entry : stmtPatterns + .entrySet()) { + StatementPattern sp = entry.getKey(); + boolean optional = entry.getValue(); + IPredicate tail = generateTail(sp, optional); + // encountered a value not in the database lexicon + if (tail == null) { + if (log.isDebugEnabled()) { + log.debug("could not generate tail for: " + sp); + } + if (optional) { + // for optionals, just skip the tail + continue; + } else { + // for non-optionals, skip the entire rule + return null; + } + } + if (tail.getSolutionExpander() instanceof FreeTextSearchExpander) { + searches.put(tail, sp); + } + tails.add(tail); + } + + /* + * When in quads mode, we need to go through the free text searches and + * make sure that they are properly filtered for the dataset where + * needed. Joins will take care of this, so we only need to add a filter + * when a search variable does not appear in any other tails that are + * non-optional. + * + * @todo Bryan seems to think this can be fixed with a DISTINCT JOIN + * mechanism in the rule evaluation. + */ + if (database.isQuads() && dataset != null) { + for (IPredicate search : searches.keySet()) { + final Set<URI> graphs; + StatementPattern sp = searches.get(search); + switch (sp.getScope()) { + case DEFAULT_CONTEXTS: { + /* + * Query against the RDF merge of zero or more source + * graphs. + */ + graphs = dataset.getDefaultGraphs(); + break; + } + case NAMED_CONTEXTS: { + /* + * Query against zero or more named graphs. + */ + graphs = dataset.getNamedGraphs(); + break; + } + default: + throw new AssertionError(); + } + if (graphs == null) { + continue; + } + // why would we use a constant with a free text search??? 
+ if (search.get(0).isConstant()) { + throw new AssertionError(); + } + // get ahold of the search variable + com.bigdata.bop.Var searchVar = + (com.bigdata.bop.Var) search.get(0); + if (log.isDebugEnabled()) { + log.debug(searchVar); + } + // start by assuming it needs filtering, guilty until proven + // innocent + boolean needsFilter = true; + // check the other tails one by one + for (IPredicate<ISPO> tail : tails) { + ISolutionExpander<ISPO> expander = + tail.getSolutionExpander(); + // only concerned with non-optional tails that are not + // themselves magic searches + if (expander instanceof FreeTextSearchExpander + || tail.isOptional()) { + continue; + } + // see if the search variable appears in this tail + boolean appears = false; + for (int i = 0; i < tail.arity(); i++) { + IVariableOrConstant term = tail.get(i); + if (log.isDebugEnabled()) { + log.debug(term); + } + if (term.equals(searchVar)) { + appears = true; + break; + } + } + // if it appears, we don't need a filter + if (appears) { + needsFilter = false; + break; + } + } + // if it needs a filter, add it to the expander + if (needsFilter) { + if (log.isDebugEnabled()) { + log.debug("needs filter: " + searchVar); + } + FreeTextSearchExpander expander = (FreeTextSearchExpander) + search.getSolutionExpander(); + expander.addNamedGraphsFilter(graphs); + } + } + } + + // generate constraints + final Collection<IConstraint> constraints = + new LinkedList<IConstraint>(); + final Iterator<Filter> filterIt = filters.iterator(); + while (filterIt.hasNext()) { + final Filter filter = filterIt.next(); + final IConstraint constraint = generateConstraint(filter); + if (constraint != null) { + // remove if we are able to generate a native constraint for it + if (log.isDebugEnabled()) { + log.debug("able to generate a constraint: " + constraint); + } + filterIt.remove(); + constraints.add(constraint); + } + } + + /* + * FIXME Native slice, DISTINCT, etc. are all commented out for now. + * Except for ORDER_BY, support exists for all of these features in the + * native rules, but we need to separate the rewrite of the tupleExpr + * and its evaluation in order to properly handle this stuff. + */ + IQueryOptions queryOptions = QueryOptions.NONE; + // if (slice) { + // if (!distinct && !union) { + // final ISlice slice = new Slice(offset, limit); + // queryOptions = new QueryOptions(false/* distinct */, + // true/* stable */, null/* orderBy */, slice); + // } + // } else { + // if (distinct && !union) { + // queryOptions = QueryOptions.DISTINCT; + // } + // } + + if (log.isDebugEnabled()) { + for (IPredicate<ISPO> tail : tails) { + ISolutionExpander<ISPO> expander = tail.getSolutionExpander(); + if (expander != null) { + IAccessPath<ISPO> accessPath = database.getSPORelation() + .getAccessPath(tail); + accessPath = expander.getAccessPath(accessPath); + IChunkedOrderedIterator<ISPO> it = accessPath.iterator(); + while (it.hasNext()) { + log.debug(it.next().toString(database)); + } + } + } + } + + /* + * Collect a set of variables required beyond just the join (i.e. + * aggregation, projection, filters, etc.) 
+ */ + Set<String> required = new HashSet<String>(); + + try { + + QueryModelNode p = join; + while (true) { + p = p.getParentNode(); + if (log.isDebugEnabled()) { + log.debug(p.getClass()); + } + if (p instanceof UnaryTupleOperator) { + required.addAll(collectVariables((UnaryTupleOperator) p)); + } + if (p instanceof QueryRoot) { + break; + } + } + + if (filters.size() > 0) { + for (Filter filter : filters) { + required.addAll(collectVariables((UnaryTupleOperator) filter)); + } + } + + } catch (Exception ex) { + throw new QueryEvaluationException(ex); + } + + IVariable[] requiredVars = new IVariable[required.size()]; + int i = 0; + for (String v : required) { + requiredVars[i++] = com.bigdata.bop.Var.var(v); + } + + if (log.isDebugEnabled()) { + log.debug("required binding names: " + Arrays.toString(requiredVars)); + } + + if (starJoins) { // database.isQuads() == false) { + if (log.isDebugEnabled()) { + log.debug("generating star joins"); + } + tails = generateStarJoins(tails); + } + + // generate native rule + IRule rule = new Rule("nativeJoin", + // @todo should serialize the query string here for the logs. + null, // head + tails.toArray(new IPredicate[tails.size()]), queryOptions, + // constraints on the rule. + constraints.size() > 0 ? constraints + .toArray(new IConstraint[constraints.size()]) : null, + null/* constants */, null/* taskFactory */, requiredVars); + + if (BigdataStatics.debug) { + System.err.println(join.toString()); + System.err.println(rule.toString()); + } + + // we have filters that we could not translate natively + if (filters.size() > 0) { + if (log.isDebugEnabled()) { + log.debug("could not translate " + filters.size() + + " filters into native constraints:"); + for (Filter filter : filters) { + log.debug("\n" + filter.getCondition()); + } + } + // use the basic filter iterator for remaining filters + rule = new ProxyRuleWithSesameFilters(rule, filters); + } + + return rule; + + } + + /** + * Collect the variables used by this <code>UnaryTupleOperator</code> so + * they can be added to the list of required variables in the query for + * correct binding set pruning. 
+ * + * @param uto + * the <code>UnaryTupleOperator</code> + * @return + * the variables it uses + */ + protected Set<String> collectVariables(UnaryTupleOperator uto) + throws Exception { + + final Set<String> vars = new HashSet<String>(); + if (uto instanceof Projection) { + List<ProjectionElem> elems = + ((Projection) uto).getProjectionElemList().getElements(); + for (ProjectionElem elem : elems) { + vars.add(elem.getSourceName()); + } + } else if (uto instanceof MultiProjection) { + List<ProjectionElemList> elemLists = + ((MultiProjection) uto).getProjections(); + for (ProjectionElemList list : elemLists) { + List<ProjectionElem> elems = list.getElements(); + for (ProjectionElem elem : elems) { + vars.add(elem.getSourceName()); + } + } + } else if (uto instanceof Filter) { + Filter f = (Filter) uto; + ValueExpr ve = f.getCondition(); + ve.visit(new QueryModelVisitorBase<Exception>() { + @Override + public void meet(Var v) throws Exception { + vars.add(v.getName()); + } + }); + } else if (uto instanceof Group) { + Group g = (Group) uto; + g.visit(new QueryModelVisitorBase<Exception>() { + @Override + public void meet(Var v) { + vars.add(v.getName()); + } + }); + } else if (uto instanceof Order) { + Order o = (Order) uto; + o.visit(new QueryModelVisitorBase<Exception>() { + @Override + public void meet(Var v) { + vars.add(v.getName()); + } + }); + } + return vars; + + } + + /** + * This method will take a Union and attempt to turn it into a native + * bigdata program. If either the left or right arg is a Union, the method + * will act recursively to flatten the nested Unions into a single program. + * <p> + * See comments for {@link #evaluate(Union, BindingSet)}. + * + * @param union + * @return native bigdata program + * @throws UnknownOperatorException + * this exception will be thrown if the Sesame join contains any + * SPARQL language constructs that cannot be converted into the + * bigdata native rule model + * @throws QueryEvaluationException + */ + private IProgram createNativeQuery(Union union) + throws UnknownOperatorException, + QueryEvaluationException { + + // create a new program that can run in parallel + Program program = new Program("union", true); + + TupleExpr left = union.getLeftArg(); + // if the left arg is a union, create a program for it and merge it + if (left instanceof Union) { + Program p2 = (Program) createNativeQuery((Union) left); + program.addSteps(p2.steps()); + } else if (left instanceof Join || left instanceof LeftJoin || + left instanceof Filter) { + IRule rule = createNativeQuery(left); + if (rule != null) { + if (rule instanceof ProxyRuleWithSesameFilters) { + // unfortunately I think we just have to punt to be super safe + Collection<Filter> filters = + ((ProxyRuleWithSesameFilters) rule).getSesameFilters(); + if (log.isDebugEnabled()) { + log.debug("could not translate " + filters.size() + + " filters into native constraints:"); + for (Filter filter : filters) { + log.debug("\n" + filter.getCondition()); + } + } + throw new UnknownOperatorException(filters.iterator().next()); + } + program.addStep(rule); + } + } else if (left instanceof StatementPattern) { + IRule rule = createNativeQuery((StatementPattern) left); + if (rule != null) { + program.addStep(rule); + } + } else { + throw new UnknownOperatorException(left); + } + + TupleExpr right = union.getRightArg(); + // if the right arg is a union, create a program for it and merge it + if (right instanceof Union) { + Program p2 = (Program) createNativeQuery((Union) right); + 
program.addSteps(p2.steps()); + } else if (right instanceof Join || right instanceof LeftJoin || + right instanceof Filter) { + IRule rule = createNativeQuery(right); + if (rule != null) { + if (rule instanceof ProxyRuleWithSesameFilters) { + // unfortunately I think we just have to punt to be super safe + Collection<Filter> filters = + ((ProxyRuleWithSesameFilters) rule).getSesameFilters(); + if (log.isDebugEnabled()) { + log.debug("could not translate " + filters.size() + + " filters into native constraints:"); + for (Filter filter : filters) { + log.debug("\n" + filter.getCondition()); + } + } + throw new UnknownOperatorException(filters.iterator().next()); + } + program.addStep(rule); + } + } else if (right instanceof StatementPattern) { + IRule rule = createNativeQuery((StatementPattern) right); + if (rule != null) { + program.addStep(rule); + } + } else { + throw new UnknownOperatorException(right); + } + + return program; + + } + + /** + * Take the supplied tuple expression and flatten all the statement patterns + * into a collection that can then be fed into a bigdata rule. So if the + * tuple expression is itself a statement pattern or a filter, simply cast + * and add it to the appropriate collection. If the tuple expression is a + * join or left join, use recursion on the left and right argument of the + * join. If the tuple expression is anything else, for example a Union, + * this method will throw an exception. Currently Unions nested inside + * of joins is not supported due to deficiencies in the native bigdata + * rule model. + * <p> + * @todo support nested Unions + * + * @param tupleExpr + * @param stmtPatterns + * @param filters + */ + private void collectStatementPatterns(final TupleExpr tupleExpr, + final Map<StatementPattern, Boolean> stmtPatterns, + final Collection<Filter> filters) + throws UnknownOperatorException { + + if (tupleExpr instanceof StatementPattern) { + stmtPatterns.put((StatementPattern) tupleExpr, Boolean.FALSE); + } else if (tupleExpr instanceof Filter) { + final Filter filter = (Filter) tupleExpr; + filters.add(filter); + final TupleExpr arg = filter.getArg(); + collectStatementPatterns(arg, stmtPatterns, filters); + } else if (tupleExpr instanceof Join) { + final Join join = (Join) tupleExpr; + final TupleExpr left = join.getLeftArg(); + final TupleExpr right = join.getRightArg(); + collectStatementPatterns(left, stmtPatterns, filters); + collectStatementPatterns(right, stmtPatterns, filters); + } else if (tupleExpr instanceof LeftJoin) { + + final LeftJoin join = (LeftJoin) tupleExpr; + + /* + * FIXME Another deficiency in the native rule model. Incorrect + * scoping of join. + * Example: + * SELECT * + * { + * ?X :name "paul" + * {?Y :name "george" . OPTIONAL { ?X :email ?Z } } + * } + * + * 1. Join + * 2. StatementPattern + * 3. LeftJoin + * 4. StatementPattern + * 5. StatementPattern + * + * (1) starts collecting its child nodes and gets to (3), which + * puts us here in the code. But this is not a case where we + * can just flatten the whole tree. (3) needs to be evaluated + * independently, as a subprogram. + */ + QueryModelNode operator = join; + while ((operator = operator.getParentNode()) != null) { + if (operator instanceof Join) { + // Use Sesame 2 evaluation + throw new UnknownOperatorException(join); + } + } + + // FIXME is this right? what about multiple optionals - do they nest? 
+ final TupleExpr left = join.getLeftArg(); + final TupleExpr right = join.getRightArg(); + // all we know how to handle right now is a left join of: + // { StatementPattern || Join || LeftJoin } x { StatementPattern } + if (!(right instanceof StatementPattern)) { + throw new UnknownOperatorException(right); + } + final ValueExpr condition = join.getCondition(); + if (condition != null) { + /* + Filter filter = new Filter(right, condition); + // fake a filter, we just need the value expr later + filters.add(filter); + */ + // we have to punt on nested optional filters just to be safe + throw new UnknownOperatorException(join); + } + stmtPatterns.put((StatementPattern) right, Boolean.TRUE); + collectStatementPatterns(left, stmtPatterns, filters); + } else { + throw new UnknownOperatorException(tupleExpr); + } + + } + + /** + * Generate a bigdata {@link IPredicate} (tail) for the supplied + * StatementPattern. + * <p> + * As a shortcut, if the StatementPattern contains any bound values that + * are not in the database, this method will return null. + * + * @param stmtPattern + * @param optional + * @return the generated bigdata {@link Predicate} or <code>null</code> if + * the statement pattern contains bound values not in the database. + * @throws QueryEvaluationException + */ + private IPredicate generateTail(final StatementPattern stmtPattern, + final boolean optional) throws QueryEvaluationException { + + // create a solution expander for free text search if necessary + ISolutionExpander<ISPO> expander = null; + final Value predValue = stmtPattern.getPredicateVar().getValue(); + if (log.isDebugEnabled()) { + log.debug(predValue); + } + if (predValue != null && BD.SEARCH.equals(predValue)) { + final Value objValue = stmtPattern.getObjectVar().getValue(); + if (log.isDebugEnabled()) { + log.debug(objValue); + } + if (objValue != null && objValue instanceof Literal) { + expander = new FreeTextSearchExpander(database, + (Literal) objValue); + } + } + + // @todo why is [s] handled differently? + // because [s] is the variable in free text searches, no need to test + // to see if the free text search expander is in place + final IVariableOrConstant<IV> s = generateVariableOrConstant( + stmtPattern.getSubjectVar()); + if (s == null) { + return null; + } + + final IVariableOrConstant<IV> p; + if (expander == null) { + p = generateVariableOrConstant(stmtPattern.getPredicateVar()); + } else { + p = new Constant(DummyIV.INSTANCE); + } + if (p == null) { + return null; + } + + final IVariableOrConstant<IV> o; + if (expander == null) { + o = generateVariableOrConstant(stmtPattern.getObjectVar()); + } else { + o = new Constant(DummyIV.INSTANCE); + } + if (o == null) { + return null; + } + + final IVariableOrConstant<IV> c; + if (!database.isQuads()) { + /* + * Either triple store mode or provenance mode. + */ + final Var var = stmtPattern.getContextVar(); + if (var == null) { + // context position is not used. + c = null; + } else { + final Value val = var.getValue(); + if (val != null && database.isStatementIdentifiers()) { + /* + * Note: The context position is used as a statement + * identifier (SID). SIDs may be used to retrieve provenance + * statements (statements about statement) using high-level + * query. SIDs are represented as blank nodes and is not + * possible to have them bound in the original query. They + * only become bound during query evaluation. 
+ */ + throw new QueryEvaluationException( + "Context position is a statement identifier and may not be bound in the original query: " + + stmtPattern); + } + final String name = var.getName(); + c = com.bigdata.bop.Var.var(name); + } + } else { + /* + * Quad store mode. + * + * FIXME Scale-out joins depend on knowledge of the best access path + * and the index partitions (aka shards) which it will traverse. + * Review all of the new expanders and make sure that they do not + * violate this principle. Expanders tend to lazily determine the + * access path, and I believe that RDFJoinNexus#getTailAccessPath() + * may even refuse to operate with expanders. If this is the case, + * then the choice of the access path needs to be completely coded + * into the predicate as a combination of binding or clearing the + * context variable and setting an appropriate constraint (filter). + */ + if (BigdataStatics.debug) { + if (dataset == null) { + System.err.println("No dataset."); + } else { + final int defaultGraphSize = dataset.getDefaultGraphs() + .size(); + final int namedGraphSize = dataset.getNamedGraphs().size(); + if (defaultGraphSize > 10 || namedGraphSize > 10) { + System.err.println("large dataset: defaultGraphs=" + + defaultGraphSize + ", namedGraphs=" + + namedGraphSize); + } else { + System.err.println(dataset.toString()); + } + } + System.err.println(stmtPattern.toString()); + } + if (expander != null) { + /* + * @todo can this happen? If it does then we need to look at how + * to layer the expanders. + */ + // throw new AssertionError("expander already set"); + // we are doing a free text search, no need to do any named or + // default graph expansion work + c = null; + } else { + final Var cvar = stmtPattern.getContextVar(); + if (dataset == null) { + if (cvar == null) { + /* + * There is no dataset and there is no graph variable, + * so the default graph will be the RDF Merge of ALL + * graphs in the quad store. + * + * This code path uses an "expander" which strips off + * the context information and filters for the distinct + * (s,p,o) triples to realize the RDF Merge of the + * source graphs for the default graph. + */ + c = null; + expander = new DefaultGraphSolutionExpander(null/* ALL */); + } else { + /* + * There is no data set and there is a graph variable, + * so the query will run against all named graphs and + * [cvar] will be to the context of each (s,p,o,c) in + * turn. This handles constructions such as: + * + * "SELECT * WHERE {graph ?g {?g :p :o } }" + */ + expander = new NamedGraphSolutionExpander(null/* ALL */); + c = generateVariableOrConstant(cvar); + } + } else { // dataset != null + switch (stmtPattern.getScope()) { + case DEFAULT_CONTEXTS: { + /* + * Query against the RDF merge of zero or more source + * graphs. + */ + expander = new DefaultGraphSolutionExpander(dataset + .getDefaultGraphs()); + /* + * Note: cvar can not become bound since context is + * stripped for the default graph. + */ + if (cvar == null) + c = null; + el... [truncated message content] |