[Bigdata-commit] SF.net SVN: bigdata:[3705] branches/QUADS_QUERY_BRANCH/bigdata

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 422-6466

Revision: 3705
          http://bigdata.svn.sourceforge.net/bigdata/?rev=3705&view=rev
Author:   thompsonbry
Date:     2010-09-30 20:23:37 +0000 (Thu, 30 Sep 2010)

Log Message:
-----------
This commit incorporates the named graph decision tree and the various cost models into Rule2BOpUtility.  I will do the default graph query patterns tomorrow.  The new named graph and default graph logic is not yet enabled when running CI.  The cost models have been implemented based on the worksheets.  However they are not currently connected to the query planner.  

The only remaining features for the quads query branch are:

- PipelineJoin annotation to indicate an empty access path.

- UNION (this is basically a subquery operator).

- Advanced pattern for the ___C indices for default graph queries.

- ISimpleSplitHandler for the SPOC and SOPC shards.

CI is pretty close.  The lack of a native UNION operator is responsible for most of the SPARQL query errors that remain.  There are also a number of unit tests which have not yet been written for the bigdata operators, which accounts for most of the remaining errors.

Modified Paths:
--------------
    branches/QUADS_QUERY_BRANCH/bigdata/src/architecture/query-cost-model.xls
    branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java
    branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java
    branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/INBinarySearch.java
    branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/Rule2BOpUtility.java
    branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/bop/rdf/join/DataSetJoin.java
    branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/InGraphBinarySearchFilter.java
    branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/InGraphHashSetFilter.java
    branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOAccessPath.java
    branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOPredicate.java
    branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl.java

Added Paths:
-----------
    branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/cost/
    branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/cost/BTreeCostModel.java
    branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/cost/DiskCostModel.java
    branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/cost/IndexSegmentCostModel.java
    branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/cost/package.html

Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/architecture/query-cost-model.xls
===================================================================
(Binary files differ)

Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java
===================================================================

--- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java	2010-09-30 19:49:43 UTC (rev 3704)
+++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java	2010-09-30 20:23:37 UTC (rev 3705)
@@ -30,6 +30,7 @@
 
 import java.io.Serializable;
 
+import com.bigdata.bop.ap.Predicate;
 import com.bigdata.bop.ap.filter.BOpFilterBase;
 import com.bigdata.bop.ap.filter.BOpTupleFilter;
 import com.bigdata.bop.ap.filter.DistinctFilter;
@@ -469,9 +470,29 @@
     public IConstant<?> get(E e, int index);
 
     /**
-     * A copy of this {@link IPredicate} in which zero or more variables have
-     * been bound to constants using the given {@link IBindingSet}.
+     * Return a new instance in which all occurrences of the given variable have
+     * been replaced by the specified constant.
+     * 
+     * @param var
+     *            The variable.
+     * @param val
+     *            The constant.
+     * 
+     * @return A new instance of the predicate in which all occurrences of the
+     *         variable have been replaced by the constant.
+     * 
+     * @throws IllegalArgumentException
+     *             if either argument is <code>null</code>.
      */
+    public Predicate<E> asBound(final IVariable<?> var, final IConstant<?> val);
+
+    /**
+     * Return a new instance in which all occurrences of the variable appearing
+     * in the binding set have been replaced by their bound values.
+     * 
+     * @param bindingSet
+     *            The binding set.
+     */
     public IPredicate<E> asBound(IBindingSet bindingSet);
 
     /**

Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java
===================================================================
--- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java	2010-09-30 19:49:43 UTC (rev 3704)
+++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java	2010-09-30 20:23:37 UTC (rev 3705)
@@ -34,6 +34,7 @@
 import cern.colt.Arrays;
 
 import com.bigdata.bop.AbstractAccessPathOp;
+import com.bigdata.bop.ArrayBindingSet;
 import com.bigdata.bop.BOp;
 import com.bigdata.bop.Constant;
 import com.bigdata.bop.IBindingSet;
@@ -43,6 +44,8 @@
 import com.bigdata.bop.IVariable;
 import com.bigdata.bop.IVariableOrConstant;
 import com.bigdata.bop.NV;
+import com.bigdata.rdf.internal.IV;
+import com.bigdata.rdf.spo.SPOPredicate;
 import com.bigdata.relation.accesspath.ElementFilter;
 import com.bigdata.relation.accesspath.IElementFilter;
 import com.bigdata.relation.rule.ISolutionExpander;
@@ -305,6 +308,13 @@
         return getProperty(Annotations.REMOTE_ACCESS_PATH,
                 Annotations.DEFAULT_REMOTE_ACCESS_PATH);
     }
+   
+    public Predicate<E> asBound(final IVariable<?> var, final IConstant<?> val) {
+        // null arguments are illegal per the IPredicate#asBound() contract.
+        if (var == null || val == null) throw new IllegalArgumentException();
+        return asBound(new ArrayBindingSet(new IVariable[] { var },
+                new IConstant[] { val }));
+    }
     
     public Predicate<E> asBound(final IBindingSet bindingSet) {
 
@@ -323,13 +333,13 @@
 
             final IVariableOrConstant<?> t = (IVariableOrConstant<?>) get(i);
 
-            if (t == null) {
-                /*
-                 * Note: t != null handles the case where the [c] position of an
-                 * SPO is allowed to be null.
-                 */
-                continue;
-            }
+//            if (t == null) {
+//                /*
+//                 * Note: t != null handles the case where the [c] position of an
+//                 * SPO is allowed to be null.
+//                 */
+//                continue;
+//            }
 
             if (t.isConstant())
                 continue;

Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/INBinarySearch.java
===================================================================
--- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/INBinarySearch.java	2010-09-30 19:49:43 UTC (rev 3704)
+++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/INBinarySearch.java	2010-09-30 20:23:37 UTC (rev 3705)
@@ -63,10 +63,6 @@
  * 
  * @author <a href="mailto:tho...@us...">Bryan Thompson</a>
  * @version $Id$
- * 
- * @todo unit tests.
- * 
- * @todo variant based on a {@link ConcurrentHashMap}.
  */
 public class INBinarySearch<T> extends INConstraint<T> {
 

Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/cost/BTreeCostModel.java
===================================================================
--- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/cost/BTreeCostModel.java	                        (rev 0)
+++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/cost/BTreeCostModel.java	2010-09-30 20:23:37 UTC (rev 3705)
@@ -0,0 +1,113 @@
+/**
+
+Copyright (C) SYSTAP, LLC 2006-2010.  All rights reserved.
+
+Contact:
+     SYSTAP, LLC
+     4501 Tower Road
+     Greensboro, NC 27410
+     lic...@bi...
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+/*
+ * Created on Sep 30, 2010
+ */
+package com.bigdata.bop.cost;
+
+import com.bigdata.btree.AbstractBTree;
+import com.bigdata.btree.BTree;
+import com.bigdata.journal.IIndexManager;
+import com.bigdata.journal.Journal;
+
+/**
+ * A cost model for a range scan on a {@link BTree} backed by a {@link Journal}.
+ * The on disk representation of the {@link BTree} does not reflect the index
+ * order so a range scan on the {@link BTree} is basically turned into one
+ * random seek per node or leaf visited.
+ * 
+ * @author <a href="mailto:tho...@us...">Bryan Thompson</a>
+ * @version $Id$
+ * 
+ * @todo Add a parameter for the write retention queue? The capacity of the
+ *       queue could be turned into an estimate of the #of nodes and leaves
+ *       buffered. Alternatively, we have an estimate of the #of distinct nodes
+ *       and leaves on the queue in
+ *       {@link AbstractBTree#ndistinctOnWriteRetentionQueue}. With that, we
+ *       could decide how likely it is that the first N leaves of the
+ *       {@link BTree} are in the cache. However, this is all fuzzy since a
+ *       focus on one branch of the {@link BTree} could cause nothing but the
+ *       root to be in the cache when probing a different branch.
+ */
+public class BTreeCostModel {
+
+    /**
+     * Return the estimated cost of a range scan of the index.
+     * 
+     * @param diskCostModel
+     *            The cost model for the disk.
+     * @param rangeCount
+     *            The range count for the scan.
+     * @param btree
+     *            The index.
+     * 
+     * @return The estimated cost (milliseconds).
+     * 
+     * @todo how to get the right view onto the BTree without locking? or raise
+     *       the cost model into the {@link IIndexManager}?
+     */
+    public double rangeScan(final DiskCostModel diskCostModel,
+            final int rangeCount, final BTree btree) {
+
+        if (rangeCount == 0)
+            return 0d;
+
+        final int m = btree.getBranchingFactor();
+
+        final int height = btree.getHeight();
+
+        // average seek time to a leaf.
+        final double averageSeekTime = Math.max(0, (height - 1))
+                * diskCostModel.seekTime;
+
+        // leaf fill rate as a fraction (the reported value is divided by 100).
+        final double leafFillRate = ((double) btree.getUtilization()[1]) / 100;
+
+        /*
+         * The expected #of leaves to visit for that range scan. Partially
+         * filled leaves hold fewer tuples, so the leaf count is scaled UP by
+         * (1 / fillRate). The division is done in floating point so a scan
+         * which touches less than one full leaf is not truncated to zero
+         * leaves.
+         * 
+         * Note: There is an edge condition when the root leaf is empty
+         * (fillRate is zero). No scaling is applied in that case.
+         */
+        final double fillScale = leafFillRate > 0d ? (1d / leafFillRate) : 1d;
+
+        final double expectedLeafCount = Math
+                .ceil((((double) rangeCount) / m) * fillScale);
+
+        /*
+         * Expected total time for the key range scan. Overestimates since
+         * ignores cache reuse and OS caching of visited nodes. Ignores transfer
+         * costs.
+         */
+        final double estimatedCost = averageSeekTime * expectedLeafCount;
+
+        return estimatedCost;
+
+    }
+
+}


Property changes on: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/cost/BTreeCostModel.java
___________________________________________________________________
Added: svn:keywords
   + Id Date Revision Author HeadURL

Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/cost/DiskCostModel.java
===================================================================
--- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/cost/DiskCostModel.java	                        (rev 0)
+++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/cost/DiskCostModel.java	2010-09-30 20:23:37 UTC (rev 3705)
@@ -0,0 +1,62 @@
+/**
+
+Copyright (C) SYSTAP, LLC 2006-2010.  All rights reserved.
+
+Contact:
+     SYSTAP, LLC
+     4501 Tower Road
+     Greensboro, NC 27410
+     lic...@bi...
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+/*
+ * Created on Sep 30, 2010
+ */
+package com.bigdata.bop.cost;
+
+/**
+ * A cost model of the disk.
+ * 
+ * @todo Develop disk models for SAS,SATA,SSD and various RAID configurations,
+ *       including the #of spindles in the RAID array.
+ * @todo Develop disk models for SAN, NAS, NFS, parallel file systems, etc.
+ * @todo Conditionally copy the desired disk model parameters into the fields
+ *       above to see the performance estimates for a given configuration.
+ * @todo The scattered and sustained write rates can be estimated from the
+ *       transfer rate. However, SCSI does much better than SATA when it can
+ *       reorder the writes for improved locality.
+ */
+public class DiskCostModel {
+    /** Default model: 10 ms seek time and a transfer rate of 41943040 (40 * 1024 * 1024). */
+    public static final DiskCostModel DEFAULT = new DiskCostModel(10d, 41943040);
+
+    /**
+     * The average disk seek time (milliseconds).
+     */
+    final public double seekTime;
+    /** The transfer rate in bytes/second (IndexSegmentCostModel uses rate/1000 as bytes/ms). */
+    final public double transferRate;
+
+    /**
+     * Create a disk cost model from its two parameters.
+     * @param seekTime The average disk seek time (milliseconds).
+     * @param transferRate The transfer rate (same units as {@link #transferRate}).
+     */
+    public DiskCostModel(double seekTime, double transferRate) {
+        this.seekTime = seekTime;
+        this.transferRate = transferRate;
+    }
+
+}


Property changes on: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/cost/DiskCostModel.java
___________________________________________________________________
Added: svn:keywords
   + Id Date Revision Author HeadURL

Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/cost/IndexSegmentCostModel.java
===================================================================
--- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/cost/IndexSegmentCostModel.java	                        (rev 0)
+++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/cost/IndexSegmentCostModel.java	2010-09-30 20:23:37 UTC (rev 3705)
@@ -0,0 +1,99 @@
+/**
+
+Copyright (C) SYSTAP, LLC 2006-2010.  All rights reserved.
+
+Contact:
+     SYSTAP, LLC
+     4501 Tower Road
+     Greensboro, NC 27410
+     lic...@bi...
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+/*
+ * Created on Sep 30, 2010
+ */
+package com.bigdata.bop.cost;
+
+import com.bigdata.btree.IndexSegment;
+
+/**
+ * A cost model for a range scan on an {@link IndexSegment}.
+ * <p>
+ * Note: This uses a summary description of the {@link IndexSegment} for the
+ * cost model. This makes sense because we generally have 100s of index segments
+ * in scale-out and we do not want to probe them all for their exact costs.
+ * 
+ * @author <a href="mailto:tho...@us...">Bryan Thompson</a>
+ * @version $Id$
+ * 
+ * @todo
+ */
+public class IndexSegmentCostModel {
+
+    /**
+     * Return the estimated cost of a range scan on an index segment.
+     * 
+     * @param diskCostModel
+     *            The disk cost model.
+     * @param rangeCount
+     *            The range count for the index scan.
+     * @param branchingFactor
+     *            The branching factor for the index segments for this scale-out
+     *            index.
+     * @param averageBytesPerLeaf
+     *            The average #of bytes per leaf for this scale-out index.
+     * @param xferBufferSize
+     *            The size of the disk transfer buffer.
+     * 
+     * @return The estimated time for the range scan (milliseconds).
+     * 
+     * @throws IllegalArgumentException
+     *             if the branching factor or transfer buffer size is &lt;= 0.
+     */
+    public double rangeScan(final DiskCostModel diskCostModel,
+            final int rangeCount, final int branchingFactor,
+            final int averageBytesPerLeaf, final int xferBufferSize) {
+
+        if (rangeCount == 0)
+            return 0d;
+
+        // Both values are used as divisors below.
+        if (branchingFactor <= 0 || xferBufferSize <= 0)
+            throw new IllegalArgumentException();
+
+        // Expected #of leaves to visit.
+        final long expectedLeafCount = (long) Math.ceil(((double) rangeCount)
+                / branchingFactor);
+
+        // Expected #of bytes to transfer (long: the product can exceed 2^31).
+        final long leafBytesToXFer = expectedLeafCount * averageBytesPerLeaf;
+
+        // Expected #of disk transfers.
+        final long xfers = (long) Math.ceil(((double) leafBytesToXFer)
+                / xferBufferSize);
+
+        // Expected transfer time (ms).
+        final double xferTime = leafBytesToXFer
+                / (diskCostModel.transferRate / 1000);
+
+        // Expected disk seek time (ms): one seek per disk transfer.
+        final double seekTime = diskCostModel.seekTime * xfers;
+
+        // Expected total time (ms).
+        return seekTime + xferTime;
+
+    }
+
+}


Property changes on: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/cost/IndexSegmentCostModel.java
___________________________________________________________________
Added: svn:keywords
   + Id Date Revision Author HeadURL

Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/cost/package.html
===================================================================
--- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/cost/package.html	                        (rev 0)
+++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/cost/package.html	2010-09-30 20:23:37 UTC (rev 3705)
@@ -0,0 +1,14 @@
+<html>
+<head>
+<title>Cost models</title>
+</head>
+<body>
+
+<p>
+
+    This package provides cost models for estimating the cost of range scans on indices, together with a model of the underlying disk.
+    
+</p>
+
+</body>
+</html>
\ No newline at end of file


Property changes on: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/cost/package.html
___________________________________________________________________
Added: svn:keywords
   + Id Date Revision Author HeadURL

Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/Rule2BOpUtility.java
===================================================================
--- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/Rule2BOpUtility.java	2010-09-30 19:49:43 UTC (rev 3704)
+++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/Rule2BOpUtility.java	2010-09-30 20:23:37 UTC (rev 3705)
@@ -27,6 +27,7 @@
 
 package com.bigdata.bop.engine;
 
+import java.io.Serializable;
 import java.util.Collection;
 import java.util.HashMap;
 import java.util.HashSet;
@@ -37,27 +38,44 @@
 import java.util.Set;
 
 import org.apache.log4j.Logger;
+import org.openrdf.model.URI;
+import org.openrdf.query.Dataset;
+import org.openrdf.query.algebra.StatementPattern.Scope;
 
 import com.bigdata.bop.BOp;
 import com.bigdata.bop.BOpContextBase;
 import com.bigdata.bop.BOpEvaluationContext;
 import com.bigdata.bop.BOpUtility;
-import com.bigdata.bop.PipelineOp;
+import com.bigdata.bop.Constant;
 import com.bigdata.bop.IConstraint;
 import com.bigdata.bop.IPredicate;
 import com.bigdata.bop.IVariable;
 import com.bigdata.bop.NV;
+import com.bigdata.bop.PipelineOp;
 import com.bigdata.bop.ap.Predicate;
 import com.bigdata.bop.bset.StartOp;
 import com.bigdata.bop.join.PipelineJoin;
+import com.bigdata.bop.rdf.join.DataSetJoin;
 import com.bigdata.bop.solutions.SliceOp;
+import com.bigdata.rdf.internal.IV;
+import com.bigdata.rdf.lexicon.LexiconRelation;
+import com.bigdata.rdf.model.BigdataURI;
+import com.bigdata.rdf.sail.BigdataEvaluationStrategyImpl;
 import com.bigdata.rdf.sail.BigdataSail;
+import com.bigdata.rdf.spo.DefaultGraphSolutionExpander;
+import com.bigdata.rdf.spo.ISPO;
+import com.bigdata.rdf.spo.InGraphHashSetFilter;
+import com.bigdata.rdf.spo.NamedGraphSolutionExpander;
+import com.bigdata.rdf.store.AbstractTripleStore;
+import com.bigdata.rdf.store.IRawTripleStore;
+import com.bigdata.relation.accesspath.ElementFilter;
+import com.bigdata.relation.accesspath.IElementFilter;
 import com.bigdata.relation.rule.IProgram;
 import com.bigdata.relation.rule.IRule;
 import com.bigdata.relation.rule.IStep;
-import com.bigdata.relation.rule.Program;
 import com.bigdata.relation.rule.eval.DefaultEvaluationPlan2;
 import com.bigdata.relation.rule.eval.IRangeCountFactory;
+import com.bigdata.relation.rule.eval.RuleState;
 
 /**
  * Utility class converts {@link IRule}s to {@link BOp}s.
@@ -73,8 +91,113 @@
 public class Rule2BOpUtility {
 
     protected static final Logger log = Logger.getLogger(Rule2BOpUtility.class);
+
+    /**
+     * Flag to conditionally enable the new named and default graph support.
+     * <p>
+     * Note: When enabled, the {@link NamedGraphSolutionExpander} and
+     * {@link DefaultGraphSolutionExpander} must be stripped from the
+     * {@link IPredicate.Annotations#EXPANDER}. In the long term, we will simply
+     * no longer generate them in {@link BigdataEvaluationStrategyImpl}.
+     * <p>
+     * Note: If you want to test just the named graph stuff, then the default
+     * graph processing could be handed off to the
+     * {@link DefaultGraphSolutionExpander}.
+     */
+    private static boolean enableDecisionTree = false;
     
     /**
+     * Annotations used by the {@link BigdataEvaluationStrategyImpl} to
+     * communicate with the {@link Rule2BOpUtility}.
+     * <p>
+     * <h3>Quads Mode</h3>
+     * Several annotations are used to mark named and default graph patterns on
+     * the {@link IPredicate}s. Rather than attaching a named or default graph
+     * expander, we annotate the predicate with the metadata for the access path
+     * and then convert that annotation to the appropriate bop pattern in
+     * {@link Rule2BOpUtility}.
+     */
+    public interface Annotations {
+
+        /**
+         * Boolean flag indicates that the database is operating in quads mode.
+         */
+        String QUADS = Rule2BOpUtility.class.getName() + ".quads";
+
+        boolean DEFAULT_QUADS = false;
+
+        /**
+         * The {@link Dataset} associated with the access path (quads mode
+         * only). The {@link Dataset} is only provided by openrdf when FROM or
+         * FROM NAMED was used in the query. Otherwise the {@link Dataset} will
+         * be <code>null</code> and is not attached as an annotation.
+         * <p>
+         * Note: This annotation MUST be stripped from the query plan to prevent
+         * an attempt to serialize it for RMI in scale-out (the {@link Dataset}
+         * is not {@link Serializable}, can be quite large, and is captured by
+         * other constructions in the generated query plan).
+         */
+        String DATASET = Rule2BOpUtility.class.getName() + ".dataset";
+
+        /**
+         * The {@link Scope} of the access path (quads mode only). In quads mode
+         * the {@link Scope} is always provided by openrdf.
+         * 
+         * @see Scope#NAMED_CONTEXTS
+         * @see Scope#DEFAULT_CONTEXTS
+         */
+        String SCOPE = Rule2BOpUtility.class.getName() + ".scope";
+
+        /**
+         * The graph variable specified in the query (quads mode only).
+         * <p>
+         * Note: This is not used for SIDs mode because we use the standard
+         * triple store access paths.
+         * 
+         * @see org.openrdf.query.algebra.Var
+         * 
+         * @todo can we just use pred.get(3)?
+         */
+        String CVAR = Rule2BOpUtility.class.getName() + ".cvar";
+
+        /*
+         * Cost estimates.
+         */
+
+        /**
+         * The estimated cost of a SCAN + FILTER approach to a default graph or
+         * named graph query.
+         */
+        String COST_SCAN = Rule2BOpUtility.class.getName() + ".costScan";
+
+        /**
+         * The estimated cost of a SUBQUERY approach to a default graph or named
+         * graph query.
+         */
+        String COST_SUBQUERY = Rule2BOpUtility.class.getName()
+                + ".costSubquery";
+
+        /**
+         * The #of known graphs in the {@link Dataset} for a default graph or
+         * named graph query.
+         */
+        String NKNOWN = Rule2BOpUtility.class.getName() + ".nknown";
+
+    }
+
+    /**
+     * A list of annotations to be cleared from {@link Predicate} when they are
+     * copied into a query plan.
+     */
+    private static final String[] ANNS_TO_CLEAR_FROM_PREDICATE = new String[] {
+            Annotations.QUADS,//
+            Annotations.DATASET,//
+            Annotations.SCOPE,//
+            Annotations.CVAR,//
+            IPredicate.Annotations.OPTIONAL //
+    };
+    
+    /**
      * Convert an {@link IStep} into an operator tree. This should handle
      * {@link IRule}s and {@link IProgram}s as they are currently implemented
      * and used by the {@link BigdataSail}.
@@ -84,13 +207,13 @@
      * 
      * @return
      */
-    public static PipelineOp convert(final IStep step, 
-            final int startId, final QueryEngine queryEngine) {
-        
+    public static PipelineOp convert(final IStep step, final int startId,
+            final AbstractTripleStore db, final QueryEngine queryEngine) {
+
         if (step instanceof IRule)
-            return convert((IRule) step, startId, queryEngine);
+            return convert((IRule) step, startId, db, queryEngine);
         
-        throw new UnsupportedOperationException();
+        return convert((IProgram) step, startId, db, queryEngine);
 
     }
 
@@ -101,11 +224,11 @@
      * 
      * @return
      */
-    public static PipelineOp convert(final IRule rule, 
-            final int startId, final QueryEngine queryEngine) {
+    public static PipelineOp convert(final IRule rule, final int startId,
+            final AbstractTripleStore db, final QueryEngine queryEngine) {
 
         int bopId = startId;
-        
+
         final PipelineOp startOp = new StartOp(new BOp[] {},
                 NV.asMap(new NV[] {//
                         new NV(Predicate.Annotations.BOP_ID, bopId++),//
@@ -128,8 +251,13 @@
             
         }, rule);
         
+        // evaluation plan order.
         final int[] order = plan.getOrder();
         
+        // variables to be retained for each join.
+        final IVariable[][] selectVars = RuleState
+                .computeRequiredVarsForEachTail(rule, order);
+        
         /*
          * Map the constraints from the variables they use.  This way, we can
          * properly attach constraints to only the first tail in which the
@@ -174,8 +302,9 @@
             final int joinId = bopId++;
             
             // assign a bop id to the predicate
-            final IPredicate<?> pred = rule.getTail(order[i]).setBOpId(bopId++);
-            
+            Predicate<?> pred = (Predicate<?>) rule.getTail(order[i]).setBOpId(
+                    bopId++);
+
             /*
              * Collect all the constraints for this predicate based on which
              * variables make their first appearance in this tail
@@ -189,8 +318,8 @@
              * that make their first appearance in this tail.
              */
             for (BOp arg : pred.args()) {
-                if (arg instanceof IVariable) {
-                    final IVariable<?> v = (IVariable) arg;
+                if (arg instanceof IVariable<?>) {
+                    final IVariable<?> v = (IVariable<?>) arg;
                     /*
                      * We do a remove because we don't ever need to run these
                      * constraints again during subsequent joins once they
@@ -204,22 +333,94 @@
                         constraints.addAll(constraintsByVar.remove(v));
                 }
             }
+           
+            // annotations for this join.
+            final List<NV> anns = new LinkedList<NV>();
             
-            final PipelineOp joinOp = new PipelineJoin(//
-                    left, pred,//
-                    NV.asMap(new NV[] {//
-                            new NV(BOp.Annotations.BOP_ID, joinId),//
-                            new NV(PipelineJoin.Annotations.CONSTRAINTS, 
-                                    constraints.size() > 0 ? 
-                                            constraints.toArray(new IConstraint[constraints.size()]) : null),//
-                            new NV(PipelineJoin.Annotations.OPTIONAL, pred.isOptional()),//
-                            // Note: shard-partitioned joins!
-                            new NV( Predicate.Annotations.EVALUATION_CONTEXT,
-                                    BOpEvaluationContext.SHARDED),//
-                            }));
+            anns.add(new NV(BOp.Annotations.BOP_ID, joinId));
+
+            anns.add(new NV(PipelineJoin.Annotations.SELECT,
+                    selectVars[order[i]]));
             
-            left = joinOp;
+            if (pred.isOptional())
+                anns.add(new NV(PipelineJoin.Annotations.OPTIONAL, pred
+                        .isOptional()));
             
+            if (!constraints.isEmpty())
+                anns.add(new NV(PipelineJoin.Annotations.CONSTRAINTS,
+                        constraints
+                                .toArray(new IConstraint[constraints.size()])));
+            
+            final Scope scope = (Scope) pred.getProperty(Annotations.SCOPE);
+
+            // @todo can we just use pred.get(3)?
+            final org.openrdf.query.algebra.Var cvar = (org.openrdf.query.algebra.Var) pred
+                    .getProperty(Annotations.CVAR);
+
+            // true iff this is a quads access path.
+            final boolean quads = pred.getProperty(Annotations.QUADS,
+                    Annotations.DEFAULT_QUADS);
+
+            // strip off annotations that we do not want to propagate.
+            pred = pred.clearAnnotations(ANNS_TO_CLEAR_FROM_PREDICATE);
+
+            if (quads) {
+
+                /*
+                 * Quads mode.
+                 */
+
+                if (enableDecisionTree) {
+                    /*
+                     * Strip off the named graph or default graph expander (in
+                     * the long term it will simply not be generated.)
+                     */
+                    pred = pred
+                            .clearAnnotations(new String[] { IPredicate.Annotations.EXPANDER });
+
+                    switch (scope) {
+                    case NAMED_CONTEXTS:
+                        left = namedGraphJoin(queryEngine, left, anns, pred,
+                                cvar);
+                        break;
+                    case DEFAULT_CONTEXTS:
+                        left = defaultGraphJoin(queryEngine, left, anns, pred,
+                                cvar);
+                        break;
+                    default:
+                        throw new AssertionError();
+                    }
+                    
+                } else {
+
+                    /*
+                     * This is basically the old way of handling quads query
+                     * using expanders which were attached by
+                     * BigdataEvaluationStrategyImpl.
+                     */
+                    
+                    final boolean scaleOut = queryEngine.isScaleOut();
+                    if (scaleOut)
+                        throw new UnsupportedOperationException();
+                    
+                    anns.add(new NV(Predicate.Annotations.EVALUATION_CONTEXT,
+                            BOpEvaluationContext.ANY));
+
+                    left = new PipelineJoin(new BOp[] { left, pred }, anns
+                            .toArray(new NV[anns.size()]));
+
+                }
+
+            } else {
+
+                /*
+                 * Triples or provenance mode.
+                 */
+
+                left = triplesModeJoin(queryEngine, left, anns, pred);
+
+            }
+
         }
         
         // just for now while i'm debugging
@@ -228,42 +429,309 @@
         return left;
         
     }
-    
-    private static String toString(BOp bop) {
+
+    /**
+     * Generate a {@link PipelineJoin} for a triples mode access path.
+     * 
+     * @param queryEngine Asked whether it is scale-out: SHARDED evaluation context if so, else ANY.
+     * @param left The operator passed as the first argument of the join.
+     * @param anns The join annotations. Note: this list is modified (the evaluation context is added).
+     * @param pred The predicate passed as the second argument of the join.
+     * 
+     * @return The join operator.
+     */
+    private static PipelineOp triplesModeJoin(final QueryEngine queryEngine,
+            final PipelineOp left, final List<NV> anns, final Predicate pred) {
+
+        final boolean scaleOut = queryEngine.isScaleOut();
+        if (scaleOut) {
+            anns.add(new NV(Predicate.Annotations.EVALUATION_CONTEXT,
+                    BOpEvaluationContext.SHARDED));
+        } else {
+            anns.add(new NV(Predicate.Annotations.EVALUATION_CONTEXT,
+                    BOpEvaluationContext.ANY));
+        }
+
+        return new PipelineJoin(new BOp[] { left, pred }, anns
+                .toArray(new NV[anns.size()]));
+
+    }
+
+    /**
+     * Generate a named graph join (quads mode).
+     * 
+     * @param queryEngine Asked whether it is scale-out: SHARDED evaluation context if so, else ANY.
+     * @param left The operator passed as the first argument of the join.
+     * @param anns The join annotations. Note: this list is modified (entries are appended).
+     * @param pred The predicate; its DATASET annotation is read and dereferenced without a null check.
+     * @param cvar The context variable; a non-null value means C is bound to a constant.
+     * @return The join operator (the empty data set case throws UnsupportedOperationException).
+     */
+    private static PipelineOp namedGraphJoin(final QueryEngine queryEngine,
+            final PipelineOp left, final List<NV> anns, Predicate pred,
+            final org.openrdf.query.algebra.Var cvar) {
+
+        final Dataset dataset = (Dataset) pred.getProperty(Annotations.DATASET);
+
+        final boolean scaleOut = queryEngine.isScaleOut();
+        if (scaleOut) {
+            anns.add(new NV(Predicate.Annotations.EVALUATION_CONTEXT,
+                    BOpEvaluationContext.SHARDED));
+        } else {
+            anns.add(new NV(Predicate.Annotations.EVALUATION_CONTEXT,
+                    BOpEvaluationContext.ANY));
+        }
+
+        final DataSetSummary summary = new DataSetSummary(dataset
+                .getNamedGraphs());
+
+        anns.add(new NV(Annotations.NKNOWN, summary.nknown));
+
+        // true iff C is bound to a constant.
+        final boolean isCBound = cvar.getValue() != null;
         
+        if (isCBound) {
+
+            /*
+             * C is already bound.  The unmodified access path is used. 
+             */
+
+            return new PipelineJoin(new BOp[] { left, pred }, anns
+                    .toArray(new NV[anns.size()]));
+
+        } else if (summary.nknown == 0) {
+
+            /*
+             * The data set is empty (no graphs). Return a join backed by an
+             * empty access path.
+             * 
+             * Note: Since the join could be optional or part of an optional
+             * join group, we can not just drop it. Instead we need to return a
+             * join against an empty access path. Since the join could also
+             * "select" for some subset of variables, it seems that we really
+             * need to modify PipelineJoin to recognize an annotation indicating
+             * an empty access path. It can then substitute the empty access
+             * path when processing the source binding sets. There should be
+             * unit tests for this.
+             * 
+             * FIXME Return PipelineJoin with an EMPTY ACCESS PATH.
+             */
+            
+            throw new UnsupportedOperationException();
+            
+        } else if (summary.nknown == 1) {
+
+            /*
+             * The dataset contains exactly one graph. Bind C.
+             */
+            
+            pred = pred.asBound((IVariable) pred.get(3), new Constant(
+                    summary.firstContext));
+            
+            return new PipelineJoin(new BOp[] { left, pred }, anns
+                    .toArray(new NV[anns.size()]));
+
+        } else if (dataset == null) {
+
+            /*
+             * The dataset is all graphs. C is left unbound and the unmodified access path is used.
+             * NOTE(review): dataset was already dereferenced above, so null cannot reach this test.
+             */
+
+            return new PipelineJoin(new BOp[] { left, pred }, anns
+                    .toArray(new NV[anns.size()]));
+
+        } else {
+
+            /*
+             * Estimate cost of SCAN with C unbound.
+             */
+            final double scanCost = getScanCost(pred);
+
+            anns.add(new NV(Annotations.COST_SCAN, scanCost));
+
+            /*
+             * Estimate cost of SUBQUERY with C bound.
+             */
+            final double subqueryCost = getSubqueryCost(pred);
+
+            anns.add(new NV(Annotations.COST_SUBQUERY, subqueryCost));
+
+            if (scanCost < subqueryCost * summary.nknown) {
+
+                /*
+                 * Scan and filter. C is left unbound. We do a range scan on the
+                 * index and filter using an IN constraint.
+                 */
+
+                // IN filter for the named graphs.
+                final IElementFilter<ISPO> test = new InGraphHashSetFilter<ISPO>(
+                        summary.nknown, summary.graphs);
+
+                // layer filter onto the predicate.
+                pred = pred
+                        .addIndexLocalFilter(ElementFilter.newInstance(test));
+                
+                return new PipelineJoin(new BOp[] { left, pred }, anns
+                        .toArray(new NV[anns.size()]));
+
+            } else {
+
+                /*
+                 * Parallel Subquery. NOTE(review): EVALUATION_CONTEXT is added to anns again below.
+                 */
+
+                /*
+                 * Setup the data set join.
+                 * 
+                 * @todo When the #of named graphs is large we need to do
+                 * something special to avoid sending huge graph sets around
+                 * with the query. For example, we should create named data sets
+                 * and join against them rather than having an in-memory
+                 * DataSetJoin.
+                 * 
+                 * @todo The historical approach performed parallel subquery
+                 * using an expander pattern rather than a data set join. The
+                 * data set join should have very much the same effect, but it
+                 * may need to emit multiple chunks to have good parallelism.
+                 */
+
+                // The variable to be bound (the 4th argument of the predicate).
+                final IVariable var = (IVariable) pred.get(3);
+                
+                // The data set join. NOTE(review): never referenced below; the join is built on [left, pred].
+                final DataSetJoin dataSetJoin = new DataSetJoin(
+                        new BOp[] { var }, NV.asMap(new NV[] {
+                                new NV(DataSetJoin.Annotations.VAR, var),
+                                new NV(DataSetJoin.Annotations.GRAPHS, summary
+                                        .getGraphs()) }));
+
+                if (scaleOut) {
+                    anns.add(new NV(Predicate.Annotations.EVALUATION_CONTEXT,
+                            BOpEvaluationContext.SHARDED));
+                    anns.add(new NV(Predicate.Annotations.REMOTE_ACCESS_PATH,
+                            false));
+                } else {
+                    anns.add(new NV(Predicate.Annotations.EVALUATION_CONTEXT,
+                            BOpEvaluationContext.ANY));
+                    anns.add(new NV(Predicate.Annotations.REMOTE_ACCESS_PATH,
+                            false));
+                }
+
+                return new PipelineJoin(new BOp[] { left, pred }, anns
+                        .toArray(new NV[anns.size()]));
+
+            }
+
+        }
+
+    }
+
+    /**
+     * Placeholder for the estimated cost of a range scan with C unbound.
+     * @param pred The predicate (currently ignored).
+     * @return Always <code>1d</code>.
+     * 
+     * FIXME Cost models have been implemented, but are not yet hooked in.
+     */
+    static double getScanCost(Predicate pred) {
+        /*
+         * @todo Scan is more expensive on the Journal so this is set to ONE (1)
+         * and subquery is set to ZERO (0). This will get replaced by the actual
+         * computed costs shortly.
+         */
+        return 1d;
+    }
+
+    /**
+     * Placeholder for the estimated cost of one subquery with C bound.
+     * @param pred The predicate (currently ignored).
+     * @return Always <code>0d</code>.
+     * 
+     * FIXME Cost models have been implemented, but are not yet hooked in.
+     * NOTE(review): with scan=1 and subquery=0 the caller's scan test (1 < 0 * nknown) never holds.
+     */
+    static double getSubqueryCost(Predicate pred) {
+        return 0d;
+    }
+
+    /**
+     * Generate a default graph join (quads mode).
+     * 
+     * @param queryEngine Asked whether it is scale-out: SHARDED evaluation context if so, else ANY.
+     * @param left The upstream operator (not used yet).
+     * @param anns The join annotations. Note: modified (the evaluation context is added) before the throw.
+     * @param pred The predicate (not used yet); cvar, the context variable, is also not used yet.
+     * @return Never returns normally at present: always throws {@link UnsupportedOperationException}.
+     * 
+     * @todo The default graph remote access path query estimates do not take
+     *       RMI costs into account. This is Ok since we are only comparing
+     *       remote access paths with other remote access paths.
+     */
+    private static PipelineOp defaultGraphJoin(final QueryEngine queryEngine,
+            final PipelineOp left, final List<NV> anns, final Predicate pred,
+            final org.openrdf.query.algebra.Var cvar) {
+
+        // @todo decision of local vs remote ap.
+        final boolean scaleOut = queryEngine.isScaleOut();
+        if (scaleOut) {
+            anns.add(new NV(Predicate.Annotations.EVALUATION_CONTEXT,
+                    BOpEvaluationContext.SHARDED));
+        } else {
+            anns.add(new NV(Predicate.Annotations.EVALUATION_CONTEXT,
+                    BOpEvaluationContext.ANY));
+        }
+
+        /*
+         * FIXME implement the default graph decision tree. 
+         */
+        throw new UnsupportedOperationException();
+
+    }
+
+    /**
+     * Pretty print (aspects of) a bop.
+     * 
+     * @param bop
+     *            The bop.
+     *            
+     * @return The formatted representation.
+     */
+    private static String toString(final BOp bop) {
+        
         StringBuilder sb = new StringBuilder();
         
         toString(bop, sb, 0);
         
         // chop off the last \n
-        sb.setLength(sb.length()-1);
-        
+        sb.setLength(sb.length() - 1);
+
         return sb.toString();
-        
+
     }
-    
-    private static void toString(final BOp bop, final StringBuilder sb, 
+
+    private static void toString(final BOp bop, final StringBuilder sb,
             final int indent) {
-        
+
         for (int i = 0; i < indent; i++) {
             sb.append(' ');
         }
         sb.append(bop).append('\n');
 
         if (bop != null) {
-            List<BOp> args = bop.args();
+            final List<BOp> args = bop.args();
             for (BOp arg : args) {
-                toString(arg, sb, indent+4);
+                toString(arg, sb, indent + 4);
             }
-            IConstraint[] constraints = (IConstraint[]) bop
+            final IConstraint[] constraints = (IConstraint[]) bop
                     .getProperty(PipelineJoin.Annotations.CONSTRAINTS);
             if (constraints != null) {
                 for (IConstraint c : constraints) {
-                    toString(c, sb, indent+4);
+                    toString(c, sb, indent + 4);
                 }
             }
         }
-        
+
     }
     
     /**
@@ -275,10 +743,139 @@
      * 
      * FIXME What is the pattern for UNION?
      */
-    public static PipelineOp convert(final Program program) {
+    public static PipelineOp convert(final IProgram rule, final int startId,
+            final AbstractTripleStore db, final QueryEngine queryEngine) {
 
         throw new UnsupportedOperationException();
 
     }
 
+    /**
+     * Helper class summarizes the named graphs for a quads mode query.
+     * 
+     * @todo This could be used for either named or default graphs. All it does
+     *       is report the #of URIs known to the database.
+     * 
+     * @todo This summary could be computed once for a given query for its named
+     *       graphs and once for its default graph. We do not need to do this
+     *       for each predicate in the query.
+     */
+    private static class DataSetSummary {
+
+        /**
+         * The set of graphs. The {@link URI}s MUST have been resolved against
+         * the appropriate {@link LexiconRelation} such that their term
+         * identifiers (when they exist) are known. If any term identifier is
+         * {@link IRawTripleStore#NULL}, then the corresponding graph does not
+         * exist and no access path will be queried for that graph. However, a
+         * non- {@link IRawTripleStore#NULL} term identifier may also identify a
+         * graph which does not exist, in which case an access path will be
+         * created for that {@link URI} but will not visit any data.
+         */
+        public final Iterable<? extends URI> graphs;
+
+        /**
+         * The #of graphs in {@link #graphs} whose term identifier is known.
+         * While this is not proof that there is data in the quad store for a
+         * graph having the corresponding {@link URI}, it does allow the
+         * possibility that a graph could exist for that {@link URI}.
+         */
+        public final int nknown;
+//        * <p>
+//        * If {@link #nknown} is ZERO (0), then the access path is empty.
+//        * <p>
+//        * If {@link #nknown} is ONE (1), then the caller's {@link IAccessPath}
+//        * should be used and filtered to remove the context information. If
+//        * {@link #graphs} is <code>null</code>, which implies that ALL graphs
+//        * in the quad store will be used as the default graph, then
+//        * {@link #nknown} will be {@link Integer#MAX_VALUE}.
+
+        /**
+         * The term identifier for the first graph having a term identifier, or
+         * <code>null</code> if no graphs were specified having a term identifier
+         * (the constructor leaves it <code>null</code> in that case).
+         */
+        public final IV firstContext;
+
+        /**
+         * 
+         * @param graphs
+         *            The set of named graphs in the SPARQL DATASET (optional).
+         *            A runtime exception will be thrown by this constructor
+         *            if the {@link URI}s are not {@link BigdataURI}s. If
+         *            <code>graphs := null</code>, then the set of named graphs
+         *            is understood to be ALL graphs in the quad store.
+         */
+        public DataSetSummary(final Iterable<? extends URI> graphs) {
+
+            this.graphs = graphs;
+
+            IV firstContext = null;
+
+            if (graphs == null) {
+
+                nknown = Integer.MAX_VALUE;
+
+            } else {
+
+                final Iterator<? extends URI> itr = graphs.iterator();
+
+                int nknown = 0;
+
+                while (itr.hasNext()) {
+
+                    final BigdataURI uri = (BigdataURI) itr.next();
+
+                    if (uri.getIV() != null) {
+
+                        if (++nknown == 1) {
+
+                            firstContext = uri.getIV();
+
+                        }
+
+                    }
+
+                } // while
+
+                this.nknown = nknown;
+
+            }
+
+            this.firstContext = firstContext;
+
+        }
+
+        /**
+         * Return a dense array of the {@link IV}s for the graphs known to the database.
+         * NOTE(review): if graphs is null this sizes the array with Integer.MAX_VALUE, then dereferences null.
+         */
+        public IV[] getGraphs() {
+            
+            final IV[] a = new IV[nknown];
+            
+            final Iterator<? extends URI> itr = graphs.iterator();
+
+            int nknown = 0;
+
+            while (itr.hasNext()) {
+
+                final BigdataURI uri = (BigdataURI) itr.next();
+
+                final IV id = uri.getIV();
+
+                if (id != null) {
+
+                    a[nknown++] = id;
+
+                }
+
+            } // while
+            
+            return a;
+            
+        }
+        
+    } // DataSetSummary
+
 }

Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/bop/rdf/join/DataSetJoin.java
===================================================================
--- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/bop/rdf/join/DataSetJoin.java	2010-09-30 19:49:43 UTC (rev 3704)
+++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/bop/rdf/join/DataSetJoin.java	2010-09-30 20:23:37 UTC (rev 3705)
@@ -82,7 +82,7 @@
         String VAR = DataSetJoin.class.getName() + ".var";
 
         /**
-         * The {@link IV}s to be bound. This is logically a set and SHOULD NOT
+         * The {@link IV}[]s to be bound. This is logically a set and SHOULD NOT
          * include duplicates. The elements in this array SHOULD be ordered for
          * improved efficiency.
          */

Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/InGraphBinarySearchFilter.java
===================================================================
--- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/InGraphBinarySearchFilter.java	2010-09-30 19:49:43 UTC (rev 3704)
+++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/InGraphBinarySearchFilter.java	2010-09-30 20:23:37 UTC (rev 3705)
@@ -6,12 +6,11 @@
 import java.io.ObjectOutput;
 import java.util.Arrays;
 import java.util.HashSet;
+
 import org.openrdf.model.URI;
 
-import com.bigdata.bop.constraint.INBinarySearch;
 import com.bigdata.rdf.internal.IV;
 import com.bigdata.rdf.model.BigdataURI;
-import com.bigdata.relation.rule.eval.ISolution;
 
 /**
  * "IN" filter for the context position based on a sorted long[] of the
@@ -24,8 +23,6 @@
  * @version $Id$
  * 
  * @see InGraphHashSetFilter
- * 
- * @todo reconcile with {@link INBinarySearch}
  */
 public final class InGraphBinarySearchFilter<E extends ISPO> extends SPOFilter<E>
         implements Externalizable {

Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/InGraphHashSetFilter.java
===================================================================
--- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/InGraphHashSetFilter.java	2010-09-30 19:49:43 UTC (rev 3704)
+++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/InGraphHashSetFilter.java	2010-09-30 20:23:37 UTC (rev 3705)
@@ -1,31 +1,23 @@
 package com.bigdata.rdf.spo;
 
 import java.util.HashSet;
-import it.unimi.dsi.fastutil.longs.LongLinkedOpenHashSet;
 
 import org.openrdf.model.URI;
 
-import com.bigdata.bop.constraint.INHashMap;
 import com.bigdata.rdf.internal.IV;
 import com.bigdata.rdf.model.BigdataURI;
-import com.bigdata.rdf.store.IRawTripleStore;
-import com.bigdata.relation.rule.eval.ISolution;
 
 /**
  * "IN" filter for the context position based on a native long hash set
- * containing the acceptable graph identifiers. While evaluation of the
- * access path will be ordered, the filter does not maintain evolving state
- * so a hash set will likely beat a binary search.
+ * containing the acceptable graph identifiers. While evaluation of the access
+ * path will be ordered, the filter does not maintain evolving state so a hash
+ * set will likely beat a binary search.
  * 
- * @author <a href="mailto:tho...@us...">Bryan
- *         Thompson</a>
- * @version $Id$
+ * @author <a href="mailto:tho...@us...">Bryan Thompson</a>
+ * @version $Id: InGraphHashSetFilter.java 3694 2010-09-30 14:54:59Z mrpersonick
+ *          $
  * 
  * @see InGraphBinarySearchFilter
- * 
- * @todo reconcile with {@link INHashMap}.
- * 
- * @todo tighten serialization?
  */
 public final class InGraphHashSetFilter<E extends ISPO> extends SPOFilter<E> {
 
@@ -34,7 +26,7 @@
      */
     private static final long serialVersionUID = -6059009162692785772L;
 
-    final HashSet<IV> contextSet;
+    private final HashSet<IV> contextSet;
     
     /**
      * 

Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOAccessPath.java
===================================================================
--- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOAccessPath.java	2010-09-30 19:49:43 UTC (rev 3704)
+++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOAccessPath.java	2010-09-30 20:23:37 UTC (rev 3705)
@@ -181,6 +181,9 @@
      *            The context term identifier.
      * 
      * @return The constrained {@link IAccessPath}.
+     * 
+     * @deprecated with {@link DefaultGraphSolutionExpander} and
+     *             {@link NamedGraphSolutionExpander}.
      */
     public SPOAccessPath bindContext(final IV c) {
 
@@ -205,8 +208,10 @@
      *            is to be set
      * 
      * @return The constrained {@link IAccessPath}.
+     * 
+     * @deprecated with {@link #bindContext(IV)}
      */
-    public SPOAccessPath bindPosition(final int position, final IV v) {
+    private SPOAccessPath bindPosition(final int position, final IV v) {
 
         if (v == null) {
 

Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOPredicate.java
===================================================================
--- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOPredicate.java	2010-09-30 19:49:43 UTC (rev 3704)
+++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOPredicate.java	2010-09-30 20:23:37 UTC (rev 3705)
@@ -25,11 +25,9 @@
 
 import java.util.Map;
 
-import com.bigdata.bop.ArrayBindingSet;
 import com.bigdata.bop.BOp;
 import com.bigdata.bop.IBindingSet;
 import com.bigdata.bop.IConstant;
-import com.bigdata.bop.IVariable;
 import com.bigdata.bop.IVariableOrConstant;
 import com.bigdata.bop.NV;
 import com.bigdata.bop.ap.Predicate;
@@ -51,20 +49,6 @@
      */
     private static final long serialVersionUID = 1L;
 
-//    /**
-//     * The arity is 3 unless the context position was given (as either a
-//     * variable or bound to a constant) in which case it is 4.
-//     * 
-//     * @todo rather than having a conditional arity, modify the SPOPredicate
-//     *       constructor to pass on either args[3] or args[3] depending on
-//     *       whether we are using triples or quads.
-//     */
-//    public final int arity() {
-//        
-//        return get(3/*c*/) == null ? 3 : 4;
-//        
-//    }
-
     /**
      * Variable argument version of the shallow copy constructor.
      */
@@ -236,191 +220,6 @@
     
     }
 
-//    /**
-//     * Fully specified ctor.
-//     * 
-//     * @param relationName
-//     * @param partitionId
-//     * @param s
-//     * @param p
-//     * @param o
-//     * @param c
-//     *            MAY be <code>null</code>.
-//     * @param optional
-//     * @param constraint
-//     *            MAY be <code>null</code>.
-//     * @param expander
-//     *            MAY be <code>null</code>.
-//     * @param timestamp
-//     *            The timestamp or transaction identifier against which the
-//     *            predicate will read or write.
-//     */
-//    public SPOPredicate(//
-//            final String[] relationName, //
-//            final int partitionId, //
-//            final IVariableOrConstant<IV> s,//
-//            final IVariableOrConstant<IV> p,//
-//            final IVariableOrConstant<IV> o,//
-//            final IVariableOrConstant<IV> c,//
-//            final boolean optional, //
-//            final IElementFilter<ISPO> constraint,//
-//            final ISolutionExpander<ISPO> expander//
-////            final long timestamp
-//            ) {
-//        
-//        super((c == null ? new IVariableOrConstant[] { s, p, o }
-//                : new IVariableOrConstant[] { s, p, o, c }), //
-//                new NV(Annotations.RELATION_NAME, relationName), //
-//                new NV(Annotations.PARTITION_ID, partitionId), //
-//                new NV(Annotations.OPTIONAL, optional), //
-//                new NV(Annotations.CONSTRAINT, constraint), //
-//                new NV(Annotations.EXPANDER, expander));
-//
-////        if (relationName == null)
-////            throw new IllegalArgumentException();
-////       
-////        for (int i = 0; i < relationName.length; i++) {
-////            
-////            if (relationName[i] == null)
-////                throw new IllegalArgumentException();
-////            
-////        }
-////        
-////        if (relationName.length == 0)
-////            throw new IllegalArgumentException();
-////        
-////        if (partitionId < -1)
-////            throw new IllegalArgumentException();
-////        
-////        if (s == null)
-////            throw new IllegalArgumentException();
-////        
-////        if (p == null)
-////            throw new IllegalArgumentException();
-////        
-////        if (o == null)
-////            throw new IllegalArgumentException();
-////        
-////        this.relationName = relationName;
-////        
-////        this.partitionId = partitionId;
-////        
-////        this.s = s;
-////        this.p = p;
-////        this.o = o;
-////        this.c = c; // MAY be null.
-////
-////        this.optional = optional;
-////        
-////        this.constraint = constraint; /// MAY be null.
-////        
-////        this.expander = expander;// MAY be null.
-//        
-//    }
-
-//    /**
-//     * Copy constructor overrides the relation name(s).
-//     * 
-//     * @param relationName
-//     *            The new relation name(s).
-//     */
-//    protected SPOPredicate(final SPOPredicate src, final String[] relationName) {
-//        
-//        if (relationName == null)
-//            throw new IllegalArgumentException();
-//       
-//        for(int i=0; i<relationName.length; i++) {
-//            
-//            if (relationName[i] == null)
-//                throw new IllegalArgumentException();
-//            
-//        }
-//        
-//        if (relationName.length == 0)
-//            throw new IllegalArgumentException();
-// 
-//        this.partitionId = src.partitionId;
-//        
-//        this.s = src.s;
-//        this.p = src.p;
-//        this.o = src.o;
-//        this.c = src.c;
-//        
-//        this.relationName = relationName; // override.
-//     
-//        this.optional = src.optional;
-//        
-//        this.constraint = src.constraint;
-//        
-//        this.expander = src.expander;
-//        
-//    }
-
-//    /**
-//     * Copy constructor sets the index partition identifier.
-//     * 
-//     * @param partitionId
-//     *            The index partition identifier.
-//     *            
-//     * @throws IllegalArgumentException
-//     *             if the index partition identified is a negative integer.
-//     * @throws IllegalStateException
-//     *             if the index partition identifier was already specified.
-//     */
-//    protected SPOPredicate(final SPOPredicate src, final int partitionId) {
-//
-//        //@todo uncomment the other half of this test to make it less paranoid.
-//        if (src.partitionId != -1 ) {//&& this.partitionId != partitionId) {
-//            
-//            throw new IllegalStateException();
-//
-//        }
-//
-//        if (partitionId < 0) {
-//
-//            throw new IllegalArgumentException();
-//
-//        }
-//
-//        this.relationName = src.relationName;
-//        
-//        this.partitionId = partitionId;
-//        
-//        this.s = src.s;
-//        this.p = src.p;
-//        this.o = src.o;
-//        this.c = src.c;
-//   ...
 
[truncated message content]

[Bigdata-commit] SF.net SVN: bigdata:[3705] branches/QUADS_QUERY_BRANCH/bigdata

Fast, scalable, robust graph database platform

[Bigdata-commit] SF.net SVN: bigdata:[3705] branches/QUADS_QUERY_BRANCH/bigdata