From: <tho...@us...> - 2010-10-01 15:36:16
Revision: 3713 http://bigdata.svn.sourceforge.net/bigdata/?rev=3713&view=rev Author: thompsonbry Date: 2010-10-01 15:36:09 +0000 (Fri, 01 Oct 2010) Log Message: ----------- Added some interfaces for reporting on those aspects of a B+Tree which feed into cost estimates for query plans. Integrated the named graph decision tree logic with the B+Tree cost model in QueryEngine. I've only done this for standalone at this point. We will have to override this for scale-out to use an approximation of the mixture of cost models for a BTree on the Journal and ~3 index segment files. That can be overridden in FederatedQueryEngine. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/cost/BTreeCostModel.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/cost/DiskCostModel.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/cost/IndexSegmentCostModel.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/Rule2BOpUtility.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/FederatedQueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/AbstractBTree.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/BTreeStatistics.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/BTreeUtilizationReport.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/IBTreeStatistics.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/IBTreeUtilizationReport.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java 2010-10-01 15:14:05 UTC (rev 3712) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java 2010-10-01 15:36:09 UTC (rev 3713) @@ -453,15 +453,32 @@ sb.append(','); sb.append(t.getClass().getSimpleName()); } - sb.append(")["); - final Integer id = (Integer) annotations.get(Annotations.BOP_ID); - if (id != null) - sb.append("Annotations.BOP_ID=" + id); - sb.append("]"); + sb.append(")"); + annotationsToString(sb); return sb.toString(); } + protected void annotationsToString(final StringBuilder sb) { + final Map<String,Object> annotations = annotations(); + if (!annotations.isEmpty()) { + sb.append("["); + boolean first = true; + for (Map.Entry<String, Object> e : annotations.entrySet()) { + if (!first) + sb.append(", "); + if (e.getValue() != null && e.getValue().getClass().isArray()) { + sb.append(e.getKey() + "=" + + Arrays.toString((Object[]) e.getValue())); + } else { + sb.append(e.getKey() + "=" + e.getValue()); + } + first = false; + } + sb.append("]"); + } + } + final public BOpEvaluationContext getEvaluationContext() { return getProperty(Annotations.EVALUATION_CONTEXT, Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java 2010-10-01 15:14:05 UTC (rev 3712) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java 2010-10-01 15:36:09 UTC (rev 3713) @@ -31,8 +31,6 @@ 
import java.util.Iterator; import java.util.Map; -import cern.colt.Arrays; - import com.bigdata.bop.AbstractAccessPathOp; import com.bigdata.bop.ArrayBindingSet; import com.bigdata.bop.BOp; @@ -44,8 +42,6 @@ import com.bigdata.bop.IVariable; import com.bigdata.bop.IVariableOrConstant; import com.bigdata.bop.NV; -import com.bigdata.rdf.internal.IV; -import com.bigdata.rdf.spo.SPOPredicate; import com.bigdata.relation.accesspath.ElementFilter; import com.bigdata.relation.accesspath.IElementFilter; import com.bigdata.relation.rule.ISolutionExpander; @@ -553,28 +549,7 @@ } sb.append(")"); - - final Map<String,Object> annotations = annotations(); - if (!annotations.isEmpty()) { - sb.append("["); - boolean first = true; - for (Map.Entry<String, Object> e : annotations.entrySet()) { - if (!first) - sb.append(", "); - // @todo remove relation name hack when making relation name a scalar. - if (Annotations.RELATION_NAME.equals(e.getKey()) - && e.getValue() != null - && e.getValue().getClass().isArray()) { - sb.append(e.getKey() + "=" - + Arrays.toString((String[]) e.getValue())); - } else { - sb.append(e.getKey() + "=" + e.getValue()); - } - first = false; - } - sb.append("]"); - } - + annotationsToString(sb); return sb.toString(); } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/cost/BTreeCostModel.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/cost/BTreeCostModel.java 2010-10-01 15:14:05 UTC (rev 3712) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/cost/BTreeCostModel.java 2010-10-01 15:36:09 UTC (rev 3713) @@ -26,9 +26,13 @@ */ package com.bigdata.bop.cost; +import java.text.NumberFormat; + import com.bigdata.btree.AbstractBTree; import com.bigdata.btree.BTree; -import com.bigdata.journal.IIndexManager; +import com.bigdata.btree.BTreeUtilizationReport; +import com.bigdata.btree.IBTreeStatistics; +import com.bigdata.btree.IBTreeUtilizationReport; import com.bigdata.journal.Journal; /** @@ -52,43 +56,71 @@ */ public class BTreeCostModel { + private final DiskCostModel diskCostModel; + /** + * + * @param diskCostModel + * The cost model for the disk on which the {@link Journal} + * backing the {@link BTree} is located. + */ + public BTreeCostModel(final DiskCostModel diskCostModel) { + + if (diskCostModel == null) + throw new IllegalArgumentException(); + + this.diskCostModel = diskCostModel; + + } + + /** + * Compute the height of the B+Tree from its entry count and branching + * factor (this can also be used to find the minimum height at which there + * could exist a given range count). + */ + static public int estimateHeight(final int entryCount, + final int branchingFactor) { + + if (entryCount < branchingFactor) + return 0; + + final double logm = Math.log(branchingFactor); + + final double logn = Math.log(entryCount); + + final double h = (logn / logm) - 1; + + return (int) Math.ceil(h); + + } + + /** * Return the estimated cost of a range scan of the index. * - * @param diskCostModel - * The cost model for the disk. * @param rangeCount * The range count for the scan. - * @param btree - * The index. + * @param m + * The B+Tree branching factor. + * @param h + * The B+Tree height. + * @param leafUtilization + * The leaf utilization percentage [0:100]. * * @return The estimated cost (milliseconds). - * - * @todo how to get the right view onto the BTree without locking? or raise - * the cost model into the {@link IIndexManager}? 
*/ - public double rangeScan(final DiskCostModel diskCostModel, - final int rangeCount, final BTree btree) { + public double rangeScan(final long rangeCount, final int m, final int h, + final int leafUtilization) { if (rangeCount == 0) return 0d; - // double height = (Math.log(branchingFactor) / Math.log(entryCount)) - - // 1; - - final int m = btree.getBranchingFactor(); - - final int entryCount = btree.getEntryCount(); - - final int height = btree.getHeight(); - // average seek time to a leaf. - final double averageSeekTime = Math.max(0, (height - 1)) + final double averageSeekTime = Math.max(0, (h - 1)) * diskCostModel.seekTime; // the percentage of the leaves which are full. // final double leafFillRate = .70d; - final double leafFillRate = ((double) btree.getUtilization()[1]) / 100; + final double leafFillRate = ((double) leafUtilization) / 100; /* * The expected #of leaves to visit for that range scan. @@ -96,7 +128,7 @@ * Note: There is an edge condition when the root leaf is empty * (fillRate is zero). */ - final double expectedLeafCount = Math.ceil((rangeCount / m) + final double expectedLeafCount = Math.ceil((((double) rangeCount) / m) * Math.min(1, (1 / leafFillRate))); /* @@ -110,4 +142,140 @@ } + /** + * Prints out some tables based on different disk cost models, branching + * factors, and range scans. To validate this, you can do a scatter plot of + * the rangeCount and cost columns and observe the log linear curve of the + * B+Tree. + * + * @param args + * ignored. + * + * @see <a + * href="src/resources/architectures/query-cost-model.xls">query-cost-model.xls</a> + */ + public static void main(String[] args) { + + final DiskCostModel[] diskCostModels = new DiskCostModel[] { DiskCostModel.DEFAULT }; + + final int[] branchingFactors = new int[] { 32, 64, 128, 256, 512, 1024 }; + + final int[] heights = new int[] {0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }; + + final int[] rangeCounts = new int[] { 1, 10, 100, 1000, 2000, 5000, 10000, 2000, 50000, 100000 }; + + final int leafUtilization = 65; // average percent "full" per leaf. + + System.out.println("seekTime\txferRate\tleafUtil\tm\theight\trangeCount\tcost(ms)"); + + final NumberFormat millisFormat = NumberFormat.getIntegerInstance(); + millisFormat.setGroupingUsed(true); + + final NumberFormat percentFormat = NumberFormat.getPercentInstance(); + percentFormat.setMinimumFractionDigits(0); + + final StringBuilder sb = new StringBuilder(); + + for (DiskCostModel diskCostModel : diskCostModels) { + + final BTreeCostModel btreeCostModel = new BTreeCostModel( + diskCostModel); + + for (int m : branchingFactors) { + + for (int h : heights) { + + for (int rangeCount : rangeCounts) { + + final int estimatedHeight = estimateHeight(rangeCount, + m); + + if (estimatedHeight > h) { + /* + * Skip range counts which are too large for the + * current B+Tree height. + */ + break; + } + + final double cost = btreeCostModel.rangeScan( + rangeCount, m, h, leafUtilization); + + sb.setLength(0); // reset. 
+ sb.append(millisFormat.format(diskCostModel.seekTime)); + sb.append('\t'); + sb.append(millisFormat + .format(diskCostModel.transferRate)); + sb.append('\t'); + sb.append(percentFormat.format(leafUtilization / 100d)); + sb.append('\t'); + sb.append(m); + sb.append('\t'); + sb.append(h); + sb.append('\t'); + sb.append(rangeCount); + sb.append('\t'); + sb.append(millisFormat.format(cost)); + System.out.println(sb); + + } + + } + + } + + } + + } + + private static class MockBTreeStatistics implements IBTreeStatistics { + + private final int m; + + private final int entryCount; + + private final int height; + + private final int leafCount; + + private final int nodeCount; + + private final IBTreeUtilizationReport utilReport; + + public MockBTreeStatistics(final int m, final int entryCount, + final int height, final int leafCount, final int nodeCount) { + this.m = m; + this.entryCount = entryCount; + this.height = height; + this.leafCount = leafCount; + this.nodeCount = nodeCount; + this.utilReport = new BTreeUtilizationReport(this); + } + + public int getBranchingFactor() { + return m; + } + + public int getEntryCount() { + return entryCount; + } + + public int getHeight() { + return height; + } + + public int getLeafCount() { + return leafCount; + } + + public int getNodeCount() { + return nodeCount; + } + + public IBTreeUtilizationReport getUtilization() { + return utilReport; + } + + } + } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/cost/DiskCostModel.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/cost/DiskCostModel.java 2010-10-01 15:14:05 UTC (rev 3712) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/cost/DiskCostModel.java 2010-10-01 15:36:09 UTC (rev 3713) @@ -47,16 +47,24 @@ */ final public double seekTime; + /** + * The average disk transfer rate (megabytes per second). + */ final public double transferRate; /** * * @param seekTime + * The average disk seek time (milliseconds). * @param transferRate + * The average disk transfer rate (megabytes per second). */ public DiskCostModel(double seekTime, double transferRate) { + this.seekTime = seekTime; + this.transferRate = transferRate; + } } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/cost/IndexSegmentCostModel.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/cost/IndexSegmentCostModel.java 2010-10-01 15:14:05 UTC (rev 3712) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/cost/IndexSegmentCostModel.java 2010-10-01 15:36:09 UTC (rev 3713) @@ -37,15 +37,27 @@ * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ - * - * @todo */ public class IndexSegmentCostModel { + private final DiskCostModel diskCostModel; + /** * * @param diskCostModel * The disk cost model. + */ + public IndexSegmentCostModel(final DiskCostModel diskCostModel) { + + if (diskCostModel == null) + throw new IllegalArgumentException(); + + this.diskCostModel = diskCostModel; + + } + + /** + * * @param rangeCount * The range count for the index scan. * @param branchingFactor @@ -58,8 +70,7 @@ * * @return The estimated time for the range scan (milliseconds). 
*/ - public double rangeScan(final DiskCostModel diskCostModel, - final int rangeCount, final int branchingFactor, + public double rangeScan(final int rangeCount, final int branchingFactor, final int averageBytesPerLeaf, final int xferBufferSize) { if (rangeCount == 0) Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java 2010-10-01 15:14:05 UTC (rev 3712) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java 2010-10-01 15:36:09 UTC (rev 3713) @@ -42,12 +42,24 @@ import org.apache.log4j.Logger; import com.bigdata.bop.BOp; +import com.bigdata.bop.BOpContextBase; +import com.bigdata.bop.IBindingSet; import com.bigdata.bop.PipelineOp; -import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.ap.Predicate; +import com.bigdata.bop.cost.BTreeCostModel; +import com.bigdata.bop.cost.DiskCostModel; +import com.bigdata.btree.AbstractBTree; import com.bigdata.btree.BTree; +import com.bigdata.btree.IBTreeStatistics; import com.bigdata.btree.IndexSegment; +import com.bigdata.btree.UnisolatedReadWriteIndex; import com.bigdata.btree.view.FusedView; import com.bigdata.journal.IIndexManager; +import com.bigdata.journal.Journal; +import com.bigdata.journal.NoSuchIndexException; +import com.bigdata.journal.TimestampUtility; +import com.bigdata.relation.IRelation; +import com.bigdata.relation.accesspath.IAccessPath; import com.bigdata.resources.IndexManager; import com.bigdata.service.IBigdataFederation; import com.bigdata.service.IDataService; @@ -771,4 +783,78 @@ // NOP } + /* + * Cost models. + */ + + /** + * The cost model associated with the disk on which the indices are stored. + * For a {@link Journal}, this is just the cost model of the backing disk. + * For the federation, this should be an average cost model. + * + * @todo This is not parameterized. A simple cost model is always assumed. + * The correct cost model is necessary in order to get the tradeoff + * point right for SCAN+FILTER versus SUBQUERY on SSD or RAID arrays + * with lots of spindles versus normal disk. + * + * @todo In a shared disk deployment, we might introduce one cost model for + * local SSD used to cache journals, one for local non-SSD disks used + * to cache index segments, and one for remote storage used to + * materialize historical journals and index segments for query. + * + * @todo In a federation, this should be reported out as metadata for the + * federation. Perhaps as a Jini attribute. + */ + private static final DiskCostModel diskCostModel = DiskCostModel.DEFAULT; + + /** + * Return an estimate of the cost of a scan on the predicate. + * + * @param pred + * The predicate. + * + * @return The estimated cost of a scan on that predicate. + * + * @todo This tunnels through to the {@link AbstractBTree} class and is thus + * specific to standalone and also may run into trouble once we + * support unisolated access paths for reads or mutation since it may + * encounter an {@link UnisolatedReadWriteIndex} instead of an + * {@link AbstractBTree}. 
+ */ + public <E> double estimateCost(final BOpContextBase context, + final Predicate<E> pred) { + + final IRelation<E> r = context.getRelation(pred); + + final IAccessPath<E> ap = context.getAccessPath(r, pred); + + final long rangeCount = ap.rangeCount(false/* exact */); + + /* + * Drill through to the actual index object on the local index manager + * (standalone only). This avoids the UnisolatedReadWriteIndex class + * used to protect the index from concurrent modifications. + */ + final String name = pred.getOnlyRelationName() + "." + + pred.getKeyOrder(); + + final long timestamp = (Long) pred + .getRequiredProperty(BOp.Annotations.TIMESTAMP); + + final AbstractBTree index = (AbstractBTree) getIndexManager().getIndex( + name, timestamp); + + if (index == null) + throw new NoSuchIndexException(name + ", timestamp=" + + TimestampUtility.toString(timestamp)); + + final IBTreeStatistics stats = index.getStatistics(); + + final double cost = new BTreeCostModel(diskCostModel).rangeScan( + rangeCount, stats.getBranchingFactor(), stats.getHeight(), + stats.getUtilization().getLeafUtilization()); + + return cost; + } + } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/Rule2BOpUtility.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/Rule2BOpUtility.java 2010-10-01 15:14:05 UTC (rev 3712) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/Rule2BOpUtility.java 2010-10-01 15:36:09 UTC (rev 3713) @@ -28,6 +28,7 @@ package com.bigdata.bop.engine; import java.io.Serializable; +import java.util.Arrays; import java.util.Collection; import java.util.HashMap; import java.util.HashSet; @@ -47,17 +48,24 @@ import com.bigdata.bop.BOpEvaluationContext; import com.bigdata.bop.BOpUtility; import com.bigdata.bop.Constant; +import com.bigdata.bop.HashBindingSet; +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IConstant; import com.bigdata.bop.IConstraint; import com.bigdata.bop.IPredicate; import com.bigdata.bop.IVariable; +import com.bigdata.bop.IVariableOrConstant; import com.bigdata.bop.NV; import com.bigdata.bop.PipelineOp; +import com.bigdata.bop.Var; import com.bigdata.bop.ap.Predicate; import com.bigdata.bop.bset.StartOp; import com.bigdata.bop.join.PipelineJoin; import com.bigdata.bop.rdf.join.DataSetJoin; import com.bigdata.bop.solutions.SliceOp; import com.bigdata.rdf.internal.IV; +import com.bigdata.rdf.internal.TermId; +import com.bigdata.rdf.internal.VTE; import com.bigdata.rdf.lexicon.LexiconRelation; import com.bigdata.rdf.model.BigdataURI; import com.bigdata.rdf.sail.BigdataEvaluationStrategyImpl; @@ -68,6 +76,7 @@ import com.bigdata.rdf.spo.NamedGraphSolutionExpander; import com.bigdata.rdf.store.AbstractTripleStore; import com.bigdata.rdf.store.IRawTripleStore; +import com.bigdata.relation.IRelation; import com.bigdata.relation.accesspath.ElementFilter; import com.bigdata.relation.accesspath.IElementFilter; import com.bigdata.relation.rule.IProgram; @@ -76,6 +85,7 @@ import com.bigdata.relation.rule.eval.DefaultEvaluationPlan2; import com.bigdata.relation.rule.eval.IRangeCountFactory; import com.bigdata.relation.rule.eval.RuleState; +import com.bigdata.striterator.IKeyOrder; /** * Utility class converts {@link IRule}s to {@link BOp}s. @@ -168,16 +178,23 @@ * The estimated cost of a SCAN + FILTER approach to a default graph or * named graph query. 
*/ - String COST_SCAN = Rule2BOpUtility.class.getName() + ".costScan"; + String COST_SCAN = Rule2BOpUtility.class.getName() + ".cost.scan"; /** * The estimated cost of a SUBQUERY approach to a default graph or named * graph query. */ String COST_SUBQUERY = Rule2BOpUtility.class.getName() - + ".costSubquery"; + + ".cost.subquery"; /** + * The #of samples used when estimating the cost of a SUBQUERY approach + * to a default graph or named graph query. + */ + String COST_SUBQUERY_SAMPLE_COUNT = Rule2BOpUtility.class.getName() + + ".cost.subquerySampleCount"; + + /** * The #of known graphs in the {@link Dataset} for a default graph or * named graph query. */ @@ -254,7 +271,14 @@ // evaluation plan order. final int[] order = plan.getOrder(); - // variables to be retained for each join. + // the #of variables in each tail of the rule. + final int[] nvars = new int[rule.getTailCount()]; + + // the index assigned to each tail of the rule. + final IKeyOrder[] keyOrder = computeKeyOrderForEachTail(rule, context, + order, nvars); + + // the variables to be retained for each join. final IVariable[][] selectVars = RuleState .computeRequiredVarsForEachTail(rule, order); @@ -304,6 +328,9 @@ // assign a bop id to the predicate Predicate<?> pred = (Predicate<?>) rule.getTail(order[i]).setBOpId( bopId++); + + // decorate the predicate with the assigned index. + pred = pred.setKeyOrder(keyOrder[order[i]]); /* * Collect all the constraints for this predicate based on which @@ -361,6 +388,9 @@ final boolean quads = pred.getProperty(Annotations.QUADS, Annotations.DEFAULT_QUADS); + // pull of the Sesame dataset before we strip the annotations. + final Dataset dataset = (Dataset) pred.getProperty(Annotations.DATASET); + // strip off annotations that we do not want to propagate. pred = pred.clearAnnotations(ANNS_TO_CLEAR_FROM_PREDICATE); @@ -380,12 +410,12 @@ switch (scope) { case NAMED_CONTEXTS: - left = namedGraphJoin(queryEngine, left, anns, pred, - cvar); + left = namedGraphJoin(queryEngine, context, left, anns, + pred, dataset, cvar); break; case DEFAULT_CONTEXTS: - left = defaultGraphJoin(queryEngine, left, anns, pred, - cvar); + left = defaultGraphJoin(queryEngine, context, left, + anns, pred, dataset, cvar); break; default: throw new AssertionError(); @@ -468,11 +498,10 @@ * @return */ private static PipelineOp namedGraphJoin(final QueryEngine queryEngine, - final PipelineOp left, final List<NV> anns, Predicate pred, + final BOpContextBase context, final PipelineOp left, + final List<NV> anns, Predicate pred, final Dataset dataset, final org.openrdf.query.algebra.Var cvar) { - final Dataset dataset = (Dataset) pred.getProperty(Annotations.DATASET); - final boolean scaleOut = queryEngine.isScaleOut(); if (scaleOut) { anns.add(new NV(Predicate.Annotations.EVALUATION_CONTEXT, @@ -482,8 +511,7 @@ BOpEvaluationContext.ANY)); } - final DataSetSummary summary = new DataSetSummary(dataset - .getNamedGraphs()); + final DataSetSummary summary = new DataSetSummary(dataset.getNamedGraphs()); anns.add(new NV(Annotations.NKNOWN, summary.nknown)); @@ -544,18 +572,41 @@ } else { /* - * Estimate cost of SCAN with C unbound) + * Estimate cost of SCAN with C unbound. */ - final double scanCost = getScanCost(pred); + final double scanCost = queryEngine.estimateCost(context, pred); anns.add(new NV(Annotations.COST_SCAN, scanCost)); /* - * Estimate cost of SUBQUERY with C bound. + * Estimate cost of SUBQUERY with C bound (sampling). 
+ * + * @todo This should randomly sample in case there is bias in the + * order in which the URIs are presented here. However, the only + * thing which would be likely to create a strong bias is if someone + * sorted them on the IVs or if the URIs were in the same ordering + * in which their IVs were assigned AND the data were somehow + * correlated with that order. I rate the latter as pretty unlikely + * and the former is not true, so this sampling approach should be + * pretty good. + * + * @todo parameter for the #of samples to take. */ - final double subqueryCost = getSubqueryCost(pred); + double subqueryCost = 0d; + final int limit = 100; + int nsamples = 0; + for (URI uri : summary.graphs) { + if (nsamples == limit) + break; + final IV graph = ((BigdataURI) uri).getIV(); + subqueryCost += queryEngine.estimateCost(context, pred.asBound( + (IVariable) pred.get(3), new Constant(graph))); + nsamples++; + } + subqueryCost /= nsamples; anns.add(new NV(Annotations.COST_SUBQUERY, subqueryCost)); + anns.add(new NV(Annotations.COST_SUBQUERY_SAMPLE_COUNT, nsamples)); if (scanCost < subqueryCost * summary.nknown) { @@ -628,34 +679,6 @@ } /** - * - * @param pred - * @return - * - * FIXME Cost models have been implemented, but are not yet hooked in. - */ - static double getScanCost(Predicate pred) { - /* - * @todo Scan is more expensive on the Journal so this is set to ONE (1) - * and subquery is set to ZERO (0). This will get replaced by the actual - * computed costs shortly. - */ - return 1d; - } - - /** - * - * @param pred - * @return - * - * FIXME Cost models have been implemented, but are not yet hooked - * in. - */ - static double getSubqueryCost(Predicate pred) { - return 0d; - } - - /** * Generate a default graph join (quads mode). * * @param queryEngine @@ -669,7 +692,8 @@ * remote access paths with other remote access paths. */ private static PipelineOp defaultGraphJoin(final QueryEngine queryEngine, - final PipelineOp left, final List<NV> anns, final Predicate pred, + final BOpContextBase context, final PipelineOp left, + final List<NV> anns, final Predicate pred, final Dataset dataset, final org.openrdf.query.algebra.Var cvar) { // @todo decision of local vs remote ap. @@ -878,4 +902,106 @@ } // DataSetSummary + /** + * Return an array indicating the {@link IKeyOrder} that will be used when + * reading on each of the tail predicates. The array is formed using a + * private {@link IBindingSet} and propagating fake bindings to each + * predicate in turn using the given evaluation order. + * + * @param order + * The evaluation order. + * @param nvars + * The #of unbound variables for each tail predicate is assigned + * by side-effect. + * + * @return An array of the {@link IKeyOrder}s for each tail predicate. The + * array is correlated with the predicates index in the tail of the + * rule NOT its evaluation order. 
+ */ + @SuppressWarnings("unchecked") + static private IKeyOrder[] computeKeyOrderForEachTail(final IRule rule, + final BOpContextBase context, final int[] order, final int[] nvars) { + + if (order == null) + throw new IllegalArgumentException(); + + if (order.length != rule.getTailCount()) + throw new IllegalArgumentException(); + + final int tailCount = rule.getTailCount(); + + final IKeyOrder[] a = new IKeyOrder[tailCount]; + + final IBindingSet bindingSet = new HashBindingSet(); + + for (int orderIndex = 0; orderIndex < tailCount; orderIndex++) { + + final int tailIndex = order[orderIndex]; + + final IPredicate pred = rule.getTail(tailIndex); + + final IRelation rel = context.getRelation(pred); + + final IPredicate asBound = pred.asBound(bindingSet); + + final IKeyOrder keyOrder = context.getAccessPath( + rel, asBound).getKeyOrder(); + + if (log.isDebugEnabled()) + log.debug("keyOrder=" + keyOrder + ", orderIndex=" + orderIndex + + ", tailIndex=" + orderIndex + ", pred=" + pred + + ", bindingSet=" + bindingSet + ", rule=" + rule); + + // save results. + a[tailIndex] = keyOrder; + nvars[tailIndex] = keyOrder == null ? asBound.getVariableCount() + : asBound.getVariableCount((IKeyOrder) keyOrder); + + final int arity = pred.arity(); + + for (int j = 0; j < arity; j++) { + + final IVariableOrConstant<?> t = pred.get(j); + + if (t.isVar()) { + + final Var<?> var = (Var<?>) t; + + if (log.isDebugEnabled()) { + + log.debug("Propagating binding: pred=" + pred + + ", var=" + var + ", bindingSet=" + + bindingSet); + + } + + bindingSet.set(var, fakeTermId); + + } + + } + + } + + if (log.isDebugEnabled()) { + + log.debug("keyOrder[]=" + Arrays.toString(a) + ", nvars=" + + Arrays.toString(nvars) + ", rule=" + rule); + + } + + return a; + + } + + /** + * A fake value that is propagated when we compute the {@link IKeyOrder} for + * a series of joins based on an assigned join evaluation order. + * + * @todo This has to be of the appropriate data type or we run into class + * cast exceptions. + */ + final private static transient IConstant<IV> fakeTermId = new Constant<IV>( + new TermId(VTE.URI, -1L)); + } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/FederatedQueryEngine.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/FederatedQueryEngine.java 2010-10-01 15:14:05 UTC (rev 3712) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/FederatedQueryEngine.java 2010-10-01 15:36:09 UTC (rev 3713) @@ -215,11 +215,27 @@ this.resourceService = resourceService; if(fed instanceof JiniFederation<?>) { - // the proxy for this query engine when used as a query controller. - this.clientProxy = (IQueryClient) ((JiniFederation<?>)fed).getProxy(this, false/*enableDGC*/); + + /* + * The proxy for this query engine when used as a query controller. + * + * + * Should the data services expose their query engine in this + * manner? + * + * @todo We need to unexport the proxy as well when the service is + * shutdown. This should follow the same pattern as DataService -> + * DataServer. E.g., a QueryEngineServer class. + */ + + this.clientProxy = (IQueryClient) ((JiniFederation<?>) fed) + .getProxy(this, false/* enableDGC */); + } else { - // E.g., an EmbeddedFederation in the test suite. + + // E.g., an EmbeddedFederation in the test suite. 
this.clientProxy = this; + } } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/AbstractBTree.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/AbstractBTree.java 2010-10-01 15:14:05 UTC (rev 3712) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/AbstractBTree.java 2010-10-01 15:36:09 UTC (rev 3713) @@ -138,7 +138,7 @@ * @see KeyBuilder */ abstract public class AbstractBTree implements IIndex, IAutoboxBTree, - ILinearList { + ILinearList, IBTreeStatistics { /** * The index is already closed. @@ -787,8 +787,7 @@ // the % utilization in [0:1] for the whole tree (nodes + leaves). counterSet.addCounter("% utilization", new Instrument<Double>(){ protected void sample() { - final int[] tmp = getUtilization(); - setValue(tmp[2] / 100d); + setValue(getUtilization().getTotalUtilization() / 100d); } }); @@ -1617,10 +1616,11 @@ return true; } - - /** - * The branching factor for the btree. + + /* + * IBTreeStatistics */ + final public int getBranchingFactor() { return branchingFactor; @@ -1636,33 +1636,14 @@ */ final int minChildren; - /** - * The height of the btree. The height is the #of levels minus one. A btree - * with only a root leaf has <code>height := 0</code>. A btree with a - * root node and one level of leaves under it has <code>height := 1</code>. - * Note that all leaves of a btree are at the same height (this is what is - * means for the btree to be "balanced"). Also note that the height only - * changes when we split or join the root node (a btree maintains balance by - * growing and shrinking in levels from the top rather than the leaves). - */ abstract public int getHeight(); - /** - * The #of non-leaf nodes in the {@link AbstractBTree}. This is zero (0) - * for a new btree. - */ abstract public int getNodeCount(); - /** - * The #of leaf nodes in the {@link AbstractBTree}. This is one (1) for a - * new btree. - */ abstract public int getLeafCount(); /** - * The #of entries (aka values) in the {@link AbstractBTree}. This is zero - * (0) for a new B+Tree. Note that this value is tracked explicitly so it - * requires no IOs. + * {@inheritDoc} * * @todo this could be re-defined as the exact entry count if we tracked the * #of deleted index entries and subtracted that from the total #of @@ -1677,6 +1658,21 @@ abstract public int getEntryCount(); /** + * Return a statistics snapshot of the B+Tree. + */ + public IBTreeStatistics getStatistics() { + + return new BTreeStatistics(this); + + } + + public IBTreeUtilizationReport getUtilization() { + + return new BTreeUtilizationReport(this); + + } + + /** * The object responsible for (de-)serializing the nodes and leaves of the * {@link IIndex}. */ @@ -3197,47 +3193,6 @@ // abstract public ILeafCursor newLeafCursor(ILeafCursor leafCursor); /** - * Computes and returns the utilization of the tree. The utilization figures - * do not factor in the space requirements of nodes and leaves. - * - * @return An array whose elements are: - * <ul> - * <li>0 - the leaf utilization percentage [0:100]. The leaf - * utilization is computed as the #of values stored in the tree - * divided by the #of values that could be stored in the #of - * allocated leaves.</li> - * <li>1 - the node utilization percentage [0:100]. The node - * utilization is computed as the #of non-root nodes divided by the - * #of non-root nodes that could be addressed by the tree.</li> - * <li>2 - the total utilization percentage [0:100]. 
This is the - * average of the leaf utilization and the node utilization.</li> - * </ul> - */ - public int[] getUtilization() { - - final int nnodes = getNodeCount(); - - final int nleaves = getLeafCount(); - - final int nentries = getEntryCount(); - - final int numNonRootNodes = nnodes + nleaves - 1; - - final int branchingFactor = getBranchingFactor(); - - final int nodeUtilization = nnodes == 0 ? 100 : (100 * numNonRootNodes) - / (nnodes * branchingFactor); - - final int leafUtilization = (100 * nentries) - / (nleaves * branchingFactor); - - final int utilization = (nodeUtilization + leafUtilization) / 2; - - return new int[] { nodeUtilization, leafUtilization, utilization }; - - } - - /** * Recursive dump of the tree. * * @param out @@ -3256,7 +3211,7 @@ // True iff we will write out the node structure. final boolean info = level.toInt() <= Level.INFO.toInt(); - final int[] utils = getUtilization(); + final IBTreeUtilizationReport utils = getUtilization(); if (info) { @@ -3273,8 +3228,9 @@ out.println("height=" + height + ", branchingFactor=" + branchingFactor + ", #nodes=" + nnodes + ", #leaves=" + nleaves + ", #entries=" + nentries + ", nodeUtil=" - + utils[0] + "%, leafUtil=" + utils[1] + "%, utilization=" - + utils[2] + "%"); + + utils.getNodeUtilization() + "%, leafUtil=" + + utils.getLeafUtilization() + "%, utilization=" + + utils.getTotalUtilization() + "%"); } if (root != null) { Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/BTreeStatistics.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/BTreeStatistics.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/BTreeStatistics.java 2010-10-01 15:36:09 UTC (rev 3713) @@ -0,0 +1,90 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Oct 1, 2010 + */ + +package com.bigdata.btree; + +import java.io.Serializable; + +/** + * A snapshot of the B+Tree statistics. 
+ * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public class BTreeStatistics implements IBTreeStatistics, Serializable { + + /** + * + */ + private static final long serialVersionUID = 1L; + + private final int m; + + private final int entryCount; + + private final int height; + + private final int leafCount; + + private final int nodeCount; + + private final IBTreeUtilizationReport utilReport; + + public BTreeStatistics(final AbstractBTree btree) { + this.m = btree.getBranchingFactor(); + this.entryCount = btree.getEntryCount(); + this.height = btree.getHeight(); + this.leafCount = btree.getLeafCount(); + this.nodeCount = btree.getNodeCount(); + this.utilReport = btree.getUtilization(); + } + + public int getBranchingFactor() { + return m; + } + + public int getEntryCount() { + return entryCount; + } + + public int getHeight() { + return height; + } + + public int getLeafCount() { + return leafCount; + } + + public int getNodeCount() { + return nodeCount; + } + + public IBTreeUtilizationReport getUtilization() { + return utilReport; + } + +} Property changes on: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/BTreeStatistics.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/BTreeUtilizationReport.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/BTreeUtilizationReport.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/BTreeUtilizationReport.java 2010-10-01 15:36:09 UTC (rev 3713) @@ -0,0 +1,85 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Oct 1, 2010 + */ + +package com.bigdata.btree; + +import java.io.Serializable; + +/** + * A btree utilization report. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public class BTreeUtilizationReport implements IBTreeUtilizationReport, + Serializable { + + /** + * + */ + private static final long serialVersionUID = 1L; + + private final int leafUtilization; + + private final int nodeUtilization; + + private final int totalUtilization; + + public BTreeUtilizationReport(final IBTreeStatistics stats) { + + final int nnodes = stats.getNodeCount(); + + final int nleaves = stats.getLeafCount(); + + final int nentries = stats.getEntryCount(); + + final int numNonRootNodes = nnodes + nleaves - 1; + + final int branchingFactor = stats.getBranchingFactor(); + + nodeUtilization = nnodes == 0 ? 
100 : (100 * numNonRootNodes) + / (nnodes * branchingFactor); + + leafUtilization = (100 * nentries) / (nleaves * branchingFactor); + + totalUtilization = (nodeUtilization + leafUtilization) / 2; + + } + + public int getLeafUtilization() { + return leafUtilization; + } + + public int getNodeUtilization() { + return nodeUtilization; + } + + public int getTotalUtilization() { + return totalUtilization; + } + +} Property changes on: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/BTreeUtilizationReport.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/IBTreeStatistics.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/IBTreeStatistics.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/IBTreeStatistics.java 2010-10-01 15:36:09 UTC (rev 3713) @@ -0,0 +1,83 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Oct 1, 2010 + */ + +package com.bigdata.btree; + +/** + * Interface used to report out some statistics about a B+Tree. These statistics + * may be used in combination with a disk cost model to predict the cost + * (latency) associated with a variety of operations on the B+Tree. All values + * reported by this interface are tracked explicitly by the + * {@link AbstractBTree} and do not require DISK IO. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public interface IBTreeStatistics { + + /** + * The branching factor for the btree. + */ + int getBranchingFactor(); + + /** + * The height of the btree. The height is the #of levels minus one. A btree + * with only a root leaf has <code>height := 0</code>. A btree with a + * root node and one level of leaves under it has <code>height := 1</code>. + * Note that all leaves of a btree are at the same height (this is what is + * means for the btree to be "balanced"). Also note that the height only + * changes when we split or join the root node (a btree maintains balance by + * growing and shrinking in levels from the top rather than the leaves). + */ + int getHeight(); + + /** + * The #of non-leaf nodes in the {@link AbstractBTree}. This is zero (0) + * for a new btree. + */ + int getNodeCount(); + + /** + * The #of leaf nodes in the {@link AbstractBTree}. This is one (1) for a + * new btree. + */ + int getLeafCount(); + + /** + * The #of entries (aka tuples) in the {@link AbstractBTree}. This is zero + * (0) for a new B+Tree. When the B+Tree supports delete markers, this value + * also includes tuples which have been marked as deleted. 
+ */ + int getEntryCount(); + + /** + * Computes and returns the utilization of the tree. The utilization figures + * do not factor in the space requirements of nodes and leaves. + */ + IBTreeUtilizationReport getUtilization(); + +} Property changes on: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/IBTreeStatistics.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/IBTreeUtilizationReport.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/IBTreeUtilizationReport.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/IBTreeUtilizationReport.java 2010-10-01 15:36:09 UTC (rev 3713) @@ -0,0 +1,58 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Oct 1, 2010 + */ + +package com.bigdata.btree; + +/** + * B+Tree utilization report. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public interface IBTreeUtilizationReport { + + /** + * The leaf utilization percentage [0:100]. The leaf utilization is computed + * as the #of values stored in the tree divided by the #of values that could + * be stored in the #of allocated leaves. + */ + int getLeafUtilization(); + + /** + * The node utilization percentage [0:100]. The node utilization is computed + * as the #of non-root nodes divided by the #of non-root nodes that could be + * addressed by the tree. + */ + int getNodeUtilization(); + + /** + * The total utilization percentage [0:100]. This is the average of the leaf + * utilization and the node utilization. + */ + int getTotalUtilization(); + +} Property changes on: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/IBTreeUtilizationReport.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
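A quick orientation to the refactored API above: the scan estimate is now composed from a DiskCostModel plus an IBTreeStatistics snapshot rather than from the BTree object itself. A minimal sketch of the new call pattern follows; the statistics values (m=256, h=2, 65% leaf utilization, 50,000 tuples) are invented purely for illustration, while the class names and the rangeScan signature are taken from the diff above:

    import com.bigdata.bop.cost.BTreeCostModel;
    import com.bigdata.bop.cost.DiskCostModel;

    public class RangeScanCostExample {
        public static void main(String[] args) {
            // Estimated milliseconds for a range scan visiting 50,000 tuples.
            // In QueryEngine#estimateCost(...) these inputs come from the
            // access path range count and index.getStatistics().
            final BTreeCostModel model = new BTreeCostModel(DiskCostModel.DEFAULT);
            final double costMillis = model.rangeScan(50000L/* rangeCount */,
                    256/* m */, 2/* h */, 65/* leafUtilization */);
            System.out.println("estimated cost (ms): " + costMillis);
        }
    }

As the log message notes, scale-out will need to blend this with the IndexSegmentCostModel, since a shard view is typically a mutable BTree on the journal plus a few index segment files.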
From: <tho...@us...> - 2010-10-17 11:09:51
Revision: 3803 http://bigdata.svn.sourceforge.net/bigdata/?rev=3803&view=rev Author: thompsonbry Date: 2010-10-17 11:09:45 +0000 (Sun, 17 Oct 2010) Log Message: ----------- javadoc edits (spelling errors). Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/journal/ITransactionService.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/service/AbstractTransactionService.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/journal/ITransactionService.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/journal/ITransactionService.java 2010-10-15 12:17:18 UTC (rev 3802) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/journal/ITransactionService.java 2010-10-17 11:09:45 UTC (rev 3803) @@ -65,7 +65,7 @@ * global release time - the earliest commit time from which a transaction may * read. Under dire circumstances (disk shortage) the transaction manager MAY * choose to abort transactions and advance the release time in order to permit - * the release of locked resources and the reclaimation of their space on local + * the release of locked resources and the reclamation of their space on local * disk. * </p> * <h2>Centralized transaction manager service</h2> @@ -87,8 +87,8 @@ * time, rendering views of earlier states of the database unavailable. * </p> * <p> - * The transaction identifier is the transaction <em>start time</em>. The - * transaction start time is choosen from among those distinct timestamps + * The transaction identifier codes the transaction <em>start time</em>. The + * transaction start time is chosen from among those distinct timestamps * available between the specified commit time and the next commit time on the * database. Successive read-write transactions must be assigned transaction * identifiers whose absolute value is strictly increasing - this requirement is @@ -100,10 +100,10 @@ * identifiers may be directly used as commit times when reading on a local * store. Read-write transaction identifiers must have their sign bit cleared in * order to read from their ground state (the commit point corresponding to - * their transaction start time) and unmodified transaction identifier is used - * to access their mutable view (the view comprised of the write set of the + * their transaction start time) but the unmodified transaction identifier is + * used to access their mutable view (the view comprised of the write set of the * transaction super imposed on the ground state such that writes, overwrites, - * and deletes are visible to the view). + * and deletes are visible in the view). * </p> * <p> * The symbolic value {@link ITx#READ_COMMITTED} and any <code>startTime</code> @@ -257,7 +257,7 @@ * operation isolated by a read-write transaction to execute with access to * the named resources (this applies only to distributed databases). The * declared resources are used in the commit phase of the read-write tx to - * impose a partial order on commits. That partial order guarentees that + * impose a partial order on commits. That partial order guarantees that * commits do not deadlock in contention for the same resources. 
* * @param tx Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/service/AbstractTransactionService.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/service/AbstractTransactionService.java 2010-10-15 12:17:18 UTC (rev 3802) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/service/AbstractTransactionService.java 2010-10-17 11:09:45 UTC (rev 3803) @@ -633,7 +633,7 @@ * * @throws RuntimeException * Wrapping {@link TimeoutException} if a timeout occurs - * awaiting a start time which would satisify the request for a + * awaiting a start time which would satisfy the request for a * read-only transaction (this can occur only for read-only * transactions which must contend for start times which will * read from the appropriate historical commit point). @@ -658,7 +658,7 @@ * Higher concurrency will require relaxing constraints on atomic * state transitions governed by [lock]. Perhaps by introducing * additional locks that are more specific. I don't want to relax - * those constaints until I have a better sense of what must be + * those constraints until I have a better sense of what must be * exclusive operations. */ @@ -1115,7 +1115,7 @@ * (commitTime-1) then compute and set the new releaseTime. * <p> * Note: This method was historically part of {@link #notifyCommit(long)}. - * It was moved into its own method so it can be overriden for some unit + * It was moved into its own method so it can be overridden for some unit * tests. * * @throws IllegalMonitorStateException @@ -1203,10 +1203,10 @@ * * @throws InterruptedException * if interrupted while awaiting a start time which would - * satisify the request. + * satisfy the request. * @throws InterruptedException * if a timeout occurs while awaiting a start time which would - * satisify the request. + * satisfy the request. */ protected long assignTransactionIdentifier(final long timestamp) throws InterruptedException, TimeoutException { @@ -1217,7 +1217,7 @@ * When timestamp is ZERO (0L), this simply returns the next * distinct timestamp (with its sign bit flipped). * - * Note: This is guarenteed to be a valid start time since it is LT + * Note: This is guaranteed to be a valid start time since it is LT * the next possible commit point for the database. * * Note: When we validate, we will read from [-startTime] and the @@ -1338,7 +1338,7 @@ /* * Note: If there is no successor of the desired commit point then - * we can just return the next timestamp. It is guarenteed to be GT + * we can just return the next timestamp. It is guaranteed to be GT * the desired commit time and LT the next commit point. [Note: this * case is in fact handled above so you should not get here.] */ This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
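The corrected javadoc above leans on a sign convention that is easy to misread: a read-write transaction identifier is a negated start time, the ground state is read from [-startTime], and the unmodified (negative) identifier addresses the transaction's mutable view. A tiny illustrative helper, assuming only that convention (the method itself is hypothetical and not part of this commit):

    /**
     * Return the commit time against which a transaction reads its ground
     * state. Read-only identifiers are already valid commit times; for a
     * read-write identifier the sign is flipped back, per the
     * "read from [-startTime]" note in AbstractTransactionService.
     */
    static long groundStateCommitTime(final long txId) {
        return txId < 0 ? -txId : txId;
    }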
From: <tho...@us...> - 2010-11-01 12:56:32
Revision: 3853 http://bigdata.svn.sourceforge.net/bigdata/?rev=3853&view=rev Author: thompsonbry Date: 2010-11-01 12:56:24 +0000 (Mon, 01 Nov 2010) Log Message: ----------- Modified RunningQuery to support aggregation of small chunks generated by an operator before they are passed along to the target operator (or mapped across shards in scale-out). This is a performance optimization, but I have not yet validated the impact on performance (I need to move the code over to a workstation to do this). Added toString() to Predicate$HashedPredicate. Fixed NPE in QueryLog which occurred when the BOpStats were not yet available for some operator. The NPE was being trapped so it was not causing query evaluation errors. Modified RunState to access RunningQuery.bopIndex using getBOpIndex(). Javadoc edits to AccessPath to note an issue that I want to explore in more depth. Javadoc clarification and bug fix for LogUtil. It was failing to handle XML configuration files specified using -Dlog4j.configuration. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryLog.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunState.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/AccessPath.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/util/config/LogUtil.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java 2010-11-01 12:52:00 UTC (rev 3852) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java 2010-11-01 12:56:24 UTC (rev 3853) @@ -628,6 +628,12 @@ } + public String toString() { + + return super.toString() + "{pred=" + pred + ",hash=" + hash + "}"; + + } + } } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryLog.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryLog.java 2010-11-01 12:52:00 UTC (rev 3852) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryLog.java 2010-11-01 12:56:24 UTC (rev 3853) @@ -257,8 +257,10 @@ stats.add(t); } } else { - // Just this operator. 
+ final BOpStats tmp = statsMap.get(bopId); + if (tmp != null) + stats.add(tmp); } final long unitsIn = stats.unitsIn.get(); final long unitsOut = stats.unitsOut.get(); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunState.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunState.java 2010-11-01 12:52:00 UTC (rev 3852) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunState.java 2010-11-01 12:56:24 UTC (rev 3853) @@ -271,7 +271,7 @@ public RunState(final RunningQuery query) { this(query.getQuery(), query.getQueryId(), query.getDeadline(), - query.bopIndex); + query.getBOpIndex()); } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java 2010-11-01 12:52:00 UTC (rev 3852) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java 2010-11-01 12:56:24 UTC (rev 3853) @@ -27,6 +27,7 @@ */ package com.bigdata.bop.engine; +import java.util.Arrays; import java.util.Collections; import java.util.Iterator; import java.util.LinkedList; @@ -60,6 +61,7 @@ import com.bigdata.bop.solutions.SliceOp; import com.bigdata.journal.IIndexManager; import com.bigdata.journal.ITx; +import com.bigdata.relation.accesspath.BufferClosedException; import com.bigdata.relation.accesspath.IAsynchronousIterator; import com.bigdata.relation.accesspath.IBlockingBuffer; import com.bigdata.relation.accesspath.IMultiSourceAsynchronousIterator; @@ -67,6 +69,7 @@ import com.bigdata.service.IBigdataFederation; import com.bigdata.striterator.ICloseableIterator; import com.bigdata.util.concurrent.Haltable; +import com.bigdata.util.concurrent.Memoizer; /** * Metadata about running queries. @@ -149,7 +152,7 @@ * An index from the {@link BOp.Annotations#BOP_ID} to the {@link BOp}. This * index is generated by the constructor. It is immutable and thread-safe. */ - protected final Map<Integer, BOp> bopIndex; + private final Map<Integer, BOp> bopIndex; /** * The run state of the query and the result of the computation iff it @@ -267,7 +270,7 @@ /** * Flag used to prevent retriggering of {@link #lifeCycleTearDownQuery()}. */ - final AtomicBoolean didQueryTearDown = new AtomicBoolean(false); + private final AtomicBoolean didQueryTearDown = new AtomicBoolean(false); // /** // * The chunks available for immediate processing (they must have been @@ -1217,41 +1220,50 @@ } /** - * A {@link FutureTask} which exposes the {@link ChunkTask} which is being - * evaluated. + * A {@link FutureTask} which conditionally schedules another task for the + * same (bopId, partitionId) once the wrapped {@link ChunkTask} is + * done. This is similar to the {@link Memoizer} pattern. This class + * coordinates with the {@link #operatorFutures}, which maintains a map of + * all currently running tasks which are consuming chunks. + * <p> + * The {@link ChunkTask} is wrapped by a {@link ChunkTaskWrapper} which is + * responsible for communicating the changes in the query's running state + * back to the {@link RunState} object on the query controller. 
*/ private class ChunkFutureTask extends FutureTask<Void> { - public final ChunkTask chunkTask; + private final ChunkTask t; public ChunkFutureTask(final ChunkTask chunkTask) { -// super(chunkTask, null/* result */); + /* + * Note: wraps chunk task to communicate run state changes back to + * the query controller. + */ + super(new ChunkTaskWrapper(chunkTask), null/* result */); - // Note: wraps chunk task to ensure source and sinks get closed. - super(new ChunkTaskWrapper(chunkTask), null/* result */); - - this.chunkTask = chunkTask; - + this.t = chunkTask; + } public void run() { - final ChunkTask t = chunkTask; - - super.run(); - - /* - * This task is done executing so remove its Future before we - * attempt to schedule another task for the same - * (bopId,partitionId). - */ - final ConcurrentHashMap<ChunkFutureTask, ChunkFutureTask> map = operatorFutures - .get(new BSBundle(t.bopId, t.partitionId)); - if (map != null) { - map.remove(this, this); - } - + super.run(); + + /* + * This task is done executing so remove its Future before we + * attempt to schedule another task for the same + * (bopId,partitionId). + */ + final ConcurrentHashMap<ChunkFutureTask, ChunkFutureTask> map = operatorFutures + .get(new BSBundle(t.bopId, t.partitionId)); + + if (map != null) { + + map.remove(this, this); + + } + // Schedule another task if any messages are waiting. RunningQuery.this.scheduleNext(new BSBundle( t.bopId, t.partitionId)); @@ -1262,11 +1274,12 @@ /** * Wraps the {@link ChunkTask} and handles various handshaking with the - * {@link RunningQuery} and the {@link RunState}. Since starting and - * stopping a {@link ChunkTask} requires handshaking with the query - * controller, it is important that these actions take place once the task - * has been submitted - otherwise they would be synchronous in the loop - * which consumes available chunks and generates new {@link ChunkTask}s. + * {@link RunningQuery} and the {@link RunState} on the query controller. + * Since starting and stopping a {@link ChunkTask} requires handshaking with + * the query controller (and thus can require RMI), it is important that + * these actions take place once the task has been submitted - otherwise + * they would be synchronous in the loop which consumes available chunks and + * generates new {@link ChunkTask}s. */ private class ChunkTaskWrapper implements Runnable { @@ -1608,7 +1621,8 @@ // .getChunkTimeout(), // BufferAnnotations.chunkTimeoutUnit); - return new HandleChunkBuffer(sinkId, sinkMessagesOut, /*b,*/ stats); + return new HandleChunkBuffer(RunningQuery.this, bopId, sinkId, op + .getChunkCapacity(), sinkMessagesOut, stats); } @@ -1647,108 +1661,208 @@ return null; } // call() + } // class ChunkTask + + /** + * Class traps {@link #add(IBindingSet[])} to handle the IBindingSet[] + * chunks as they are generated by the running operator task, invoking + * {@link RunningQuery#handleOutputChunk(BOp, int, IBlockingBuffer)} for + * each generated chunk to synchronously emit {@link IChunkMessage}s. + * <p> + * This use of this class significantly increases the parallelism and + * throughput of selective queries. If output chunks are not "handled" + * until the {@link ChunkTask} is complete then the total latency of + * selective queries is increased dramatically. 
+ */ + static private class HandleChunkBuffer implements + IBlockingBuffer<IBindingSet[]> { + + private final RunningQuery q; + + private final int bopId; + + private final int sinkId; + /** - * Class traps {@link #add(IBindingSet[])} to handle the IBindingSet[] - * chunks as they are generated by the running operator task, invoking - * {@link RunningQuery#handleOutputChunk(BOp, int, IBlockingBuffer)} for - * each generated chunk to synchronously emit {@link IChunkMessage}s. + * The target chunk size. When ZERO (0) chunks are output immediately as + * they are received (the internal buffer is not used). */ - private class HandleChunkBuffer implements IBlockingBuffer<IBindingSet[]> { + private final int chunkCapacity; + + private final AtomicInteger sinkMessagesOut; + + private final BOpStats stats; + + private volatile boolean open = true; + + /** + * An internal buffer which is used if chunkCapacity != ZERO. + */ + private IBindingSet[] chunk = null; + + /** + * The #of elements in the internal {@link #chunk} buffer. + */ + private int chunkSize = 0; + + /** + * + * @param q + * @param bopId + * @param sinkId + * @param chunkCapacity + * @param sinkMessagesOut + * @param stats + */ + public HandleChunkBuffer(final RunningQuery q, final int bopId, + final int sinkId, final int chunkCapacity, + final AtomicInteger sinkMessagesOut, final BOpStats stats) { + this.q = q; + this.bopId = bopId; + this.sinkId = sinkId; + this.chunkCapacity = chunkCapacity; + this.sinkMessagesOut = sinkMessagesOut; + this.stats = stats; + } + + /** + * Handle sink output, sending appropriate chunk message(s). This method + * MUST NOT block since that will deadlock the caller. + * <p> + * Note: This maps output over shards/nodes in s/o. + * <p> + * Note: This must be synchronized in case the caller is multi-threaded + * since it has a possible side effect on the internal buffer. + */ + public void add(final IBindingSet[] e) { - private final int sinkId; - private final AtomicInteger sinkMessagesOut; -// private final IBlockingBuffer<IBindingSet[]> sink; - private final BOpStats stats; - private volatile boolean open = true; - - public HandleChunkBuffer(final int sinkId, - final AtomicInteger sinkMessagesOut, -// final IBlockingBuffer<IBindingSet[]> b, - final BOpStats stats - ) { - this.sinkId = sinkId; - this.sinkMessagesOut = sinkMessagesOut; -// this.sink = b; - this.stats = stats; - } + if(!open) + throw new BufferClosedException(); - /** - * Handle sink output, sending appropriate chunk message(s). - * <p> - * Note: This maps output over shards/nodes in s/o. - */ - public void add(final IBindingSet[] e) { -// if (e.getClass().getComponentType() != null) { - stats.unitsOut.add(((Object[]) e).length); -// } else { -// stats.unitsOut.increment(); -// } - stats.chunksOut.increment(); + if (chunkCapacity != 0 && e.length < (chunkCapacity >> 1)) { /* - * FIXME Combine together when possible and final evict in - * flush(). The logic here MUST NOT block since that will - * deadlock the caller. The safest thing to do is to emit each - * chunk as it arrives, but that does not let us combine them - * together when an operator generates small chunks. + * The caller's array is significantly smaller than the target + * chunk size. Append the caller's array to the internal buffer + * and return immediately. The internal buffer will be copied + * through either in a subsequent add() or in flush(). 
*/ -// sink.add(e); - sinkMessagesOut.addAndGet(getChunkHandler().handleChunk( - RunningQuery.this, bopId, sinkId, e)); - } + synchronized (this) { - public long flush() { - return 0L; -// return sink.flush(); - } + if (chunk == null) + chunk = new IBindingSet[chunkCapacity]; - public void abort(Throwable cause) { - open = false; - RunningQuery.this.halt(cause); -// sink.abort(cause); - } + if (chunkSize + e.length > chunk.length) { - public void close() { -// sink.close(); - open = false; - } + // flush the buffer first. + outputBufferedChunk(); - public Future getFuture() { -// return sink.getFuture(); - return null; - } + } - public boolean isEmpty() { - return true; -// return sink.isEmpty(); - } + // copy the chunk into the buffer. + System.arraycopy(e/* src */, 0/* srcPos */, + chunk/* dest */, chunkSize/* destPos */, + e.length/* length */); - public boolean isOpen() { - return open && !RunningQuery.this.isDone(); -// return sink.isOpen(); - } + chunkSize += e.length; - public IAsynchronousIterator<IBindingSet[]> iterator() { - throw new UnsupportedOperationException(); -// return sink.iterator(); - } + return; - public void reset() { -// sink.reset(); - } + } - public void setFuture(Future future) { - throw new UnsupportedOperationException(); -// sink.setFuture(future); } - public int size() { - return 0; -// return sink.size(); + // output the caller's chunk immediately. + outputChunk(e); + + } + + /** + * Output a chunk, updating the counters. + * + * @param e + * The chunk. + */ + private void outputChunk(final IBindingSet[] e) { + + stats.unitsOut.add(((Object[]) e).length); + + stats.chunksOut.increment(); + + sinkMessagesOut.addAndGet(q.getChunkHandler().handleChunk(q, bopId, + sinkId, e)); + + } + + /** + * Output the internal buffer. + */ + synchronized // Note: has side-effect on internal buffer. + private void outputBufferedChunk() { + if (chunk == null || chunkSize == 0) + return; + if (chunkSize != chunk.length) { + // truncate the array. + chunk = Arrays.copyOf(chunk, chunkSize); } + outputChunk(chunk); + chunkSize = 0; + chunk = null; + } + + synchronized // Note: possible side-effect on internal buffer. 
+ public long flush() { + if (open) + outputBufferedChunk(); + return 0L; +// return sink.flush(); + } - } // class HandleChunkBuffer - - } // class ChunkTask + public void abort(Throwable cause) { + open = false; + q.halt(cause); +// sink.abort(cause); + } + + public void close() { +// sink.close(); + open = false; + } + + public Future getFuture() { +// return sink.getFuture(); + return null; + } + + public boolean isEmpty() { + return true; +// return sink.isEmpty(); + } + + public boolean isOpen() { + return open && !q.isDone(); +// return sink.isOpen(); + } + + public IAsynchronousIterator<IBindingSet[]> iterator() { + throw new UnsupportedOperationException(); +// return sink.iterator(); + } + + public void reset() { +// sink.reset(); + } + + public void setFuture(Future future) { + throw new UnsupportedOperationException(); +// sink.setFuture(future); + } + + public int size() { + return 0; +// return sink.size(); + } + + } // class HandleChunkBuffer // private static class BlockingBufferWithStats<E> extends BlockingBuffer<E> { // Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/AccessPath.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/AccessPath.java 2010-11-01 12:52:00 UTC (rev 3852) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/AccessPath.java 2010-11-01 12:56:24 UTC (rev 3853) @@ -1585,11 +1585,14 @@ /* * SWAG in case zero partition count is reported (I am not sure that * this code path is possible). + * + * @todo This is proven possible. Now figure out why. Maybe this is + * fromKey==toKey, in which case we can optimize that out. */ return new ScanCostReport(0L/* rangeCount */, partitionCount, 100/* millis */); - /* - * Should never be "zero" partition count. - */ +// /* +// * Should never be "zero" partition count. +// */ // throw new AssertionError(); } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/util/config/LogUtil.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/util/config/LogUtil.java 2010-11-01 12:52:00 UTC (rev 3852) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/util/config/LogUtil.java 2010-11-01 12:56:24 UTC (rev 3853) @@ -30,47 +30,84 @@ import org.apache.log4j.xml.DOMConfigurator; /** - * Utility class that provides a set of static convenience methods related - * to the initialization and configuration of the logging mechanism(s) - * employed by the components of the system. The methods of this class - * can be useful both in Jini configuration files, as well as in the - * system components themselves. + * Utility class that provides a set of static convenience methods related to + * the initialization and configuration of the logging mechanism(s) employed by + * the components of the system. The methods of this class can be useful both in + * Jini configuration files, as well as in the system components themselves. * <p> + * This class relies on the presence of either the + * <code>log4j.configuration</code> or the + * <code>log4j.primary.configuration</code> property and understands files with + * any of the following extensions {<code>.properties</code>, + * <code>.logging</code>, <code>.xml</code> . It will log a message on + * <em>stderr</em> if neither of those properties is defined. 
The class + * deliberately does not search the CLASSPATH for a log4j configuration in an + * effort to discourage the inadvertent use of hidden configuration files when + * deploying bigdata. + * <p> + * A watcher is setup on the log4j configuration if one is found. + * <p> * This class cannot be instantiated. */ public class LogUtil { + /** + * Examine the various log4j configuration properties and return the name of + * the log4j configuration resource if one was configured. + * + * @return The log4j configuration resource -or- <code>null</code> if the + * resource was not configured properly. + */ + static String getConfigPropertyValue() { + + final String log4jConfig = System + .getProperty("log4j.primary.configuration"); + + if (log4jConfig != null) + return log4jConfig; + + final String log4jDefaultConfig = System + .getProperty("log4j.configuration"); + + if (log4jDefaultConfig != null) + return log4jDefaultConfig; + + return null; + + } + // Static initialization block that retrieves and initializes // the log4j logger configuration for the given VM in which this // class resides. Note that this block is executed only once // during the life of the associated VM. static { - final String log4jConfig = - System.getProperty("log4j.primary.configuration"); + + final String log4jConfig = getConfigPropertyValue(); + if( log4jConfig != null && (log4jConfig.endsWith(".properties") || log4jConfig.endsWith(".logging"))) { + PropertyConfigurator.configureAndWatch(log4jConfig); + } else if ( log4jConfig != null && log4jConfig.endsWith(".xml") ) { + DOMConfigurator.configureAndWatch(log4jConfig); + } else { - final String log4jDefaultConfig = - System.getProperty("log4j.configuration"); - if (log4jDefaultConfig != null ) { - PropertyConfigurator.configureAndWatch(log4jDefaultConfig); - } else { - System.out.println - ("ERROR: could not initialize Log4J logging utility"); - System.out.println - (" set system property " - +"'-Dlog4j.configuration=" - +"file:bigdata/src/resources/logging/log4j.properties" - +"\n and/or \n" - +" set system property " - +"'-Dlog4j.primary.configuration=" - +"file:<installDir>/" - +"bigdata/src/resources/logging/log4j.properties'"); - } + + System.err.println("ERROR: " + LogUtil.class.getName() + + " : Could not initialize Log4J logging utility. " + + "Set system property " + +"'-Dlog4j.configuration=" + +"file:bigdata/src/resources/logging/log4j.properties" + +"\n and/or \n" + +" set system property " + +"'-Dlog4j.primary.configuration=" + +"file:<installDir>/" + +"bigdata/src/resources/logging/log4j.properties'"); + } + } public static Logger getLog4jLogger(String componentName) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
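The LogUtil fix above is easier to follow outside of the diff. The following is a minimal, self-contained sketch of the committed dispatch logic, assuming log4j 1.2 on the classpath; the class name Log4jBootstrap is illustrative, while the property names and the configureAndWatch() calls are taken from the patch:

import org.apache.log4j.PropertyConfigurator;
import org.apache.log4j.xml.DOMConfigurator;

public class Log4jBootstrap {

    /**
     * Return the log4j configuration resource, preferring the "primary"
     * property over the standard one, or null if neither is set.
     */
    static String getConfigPropertyValue() {
        final String primary = System.getProperty("log4j.primary.configuration");
        if (primary != null)
            return primary;
        return System.getProperty("log4j.configuration");
    }

    static {
        final String config = getConfigPropertyValue();
        if (config != null
                && (config.endsWith(".properties") || config.endsWith(".logging"))) {
            // Properties syntax: configure and watch the file for changes.
            PropertyConfigurator.configureAndWatch(config);
        } else if (config != null && config.endsWith(".xml")) {
            // XML syntax: the case the pre-patch code failed to handle.
            DOMConfigurator.configureAndWatch(config);
        } else {
            // Deliberately do NOT fall back to a CLASSPATH search for a
            // hidden configuration file.
            System.err.println("ERROR: could not initialize log4j; set"
                    + " -Dlog4j.configuration or -Dlog4j.primary.configuration");
        }
    }
}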
From: <tho...@us...> - 2010-11-01 15:29:51
|
Revision: 3855 http://bigdata.svn.sourceforge.net/bigdata/?rev=3855&view=rev Author: thompsonbry Date: 2010-11-01 15:29:45 +0000 (Mon, 01 Nov 2010) Log Message: ----------- Modified RunningQuery to improve the chunk combiner logic for small chunks output by an operator task. The previous implementation was performing more allocations and array copies than were desirable. The new implementation uses a linked list to collect small chunks and does one exact fit allocation and copy when the small chunks are evicted. Adjusted the BlockingBuffer configurations to improve query performance.

BufferAnnotations:
int DEFAULT_CHUNK_OF_CHUNKS_CAPACITY = 10;//trunk=1000
int DEFAULT_CHUNK_CAPACITY = 1000;//trunk=100
int DEFAULT_CHUNK_TIMEOUT = 10;//trunk=1000

AbstractResource:
String DEFAULT_CHUNK_OF_CHUNKS_CAPACITY = "10"; // was 1000
String DEFAULT_CHUNK_CAPACITY = "1000"; // was 100
String DEFAULT_CHUNK_TIMEOUT = "10"; // was 1000

[java] ### Finished testing BIGDATA_SPARQL_ENDPOINT ###
[java] BIGDATA_SPARQL_ENDPOINT #trials=10 #parallel=1
[java] query Time Result#
[java] query1 53 4
[java] query3 45 6
[java] query4 73 34
[java] query5 107 719
[java] query7 27 61
[java] query8 396 6463
[java] query10 21 0
[java] query11 20 0
[java] query12 26 0
[java] query13 24 0
[java] query14 3897 393730
[java] query6 4023 430114
[java] query2 978 130
[java] query9 5157 8627
[java] Total 14847

This is the best score for the quads query branch (the trunk comes in at 12748 total). The trunk still does better on Q6, Q14, and Q9 while the quads query branch does better on Q2. I lack an explanation for Q6 and Q14 since it does not appear to be related to the BlockingBuffer configuration. Q9 might be the result of not chaining the queues together and accounts for 1s of the total difference in time for this benchmark. The BSBM 100M WORM m=32 score in the trunk is:

[java] QMpH: 4234.40 query mixes per hour

With the chunk combiner based on the LinkedList in RunningQuery and the adjustments to the BufferAnnotations and AbstractResource options to optimize LUBM U50 query we have:

[java] QMpH: 4121.80 query mixes per hour

Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BufferAnnotations.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/AbstractResource.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BufferAnnotations.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BufferAnnotations.java 2010-11-01 15:16:44 UTC (rev 3854) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BufferAnnotations.java 2010-11-01 15:29:45 UTC (rev 3855) @@ -52,7 +52,7 @@ /** * Default for {@link #CHUNK_OF_CHUNKS_CAPACITY} */ - int DEFAULT_CHUNK_OF_CHUNKS_CAPACITY = 100;//trunk=1000 + int DEFAULT_CHUNK_OF_CHUNKS_CAPACITY = 5;//trunk=1000 /** * Sets the capacity of the {@link IBuffer}[]s used to accumulate a chunk of @@ -81,7 +81,7 @@ * * @todo this is probably much larger than we want. Try 10ms. */ - int DEFAULT_CHUNK_TIMEOUT = 20;//trunk=1000 + int DEFAULT_CHUNK_TIMEOUT = 10;//trunk=1000 /** * The {@link TimeUnit}s in which the {@link #CHUNK_TIMEOUT} is measured.
Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java 2010-11-01 15:16:44 UTC (rev 3854) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java 2010-11-01 15:29:45 UTC (rev 3855) @@ -1695,10 +1695,11 @@ private volatile boolean open = true; - /** - * An internal buffer which is used if chunkCapacity != ZERO. - */ - private IBindingSet[] chunk = null; +// /** +// * An internal buffer which is used if chunkCapacity != ZERO. +// */ +// private IBindingSet[] chunk = null; + private List<IBindingSet[]> smallChunks = null; /** * The #of elements in the internal {@link #chunk} buffer. @@ -1739,38 +1740,66 @@ if(!open) throw new BufferClosedException(); - if (chunkCapacity != 0 && e.length < (chunkCapacity >> 1)) { - /* - * The caller's array is significantly smaller than the target - * chunk size. Append the caller's array to the internal buffer - * and return immediately. The internal buffer will be copied - * through either in a subsequent add() or in flush(). - */ - synchronized (this) { +// if (chunkCapacity != 0 && e.length < (chunkCapacity >> 1)) { +// /* +// * The caller's array is significantly smaller than the target +// * chunk size. Append the caller's array to the internal buffer +// * and return immediately. The internal buffer will be copied +// * through either in a subsequent add() or in flush(). +// */ +// synchronized (this) { +// +// if (chunk == null) +// chunk = new IBindingSet[chunkCapacity]; +// +// if (chunkSize + e.length > chunkCapacity) { +// +// // flush the buffer first. +// outputBufferedChunk(); +// +// } +// +// // copy the chunk into the buffer. +// System.arraycopy(e/* src */, 0/* srcPos */, +// chunk/* dest */, chunkSize/* destPos */, +// e.length/* length */); +// +// chunkSize += e.length; +// +// return; +// +// } +// +// } - if (chunk == null) - chunk = new IBindingSet[chunkCapacity]; + if (chunkCapacity != 0 && e.length < (chunkCapacity >> 1)) { + /* + * The caller's array is significantly smaller than the target + * chunk size. Append the caller's array to the internal list + * and return immediately. The buffered chunks will be copied + * through either in a subsequent add() or in flush(). + */ + synchronized (this) { - if (chunkSize + e.length > chunk.length) { + if (smallChunks == null) + smallChunks = new LinkedList<IBindingSet[]>(); - // flush the buffer first. - outputBufferedChunk(); + if (chunkSize + e.length > chunkCapacity) { - } + // flush the buffer first. + outputBufferedChunk(); - // copy the chunk into the buffer. - System.arraycopy(e/* src */, 0/* srcPos */, - chunk/* dest */, chunkSize/* destPos */, - e.length/* length */); + } + + smallChunks.add(e); - chunkSize += e.length; + chunkSize += e.length; - return; + return; - } + } + } - } - // output the caller's chunk immediately. outputChunk(e); @@ -1798,15 +1827,36 @@ */ synchronized // Note: has side-effect on internal buffer. private void outputBufferedChunk() { - if (chunk == null || chunkSize == 0) +// if (chunk == null || chunkSize == 0) +// return; +// if (chunkSize != chunk.length) { +// // truncate the array. +// chunk = Arrays.copyOf(chunk, chunkSize); +// } +// outputChunk(chunk); +// chunkSize = 0; +// chunk = null; + if (smallChunks == null || chunkSize == 0) return; - if (chunkSize != chunk.length) { - // truncate the array. 
- chunk = Arrays.copyOf(chunk, chunkSize); - } + if (smallChunks.size() == 1) { + // directly output a single small chunk. + outputChunk(smallChunks.get(0)); + chunkSize = 0; + smallChunks = null; + return; + } + // exact fit buffer. + final IBindingSet[] chunk = new IBindingSet[chunkSize]; + // copy the small chunks into the buffer. + int destPos = 0; + for (IBindingSet[] e : smallChunks) { + System.arraycopy(e/* src */, 0/* srcPos */, chunk/* dest */, + destPos, e.length/* length */); + destPos += e.length; + } outputChunk(chunk); chunkSize = 0; - chunk = null; + smallChunks = null; } synchronized // Note: possible side-effect on internal buffer. Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/AbstractResource.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/AbstractResource.java 2010-11-01 15:16:44 UTC (rev 3854) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/AbstractResource.java 2010-11-01 15:29:45 UTC (rev 3855) @@ -237,7 +237,7 @@ * Default for {@link #CHUNK_OF_CHUNKS_CAPACITY} * @deprecated by {@link BOp} annotations. */ - String DEFAULT_CHUNK_OF_CHUNKS_CAPACITY = "1000"; + String DEFAULT_CHUNK_OF_CHUNKS_CAPACITY = "10"; // was 1000 /** * <p> @@ -260,7 +260,7 @@ * * @deprecated by {@link BOp} annotations. */ - String DEFAULT_CHUNK_CAPACITY = "100"; + String DEFAULT_CHUNK_CAPACITY = "1000"; // was 100 /** * The timeout in milliseconds that the {@link BlockingBuffer} will wait @@ -278,7 +278,7 @@ * @todo this is probably much larger than we want. Try 10ms. * @deprecated by {@link BOp} annotations. */ - String DEFAULT_CHUNK_TIMEOUT = "1000"; + String DEFAULT_CHUNK_TIMEOUT = "10"; // was 1000 /** * If the estimated rangeCount for an This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
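For clarity, the revised combiner reduces to the following self-contained sketch, with Object[] standing in for IBindingSet[] and an output() placeholder standing in for the chunk handler. Only the names are invented here; the buffering rules (buffer anything under half the target capacity, combine with one exact fit allocation on eviction) follow the patch:

import java.util.LinkedList;
import java.util.List;

class SmallChunkCombiner {

    private final int chunkCapacity;    // target chunk size; ZERO disables buffering.
    private List<Object[]> smallChunks; // small chunks awaiting combination.
    private int chunkSize;              // #of buffered elements across all small chunks.

    SmallChunkCombiner(final int chunkCapacity) {
        this.chunkCapacity = chunkCapacity;
    }

    /** Accept a chunk, buffering it when it is less than half the target capacity. */
    synchronized void add(final Object[] e) {
        if (chunkCapacity != 0 && e.length < (chunkCapacity >> 1)) {
            if (smallChunks == null)
                smallChunks = new LinkedList<Object[]>();
            if (chunkSize + e.length > chunkCapacity)
                outputBufferedChunk(); // at capacity: combine and emit first.
            smallChunks.add(e);        // just link in the caller's array - no copy yet.
            chunkSize += e.length;
            return;
        }
        output(e); // large chunk: emit immediately.
    }

    /** Combine the buffered small chunks using one exact fit allocation and copy. */
    private synchronized void outputBufferedChunk() {
        if (smallChunks == null || chunkSize == 0)
            return;
        if (smallChunks.size() == 1) {
            output(smallChunks.get(0)); // a single small chunk: emit it directly.
        } else {
            final Object[] chunk = new Object[chunkSize];
            int destPos = 0;
            for (Object[] e : smallChunks) {
                System.arraycopy(e/* src */, 0/* srcPos */, chunk/* dest */,
                        destPos, e.length/* length */);
                destPos += e.length;
            }
            output(chunk);
        }
        chunkSize = 0;
        smallChunks = null;
    }

    synchronized void flush() {
        outputBufferedChunk(); // evict whatever remains buffered.
    }

    /** Placeholder for the downstream handler (handleChunk() in the real code). */
    void output(final Object[] chunk) {
        System.out.println("emitting chunk of " + chunk.length);
    }
}

Note the design choice relative to revision 3853: small chunks are merely linked into the list on add(), so the copy happens once, into an exact fit array, instead of eagerly copying every small chunk into a fixed-capacity buffer that then has to be truncated.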
From: <tho...@us...> - 2011-02-25 21:18:07
|
Revision: 4250 http://bigdata.svn.sourceforge.net/bigdata/?rev=4250&view=rev Author: thompsonbry Date: 2011-02-25 21:18:01 +0000 (Fri, 25 Feb 2011) Log Message: ----------- Modified Haltable#halted() to use the message "Halted" in the thrown RuntimeException as this is a normal halt. Modified PipelineJoin to log normal termination @ DEBUG. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/util/concurrent/Haltable.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java 2011-02-24 23:15:17 UTC (rev 4249) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java 2011-02-25 21:18:01 UTC (rev 4250) @@ -592,6 +592,9 @@ /** * An optional limit on the #of solutions to be produced. The limit is * ignored if it is {@link Long#MAX_VALUE}. + * <p> + * Note: Invoking {@link #halt(Object)} is necessary to enforce the + * limit. * * @see Annotations#LIMIT */ @@ -1094,8 +1097,10 @@ throw new RuntimeException("Halting join: " + t, t); } // normal termination - ignore exception. - log.warn("Caught and ignored exception: "+t); return null; - + if (log.isDebugEnabled()) + log.debug("Caught and ignored exception: " + t); + return null; + } } @@ -1632,7 +1637,7 @@ log.info("Breaking query @ limit: limit=" + limit + ", exactOutputCount=" + exactOutputCount.get()); -// halt((Void) null); + halt((Void) null); return null; } @@ -1677,10 +1682,14 @@ while (itr.hasNext()) { + halted(); + final Object[] chunk = itr.nextChunk(); - stats.accessPathChunksIn.increment(); + stats.accessPathChunksIn.increment(); +// System.err.println("#chunks="+stats.accessPathChunksIn+", chunkSize="+chunk.length); + // process the chunk in the caller's thread. new ChunkTask(bindingSets, naccepted, unsyncBuffer, chunk).call(); @@ -1727,7 +1736,7 @@ log.info("Breaking query @ limit: limit=" + limit + ", exactOutputCount=" + exactOutputCount.get()); -// halt((Void) null); + halt((Void) null); break; } @@ -1947,7 +1956,7 @@ log.info("Breaking query @ limit: limit=" + limit + ", exactOutputCount=" + exactOutputCount.get()); -// halt((Void) null); + halt((Void) null); break; } @@ -2145,7 +2154,7 @@ log.info("Breaking query @ limit: limit=" + limit + ", exactOutputCount=" + exactOutputCount.get()); -// halt((Void) null); + halt((Void) null); break; } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/util/concurrent/Haltable.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/util/concurrent/Haltable.java 2011-02-24 23:15:17 UTC (rev 4249) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/util/concurrent/Haltable.java 2011-02-25 21:18:01 UTC (rev 4250) @@ -205,7 +205,7 @@ * running (since it invoked halted() it must be running). Since * it is running, */ - throw new RuntimeException(); + throw new RuntimeException("Halted"); } throw new RuntimeException(firstCause); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
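The limit handling above depends on halt() being visible to every task for the join, not just the loop that noticed the limit. A minimal sketch of the pattern, with invented names (the real classes are Haltable and PipelineJoin):

import java.util.concurrent.atomic.AtomicBoolean;

class LimitedJoinTask {

    /** Shared halt flag: the analogue of the query's Haltable. */
    private final AtomicBoolean halt = new AtomicBoolean(false);

    /** Optional limit; ignored if Long.MAX_VALUE, as in PipelineJoin. */
    private final long limit;

    private long exactOutputCount = 0;

    LimitedJoinTask(final long limit) {
        this.limit = limit;
    }

    /** Analogue of Haltable#halted(): throws if a halt was requested. */
    private void halted() {
        if (halt.get())
            throw new RuntimeException("Halted"); // normal termination, not an error.
    }

    void consume(final Object[][] chunks) {
        try {
            for (Object[] chunk : chunks) {
                halted(); // check once per access path chunk.
                for (Object solution : chunk) {
                    emit(solution);
                    if (limit != Long.MAX_VALUE && ++exactOutputCount >= limit) {
                        // Breaking this loop is not enough: sibling tasks for
                        // the same join must observe the halt as well.
                        halt.set(true);
                        return;
                    }
                }
            }
        } catch (RuntimeException ex) {
            // A halt is normal termination: log @ DEBUG and ignore.
        }
    }

    void emit(final Object solution) { /* deliver one solution to the sink */ }
}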
From: <tho...@us...> - 2011-03-08 15:07:05
|
Revision: 4280 http://bigdata.svn.sourceforge.net/bigdata/?rev=4280&view=rev Author: thompsonbry Date: 2011-03-08 15:06:58 +0000 (Tue, 08 Mar 2011) Log Message: ----------- Integrated FutureTaskMon into several places. It provides a means of identifying callers who might have interrupted a FutureTask when the appropriate log level is set. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/ChunkedRunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/StandaloneChainedRunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/concurrent/FutureTaskMon.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/concurrent/NonBlockingLockManager.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/concurrent/NonBlockingLockManagerWithNewDesign.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/journal/AbstractJournal.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/ChunkedRunningQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/ChunkedRunningQuery.java 2011-03-08 13:34:32 UTC (rev 4279) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/ChunkedRunningQuery.java 2011-03-08 15:06:58 UTC (rev 4280) @@ -50,6 +50,7 @@ import com.bigdata.bop.NoSuchBOpException; import com.bigdata.bop.PipelineOp; import com.bigdata.bop.fed.FederatedRunningQuery; +import com.bigdata.concurrent.FutureTaskMon; import com.bigdata.journal.ITx; import com.bigdata.journal.Journal; import com.bigdata.relation.accesspath.BufferClosedException; @@ -593,7 +594,7 @@ * responsible for communicating the changes in the query's running state * back to the {@link RunState} object on the query controller. 
*/ - private class ChunkFutureTask extends FutureTask<Void> { + private class ChunkFutureTask extends FutureTaskMon<Void> { private final ChunkTask t; Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java 2011-03-08 13:34:32 UTC (rev 4279) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java 2011-03-08 15:06:58 UTC (rev 4280) @@ -58,6 +58,7 @@ import com.bigdata.btree.BTree; import com.bigdata.btree.IndexSegment; import com.bigdata.btree.view.FusedView; +import com.bigdata.concurrent.FutureTaskMon; import com.bigdata.counters.CAT; import com.bigdata.counters.CounterSet; import com.bigdata.counters.Instrument; @@ -614,7 +615,7 @@ */ public void init() { - final FutureTask<Void> ft = new FutureTask<Void>(new QueryEngineTask( + final FutureTask<Void> ft = new FutureTaskMon<Void>(new QueryEngineTask( priorityQueue), (Void) null); if (engineFuture.compareAndSet(null/* expect */, ft)) { Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/StandaloneChainedRunningQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/StandaloneChainedRunningQuery.java 2011-03-08 13:34:32 UTC (rev 4279) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/StandaloneChainedRunningQuery.java 2011-03-08 15:06:58 UTC (rev 4280) @@ -44,6 +44,7 @@ import com.bigdata.bop.IBindingSet; import com.bigdata.bop.NoSuchBOpException; import com.bigdata.bop.PipelineOp; +import com.bigdata.concurrent.FutureTaskMon; import com.bigdata.relation.accesspath.BlockingBuffer; import com.bigdata.relation.accesspath.IAsynchronousIterator; import com.bigdata.relation.accesspath.IBlockingBuffer; @@ -345,7 +346,7 @@ final OperatorTask opTask = new OperatorTask(bopId, src); // Wrap task with error handling and handshaking logic. 
- final FutureTask<Void> ft = new FutureTask<Void>( + final FutureTask<Void> ft = new FutureTaskMon<Void>( new OperatorTaskWrapper(opTask), null/* result */); if (operatorFutures.putIfAbsent(bopId, ft) != null) { Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java 2011-03-08 13:34:32 UTC (rev 4279) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java 2011-03-08 15:06:58 UTC (rev 4280) @@ -60,6 +60,7 @@ import com.bigdata.bop.engine.BOpStats; import com.bigdata.btree.BytesUtil; import com.bigdata.btree.keys.IKeyBuilder; +import com.bigdata.concurrent.FutureTaskMon; import com.bigdata.counters.CAT; import com.bigdata.relation.IRelation; import com.bigdata.relation.accesspath.AbstractUnsynchronizedArrayBuffer; @@ -1411,7 +1412,7 @@ for (AccessPathTask task : tasks) { - final FutureTask<Void> ft = new FutureTask<Void>(task); + final FutureTask<Void> ft = new FutureTaskMon<Void>(task); futureTasks.add(ft); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/concurrent/FutureTaskMon.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/concurrent/FutureTaskMon.java 2011-03-08 13:34:32 UTC (rev 4279) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/concurrent/FutureTaskMon.java 2011-03-08 15:06:58 UTC (rev 4280) @@ -42,6 +42,7 @@ static private final transient Logger log = Logger .getLogger(FutureTaskMon.class); + private volatile boolean didStart = false; public FutureTaskMon(Callable<T> callable) { super(callable); @@ -50,17 +51,41 @@ public FutureTaskMon(Runnable runnable, T result) { super(runnable, result); } + + /** + * {@inheritDoc} + * <p> + * Hooked to notice when the task has been started. + */ + @Override + public void run() { + didStart = true; + super.run(); + } - public boolean cancel(boolean mayInterruptIfRunning) { - if (mayInterruptIfRunning && log.isDebugEnabled()) { + /** + * {@inheritDoc} + * <p> + * Overridden to conditionally log @ DEBUG if the caller caused the task to + * be interrupted. This can be used to search for sources of interrupts. 
+ */ + @Override + public boolean cancel(final boolean mayInterruptIfRunning) { + + final boolean didStart = this.didStart; + + final boolean ret = super.cancel(mayInterruptIfRunning); + + if (didStart && mayInterruptIfRunning && ret && log.isDebugEnabled()) { try { throw new RuntimeException("cancel call trace"); } catch (RuntimeException re) { log.debug("May interrupt running task", re); } } - - return super.cancel(mayInterruptIfRunning); + + return ret; + } } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/concurrent/NonBlockingLockManager.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/concurrent/NonBlockingLockManager.java 2011-03-08 13:34:32 UTC (rev 4279) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/concurrent/NonBlockingLockManager.java 2011-03-08 15:06:58 UTC (rev 4280) @@ -27,10 +27,8 @@ package com.bigdata.concurrent; -import static com.bigdata.concurrent.NonBlockingLockManager.RunState.Halted; import static com.bigdata.concurrent.NonBlockingLockManager.RunState.Running; import static com.bigdata.concurrent.NonBlockingLockManager.RunState.Shutdown; -import static com.bigdata.concurrent.NonBlockingLockManager.RunState.ShutdownNow; import static com.bigdata.concurrent.NonBlockingLockManager.RunState.Starting; import java.lang.ref.WeakReference; @@ -769,7 +767,7 @@ * @param <T> * The generic type of the outcome for the {@link Future}. */ - protected class LockFutureTask<T> extends FutureTask<T> { + protected class LockFutureTask<T> extends FutureTaskMon<T> { private final R[] resource; Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/concurrent/NonBlockingLockManagerWithNewDesign.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/concurrent/NonBlockingLockManagerWithNewDesign.java 2011-03-08 13:34:32 UTC (rev 4279) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/concurrent/NonBlockingLockManagerWithNewDesign.java 2011-03-08 15:06:58 UTC (rev 4280) @@ -1422,7 +1422,7 @@ * @param <T> * The generic type of the outcome for the {@link Future}. */ - static public class LockFutureTask<R extends Comparable<R>, T> extends FutureTask<T> { + static public class LockFutureTask<R extends Comparable<R>, T> extends FutureTaskMon<T> { /** * The instance of the outer class. 
Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/journal/AbstractJournal.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/journal/AbstractJournal.java 2011-03-08 13:34:32 UTC (rev 4279) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/journal/AbstractJournal.java 2011-03-08 15:06:58 UTC (rev 4280) @@ -63,6 +63,7 @@ import com.bigdata.cache.ConcurrentWeakValueCache; import com.bigdata.cache.ConcurrentWeakValueCacheWithTimeout; import com.bigdata.cache.HardReferenceQueue; +import com.bigdata.concurrent.FutureTaskMon; import com.bigdata.config.Configuration; import com.bigdata.config.IValidator; import com.bigdata.config.IntegerRangeValidator; @@ -3764,7 +3765,7 @@ // true the token is valid and this service is the quorum leader final boolean isLeader = quorum.getMember().isLeader(prepareToken); - final FutureTask<Boolean> ft = new FutureTask<Boolean>(new Runnable() { + final FutureTask<Boolean> ft = new FutureTaskMon<Boolean>(new Runnable() { public void run() { @@ -3842,7 +3843,7 @@ public Future<Void> commit2Phase(final long commitTime) { - final FutureTask<Void> ft = new FutureTask<Void>(new Runnable() { + final FutureTask<Void> ft = new FutureTaskMon<Void>(new Runnable() { public void run() { @@ -3896,7 +3897,7 @@ public Future<Void> abort2Phase(final long token) { - final FutureTask<Void> ft = new FutureTask<Void>(new Runnable() { + final FutureTask<Void> ft = new FutureTaskMon<Void>(new Runnable() { public void run() { getQuorum().assertQuorum(token); @@ -4011,7 +4012,7 @@ /** NOP. */ public Future<Void> bounceZookeeperConnection() { - final FutureTask<Void> ft = new FutureTask<Void>(new Runnable() { + final FutureTask<Void> ft = new FutureTaskMon<Void>(new Runnable() { public void run() { } }, null); @@ -4023,7 +4024,7 @@ * Does pipeline remove/add. */ public Future<Void> moveToEndOfPipeline() { - final FutureTask<Void> ft = new FutureTask<Void>(new Runnable() { + final FutureTask<Void> ft = new FutureTaskMon<Void>(new Runnable() { public void run() { final QuorumActor<?, ?> actor = quorum.getActor(); actor.pipelineRemove(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
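The cancel tracing in FutureTaskMon can be restated as a self-contained class. The sketch below uses an illustrative name; the logic mirrors the committed version, which only captures the caller's stack when the cancel could actually have interrupted a task that started running:

import java.util.concurrent.Callable;
import java.util.concurrent.FutureTask;
import org.apache.log4j.Logger;

public class TracingFutureTask<T> extends FutureTask<T> {

    private static final Logger log = Logger.getLogger(TracingFutureTask.class);

    private volatile boolean didStart = false;

    public TracingFutureTask(final Callable<T> callable) {
        super(callable);
    }

    public TracingFutureTask(final Runnable runnable, final T result) {
        super(runnable, result);
    }

    @Override
    public void run() {
        didStart = true; // note that the task actually began to execute.
        super.run();
    }

    @Override
    public boolean cancel(final boolean mayInterruptIfRunning) {
        final boolean didStart = this.didStart; // snapshot before cancelling.
        final boolean ret = super.cancel(mayInterruptIfRunning);
        // Log only when the cancel could have interrupted a running task.
        if (didStart && mayInterruptIfRunning && ret && log.isDebugEnabled()) {
            // The throwaway exception exists purely to capture the caller's stack.
            log.debug("May interrupt running task",
                    new RuntimeException("cancel call trace"));
        }
        return ret;
    }
}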
From: <tho...@us...> - 2011-03-08 15:12:50
|
Revision: 4281 http://bigdata.svn.sourceforge.net/bigdata/?rev=4281&view=rev Author: thompsonbry Date: 2011-03-08 15:12:43 +0000 (Tue, 08 Mar 2011) Log Message: ----------- Modified RWStrategy to explicitly pass in the offset and length of the data to be read from the RWStore. This is in preparation for examining ways of recycling the byte[] buffers in coordination with the B+Tree to reduce heap pressure. Modified RWStore to use a double-checked locking pattern in the re-opener such that it does not always synchronize before a read or write operation on the backing channel. This removes some sources of false contention during query. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/journal/RWStrategy.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/journal/RWStrategy.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/journal/RWStrategy.java 2011-03-08 15:06:58 UTC (rev 4280) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/journal/RWStrategy.java 2011-03-08 15:12:43 UTC (rev 4281) @@ -612,7 +612,7 @@ */ final byte buf[] = new byte[sze + 4]; // 4 bytes for checksum - m_store.getData(rwaddr, buf); + m_store.getData(rwaddr, buf, 0, sze+4); return ByteBuffer.wrap(buf, 0, sze); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java 2011-03-08 15:06:58 UTC (rev 4280) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java 2011-03-08 15:12:43 UTC (rev 4281) @@ -3137,35 +3137,55 @@ } - synchronized public FileChannel reopenChannel() throws IOException { + public FileChannel reopenChannel() throws IOException { - if (raf != null && raf.getChannel().isOpen()) { + /* + * Note: This is basically a double-checked locking pattern. It is + * used to avoid synchronizing when the backing channel is already + * open. + */ + { + final RandomAccessFile tmp = raf; + if (tmp != null) { + final FileChannel channel = tmp.getChannel(); + if (channel.isOpen()) { + /* + * The channel is still open. If you are allowing + * concurrent reads on the channel, then this could + * indicate that two readers each found the channel + * closed and that one was able to re-open the channel + * before the other such that the channel was open again + * by the time the 2nd reader got here. + */ + return channel; + } + } + } + + synchronized(this) { - /* - * The channel is still open. If you are allowing concurrent - * reads on the channel, then this could indicate that two - * readers each found the channel closed and that one was able - * to re-open the channel before the other such that the channel - * was open again by the time the 2nd reader got here. - */ + if (raf != null) { + final FileChannel channel = raf.getChannel(); + if (channel.isOpen()) { + return channel; + } + } - return raf.getChannel(); + // open the file. + this.raf = new RandomAccessFile(file, mode); - } + // Update counters. + final StoreCounters<?> c = (StoreCounters<?>) storeCounters + .get().acquire(); + try { + c.nreopen++; + } finally { + c.release(); + } - // open the file. - this.raf = new RandomAccessFile(file, mode); + return raf.getChannel(); - // Update counters. 
- final StoreCounters<?> c = (StoreCounters<?>) storeCounters.get() - .acquire(); - try { - c.nreopen++; - } finally { - c.release(); - } - - return raf.getChannel(); + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
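One point the diff does not show is that the unsynchronized first check is only safe if the file handle field is published safely. Below is a compact sketch of the double-checked re-open pattern with that made explicit: raf is declared volatile here, and the names are illustrative rather than taken from RWStore:

import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.channels.FileChannel;

class ChannelReopener {

    private final File file;

    // volatile so the unsynchronized fast path reads a safely published handle.
    private volatile RandomAccessFile raf;

    ChannelReopener(final File file) {
        this.file = file;
    }

    FileChannel reopenChannel() throws IOException {
        // Fast path: no synchronization when the channel is already open.
        {
            final RandomAccessFile tmp = raf;
            if (tmp != null) {
                final FileChannel channel = tmp.getChannel();
                if (channel.isOpen())
                    return channel;
            }
        }
        // Slow path: serialize re-opens so only one thread recreates the handle.
        synchronized (this) {
            if (raf != null) {
                final FileChannel channel = raf.getChannel();
                if (channel.isOpen())
                    return channel; // another thread re-opened it while we waited.
            }
            raf = new RandomAccessFile(file, "rw");
            return raf.getChannel();
        }
    }
}

The fast path never blocks; only threads that actually observe a closed channel contend for the monitor, which is exactly the false contention the commit removes.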
From: <tho...@us...> - 2011-03-08 15:15:09
|
Revision: 4282 http://bigdata.svn.sourceforge.net/bigdata/?rev=4282&view=rev Author: thompsonbry Date: 2011-03-08 15:15:03 +0000 (Tue, 08 Mar 2011) Log Message: ----------- Javadoc edit to RWStore's reopen. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/journal/WORMStrategy.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/journal/WORMStrategy.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/journal/WORMStrategy.java 2011-03-08 15:12:43 UTC (rev 4281) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/journal/WORMStrategy.java 2011-03-08 15:15:03 UTC (rev 4282) @@ -1426,8 +1426,24 @@ */ private FileChannel reopenChannel() throws IOException { - synchronized (opener) { + /* + * Note: This is basically a double-checked locking pattern. It is + * used to avoid synchronizing when the backing channel is already + * open. + */ + { + final RandomAccessFile tmp = raf; + if (tmp != null) { + final FileChannel channel = tmp.getChannel(); + if (channel.isOpen()) { + // The channel is still open. + return channel; + } + } + } + synchronized (opener) { + assertOpen(); if (raf != null && raf.getChannel().isOpen()) { Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java 2011-03-08 15:12:43 UTC (rev 4281) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java 2011-03-08 15:15:03 UTC (rev 4282) @@ -3149,14 +3149,7 @@ if (tmp != null) { final FileChannel channel = tmp.getChannel(); if (channel.isOpen()) { - /* - * The channel is still open. If you are allowing - * concurrent reads on the channel, then this could - * indicate that two readers each found the channel - * closed and that one was able to re-open the channel - * before the other such that the channel was open again - * by the time the 2nd reader got here. - */ + // The channel is still open. return channel; } } @@ -3167,6 +3160,14 @@ if (raf != null) { final FileChannel channel = raf.getChannel(); if (channel.isOpen()) { + /* + * The channel is still open. If you are allowing + * concurrent reads on the channel, then this could + * indicate that two readers each found the channel + * closed and that one was able to re-open the channel + * before the other such that the channel was open again + * by the time the 2nd reader got here. + */ return channel; } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2011-03-14 18:17:38
|
Revision: 4298 http://bigdata.svn.sourceforge.net/bigdata/?rev=4298&view=rev Author: thompsonbry Date: 2011-03-14 18:17:32 +0000 (Mon, 14 Mar 2011) Log Message: ----------- Added an option (com.bigdata.journal.Options.IGNORE_BAD_ROOT_BLOCK) in the QUADS_QUERY_BRANCH which may be used to permit a Journal with ONE (1) bad root block to be opened using the other root block. The ALTERNATE_ROOT_BLOCK option is only permitted when both root blocks are valid, but there is a reason to revert to the previous root block anyway. The IGNORE_BAD_ROOT_BLOCK option handles the case where one of the two root blocks is invalid. Note that the Journal now logs its root blocks, so it is possible (with code) to choose a historical root block to be restored. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/journal/FileMetadata.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/journal/Options.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/journal/RootBlockUtility.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/journal/FileMetadata.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/journal/FileMetadata.java 2011-03-14 17:49:49 UTC (rev 4297) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/journal/FileMetadata.java 2011-03-14 18:17:32 UTC (rev 4298) @@ -831,54 +831,58 @@ } - /** - * This constructor handles cases where the file exists and is non-empty. - * - * @param file - * The name of the file to be opened. - * @param useDirectBuffers - * true if a buffer should be allocated using - * {@link ByteBuffer#allocateDirect(int)} rather than - * {@link ByteBuffer#allocate(int)}. This has no effect for the - * {@link BufferMode#Disk} and {@link BufferMode#Mapped} modes. - * @param readOnly - * When true, the file is opened in a read-only mode and it is an - * error if the file does not exist. - * @param forceWrites - * When true, the file is opened in "rwd" mode and individual IOs - * are forced to disk. This option SHOULD be false since we only - * need to write through to disk on commit, not on each IO. - * @param writeCacheEnabled - * When <code>true</code>, the {@link DiskOnlyStrategy} will - * allocate a direct {@link ByteBuffer} from the - * {@link DirectBufferPool} to service as a write cache. - * @param writeCacheBufferCount - * The #of buffers to allocate for the {@link WriteCacheService}. - * @param validateChecksum - * When <code>true</code>, the checksum stored in the root blocks - * of an existing file will be validated when the file is opened. - * See {@link Options#VALIDATE_CHECKSUM}. - * @param alternateRootBlock - * When <code>true</code> the prior root block will be used. This - * option may be used when a commit record is valid but the data - * associated with the commit point is invalid. There are two - * root blocks. Normally the one which has been most recently - * written will be loaded on restart. When this option is - * specified, the older of the two root blocks will be loaded - * instead. <strong>If you use this option and then do a commit - * then the more recent of the root blocks will be lost and any - * data associated with that commit point will be lost as - * well!</strong> - * - * @throws RuntimeException - * if there is a problem preparing the file for use by the - * journal.
- */ + /** + * This constructor handles cases where the file exists and is non-empty. + * + * @param file + * The name of the file to be opened. + * @param useDirectBuffers + * true if a buffer should be allocated using + * {@link ByteBuffer#allocateDirect(int)} rather than + * {@link ByteBuffer#allocate(int)}. This has no effect for the + * {@link BufferMode#Disk} and {@link BufferMode#Mapped} modes. + * @param readOnly + * When true, the file is opened in a read-only mode and it is an + * error if the file does not exist. + * @param forceWrites + * When true, the file is opened in "rwd" mode and individual IOs + * are forced to disk. This option SHOULD be false since we only + * need to write through to disk on commit, not on each IO. + * @param writeCacheEnabled + * When <code>true</code>, the {@link DiskOnlyStrategy} will + * allocate a direct {@link ByteBuffer} from the + * {@link DirectBufferPool} to service as a write cache. + * @param writeCacheBufferCount + * The #of buffers to allocate for the {@link WriteCacheService}. + * @param validateChecksum + * When <code>true</code>, the checksum stored in the root blocks + * of an existing file will be validated when the file is opened. + * See {@link Options#VALIDATE_CHECKSUM}. + * @param alternateRootBlock + * When <code>true</code> the prior root block will be used. This + * option may be used when a commit record is valid but the data + * associated with the commit point is invalid. There are two + * root blocks. Normally the one which has been most recently + * written will be loaded on restart. When this option is + * specified, the older of the two root blocks will be loaded + * instead. <strong>If you use this option and then do a commit + * then the more recent of the root blocks will be lost and any + * data associated with that commit point will be lost as + * well!</strong> + * @param ignoreBadRootBlock + * When <code>true</code>, the application will be allowed to + * proceed with one damaged root block. The undamaged root block + * will be automatically chosen. + * + * @throws RuntimeException + * if there is a problem preparing the file for use by the + * journal. + */ FileMetadata(final File file, final boolean useDirectBuffers, final boolean readOnly, final ForceEnum forceWrites, final boolean writeCacheEnabled, final int writeCacheBufferCount, final boolean validateChecksum, final boolean alternateRootBlock, - final Properties properties) + final boolean ignoreBadRootBlock, final Properties properties) throws RuntimeException { if (file == null) @@ -976,7 +980,7 @@ * constants (slotSize, segmentId). 
*/ final RootBlockUtility tmp = new RootBlockUtility(opener, file, - validateChecksum, alternateRootBlock); + validateChecksum, alternateRootBlock, ignoreBadRootBlock); this.rootBlock0 = tmp.rootBlock0; this.rootBlock1 = tmp.rootBlock1; this.rootBlock = tmp.rootBlock; @@ -1422,9 +1426,12 @@ properties, Options.VALIDATE_CHECKSUM, Options.DEFAULT_VALIDATE_CHECKSUM)); - final boolean alternateRootBlock = Boolean.parseBoolean(getProperty( - properties, Options.ALTERNATE_ROOT_BLOCK, "false")); + final boolean alternateRootBlock = Boolean.parseBoolean(getProperty( + properties, Options.ALTERNATE_ROOT_BLOCK, "false")); + final boolean ignoreBadRootBlock = Boolean.parseBoolean(getProperty( + properties, Options.IGNORE_BAD_ROOT_BLOCK, "false")); + if (alternateRootBlock && !readOnly) { log.warn("*** Using the alternate root block: " @@ -1440,7 +1447,8 @@ return new FileMetadata(file, useDirectBuffers, readOnly, forceWrites, writeCacheEnabled, writeCacheBufferCount, - validateChecksum, alternateRootBlock, properties); + validateChecksum, alternateRootBlock, ignoreBadRootBlock, + properties); } else { Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/journal/Options.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/journal/Options.java 2011-03-14 17:49:49 UTC (rev 4297) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/journal/Options.java 2011-03-14 18:17:32 UTC (rev 4298) @@ -322,12 +322,12 @@ * the more current root block is used to (re-)open the store. However, if * this option is specified, the <em>previous</em> root block will be used * to open the store. This will allow you to access the previous commit - * point. <strong>If you subsequently perform a commit then the most root - * block will be overwritten and any data associated with the last commit - * point will be unreachable.</strong> This option may be considered in the - * cases where the application is otherwise unable to proceed. It is - * strongly recommended that you also specify {@link #READ_ONLY} so that you - * do not <em>accidentally</em> trigger a commit and thereby make the data + * point. <strong>If you subsequently perform a commit then the other root + * block will be overwritten and any data associated with its commit point + * will be unreachable.</strong> This option may be considered in the cases + * where the application is otherwise unable to proceed. It is strongly + * recommended that you also specify {@link #READ_ONLY} so that you do not + * <em>accidentally</em> trigger a commit and thereby make the data * associated with the other root block unreachable. You may of course * deliberately allow a commit as an attempt to restore the database to * service accepting that you have rolled back the database by one commit @@ -336,6 +336,21 @@ String ALTERNATE_ROOT_BLOCK = AbstractJournal.class.getName()+".alternateRootBlock"; /** + * <strong>WARNING - The use of this option is dangerous.</strong> This + * option MAY be used to permit the database to be opened if one of the root + * blocks is bad. This will allow you to access the remaining root block and + * the associated commit point. <strong>If you subsequently perform a commit + * then the damaged root block will be overwritten.</strong> This option may + * be considered in the cases where the application is otherwise unable to + * proceed. 
It is strongly recommended that you also specify + * {@link #READ_ONLY} so that you do not <em>accidentally</em> trigger a + * commit. You may of course deliberately allow a commit as an attempt to + * restore the database to service accepting that you may have rolled back + * the database by one commit point in doing so. + */ + String IGNORE_BAD_ROOT_BLOCK = AbstractJournal.class.getName()+".ignoreBadRootBlock"; + + /** * An optional boolean property (default is {@value #DEFAULT_CREATE}). When * <code>true</code> and the named file is not found, a new journal will be * created. If the file exists but is empty, then a new journal will be Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/journal/RootBlockUtility.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/journal/RootBlockUtility.java 2011-03-14 17:49:49 UTC (rev 4297) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/journal/RootBlockUtility.java 2011-03-14 18:17:32 UTC (rev 4298) @@ -67,10 +67,24 @@ */ public final IRootBlockView rootBlock; + /** + * + * @param opener + * @param file + * @param validateChecksum + * @param alternateRootBlock + * @param ignoreBadRootBlock + * + * @throws IOException + * + * @see com.bigdata.journal.Options#ALTERNATE_ROOT_BLOCK + * @see com.bigdata.journal.Options#IGNORE_BAD_ROOT_BLOCK + */ public RootBlockUtility(final IReopenChannel<FileChannel> opener, final File file, final boolean validateChecksum, - final boolean alternateRootBlock) throws IOException { - + final boolean alternateRootBlock, final boolean ignoreBadRootBlock) + throws IOException { + final ChecksumUtility checker = validateChecksum ? ChecksumUtility.threadChk .get() : null; @@ -87,31 +101,43 @@ try { rootBlock0 = new RootBlockView(true, tmp0, checker); } catch (RootBlockException ex) { - log.warn("Bad root block zero: " + ex); + log.error("Bad root block zero: " + ex); } try { rootBlock1 = new RootBlockView(false, tmp1, checker); } catch (RootBlockException ex) { - log.warn("Bad root block one: " + ex); + log.error("Bad root block one: " + ex); } if (rootBlock0 == null && rootBlock1 == null) { throw new RuntimeException( "Both root blocks are bad - journal is not usable: " + file); } + // save references. this.rootBlock0 = rootBlock0; this.rootBlock1 = rootBlock1; - + + if (!ignoreBadRootBlock + && (rootBlock0 == null || rootBlock1 == null)) { + /* + * Do not permit the application to continue with a damaged + * root block. + */ + throw new RuntimeException( + "Bad root block(s): rootBlock0 is " + + (rootBlock0 == null ? "bad" : "ok") + + ", rootBlock1=" + + (rootBlock1 == null ? "bad" : "ok")); + } if(alternateRootBlock) { /* * A request was made to use the alternative root block. */ if (rootBlock0 == null || rootBlock1 == null) { /* - * Note: The [alternateRootBlock] flag only makes sense - * when you have two to choose from and you want to - * choose the other one. In this case, your only choice - * is to use the undamaged root block. + * Note: The [alternateRootBlock] flag only makes sense when you + * have two root blocks to choose from and you want to choose + * the other one. */ throw new RuntimeException( "Can not use alternative root block since one root block is damaged."); @@ -219,12 +245,17 @@ }; + // validate the root blocks using their checksums. final boolean validateChecksum = true; - + + // option is ignored since we are not not opening the Journal. 
final boolean alternateRootBlock = false; + + // Open even if one root block is bad. + final boolean ignoreBadRootBlock = true; final RootBlockUtility u = new RootBlockUtility(opener, file, - validateChecksum, alternateRootBlock); + validateChecksum, alternateRootBlock, ignoreBadRootBlock); System.out.println("rootBlock0: " + u.rootBlock0); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java 2011-03-14 17:49:49 UTC (rev 4297) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java 2011-03-14 18:17:32 UTC (rev 4298) @@ -2068,7 +2068,8 @@ try { final RootBlockUtility tmp = new RootBlockUtility(m_reopener, m_fd, - true/* validateChecksum */, false/* alternateRootBlock */); + true/* validateChecksum */, false/* alternateRootBlock */, + false/* ignoreBadRootBlock */); final IRootBlockView rootBlock = tmp.rootBlock; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
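For recovery scenarios, something like the following sketch (not part of this commit) combines the new option with READ_ONLY to inspect a journal whose other root block is damaged. The journal path is illustrative, and the Options.FILE constant is assumed to exist alongside READ_ONLY and IGNORE_BAD_ROOT_BLOCK in com.bigdata.journal.Options:

import java.util.Properties;

import com.bigdata.journal.Journal;
import com.bigdata.journal.Options;

public class InspectDamagedJournal {

    public static void main(final String[] args) {

        final Properties p = new Properties();

        // Path to the damaged journal (illustrative).
        p.setProperty(Options.FILE, "/var/data/bigdata.jnl");

        // Permit the open even though one of the root blocks is bad.
        p.setProperty(Options.IGNORE_BAD_ROOT_BLOCK, "true");

        // Strongly recommended: prevent an accidental commit from
        // overwriting the damaged root block.
        p.setProperty(Options.READ_ONLY, "true");

        final Journal journal = new Journal(p);

        try {

            // Report the commit point associated with the good root block.
            System.out.println(journal.getRootBlockView());

        } finally {

            journal.close();

        }

    }

}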
From: <tho...@us...> - 2011-03-16 19:45:12
Revision: 4303 http://bigdata.svn.sourceforge.net/bigdata/?rev=4303&view=rev Author: thompsonbry Date: 2011-03-16 19:45:05 +0000 (Wed, 16 Mar 2011) Log Message: ----------- Modified DumpJournal to include an option to summarize the page (node and leaf) usage statistics and refactored some code into a DumpIndex class. Modified IndexMetadata#toString() to report the details on the TupleSerializer so we can observe which codes are used for leaves in an index. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/DumpIndexSegment.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/IndexMetadata.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/journal/DumpJournal.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/DumpIndex.java Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/DumpIndex.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/DumpIndex.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/DumpIndex.java 2011-03-16 19:45:05 UTC (rev 4303) @@ -0,0 +1,178 @@ +package com.bigdata.btree; + +/** + * Utility class to dump an index in a variety of ways. + * + * @author thompsonbry + */ +public class DumpIndex { + + /** + * Utility method using an {@link ITupleIterator} to dump the keys and + * values in an {@link AbstractBTree}. + * + * @param ndx + * The index. + * @param showTuples + * When <code>true</code> the data for the keys and values will + * be displayed. Otherwise the scan will simply exercise the + * iterator. + */ + public static void dumpIndex(final AbstractBTree ndx, + final boolean showTuples) { + + // Note: reused for each tuple to avoid heap churn. + final StringBuilder sb = new StringBuilder(); + + // @todo offer the version metadata also if the index supports + // isolation. + final ITupleIterator<?> itr = ndx.rangeIterator(null, null); + + final long begin = System.currentTimeMillis(); + + int i = 0; + + while (itr.hasNext()) { + + final ITuple<?> tuple = itr.next(); + + if (showTuples) { + + dumpTuple(i, sb, tuple); + + } + + i++; + + } + + final long elapsed = System.currentTimeMillis() - begin; + + System.out.println("Visited " + i + " tuples in " + elapsed + "ms"); + + } + + private static void dumpTuple(final int recno, final StringBuilder tupleSB, + final ITuple<?> tuple) { + + final ITupleSerializer<?, ?> tupleSer = tuple.getTupleSerializer(); + + tupleSB.setLength(0); // reset. + + tupleSB.append("rec=" + recno); + + try { + + tupleSB.append("\nkey=" + tupleSer.deserializeKey(tuple)); + + } catch (Throwable t) { + + tupleSB.append("\nkey=" + BytesUtil.toString(tuple.getKey())); + + } + + try { + + tupleSB.append("\nval=" + tupleSer.deserialize(tuple)); + + } catch (Throwable t) { + + tupleSB.append("\nval=" + BytesUtil.toString(tuple.getValue())); + + } + + System.out.println(tupleSB); + + } + + /** + * Dumps pages (nodes and leaves) using a low-level approach. + * + * @param ndx + * The index. + * @param dumpNodeState + * When <code>true</code>, provide gruesome detail on each + * visited page. 
+ */ + public static void dumpPages(final AbstractBTree ndx, + final boolean dumpNodeState) { + + final PageStats stats = new PageStats(); + + dumpPages(ndx, ndx.getRoot(), stats, dumpNodeState); + + System.out.println("name=" + ndx.getIndexMetadata().getName() + " : " + + stats); + + } + + /* + * FIXME report the min/max page size as well and report as a nice table in + * DumpJournal. + */ + static class PageStats { + + /** The #of nodes visited. */ + public long nnodes; + /** The #of leaves visited. */ + public long nleaves; + /** The #of bytes in the raw records for the nodes visited. */ + public long nodeBytes; + /** The #of bytes in the raw records for the leaves visited. */ + public long leafBytes; + + public String toString() { + final StringBuilder sb = new StringBuilder(); + sb.append(getClass().getName()); + sb.append("{nnodes="+nnodes); + sb.append(",nleaves="+nleaves); + sb.append(",nodeBytes="+nodeBytes); + sb.append(",leafBytes="+leafBytes); + sb.append(",bytesPerNode="+(nnodes==0?0:nodeBytes/nnodes)); + sb.append(",bytesPerLeaf="+(nleaves==0?0:leafBytes/nleaves)); + sb.append("}"); + return sb.toString(); + } + + } + + private static void dumpPages(final AbstractBTree ndx, + final AbstractNode<?> node, final PageStats stats, + final boolean dumpNodeState) { + + if (dumpNodeState) + node.dump(System.out); + + final long addrSelf = node.getIdentity(); + + final long nbytes = ndx.getStore().getByteCount(addrSelf); + + final boolean isLeaf = node.isLeaf(); + + if (isLeaf) { + + stats.nleaves++; + stats.leafBytes += nbytes; + + } else { + + stats.nnodes++; + stats.nodeBytes += nbytes; + + final int nkeys = node.getKeyCount(); + + for (int i = 0; i <= nkeys; i++) { + + // normal read following the node hierarchy, using cache, etc. + final AbstractNode<?> child = ((Node) node).getChild(i); + + // recursive dump + dumpPages(ndx, child, stats, dumpNodeState); + + } + + } + + } + +} Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/DumpIndexSegment.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/DumpIndexSegment.java 2011-03-16 18:48:33 UTC (rev 4302) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/DumpIndexSegment.java 2011-03-16 19:45:05 UTC (rev 4303) @@ -37,7 +37,6 @@ import com.bigdata.btree.IndexSegment.ImmutableNodeFactory.ImmutableLeaf; import com.bigdata.io.DirectBufferPool; -import com.bigdata.journal.DumpJournal; import com.bigdata.rawstore.IRawStore; /** @@ -192,7 +191,7 @@ writeBanner("dump keys and values using iterator"); - DumpJournal.dumpIndex(store.loadIndexSegment(),showTuples); + DumpIndex.dumpIndex(store.loadIndexSegment(),showTuples); } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/IndexMetadata.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/IndexMetadata.java 2011-03-16 18:48:33 UTC (rev 4302) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/IndexMetadata.java 2011-03-16 19:45:05 UTC (rev 4303) @@ -2347,7 +2347,7 @@ sb.append(", btreeRecordCompressorFactory=" + (btreeRecordCompressorFactory == null ? "N/A" : btreeRecordCompressorFactory)); - sb.append(", tupleSerializer=" + tupleSer.getClass().getName()); + sb.append(", tupleSerializer=" + tupleSer);//.getClass().getName()); sb.append(", conflictResolver=" + (conflictResolver == null ? 
"N/A" : conflictResolver .getClass().getName())); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/journal/DumpJournal.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/journal/DumpJournal.java 2011-03-16 18:48:33 UTC (rev 4302) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/journal/DumpJournal.java 2011-03-16 19:45:05 UTC (rev 4303) @@ -33,13 +33,10 @@ import org.apache.log4j.Logger; -import com.bigdata.btree.AbstractBTree; import com.bigdata.btree.BTree; -import com.bigdata.btree.BytesUtil; +import com.bigdata.btree.DumpIndex; import com.bigdata.btree.IIndex; -import com.bigdata.btree.ITuple; import com.bigdata.btree.ITupleIterator; -import com.bigdata.btree.ITupleSerializer; import com.bigdata.rawstore.Bytes; import com.bigdata.util.InnerCause; @@ -92,6 +89,8 @@ * committed state).</dd> * <dt>-indices</dt> * <dd>Dump the indices (does not show the tuples by default).</dd> + * <dt>-pages</dt> + * <dd>Dump the pages of the indices and reports some information on the page size.</dd> * <dt>-tuples</dt> * <dd>Dump the records in the indices.</dd> * </dl> @@ -111,6 +110,8 @@ boolean dumpHistory = false; boolean dumpIndices = false; + + boolean dumpPages = false; boolean showTuples = false; @@ -131,18 +132,27 @@ } - if(arg.equals("-indices")) { + else if(arg.equals("-indices")) { dumpIndices = true; } - if(arg.equals("-tuples")) { + else if(arg.equals("-pages")) { + dumpPages = true; + + } + + else if(arg.equals("-tuples")) { + showTuples = true; } + else + throw new RuntimeException("Unknown argument: " + arg); + } for(; i<args.length; i++) { @@ -151,7 +161,7 @@ try { - dumpJournal(file,dumpHistory,dumpIndices,showTuples); + dumpJournal(file,dumpHistory,dumpPages,dumpIndices,showTuples); } catch( RuntimeException ex) { @@ -161,20 +171,20 @@ } - System.err.println("=================================="); + System.out.println("=================================="); } } - public static void dumpJournal(File file,boolean dumpHistory,boolean dumpIndices,boolean showTuples) { + public static void dumpJournal(File file,boolean dumpHistory,boolean dumpPages,boolean dumpIndices,boolean showTuples) { /* * Stat the file and report on its size, etc. */ { - System.err.println("File: "+file); + System.out.println("File: "+file); if(!file.exists()) { @@ -192,9 +202,9 @@ } - System.err.println("Length: "+file.length()); + System.out.println("Length: "+file.length()); - System.err.println("Last Modified: "+new Date(file.lastModified())); + System.out.println("Last Modified: "+new Date(file.lastModified())); } @@ -210,7 +220,7 @@ } - System.err.println("Opening (read-only): "+file); + System.out.println("Opening (read-only): "+file); final Journal journal = new Journal(properties); @@ -219,15 +229,15 @@ final FileMetadata fmd = journal.getFileMetadata(); // dump the MAGIC and VERSION. - System.err.println("magic="+Integer.toHexString(fmd.magic)); - System.err.println("version="+Integer.toHexString(fmd.version)); + System.out.println("magic="+Integer.toHexString(fmd.magic)); + System.out.println("version="+Integer.toHexString(fmd.version)); // dump the root blocks. - System.err.println(fmd.rootBlock0.toString()); - System.err.println(fmd.rootBlock1.toString()); + System.out.println(fmd.rootBlock0.toString()); + System.out.println(fmd.rootBlock1.toString()); // report on which root block is the current root block. 
- System.err.println("The current root block is #" + System.out.println("The current root block is #" + (journal.getRootBlockView().isRootBlock0() ? 0 : 1)); /* @@ -241,14 +251,14 @@ final long bytesAvailable = (fmd.userExtent - fmd.nextOffset); - System.err.println("extent="+fmd.extent+"("+fmd.extent/Bytes.megabyte+"M)"+ + System.out.println("extent="+fmd.extent+"("+fmd.extent/Bytes.megabyte+"M)"+ ", userExtent="+fmd.userExtent+"("+fmd.userExtent/Bytes.megabyte+"M)"+ ", bytesAvailable="+bytesAvailable+"("+bytesAvailable/Bytes.megabyte+"M)"+ ", nextOffset="+fmd.nextOffset); if (dumpHistory) { - System.err.println("Historical commit points follow in temporal sequence (first to last):"); + System.out.println("Historical commit points follow in temporal sequence (first to last):"); CommitRecordIndex commitRecordIndex = journal.getCommitRecordIndex(); // CommitRecordIndex commitRecordIndex = journal._commitRecordIndex; @@ -257,19 +267,19 @@ while(itr.hasNext()) { - System.err.println("----"); + System.out.println("----"); final CommitRecordIndex.Entry entry = itr.next().getObject(); - System.err.print("Commit Record: " + entry.commitTime + System.out.print("Commit Record: " + entry.commitTime + ", addr=" + journal.toString(entry.addr)+", "); final ICommitRecord commitRecord = journal .getCommitRecord(entry.commitTime); - System.err.println(commitRecord.toString()); + System.out.println(commitRecord.toString()); - dumpNamedIndicesMetadata(journal,commitRecord,dumpIndices,showTuples); + dumpNamedIndicesMetadata(journal,commitRecord,dumpPages,dumpIndices,showTuples); } @@ -281,9 +291,9 @@ final ICommitRecord commitRecord = journal.getCommitRecord(); - System.err.println(commitRecord.toString()); + System.out.println(commitRecord.toString()); - dumpNamedIndicesMetadata(journal,commitRecord,dumpIndices,showTuples); + dumpNamedIndicesMetadata(journal,commitRecord,dumpPages,dumpIndices,showTuples); } @@ -302,7 +312,7 @@ * @param commitRecord */ private static void dumpNamedIndicesMetadata(AbstractJournal journal, - ICommitRecord commitRecord, boolean dumpIndices, boolean showTuples) { + ICommitRecord commitRecord, boolean dumpPages, boolean dumpIndices, boolean showTuples) { // view as of that commit record. final IIndex name2Addr = journal.getName2Addr(commitRecord.getTimestamp()); @@ -315,7 +325,7 @@ final Name2Addr.Entry entry = Name2Addr.EntrySerializer.INSTANCE .deserialize(itr.next().getValueStream()); - System.err.println("name=" + entry.name + ", addr=" + System.out.println("name=" + entry.name + ", addr=" + journal.toString(entry.checkpointAddr)); // load B+Tree from its checkpoint record. @@ -347,85 +357,19 @@ } // show checkpoint record. - System.err.println("\t" + ndx.getCheckpoint()); + System.out.println("\t" + ndx.getCheckpoint()); // show metadata record. - System.err.println("\t" + ndx.getIndexMetadata()); + System.out.println("\t" + ndx.getIndexMetadata()); + if (dumpPages) + DumpIndex.dumpPages(ndx, false/* dumpNodeState */); + if (dumpIndices) - dumpIndex(ndx, showTuples); + DumpIndex.dumpIndex(ndx, showTuples); } } - /** - * Utility method using an {@link ITupleIterator} to dump the keys and - * values in an {@link AbstractBTree}. - * - * @param btree - * @param showTuples - * When <code>true</code> the data for the keys and values will - * be displayed. Otherwise the scan will simply exercise the - * iterator. - */ - public static void dumpIndex(AbstractBTree btree, boolean showTuples) { - - // @todo offer the version metadata also if the index supports isolation. 
- final ITupleIterator itr = btree.rangeIterator(null, null); - - final long begin = System.currentTimeMillis(); - - int i = 0; - - while(itr.hasNext()) { - - final ITuple tuple = itr.next(); - - if(showTuples) { - - System.err.println("rec="+i+dumpTuple( tuple )); - - } - - i++; - - } - - final long elapsed = System.currentTimeMillis() - begin; - - System.err.println("Visited "+i+" tuples in "+elapsed+"ms"); - - } - - private static String dumpTuple(ITuple tuple) { - - final ITupleSerializer tupleSer = tuple.getTupleSerializer(); - - final StringBuilder sb = new StringBuilder(); - - try { - - sb.append("\nkey="+tupleSer.deserializeKey(tuple)); - - } catch(Throwable t) { - - sb.append("\nkey="+BytesUtil.toString(tuple.getKey())); - - } - - try { - - sb.append("\nval="+tupleSer.deserialize(tuple)); - - } catch(Throwable t) { - - sb.append("\nval="+BytesUtil.toString(tuple.getValue())); - - } - - return sb.toString(); - - } - } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
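For reference, a short sketch of driving the refactored entry point programmatically with the new page statistics option enabled; the file name is illustrative:

import java.io.File;

import com.bigdata.journal.DumpJournal;

public class DumpPagesExample {

    public static void main(final String[] args) {

        // Equivalent to the command line: DumpJournal -pages <file>
        DumpJournal.dumpJournal(new File("/var/data/bigdata.jnl"),
                false/* dumpHistory */, true/* dumpPages */,
                false/* dumpIndices */, false/* showTuples */);

    }

}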
From: <tho...@us...> - 2011-03-17 14:40:00
Revision: 4307 http://bigdata.svn.sourceforge.net/bigdata/?rev=4307&view=rev Author: thompsonbry Date: 2011-03-17 14:39:53 +0000 (Thu, 17 Mar 2011) Log Message: ----------- Corrected some inconsistencies in DumpJournal and DumpIndex with respect to gathering and reporting the PageStats for an index. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/DumpIndex.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/journal/DumpJournal.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/DumpIndex.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/DumpIndex.java 2011-03-17 12:34:36 UTC (rev 4306) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/DumpIndex.java 2011-03-17 14:39:53 UTC (rev 4307) @@ -93,24 +93,25 @@ * @param dumpNodeState * When <code>true</code>, provide gruesome detail on each * visited page. + * + * @return Some interesting statistics about the pages in that index which + * the caller can print out. */ - public static void dumpPages(final AbstractBTree ndx, + public static PageStats dumpPages(final AbstractBTree ndx, final boolean dumpNodeState) { final PageStats stats = new PageStats(); dumpPages(ndx, ndx.getRoot(), stats, dumpNodeState); - System.out.println("name=" + ndx.getIndexMetadata().getName() + " : " - + stats); + return stats; } - - /* - * FIXME report the min/max page size as well and report as a nice table in - * DumpJournal. + + /** + * Class reports various summary statistics for nodes and leaves. */ - static class PageStats { + public static class PageStats { /** The #of nodes visited. */ public long nnodes; @@ -120,16 +121,39 @@ public long nodeBytes; /** The #of bytes in the raw records for the leaves visited. */ public long leafBytes; + /** The min/max bytes per node. */ + public long minNodeBytes, maxNodeBytes; + /** The min/max bytes per leaf. */ + public long minLeafBytes, maxLeafBytes; + /** Return {@link #nodeBytes} plus {@link #leafBytes}. */ + public long getTotalBytes() { + return nodeBytes + leafBytes; + } + + /** The average bytes per node. */ + public long getBytesPerNode() { + return (nnodes == 0 ? 0 : nodeBytes / nnodes); + } + + /** The average bytes per leaf. */ + public long getBytesPerLeaf() { + return (nleaves == 0 ? 
0 : leafBytes / nleaves); + } + public String toString() { final StringBuilder sb = new StringBuilder(); sb.append(getClass().getName()); - sb.append("{nnodes="+nnodes); - sb.append(",nleaves="+nleaves); - sb.append(",nodeBytes="+nodeBytes); - sb.append(",leafBytes="+leafBytes); - sb.append(",bytesPerNode="+(nnodes==0?0:nodeBytes/nnodes)); - sb.append(",bytesPerLeaf="+(nleaves==0?0:leafBytes/nleaves)); + sb.append("{nnodes=" + nnodes); + sb.append(",nleaves=" + nleaves); + sb.append(",nodeBytes=" + nodeBytes); + sb.append(",minNodeBytes=" + minNodeBytes); + sb.append(",maxNodeBytes=" + maxNodeBytes); + sb.append(",leafBytes=" + leafBytes); + sb.append(",minLeafBytes=" + minLeafBytes); + sb.append(",maxLeafBytes=" + maxLeafBytes); + sb.append(",bytesPerNode=" + getBytesPerNode()); + sb.append(",bytesPerLeaf=" + getBytesPerLeaf()); sb.append("}"); return sb.toString(); } @@ -153,11 +177,19 @@ stats.nleaves++; stats.leafBytes += nbytes; + if (stats.minLeafBytes > nbytes || stats.minLeafBytes == 0) + stats.minLeafBytes = nbytes; + if (stats.maxLeafBytes < nbytes) + stats.maxLeafBytes = nbytes; } else { stats.nnodes++; stats.nodeBytes += nbytes; + if (stats.minNodeBytes > nbytes || stats.minNodeBytes == 0) + stats.minNodeBytes = nbytes; + if (stats.maxNodeBytes < nbytes) + stats.maxNodeBytes = nbytes; final int nkeys = node.getKeyCount(); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/journal/DumpJournal.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/journal/DumpJournal.java 2011-03-17 12:34:36 UTC (rev 4306) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/journal/DumpJournal.java 2011-03-17 14:39:53 UTC (rev 4307) @@ -29,7 +29,9 @@ import java.io.File; import java.util.Date; +import java.util.Map; import java.util.Properties; +import java.util.TreeMap; import org.apache.log4j.Logger; @@ -37,6 +39,7 @@ import com.bigdata.btree.DumpIndex; import com.bigdata.btree.IIndex; import com.bigdata.btree.ITupleIterator; +import com.bigdata.btree.DumpIndex.PageStats; import com.bigdata.rawstore.Bytes; import com.bigdata.util.InnerCause; @@ -260,10 +263,10 @@ System.out.println("Historical commit points follow in temporal sequence (first to last):"); - CommitRecordIndex commitRecordIndex = journal.getCommitRecordIndex(); + final CommitRecordIndex commitRecordIndex = journal.getCommitRecordIndex(); // CommitRecordIndex commitRecordIndex = journal._commitRecordIndex; - ITupleIterator<CommitRecordIndex.Entry> itr = commitRecordIndex.rangeIterator(); + final ITupleIterator<CommitRecordIndex.Entry> itr = commitRecordIndex.rangeIterator(); while(itr.hasNext()) { @@ -311,14 +314,17 @@ * @param journal * @param commitRecord */ - private static void dumpNamedIndicesMetadata(AbstractJournal journal, - ICommitRecord commitRecord, boolean dumpPages, boolean dumpIndices, boolean showTuples) { + private static void dumpNamedIndicesMetadata(AbstractJournal journal, + ICommitRecord commitRecord, boolean dumpPages, boolean dumpIndices, boolean showTuples) { // view as of that commit record. final IIndex name2Addr = journal.getName2Addr(commitRecord.getTimestamp()); final ITupleIterator itr = name2Addr.rangeIterator(null,null); - + + final Map<String, PageStats> pageStats = dumpPages ? new TreeMap<String, PageStats>() + : null; + while (itr.hasNext()) { // a registered index. @@ -362,14 +368,100 @@ // show metadata record. 
System.out.println("\t" + ndx.getIndexMetadata()); - if (dumpPages) - DumpIndex.dumpPages(ndx, false/* dumpNodeState */); + if (pageStats != null) { + + final PageStats stats = DumpIndex + .dumpPages(ndx, false/* dumpNodeState */); + System.out.println("\t" + stats); + + pageStats.put(entry.name, stats); + + } + if (dumpIndices) DumpIndex.dumpIndex(ndx, showTuples); } + + if (pageStats != null) { + + /* + * TODO If we kept the BTree counters for the #of bytes written per + * node and per leaf up to date when nodes and leaves were recycled + * then we could generate this table very quickly. As it stands, we + * have to actually scan the pages in the index. + */ + System.out.print("name"); + System.out.print('\t'); + System.out.print("m"); + System.out.print('\t'); + System.out.print("height"); + System.out.print('\t'); + System.out.print("nnodes"); + System.out.print('\t'); + System.out.print("nleaves"); + System.out.print('\t'); + System.out.print("nodeBytes"); + System.out.print('\t'); + System.out.print("leafBytes"); + System.out.print('\t'); + System.out.print("totalBytes"); + System.out.print('\t'); + System.out.print("avgNodeBytes"); + System.out.print('\t'); + System.out.print("avgLeafBytes"); + System.out.print('\t'); + System.out.print("minNodeBytes"); + System.out.print('\t'); + System.out.print("maxNodeBytes"); + System.out.print('\t'); + System.out.print("minLeafBytes"); + System.out.print('\t'); + System.out.print("maxLeafBytes"); + System.out.print('\n'); + + for(Map.Entry<String,PageStats> e : pageStats.entrySet()) { + + final String name = e.getKey(); + + final PageStats stats = e.getValue(); + + final BTree ndx = (BTree) journal.getIndex(name, commitRecord); + + System.out.print(name); + System.out.print('\t'); + System.out.print(ndx.getBranchingFactor()); + System.out.print('\t'); + System.out.print(ndx.getHeight()); + System.out.print('\t'); + System.out.print(ndx.getNodeCount()); + System.out.print('\t'); + System.out.print(ndx.getLeafCount()); + System.out.print('\t'); + System.out.print(stats.nodeBytes); + System.out.print('\t'); + System.out.print(stats.leafBytes); + System.out.print('\t'); + System.out.print(stats.getTotalBytes()); + System.out.print('\t'); + System.out.print(stats.getBytesPerNode()); + System.out.print('\t'); + System.out.print(stats.getBytesPerLeaf()); + System.out.print('\t'); + System.out.print(stats.minNodeBytes); + System.out.print('\t'); + System.out.print(stats.maxNodeBytes); + System.out.print('\t'); + System.out.print(stats.minLeafBytes); + System.out.print('\t'); + System.out.print(stats.maxLeafBytes); + System.out.print('\n'); + + } + + } - } + } // dumpNamedIndicesMetadata } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
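Since dumpPages(...) now returns the PageStats rather than printing them, a caller can format or aggregate the numbers itself. A minimal sketch, assuming the caller has already loaded an AbstractBTree:

import com.bigdata.btree.AbstractBTree;
import com.bigdata.btree.DumpIndex;
import com.bigdata.btree.DumpIndex.PageStats;

public class PageStatsExample {

    static void reportPageStats(final AbstractBTree ndx) {

        final PageStats stats = DumpIndex.dumpPages(ndx,
                false/* dumpNodeState */);

        System.out.println("nnodes=" + stats.nnodes //
                + ", nleaves=" + stats.nleaves //
                + ", totalBytes=" + stats.getTotalBytes() //
                + ", bytesPerNode=" + stats.getBytesPerNode() //
                + ", bytesPerLeaf=" + stats.getBytesPerLeaf() //
                + ", maxLeafBytes=" + stats.maxLeafBytes);

    }

}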
From: <mrp...@us...> - 2011-05-11 03:20:15
Revision: 4481 http://bigdata.svn.sourceforge.net/bigdata/?rev=4481&view=rev Author: mrpersonick Date: 2011-05-11 03:20:08 +0000 (Wed, 11 May 2011) Log Message: ----------- inline sids and reverse lookup Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/striterator/ChunkedArrayIterator.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/ArrayAccessPath.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/util/Bits.java Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/ArrayAccessPath.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/ArrayAccessPath.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/ArrayAccessPath.java 2011-05-11 03:20:08 UTC (rev 4481) @@ -0,0 +1,196 @@ +/* +Copyright (C) SYSTAP, LLC 2006-2011. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +package com.bigdata.relation.accesspath; + +import java.util.Collections; + +import com.bigdata.bop.IPredicate; +import com.bigdata.btree.IIndex; +import com.bigdata.btree.ITupleIterator; +import com.bigdata.striterator.ChunkedArrayIterator; +import com.bigdata.striterator.ChunkedWrappedIterator; +import com.bigdata.striterator.IChunkedOrderedIterator; +import com.bigdata.striterator.IKeyOrder; + +/** + * An access path over an array of elements. + */ +public class ArrayAccessPath<E> implements IAccessPath<E> { + + private final IPredicate<E> predicate; + + private final IKeyOrder<E> keyOrder; + + /** + * Array of elements + */ + private final E[] e; + + /** + * Ctor variant does not specify the {@link #getPredicate()} or the + * {@link #getKeyOrder()} and those methods will throw an + * {@link UnsupportedOperationException} if invoked. + */ + public ArrayAccessPath(final E[] e) { + + this(e, null/* predicate */, null/* keyOrder */); + + } + + /** + * Note: the {@link #getPredicate()} and {@link #getKeyOrder()} and methods + * will throw an {@link UnsupportedOperationException} if the corresponding + * argument is null. + */ + public ArrayAccessPath(final E[] e, + final IPredicate<E> predicate, final IKeyOrder<E> keyOrder) { + + this.predicate = predicate; + + this.keyOrder = keyOrder; + + this.e = e; + + } + + /** + * @throws UnsupportedOperationException + * unless the caller specified an {@link IPredicate} to the + * ctor. + */ + public IPredicate<E> getPredicate() { + + if (predicate == null) + throw new UnsupportedOperationException(); + + return predicate; + + } + + /** + * @throws UnsupportedOperationException + * unless the caller specified an {@link IKeyOrder} to the ctor. 
+ */ + public IKeyOrder<E> getKeyOrder() { + + if (keyOrder == null) + throw new UnsupportedOperationException(); + + return keyOrder; + + } + + /** + * @throws UnsupportedOperationException + * since no index is associated with this array + */ + public IIndex getIndex() { + + throw new UnsupportedOperationException(); + + } + + /** + * Returns <code>true</code> when the array of elements is empty. + */ + public boolean isEmpty() { + + return e.length == 0; + + } + + /** + * Returns the size of the array of elements. + */ + public long rangeCount(boolean exact) { + + return e.length; + + } + + /** + * @throws UnsupportedOperationException + * since no index is associated with this array + */ + public ITupleIterator<E> rangeIterator() { + + throw new UnsupportedOperationException(); + + } + + /** + * Visits the entire array of elements. + */ + public IChunkedOrderedIterator<E> iterator() { + + if (e.length == 0) { + return new ChunkedWrappedIterator<E>( + Collections.EMPTY_LIST.iterator()); + } + + return new ChunkedArrayIterator<E>(e); + + } + + /** + * Visits the array of elements up to the specified limit. + */ + public IChunkedOrderedIterator<E> iterator(final int limit, + final int capacity) { + + return iterator(0L/* offset */, limit, capacity); + + } + + /** + * Visits the array of elements from the specified offset up to the + * specified limit. + */ + @SuppressWarnings("unchecked") + public IChunkedOrderedIterator<E> iterator(final long offset, + final long limit, final int capacity) { + + if (e.length == 0) { + return new ChunkedWrappedIterator<E>( + Collections.EMPTY_LIST.iterator()); + } + + final E[] a = (E[]) java.lang.reflect.Array.newInstance( + e[0].getClass(), (int) limit); + + System.arraycopy(e, (int) offset, a, 0, (int) limit); + + return new ChunkedArrayIterator<E>(a); + + } + + /** + * Does nothing and always returns ZERO(0). + */ + public long removeAll() { + + return 0L; + + } + +} Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/striterator/ChunkedArrayIterator.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/striterator/ChunkedArrayIterator.java 2011-05-11 03:19:09 UTC (rev 4480) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/striterator/ChunkedArrayIterator.java 2011-05-11 03:20:08 UTC (rev 4481) @@ -74,6 +74,18 @@ * * @param a * The array of elements. + */ + public ChunkedArrayIterator(final E[] a) { + + this(a.length, a, null); + + } + + /** + * An iterator that visits the elements in the given array. + * + * @param a + * The array of elements. * @param n * The #of entries in <i>a</i> that are valid. * @param keyOrder Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/util/Bits.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/util/Bits.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/util/Bits.java 2011-05-11 03:20:08 UTC (rev 4481) @@ -0,0 +1,136 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2011. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +package com.bigdata.util; + +import it.unimi.dsi.bits.BitVector; + +import java.nio.ByteBuffer; + + +/** + * Simple helper class to work with bits inside a byte. Useful for classes + * that have a lot of boolean properties or pointers to enums that can be + * more compactly represented as a series of bit flags. See SPO. + * + * @author mikepersonick + */ +public class Bits { + + /** + * Set a bit inside a byte. + * + * @param bits + * the original byte + * @param i + * the bit index (0 through 7) + * @param bit + * the bit value + * @return + * the new byte + */ + public static byte set(final byte bits, final int i, final boolean bit) { + + // check to see if bits[i] == bit already, if so, nothing to do + // also does range check on i + if (get(bits, i) == bit) + return bits; + + byte b = bits; + if (bit) { + b = (byte) (b | (0x1 << i)); + } else { + b = (byte) (b & ~(0x1 << i)); + } + return b; + + } + + /** + * Get a bit from inside a byte. + * + * @param bits + * the byte + * @param i + * the bit index (0 through 7) + * @return + * the bit value + */ + public static boolean get(final byte bits, final int i) { + + if (i < 0 || i > 7) { + throw new IndexOutOfBoundsException(); + } + + return (bits & (0x1 << i)) != 0; + + } + + /** + * Get a new byte, masking off all but the bits specified by m. + * + * @param bits + * the original byte + * @param m + * the bits to keep, all others will be masked + * @return + * the new byte + */ + public static byte mask(final byte bits, final int... m) { + + byte b = 0; + + for (int i = 0; i < m.length; i++) { + + if (m[i] < 0 || m[i] > 7) { + throw new IndexOutOfBoundsException(); + } + + b |= (0x1 << m[i]); + + } + + b &= bits; + + return b; + + } + + /** + * Useful for debugging. + * + * @param bits + * the byte + * @return + * the unsigned binary string representation + */ + public static String toString(final byte bits) { + + final byte[] d = new byte[] { bits }; + final ByteBuffer b = ByteBuffer.wrap(d); + final BitVector v = new ByteBufferBitVector(b); + return v.toString(); + + } + +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
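A usage sketch for the new Bits helper; the flag positions and their meanings below are illustrative only:

import com.bigdata.util.Bits;

public class BitsExample {

    public static void main(final String[] args) {

        byte flags = 0;

        // Set two hypothetical flags at bit positions 0 and 3.
        flags = Bits.set(flags, 0, true);
        flags = Bits.set(flags, 3, true);

        // Test an individual bit.
        System.out.println("bit 3 = " + Bits.get(flags, 3)); // true

        // Keep only bits 0 and 1, masking off the rest.
        final byte masked = Bits.mask(flags, 0, 1);

        // Unsigned binary representations, useful for debugging.
        System.out.println(Bits.toString(flags));
        System.out.println(Bits.toString(masked));

    }

}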
From: <mar...@us...> - 2011-05-12 12:43:01
Revision: 4488 http://bigdata.svn.sourceforge.net/bigdata/?rev=4488&view=rev Author: martyncutcher Date: 2011-05-12 12:42:55 +0000 (Thu, 12 May 2011) Log Message: ----------- Add context registration Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/journal/AbstractTask.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/rwstore/FixedAllocator.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/journal/AbstractTask.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/journal/AbstractTask.java 2011-05-12 09:18:13 UTC (rev 4487) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/journal/AbstractTask.java 2011-05-12 12:42:55 UTC (rev 4488) @@ -2227,6 +2227,10 @@ source.getResourceLocator()// delegate locator ); + final IBufferStrategy bufferStrategy = source.getBufferStrategy(); + if (bufferStrategy instanceof RWStrategy) { + ((RWStrategy) bufferStrategy).getRWStore().registerContext(this); + } } /* Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/rwstore/FixedAllocator.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/rwstore/FixedAllocator.java 2011-05-12 09:18:13 UTC (rev 4487) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/rwstore/FixedAllocator.java 2011-05-12 12:42:55 UTC (rev 4488) @@ -862,6 +862,10 @@ } public void releaseSession(RWWriteCacheService cache) { + if (m_context != null) { + throw new IllegalStateException("Calling releaseSession on shadowed allocator"); + } + if (this.m_sessionActive) { if (log.isTraceEnabled()) log.trace("Allocator: #" + m_index + " releasing session protection"); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java 2011-05-12 09:18:13 UTC (rev 4487) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java 2011-05-12 12:42:55 UTC (rev 4488) @@ -1681,6 +1681,8 @@ * unwritten allocations. */ void releaseSessions() { + assert(m_activeTxCount == 0 && m_contexts.isEmpty()); + if (m_minReleaseAge == 0) { for (FixedAllocator fa : m_allocs) { fa.releaseSession(m_writeCache); @@ -3512,7 +3514,23 @@ return totalFreed; } + /** + * When a new context is started it must be registered to ensure it is + * protected. + * + * @param context + */ + public void registerContext(IAllocationContext context) { + m_allocationLock.lock(); + try { + establishContextAllocation(context); + } finally { + m_allocationLock.unlock(); + } + } + + /** * The ContextAllocation object manages a freeList of associated allocators * and an overall list of allocators. 
When the context is detached, all * allocators must be released and any that has available capacity will be @@ -3532,6 +3550,10 @@ m_contextRemovals++; alloc.release(); } + + if (m_contexts.isEmpty() && this.m_activeTxCount == 0) { + releaseSessions(); + } } finally { m_allocationLock.unlock(); } @@ -4444,7 +4466,7 @@ if(log.isInfoEnabled()) log.info("#activeTx="+m_activeTxCount); - if (m_activeTxCount == 0) { + if (m_activeTxCount == 0 && m_contexts.isEmpty()) { releaseSessions(); } } finally { @@ -4505,5 +4527,6 @@ m_allocationLock.unlock(); } } + } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
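The registration added to AbstractTask generalizes to any component which starts an allocation context against an RWStore-backed journal. A minimal sketch, assuming the journal and context references are obtained elsewhere and the usual package locations for RWStrategy and IAllocationContext:

import com.bigdata.journal.AbstractJournal;
import com.bigdata.journal.IBufferStrategy;
import com.bigdata.journal.RWStrategy;
import com.bigdata.rwstore.IAllocationContext;

public class RegisterContextExample {

    static void registerWithStore(final AbstractJournal journal,
            final IAllocationContext context) {

        final IBufferStrategy bufferStrategy = journal.getBufferStrategy();

        if (bufferStrategy instanceof RWStrategy) {

            // Register the context so its allocations are shadowed until
            // the context is detached.
            ((RWStrategy) bufferStrategy).getRWStore()
                    .registerContext(context);

        }

    }

}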
From: <mar...@us...> - 2011-06-21 16:08:12
Revision: 4754 http://bigdata.svn.sourceforge.net/bigdata/?rev=4754&view=rev Author: martyncutcher Date: 2011-06-21 16:08:06 +0000 (Tue, 21 Jun 2011) Log Message: ----------- ensure cache stats are maintained correctly, and that the current referenced cache is never also on the free or dirty lists Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/io/writecache/WriteCache.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/io/writecache/WriteCacheService.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/rwstore/FixedAllocator.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/io/writecache/WriteCache.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/io/writecache/WriteCache.java 2011-06-21 15:23:36 UTC (rev 4753) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/io/writecache/WriteCache.java 2011-06-21 16:08:06 UTC (rev 4754) @@ -130,10 +130,6 @@ * {@link #flush(boolean, long, TimeUnit)}, {@link #reset()}, and * {@link #close()}. */ -// * <p> -// * Note: To avoid lock ordering problems, acquire the read lock before you -// * increment the latch and acquire the write lock before you await the -// * latch. final private ReentrantReadWriteLock lock = new ReentrantReadWriteLock(); /** @@ -651,7 +647,8 @@ // header block. if (m_written) { // should be clean, NO WAY should this be written to! - log.warn("Writing to CLEAN cache: " + hashCode()); + log.error("Writing to CLEAN cache: " + hashCode()); + throw new IllegalStateException("Writing to CLEAN cache: " + hashCode()); } if (data == null) @@ -1051,21 +1048,27 @@ // write the data on the disk file. final boolean ret = writeOnChannel(view, getFirstOffset(), Collections.unmodifiableMap(recordMap), remaining); + + if (!ret) { + throw new IllegalStateException("Unable to flush WriteCache"); + } - if(!ret) - throw new TimeoutException(); - counters.nflush++; - if (ret && reset) { + if (reset) { /* * Atomic reset while holding the lock to prevent new * records from being written onto the buffer concurrently. + * + * FIXME: If the WriteCache is used directly then this makes + * sense, but if called from WriteCacheService then this must + * always clear the "master" recordMap of the WriteCacheService + * */ - + reset(); - + } return ret; @@ -1191,6 +1194,41 @@ */ public void reset() throws InterruptedException { + final Iterator<Long> entries = recordMap.keySet().iterator(); + + if (serviceRecordMap != null && entries.hasNext()) { + if (log.isInfoEnabled()) + log.info("resetting existing WriteCache: nrecords=" + recordMap.size() + ", hashCode=" + hashCode()); + + while (entries.hasNext()) { + final Long addr = entries.next(); + + /* + * We need to guard against the possibility that the entry in + * the service record map has been updated concurrently such + * that it now points to a different WriteCache instance. This + * is possible (for the RWStore) if a recently freed record has + * been subsequently reallocated on a different WriteCache. + * Using the conditional remove on ConcurrentMap guards against + * this. + */ + boolean removed = serviceRecordMap.remove(addr, this); + + registerWriteStatus(addr, 0, removed ? 
'R' : 'L'); + + } + + } else { + if (log.isInfoEnabled()) + log.info("clean WriteCache: hashCode=" + hashCode()); // debug + // to + // see + // recycling + if (m_written) { + log.warn("Written WriteCache but with no records"); + } + } + final Lock writeLock = lock.writeLock(); writeLock.lockInterruptibly(); @@ -1294,7 +1332,6 @@ * * @param tmp */ - // ... and having the {@link #latch} at zero. private void _resetState(final ByteBuffer tmp) { if (tmp == null) @@ -1865,24 +1902,26 @@ } } } // synchronized(tmp) + + /* + * Fix up the debug flag when last address is cleared. + */ + if (m_written && recordMap.isEmpty()) { + m_written = false; + } } finally { release(); } } - - /* - * Fix up the debug flag when last address is cleared. - */ - if (m_written && recordMap.isEmpty()) { - m_written = false; - } } protected void registerWriteStatus(long offset, int length, char action) { - // NOP to be overridden for debug if required + // NOP to be overidden for debug if required } boolean m_written = false; + + ConcurrentMap<Long, WriteCache> serviceRecordMap; private long lastOffset; @@ -1890,7 +1929,7 @@ * Called to clear the WriteCacheService map of references to this * WriteCache. * - * @param recordMap + * @param serviceRecordMap * the map of the WriteCacheService that associates an address * with a WriteCache * @param fileExtent @@ -1900,40 +1939,8 @@ public void resetWith(final ConcurrentMap<Long, WriteCache> serviceRecordMap, final long fileExtent) throws InterruptedException { - final Iterator<Long> entries = recordMap.keySet().iterator(); - if (entries.hasNext()) { - if (log.isInfoEnabled()) - log.info("resetting existing WriteCache: nrecords=" + recordMap.size() + ", hashCode=" + hashCode()); - - while (entries.hasNext()) { - final Long addr = entries.next(); - - /* - * We need to guard against the possibility that the entry in - * the service record map has been updated concurrently such - * that it now points to a different WriteCache instance. This - * is possible (for the RWStore) if a recently freed record has - * been subsequently reallocated on a different WriteCache. - * Using the conditional remove on ConcurrentMap guards against - * this. - */ - boolean removed = serviceRecordMap.remove(addr, this); - - registerWriteStatus(addr, 0, removed ? 'R' : 'L'); - - } - - } else { - if (log.isInfoEnabled()) - log.info("clean WriteCache: hashCode=" + hashCode()); // debug - // to - // see - // recycling - if (m_written) { - log.warn("Written WriteCache but with no records"); - } - } - + this.serviceRecordMap = serviceRecordMap; + reset(); // must ensure reset state even if cache already empty setFileExtent(fileExtent); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/io/writecache/WriteCacheService.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/io/writecache/WriteCacheService.java 2011-06-21 15:23:36 UTC (rev 4753) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/io/writecache/WriteCacheService.java 2011-06-21 16:08:06 UTC (rev 4754) @@ -439,9 +439,13 @@ // save the current file extent. this.fileExtent.set(fileExtent); - // N-1 WriteCache instances. - for (int i = 0; i < nbuffers - 1; i++) { + // Add [current] WriteCache. + current.set(buffers[0] = newWriteCache(null/* buf */, + useChecksum, false/* bufferHasData */, opener)); + // add remaining buffers. 
+ for (int i = 1; i < nbuffers; i++) { + final WriteCache tmp = newWriteCache(null/* buf */, useChecksum, false/* bufferHasData */, opener); @@ -451,9 +455,6 @@ } - // One more WriteCache for [current]. - current.set(buffers[nbuffers - 1] = newWriteCache(null/* buf */, - useChecksum, false/* bufferHasData */, opener)); // Set the same counters object on each of the write cache instances. final WriteCacheServiceCounters counters = new WriteCacheServiceCounters( @@ -613,6 +614,7 @@ // Now written, remove from dirtylist. dirtyList.take(); + counters.get().ndirty--; dirtyListLock.lockInterruptibly(); try { @@ -910,22 +912,22 @@ t.reset(); } - // re-populate the clean list with N-1 of our buffers - for (int i = 0; i < buffers.length - 1; i++) { - cleanList.put(buffers[i]); - } - // clear the service record map. recordMap.clear(); // set the current buffer. - current.set(buffers[buffers.length - 1]); + current.set(buffers[0]); + // re-populate the clean list with remaining buffers + for (int i = 1; i < buffers.length; i++) { + cleanList.put(buffers[i]); + } + // reset the counters. { final WriteCacheServiceCounters c = counters.get(); c.ndirty = 0; - c.nclean = buffers.length; + c.nclean = buffers.length-1; c.nreset++; } @@ -1253,7 +1255,7 @@ if (!writeLock.tryLock(remaining, TimeUnit.NANOSECONDS)) throw new TimeoutException(); try { - final WriteCache tmp = current.get(); + final WriteCache tmp = current.getAndSet(null); if (tmp.remaining() == 0) { /* * Handle an empty buffer by waiting until the dirtyList is @@ -1293,6 +1295,7 @@ * code is much less complex here. */ dirtyList.add(tmp); + counters.get().ndirty++; dirtyListNotEmpty.signalAll(); while (!dirtyList.isEmpty() && !halt) { // remaining := (total - elapsed). @@ -1326,6 +1329,7 @@ } // Guaranteed available hence non-blocking. final WriteCache nxt = cleanList.take(); + counters.get().nclean--; nxt.resetWith(recordMap, fileExtent.get()); current.set(nxt); return true; @@ -1440,6 +1444,9 @@ throw new IllegalArgumentException( AbstractBufferStrategy.ERR_BUFFER_NULL); + // maintain nwrites + counters.get().nwrites++; + // #of bytes in the record. final int remaining = data.remaining(); @@ -1481,7 +1488,7 @@ // A duplicate may also be indicative of an allocation // error, which we need to be pretty strict about! if (old == cache) { - throw new AssertionError("Record already in cache: offset=" + offset+" "+addrDebugInfo(offset)); + throw new AssertionError("Record already in cache: offset=" + offset + " " + addrDebugInfo(offset)); } return true; @@ -1535,7 +1542,7 @@ */ if (recordMap.put(offset, cache) != null) { // The record should not already be in the cache. - throw new AssertionError("Record already in cache: offset=" + offset+" "+addrDebugInfo(offset)); + throw new AssertionError("Record already in cache: offset=" + offset + " " + addrDebugInfo(offset)); } return true; @@ -1600,7 +1607,7 @@ // Take a buffer from the cleanList (guaranteed avail). final WriteCache newBuffer = cleanList.take(); - + counters.get().nclean--; // Clear the state on the new buffer and remove from // cacheService map newBuffer.resetWith(recordMap, fileExtent.get()); @@ -1613,7 +1620,7 @@ // This must be the only occurrence of this record. if (recordMap.put(offset, cache) != null) { - throw new AssertionError("Record already in cache: offset=" + offset+" "+addrDebugInfo(offset)); + throw new AssertionError("Record already in cache: offset=" + offset + " " + addrDebugInfo(offset)); } return true; @@ -1629,7 +1636,7 @@ /* * Should never happen. 
*/ - throw new AssertionError("Unable to write into current WriteCache"); + throw new AssertionError("Unable to write into current WriteCache " + offset + " " + addrDebugInfo(offset)); } finally { @@ -1861,8 +1868,9 @@ if (!lock.isWriteLockedByCurrentThread()) throw new IllegalMonitorStateException(); - final WriteCache cache = current.get(); + final WriteCache cache = current.getAndSet(null); assert cache != null; + /* * Note: The lock here is required to give flush() atomic semantics with * regard to the set of dirty write buffers when flush() gained the @@ -1897,6 +1905,7 @@ // Take a buffer from the cleanList (guaranteed avail). final WriteCache newBuffer = cleanList.take(); + counters.get().nclean--; // Clear state on new buffer and remove from cacheService map newBuffer.resetWith(recordMap, fileExtent.get()); @@ -1989,18 +1998,24 @@ */ public boolean clearWrite(final long offset) { try { + counters.get().nclearRequests++; final WriteCache cache = recordMap.remove(offset); if (cache == null) return false; - final WriteCache cur = acquireForWriter(); // in case current + + // Is there any point in acquiring for writer (with the readLock)? + // It prevents concurrent access with the write method that takes + // the writeLock, but is this a problem? + //final WriteCache cur = acquireForWriter(); // in case current + counters.get().nclears++; + //try { debugAddrs(offset, 0, 'F'); - try { cache.clearAddrMap(offset); return true; - } finally { - release(); - } + //} finally { + // release(); + //} } catch (InterruptedException e) { throw new RuntimeException(e); } @@ -2100,7 +2115,19 @@ * The #of {@link WriteCache} blocks sent by the leader to the first * downstream follower. */ - public volatile long nsend; + public volatile long nsend; + /** + * The #of writes made to the writeCacheService. + */ + public volatile long nwrites; + /** + * The #of addresses cleared by the writeCacheService. + */ + public volatile long nclearRequests; + /** + * The #of addresses cleared by the writeCacheService. + */ + public volatile long nclears; public WriteCacheServiceCounters(final int nbuffers) { @@ -2145,6 +2172,24 @@ } }); + root.addCounter("nwrites", new Instrument<Long>() { + public void sample() { + setValue(nwrites); + } + }); + + root.addCounter("nclearRequests", new Instrument<Long>() { + public void sample() { + setValue(nclearRequests); + } + }); + + root.addCounter("nclears", new Instrument<Long>() { + public void sample() { + setValue(nclears); + } + }); + root.addCounter("mbPerSec", new Instrument<Double>() { public void sample() { final double mbPerSec = (((double) bytesWritten) Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/rwstore/FixedAllocator.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/rwstore/FixedAllocator.java 2011-06-21 15:23:36 UTC (rev 4753) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/rwstore/FixedAllocator.java 2011-06-21 16:08:06 UTC (rev 4754) @@ -557,7 +557,16 @@ final int block = offset/nbits; - m_sessionActive = m_store.isSessionProtected(); + /** + * When a session is released any m_sessionActive FixedAllocators + * should be atomically released. 
+ * However, if any state allowed a call to free once the store + * is not session protected, this must NOT overwrite m_sessionActive + * if it is already set since a commit would reset the transient bits + * without first clearing addresses them from the writeCacheService + */ + + m_sessionActive = m_sessionActive || m_store.isSessionProtected(); try { if (((AllocBlock) m_allocBlocks.get(block)) Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java 2011-06-21 15:23:36 UTC (rev 4753) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java 2011-06-21 16:08:06 UTC (rev 4754) @@ -4545,5 +4545,9 @@ log.warn("WriteCacheDebug: " + paddr + " - " + m_writeCache.addrDebugInfo(paddr)); } + public CounterSet getWriteCacheCounters() { + return m_writeCache.getCounters(); + } + } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
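A sketch of sampling the new counters through the accessor added at the bottom of RWStore; how the store reference is obtained is left open, and CounterSet is assumed to live in com.bigdata.counters:

import com.bigdata.counters.CounterSet;
import com.bigdata.rwstore.RWStore;

public class WriteCacheCountersExample {

    static void logWriteCacheCounters(final RWStore store) {

        // nwrites, nclearRequests and nclears appear in this counter set.
        final CounterSet counters = store.getWriteCacheCounters();

        System.out.println(counters.toString());

    }

}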
From: <mar...@us...> - 2011-06-21 17:19:12
Revision: 4756 http://bigdata.svn.sourceforge.net/bigdata/?rev=4756&view=rev Author: martyncutcher Date: 2011-06-21 17:19:05 +0000 (Tue, 21 Jun 2011) Log Message: ----------- backout change to writeCache retaining serviceRecordMap and instead drop interface to force calling of flushWithRest Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/io/writecache/WriteCache.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/rwstore/FixedAllocator.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/io/writecache/WriteCache.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/io/writecache/WriteCache.java 2011-06-21 16:21:38 UTC (rev 4755) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/io/writecache/WriteCache.java 2011-06-21 17:19:05 UTC (rev 4756) @@ -927,27 +927,6 @@ } /** - * Variant which resets the cache if it was successfully flushed. - */ - public void flushAndReset(final boolean force) throws IOException, InterruptedException { - - try { - - if (!flushAndReset(force, true/* reset */, Long.MAX_VALUE, TimeUnit.NANOSECONDS)) { - - throw new RuntimeException(); - - } - - } catch (TimeoutException e) { - - throw new RuntimeException(e); - - } - - } - - /** * Flush the writes to the backing channel but DOES NOT sync the channel and * DOES NOT {@link #reset()} the {@link WriteCache}. {@link #reset()} is a * separate operation because a common use is to retain recently flushed @@ -964,30 +943,6 @@ public boolean flush(final boolean force, final long timeout, final TimeUnit unit) throws IOException, TimeoutException, InterruptedException { - return flushAndReset(force, false/* reset */, timeout, unit); - - } - - /** - * Core impl. - * - * @param forceIsIgnored - * ignored (deprecated). - * @param reset - * When <code>true</code>, does atomic reset IFF the flush was - * successful in the allowed time while holding the lock to - * prevent new records from being written onto the buffer - * concurrently. - * @param timeout - * @param unit - * @return - * @throws IOException - * @throws TimeoutException - * @throws InterruptedException - */ - private boolean flushAndReset(final boolean forceIsIgnored, final boolean reset, final long timeout, - final TimeUnit unit) throws IOException, TimeoutException, InterruptedException { - // start time final long begin = System.nanoTime(); @@ -1050,27 +1005,11 @@ remaining); if (!ret) { - throw new IllegalStateException("Unable to flush WriteCache"); + throw new TimeoutException("Unable to flush WriteCache"); } counters.nflush++; - if (reset) { - - /* - * Atomic reset while holding the lock to prevent new - * records from being written onto the buffer concurrently. 
- * - * FIXME: If the WriteCache is used directly then this makes - * sense, but if called from WriteCacheService then this must - * always clear the "master" recordMap of the WriteCacheService - * - */ - - reset(); - - } - return ret; } @@ -1194,41 +1133,6 @@ */ public void reset() throws InterruptedException { - final Iterator<Long> entries = recordMap.keySet().iterator(); - - if (serviceRecordMap != null && entries.hasNext()) { - if (log.isInfoEnabled()) - log.info("resetting existing WriteCache: nrecords=" + recordMap.size() + ", hashCode=" + hashCode()); - - while (entries.hasNext()) { - final Long addr = entries.next(); - - /* - * We need to guard against the possibility that the entry in - * the service record map has been updated concurrently such - * that it now points to a different WriteCache instance. This - * is possible (for the RWStore) if a recently freed record has - * been subsequently reallocated on a different WriteCache. - * Using the conditional remove on ConcurrentMap guards against - * this. - */ - boolean removed = serviceRecordMap.remove(addr, this); - - registerWriteStatus(addr, 0, removed ? 'R' : 'L'); - - } - - } else { - if (log.isInfoEnabled()) - log.info("clean WriteCache: hashCode=" + hashCode()); // debug - // to - // see - // recycling - if (m_written) { - log.warn("Written WriteCache but with no records"); - } - } - final Lock writeLock = lock.writeLock(); writeLock.lockInterruptibly(); @@ -1921,8 +1825,6 @@ boolean m_written = false; - ConcurrentMap<Long, WriteCache> serviceRecordMap; - private long lastOffset; /** @@ -1939,8 +1841,40 @@ public void resetWith(final ConcurrentMap<Long, WriteCache> serviceRecordMap, final long fileExtent) throws InterruptedException { - this.serviceRecordMap = serviceRecordMap; + final Iterator<Long> entries = recordMap.keySet().iterator(); + if (serviceRecordMap != null && entries.hasNext()) { + if (log.isInfoEnabled()) + log.info("resetting existing WriteCache: nrecords=" + recordMap.size() + ", hashCode=" + hashCode()); + + while (entries.hasNext()) { + final Long addr = entries.next(); + + /* + * We need to guard against the possibility that the entry in + * the service record map has been updated concurrently such + * that it now points to a different WriteCache instance. This + * is possible (for the RWStore) if a recently freed record has + * been subsequently reallocated on a different WriteCache. + * Using the conditional remove on ConcurrentMap guards against + * this. + */ + boolean removed = serviceRecordMap.remove(addr, this); + + registerWriteStatus(addr, 0, removed ? 
'R' : 'L'); + + } + + } else { + if (log.isInfoEnabled()) + log.info("clean WriteCache: hashCode=" + hashCode()); // debug + // to + // see + // recycling + if (m_written) { + log.warn("Written WriteCache but with no records"); + } + } reset(); // must ensure reset state even if cache already empty setFileExtent(fileExtent); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/rwstore/FixedAllocator.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/rwstore/FixedAllocator.java 2011-06-21 16:21:38 UTC (rev 4755) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/rwstore/FixedAllocator.java 2011-06-21 17:19:05 UTC (rev 4756) @@ -565,8 +565,9 @@ * if it is already set since a commit would reset the transient bits * without first clearing addresses them from the writeCacheService */ - - m_sessionActive = m_sessionActive || m_store.isSessionProtected(); + final boolean tmp = m_sessionActive; + m_sessionActive = m_store.isSessionProtected(); + if (tmp && !m_sessionActive) throw new AssertionError(); try { if (((AllocBlock) m_allocBlocks.get(block)) @@ -866,7 +867,7 @@ m_statsBucket = b; } - public void releaseSession(RWWriteCacheService cache) { + void releaseSession(RWWriteCacheService cache) { if (m_context != null) { throw new IllegalStateException("Calling releaseSession on shadowed allocator"); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
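With flushAndReset() removed, callers go through flush(force, timeout, unit), which after this change surfaces a TimeoutException when writeOnChannel cannot flush the cache. A caller sketch with an illustrative timeout:

import java.io.IOException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

import com.bigdata.io.writecache.WriteCache;

public class FlushExample {

    static void flushOrFail(final WriteCache writeCache) throws IOException,
            InterruptedException {

        try {

            // Throws TimeoutException if the cache can not be flushed
            // within the allowed time.
            writeCache.flush(false/* force */, 10, TimeUnit.SECONDS);

        } catch (TimeoutException ex) {

            // The backing channel could not be flushed; surface the failure.
            throw new RuntimeException(ex);

        }

    }

}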