[Bigdata-commit] SF.net SVN: bigdata:[6385] branches/BIGDATA_RELEASE_1_2_0

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 422-6466

Revision: 6385
          http://bigdata.svn.sourceforge.net/bigdata/?rev=6385&view=rev
Author:   thompsonbry
Date:     2012-07-20 21:21:08 +0000 (Fri, 20 Jul 2012)
Log Message:
-----------
Added a DISTINCT (s,p,o) filter to the CONSTRUCT iterator (ASTConstructIterator) in support of [1].  The DISTINCT SPO filter is always enabled.  When the hint:analytic mode is enabled or when the hint:nativeDistinctSPO mode are enabled, then the CONSTUCT will use a filter backed by the native process heap. Otherwise it will use a filter backed by the JVM heap.

Some unit tests have been added to verify that the NativeDistinctFilter is used when the appropriate query hint is specified. 

I pulled out an ICloseable interface from ICloseableIterator. The NativeDistinctFilter's internal DistinctFilterImpl class now implements ICloseable.  An issue [2] has been created to extend the striterator protocol to support the ICloseable interface on IFilter and IFilterTest.

[1] https://sourceforge.net/apps/trac/bigdata/ticket/579 (CONSTRUCT should impose distinct (s,p,o) filter)
[2] http://sourceforge.net/apps/trac/bigdata/ticket/582 (IStriterator does not support close() protocol for IFilter)

Modified Paths:
--------------
    branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/ap/filter/DistinctFilter.java
    branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/striterator/ICloseableIterator.java
    branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/bop/rdf/filter/NativeDistinctFilter.java
    branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/ConstructNode.java
    branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUpdate.java
    branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/ASTConstructIterator.java
    branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/ASTConstructOptimizer.java
    branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/ASTDescribeOptimizer.java
    branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/bop/rdf/filter/TestNativeDistinctFilter.java
    branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/TestBasicQuery.java
    branches/BIGDATA_RELEASE_1_2_0/ctc-striterators/src/java/cutthecrap/utils/striterators/IFilter.java

Added Paths:
-----------
    branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/striterator/ICloseable.java
    branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/construct-1a.rq
    branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/construct-1b.rq

Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/ap/filter/DistinctFilter.java
===================================================================

--- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/ap/filter/DistinctFilter.java	2012-07-18 13:48:50 UTC (rev 6384)
+++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/ap/filter/DistinctFilter.java	2012-07-20 21:21:08 UTC (rev 6385)
@@ -1,21 +1,27 @@
 package com.bigdata.bop.ap.filter;
 
+import java.util.HashSet;
 import java.util.Iterator;
-import java.util.LinkedHashSet;
 import java.util.Map;
 import java.util.concurrent.ConcurrentHashMap;
 
 import com.bigdata.bop.BOp;
 import com.bigdata.bop.HashMapAnnotations;
+import com.bigdata.rdf.spo.SPO;
 import com.bigdata.striterator.IChunkConverter;
 import com.bigdata.striterator.MergeFilter;
 
 import cutthecrap.utils.striterators.Filter;
 import cutthecrap.utils.striterators.Filterator;
+import cutthecrap.utils.striterators.IPropertySet;
 
 /**
  * A DISTINCT operator based for elements in a relation. The operator is based
  * on an in-memory hash table.
+ * <p>
+ * Note: This is used for the in-memory {@link SPO} distinct filter, but
+ * it is more general and can be applied to any data type that can be
+ * inserted into a set.
  * 
  * @author <a href="mailto:tho...@us...">Bryan Thompson</a>
  * @version $Id: DistinctElementFilter.java 3466 2010-08-27 14:28:04Z
@@ -46,7 +52,9 @@
      * A instance using the default configuration for the in memory hash map.
      */
     public static DistinctFilter newInstance() {
+
         return new DistinctFilter(BOp.NOARGS, BOp.NOANNS);
+        
     }
     
     /**
@@ -66,34 +74,39 @@
 
     }
 
-    /**
-     * @see Annotations#INITIAL_CAPACITY
-     */
-    public int getInitialCapacity() {
+//    /**
+//     * @see Annotations#INITIAL_CAPACITY
+//     */
+//    public int getInitialCapacity() {
+//
+//        return getProperty(Annotations.INITIAL_CAPACITY,
+//                Annotations.DEFAULT_INITIAL_CAPACITY);
+//
+//    }
+//
+//    /**
+//     * @see Annotations#LOAD_FACTOR
+//     */
+//    public float getLoadFactor() {
+//
+//        return getProperty(Annotations.LOAD_FACTOR,
+//                Annotations.DEFAULT_LOAD_FACTOR);
+//
+//    }
 
-        return getProperty(Annotations.INITIAL_CAPACITY,
-                Annotations.DEFAULT_INITIAL_CAPACITY);
-
-    }
-
-    /**
-     * @see Annotations#LOAD_FACTOR
-     */
-    public float getLoadFactor() {
-
-        return getProperty(Annotations.LOAD_FACTOR,
-                Annotations.DEFAULT_LOAD_FACTOR);
-
-    }
-
     @Override
     final protected Iterator filterOnce(Iterator src, final Object context) {
         
-        return new Filterator(src, context, new DistinctFilterImpl());
+        return new Filterator(src, context, new DistinctFilterImpl(this));
         
     }
 
-    private class DistinctFilterImpl extends Filter {
+    /**
+     * DISTINCT filter based on Java heap data structures.
+     * 
+     * @author <a href="mailto:tho...@us...">Bryan Thompson</a>
+     */
+    public static class DistinctFilterImpl extends Filter {
 
         private static final long serialVersionUID = 1L;
 
@@ -102,18 +115,46 @@
          * {@link ConcurrentHashMap} here.
          */
         @SuppressWarnings("rawtypes")
-        private final LinkedHashSet members;
+        private final HashSet members;
 
+        @SuppressWarnings("unchecked")
+        static private <T> T getProperty(final IPropertySet pset,
+                final String name, final T defaultValue) {
+
+            final Object val = pset.getProperty(name);
+
+            if (val != null)
+                return (T) val;
+
+            return defaultValue;
+
+        }
+
+        /**
+         * DISTINCT filter based on Java heap data structures.
+         * 
+         * @param propertySet
+         *            Used to configured the DISTINCT filter.
+         * 
+         * @see DistinctFilter.Annotations
+         */
         @SuppressWarnings("rawtypes")
-        public DistinctFilterImpl() {
+        public DistinctFilterImpl(final IPropertySet propertySet) {
+
+            final int initialCapacity = getProperty(propertySet,
+                    Annotations.INITIAL_CAPACITY,
+                    Annotations.DEFAULT_INITIAL_CAPACITY);
             
-            members = new LinkedHashSet(getInitialCapacity(), getLoadFactor());
+            final float loadFactor = getProperty(propertySet,
+                    Annotations.LOAD_FACTOR, Annotations.DEFAULT_LOAD_FACTOR);
 
+            members = new HashSet(initialCapacity, loadFactor);
+
         }
 
         @SuppressWarnings("unchecked")
         @Override
-        public boolean isValid(Object obj) {
+        public boolean isValid(final Object obj) {
 
             return members.add(obj);
 

Added: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/striterator/ICloseable.java
===================================================================
--- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/striterator/ICloseable.java	                        (rev 0)
+++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/striterator/ICloseable.java	2012-07-20 21:21:08 UTC (rev 6385)
@@ -0,0 +1,46 @@
+/*
+
+Copyright (C) SYSTAP, LLC 2006-2008.  All rights reserved.
+
+Contact:
+     SYSTAP, LLC
+     4501 Tower Road
+     Greensboro, NC 27410
+     lic...@bi...
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+*/
+/*
+ * Created on Jul 20, 2012
+ */
+package com.bigdata.striterator;
+
+/**
+ * Interface for objects which can have resources which must be explicitly
+ * closed (typically iterators).
+ * 
+ * @author <a href="mailto:tho...@us...">Bryan Thompson</a>
+ */
+public interface ICloseable {
+
+    /**
+     * Closes the object, releasing any associated resources. This method MAY be
+     * invoked safely if the object is already closed. Implementations of this
+     * interface MUST invoke {@link #close()} once it is known that the object
+     * will no longer be used.
+     */
+    public void close();
+    
+}

Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/striterator/ICloseableIterator.java
===================================================================
--- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/striterator/ICloseableIterator.java	2012-07-18 13:48:50 UTC (rev 6384)
+++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/striterator/ICloseableIterator.java	2012-07-20 21:21:08 UTC (rev 6385)
@@ -38,7 +38,7 @@
  * @author <a href="mailto:tho...@us...">Bryan Thompson</a>
  * @version $Id$
  */
-public interface ICloseableIterator<E> extends Iterator<E> {
+public interface ICloseableIterator<E> extends Iterator<E>, ICloseable {
 
     /**
      * Closes the iterator, releasing any associated resources. This method MAY

Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/bop/rdf/filter/NativeDistinctFilter.java
===================================================================
--- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/bop/rdf/filter/NativeDistinctFilter.java	2012-07-18 13:48:50 UTC (rev 6384)
+++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/bop/rdf/filter/NativeDistinctFilter.java	2012-07-20 21:21:08 UTC (rev 6385)
@@ -7,12 +7,14 @@
 import java.util.Map;
 import java.util.Set;
 import java.util.UUID;
+import java.util.concurrent.atomic.AtomicBoolean;
 
 import com.bigdata.bop.BOp;
 import com.bigdata.bop.BTreeAnnotations;
 import com.bigdata.bop.HTreeAnnotations;
 import com.bigdata.bop.HashMapAnnotations;
 import com.bigdata.bop.ap.filter.BOpFilterBase;
+import com.bigdata.bop.ap.filter.DistinctFilter;
 import com.bigdata.bop.engine.IRunningQuery;
 import com.bigdata.btree.BTree;
 import com.bigdata.btree.BloomFilterFactory;
@@ -30,14 +32,17 @@
 import com.bigdata.io.DirectBufferPool;
 import com.bigdata.rdf.internal.IV;
 import com.bigdata.rdf.internal.IVUtility;
+import com.bigdata.rdf.sparql.ast.eval.ASTConstructIterator;
 import com.bigdata.rdf.spo.ISPO;
 import com.bigdata.rdf.spo.SPO;
 import com.bigdata.rdf.spo.SPOKeyOrder;
 import com.bigdata.rwstore.sector.MemStore;
 import com.bigdata.rwstore.sector.MemoryManager;
+import com.bigdata.striterator.ICloseable;
 
 import cutthecrap.utils.striterators.Filter;
 import cutthecrap.utils.striterators.Filterator;
+import cutthecrap.utils.striterators.IPropertySet;
 
 /**
  * A scalable DISTINCT operator based for {@link SPO}s.
@@ -144,18 +149,18 @@
     @Override
     final protected Iterator filterOnce(Iterator src, final Object context) {
         
-        return new Filterator(src, context, new DistinctFilterImpl());
+        return new Filterator(src, context, new DistinctFilterImpl(this));
         
     }
 
     /**
      * Return the 3-component key order which has the best locality given that
-     * the SPOs will be ariving in the natural order of the
+     * the SPOs will be arriving in the natural order of the
      * <i>indexKeyOrder</i>. This is the keyOrder that we will use for the
      * filter. This gives the filter index structure the best possible locality
      * in terms of the order in which the SPOs are arriving.
      * <p>
-     * The return valuer is an <code>int[3]</code>. The index is the orderinal
+     * The return valuer is an <code>int[3]</code>. The index is the ordinal
      * position of the triples mode key component for the filter keys. The value
      * at that index is the position in the {@link SPOKeyOrder} of the quads
      * mode index whose natural order determines the order of arrival of the
@@ -183,19 +188,36 @@
      * 
      * which models the <code>PSO</code> key order for the purposes of this
      * class.
+     * <p>
+     * Note: This method now accepts triples in support of the
+     * {@link ASTConstructIterator}
      * 
      * @see Annotations#INDEX_KEY_ORDER
+     * 
+     * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/579">
+     *      CONSTRUCT should apply DISTINCT (s,p,o) filter </a>
      */
-    public static int[] getFilterKeyOrder(SPOKeyOrder indexKeyOrder) {
+    public static int[] getFilterKeyOrder(final SPOKeyOrder indexKeyOrder) {
 
         if (indexKeyOrder == null)
             throw new IllegalArgumentException();
 
-        if (indexKeyOrder.getKeyArity() != 4)
-            throw new IllegalArgumentException();
+//        if (indexKeyOrder.getKeyArity() != 4)
+//            throw new IllegalArgumentException();
 
         final int[] filterKeyOrder;
         switch (indexKeyOrder.index()) {
+        // TRIPLES
+        case SPOKeyOrder._SPO:
+            filterKeyOrder = new int[] { 0, 1, 2 };
+            break;
+        case SPOKeyOrder._POS:
+            filterKeyOrder = new int[] { 1, 2, 0 };
+            break;
+        case SPOKeyOrder._OSP:
+            filterKeyOrder = new int[] { 2, 0, 1 };
+            break;
+        // QUADS
         case SPOKeyOrder._SPOC:
             filterKeyOrder = new int[] { 0, 1, 2 };
             break;
@@ -220,7 +242,15 @@
         return filterKeyOrder;
     }
     
-    private class DistinctFilterImpl extends Filter {
+    /**
+     * A {@link Filter} which passes only the DISTINCT {@link ISPO}s and is
+     * backed by a scalable data structure (BTree or HTree).
+     * 
+     * @author <a href="mailto:tho...@us...">Bryan
+     *         Thompson</a>
+     */
+    static public class DistinctFilterImpl extends Filter implements
+            ICloseable {
 
         private static final long serialVersionUID = 1L;
 
@@ -232,7 +262,7 @@
         /**
          * The fast JVM based cache. This is always allocated.
          */
-        private final LinkedHashMap<SPO, byte[]> lru;
+        private final LinkedHashMap<ISPO, byte[]> lru;
 
         /**
          * The metadata used to create the index.
@@ -262,10 +292,15 @@
         private volatile ICheckpointProtocol index;
         
         /**
+         * <code>true</code> until {@link #close() closed}.
+         */
+        private final AtomicBoolean open = new AtomicBoolean(true);
+        
+        /**
          * When <code>true</code>, the {@link BTree} will be used. When
          * <code>false</code> the {@link HTree}.
          * <p>
-         * Note: Hisorical testing indicated that the {@link BTree} was faster
+         * Note: Historical testing indicated that the {@link BTree} was faster
          * for this application.
          * 
          * TODO Edit HTree/BTree here.
@@ -279,28 +314,85 @@
         
         @Override
         protected void finalize() throws Throwable {
+            close();
             super.finalize();
-            if (index != null) {
-                index.close();
-                index = null;
+        }
+
+        /**
+         * Release resources associated with the filter.
+         * <p>
+         * Note: This is done automatically by {@link #finalize()}, but it
+         * should be done pro-actively whenever possible.
+         * 
+         * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/582">
+         *      IStriterator does not support close() protocol for Ifilter </a>
+         */
+        @Override
+        public void close() {
+            if (open.compareAndSet(true/* expect */, false/* update */)) {
+                /*
+                 * Close when first invoked.
+                 */
+                if (index != null) {
+                    index.close();
+                    index = null;
+                }
+                if (store != null) {
+                    store.close();
+                    store = null;
+                }
             }
-            if (store != null) {
-                store.close();
-                store = null;
-            }
         }
+
+        @SuppressWarnings("unchecked")
+        static private <T> T getRequiredProperty(final IPropertySet pset,
+                final String name) {
+
+            final Object val = pset.getProperty(name);
+
+            if (val == null)
+
+                if (val == null)
+                    throw new IllegalStateException("Required property: "
+                            + name + " : " + NativeDistinctFilter.class);
+
+            return (T) val;
+
+        }
+
+        @SuppressWarnings("unchecked")
+        static private <T> T getProperty(final IPropertySet pset,
+                final String name, final T defaultValue) {
+
+            final Object val = pset.getProperty(name);
+
+            if (val != null)
+                return (T) val;
+
+            return defaultValue;
+
+        }
         
-        public DistinctFilterImpl() {
+        /**
+         * DISTINCT {@link ISPO} filter based on persistence capable data
+         * structures.
+         * 
+         * @param properties
+         *            Used to configure the DISTINCT filter.
+         * 
+         * @see DistinctFilter.Annotations
+         */
+        public DistinctFilterImpl(final IPropertySet properties) {
             
-            final int initialCapacity = NativeDistinctFilter.this.getProperty(
+            final int initialCapacity = getProperty(properties,
                     Annotations.INITIAL_CAPACITY,
                     Annotations.DEFAULT_INITIAL_CAPACITY);
 
-            final float loadFactor = NativeDistinctFilter.this.getProperty(
+            final float loadFactor = getProperty(properties,
                     Annotations.LOAD_FACTOR,
                     Annotations.DEFAULT_LOAD_FACTOR);
                     
-            lru = new LinkedHashMap<SPO, byte[]>(initialCapacity, loadFactor);
+            lru = new LinkedHashMap<ISPO, byte[]>(initialCapacity, loadFactor);
 
             this.nominalCapacity = initialCapacity;
 
@@ -312,8 +404,8 @@
              */
             {
 
-                final SPOKeyOrder indexKeyOrder = (SPOKeyOrder) NativeDistinctFilter.this
-                        .getRequiredProperty(Annotations.KEY_ORDER);
+                final SPOKeyOrder indexKeyOrder = (SPOKeyOrder) getRequiredProperty(
+                        properties, Annotations.KEY_ORDER);
 
                 filterKeyOrder = getFilterKeyOrder(indexKeyOrder);
 
@@ -322,20 +414,19 @@
                         : new HTreeIndexMetadata(UUID.randomUUID());
 
                 // IFF BTree
-                metadata.setBranchingFactor(NativeDistinctFilter.this.getProperty(
+                metadata.setBranchingFactor(getProperty(properties,
                         BTreeAnnotations.BRANCHING_FACTOR,
                         256));// TODO Overridden here. BTreeAnnotations.DEFAULT_BRANCHING_FACTOR));
 
                 // IFF HTree
                 if (metadata instanceof HTreeIndexMetadata) {
                     ((HTreeIndexMetadata) metadata)
-                            .setAddressBits(NativeDistinctFilter.this
-                                    .getProperty(
+                            .setAddressBits(getProperty(properties,
                                             HTreeAnnotations.ADDRESS_BITS,
                                             HTreeAnnotations.DEFAULT_ADDRESS_BITS));
                 }
 
-                metadata.setRawRecords(NativeDistinctFilter.this.getProperty(
+                metadata.setRawRecords(getProperty(properties,
                         Annotations.RAW_RECORDS,
                         Annotations.DEFAULT_RAW_RECORDS));
 
@@ -344,10 +435,9 @@
 
                 metadata.setBloomFilterFactory(BloomFilterFactory.DEFAULT);
 
-                metadata.setWriteRetentionQueueCapacity(NativeDistinctFilter.this
-                        .getProperty(
-                                Annotations.WRITE_RETENTION_QUEUE_CAPACITY,
-                                Annotations.DEFAULT_WRITE_RETENTION_QUEUE_CAPACITY));
+                metadata.setWriteRetentionQueueCapacity(getProperty(properties,
+                        Annotations.WRITE_RETENTION_QUEUE_CAPACITY,
+                        Annotations.DEFAULT_WRITE_RETENTION_QUEUE_CAPACITY));
 
                 final int ratio = 32; // TODO Config/tune front-coding ratio.
 
@@ -378,8 +468,8 @@
             final int n = lru.size();
             final byte[][] a = new byte[n][];
             {
-                // Evict everthing into an array.
-                final Iterator<Map.Entry<SPO, byte[]>> itr = lru.entrySet()
+                // Evict everything into an array.
+                final Iterator<Map.Entry<ISPO, byte[]>> itr = lru.entrySet()
                         .iterator();
                 int i = 0;
                 while (itr.hasNext()) {
@@ -409,6 +499,11 @@
 
             if (index != null)
                 throw new IllegalStateException();
+
+            if (!open.get()) {
+                // Explicitly closed.
+                throw new IllegalStateException();
+            }
             
             /*
              * This wraps an efficient raw store interface around a child memory
@@ -435,7 +530,7 @@
         @Override
         public boolean isValid(final Object obj) {
 
-            final SPO spo = (SPO) obj;
+            final ISPO spo = (ISPO) obj;
 
             return add(spo);
             
@@ -457,7 +552,7 @@
          * 
          * @return <code>true</code> if the collection was modified.
          */
-        private boolean add(final SPO spo) {
+        private boolean add(final ISPO spo) {
 
             if(lru.containsKey(spo)) {
                 // already in the LRU

Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/ConstructNode.java
===================================================================
--- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/ConstructNode.java	2012-07-18 13:48:50 UTC (rev 6384)
+++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/ConstructNode.java	2012-07-20 21:21:08 UTC (rev 6385)
@@ -30,6 +30,8 @@
 import java.util.Map;
 
 import com.bigdata.bop.BOp;
+import com.bigdata.rdf.sparql.ast.eval.AST2BOpContext;
+import com.bigdata.rdf.spo.ISPO;
 
 /**
  * A template for the construction of one or more graphs based on the solutions
@@ -47,10 +49,47 @@
      */
     private static final long serialVersionUID = 1L;
 
+    public interface Annotations extends AbstractStatementContainer.Annotations {
+
+        /**
+         * Boolean property (default {@value #DEFAULT_ALL_GRAPHS}) which is
+         * <code>true</code> iff a native DISTINCT {@link ISPO} filter should be
+         * applied (large cardinality is expected for the constructed graph).
+         * When <code>false</code>, a JVM based DISTINCT {@link ISPO} filter
+         * will be applied.
+         * <p>
+         * Note: This can be set using either {@link QueryHints#ANALYTIC} or
+         * {@link QueryHints#NATIVE_DISTINCT_SPO}.
+         * 
+         * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/579">
+         *      CONSTRUCT should apply DISTINCT (s,p,o) filter </a>
+         */
+        String NATIVE_DISTINCT = "nativeDistinct";
+
+        boolean DEFAULT_NATIVE_DISTINCT = false;
+
+    }
+
     public ConstructNode() {
+
         super();
+        
     }
 
+    public ConstructNode(final AST2BOpContext ctx) {
+
+        super();
+        
+        if (ctx.nativeDistinctSPO) {
+         
+            // Native DISTINCT SPO FILTER is requested.
+            
+            setNativeDistinct(true);
+            
+        }
+
+    }
+    
     /**
      * Required deep copy constructor.
      */
@@ -80,6 +119,29 @@
         
         return this;
     }
+
+    /**
+     * When <code>true</code>, a native DISTINCT {@link ISPO} filter will be
+     * applied to the constructed graph, otherwise a Java Heap based DISTINCT
+     * {@link ISPO} filter will be applied.
+     * 
+     * @see Annotations#NATIVE_DISTINCT
+     * 
+     * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/579">
+     *      CONSTRUCT should apply DISTINCT (s,p,o) filter </a>
+     */
+    public boolean isNativeDistinct() {
+
+        return getProperty(Annotations.NATIVE_DISTINCT,
+                Annotations.DEFAULT_NATIVE_DISTINCT);
+
+    }
+
+    public void setNativeDistinct(final boolean nativeDistinct) {
+        
+        setProperty(Annotations.NATIVE_DISTINCT, nativeDistinct);
+        
+    }
     
     @Override
     public String toString(final int indent) {
@@ -97,7 +159,10 @@
         }
 
         sb.append("\n").append(s).append("}");
-        
+
+        if (isNativeDistinct())
+            sb.append(" [nativeDistinct]");
+
         return sb.toString();
 
     }

Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUpdate.java
===================================================================
--- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUpdate.java	2012-07-18 13:48:50 UTC (rev 6384)
+++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUpdate.java	2012-07-20 21:21:08 UTC (rev 6385)
@@ -721,7 +721,7 @@
 							 */
 							
 							final ConstructNode template = op.getDeleteClause()
-									.getQuadData().flatten(new ConstructNode());
+									.getQuadData().flatten(new ConstructNode(context));
 
 							final ASTConstructIterator itr = new ASTConstructIterator(
 									context.conn.getTripleStore(), template,
@@ -789,7 +789,7 @@
 							 */
 							
 							final ConstructNode template = op.getInsertClause()
-									.getQuadData().flatten(new ConstructNode());
+									.getQuadData().flatten(new ConstructNode(context));
 
 							final ASTConstructIterator itr = new ASTConstructIterator(
 									context.conn.getTripleStore(), template,
@@ -898,7 +898,7 @@
                     // Flatten the original WHERE clause into a CONSTRUCT
                     // template.
                     final ConstructNode template = quadData
-                            .flatten(new ConstructNode());
+                            .flatten(new ConstructNode(context));
 
                     // Set the CONSTRUCT template (quads patterns).
                     queryRoot.setConstruct(template);

Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/ASTConstructIterator.java
===================================================================
--- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/ASTConstructIterator.java	2012-07-18 13:48:50 UTC (rev 6384)
+++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/ASTConstructIterator.java	2012-07-20 21:21:08 UTC (rev 6385)
@@ -44,6 +44,12 @@
 import org.openrdf.query.QueryEvaluationException;
 import org.openrdf.query.impl.EmptyBindingSet;
 
+import com.bigdata.bop.BOp;
+import com.bigdata.bop.IPredicate;
+import com.bigdata.bop.IVariableOrConstant;
+import com.bigdata.bop.ap.filter.DistinctFilter;
+import com.bigdata.bop.rdf.filter.NativeDistinctFilter;
+import com.bigdata.rdf.internal.IV;
 import com.bigdata.rdf.model.BigdataBNode;
 import com.bigdata.rdf.model.BigdataStatement;
 import com.bigdata.rdf.model.BigdataValue;
@@ -53,8 +59,14 @@
 import com.bigdata.rdf.sparql.ast.StatementPatternNode;
 import com.bigdata.rdf.sparql.ast.TermNode;
 import com.bigdata.rdf.sparql.ast.VarNode;
+import com.bigdata.rdf.spo.ISPO;
+import com.bigdata.rdf.spo.SPOKeyOrder;
+import com.bigdata.rdf.spo.SPOPredicate;
 import com.bigdata.rdf.store.AbstractTripleStore;
+import com.bigdata.striterator.ICloseable;
 
+import cutthecrap.utils.striterators.IFilterTest;
+
 /**
  * Iterator consumes the solutions from a query and interprets them according to
  * a {@link ConstructNode}. Ground triples in the template are output
@@ -98,6 +110,24 @@
      */
     private final List<BigdataStatement> buffer = new LinkedList<BigdataStatement>();
 
+    /**
+     * A filter which restricts the emitted statements to the distinct
+     * {@link ISPO}. The {@link DistinctFilter} is based on the Java heap. The
+     * {@link NativeDistinctFilter} is based on persistence capable data
+     * structures and can scale to high cardinality outputs. Unfortunately, a
+     * complex CONSTRUCT template can make it impossible to predict the
+     * <p>
+     * Note: It is possible to disable this filter (in the code) by setting it
+     * to <code>null</code>.
+     * 
+     * @see DistinctFilter
+     * @see NativeDistinctFilter
+     * 
+     * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/579">
+     *      CONSTRUCT should apply DISTINCT (s,p,o) filter </a>
+     */
+    private final IFilterTest filter;
+    
 //    /**
 //     * Return <code>true</code>iff {@link LexiconRelation#isStoreBlankNodes()}
 //     * is <code>true</code>.
@@ -176,6 +206,57 @@
 
         this.src = src;
 
+        /*
+         * Setup the DISTINCT SPO filter.
+         */
+        
+        final boolean nativeDistinct = construct.isNativeDistinct();
+        if (nativeDistinct) {
+            
+            /*
+             * Construct a predicate for the first triple template. We will use
+             * that as the bias for the scalable DISTINCT SPO filter.
+             */
+            final IPredicate pred;
+            {
+
+                final StatementPatternNode sp = templates.get(0/* index */);
+
+                final IVariableOrConstant<IV> s = sp.s().getValueExpression();
+                final IVariableOrConstant<IV> p = sp.p().getValueExpression();
+                final IVariableOrConstant<IV> o = sp.o().getValueExpression();
+
+//                // The graph term/variable iff specified by the query.
+//                final TermNode cvar = sp.c();
+//                final IVariableOrConstant<IV> c = cvar == null ? null : cvar
+//                        .getValueExpression();
+
+                final BOp[] vars = new BOp[] { s, p, o /*, c*/ };
+
+                pred = new SPOPredicate(vars, BOp.NOANNS);
+
+            }
+            
+            /*
+             * The index that will be used to read on the B+Tree access path.
+             */
+            @SuppressWarnings({ "unchecked", "rawtypes" })
+            final SPOKeyOrder indexKeyOrder = SPOKeyOrder.getKeyOrder(
+                    (IPredicate) pred, 3/* keyArity */);
+            
+            construct.setProperty(NativeDistinctFilter.Annotations.KEY_ORDER,
+                    indexKeyOrder);
+
+            // Native memory based DISTINCT filter.
+            filter = new NativeDistinctFilter.DistinctFilterImpl(construct);
+
+        } else {
+            
+            // JVM Based DISTINCT filter.
+            filter = new DistinctFilter.DistinctFilterImpl(construct);
+            
+        }
+        
     }
     
     public boolean hasNext() throws QueryEvaluationException {
@@ -243,6 +324,21 @@
             
             src.close();
             
+            if (filter instanceof ICloseable) {
+
+                /*
+                 * Ensure that we release the backing MemoryManager in a timely
+                 * fashion.
+                 * 
+                 * @see <a
+                 * href="https://sourceforge.net/apps/trac/bigdata/ticket/582">
+                 * IStriterator does not support close() protocol for IFilter
+                 * </a>
+                 */
+                ((ICloseable) filter).close();
+
+            }
+
         }
 
     }
@@ -316,7 +412,28 @@
         if(log.isDebugEnabled())
             log.debug(stmt.toString());
         
-        buffer.add(stmt);
+        if(filter != null) {
+
+            /*
+             * Impose a DISTINCT SPO filter on the generated statements in the
+             * constructed graph.
+             * 
+             * @see <a
+             * href="https://sourceforge.net/apps/trac/bigdata/ticket/579">
+             * CONSTRUCT should apply DISTINCT (s,p,o) filter </a>
+             */
+            
+            if(filter.isValid(stmt)) {
+                
+                buffer.add(stmt);
+                
+            }
+            
+        } else {
+
+            buffer.add(stmt);
+
+        }
         
     }
     

Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/ASTConstructOptimizer.java
===================================================================
--- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/ASTConstructOptimizer.java	2012-07-18 13:48:50 UTC (rev 6384)
+++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/ASTConstructOptimizer.java	2012-07-20 21:21:08 UTC (rev 6385)
@@ -95,7 +95,13 @@
             projection.addProjectionVar(new VarNode(itr.next().getName()));
 
         }
-        
+
+        if (context.nativeDistinctSPO) {
+
+            constructNode.setNativeDistinct(true);
+
+        }
+
         return queryRoot;
 
     }

Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/ASTDescribeOptimizer.java
===================================================================
--- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/ASTDescribeOptimizer.java	2012-07-18 13:48:50 UTC (rev 6384)
+++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/ASTDescribeOptimizer.java	2012-07-20 21:21:08 UTC (rev 6385)
@@ -118,7 +118,7 @@
 		
 		where.addChild(union); // append UNION to WHERE clause.
 
-        final ConstructNode construct = new ConstructNode();
+        final ConstructNode construct = new ConstructNode(context);
 
 		final ProjectionNode projection = queryRoot.getProjection();
 		

Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/bop/rdf/filter/TestNativeDistinctFilter.java
===================================================================
--- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/bop/rdf/filter/TestNativeDistinctFilter.java	2012-07-18 13:48:50 UTC (rev 6384)
+++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/bop/rdf/filter/TestNativeDistinctFilter.java	2012-07-20 21:21:08 UTC (rev 6385)
@@ -175,7 +175,7 @@
     /**
      * Unit test for {@link NativeDistinctFilter#getFilterKeyOrder(SPOKeyOrder)}
      */
-    public void test_filterKeyOrder() {
+    public void test_filterKeyOrder_quads() {
 
         assertEquals(new int[] {0,1,2},
                 NativeDistinctFilter.getFilterKeyOrder(SPOKeyOrder.SPOC));
@@ -197,6 +197,22 @@
 
     }
     
+    /**
+     * Unit test for {@link NativeDistinctFilter#getFilterKeyOrder(SPOKeyOrder)}
+     */
+    public void test_filterKeyOrder_triples() {
+
+        assertEquals(new int[] { 0, 1, 2 },
+                NativeDistinctFilter.getFilterKeyOrder(SPOKeyOrder.SPO));
+
+        assertEquals(new int[] { 1, 2, 0 },
+                NativeDistinctFilter.getFilterKeyOrder(SPOKeyOrder.POS));
+
+        assertEquals(new int[] { 2, 0, 1 },
+                NativeDistinctFilter.getFilterKeyOrder(SPOKeyOrder.OSP));
+
+    }
+    
     public void test_htreeDistinctSPOFilter() {
 
         final JoinSetup setup = new JoinSetup(getName());

Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/TestBasicQuery.java
===================================================================
--- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/TestBasicQuery.java	2012-07-18 13:48:50 UTC (rev 6384)
+++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/TestBasicQuery.java	2012-07-20 21:21:08 UTC (rev 6385)
@@ -27,6 +27,9 @@
 
 package com.bigdata.rdf.sparql.ast.eval;
 
+import com.bigdata.rdf.sparql.ast.ASTContainer;
+import com.bigdata.rdf.sparql.ast.ConstructNode;
+
 /**
  * Data driven test suite.
  * 
@@ -93,16 +96,93 @@
      */
     public void test_construct_1() throws Exception {
 
-        new TestHelper(
+        final ASTContainer ast = new TestHelper(
                 "construct-1", // testURI,
                 "construct-1.rq",// queryFileURL
                 "construct-1.trig",// dataFileURL
                 "construct-1-result.trig"// resultFileURL
                 ).runTest();
         
+        final ConstructNode construct = ast.getOptimizedAST().getConstruct();
+
+        assertNotNull(construct);
+
+        assertFalse(construct.isNativeDistinct());
+
     }
 
     /**
+     * A simple CONSTRUCT query using a native DISTINCT filter.
+     * 
+     * <pre>
+     * PREFIX rdf:  <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+     * PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+     * PREFIX foaf: <http://xmlns.com/foaf/0.1/>
+     * 
+     * CONSTRUCT {
+     *   <http://www.bigdata.com/DC> rdfs:label "DC" .
+     *   ?x rdf:type foaf:Person . 
+     * } where {
+     *   # Enable the native DISTINCT SPO filter.
+     *   hint:Query hint:nativeDistinctSPO true .
+     *   ?x rdf:type foaf:Person 
+     * }
+     * </pre>
+     */
+    public void test_construct_1a() throws Exception {
+
+        final ASTContainer ast = new TestHelper(
+                "construct-1", // testURI,
+                "construct-1a.rq",// queryFileURL
+                "construct-1.trig",// dataFileURL
+                "construct-1-result.trig"// resultFileURL
+                ).runTest();
+
+        final ConstructNode construct = ast.getOptimizedAST().getConstruct();
+
+        assertNotNull(construct);
+
+        assertTrue(construct.isNativeDistinct());
+        
+    }
+
+    /**
+     * A simple CONSTRUCT query using a native DISTINCT filter (enabled
+     * via the "analytic" query hint).
+     * 
+     * <pre>
+     * PREFIX rdf:  <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+     * PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+     * PREFIX foaf: <http://xmlns.com/foaf/0.1/>
+     * 
+     * CONSTRUCT {
+     *   <http://www.bigdata.com/DC> rdfs:label "DC" .
+     *   ?x rdf:type foaf:Person . 
+     * } where {
+     *   # Enable the native DISTINCT SPO filter.
+     *   hint:Query hint:analytic true .
+     *   ?x rdf:type foaf:Person 
+     * }
+     * </pre>
+     */
+    public void test_construct_1b() throws Exception {
+
+        final ASTContainer ast = new TestHelper(
+                "construct-1", // testURI,
+                "construct-1b.rq",// queryFileURL
+                "construct-1.trig",// dataFileURL
+                "construct-1-result.trig"// resultFileURL
+                ).runTest();
+
+        final ConstructNode construct = ast.getOptimizedAST().getConstruct();
+
+        assertNotNull(construct);
+
+        assertTrue(construct.isNativeDistinct());
+        
+    }
+
+    /**
      * A CONSTRUCT without a template and having a ground triple in the WHERE
      * clause. For this variant of the test, the triple is not in the KB.
      * 

Added: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/construct-1a.rq
===================================================================
--- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/construct-1a.rq	                        (rev 0)
+++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/construct-1a.rq	2012-07-20 21:21:08 UTC (rev 6385)
@@ -0,0 +1,12 @@
+PREFIX rdf:  <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+PREFIX foaf: <http://xmlns.com/foaf/0.1/>
+
+CONSTRUCT {
+  <http://www.bigdata.com/DC> rdfs:label "DC" .
+  ?x rdf:type foaf:Person . 
+} where {
+  # Enable the native DISTINCT SPO filter.
+  hint:Query hint:nativeDistinctSPO true .
+  ?x rdf:type foaf:Person 
+}
\ No newline at end of file

Added: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/construct-1b.rq
===================================================================
--- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/construct-1b.rq	                        (rev 0)
+++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/construct-1b.rq	2012-07-20 21:21:08 UTC (rev 6385)
@@ -0,0 +1,12 @@
+PREFIX rdf:  <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+PREFIX foaf: <http://xmlns.com/foaf/0.1/>
+
+CONSTRUCT {
+  <http://www.bigdata.com/DC> rdfs:label "DC" .
+  ?x rdf:type foaf:Person . 
+} where {
+  # Enable the native DISTINCT SPO filter.
+  hint:Query hint:nativeDistinctSPO true .
+  ?x rdf:type foaf:Person 
+}
\ No newline at end of file

Modified: branches/BIGDATA_RELEASE_1_2_0/ctc-striterators/src/java/cutthecrap/utils/striterators/IFilter.java
===================================================================
--- branches/BIGDATA_RELEASE_1_2_0/ctc-striterators/src/java/cutthecrap/utils/striterators/IFilter.java	2012-07-18 13:48:50 UTC (rev 6384)
+++ branches/BIGDATA_RELEASE_1_2_0/ctc-striterators/src/java/cutthecrap/utils/striterators/IFilter.java	2012-07-20 21:21:08 UTC (rev 6385)
@@ -29,8 +29,17 @@
 import java.util.Iterator;
 import java.io.Serializable;
 
+import com.bigdata.striterator.ICloseableIterator;
+
 /**
  * Provides the hook interface that allows use by Striterators
+ * 
+ * TODO The {@link Striterator} protocol does not support a close() method for
+ * {@link Filter}s. That method should be invoked by an
+ * {@link ICloseableIterator}.
+ * 
+ * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/582">
+ *      IStriterator does not support close() protocol for Ifilter </a>
  */
 public interface IFilter extends Serializable, IPropertySet {
 	/**

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.





[Bigdata-commit] SF.net SVN: bigdata:[6385] branches/BIGDATA_RELEASE_1_2_0

Fast, scalable, robust graph database platform

[Bigdata-commit] SF.net SVN: bigdata:[6385] branches/BIGDATA_RELEASE_1_2_0