From: <tho...@us...> - 2012-07-20 21:21:16
|
Revision: 6385 http://bigdata.svn.sourceforge.net/bigdata/?rev=6385&view=rev Author: thompsonbry Date: 2012-07-20 21:21:08 +0000 (Fri, 20 Jul 2012) Log Message: ----------- Added a DISTINCT (s,p,o) filter to the CONSTRUCT iterator (ASTConstructIterator) in support of [1]. The DISTINCT SPO filter is always enabled. When the hint:analytic mode is enabled or when the hint:nativeDistinctSPO mode are enabled, then the CONSTUCT will use a filter backed by the native process heap. Otherwise it will use a filter backed by the JVM heap. Some unit tests have been added to verify that the NativeDistinctFilter is used when the appropriate query hint is specified. I pulled out an ICloseable interface from ICloseableIterator. The NativeDistinctFilter's internal DistinctFilterImpl class now implements ICloseable. An issue [2] has been created to extend the striterator protocol to support the ICloseable interface on IFilter and IFilterTest. [1] https://sourceforge.net/apps/trac/bigdata/ticket/579 (CONSTRUCT should impose distinct (s,p,o) filter) [2] http://sourceforge.net/apps/trac/bigdata/ticket/582 (IStriterator does not support close() protocol for IFilter) Modified Paths: -------------- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/ap/filter/DistinctFilter.java branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/striterator/ICloseableIterator.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/bop/rdf/filter/NativeDistinctFilter.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/ConstructNode.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUpdate.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/ASTConstructIterator.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/ASTConstructOptimizer.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/ASTDescribeOptimizer.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/bop/rdf/filter/TestNativeDistinctFilter.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/TestBasicQuery.java branches/BIGDATA_RELEASE_1_2_0/ctc-striterators/src/java/cutthecrap/utils/striterators/IFilter.java Added Paths: ----------- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/striterator/ICloseable.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/construct-1a.rq branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/construct-1b.rq Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/ap/filter/DistinctFilter.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/ap/filter/DistinctFilter.java 2012-07-18 13:48:50 UTC (rev 6384) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/ap/filter/DistinctFilter.java 2012-07-20 21:21:08 UTC (rev 6385) @@ -1,21 +1,27 @@ package com.bigdata.bop.ap.filter; +import java.util.HashSet; import java.util.Iterator; -import java.util.LinkedHashSet; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; import com.bigdata.bop.BOp; import com.bigdata.bop.HashMapAnnotations; +import com.bigdata.rdf.spo.SPO; import com.bigdata.striterator.IChunkConverter; import com.bigdata.striterator.MergeFilter; import cutthecrap.utils.striterators.Filter; import cutthecrap.utils.striterators.Filterator; +import cutthecrap.utils.striterators.IPropertySet; /** * A DISTINCT operator based for elements in a relation. The operator is based * on an in-memory hash table. + * <p> + * Note: This is used for the in-memory {@link SPO} distinct filter, but + * it is more general and can be applied to any data type that can be + * inserted into a set. * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id: DistinctElementFilter.java 3466 2010-08-27 14:28:04Z @@ -46,7 +52,9 @@ * A instance using the default configuration for the in memory hash map. */ public static DistinctFilter newInstance() { + return new DistinctFilter(BOp.NOARGS, BOp.NOANNS); + } /** @@ -66,34 +74,39 @@ } - /** - * @see Annotations#INITIAL_CAPACITY - */ - public int getInitialCapacity() { +// /** +// * @see Annotations#INITIAL_CAPACITY +// */ +// public int getInitialCapacity() { +// +// return getProperty(Annotations.INITIAL_CAPACITY, +// Annotations.DEFAULT_INITIAL_CAPACITY); +// +// } +// +// /** +// * @see Annotations#LOAD_FACTOR +// */ +// public float getLoadFactor() { +// +// return getProperty(Annotations.LOAD_FACTOR, +// Annotations.DEFAULT_LOAD_FACTOR); +// +// } - return getProperty(Annotations.INITIAL_CAPACITY, - Annotations.DEFAULT_INITIAL_CAPACITY); - - } - - /** - * @see Annotations#LOAD_FACTOR - */ - public float getLoadFactor() { - - return getProperty(Annotations.LOAD_FACTOR, - Annotations.DEFAULT_LOAD_FACTOR); - - } - @Override final protected Iterator filterOnce(Iterator src, final Object context) { - return new Filterator(src, context, new DistinctFilterImpl()); + return new Filterator(src, context, new DistinctFilterImpl(this)); } - private class DistinctFilterImpl extends Filter { + /** + * DISTINCT filter based on Java heap data structures. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + */ + public static class DistinctFilterImpl extends Filter { private static final long serialVersionUID = 1L; @@ -102,18 +115,46 @@ * {@link ConcurrentHashMap} here. */ @SuppressWarnings("rawtypes") - private final LinkedHashSet members; + private final HashSet members; + @SuppressWarnings("unchecked") + static private <T> T getProperty(final IPropertySet pset, + final String name, final T defaultValue) { + + final Object val = pset.getProperty(name); + + if (val != null) + return (T) val; + + return defaultValue; + + } + + /** + * DISTINCT filter based on Java heap data structures. + * + * @param propertySet + * Used to configured the DISTINCT filter. + * + * @see DistinctFilter.Annotations + */ @SuppressWarnings("rawtypes") - public DistinctFilterImpl() { + public DistinctFilterImpl(final IPropertySet propertySet) { + + final int initialCapacity = getProperty(propertySet, + Annotations.INITIAL_CAPACITY, + Annotations.DEFAULT_INITIAL_CAPACITY); - members = new LinkedHashSet(getInitialCapacity(), getLoadFactor()); + final float loadFactor = getProperty(propertySet, + Annotations.LOAD_FACTOR, Annotations.DEFAULT_LOAD_FACTOR); + members = new HashSet(initialCapacity, loadFactor); + } @SuppressWarnings("unchecked") @Override - public boolean isValid(Object obj) { + public boolean isValid(final Object obj) { return members.add(obj); Added: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/striterator/ICloseable.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/striterator/ICloseable.java (rev 0) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/striterator/ICloseable.java 2012-07-20 21:21:08 UTC (rev 6385) @@ -0,0 +1,46 @@ +/* + +Copyright (C) SYSTAP, LLC 2006-2008. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +*/ +/* + * Created on Jul 20, 2012 + */ +package com.bigdata.striterator; + +/** + * Interface for objects which can have resources which must be explicitly + * closed (typically iterators). + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + */ +public interface ICloseable { + + /** + * Closes the object, releasing any associated resources. This method MAY be + * invoked safely if the object is already closed. Implementations of this + * interface MUST invoke {@link #close()} once it is known that the object + * will no longer be used. + */ + public void close(); + +} Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/striterator/ICloseableIterator.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/striterator/ICloseableIterator.java 2012-07-18 13:48:50 UTC (rev 6384) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/striterator/ICloseableIterator.java 2012-07-20 21:21:08 UTC (rev 6385) @@ -38,7 +38,7 @@ * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ */ -public interface ICloseableIterator<E> extends Iterator<E> { +public interface ICloseableIterator<E> extends Iterator<E>, ICloseable { /** * Closes the iterator, releasing any associated resources. This method MAY Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/bop/rdf/filter/NativeDistinctFilter.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/bop/rdf/filter/NativeDistinctFilter.java 2012-07-18 13:48:50 UTC (rev 6384) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/bop/rdf/filter/NativeDistinctFilter.java 2012-07-20 21:21:08 UTC (rev 6385) @@ -7,12 +7,14 @@ import java.util.Map; import java.util.Set; import java.util.UUID; +import java.util.concurrent.atomic.AtomicBoolean; import com.bigdata.bop.BOp; import com.bigdata.bop.BTreeAnnotations; import com.bigdata.bop.HTreeAnnotations; import com.bigdata.bop.HashMapAnnotations; import com.bigdata.bop.ap.filter.BOpFilterBase; +import com.bigdata.bop.ap.filter.DistinctFilter; import com.bigdata.bop.engine.IRunningQuery; import com.bigdata.btree.BTree; import com.bigdata.btree.BloomFilterFactory; @@ -30,14 +32,17 @@ import com.bigdata.io.DirectBufferPool; import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.internal.IVUtility; +import com.bigdata.rdf.sparql.ast.eval.ASTConstructIterator; import com.bigdata.rdf.spo.ISPO; import com.bigdata.rdf.spo.SPO; import com.bigdata.rdf.spo.SPOKeyOrder; import com.bigdata.rwstore.sector.MemStore; import com.bigdata.rwstore.sector.MemoryManager; +import com.bigdata.striterator.ICloseable; import cutthecrap.utils.striterators.Filter; import cutthecrap.utils.striterators.Filterator; +import cutthecrap.utils.striterators.IPropertySet; /** * A scalable DISTINCT operator based for {@link SPO}s. @@ -144,18 +149,18 @@ @Override final protected Iterator filterOnce(Iterator src, final Object context) { - return new Filterator(src, context, new DistinctFilterImpl()); + return new Filterator(src, context, new DistinctFilterImpl(this)); } /** * Return the 3-component key order which has the best locality given that - * the SPOs will be ariving in the natural order of the + * the SPOs will be arriving in the natural order of the * <i>indexKeyOrder</i>. This is the keyOrder that we will use for the * filter. This gives the filter index structure the best possible locality * in terms of the order in which the SPOs are arriving. * <p> - * The return valuer is an <code>int[3]</code>. The index is the orderinal + * The return valuer is an <code>int[3]</code>. The index is the ordinal * position of the triples mode key component for the filter keys. The value * at that index is the position in the {@link SPOKeyOrder} of the quads * mode index whose natural order determines the order of arrival of the @@ -183,19 +188,36 @@ * * which models the <code>PSO</code> key order for the purposes of this * class. + * <p> + * Note: This method now accepts triples in support of the + * {@link ASTConstructIterator} * * @see Annotations#INDEX_KEY_ORDER + * + * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/579"> + * CONSTRUCT should apply DISTINCT (s,p,o) filter </a> */ - public static int[] getFilterKeyOrder(SPOKeyOrder indexKeyOrder) { + public static int[] getFilterKeyOrder(final SPOKeyOrder indexKeyOrder) { if (indexKeyOrder == null) throw new IllegalArgumentException(); - if (indexKeyOrder.getKeyArity() != 4) - throw new IllegalArgumentException(); +// if (indexKeyOrder.getKeyArity() != 4) +// throw new IllegalArgumentException(); final int[] filterKeyOrder; switch (indexKeyOrder.index()) { + // TRIPLES + case SPOKeyOrder._SPO: + filterKeyOrder = new int[] { 0, 1, 2 }; + break; + case SPOKeyOrder._POS: + filterKeyOrder = new int[] { 1, 2, 0 }; + break; + case SPOKeyOrder._OSP: + filterKeyOrder = new int[] { 2, 0, 1 }; + break; + // QUADS case SPOKeyOrder._SPOC: filterKeyOrder = new int[] { 0, 1, 2 }; break; @@ -220,7 +242,15 @@ return filterKeyOrder; } - private class DistinctFilterImpl extends Filter { + /** + * A {@link Filter} which passes only the DISTINCT {@link ISPO}s and is + * backed by a scalable data structure (BTree or HTree). + * + * @author <a href="mailto:tho...@us...">Bryan + * Thompson</a> + */ + static public class DistinctFilterImpl extends Filter implements + ICloseable { private static final long serialVersionUID = 1L; @@ -232,7 +262,7 @@ /** * The fast JVM based cache. This is always allocated. */ - private final LinkedHashMap<SPO, byte[]> lru; + private final LinkedHashMap<ISPO, byte[]> lru; /** * The metadata used to create the index. @@ -262,10 +292,15 @@ private volatile ICheckpointProtocol index; /** + * <code>true</code> until {@link #close() closed}. + */ + private final AtomicBoolean open = new AtomicBoolean(true); + + /** * When <code>true</code>, the {@link BTree} will be used. When * <code>false</code> the {@link HTree}. * <p> - * Note: Hisorical testing indicated that the {@link BTree} was faster + * Note: Historical testing indicated that the {@link BTree} was faster * for this application. * * TODO Edit HTree/BTree here. @@ -279,28 +314,85 @@ @Override protected void finalize() throws Throwable { + close(); super.finalize(); - if (index != null) { - index.close(); - index = null; + } + + /** + * Release resources associated with the filter. + * <p> + * Note: This is done automatically by {@link #finalize()}, but it + * should be done pro-actively whenever possible. + * + * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/582"> + * IStriterator does not support close() protocol for Ifilter </a> + */ + @Override + public void close() { + if (open.compareAndSet(true/* expect */, false/* update */)) { + /* + * Close when first invoked. + */ + if (index != null) { + index.close(); + index = null; + } + if (store != null) { + store.close(); + store = null; + } } - if (store != null) { - store.close(); - store = null; - } } + + @SuppressWarnings("unchecked") + static private <T> T getRequiredProperty(final IPropertySet pset, + final String name) { + + final Object val = pset.getProperty(name); + + if (val == null) + + if (val == null) + throw new IllegalStateException("Required property: " + + name + " : " + NativeDistinctFilter.class); + + return (T) val; + + } + + @SuppressWarnings("unchecked") + static private <T> T getProperty(final IPropertySet pset, + final String name, final T defaultValue) { + + final Object val = pset.getProperty(name); + + if (val != null) + return (T) val; + + return defaultValue; + + } - public DistinctFilterImpl() { + /** + * DISTINCT {@link ISPO} filter based on persistence capable data + * structures. + * + * @param properties + * Used to configure the DISTINCT filter. + * + * @see DistinctFilter.Annotations + */ + public DistinctFilterImpl(final IPropertySet properties) { - final int initialCapacity = NativeDistinctFilter.this.getProperty( + final int initialCapacity = getProperty(properties, Annotations.INITIAL_CAPACITY, Annotations.DEFAULT_INITIAL_CAPACITY); - final float loadFactor = NativeDistinctFilter.this.getProperty( + final float loadFactor = getProperty(properties, Annotations.LOAD_FACTOR, Annotations.DEFAULT_LOAD_FACTOR); - lru = new LinkedHashMap<SPO, byte[]>(initialCapacity, loadFactor); + lru = new LinkedHashMap<ISPO, byte[]>(initialCapacity, loadFactor); this.nominalCapacity = initialCapacity; @@ -312,8 +404,8 @@ */ { - final SPOKeyOrder indexKeyOrder = (SPOKeyOrder) NativeDistinctFilter.this - .getRequiredProperty(Annotations.KEY_ORDER); + final SPOKeyOrder indexKeyOrder = (SPOKeyOrder) getRequiredProperty( + properties, Annotations.KEY_ORDER); filterKeyOrder = getFilterKeyOrder(indexKeyOrder); @@ -322,20 +414,19 @@ : new HTreeIndexMetadata(UUID.randomUUID()); // IFF BTree - metadata.setBranchingFactor(NativeDistinctFilter.this.getProperty( + metadata.setBranchingFactor(getProperty(properties, BTreeAnnotations.BRANCHING_FACTOR, 256));// TODO Overridden here. BTreeAnnotations.DEFAULT_BRANCHING_FACTOR)); // IFF HTree if (metadata instanceof HTreeIndexMetadata) { ((HTreeIndexMetadata) metadata) - .setAddressBits(NativeDistinctFilter.this - .getProperty( + .setAddressBits(getProperty(properties, HTreeAnnotations.ADDRESS_BITS, HTreeAnnotations.DEFAULT_ADDRESS_BITS)); } - metadata.setRawRecords(NativeDistinctFilter.this.getProperty( + metadata.setRawRecords(getProperty(properties, Annotations.RAW_RECORDS, Annotations.DEFAULT_RAW_RECORDS)); @@ -344,10 +435,9 @@ metadata.setBloomFilterFactory(BloomFilterFactory.DEFAULT); - metadata.setWriteRetentionQueueCapacity(NativeDistinctFilter.this - .getProperty( - Annotations.WRITE_RETENTION_QUEUE_CAPACITY, - Annotations.DEFAULT_WRITE_RETENTION_QUEUE_CAPACITY)); + metadata.setWriteRetentionQueueCapacity(getProperty(properties, + Annotations.WRITE_RETENTION_QUEUE_CAPACITY, + Annotations.DEFAULT_WRITE_RETENTION_QUEUE_CAPACITY)); final int ratio = 32; // TODO Config/tune front-coding ratio. @@ -378,8 +468,8 @@ final int n = lru.size(); final byte[][] a = new byte[n][]; { - // Evict everthing into an array. - final Iterator<Map.Entry<SPO, byte[]>> itr = lru.entrySet() + // Evict everything into an array. + final Iterator<Map.Entry<ISPO, byte[]>> itr = lru.entrySet() .iterator(); int i = 0; while (itr.hasNext()) { @@ -409,6 +499,11 @@ if (index != null) throw new IllegalStateException(); + + if (!open.get()) { + // Explicitly closed. + throw new IllegalStateException(); + } /* * This wraps an efficient raw store interface around a child memory @@ -435,7 +530,7 @@ @Override public boolean isValid(final Object obj) { - final SPO spo = (SPO) obj; + final ISPO spo = (ISPO) obj; return add(spo); @@ -457,7 +552,7 @@ * * @return <code>true</code> if the collection was modified. */ - private boolean add(final SPO spo) { + private boolean add(final ISPO spo) { if(lru.containsKey(spo)) { // already in the LRU Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/ConstructNode.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/ConstructNode.java 2012-07-18 13:48:50 UTC (rev 6384) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/ConstructNode.java 2012-07-20 21:21:08 UTC (rev 6385) @@ -30,6 +30,8 @@ import java.util.Map; import com.bigdata.bop.BOp; +import com.bigdata.rdf.sparql.ast.eval.AST2BOpContext; +import com.bigdata.rdf.spo.ISPO; /** * A template for the construction of one or more graphs based on the solutions @@ -47,10 +49,47 @@ */ private static final long serialVersionUID = 1L; + public interface Annotations extends AbstractStatementContainer.Annotations { + + /** + * Boolean property (default {@value #DEFAULT_ALL_GRAPHS}) which is + * <code>true</code> iff a native DISTINCT {@link ISPO} filter should be + * applied (large cardinality is expected for the constructed graph). + * When <code>false</code>, a JVM based DISTINCT {@link ISPO} filter + * will be applied. + * <p> + * Note: This can be set using either {@link QueryHints#ANALYTIC} or + * {@link QueryHints#NATIVE_DISTINCT_SPO}. + * + * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/579"> + * CONSTRUCT should apply DISTINCT (s,p,o) filter </a> + */ + String NATIVE_DISTINCT = "nativeDistinct"; + + boolean DEFAULT_NATIVE_DISTINCT = false; + + } + public ConstructNode() { + super(); + } + public ConstructNode(final AST2BOpContext ctx) { + + super(); + + if (ctx.nativeDistinctSPO) { + + // Native DISTINCT SPO FILTER is requested. + + setNativeDistinct(true); + + } + + } + /** * Required deep copy constructor. */ @@ -80,6 +119,29 @@ return this; } + + /** + * When <code>true</code>, a native DISTINCT {@link ISPO} filter will be + * applied to the constructed graph, otherwise a Java Heap based DISTINCT + * {@link ISPO} filter will be applied. + * + * @see Annotations#NATIVE_DISTINCT + * + * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/579"> + * CONSTRUCT should apply DISTINCT (s,p,o) filter </a> + */ + public boolean isNativeDistinct() { + + return getProperty(Annotations.NATIVE_DISTINCT, + Annotations.DEFAULT_NATIVE_DISTINCT); + + } + + public void setNativeDistinct(final boolean nativeDistinct) { + + setProperty(Annotations.NATIVE_DISTINCT, nativeDistinct); + + } @Override public String toString(final int indent) { @@ -97,7 +159,10 @@ } sb.append("\n").append(s).append("}"); - + + if (isNativeDistinct()) + sb.append(" [nativeDistinct]"); + return sb.toString(); } Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUpdate.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUpdate.java 2012-07-18 13:48:50 UTC (rev 6384) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUpdate.java 2012-07-20 21:21:08 UTC (rev 6385) @@ -721,7 +721,7 @@ */ final ConstructNode template = op.getDeleteClause() - .getQuadData().flatten(new ConstructNode()); + .getQuadData().flatten(new ConstructNode(context)); final ASTConstructIterator itr = new ASTConstructIterator( context.conn.getTripleStore(), template, @@ -789,7 +789,7 @@ */ final ConstructNode template = op.getInsertClause() - .getQuadData().flatten(new ConstructNode()); + .getQuadData().flatten(new ConstructNode(context)); final ASTConstructIterator itr = new ASTConstructIterator( context.conn.getTripleStore(), template, @@ -898,7 +898,7 @@ // Flatten the original WHERE clause into a CONSTRUCT // template. final ConstructNode template = quadData - .flatten(new ConstructNode()); + .flatten(new ConstructNode(context)); // Set the CONSTRUCT template (quads patterns). queryRoot.setConstruct(template); Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/ASTConstructIterator.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/ASTConstructIterator.java 2012-07-18 13:48:50 UTC (rev 6384) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/ASTConstructIterator.java 2012-07-20 21:21:08 UTC (rev 6385) @@ -44,6 +44,12 @@ import org.openrdf.query.QueryEvaluationException; import org.openrdf.query.impl.EmptyBindingSet; +import com.bigdata.bop.BOp; +import com.bigdata.bop.IPredicate; +import com.bigdata.bop.IVariableOrConstant; +import com.bigdata.bop.ap.filter.DistinctFilter; +import com.bigdata.bop.rdf.filter.NativeDistinctFilter; +import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.model.BigdataBNode; import com.bigdata.rdf.model.BigdataStatement; import com.bigdata.rdf.model.BigdataValue; @@ -53,8 +59,14 @@ import com.bigdata.rdf.sparql.ast.StatementPatternNode; import com.bigdata.rdf.sparql.ast.TermNode; import com.bigdata.rdf.sparql.ast.VarNode; +import com.bigdata.rdf.spo.ISPO; +import com.bigdata.rdf.spo.SPOKeyOrder; +import com.bigdata.rdf.spo.SPOPredicate; import com.bigdata.rdf.store.AbstractTripleStore; +import com.bigdata.striterator.ICloseable; +import cutthecrap.utils.striterators.IFilterTest; + /** * Iterator consumes the solutions from a query and interprets them according to * a {@link ConstructNode}. Ground triples in the template are output @@ -98,6 +110,24 @@ */ private final List<BigdataStatement> buffer = new LinkedList<BigdataStatement>(); + /** + * A filter which restricts the emitted statements to the distinct + * {@link ISPO}. The {@link DistinctFilter} is based on the Java heap. The + * {@link NativeDistinctFilter} is based on persistence capable data + * structures and can scale to high cardinality outputs. Unfortunately, a + * complex CONSTRUCT template can make it impossible to predict the + * <p> + * Note: It is possible to disable this filter (in the code) by setting it + * to <code>null</code>. + * + * @see DistinctFilter + * @see NativeDistinctFilter + * + * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/579"> + * CONSTRUCT should apply DISTINCT (s,p,o) filter </a> + */ + private final IFilterTest filter; + // /** // * Return <code>true</code>iff {@link LexiconRelation#isStoreBlankNodes()} // * is <code>true</code>. @@ -176,6 +206,57 @@ this.src = src; + /* + * Setup the DISTINCT SPO filter. + */ + + final boolean nativeDistinct = construct.isNativeDistinct(); + if (nativeDistinct) { + + /* + * Construct a predicate for the first triple template. We will use + * that as the bias for the scalable DISTINCT SPO filter. + */ + final IPredicate pred; + { + + final StatementPatternNode sp = templates.get(0/* index */); + + final IVariableOrConstant<IV> s = sp.s().getValueExpression(); + final IVariableOrConstant<IV> p = sp.p().getValueExpression(); + final IVariableOrConstant<IV> o = sp.o().getValueExpression(); + +// // The graph term/variable iff specified by the query. +// final TermNode cvar = sp.c(); +// final IVariableOrConstant<IV> c = cvar == null ? null : cvar +// .getValueExpression(); + + final BOp[] vars = new BOp[] { s, p, o /*, c*/ }; + + pred = new SPOPredicate(vars, BOp.NOANNS); + + } + + /* + * The index that will be used to read on the B+Tree access path. + */ + @SuppressWarnings({ "unchecked", "rawtypes" }) + final SPOKeyOrder indexKeyOrder = SPOKeyOrder.getKeyOrder( + (IPredicate) pred, 3/* keyArity */); + + construct.setProperty(NativeDistinctFilter.Annotations.KEY_ORDER, + indexKeyOrder); + + // Native memory based DISTINCT filter. + filter = new NativeDistinctFilter.DistinctFilterImpl(construct); + + } else { + + // JVM Based DISTINCT filter. + filter = new DistinctFilter.DistinctFilterImpl(construct); + + } + } public boolean hasNext() throws QueryEvaluationException { @@ -243,6 +324,21 @@ src.close(); + if (filter instanceof ICloseable) { + + /* + * Ensure that we release the backing MemoryManager in a timely + * fashion. + * + * @see <a + * href="https://sourceforge.net/apps/trac/bigdata/ticket/582"> + * IStriterator does not support close() protocol for IFilter + * </a> + */ + ((ICloseable) filter).close(); + + } + } } @@ -316,7 +412,28 @@ if(log.isDebugEnabled()) log.debug(stmt.toString()); - buffer.add(stmt); + if(filter != null) { + + /* + * Impose a DISTINCT SPO filter on the generated statements in the + * constructed graph. + * + * @see <a + * href="https://sourceforge.net/apps/trac/bigdata/ticket/579"> + * CONSTRUCT should apply DISTINCT (s,p,o) filter </a> + */ + + if(filter.isValid(stmt)) { + + buffer.add(stmt); + + } + + } else { + + buffer.add(stmt); + + } } Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/ASTConstructOptimizer.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/ASTConstructOptimizer.java 2012-07-18 13:48:50 UTC (rev 6384) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/ASTConstructOptimizer.java 2012-07-20 21:21:08 UTC (rev 6385) @@ -95,7 +95,13 @@ projection.addProjectionVar(new VarNode(itr.next().getName())); } - + + if (context.nativeDistinctSPO) { + + constructNode.setNativeDistinct(true); + + } + return queryRoot; } Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/ASTDescribeOptimizer.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/ASTDescribeOptimizer.java 2012-07-18 13:48:50 UTC (rev 6384) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/ASTDescribeOptimizer.java 2012-07-20 21:21:08 UTC (rev 6385) @@ -118,7 +118,7 @@ where.addChild(union); // append UNION to WHERE clause. - final ConstructNode construct = new ConstructNode(); + final ConstructNode construct = new ConstructNode(context); final ProjectionNode projection = queryRoot.getProjection(); Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/bop/rdf/filter/TestNativeDistinctFilter.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/bop/rdf/filter/TestNativeDistinctFilter.java 2012-07-18 13:48:50 UTC (rev 6384) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/bop/rdf/filter/TestNativeDistinctFilter.java 2012-07-20 21:21:08 UTC (rev 6385) @@ -175,7 +175,7 @@ /** * Unit test for {@link NativeDistinctFilter#getFilterKeyOrder(SPOKeyOrder)} */ - public void test_filterKeyOrder() { + public void test_filterKeyOrder_quads() { assertEquals(new int[] {0,1,2}, NativeDistinctFilter.getFilterKeyOrder(SPOKeyOrder.SPOC)); @@ -197,6 +197,22 @@ } + /** + * Unit test for {@link NativeDistinctFilter#getFilterKeyOrder(SPOKeyOrder)} + */ + public void test_filterKeyOrder_triples() { + + assertEquals(new int[] { 0, 1, 2 }, + NativeDistinctFilter.getFilterKeyOrder(SPOKeyOrder.SPO)); + + assertEquals(new int[] { 1, 2, 0 }, + NativeDistinctFilter.getFilterKeyOrder(SPOKeyOrder.POS)); + + assertEquals(new int[] { 2, 0, 1 }, + NativeDistinctFilter.getFilterKeyOrder(SPOKeyOrder.OSP)); + + } + public void test_htreeDistinctSPOFilter() { final JoinSetup setup = new JoinSetup(getName()); Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/TestBasicQuery.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/TestBasicQuery.java 2012-07-18 13:48:50 UTC (rev 6384) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/TestBasicQuery.java 2012-07-20 21:21:08 UTC (rev 6385) @@ -27,6 +27,9 @@ package com.bigdata.rdf.sparql.ast.eval; +import com.bigdata.rdf.sparql.ast.ASTContainer; +import com.bigdata.rdf.sparql.ast.ConstructNode; + /** * Data driven test suite. * @@ -93,16 +96,93 @@ */ public void test_construct_1() throws Exception { - new TestHelper( + final ASTContainer ast = new TestHelper( "construct-1", // testURI, "construct-1.rq",// queryFileURL "construct-1.trig",// dataFileURL "construct-1-result.trig"// resultFileURL ).runTest(); + final ConstructNode construct = ast.getOptimizedAST().getConstruct(); + + assertNotNull(construct); + + assertFalse(construct.isNativeDistinct()); + } /** + * A simple CONSTRUCT query using a native DISTINCT filter. + * + * <pre> + * PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + * PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> + * PREFIX foaf: <http://xmlns.com/foaf/0.1/> + * + * CONSTRUCT { + * <http://www.bigdata.com/DC> rdfs:label "DC" . + * ?x rdf:type foaf:Person . + * } where { + * # Enable the native DISTINCT SPO filter. + * hint:Query hint:nativeDistinctSPO true . + * ?x rdf:type foaf:Person + * } + * </pre> + */ + public void test_construct_1a() throws Exception { + + final ASTContainer ast = new TestHelper( + "construct-1", // testURI, + "construct-1a.rq",// queryFileURL + "construct-1.trig",// dataFileURL + "construct-1-result.trig"// resultFileURL + ).runTest(); + + final ConstructNode construct = ast.getOptimizedAST().getConstruct(); + + assertNotNull(construct); + + assertTrue(construct.isNativeDistinct()); + + } + + /** + * A simple CONSTRUCT query using a native DISTINCT filter (enabled + * via the "analytic" query hint). + * + * <pre> + * PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + * PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> + * PREFIX foaf: <http://xmlns.com/foaf/0.1/> + * + * CONSTRUCT { + * <http://www.bigdata.com/DC> rdfs:label "DC" . + * ?x rdf:type foaf:Person . + * } where { + * # Enable the native DISTINCT SPO filter. + * hint:Query hint:analytic true . + * ?x rdf:type foaf:Person + * } + * </pre> + */ + public void test_construct_1b() throws Exception { + + final ASTContainer ast = new TestHelper( + "construct-1", // testURI, + "construct-1b.rq",// queryFileURL + "construct-1.trig",// dataFileURL + "construct-1-result.trig"// resultFileURL + ).runTest(); + + final ConstructNode construct = ast.getOptimizedAST().getConstruct(); + + assertNotNull(construct); + + assertTrue(construct.isNativeDistinct()); + + } + + /** * A CONSTRUCT without a template and having a ground triple in the WHERE * clause. For this variant of the test, the triple is not in the KB. * Added: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/construct-1a.rq =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/construct-1a.rq (rev 0) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/construct-1a.rq 2012-07-20 21:21:08 UTC (rev 6385) @@ -0,0 +1,12 @@ +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX foaf: <http://xmlns.com/foaf/0.1/> + +CONSTRUCT { + <http://www.bigdata.com/DC> rdfs:label "DC" . + ?x rdf:type foaf:Person . +} where { + # Enable the native DISTINCT SPO filter. + hint:Query hint:nativeDistinctSPO true . + ?x rdf:type foaf:Person +} \ No newline at end of file Added: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/construct-1b.rq =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/construct-1b.rq (rev 0) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/construct-1b.rq 2012-07-20 21:21:08 UTC (rev 6385) @@ -0,0 +1,12 @@ +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX foaf: <http://xmlns.com/foaf/0.1/> + +CONSTRUCT { + <http://www.bigdata.com/DC> rdfs:label "DC" . + ?x rdf:type foaf:Person . +} where { + # Enable the native DISTINCT SPO filter. + hint:Query hint:nativeDistinctSPO true . + ?x rdf:type foaf:Person +} \ No newline at end of file Modified: branches/BIGDATA_RELEASE_1_2_0/ctc-striterators/src/java/cutthecrap/utils/striterators/IFilter.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/ctc-striterators/src/java/cutthecrap/utils/striterators/IFilter.java 2012-07-18 13:48:50 UTC (rev 6384) +++ branches/BIGDATA_RELEASE_1_2_0/ctc-striterators/src/java/cutthecrap/utils/striterators/IFilter.java 2012-07-20 21:21:08 UTC (rev 6385) @@ -29,8 +29,17 @@ import java.util.Iterator; import java.io.Serializable; +import com.bigdata.striterator.ICloseableIterator; + /** * Provides the hook interface that allows use by Striterators + * + * TODO The {@link Striterator} protocol does not support a close() method for + * {@link Filter}s. That method should be invoked by an + * {@link ICloseableIterator}. + * + * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/582"> + * IStriterator does not support close() protocol for Ifilter </a> */ public interface IFilter extends Serializable, IPropertySet { /** This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |