From: <tho...@us...> - 2010-10-22 19:58:51
|
Revision: 3840 http://bigdata.svn.sourceforge.net/bigdata/?rev=3840&view=rev Author: thompsonbry Date: 2010-10-22 19:58:41 +0000 (Fri, 22 Oct 2010) Log Message: ----------- Added support for sampling from a local access path. I still need to add support for sampling in scale-out. This is in service of adaptive query optimization. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/AccessPath.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestAll.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleIndex.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestSampleIndex.java Removed Paths: ------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/AbstractSampleIndex.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleLocalBTree.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleLocalShard.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestSampleLocalBTree.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestSampleLocalShard.java Deleted: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/AbstractSampleIndex.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/AbstractSampleIndex.java 2010-10-22 19:47:04 UTC (rev 3839) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/AbstractSampleIndex.java 2010-10-22 19:58:41 UTC (rev 3840) @@ -1,125 +0,0 @@ -/** - -Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. - -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ -/* - * Created on Aug 16, 2010 - */ - -package com.bigdata.bop.ap; - -import com.bigdata.bop.AbstractAccessPathOp; -import com.bigdata.bop.BOp; -import com.bigdata.bop.IBindingSet; -import com.bigdata.bop.IPredicate; -import com.bigdata.bop.NV; -import com.bigdata.btree.IIndex; -import com.bigdata.relation.accesspath.IAccessPath; - -/** - * Abstract base class for sampling operator for an {@link IIndex}. - * - * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id$ - * @param <E> - * The generic type of the elements materialized from that index. - * - * @todo Implement sample operator. E.g., sampleRange(fromKey,toKey,limit). This - * could be on {@link IIndex} or on {@link IAccessPath}. For a shard view, - * it must proportionally select from among the ordered components of the - * view. For a hash table it would be sample(limit) since range based - * operations are not efficient. - * <p> - * This should accept an index, not a predicate (for RDF we determine the - * index an analysis of the bound and unbound arguments on the predicate - * and always have a good index, but this is not true in the general - * case). When the index is remote, it should be executed at the remote - * index. - * - * @todo This needs to operation on element chunks, not {@link IBindingSet} - * chunks. It also may not require pipelining. - */ -abstract public class AbstractSampleIndex<E> extends AbstractAccessPathOp<E> { - - /** - * - */ - private static final long serialVersionUID = 1L; - - /** - * Known annotations. - */ - public interface Annotations extends BOp.Annotations { - /** - * The sample limit. - */ - String LIMIT = "limit"; - } - - protected AbstractSampleIndex(final IPredicate<E> pred, final int limit) { - - super(new BOp[] { pred }, NV.asMap(new NV[] {// - new NV(Annotations.LIMIT, Integer.valueOf(limit)) // - })); - - if (pred == null) - throw new IllegalArgumentException(); - - if (limit <= 0) - throw new IllegalArgumentException(); - - switch (getEvaluationContext()) { - case HASHED: - case SHARDED: - break; - default: - throw new UnsupportedOperationException( - Annotations.EVALUATION_CONTEXT + "=" - + getEvaluationContext()); - } - - } - - @SuppressWarnings("unchecked") - public IPredicate<E> pred() { - - return (IPredicate<E>) get(0); - - } - - public int limit() { - - return (Integer) getRequiredProperty(Annotations.LIMIT); - - } - -// /** -// * This is a shard wise operator. -// */ -// @Override -// public BOpEvaluationContext getEvaluationContext() { -// -// return BOpEvaluationContext.SHARDED; -// -// } - -} Copied: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleIndex.java (from rev 3756, branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/AbstractSampleIndex.java) =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleIndex.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleIndex.java 2010-10-22 19:58:41 UTC (rev 3840) @@ -0,0 +1,451 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Aug 16, 2010 + */ + +package com.bigdata.bop.ap; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.Map; +import java.util.concurrent.Callable; + +import com.bigdata.bop.AbstractAccessPathOp; +import com.bigdata.bop.BOp; +import com.bigdata.bop.BOpContextBase; +import com.bigdata.bop.IPredicate; +import com.bigdata.btree.AbstractBTree; +import com.bigdata.btree.ILeafCursor; +import com.bigdata.btree.ILinearList; +import com.bigdata.btree.IRangeQuery; +import com.bigdata.btree.ITuple; +import com.bigdata.btree.ITupleCursor; +import com.bigdata.btree.filter.Advancer; +import com.bigdata.btree.view.FusedView; +import com.bigdata.relation.IRelation; +import com.bigdata.relation.accesspath.AccessPath; +import com.bigdata.relation.accesspath.IAccessPath; +import com.bigdata.relation.rule.IAccessPathExpander; +import com.bigdata.striterator.IKeyOrder; + +import cutthecrap.utils.striterators.IFilter; + +/** + * Sampling operator for the {@link IAccessPath} implied by an + * {@link IPredicate}. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id: AbstractSampleIndex.java 3672 2010-09-28 23:39:42Z thompsonbry + * $ + * @param <E> + * The generic type of the elements materialized from that index. + * + * @todo This is a basic operator which is designed to support adaptive query + * optimization. However, there are a lot of possible semantics for + * sampling, including: uniform distribution, randomly distribution, tuple + * at a time versus clustered (sampling with leaves), adaptive sampling + * until the sample reflects some statistical property of the underlying + * population, etc. + */ +public class SampleIndex<E> extends AbstractAccessPathOp<E> { + + /** + * + */ + private static final long serialVersionUID = 1L; + + /** + * Known annotations. + */ + public interface Annotations extends BOp.Annotations { + + /** + * The sample limit (default {@value #DEFAULT_LIMIT}). + */ + String LIMIT = "limit"; + + int DEFAULT_LIMIT = 100; + + /** + * The {@link IPredicate} describing the access path to be sampled + * (required). + */ + String PREDICATE = SampleIndex.class.getName() + ".predicate"; + + } + + public SampleIndex(SampleIndex<E> op) { + + super(op); + + } + + public SampleIndex(BOp[] args, Map<String, Object> annotations) { + + super(args, annotations); + + } + + public int limit() { + + return getProperty(Annotations.LIMIT, Annotations.DEFAULT_LIMIT); + + } + + @SuppressWarnings("unchecked") + public IPredicate<E> getPredicate() { + + return (IPredicate<E>) getRequiredProperty(Annotations.PREDICATE); + + } + + /** + * Return a sample from the access path associated with the + * {@link Annotations#PREDICATE}. + */ + public E[] eval(final BOpContextBase context) { + + try { + return new SampleTask(context).call(); + } catch (Exception e) { + throw new RuntimeException(e); + } + + } + + /** + * Sample an {@link IAccessPath}. + * + * FIXME This needs to handle each of the following conditions: + * <p> + * Timestamp {read-historical, read-committed, read-write tx, unisolated}<br> + * Index view {standalone, partitioned,global view of partitioned}<br> + * + * @todo The general approach uses the {@link ILinearList} interface to take + * evenly distributed or randomly distributed samples from the + * underlying index. This is done using an {@link IFilter} which is + * evaluated local to the index. This works whether or not the access + * path is using a partitioned view of the index. + * <p> + * When sampling an index shard the {@link ILinearList} API is not + * defined for the {@link FusedView}. Since this sampling operator + * exists for the purposes of estimating the cardinality of an access + * path, we can dispense with the fused view and collect a number of + * samples from each component of that view which is proportional to + * the range count of the view divided by the range count of the + * component index. This may cause tuples which have since been + * deleted to become visible, but this should not cause problems when + * estimating the cardinality of a join path as long as we always + * report the actual tuples from the fused view in the case where the + * desired sample size is LTE the estimated range count of the access + * path. + * + * @todo Better performance could be realized by accepting all tuples in a + * leaf. This requires a sensitivity to the leaf boundaries which + * might be obtained with an {@link ITupleCursor} extension interface + * for local indices or with the {@link ILeafCursor} interface if that + * can be exposed from a sufficiently low level {@link ITupleCursor} + * implementation. However, when they are further constraints layered + * onto the access path by the {@link IPredicate} it may be that such + * clustered (leaf at once) sampling is not practical. + * + * @todo When sampling a global view of a partitioned index, we should focus + * the sample on a subset of the index partitions in order to + * "cluster" the effort. This can of course introduce bias. However, + * if there are a lot of index partitions then the sample will of + * necessity be very small in proportion to the data volume and the + * opportunity for bias will be correspondingly large. + * + * @todo If there is an {@link IAccessPathExpander} then + */ + private class SampleTask implements Callable<E[]> { + + private final BOpContextBase context; + + SampleTask(final BOpContextBase context) { + + this.context = context; + + } + + /** Return a sample from the access path. */ + public E[] call() throws Exception { + + return sample(limit(), getPredicate()).getSample(); + + } + + /** + * Return a sample from the access path. + * + * @param limit + * @return + */ + public AccessPathSample<E> sample(final int limit, + IPredicate<E> predicate) { + + final IRelation<E> relation = context.getRelation(predicate); + + // @todo assumes raw AP. + final AccessPath<E> accessPath = (AccessPath<E>) context + .getAccessPath(relation, predicate); + + final long rangeCount = accessPath.rangeCount(false/* exact */); + + if (limit > rangeCount) { + + /* + * The sample will contain everything in the access path. + */ + return new AccessPathSample<E>(limit, accessPath); + + } + + /* + * Add the CURSOR and PARALLEL flags to the predicate. + * + * @todo turn off REVERSE if specified. + */ + final int flags = predicate.getProperty( + IPredicate.Annotations.FLAGS, + IPredicate.Annotations.DEFAULT_FLAGS) + | IRangeQuery.CURSOR + | IRangeQuery.PARALLEL; + + predicate = (IPredicate<E>) predicate.setProperty( + IPredicate.Annotations.FLAGS, flags); + + /* + * Add advancer to collect sample. + */ + predicate = ((Predicate<E>) predicate) + .addIndexLocalFilter(new SampleAdvancer<E>(//rangeCount, + limit, accessPath.getFromKey(), accessPath + .getToKey())); + + return new AccessPathSample<E>(limit, context.getAccessPath( + relation, predicate)); + + } + + } + + /** + * An advancer pattern which is designed to take evenly distributed samples + * from an index. The caller specifies the #of tuples to be skipped after + * each tuple visited. That number should be computed based on the estimated + * range count of the index and the desired sample size. This can fail to + * gather the desired number of sample if additional filters are applied + * which further restrict the elements selected by the predicate. However, + * it will still faithfully represent the expected cardinality of the + * sampled access path. + * + * @author tho...@us... + * + * @param <E> + * The generic type of the elements visited by that access path. + */ + private static class SampleAdvancer<E> extends Advancer<E> { + + private static final long serialVersionUID = 1L; + + /** The desired total limit on the sample. */ + private final int limit; + + private final byte[] /*fromKey,*/ toKey; + + /* + * Transient data. This gets initialized when we visit the first tuple. + */ + + /** The #of tuples to be skipped after every tuple visited. */ + private transient int skipCount; + /** The #of tuples accepted so far. */ + private transient int nread = 0; + /** The inclusive lower bound of the first tuple actually visited. */ + private transient int fromIndex; + /** The exclusive upper bound of the last tuple which could be visited. */ + private transient int toIndex; + + /** + * + * @param limit + * The #of samples to visit. + */ + public SampleAdvancer(final int limit, final byte[] fromKey, + final byte[] toKey) { + + this.limit = limit; + this.toKey = toKey; + } + + /** + * @todo This is taking evenly spaced samples. It is much more efficient + * to take clusters of samples when you can accept the bias. + * Taking a clustered sample really requires knowing where the + * leaf boundaries are in the index, e.g., using + * {@link ILeafCursor}. + */ + @Override + protected void advance(final ITuple<E> tuple) { + + final AbstractBTree ndx = (AbstractBTree) src.getIndex(); + + final int currentIndex = ndx.indexOf(tuple.getKey()); + + if (nread == 0) { + + // inclusive lower bound. + fromIndex = currentIndex; + + // exclusive upper bound. + toIndex = toKey == null ? ndx.getEntryCount() : ndx + .indexOf(toKey); + + final int rangeCount = (toIndex - fromIndex); + + skipCount = Math.max(1, rangeCount / limit); + + // minus one since src.next() already consumed one tuple. + skipCount -= 1; + +// System.err.println("limit=" + limit + ", rangeCount=" +// + rangeCount + ", skipCount=" + skipCount); + + } + + nread++; + + if (skipCount > 0) { + + /* + * If the skip count is positive, then skip over N tuples. + */ + + final int nextIndex = Math.min(ndx.getEntryCount() - 1, + currentIndex + skipCount); + + src.seek(ndx.keyAt(nextIndex)); + + } + + } + + } // class SampleAdvancer + + /** + * A sample from an access path. + * + * @param <E> + * The generic type of the elements visited by that access + * path. + * + * @author tho...@us... + */ + public static class AccessPathSample<E> implements Serializable { + + private static final long serialVersionUID = 1L; + + private final IPredicate<E> pred; + private final IKeyOrder<E> keyOrder; + private final int limit; + private final E[] sample; + + /** + * Constructor populates the sample using the caller's + * {@link IAccessPath#iterator()}. The caller is responsible for setting + * up the {@link IAccessPath} such that it provides an efficient sample + * of the access path with the appropriate constraints. + * + * @param limit + * @param accessPath + */ + private AccessPathSample(final int limit, + final IAccessPath<E> accessPath) { + + if (limit <= 0) + throw new IllegalArgumentException(); + + if (accessPath == null) + throw new IllegalArgumentException(); + + this.pred = accessPath.getPredicate(); + + this.keyOrder = accessPath.getKeyOrder(); + + this.limit = limit; + + // drain the access path iterator. + final ArrayList<E> tmp = new ArrayList<E>(limit); + + int nsamples = 0; + + final Iterator<E> src = accessPath.iterator(0L/* offset */, limit, + limit/* capacity */); + + while (src.hasNext() && nsamples < limit) { + + tmp.add(src.next()); + + nsamples++; + + } + + // convert to an array of the appropriate type. + sample = tmp.toArray((E[]) java.lang.reflect.Array.newInstance( + tmp.get(0).getClass(), tmp.size())); + + } + + public IPredicate<E> getPredicate() { + return pred; + } + + public boolean isEmpty() { + return sample != null; + } + + public int sampleSize() { + return sample == null ? 0 : sample.length; + } + + public int limit() { + return limit; + } + + /** + * The sample. + * + * @return The sample -or- <code>null</code> if the sample was + * empty. + */ + public E[] getSample() { + return sample; + } + + } // AccessPathSample + +} Deleted: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleLocalBTree.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleLocalBTree.java 2010-10-22 19:47:04 UTC (rev 3839) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleLocalBTree.java 2010-10-22 19:58:41 UTC (rev 3840) @@ -1,95 +0,0 @@ -package com.bigdata.bop.ap; - -import java.util.concurrent.Callable; -import java.util.concurrent.FutureTask; - -import com.bigdata.bop.BOpContext; -import com.bigdata.bop.IPredicate; -import com.bigdata.btree.AbstractBTree; -import com.bigdata.relation.accesspath.IBlockingBuffer; - -/** - * Sampling operator for an {@link AbstractBTree}. - * - * @author <a href="mailto:tho...@us...">Bryan - * Thompson</a> - */ -public class SampleLocalBTree<E> extends AbstractSampleIndex<E> { - - /** - * - */ - private static final long serialVersionUID = 1L; - - public SampleLocalBTree(final IPredicate<E> pred, final int limit) { - - super(pred, limit); - - } - - public FutureTask<Void> eval(final BOpContext<E> context) { - - if (context.getPartitionId() != -1) { - // Must not be specific to a shard. - throw new UnsupportedOperationException(); - } - - return new FutureTask<Void>(new LocalBTreeSampleTask(context)); - - } - - /** - * Sample an {@link AbstractBTree}. - */ - private class LocalBTreeSampleTask implements - Callable<Void> { - - private final BOpContext<E> context; - - private final IBlockingBuffer<E[]> sink; - - LocalBTreeSampleTask(final BOpContext<E> context) { - - this.context = context; - - this.sink = context.getSink(); - - } - - public Void call() throws Exception { - - /* - * FIXME Decide how we are going to resolve the appropriate index - * for the predicate. This could go through - * IJoinNexus.getTailRelationView() and - * IJoinNexus.getTailAccessPath(). Those are just going through the - * locator. Review how the actual access path is selected versus the - * IKeyOrder specified on the IPredicate. If the IKeyOrder of - * interest is on the IPredicate, then why not just use that? - */ - -// final IPredicate<E> pred = pred(); -// -// final String relationName = pred.getOnlyRelationName(); -// -// final IRelation<E> rel = (IRelation<E>) joinNexus.getIndexManager() -// .getResourceLocator().locate(relationName, -// joinNexus.getReadTimestamp()); -// -// final IAccessPath<E> accessPath = rel.getAccessPath(pred); - - /* - * FIXME Sample N randomly chosen indices or evenly selected? - * - * Note: If there are only 100 leaves and we sample evenly, that - * could result in reading all the leaves. However, when the - * B+Tree is large we will only touch a few leaves even with - * uniform sampling. - */ - throw new UnsupportedOperationException(); - - } - - } // class LocalBTreeSampleTask - -} Deleted: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleLocalShard.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleLocalShard.java 2010-10-22 19:47:04 UTC (rev 3839) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleLocalShard.java 2010-10-22 19:58:41 UTC (rev 3840) @@ -1,87 +0,0 @@ -package com.bigdata.bop.ap; - -import java.util.concurrent.Callable; -import java.util.concurrent.Future; -import java.util.concurrent.FutureTask; - -import com.bigdata.bop.BOpContext; -import com.bigdata.bop.IPredicate; -import com.bigdata.btree.AbstractBTree; -import com.bigdata.relation.IRelation; -import com.bigdata.relation.accesspath.IAccessPath; -import com.bigdata.relation.accesspath.IBlockingBuffer; - -/** - * Sampling operator for a shard view. - * - * @author <a href="mailto:tho...@us...">Bryan - * Thompson</a> - */ -public class SampleLocalShard<E> extends AbstractSampleIndex<E> { - - /** - * - */ - private static final long serialVersionUID = 1L; - - public SampleLocalShard(final IPredicate<E> pred, final int limit) { - - super(pred,limit); - - } - - /* - * Note: This is done at evaluation time, local to the data. - */ - public FutureTask<Void> eval(final BOpContext<E> context) { - - if (context.getPartitionId() == -1) { - // Must be specific to a shard. - throw new UnsupportedOperationException(); - } - - return new FutureTask<Void>(new LocalShardSampleTask(context)); - - } - - /** - * Sample an {@link AbstractBTree}. - */ - private class LocalShardSampleTask implements Callable<Void> { - - private final BOpContext<E> context; - private final IBlockingBuffer<E[]> sink; - - LocalShardSampleTask(final BOpContext<E> context) { - - this.context = context; - - this.sink = context.getSink(); - - } - - public Void call() throws Exception { - - final IPredicate<E> pred = pred(); - - final IRelation<E> view = context.getRelation(pred); - - final IAccessPath<E> accessPath = view.getAccessPath(pred); - - /* - * FIXME Sample N tuples based on a uniform offset distribution, - * discarding duplicates or tuples which are deleted in their - * most recent revision. - * - * Note: If there are only 100 leaves and we sample evenly, that - * could result in reading all the leaves. However, when the - * B+Tree is large we will only touch a few leaves even with - * uniform sampling. - */ - throw new UnsupportedOperationException(); - - } - - } // class LocalShardSampleTask - -} Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/AccessPath.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/AccessPath.java 2010-10-22 19:47:04 UTC (rev 3839) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/AccessPath.java 2010-10-22 19:58:41 UTC (rev 3840) @@ -1583,11 +1583,15 @@ if (partitionCount == 0) { - /* - * SWAG in case zero partition count is reported (I am not sure that - * this code path is possible). - */ - return new ScanCostReport(0L/* rangeCount */, partitionCount, 100/* millis */); +// /* +// * SWAG in case zero partition count is reported (I am not sure that +// * this code path is possible). +// */ +// return new ScanCostReport(0L/* rangeCount */, partitionCount, 100/* millis */); + /* + * Should never be "zero" partition count. + */ + throw new AssertionError(); } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestAll.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestAll.java 2010-10-22 19:47:04 UTC (rev 3839) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestAll.java 2010-10-22 19:58:41 UTC (rev 3840) @@ -24,8 +24,6 @@ package com.bigdata.bop.ap; -import com.bigdata.bop.ap.filter.TestDistinctFilter; - import junit.framework.Test; import junit.framework.TestCase; import junit.framework.TestSuite; @@ -72,12 +70,9 @@ /* * Sampling an access path. */ - - // test sampling from an AbstractBTree. - suite.addTestSuite(TestSampleLocalBTree.class); - // test sampling from an FusedView. - suite.addTestSuite(TestSampleLocalBTree.class); + // test sampling form an index. + suite.addTestSuite(TestSampleIndex.class); return suite; Copied: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestSampleIndex.java (from rev 3756, branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestSampleLocalBTree.java) =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestSampleIndex.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestSampleIndex.java 2010-10-22 19:58:41 UTC (rev 3840) @@ -0,0 +1,234 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Aug 19, 2010 + */ + +package com.bigdata.bop.ap; + +import java.text.NumberFormat; +import java.util.Arrays; +import java.util.Properties; +import java.util.Random; + +import junit.framework.TestCase2; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.BOpContextBase; +import com.bigdata.bop.IPredicate; +import com.bigdata.bop.IVariable; +import com.bigdata.bop.NV; +import com.bigdata.bop.Var; +import com.bigdata.journal.BufferMode; +import com.bigdata.journal.ITx; +import com.bigdata.journal.Journal; +import com.bigdata.striterator.ChunkedArrayIterator; + +/** + * Test suite for {@link SampleIndex}. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id: TestSampleLocalBTree.java 3665 2010-09-28 16:53:22Z thompsonbry + * $ + * + * FIXME Just like {@link TestPredicateAccessPath}, this test suite + * needs to cover all of the combinations of global views of + * partitioned and unpartitioned indices. + */ +public class TestSampleIndex extends TestCase2 { + + /** + * + */ + public TestSampleIndex() { + } + + /** + * @param name + */ + public TestSampleIndex(String name) { + super(name); + } + + @Override + public Properties getProperties() { + + final Properties p = new Properties(super.getProperties()); + + p.setProperty(Journal.Options.BUFFER_MODE, BufferMode.Transient + .toString()); + + return p; + + } + + static private final String namespace = "ns"; + + Journal jnl; + + R rel; + + public void setUp() throws Exception { + + jnl = new Journal(getProperties()); + + } + + /** + * Create and populate relation in the {@link #namespace}. + * + * @return The #of distinct entries. + */ + private int loadData(final int scale) { + + final String[] names = new String[] { "John", "Mary", "Saul", "Paul", + "Leon", "Jane", "Mike", "Mark", "Jill", "Jake", "Alex", "Lucy" }; + + final Random rnd = new Random(); + + // #of distinct instances of each name. + final int populationSize = Math.max(10, (int) Math.ceil(scale / 10.)); + + // #of trailing zeros for each name. + final int nzeros = 1 + (int) Math.ceil(Math.log10(populationSize)); + +// System.out.println("scale=" + scale + ", populationSize=" +// + populationSize + ", nzeros=" + nzeros); + + final NumberFormat fmt = NumberFormat.getIntegerInstance(); + fmt.setMinimumIntegerDigits(nzeros); + fmt.setMaximumIntegerDigits(nzeros); + fmt.setGroupingUsed(false); + + // create the relation. + final R rel = new R(jnl, namespace, ITx.UNISOLATED, new Properties()); + rel.create(); + + // data to insert. + final E[] a = new E[scale]; + + for (int i = 0; i < scale; i++) { + + final String n1 = names[rnd.nextInt(names.length)] + + fmt.format(rnd.nextInt(populationSize)); + + final String n2 = names[rnd.nextInt(names.length)] + + fmt.format(rnd.nextInt(populationSize)); + +// System.err.println("i=" + i + ", n1=" + n1 + ", n2=" + n2); + + a[i] = new E(n1, n2); + + } + + // sort before insert for efficiency. + Arrays.sort(a,R.primaryKeyOrder.getComparator()); + + // insert data (the records are not pre-sorted). + final long ninserts = rel.insert(new ChunkedArrayIterator<E>(a.length, a, null/* keyOrder */)); + + // Do commit since not scale-out. + jnl.commit(); + + // should exist as of the last commit point. + this.rel = (R) jnl.getResourceLocator().locate(namespace, + ITx.READ_COMMITTED); + + assertNotNull(rel); + + return (int) ninserts; + + } + + public void tearDown() throws Exception { + + if (jnl != null) { + jnl.destroy(); + jnl = null; + } + + // clear reference. + rel = null; + + } + + /** + * Unit test verifies some aspects of a sample taken from a local index + * (primarily that the sample respects the limit). + */ + public void test_something() { + + final int scale = 10000; + + final int nrecords = loadData(scale); + + final IVariable<?> x = Var.var("x"); + + final IVariable<?> y = Var.var("y"); + + final IPredicate<E> predicate = new Predicate<E>(new BOp[] { x, y }, + new NV(IPredicate.Annotations.RELATION_NAME, + new String[] { namespace }),// + new NV(IPredicate.Annotations.TIMESTAMP, ITx.READ_COMMITTED)// + ); + + final BOpContextBase context = new BOpContextBase(null/* fed */, jnl/* indexManager */); + + final int[] limits = new int[] { // + 1, 9, 19, 100, 217, 900,// + nrecords, + nrecords + 1 + }; + + for (int limit : limits) { + + final SampleIndex<E> sampleOp = new SampleIndex<E>( + new BOp[0], + NV + .asMap( + // + new NV(SampleIndex.Annotations.PREDICATE, + predicate),// + new NV(SampleIndex.Annotations.LIMIT, limit)// + )); + + final E[] a = sampleOp.eval(context); + +// System.err.println("limit=" + limit + ", nrecords=" + nrecords +// + ", nsamples=" + a.length); +// +// for (int i = 0; i < a.length && i < 10; i++) { +// System.err.println("a[" + i + "]=" + a[i]); +// } + + final int nexpected = Math.min(nrecords, limit); + + assertEquals("#samples (limit=" + limit + ", nrecords=" + nrecords + + ", nexpected=" + nexpected + ")", nexpected, a.length); + + } + + } + +} Deleted: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestSampleLocalBTree.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestSampleLocalBTree.java 2010-10-22 19:47:04 UTC (rev 3839) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestSampleLocalBTree.java 2010-10-22 19:58:41 UTC (rev 3840) @@ -1,59 +0,0 @@ -/** - -Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. - -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ -/* - * Created on Aug 19, 2010 - */ - -package com.bigdata.bop.ap; - -import com.bigdata.bop.ap.SampleLocalBTree; - -import junit.framework.TestCase2; - -/** - * Test suite for {@link SampleLocalBTree}. - * - * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id$ - */ -public class TestSampleLocalBTree extends TestCase2 { - - /** - * - */ - public TestSampleLocalBTree() { - } - - /** - * @param name - */ - public TestSampleLocalBTree(String name) { - super(name); - } - - public void test_something() { - fail("write tests"); - } - -} Deleted: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestSampleLocalShard.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestSampleLocalShard.java 2010-10-22 19:47:04 UTC (rev 3839) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestSampleLocalShard.java 2010-10-22 19:58:41 UTC (rev 3840) @@ -1,59 +0,0 @@ -/** - -Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. - -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ -/* - * Created on Aug 19, 2010 - */ - -package com.bigdata.bop.ap; - -import com.bigdata.bop.ap.SampleLocalShard; - -import junit.framework.TestCase2; - -/** - * Test suite for {@link SampleLocalShard}. - * - * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id$ - */ -public class TestSampleLocalShard extends TestCase2 { - - /** - * - */ - public TestSampleLocalShard() { - } - - /** - * @param name - */ - public TestSampleLocalShard(String name) { - super(name); - } - - public void test_something() { - fail("write tests"); - } - -} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |