This list is closed; nobody may subscribe to it.
2010 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
(139) |
Aug
(94) |
Sep
(232) |
Oct
(143) |
Nov
(138) |
Dec
(55) |
---|---|---|---|---|---|---|---|---|---|---|---|---|
2011 |
Jan
(127) |
Feb
(90) |
Mar
(101) |
Apr
(74) |
May
(148) |
Jun
(241) |
Jul
(169) |
Aug
(121) |
Sep
(157) |
Oct
(199) |
Nov
(281) |
Dec
(75) |
2012 |
Jan
(107) |
Feb
(122) |
Mar
(184) |
Apr
(73) |
May
(14) |
Jun
(49) |
Jul
(26) |
Aug
(103) |
Sep
(133) |
Oct
(61) |
Nov
(51) |
Dec
(55) |
2013 |
Jan
(59) |
Feb
(72) |
Mar
(99) |
Apr
(62) |
May
(92) |
Jun
(19) |
Jul
(31) |
Aug
(138) |
Sep
(47) |
Oct
(83) |
Nov
(95) |
Dec
(111) |
2014 |
Jan
(125) |
Feb
(60) |
Mar
(119) |
Apr
(136) |
May
(270) |
Jun
(83) |
Jul
(88) |
Aug
(30) |
Sep
(47) |
Oct
(27) |
Nov
(23) |
Dec
|
2015 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
(3) |
Oct
|
Nov
|
Dec
|
2016 |
Jan
|
Feb
|
Mar
(4) |
Apr
(1) |
May
|
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
|
Dec
|
From: <tho...@us...> - 2010-10-28 18:39:56
|
Revision: 3849 http://bigdata.svn.sourceforge.net/bigdata/?rev=3849&view=rev Author: thompsonbry Date: 2010-10-28 18:39:50 +0000 (Thu, 28 Oct 2010) Log Message: ----------- Fixed Divided by Zero error in showAllocators Modified Paths: -------------- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java 2010-10-27 16:25:14 UTC (rev 3848) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java 2010-10-28 18:39:50 UTC (rev 3849) @@ -2139,7 +2139,7 @@ str.append("Allocation: " + stats[i].m_blockSize); str.append(", slots: " + stats[i].m_filledSlots + "/" + stats[i].m_reservedSlots); str.append(", storage: " + filled + "/" + reserved); - str.append(", usage: " + (filled * 100 / reserved) + "%"); + str.append(", usage: " + (reserved==0?0:(filled * 100 / reserved)) + "%"); str.append("\n"); } str.append("Total - file: " + convertAddr(m_fileSize) + ", slots: " + tfilledSlots + "/" + treservedSlots + ", storage: " + tfilled + "/" + treserved + "\n"); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <dm...@us...> - 2010-10-27 16:25:20
|
Revision: 3848 http://bigdata.svn.sourceforge.net/bigdata/?rev=3848&view=rev Author: dmacgbr Date: 2010-10-27 16:25:14 +0000 (Wed, 27 Oct 2010) Log Message: ----------- Modified to make compatible with changes made to AbstractTransactionService in revision 3804 Revision Links: -------------- http://bigdata.svn.sourceforge.net/bigdata/?rev=3804&view=rev Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/journal/TestTransactionService.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/journal/TestTransactionService.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/journal/TestTransactionService.java 2010-10-27 10:27:22 UTC (rev 3847) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/journal/TestTransactionService.java 2010-10-27 16:25:14 UTC (rev 3848) @@ -40,7 +40,6 @@ import com.bigdata.service.AbstractTransactionService; import com.bigdata.service.CommitTimeIndex; import com.bigdata.service.TxServiceRunState; -import com.bigdata.util.MillisecondTimestampFactory; /** * Unit tests of the {@link AbstractTransactionService} using a mock client. @@ -271,11 +270,8 @@ @Override public long nextTimestamp() { - // skip at least one millisecond. - MillisecondTimestampFactory.nextMillis(); - - return MillisecondTimestampFactory.nextMillis(); - + super.nextTimestamp () ; + return super.nextTimestamp () ; } } @@ -909,7 +905,12 @@ */ try { - service.newTx(10); + /** + * FIXME Modified to be compatible with changes made to AbstractTransactionService, revision 3804. 
+ * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/187">Trac 187</a> + */ +// service.newTx(10); + service.newTx(service.nextTimestamp () + 10); fail("Expecting: "+IllegalStateException.class); } catch(IllegalStateException ex) { log.info("Ignoring expected exception: "+ex); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <dm...@us...> - 2010-10-27 10:27:28
|
Revision: 3847 http://bigdata.svn.sourceforge.net/bigdata/?rev=3847&view=rev Author: dmacgbr Date: 2010-10-27 10:27:22 +0000 (Wed, 27 Oct 2010) Log Message: ----------- Remove trap for zero partition count within 'estimateCost(...)' Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/AccessPath.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/AccessPath.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/AccessPath.java 2010-10-26 16:31:54 UTC (rev 3846) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/AccessPath.java 2010-10-27 10:27:22 UTC (rev 3847) @@ -71,7 +71,6 @@ import com.bigdata.mdi.IMetadataIndex; import com.bigdata.mdi.LocalPartitionMetadata; import com.bigdata.rawstore.Bytes; -import com.bigdata.relation.AbstractRelation; import com.bigdata.relation.AbstractResource; import com.bigdata.relation.IRelation; import com.bigdata.service.AbstractClient; @@ -1583,15 +1582,15 @@ if (partitionCount == 0) { -// /* -// * SWAG in case zero partition count is reported (I am not sure that -// * this code path is possible). -// */ -// return new ScanCostReport(0L/* rangeCount */, partitionCount, 100/* millis */); + /* + * SWAG in case zero partition count is reported (I am not sure that + * this code path is possible). + */ + return new ScanCostReport(0L/* rangeCount */, partitionCount, 100/* millis */); /* * Should never be "zero" partition count. */ - throw new AssertionError(); +// throw new AssertionError(); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <dm...@us...> - 2010-10-26 16:32:00
|
Revision: 3846 http://bigdata.svn.sourceforge.net/bigdata/?rev=3846&view=rev Author: dmacgbr Date: 2010-10-26 16:31:54 +0000 (Tue, 26 Oct 2010) Log Message: ----------- Cause the ctc-striterator unit test suite to run by a) fixing typo in package name and b) compiling the test class. NB running this test suite no longer results in an error but there is a failure because of an empty place-holder test class. The remaining tests defined in the suite pass. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/build.xml Modified: branches/QUADS_QUERY_BRANCH/build.xml =================================================================== --- branches/QUADS_QUERY_BRANCH/build.xml 2010-10-25 17:45:26 UTC (rev 3845) +++ branches/QUADS_QUERY_BRANCH/build.xml 2010-10-26 16:31:54 UTC (rev 3846) @@ -1501,6 +1501,7 @@ <src path="${bigdata.dir}/bigdata-jini/src/test" /> <src path="${bigdata.dir}/bigdata-rdf/src/test" /> <src path="${bigdata.dir}/bigdata-sails/src/test" /> + <src path="${bigdata.dir}/ctc-striterators/src/test" /> <!-- <src path="${bigdata.dir}/bigdata-gom/src/test" /> --> @@ -1846,7 +1847,7 @@ <!-- Test suites to run when -DtestName is not set --> - <test name="cutthecrap.utils.striterator.TestAll" todir="${test.results.dir}" unless="testName" /> + <test name="cutthecrap.utils.striterators.TestAll" todir="${test.results.dir}" unless="testName" /> <test name="com.bigdata.cache.TestAll" todir="${test.results.dir}" unless="testName" /> <test name="com.bigdata.io.TestAll" todir="${test.results.dir}" unless="testName" /> <test name="com.bigdata.net.TestAll" todir="${test.results.dir}" unless="testName" /> This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2010-10-25 17:45:32
|
Revision: 3845 http://bigdata.svn.sourceforge.net/bigdata/?rev=3845&view=rev Author: thompsonbry Date: 2010-10-25 17:45:26 +0000 (Mon, 25 Oct 2010) Log Message: ----------- took out problematic @Override on some methods Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/MockRunningQuery.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/MockRunningQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/MockRunningQuery.java 2010-10-25 15:43:17 UTC (rev 3844) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/MockRunningQuery.java 2010-10-25 17:45:26 UTC (rev 3845) @@ -91,53 +91,44 @@ throw new UnsupportedOperationException(); } - @Override public Map<Integer, BOp> getBOpIndex() { return null; } - @Override public Map<Integer, BOpStats> getStats() { return null; } - @Override public long getDeadline() { // TODO Auto-generated method stub return 0; } - @Override public long getDoneTime() { // TODO Auto-generated method stub return 0; } - @Override public long getElapsed() { // TODO Auto-generated method stub return 0; } - @Override public long getStartTime() { // TODO Auto-generated method stub return 0; } - @Override public Throwable getCause() { // TODO Auto-generated method stub return null; } - @Override public BOp getQuery() { // TODO Auto-generated method stub return null; } - @Override public UUID getQueryId() { // TODO Auto-generated method stub return null; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <mrp...@us...> - 2010-10-25 15:43:24
|
Revision: 3844 http://bigdata.svn.sourceforge.net/bigdata/?rev=3844&view=rev Author: mrpersonick Date: 2010-10-25 15:43:17 +0000 (Mon, 25 Oct 2010) Log Message: ----------- testing single tail rules Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestMultiGraphs.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/tck/BigdataSparqlTest.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/org/openrdf/query/parser/sparql/SPARQLQueryTest.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestSesameMultiGraphs.java Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl.java 2010-10-25 15:40:55 UTC (rev 3843) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl.java 2010-10-25 15:43:17 UTC (rev 3844) @@ -466,7 +466,7 @@ return new EmptyIteration<BindingSet, QueryEvaluationException>(); } - return execute(query, bindings); + return execute(query); } catch (UnknownOperatorException ex) { @@ -577,7 +577,7 @@ return new EmptyIteration<BindingSet, QueryEvaluationException>(); } - return execute(query, bindings); + return execute(query); } catch (UnknownOperatorException ex) { @@ -683,7 +683,7 @@ return new EmptyIteration<BindingSet, QueryEvaluationException>(); } - return execute(query, bindings); + return execute(query); } catch (UnknownOperatorException ex) { @@ -1668,16 +1668,8 @@ * * @throws QueryEvaluationException */ -// protected CloseableIteration<BindingSet, QueryEvaluationException> execute( -// final IStep step) -// throws Exception { -// -// 
return execute(step, null); -// -// } - protected CloseableIteration<BindingSet, QueryEvaluationException> execute( - final IStep step, final BindingSet constants) + final IStep step) throws Exception { final QueryEngine queryEngine = tripleSource.getSail().getQueryEngine(); @@ -1721,7 +1713,7 @@ CloseableIteration<BindingSet, QueryEvaluationException> result = new Bigdata2Sesame2BindingSetIterator<QueryEvaluationException>( new BigdataBindingSetResolverator(database, it2).start(database - .getExecutorService()), constants); + .getExecutorService())); try { // Wait for the Future (checks for errors). @@ -1842,6 +1834,10 @@ final StatementPattern sp, final BindingSet bindings) throws QueryEvaluationException { + if (sp.getParentNode() instanceof Projection) { + return evaluateSingleTailRule(sp, bindings); + } + if (log.isDebugEnabled()) { log.debug("evaluating statement pattern:\n" + sp); } @@ -1873,39 +1869,38 @@ } -// /** -// * Override evaluation of StatementPatterns to recognize magic search -// * predicate. -// */ -// @Override -// public CloseableIteration<BindingSet, QueryEvaluationException> evaluate( -// final StatementPattern sp, final BindingSet bindings) -// throws QueryEvaluationException { -// -// // no check against the nativeJoins property here because we are simply -// // using the native execution model to take care of magic searches. -// -// if (log.isDebugEnabled()) { -// log.debug("evaluating statement pattern:\n" + sp); -// } -// -// final IStep query = createNativeQuery(sp); -// -// if (query == null) { -// return new EmptyIteration<BindingSet, QueryEvaluationException>(); -// } -// -// try { -// -// return execute(query, bindings); -// -// } catch (Exception ex) { -// -// throw new QueryEvaluationException(ex); -// -// } -// -// } + /** + * Override evaluation of StatementPatterns to recognize magic search + * predicate. 
+ */ + public CloseableIteration<BindingSet, QueryEvaluationException> evaluateSingleTailRule( + final StatementPattern sp, final BindingSet bindings) + throws QueryEvaluationException { + + // no check against the nativeJoins property here because we are simply + // using the native execution model to take care of magic searches. + + if (log.isDebugEnabled()) { + log.debug("evaluating statement pattern:\n" + sp); + } + + final IStep query = createNativeQuery(sp); + + if (query == null) { + return new EmptyIteration<BindingSet, QueryEvaluationException>(); + } + + try { + + return execute(query); + + } catch (Exception ex) { + + throw new QueryEvaluationException(ex); + + } + + } /** Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestMultiGraphs.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestMultiGraphs.java 2010-10-25 15:40:55 UTC (rev 3843) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestMultiGraphs.java 2010-10-25 15:43:17 UTC (rev 3844) @@ -163,7 +163,7 @@ "PREFIX rdfs: <"+RDFS.NAMESPACE+"> " + "PREFIX ns: <"+ns+"> " + - "select ?p ?o " + + "select distinct ?p ?o " + "WHERE { " + // " ?s rdf:type ns:Person . " + " ns:Mike ?p ?o . " + Added: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestSesameMultiGraphs.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestSesameMultiGraphs.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestSesameMultiGraphs.java 2010-10-25 15:43:17 UTC (rev 3844) @@ -0,0 +1,163 @@ +/** +Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... 
+ +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Sep 16, 2009 + */ + +package com.bigdata.rdf.sail; + +import org.openrdf.model.URI; +import org.openrdf.model.ValueFactory; +import org.openrdf.model.vocabulary.RDF; +import org.openrdf.model.vocabulary.RDFS; +import org.openrdf.query.QueryLanguage; +import org.openrdf.query.TupleQuery; +import org.openrdf.query.TupleQueryResult; +import org.openrdf.repository.sail.SailRepository; +import org.openrdf.repository.sail.SailRepositoryConnection; +import org.openrdf.sail.Sail; +import org.openrdf.sail.memory.MemoryStore; + +/** + * @author <a href="mailto:mrp...@us...">Mike Personick</a> + * @version $Id$ + */ +public class TestSesameMultiGraphs { + + public static void main(String[] args) throws Exception { + + final Sail sail; + final SailRepository repo; + final SailRepositoryConnection cxn; + + sail = new MemoryStore(); + repo = new SailRepository(sail); + + repo.initialize(); + cxn = repo.getConnection(); + cxn.setAutoCommit(false); + + try { + + final ValueFactory vf = sail.getValueFactory(); + + final String ns = "http://namespace/"; + + URI a = vf.createURI(ns+"a"); + URI b = vf.createURI(ns+"b"); + URI c = vf.createURI(ns+"c"); + URI g1 = vf.createURI(ns+"graph1"); + URI g2 = vf.createURI(ns+"graph2"); +/**/ + cxn.setNamespace("ns", ns); + + cxn.add(a, b, c, g1, g2); + + /* + * Note: The either flush() or 
commit() is required to flush the + * statement buffers to the database before executing any operations + * that go around the sail. + */ + cxn.commit();// + + { + + String query = + "PREFIX rdf: <"+RDF.NAMESPACE+"> " + + "PREFIX rdfs: <"+RDFS.NAMESPACE+"> " + + "PREFIX ns: <"+ns+"> " + + + "select ?p ?o " + + "WHERE { " + + " ns:a ?p ?o . " + + "}"; + + final TupleQuery tupleQuery = + cxn.prepareTupleQuery(QueryLanguage.SPARQL, query); + TupleQueryResult result = tupleQuery.evaluate(); + + System.err.println("no dataset specified, RDF-MERGE, should produce one solution:"); + while (result.hasNext()) { + System.err.println(result.next()); + } + + } + + { + + String query = + "PREFIX rdf: <"+RDF.NAMESPACE+"> " + + "PREFIX rdfs: <"+RDFS.NAMESPACE+"> " + + "PREFIX ns: <"+ns+"> " + + + "select ?p ?o " + + "from <"+g1+">" + + "from <"+g2+">" + + "WHERE { " + + " ns:a ?p ?o . " + + "}"; + + final TupleQuery tupleQuery = + cxn.prepareTupleQuery(QueryLanguage.SPARQL, query); + TupleQueryResult result = tupleQuery.evaluate(); + + System.err.println("default graph query, RDF-MERGE, should produce one solution:"); + while (result.hasNext()) { + System.err.println(result.next()); + } + + } + + { + + String query = + "PREFIX rdf: <"+RDF.NAMESPACE+"> " + + "PREFIX rdfs: <"+RDFS.NAMESPACE+"> " + + "PREFIX ns: <"+ns+"> " + + + "select ?p ?o " + + "from named <"+g1+">" + + "from named <"+g2+">" + + "WHERE { " + + " graph ?g { ns:a ?p ?o . 
} " + + "}"; + + final TupleQuery tupleQuery = + cxn.prepareTupleQuery(QueryLanguage.SPARQL, query); + TupleQueryResult result = tupleQuery.evaluate(); + + System.err.println("named graph query, no RDF-MERGE, should produce two solutions:"); + while (result.hasNext()) { + System.err.println(result.next()); + } + + } + + } finally { + cxn.close(); + sail.shutDown(); + } + + } + +} Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/tck/BigdataSparqlTest.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/tck/BigdataSparqlTest.java 2010-10-25 15:40:55 UTC (rev 3843) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/tck/BigdataSparqlTest.java 2010-10-25 15:43:17 UTC (rev 3844) @@ -28,6 +28,7 @@ package com.bigdata.rdf.sail.tck; import info.aduna.io.IOUtil; +import info.aduna.iteration.Iterations; import java.io.InputStream; import java.io.InputStreamReader; @@ -36,15 +37,27 @@ import java.util.Collection; import java.util.Enumeration; import java.util.Properties; +import java.util.Set; import junit.framework.Test; import junit.framework.TestSuite; +import org.apache.log4j.Logger; +import org.openrdf.model.Statement; +import org.openrdf.query.BooleanQuery; import org.openrdf.query.Dataset; +import org.openrdf.query.GraphQuery; +import org.openrdf.query.GraphQueryResult; +import org.openrdf.query.Query; +import org.openrdf.query.QueryLanguage; +import org.openrdf.query.TupleQuery; +import org.openrdf.query.TupleQueryResult; import org.openrdf.query.parser.sparql.ManifestTest; import org.openrdf.query.parser.sparql.SPARQLQueryTest; import org.openrdf.repository.Repository; +import org.openrdf.repository.RepositoryConnection; import org.openrdf.repository.RepositoryException; +import org.openrdf.repository.RepositoryResult; import org.openrdf.repository.dataset.DatasetRepository; import 
org.openrdf.repository.sail.SailRepository; import org.openrdf.sail.memory.MemoryStore; @@ -65,6 +78,9 @@ */ public class BigdataSparqlTest extends SPARQLQueryTest { + static protected final Logger log = Logger.getLogger(BigdataSparqlTest.class); + + /** * We cannot use inlining for these test because we do normalization on * numeric values and these tests test for syntatic differences, i.e. @@ -168,22 +184,22 @@ * run. */ static final Collection<String> testURIs = Arrays.asList(new String[] { -/* + // busted with EvalStrategy1 - "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/algebra/manifest#nested-opt-2", - "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/algebra/manifest#filter-scope-1", - "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/algebra/manifest#join-scope-1", - "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/optional/manifest#dawg-optional-complex-4", +// "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/algebra/manifest#nested-opt-2", +// "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/algebra/manifest#filter-scope-1", +// "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/algebra/manifest#join-scope-1", +// "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/optional/manifest#dawg-optional-complex-4", // busted with EvalStrategy2 with LeftJoin enabled - "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/open-world/manifest#open-eq-12", - "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/algebra/manifest#nested-opt-1", - "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/algebra/manifest#opt-filter-1", - "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/algebra/manifest#opt-filter-2", - "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/optional/manifest#dawg-optional-complex-3", - "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/optional-filter/manifest#dawg-optional-filter-001", - "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/optional-filter/manifest#dawg-optional-filter-004", -*/ +// 
"http://www.w3.org/2001/sw/DataAccess/tests/data-r2/open-world/manifest#open-eq-12", +// "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/algebra/manifest#nested-opt-1", +// "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/algebra/manifest#opt-filter-1", +// "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/algebra/manifest#opt-filter-2", +// "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/optional/manifest#dawg-optional-complex-3", +// "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/optional-filter/manifest#dawg-optional-filter-001", +// "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/optional-filter/manifest#dawg-optional-filter-004", + // Dataset crap // "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/i18n/manifest#normalization-1", @@ -207,6 +223,7 @@ // "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/basic/manifest#prefix-name-1",//OK // "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/basic/manifest#spoo-1",//BOOM +// "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/graph/manifest#dawg-graph-05", }); /** @@ -424,11 +441,6 @@ super.setUp(); } - @Override - public void runTest() throws Exception { - super.runTest(); - } - public Repository getRepository() { return dataRep; } @@ -448,5 +460,43 @@ } +// @Override +// protected void runTest() +// throws Exception +// { +// RepositoryConnection con = getQueryConnection(dataRep); +// try { +// +// log.info("database dump:"); +// RepositoryResult<Statement> stmts = con.getStatements(null, null, null, false); +// while (stmts.hasNext()) { +// log.info(stmts.next()); +// } +// log.info("dataset:\n" + dataset); +// +// String queryString = readQueryString(); +// log.info("query:\n" + getQueryString()); +// +// Query query = con.prepareQuery(QueryLanguage.SPARQL, queryString, queryFileURL); +// if (dataset != null) { +// query.setDataset(dataset); +// } +// +// if (query instanceof TupleQuery) { +// TupleQueryResult queryResult = ((TupleQuery)query).evaluate(); +// while 
(queryResult.hasNext()) { +// log.info("query result:\n" + queryResult.next()); +// } +// } +// +// } +// finally { +// con.close(); +// } +// +// super.runTest(); +// } + + } Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/org/openrdf/query/parser/sparql/SPARQLQueryTest.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/org/openrdf/query/parser/sparql/SPARQLQueryTest.java 2010-10-25 15:40:55 UTC (rev 3843) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/org/openrdf/query/parser/sparql/SPARQLQueryTest.java 2010-10-25 15:43:17 UTC (rev 3844) @@ -435,7 +435,7 @@ } } - private String readQueryString() + protected String readQueryString() throws IOException { InputStream stream = new URL(queryFileURL).openStream(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2010-10-25 15:41:03
|
Revision: 3843 http://bigdata.svn.sourceforge.net/bigdata/?rev=3843&view=rev Author: thompsonbry Date: 2010-10-25 15:40:55 +0000 (Mon, 25 Oct 2010) Log Message: ----------- accessPathDups were not being detected due to a change in the quads query branch such that hashCode() and equals() for predicates can not be used to test for duplicate patterns of variables and constants. This was fixed by adding the HashedPredicate class. That change cuts significant time from Q2 and Q9. This change did break 2 of the unit tests in TestQueryEngine and I have not yet diagnosed the problem there. However, the SAIL test suites are all good with this change. This change also reintroduces query-level logging with breakouts for each join in the pipeline evaluation. This logging level is controlled by QueryLog and corresponds closely to the older RuleStats logging. At this point, lexicon materialization (Q6,Q14) appears to be slightly slower in the branch, Q2 is faster, and Q9 is slightly slower. The remaining performance difference could be: - lexicon materialization changes. - chaining buffers in the trunk but not in the branch. - buffer configuration properties (explore this again for Q2 and Q9 now that dups are being eliminated). 
[java] ### Finished testing BIGDATA_SPARQL_ENDPOINT ### [java] BIGDATA_SPARQL_ENDPOINT #trials=10 #parallel=1 [java] query Time Result# [java] query1 56 4 [java] query3 37 6 [java] query4 68 34 [java] query5 108 719 [java] query7 34 61 [java] query8 379 6463 [java] query10 25 0 [java] query11 24 0 [java] query12 26 0 [java] query13 25 0 [java] query14 4046 393730 [java] query6 4056 430114 [java] query2 983 130 [java] query9 5280 8627 [java] Total 15147 procs -----------memory---------- ---swap-- -----io---- --system-- -----cpu------ r b swpd free buff cache si so bi bo in cs us sy id wa st 0 0 0 9886784 337552 4922340 0 0 7 5 42 38 2 0 98 0 0 3 0 0 7829228 337628 4922436 0 0 0 19 1117 186494 54 6 41 0 0 7 0 0 7718196 337700 4922428 0 0 0 9 1109 233047 73 6 20 0 0 0 0 0 8014492 337764 4922372 0 0 0 9 1114 8260 63 3 34 0 0 Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/BOpStats.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/IRunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunState.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/FederationChunkHandler.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java branches/QUADS_QUERY_BRANCH/bigdata/src/resources/logging/log4j.properties branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestBOpUtility.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/MockRunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java 
branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/join/TestPipelineJoin.java branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm/src/resources/logging/log4j.properties branches/QUADS_QUERY_BRANCH/bigdata-perf/lubm/src/resources/logging/log4j.properties branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/Rule2BOpUtility.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryLog.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOp.java 2010-10-24 18:18:10 UTC (rev 3842) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOp.java 2010-10-25 15:40:55 UTC (rev 3843) @@ -283,17 +283,17 @@ boolean DEFAULT_CONTROLLER = false; - /** - * For hash partitioned operators, this is the set of the member nodes - * for the operator. - * <p> - * This annotation is required for such operators since the set of known - * nodes of a given type (such as all data services) can otherwise - * change at runtime. - * - * @todo Move onto an interface parallel to {@link IShardwisePipelineOp} - */ - String MEMBER_SERVICES = "memberServices"; +// /** +// * For hash partitioned operators, this is the set of the member nodes +// * for the operator. +// * <p> +// * This annotation is required for such operators since the set of known +// * nodes of a given type (such as all data services) can otherwise +// * change at runtime. 
+// * +// * @todo Move onto an interface parallel to {@link IShardwisePipelineOp} +// */ +// String MEMBER_SERVICES = "memberServices"; } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java 2010-10-24 18:18:10 UTC (rev 3842) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java 2010-10-25 15:40:55 UTC (rev 3843) @@ -549,6 +549,49 @@ } + /** + * Return a list containing the evaluation order for the pipeline. Only the + * child operands are visited. Operators in subqueries are not visited since + * they will be assigned {@link BOpStats} objects when they are run as a + * subquery. The evaluation order is given by the depth-first left-deep + * traversal of the query. + * + * @todo unit tests. + */ + public static Integer[] getEvaluationOrder(final BOp op) { + + final List<Integer> order = new LinkedList<Integer>(); + + getEvaluationOrder(op, order, 0/*depth*/); + + return order.toArray(new Integer[order.size()]); + + } + + private static void getEvaluationOrder(final BOp op, final List<Integer> order, final int depth) { + + if(!(op instanceof PipelineOp)) + return; + + final int bopId = op.getId(); + + if (depth == 0 + || !op.getProperty(BOp.Annotations.CONTROLLER, + BOp.Annotations.DEFAULT_CONTROLLER)) { + + if (op.arity() > 0) { + + // left-deep recursion + getEvaluationOrder(op.get(0), order, depth + 1); + + } + + } + + order.add(bopId); + + } + /** * Combine chunks drawn from an iterator into a single chunk. This is useful * when materializing intermediate results for an all-at-once operator. 
Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java 2010-10-24 18:18:10 UTC (rev 3842) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java 2010-10-25 15:40:55 UTC (rev 3843) @@ -543,70 +543,91 @@ } - /* - * Intentionally removed. See BOpBase. - * - * hashCode() and equals() for Predicate were once used to cache access - * paths, but that code was history long before we developed the bop model. - */ - -// public boolean equals(final Object other) { -// -// if (this == other) -// return true; -// -// if(!(other instanceof IPredicate<?>)) -// return false; -// -// final IPredicate<?> o = (IPredicate<?>)other; -// -// final int arity = arity(); -// -// if(arity != o.arity()) return false; -// -// for (int i = 0; i < arity; i++) { -// -// final IVariableOrConstant<?> x = get(i); -// -// final IVariableOrConstant<?> y = o.get(i); -// -// if (x != y && !(x.equals(y))) { -// -// return false; -// -// } -// -// } -// -// return true; -// -// } -// -// public int hashCode() { -// -// int h = hash; -// -// if (h == 0) { -// -// final int n = arity(); -// -// for (int i = 0; i < n; i++) { -// -// h = 31 * h + get(i).hashCode(); -// -// } -// -// hash = h; -// -// } -// -// return h; -// -// } -// -// /** -// * Caches the hash code. -// */ -// private int hash = 0; + /** + * This class may be used to insert instances of {@link IPredicate}s into a + * hash map where equals is decided based solely on the pattern of variables + * and constants found on the {@link IPredicate}. This may be used to create + * access path caches or to identify and eliminate duplicate requests for + * the same access path. + */ + public static class HashedPredicate<E> { + /** + * The predicate. + */ + public final IPredicate<E> pred; + + /** + * The cached hash code. 
+ */ + final private int hash; + + public HashedPredicate(final IPredicate<E> pred) { + + if (pred == null) + throw new IllegalArgumentException(); + + this.pred = pred; + + this.hash = computeHash(); + + } + + public boolean equals(final Object other) { + + if (this == other) + return true; + + if (!(other instanceof HashedPredicate<?>)) + return false; + + final IPredicate<?> o = ((HashedPredicate<?>) other).pred; + + final int arity = pred.arity(); + + if (arity != o.arity()) + return false; + + for (int i = 0; i < arity; i++) { + + final IVariableOrConstant<?> x = pred.get(i); + + final IVariableOrConstant<?> y = o.get(i); + + if (x != y && !(x.equals(y))) { + + return false; + + } + + } + + return true; + + } + + public int hashCode() { + + return hash; + + } + + private final int computeHash() { + + int h = 0; + + final int n = pred.arity(); + + for (int i = 0; i < n; i++) { + + h = 31 * h + pred.get(i).hashCode(); + + } + + return h; + + } + + } + } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/BOpStats.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/BOpStats.java 2010-10-24 18:18:10 UTC (rev 3842) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/BOpStats.java 2010-10-25 15:40:55 UTC (rev 3843) @@ -47,13 +47,13 @@ */ private static final long serialVersionUID = 1L; -// /** -// * The timestamp (nanoseconds) assigned when this {@link BOpStats} object -// * was creatred. This can not be directly aggregated into wall time since -// * concurrent processes are nearly always used during query evaluation. -// */ -// private final long startTime = System.nanoTime(); - + /** + * The elapsed time (milliseconds) for the corresponding operation. When + * aggregated, this will generally exceed the wall time since concurrent + * processes are nearly always used during query evaluation. 
+ */ + final public CAT elapsed = new CAT(); + /** * #of chunks in. */ @@ -97,6 +97,7 @@ // Do not add to self! return; } + elapsed.add(o.elapsed.get()); chunksIn.add(o.chunksIn.get()); unitsIn.add(o.unitsIn.get()); unitsOut.add(o.unitsOut.get()); @@ -111,7 +112,8 @@ public String toString() { final StringBuilder sb = new StringBuilder(); sb.append(super.toString()); - sb.append("{chunksIn=" + chunksIn.get()); + sb.append("{elapsed=" + elapsed.get()); + sb.append(",chunksIn=" + chunksIn.get()); sb.append(",unitsIn=" + unitsIn.get()); sb.append(",chunksOut=" + chunksOut.get()); sb.append(",unitsOut=" + unitsOut.get()); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/IRunningQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/IRunningQuery.java 2010-10-24 18:18:10 UTC (rev 3842) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/IRunningQuery.java 2010-10-25 15:40:55 UTC (rev 3843) @@ -27,18 +27,35 @@ package com.bigdata.bop.engine; +import java.util.Map; +import java.util.UUID; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.IPredicate; +import com.bigdata.bop.PipelineOp; import com.bigdata.btree.ILocalBTreeView; import com.bigdata.journal.IIndexManager; import com.bigdata.service.IBigdataFederation; /** - * Interface exposing a limited set of the state of an executing query. + * Non-Remote interface exposing a limited set of the state of an executing + * query. * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ */ public interface IRunningQuery { + /** + * The query. + */ + BOp getQuery(); + + /** + * The unique identifier for this query. + */ + UUID getQueryId(); + /** * The {@link IBigdataFederation} IFF the operator is being evaluated on an * {@link IBigdataFederation}. When evaluating operations against an @@ -59,8 +76,51 @@ * The query engine. 
This may be used to submit subqueries for evaluation. */ QueryEngine getQueryEngine(); + + /** + * Return an unmodifiable index from {@link BOp.Annotations#BOP_ID} to + * {@link BOp}. This index may contain operators which are not part of the + * pipeline evaluation, such as {@link IPredicate}s. + */ + Map<Integer/*bopId*/,BOp> getBOpIndex(); + + /** + * Return an unmodifiable map exposing the statistics for the operators in + * the query and <code>null</code> unless this is the query controller. + * There will be a single entry in the map for each distinct + * {@link PipelineOp}. Entries might not appear until that operator has + * either begun or completed at least one evaluation phase. This index only + * contains operators which are actually part of the pipeline evaluation. + */ + Map<Integer/* bopId */, BOpStats> getStats(); /** + * Return the query deadline (the time at which it will terminate regardless + * of its run state). + * + * @return The query deadline (milliseconds since the epoch) and + * {@link Long#MAX_VALUE} if no explicit deadline was specified. + */ + public long getDeadline(); + + /** + * The timestamp (ms) when the query began execution. + */ + public long getStartTime(); + + /** + * The timestamp (ms) when the query was done and ZERO (0) if the query is + * not yet done. + */ + public long getDoneTime(); + + /** + * The elapsed time (ms) for the query. This will be updated for each call + * until the query is done executing. + */ + public long getElapsed(); + + /** * Cancel the running query (normal termination). * <p> * Note: This method provides a means for an operator to indicate that the @@ -84,5 +144,11 @@ * if the argument is <code>null</code>. */ Throwable halt(final Throwable t); + + /** + * Return the cause if the query was terminated by an exception. 
+ * @return + */ + Throwable getCause(); } Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryLog.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryLog.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryLog.java 2010-10-25 15:40:55 UTC (rev 3843) @@ -0,0 +1,304 @@ +/* + +Copyright (C) SYSTAP, LLC 2006-2008. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +*/ +/* + * Created on Jun 22, 2009 + */ + +package com.bigdata.bop.engine; + +import java.text.DateFormat; +import java.util.Date; +import java.util.Map; +import java.util.UUID; + +import org.apache.log4j.Logger; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.BOpUtility; +import com.bigdata.bop.IPredicate; +import com.bigdata.bop.join.PipelineJoin; +import com.bigdata.bop.join.PipelineJoin.PipelineJoinStats; +import com.bigdata.rdf.sail.Rule2BOpUtility; +import com.bigdata.striterator.IKeyOrder; + +/** + * Class defines the log on which summary operator execution statistics are + * written.. 
+ * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id: RuleLog.java 3448 2010-08-18 20:55:58Z thompsonbry $ + */ +public class QueryLog { + + protected static final transient Logger log = Logger + .getLogger(QueryLog.class); + + static { + if(log.isInfoEnabled()) + log.info(QueryLog.getTableHeader()); + } + + /** + * Log rule execution statistics. + * + * @param stats + * The rule execution statistics. + * + * @todo need start and end time for the query. + */ + static public void log(final IRunningQuery q) { + + if (log.isInfoEnabled()) { + + final Integer[] order = BOpUtility.getEvaluationOrder(q.getQuery()); + + log.info(getTableRow(q, -1/* orderIndex */, q.getQuery().getId(), + true/* summary */)); + + int orderIndex = 0; + for (Integer bopId : order) { + log.info(getTableRow(q, orderIndex, bopId, false/* summary */)); + orderIndex++; + } + + } + + } + + static private String getTableHeader() { + + final StringBuilder sb = new StringBuilder(); + + /* + * Common columns for the overall query and for each pipeline operator. + */ + sb.append("queryId"); + sb.append("\tbeginTime"); + sb.append("\tdoneTime"); + sb.append("\tdeadline"); + sb.append("\telapsed"); + sb.append("\tserviceId"); + sb.append("\tcause"); + sb.append("\tbop"); + /* + * Columns for each pipeline operator. + */ + sb.append("\tevalOrder"); // [0..n-1] + sb.append("\tbopId"); + sb.append("\tevalContext"); + sb.append("\tcontroller"); + // metadata considered by the static optimizer. + sb.append("\tstaticBestKeyOrder"); // original key order assigned by static optimizer. + sb.append("\tnvars"); // #of variables in the predicate for a join. + sb.append("\tfastRangeCount"); // fast range count used by the static optimizer. + // dynamics (aggregated for totals as well). + sb.append("\tfanIO"); + sb.append("\tsumMillis"); // cumulative milliseconds for eval of this operator. 
+ sb.append("\tchunksIn"); + sb.append("\tunitsIn"); + sb.append("\tchunksOut"); + sb.append("\tunitsOut"); + sb.append("\tmultipler"); // expansion rate multiplier in the solution count. + sb.append("\taccessPathDups"); + sb.append("\taccessPathCount"); + sb.append("\taccessPathChunksIn"); + sb.append("\taccessPathUnitsIn"); + // dynamics based on elapsed wall clock time. + sb.append("\tsolutions/ms"); + sb.append("\tmutations/ms"); + // + // cost model(s) + // + sb.append('\n'); + + return sb.toString(); + + } + + /** + * Return a tabular representation of the query {@link RunState}. + * + * @param q The {@link IRunningQuery}. + * @param evalOrder The evaluation order for the operator. + * @param bopId The identifier for the operator. + * @param summary <code>true</code> iff the summary for the query should be written. + * @return The row of the table. + */ + static private String getTableRow(final IRunningQuery q, final int evalOrder, final Integer bopId, final boolean summary) { + + final StringBuilder sb = new StringBuilder(); + + final DateFormat dateFormat = DateFormat.getDateTimeInstance( + DateFormat.FULL, DateFormat.FULL); + + // The elapsed time for the query (wall time in milliseconds). + final long elapsed = q.getElapsed(); + + // The serviceId on which the query is running : null unless scale-out. + final UUID serviceId = q.getQueryEngine().getServiceUUID(); + + // The thrown cause : null unless the query was terminated abnormally. + final Throwable cause = q.getCause(); + + sb.append(q.getQueryId()); + sb.append('\t'); + sb.append(dateFormat.format(new Date(q.getStartTime()))); + sb.append('\t'); + sb.append(dateFormat.format(new Date(q.getDoneTime()))); + sb.append('\t'); + if(q.getDeadline()!=Long.MAX_VALUE) + sb.append(dateFormat.format(new Date(q.getDeadline()))); + sb.append('\t'); + sb.append(elapsed); + sb.append('\t'); + sb.append(serviceId == null ? 
"N/A" : serviceId.toString()); + sb.append('\t'); + if (cause != null) + sb.append(cause.getLocalizedMessage()); + + final Map<Integer, BOp> bopIndex = q.getBOpIndex(); + final Map<Integer, BOpStats> statsMap = q.getStats(); + final BOp bop = bopIndex.get(bopId); + + // the operator. + sb.append('\t'); + if (summary) { + /* + * The entire query (recursively). New lines are translated out to + * keep this from breaking the table format. + */ + sb.append(BOpUtility.toString(q.getQuery()).replace('\n', ' ')); + } else { + // Otherwise how just this bop. + sb.append(bopIndex.get(bopId).toString()); + } + + sb.append('\t'); + sb.append(evalOrder); + sb.append('\t'); + sb.append(Integer.toString(bopId)); + sb.append('\t'); + sb.append(bop.getEvaluationContext()); + sb.append('\t'); + sb.append(bop.getProperty(BOp.Annotations.CONTROLLER, + BOp.Annotations.DEFAULT_CONTROLLER)); + + /* + * Static optimizer metadata. + * + * FIXME Should report [nvars] be the expected asBound #of variables + * given the assigned evaluation order and the expectation of propagated + * bindings (optionals may leave some unbound). + */ + { + + final IPredicate pred = (IPredicate<?>) bop + .getProperty(PipelineJoin.Annotations.PREDICATE); + + if (pred != null) { + + final IKeyOrder keyOrder = (IKeyOrder<?>) pred + .getProperty(Rule2BOpUtility.Annotations.ORIGINAL_INDEX); + + final Long rangeCount = (Long) pred + .getProperty(Rule2BOpUtility.Annotations.ESTIMATED_CARDINALITY); + + sb.append('\t'); // keyorder + if (keyOrder != null) + sb.append(keyOrder); + + sb.append('\t'); // nvars + if (keyOrder != null) + sb.append(pred.getVariableCount(keyOrder)); + + sb.append('\t'); // rangeCount + if (rangeCount!= null) + sb.append(rangeCount); + + } else { + sb.append('\t'); // keyorder + sb.append('\t'); // nvars + sb.append('\t'); // rangeCount + } + } + + /* + * Dynamics. + */ + + int fanIO = 0; // @todo aggregate from RunState. 
+ + final PipelineJoinStats stats = new PipelineJoinStats(); + if(summary) { + // Aggregate the statistics for all pipeline operators. + for (BOpStats t : statsMap.values()) { + stats.add(t); + } + } else { + // Just this operator. + stats.add(statsMap.get(bopId)); + } + final long unitsIn = stats.unitsIn.get(); + final long unitsOut = stats.unitsOut.get(); + sb.append('\t'); + sb.append(Integer.toString(fanIO)); + sb.append('\t'); + sb.append(stats.elapsed.get()); + sb.append('\t'); + sb.append(stats.chunksIn.get()); + sb.append('\t'); + sb.append(stats.unitsIn.get()); + sb.append('\t'); + sb.append(stats.chunksOut.get()); + sb.append('\t'); + sb.append(stats.unitsOut.get()); + sb.append('\t'); + sb.append(unitsIn == 0 ? "N/A" : unitsOut / (double) unitsIn); + sb.append('\t'); + sb.append(stats.accessPathDups.get()); + sb.append('\t'); + sb.append(stats.accessPathCount.get()); + sb.append('\t'); + sb.append(stats.accessPathChunksIn.get()); + sb.append('\t'); + sb.append(stats.accessPathUnitsIn.get()); + + /* + * Use the total elapsed time for the query (wall time). + */ + // solutions/ms + sb.append('\t'); + sb.append(elapsed == 0 ? 0 : stats.unitsOut.get() / elapsed); + // mutations/ms : @todo mutations/ms. 
+ sb.append('\t'); +// sb.append(elapsed==0?0:stats.unitsOut.get()/elapsed); + + sb.append('\n'); + + return sb.toString(); + + } + +} Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunState.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunState.java 2010-10-24 18:18:10 UTC (rev 3842) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunState.java 2010-10-25 15:40:55 UTC (rev 3843) @@ -45,7 +45,9 @@ import org.apache.log4j.Logger; import com.bigdata.bop.BOp; +import com.bigdata.bop.BOpUtility; import com.bigdata.bop.PipelineOp; +import com.bigdata.bop.join.PipelineJoin.PipelineJoinStats; import com.bigdata.relation.accesspath.IBlockingBuffer; /** @@ -744,6 +746,8 @@ sb.append("\tlabel"); sb.append("\tbopId"); sb.append("\tserviceId"); + sb.append("\tevalContext"); + sb.append("\tcontroller"); sb.append("\tcause"); sb.append("\tbop"); sb.append("\tshardId"); @@ -767,7 +771,16 @@ } - sb.append("\tstats"); + sb.append("\telapsed"); + sb.append("\tchunksIn"); + sb.append("\tunitsIn"); + sb.append("\tchunksOut"); + sb.append("\tunitsOut"); + sb.append("\taccessPathDups"); + sb.append("\taccessPathCount"); + sb.append("\taccessPathChunksIn"); + sb.append("\taccessPathUnitsIn"); + //{chunksIn=1,unitsIn=100,chunksOut=4,unitsOut=313,accessPathDups=0,accessPathCount=100,chunkCount=100,elementCount=313} sb.append('\n'); @@ -830,14 +843,34 @@ sb.append('\t'); sb.append(serviceId == null ? "N/A" : serviceId.toString()); + { + final BOp bop = bopIndex.get(bopId); + sb.append('\t'); + sb.append(bop.getEvaluationContext()); + sb.append('\t'); + sb.append(bop.getProperty(BOp.Annotations.CONTROLLER, + BOp.Annotations.DEFAULT_CONTROLLER)); + } + // the thrown cause. sb.append('\t'); if (cause != null) sb.append(cause.getLocalizedMessage()); - // the operator. - sb.append('\t'); - sb.append(bopIndex.get(bopId)); + // the operator. 
+ sb.append('\t'); + if (nsteps.get() == 1) { + /* + * For the startQ row @ nsteps==1, show the entire query. This is + * the only way people will be able to see the detailed annotations + * on predicates used in joins. New line characters are translated + * out to keep things in the table format. + */ + sb.append(BOpUtility.toString(query).replace('\n', ' ')); + } else { + // Otherwise show just this bop. + sb.append(bopIndex.get(bopId).toString()); + } sb.append('\t'); sb.append(Integer.toString(shardId)); @@ -873,11 +906,33 @@ } - // the statistics : this is at the end to keep the table pretty. - sb.append('\t'); + /* + * The statistics. This is at the end to keep the table pretty. + * Different kinds of operators may have additional statistics. They + * have to be explicitly handled here to format them into a table. + */ if (stats != null) { - // @todo use a multi-column version of stats. - sb.append(stats.toString()); + sb.append('\t'); + sb.append(stats.elapsed.get()); + sb.append('\t'); + sb.append(stats.chunksIn.get()); + sb.append('\t'); + sb.append(stats.unitsIn.get()); + sb.append('\t'); + sb.append(stats.chunksOut.get()); + sb.append('\t'); + sb.append(stats.unitsOut.get()); + if (stats instanceof PipelineJoinStats) { + final PipelineJoinStats t = (PipelineJoinStats) stats; + sb.append('\t'); + sb.append(t.accessPathDups.get()); + sb.append('\t'); + sb.append(t.accessPathCount.get()); + sb.append('\t'); + sb.append(t.accessPathChunksIn.get()); + sb.append('\t'); + sb.append(t.accessPathUnitsIn.get()); + } } sb.append('\n'); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java 2010-10-24 18:18:10 UTC (rev 3842) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java 2010-10-25 15:40:55 UTC (rev 3843) @@ -27,6 +27,7 @@ */ 
package com.bigdata.bop.engine; +import java.util.Collections; import java.util.Iterator; import java.util.LinkedList; import java.util.List; @@ -109,6 +110,18 @@ */ final private AtomicLong deadline = new AtomicLong(Long.MAX_VALUE); + /** + * The timestamp(ms) when the query begins to execute. + */ + final private AtomicLong startTime = new AtomicLong(System + .currentTimeMillis()); + + /** + * The timestamp (ms) when the query is done executing and ZERO (0L) if the + * query is not done. + */ + final private AtomicLong doneTime = new AtomicLong(0L); + /** * <code>true</code> iff the outer {@link QueryEngine} is the controller for * this query. @@ -304,19 +317,25 @@ } - /** - * Return the query deadline (the time at which it will terminate regardless - * of its run state). - * - * @return The query deadline (milliseconds since the epoch) and - * {@link Long#MAX_VALUE} if no explicit deadline was specified. - */ public long getDeadline() { - return deadline.get(); + } + public long getStartTime() { + return startTime.get(); } + public long getDoneTime() { + return doneTime.get(); + } + + public long getElapsed() { + long mark = doneTime.get(); + if (mark == 0L) + mark = System.currentTimeMillis(); + return mark - startTime.get(); + } + /** * The class executing the query on this node. */ @@ -366,31 +385,15 @@ } - /** - * Return the current statistics for the query and <code>null</code> unless - * this is the query controller. There will be a single entry in the map for - * each distinct {@link PipelineOp}. The map entries are inserted when we - * first begin to run an instance of that operator on some - * {@link IChunkMessage}. - */ public Map<Integer/* bopId */, BOpStats> getStats() { - return statsMap; + return Collections.unmodifiableMap(statsMap); } - /** - * Lookup and return the {@link BOp} with that identifier using an index. - * - * @param bopId - * The identifier. 
- * - * @return The {@link BOp} -or- <code>null</code> if no {@link BOp} was - * found in the query with for that identifier. - */ - public BOp getBOp(final int bopId) { + public Map<Integer,BOp> getBOpIndex() { - return bopIndex.get(bopId); + return bopIndex; } @@ -1295,10 +1298,16 @@ clientProxy.startOp(new StartOpMessage(queryId, t.bopId, t.partitionId, serviceId, t.messagesIn)); - /* - * Run the operator task. - */ - t.call(); + /* + * Run the operator task. + */ + final long begin = System.currentTimeMillis(); + try { + t.call(); + } finally { + t.context.getStats().elapsed.add(System.currentTimeMillis() + - begin); + } /* * Queue task to notify the query controller that operator task @@ -1972,6 +1981,11 @@ } // life cycle hook for the end of the query. lifeCycleTearDownQuery(); + // mark done time. + doneTime.set(System.currentTimeMillis()); + // log summary statistics for the query. + if (isController()) + QueryLog.log(this); } // remove from the collection of running queries. queryEngine.halt(this); @@ -2066,6 +2080,12 @@ } + final public Throwable getCause() { + + return future.getCause(); + + } + public IBigdataFederation<?> getFederation() { return queryEngine.getFederation(); @@ -2097,5 +2117,5 @@ return StandaloneChunkHandler.INSTANCE; } - + } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/FederationChunkHandler.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/FederationChunkHandler.java 2010-10-24 18:18:10 UTC (rev 3842) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/FederationChunkHandler.java 2010-10-25 15:40:55 UTC (rev 3843) @@ -108,7 +108,7 @@ final FederatedRunningQuery q = (FederatedRunningQuery) query; - final BOp targetOp = q.getBOp(sinkId); + final BOp targetOp = q.getBOpIndex().get(sinkId); if (targetOp == null) throw new IllegalStateException("Not found: " + sinkId); Modified: 
branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java 2010-10-24 18:18:10 UTC (rev 3842) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java 2010-10-25 15:40:55 UTC (rev 3843) @@ -46,13 +46,14 @@ import com.bigdata.bop.BOp; import com.bigdata.bop.BOpContext; import com.bigdata.bop.BOpEvaluationContext; -import com.bigdata.bop.NV; -import com.bigdata.bop.PipelineOp; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IConstraint; import com.bigdata.bop.IPredicate; import com.bigdata.bop.IShardwisePipelineOp; import com.bigdata.bop.IVariable; +import com.bigdata.bop.NV; +import com.bigdata.bop.PipelineOp; +import com.bigdata.bop.ap.Predicate.HashedPredicate; import com.bigdata.bop.engine.BOpStats; import com.bigdata.btree.BytesUtil; import com.bigdata.btree.keys.IKeyBuilder; @@ -179,36 +180,36 @@ /** * The #of chunks read from an {@link IAccessPath}. */ - public final CAT chunkCount = new CAT(); + public final CAT accessPathChunksIn = new CAT(); /** * The #of elements read from an {@link IAccessPath}. */ - public final CAT elementCount = new CAT(); + public final CAT accessPathUnitsIn = new CAT(); - /** - * The maximum observed fan in for this join dimension (maximum #of - * sources observed writing on any join task for this join dimension). - * Since join tasks may be closed and new join tasks re-opened for the - * same query, join dimension and index partition, and since each join - * task for the same join dimension could, in principle, have a - * different fan in based on the actual binding sets propagated this is - * not necessarily the "actual" fan in for the join dimension. You would - * have to track the #of distinct partitionId values to track that. 
- */ - public int fanIn; +// /** +// * The maximum observed fan in for this join dimension (maximum #of +// * sources observed writing on any join task for this join dimension). +// * Since join tasks may be closed and new join tasks re-opened for the +// * same query, join dimension and index partition, and since each join +// * task for the same join dimension could, in principle, have a +// * different fan in based on the actual binding sets propagated this is +// * not necessarily the "actual" fan in for the join dimension. You would +// * have to track the #of distinct partitionId values to track that. +// */ +// public int fanIn; +// +// /** +// * The maximum observed fan out for this join dimension (maximum #of +// * sinks on which any join task is writing for this join dimension). +// * Since join tasks may be closed and new join tasks re-opened for the +// * same query, join dimension and index partition, and since each join +// * task for the same join dimension could, in principle, have a +// * different fan out based on the actual binding sets propagated this is +// * not necessarily the "actual" fan out for the join dimension. +// */ +// public int fanOut; - /** - * The maximum observed fan out for this join dimension (maximum #of - * sinks on which any join task is writing for this join dimension). - * Since join tasks may be closed and new join tasks re-opened for the - * same query, join dimension and index partition, and since each join - * task for the same join dimension could, in principle, have a - * different fan out based on the actual binding sets propagated this is - * not necessarily the "actual" fan out for the join dimension. 
- */ - public int fanOut; - public void add(final BOpStats o) { super.add(o); @@ -221,18 +222,18 @@ accessPathCount.add(t.accessPathCount.get()); - chunkCount.add(t.chunkCount.get()); + accessPathChunksIn.add(t.accessPathChunksIn.get()); - elementCount.add(t.elementCount.get()); + accessPathUnitsIn.add(t.accessPathUnitsIn.get()); - if (t.fanIn > this.fanIn) { - // maximum reported fanIn for this join dimension. - this.fanIn = t.fanIn; - } - if (t.fanOut > this.fanOut) { - // maximum reported fanOut for this join dimension. - this.fanOut += t.fanOut; - } +// if (t.fanIn > this.fanIn) { +// // maximum reported fanIn for this join dimension. +// this.fanIn = t.fanIn; +// } +// if (t.fanOut > this.fanOut) { +// // maximum reported fanOut for this join dimension. +// this.fanOut += t.fanOut; +// } } @@ -242,8 +243,8 @@ protected void toString(final StringBuilder sb) { sb.append(",accessPathDups=" + accessPathDups.estimate_get()); sb.append(",accessPathCount=" + accessPathCount.estimate_get()); - sb.append(",chunkCount=" + chunkCount.estimate_get()); - sb.append(",elementCount=" + elementCount.estimate_get()); + sb.append(",accessPathChunksIn=" + accessPathChunksIn.estimate_get()); + sb.append(",accessPathUnitsIn=" + accessPathUnitsIn.estimate_get()); } } @@ -530,6 +531,8 @@ */ public Void call() throws Exception { +// final long begin = System.currentTimeMillis(); + if (log.isDebugEnabled()) log.debug("joinOp=" + joinOp); @@ -597,6 +600,10 @@ throw new RuntimeException(t); +// } finally { +// +// stats.elapsed.add(System.currentTimeMillis() - begin); + } } @@ -849,7 +856,7 @@ * Aggregate the source bindingSets that license the same * asBound predicate. 
*/ - final Map<IPredicate<E>, Collection<IBindingSet>> map = combineBindingSets(chunk); + final Map<HashedPredicate<E>, Collection<IBindingSet>> map = combineBindingSets(chunk); /* * Generate an AccessPathTask from each distinct asBound @@ -936,13 +943,13 @@ * bindingSets in the chunk from which the predicate was * generated. */ - protected Map<IPredicate<E>, Collection<IBindingSet>> combineBindingSets( + protected Map<HashedPredicate<E>, Collection<IBindingSet>> combineBindingSets( final IBindingSet[] chunk) { if (log.isDebugEnabled()) log.debug("chunkSize=" + chunk.length); - final Map<IPredicate<E>, Collection<IBindingSet>> map = new LinkedHashMap<IPredicate<E>, Collection<IBindingSet>>( + final Map<HashedPredicate<E>, Collection<IBindingSet>> map = new LinkedHashMap<HashedPredicate<E>, Collection<IBindingSet>>( chunk.length); for (IBindingSet bindingSet : chunk) { @@ -970,7 +977,8 @@ } // lookup the asBound predicate in the map. - Collection<IBindingSet> values = map.get(asBound); + final HashedPredicate<E> hashedPred = new HashedPredicate<E>(asBound); + Collection<IBindingSet> values = map.get(hashedPred); if (values == null) { @@ -983,7 +991,7 @@ values = new LinkedList<IBindingSet>(); - map.put(asBound, values); + map.put(hashedPred, values); } else { @@ -1024,7 +1032,7 @@ * @throws Exception */ protected AccessPathTask[] getAccessPathTasks( - final Map<IPredicate<E>, Collection<IBindingSet>> map) { + final Map<HashedPredicate<E>, Collection<IBindingSet>> map) { final int n = map.size(); @@ -1033,7 +1041,7 @@ final AccessPathTask[] tasks = new JoinTask.AccessPathTask[n]; - final Iterator<Map.Entry<IPredicate<E>, Collection<IBindingSet>>> itr = map + final Iterator<Map.Entry<HashedPredicate<E>, Collection<IBindingSet>>> itr = map .entrySet().iterator(); int i = 0; @@ -1042,10 +1050,10 @@ halted(); - final Map.Entry<IPredicate<E>, Collection<IBindingSet>> entry = itr + final Map.Entry<HashedPredicate<E>, Collection<IBindingSet>> entry = itr .next(); - 
tasks[i++] = new AccessPathTask(entry.getKey(), entry + tasks[i++] = new AccessPathTask(entry.getKey().pred, entry .getValue()); } @@ -1363,7 +1371,7 @@ final Object[] chunk = itr.nextChunk(); - stats.chunkCount.increment(); + stats.accessPathChunksIn.increment(); // process the chunk in the caller's thread. final boolean somethingAccepted = new ChunkTask( @@ -1460,7 +1468,7 @@ numElements += chunk.length; - stats.chunkCount.increment(); + stats.accessPathChunksIn.increment(); nchunks++; @@ -1493,7 +1501,7 @@ } } } - stats.elementCount.add(numElements); + stats.accessPathUnitsIn.add(numElements); } @@ -1746,7 +1754,7 @@ // naccepted for the current element (trace only). int naccepted = 0; - stats.elementCount.increment(); + stats.accessPathUnitsIn.increment(); for (IBindingSet bset : bindingSets) { Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/resources/logging/log4j.properties =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/resources/logging/log4j.properties 2010-10-24 18:18:10 UTC (rev 3842) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/resources/logging/log4j.properties 2010-10-25 15:40:55 UTC (rev 3843) @@ -225,6 +225,21 @@ #log4j.appender.destPlain.layout.ConversionPattern= ## +# Summary query evaluation log (tab delimited file). +#log4j.logger.com.bigdata.bop.engine.QueryLog=INFO,queryLog +log4j.additivity.com.bigdata.bop.engine.QueryLog=false +log4j.appender.queryLog=org.apache.log4j.FileAppender +log4j.appender.queryLog.Threshold=ALL +log4j.appender.queryLog.File=queryLog.csv +log4j.appender.queryLog.Append=true +# I find that it is nicer to have this unbuffered since you can see what +# is going on and to make sure that I have complete rule evaluation logs +# on shutdown. +log4j.appender.queryLog.BufferedIO=false +log4j.appender.queryLog.layout=org.apache.log4j.PatternLayout +log4j.appender.queryLog.layout.ConversionPattern=%m + +## # BOp run state trace (tab delimited file). 
Uncomment the next line to enable. #log4j.logger.com.bigdata.bop.engine.RunState$TableLog=INFO,queryRunStateLog log4j.additivity.com.bigdata.bop.engine.RunState$TableLog=false Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestBOpUtility.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestBOpUtility.java 2010-10-24 18:18:10 UTC (rev 3842) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestBOpUtility.java 2010-10-25 15:40:55 UTC (rev 3843) @@ -29,6 +29,7 @@ import java.util.Iterator; import java.util.Map; +import java.util.concurrent.FutureTask; import junit.framework.TestCase2; @@ -662,4 +663,55 @@ } + /** + * Unit tests for extracting the left-deep evaluation order for the query + * pipeline. + * <p> + * - test when the 1st operator is a control operator. + * <p> + * - test when there is an embedded control operator (subquery). + * <p> + * Note: this is not testing with left/right branches in the query plan. + * That sort of plan is not currently supported by pipeline evaluation. + */ + public void test_getEvaluationOrder() { + + final BOp op2 = new MyPipelineOp(new BOp[]{},NV.asMap(// + new NV(BOp.Annotations.BOP_ID,1)// +// new NV(BOp.Annotations.CONTROLLER,false)// + )); + final BOp op1 = new MyPipelineOp(new BOp[]{op2},NV.asMap(// + new NV(BOp.Annotations.BOP_ID,2)// +// new NV(BOp.Annotations.CONTROLLER,false)// + )); + final BOp op3 = new MyPipelineOp(new BOp[]{op1},NV.asMap(// + new NV(BOp.Annotations.BOP_ID,3),// + new NV(BOp.Annotations.CONTROLLER,true)// + )); + + assertEquals(new Integer[]{1,2,3},BOpUtility.getEvaluationOrder(op3)); + + } + + private static class MyPipelineOp extends PipelineOp { + + private static final long serialVersionUID = 1L; + + /** Deep copy constructor. */ + protected MyPipelineOp(MyPipelineOp op) { + super(op); + } + + /** Shallow copy constructor. 
*/ + protected MyPipelineOp(BOp[] args, Map<String, Object> annotations) { + super(args, annotations); + } + + @Override + public FutureTask<Void> eval(BOpContext<IBindingSet> context) { + return null; + } + + } + } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/MockRunningQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/MockRunningQuery.java 2010-10-24 18:18:10 UTC (rev 3842) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/MockRunningQuery.java 2010-10-25 15:40:55 UTC (rev 3843) @@ -27,8 +27,12 @@ package com.bigdata.bop.engine; +import java.util.Map; +import java.util.UUID; + import org.apache.log4j.Logger; +import com.bigdata.bop.BOp; import com.bigdata.journal.IIndexManager; import com.bigdata.service.IBigdataFederation; @@ -87,4 +91,56 @@ throw new UnsupportedOperationException(); } + @Override + public Map<Integer, BOp> getBOpIndex() { + return null; + } + + @Override + public Map<Integer, BOpStats> getStats() { + return null; + } + + @Override + public long getDeadline() { + // TODO Auto-generated method stub + return 0; + } + + @Override + public long getDoneTime() { + // TODO Auto-generated method stub + return 0; + } + + @Override + public long getElapsed() { + // TODO Auto-generated method stub + return 0; + } + + @Override + public long getStartTime() { + // TODO Auto-generated method stub + return 0; + } + + @Override + public Throwable getCause() { + // TODO Auto-generated method stub + return null; + } + + @Override + public BOp getQuery() { + // TODO Auto-generated method stub + return null; + } + + @Override + public UUID getQueryId() { + // TODO Auto-generated method stub + return null; + } + } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java =================================================================== --- 
branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java 2010-10-24 18:18:10 UTC (rev 3842) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java 2010-10-25 15:40:55 UTC (rev 3843) @@ -1866,7 +1866,6 @@ } /** ->>>>>>> .r3835 * Verify the expected solutions. * * @param expected Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/join/TestPipelineJoin.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/join/TestPipelineJoin.java 2010-10-24 18:18:10 UTC (rev 3842) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/join/TestPipelineJoin.java 2010-10-25 15:40:55 UTC (rev 3843) @@ -62,6 +62,7 @@ import com.bigdata.relation.accesspath.IBlockingBuffer; import com.bigdata.relation.accesspath.ThickAsynchronousIterator; import com.bigdata.striterator.ChunkedArrayIterator; +import com.bigdata.striterator.Dechunkerator; /** * Unit tests for the {@link PipelineJoin} operator. @@ -231,8 +232,8 @@ // access path assertEquals(0L, stats.accessPathDups.get()); assertEquals(1L, stats.accessPathCount.get()); - assertEquals(1L, stats.chunkCount.get()); - assertEquals(2L, stats.elementCount.get()); + assertEquals(1L, stats.accessPathChunksIn.get()); + assertEquals(2L, stats.accessPathUnitsIn.get()); assertTrue(ft.isDone()); assertFalse(ft.isCancelled()); @@ -241,6 +242,104 @@ } /** + * Unit test for a pipeline join in which we expect duplicate access paths to + * be eliminated. 
+ * + * @throws ExecutionException + * @throws InterruptedException + */ + public void test_join_duplicateElimination() throws InterruptedException, ExecutionException { + + final int startId = 1; + final int joinId = 2; + final int predId = 3; + + final BOp startOp = new CopyOp(new BOp[] {}, NV.asMap(new NV[] {// + new NV(Predicate.Annotations.BOP_ID, startId),// + })); + + final Predicate<E> predOp = new Predicate<E>(new IVariableOrConstant[] { + new Constant<String>("Mary"), Var.var("x") }, NV + .asMap(new NV[] {// + new NV(Predicate.Annotations.RELATION_NAME, + new String[] { namespace }),// + new NV(Predicate.Annotations.BOP_ID, predId),// + new NV(Predicate.Annotations.TIMESTAMP, + ITx.READ_COMMITTED),// + })); + + final PipelineJoin<E> query = new PipelineJoin<E>( + new BOp[] { startOp },// + new NV(Predicate.Annotations.BOP_ID, joinId),// + new NV(PipelineJoin.Annotations.PREDICATE, predOp)); + + // the expected solutions (each solution appears twice since we feed two empty binding sets in). + final IBindingSet[] expected = new IBindingSet[] {// + new ArrayBindingSet(// + new IVariable[] { Var.var("x") },// + new IConstant[] { new Constant<String>("John") }// + ),// + new ArrayBindingSet(// + new IVariable[] { Var.var("x") },// + new IConstant[] { new Constant<String>("Paul") }// + ),// + new ArrayBindingSet(// + new IVariable[] { Var.var("x") },// + new IConstant[] { new Constant<String>("John") }// + ),// + new ArrayBindingSet(// + new IVariable[] { Var.var("x") },// + new IConstant[] { new Constant<String>("Paul") }// + ),// + }; + + final PipelineJoinStats stats = query.newStats(); + + // submit TWO (2) empty binding sets in ONE (1) chunk. 
+ final IAsynchronousIterator<IBindingSet[]> source = new ThickAsynchronousIterator<IBindingSet[]>( + new IBindingSet[][] { new IBindingSet[] { new HashBindingSet(), new HashBindingSet()} }); + + final IBlockingBuffer<IBindingSet[]> sink = new BlockingBufferWithStats<IBindingSet[]>(query, stats); + + final BOpContext<IBindingSet> context = new BOpContext<IBindingSet>( + new MockRunningQuery(null/* fed */, jnl/* indexManager */ + ), -1/* partitionId */, stats, + source, sink, null/* sink2 */); + + // get task. + final FutureTask<Void> ft = query.eval(context); + + // execute task. + jnl.getExecutorService().execute(ft); + + ft.get();// wait for completion (before showing stats), then look for errors. + + // show stats. + System.err.println("stats: "+stats); + + // verify solutions. + TestQueryEngine.assertSameSolutionsAnyOrder(expected, new Dechunkerator<IBindingSet>(sink.iterator())); + + // verify stats. + + // join task + assertEquals(1L, stats.chunksIn.get()); + assertEquals(2L, stats.unitsIn.get()); + assertEquals(4L, stats.unitsOut.get()); + assertEquals(1L, stats.chunksOut.get()); + // access path + assertEquals(1L, stats.accessPathDups.get()); + assertEquals(1L, stats.accessPathCount.get()); + assertEquals(1L, stats.accessPathChunksIn.get()); + assertEquals(2L, stats.accessPathUnitsIn.get()); + + assertTrue(ft.isDone()); + assertFalse(ft.isCancelled()); + ft.get(); // verify nothing thrown. + + } + + /** * Unit test for a join with an {@link IConstraint}. The constraint is used * to filter out one of the solutions where "Mary" is the present in the * first column of the relation. 
@@ -316,8 +415,8 @@ // access path assertEquals(0L, stats.accessPathDups.get()); assertEquals(1L, stats.accessPathCount.get()); - assertEquals(1L, stats.chunkCount.get()); - assertEquals(2L, stats.elementCount.get()); + assertEquals(1L, stats.accessPathChunksIn.get()); + assertEquals(2L, stats.accessPathUnitsIn.get()); assertTrue(ft.isDone()); assertFalse(ft.isCancelled()); @@ -426,8 +525,8 @@ // access path assertEquals(0L, stats.accessPathDups.get()); assertEquals(1L, stats.accessPathCount.get()); - assertEquals(1L, stats.chunkCount.get()); - assertEquals(5L, stats.elementCount.get()); + assertEquals(1L, stats.accessPathChunksIn.get()); + assertEquals(5L, stats.accessPathUnitsIn.get()); assertTrue(ft.isDone()); assertFalse(ft.isCancelled()); @@ -531,8 +630,8 @@ // access path assertEquals(0L, stats.accessPathDups.get()); assertEquals(2L, stats.accessPathCount.get()); - assertEquals(1L, stats.chunkCount.get()); - assertEquals(2L, stats.elementCount.get()); + assertEquals(1L, stats.accessPathChunksIn.get()); + assertEquals(2L, stats.accessPathUnitsIn.get()); assertTrue(ft.isDone()); assertFalse(ft.isCancelled()); @@ -641,8 +740,8 @@ // access path assertEquals(0L, stats.accessPathDups.get()); assertEquals(2L, stats.accessPathCount.get()); - assertEquals(1L, stats.chunkCount.get()); - assertEquals(2L, stats.elementCount.get()); + assertEquals(1L, stats.accessPathChunksIn.get()); + assertEquals(2L, stats.accessPathUnitsIn.get()); assertTrue(ft.isDone()); assertFalse(ft.isCancelled()); Modified: branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm/src/resources/logging/log4j.properties =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm/src/resources/logging/log4j.properties 2010-10-24 18:18:10 UTC (rev 3842) +++ branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm/src/resources/logging/log4j.properties 2010-10-25 15:40:55 UTC (rev 3843) @@ -222,3 +222,33 @@ log4j.appender.ruleLog.BufferedIO=false 
log4j.appender.ruleLog.layout=org.apache.log4j.PatternLayout log4j.appender.ruleLog.layout.ConversionPattern=%m + +## +# Summary query evaluation log (tab delimited file). +#log4j.logger.com.bigdata.bop.engine.QueryLog=INFO,queryLog +log4j.additivity.com.bigdata.bop.engine.QueryLog=false +log4j.appender.queryLog=org.apache.log4j.FileAppender +log4j.appender.queryLog.Threshold=ALL +log4j.appender.queryLog.File=queryLog.csv +log4j.appender.queryLog.Append=true +# I find that it is nicer to have this unbuffered since you can see what +# is going on and to make sure that I have complete rule evaluation logs +# on shutdown. +log4j.appender.queryLog.BufferedIO=false +log4j.appender.queryLog.layout=org.apache.log4j.PatternLayout +log4j.appender.queryLog.layout.ConversionPattern=%m + +## +# BOp run state trace (tab delimited file). Uncomment the next line to enable. +#log4j.logger.com.bigdata.bop.engine.RunState$TableLog=INFO,queryRunStateLog +log4j.additivity.com.bigdata.bop.engine.RunState$TableLog=false +log4j.appender.queryRunStateLog=org.apache.log4j.FileAppender +log4j.appender.queryRunStateLog.Threshold=ALL +log4j.appender.queryRunStateLog.File=queryRunState.log +log4j.appender.queryRunStateLog.Append=true +# I find that it is nicer to have this unbuffered since you can see what +# is going on and to make sure that I have complete rule evaluation logs +# on shutdown. +log4j.appender.queryRunStateLog.BufferedIO=false +log4j.appender.queryRunStateLog.layout=org.apache.log4j.PatternLayout +log4j.appender.queryRunStateLog.layout.ConversionPattern=%m Modified: branches/QUADS_QUERY_BRANCH/bigdata-perf/lubm/src/resources/logging... [truncated message content] |
From: <tho...@us...> - 2010-10-24 18:18:18
|
Revision: 3842 http://bigdata.svn.sourceforge.net/bigdata/?rev=3842&view=rev Author: thompsonbry Date: 2010-10-24 18:18:10 +0000 (Sun, 24 Oct 2010) Log Message: ----------- Modified PipelineJoin to make the predicate an annotation. This is in keeping with a design pattern where operands (other than simple variables and constants) are evaluated in the pipeline and where annotations are interpreted. This also simplifies the RunState logging format. Modified RunningQuery to track all Futures and permit more than one concurrent operator task per (bopId,shardId). There is now an annotation which controls how many such tasks may run concurrently. I've also experimented with the parameter space for the BufferAnnotations and the fullyBufferedReadThreshold. These do not appear to have much influence on query performance for either LUBM U50 or BSBM 100M. LUBM query performance remains significantly better in the trunk (13s vs 17s). There is a less significant difference in BSBM performance (4234 vs 4058). Since LUBM U50 tends to be memory based (after the first presentation of each query) this suggests that the performance difference is related more to in-memory dynamics than to disk access. The most significant difference right now between the trunk and the quads query branch is that we chain the input and output buffers of operators together in the trunk but run operators over chunks of materialized inputs in the branch. 
Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BufferAnnotations.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/BOpStats.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngineTestAnnotations.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunState.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestPipelineUtility.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestFederatedQueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/join/TestPipelineJoin.java branches/QUADS_QUERY_BRANCH/bigdata-jini/src/test/com/bigdata/bop/fed/jini/TestJiniFederatedQueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata-perf/lubm/src/resources/logging/log4j.properties branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/Rule2BOpUtility.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BufferAnnotations.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BufferAnnotations.java 2010-10-22 20:08:48 UTC (rev 3841) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BufferAnnotations.java 2010-10-24 18:18:10 UTC (rev 3842) @@ -52,7 +52,7 @@ /** * Default for {@link #CHUNK_OF_CHUNKS_CAPACITY} */ - int DEFAULT_CHUNK_OF_CHUNKS_CAPACITY = 100; + int DEFAULT_CHUNK_OF_CHUNKS_CAPACITY = 100;//trunk=1000 /** * Sets the capacity of the {@link IBuffer}[]s used to accumulate a chunk of @@ -66,7 +66,7 @@ /** * Default for {@link 
#CHUNK_CAPACITY} */ - int DEFAULT_CHUNK_CAPACITY = 100; + int DEFAULT_CHUNK_CAPACITY = 100;//trunk=100 /** * The timeout in milliseconds that the {@link BlockingBuffer} will wait for @@ -81,7 +81,7 @@ * * @todo this is probably much larger than we want. Try 10ms. */ - int DEFAULT_CHUNK_TIMEOUT = 20; + int DEFAULT_CHUNK_TIMEOUT = 20;//trunk=1000 /** * The {@link TimeUnit}s in which the {@link #CHUNK_TIMEOUT} is measured. Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java 2010-10-22 20:08:48 UTC (rev 3841) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java 2010-10-24 18:18:10 UTC (rev 3842) @@ -42,6 +42,7 @@ import com.bigdata.btree.filter.Advancer; import com.bigdata.btree.filter.TupleFilter; import com.bigdata.mdi.PartitionLocator; +import com.bigdata.rawstore.Bytes; import com.bigdata.relation.IRelation; import com.bigdata.relation.accesspath.AccessPath; import com.bigdata.relation.accesspath.ElementFilter; @@ -255,7 +256,7 @@ * @todo Experiment with this. It should probably be something close to * the branching factor, e.g., 100. */ - int DEFAULT_FULLY_BUFFERED_READ_THRESHOLD = 100; + int DEFAULT_FULLY_BUFFERED_READ_THRESHOLD = 100;//trunk=20*Bytes.kilobyte32 /** * Specify the {@link IRangeQuery} flags for the {@link IAccessPath} ( Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/BOpStats.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/BOpStats.java 2010-10-22 20:08:48 UTC (rev 3841) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/BOpStats.java 2010-10-24 18:18:10 UTC (rev 3842) @@ -39,15 +39,6 @@ * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ - * - * @todo Add time per bop. 
This can not be directly aggregated into wall time - * since there are concurrent processes. However, this will be useful - * since we tend to process materialized chunks with the new - * {@link QueryEngine} such that the operator evaluation time now more or - * less directly corresponds to the time it takes to act on local data, - * producing local outputs. The {@link QueryEngine} itself now handles the - * transportation of data between the nodes so that time can be factored - * out of the local aspects of query execution. */ public class BOpStats implements Serializable { @@ -56,12 +47,12 @@ */ private static final long serialVersionUID = 1L; -// /** -// * The timestamp (milliseconds) associated with the start of execution for -// * the join dimension. This is not aggregated. It should only be used to -// * compute the elapsed time for the operator. -// */ -// private final long startTime; +// /** +// * The timestamp (nanoseconds) assigned when this {@link BOpStats} object +// * was creatred. This can not be directly aggregated into wall time since +// * concurrent processes are nearly always used during query evaluation. +// */ +// private final long startTime = System.nanoTime(); /** * #of chunks in. 
Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngineTestAnnotations.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngineTestAnnotations.java 2010-10-22 20:08:48 UTC (rev 3841) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngineTestAnnotations.java 2010-10-24 18:18:10 UTC (rev 3842) @@ -69,4 +69,9 @@ boolean DEFAULT_ONE_MESSAGE_PER_CHUNK = false; + String MAX_CONCURRENT_TASKS_PER_OPERATOR_AND_SHARD = QueryEngineTestAnnotations.class.getName() + + ".maxConcurrentTasksPerOperatorAndShard"; + + int DEFAULT_MAX_CONCURRENT_TASKS_PER_OPERATOR_AND_SHARD = Integer.MAX_VALUE; + } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunState.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunState.java 2010-10-22 20:08:48 UTC (rev 3841) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunState.java 2010-10-24 18:18:10 UTC (rev 3842) @@ -37,7 +37,6 @@ import java.util.Map; import java.util.Set; import java.util.UUID; -import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicBoolean; @@ -46,6 +45,7 @@ import org.apache.log4j.Logger; import com.bigdata.bop.BOp; +import com.bigdata.bop.PipelineOp; import com.bigdata.relation.accesspath.IBlockingBuffer; /** @@ -756,8 +756,13 @@ final Integer id = bopIds[i]; - sb.append("\tnavail(id=" + id + ")"); + final BOp bop = bopIndex.get(id); + + if(!(bop instanceof PipelineOp)) + continue; // skip non-pipeline operators. 
+ sb.append("\tnavail(id=" + id + ")"); + sb.append("\tnrun(id=" + id + ")"); } @@ -853,6 +858,11 @@ final Integer id = bopIds[i]; + final BOp bop = bopIndex.get(id); + + if(!(bop instanceof PipelineOp)) + continue; // skip non-pipeline operators. + final AtomicLong nrunning = runningMap.get(id); final AtomicLong navailable = availableMap.get(id); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java 2010-10-22 20:08:48 UTC (rev 3841) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java 2010-10-24 18:18:10 UTC (rev 3842) @@ -28,7 +28,6 @@ package com.bigdata.bop.engine; import java.util.Iterator; -import java.util.LinkedHashMap; import java.util.LinkedList; import java.util.List; import java.util.Map; @@ -145,12 +144,18 @@ */ final private Haltable<Void> future = new Haltable<Void>(); + /** + * The maximum number of operator tasks which may be concurrently executor + * for a given (bopId,shardId). + */ + final private int maxConcurrentTasksPerOperatorAndShard; + /** * A collection of (bopId,partitionId) keys mapped onto a collection of * operator task evaluation contexts for currently executing operators for * this query. */ - private final ConcurrentHashMap<BSBundle, ChunkFutureTask> operatorFutures; + private final ConcurrentHashMap<BSBundle, ConcurrentHashMap<ChunkFutureTask,ChunkFutureTask>> operatorFutures; /** * A map of unbounded work queues for each (bopId,partitionId). 
Empty queues @@ -450,8 +455,13 @@ this.bopIndex = BOpUtility.getIndex(query); - this.operatorFutures = new ConcurrentHashMap<BSBundle, ChunkFutureTask>(); + this.maxConcurrentTasksPerOperatorAndShard = query + .getProperty( + QueryEngineTestAnnotations.MAX_CONCURRENT_TASKS_PER_OPERATOR_AND_SHARD, + QueryEngineTestAnnotations.DEFAULT_MAX_CONCURRENT_TASKS_PER_OPERATOR_AND_SHARD); + this.operatorFutures = new ConcurrentHashMap<BSBundle, ConcurrentHashMap<ChunkFutureTask,ChunkFutureTask>>(); + this.operatorQueues = new ConcurrentHashMap<BSBundle, BlockingQueue<IChunkMessage<IBindingSet>>>(); /* @@ -520,11 +530,12 @@ } - /** - * Pre-populate a map with {@link BOpStats} objects for the query. Operators - * in subqueries are not visited since they will be assigned {@link BOpStats} - * objects when they are run as a subquery. - */ + /** + * Pre-populate a map with {@link BOpStats} objects for the query. Only the + * child operands are visited. Operators in subqueries are not visited since + * they will be assigned {@link BOpStats} objects when they are run as a + * subquery. + */ private void populateStatsMap(final BOp op) { if(!(op instanceof PipelineOp)) @@ -1139,14 +1150,26 @@ lock.lock(); try { // Make sure the query is still running. - future.halted(); - // Is there a Future for this (bopId,partitionId)? - final ChunkFutureTask cft = operatorFutures.get(bundle); - if (cft != null && !cft.isDone()) { - // already running. - return false; - } - // Remove the work queue for that (bopId,partitionId). + if(future.isDone()) + return false; + // Is there a Future for this (bopId,partitionId)? + ConcurrentHashMap<ChunkFutureTask, ChunkFutureTask> map = operatorFutures + .get(bundle); + if (map != null) { + int nrunning = 0; + for (ChunkFutureTask cft : map.keySet()) { + if (cft.isDone()) + map.remove(cft); + nrunning++; + } + if (map.isEmpty()) + operatorFutures.remove(bundle); + if (nrunning > maxConcurrentTasksPerOperatorAndShard) { + // Too many already running. 
+ return false; + } + } + // Remove the work queue for that (bopId,partitionId). final BlockingQueue<IChunkMessage<IBindingSet>> queue = operatorQueues .remove(bundle); if (queue == null || queue.isEmpty()) { @@ -1165,16 +1188,26 @@ for (IChunkMessage<IBindingSet> msg : messages) { source.add(msg.getChunkAccessor().iterator()); } - /* - * Create task to consume that source. - */ - final ChunkFutureTask ft = new ChunkFutureTask(new ChunkTask( - bundle.bopId, bundle.shardId, nmessages, source)); - /* - * Submit task for execution (asynchronous). - */ - queryEngine.execute(ft); - return true; + /* + * Create task to consume that source. + */ + final ChunkFutureTask cft = new ChunkFutureTask(new ChunkTask( + bundle.bopId, bundle.shardId, nmessages, source)); + /* + * Save the Future for this task. Together with the logic above this + * may be used to limit the #of concurrent tasks per (bopId,shardId) + * to one for a given query. + */ + if (map == null) { + map = new ConcurrentHashMap<ChunkFutureTask, ChunkFutureTask>(); + operatorFutures.put(bundle, map); + } + map.put(cft, cft); + /* + * Submit task for execution (asynchronous). + */ + queryEngine.execute(cft); + return true; } finally { lock.unlock(); } @@ -1199,6 +1232,29 @@ } + public void run() { + + final ChunkTask t = chunkTask; + + super.run(); + + /* + * This task is done executing so remove its Future before we + * attempt to schedule another task for the same + * (bopId,partitionId). + */ + final ConcurrentHashMap<ChunkFutureTask, ChunkFutureTask> map = operatorFutures + .get(new BSBundle(t.bopId, t.partitionId)); + if (map != null) { + map.remove(this, this); + } + + // Schedule another task if any messages are waiting. + RunningQuery.this.scheduleNext(new BSBundle( + t.bopId, t.partitionId)); + + } + } /** @@ -1224,16 +1280,6 @@ public void run() { - // Run the task. - runOnce(); - - // Schedule another task if any messages are waiting. 
- RunningQuery.this.scheduleNext(new BSBundle( - t.bopId, t.partitionId)); - } - - private void runOnce() { - final UUID serviceId = queryEngine.getServiceUUID(); try { @@ -1467,24 +1513,25 @@ + bop); } - /* - * Setup the BOpStats object. For some operators, e.g., SliceOp, - * this MUST be the same object across all invocations of that - * instance of that operator for this query. This is marked by the - * PipelineOp#isSharedState() method and is handled by a - * putIfAbsent() pattern when that method returns true. - * - * Note: RunState#haltOp() avoids adding a BOpStats object to itself - * since that would cause double counting when the same object is - * used for each invocation of the operator. - * - * Note: By using a shared stats object we have live reporting on - * all instances of the task which are being evaluated on the query - * controller (tasks running on peers always have distinct stats - * objects and those stats are aggregated when the task finishes). - */ + /* + * Setup the BOpStats object. For some operators, e.g., SliceOp, + * this MUST be the same object across all invocations of that + * instance of that operator for this query. This is marked by the + * PipelineOp#isSharedState() method and is handled by a + * putIfAbsent() pattern when that method returns true. + * + * Note: RunState#haltOp() avoids adding a BOpStats object to itself + * since that would cause double counting when the same object is + * used for each invocation of the operator. + * + * Note: It tends to be more useful to have distinct BOpStats + * objects for each operator task instance that we run as this makes + * it possible to see how much work was performed by that task + * instance. The data are aggregated in the [statsMap] across the + * entire run of the query. + */ final BOpStats stats; - if (((PipelineOp) bop).isSharedState() || statsMap != null) { + if (((PipelineOp) bop).isSharedState()) {//|| statsMap != null) { // shared stats object. 
stats = statsMap.get(bopId); } else { @@ -1947,23 +1994,19 @@ boolean cancelled = false; - final Iterator<ChunkFutureTask> fitr = operatorFutures.values().iterator(); + final Iterator<ConcurrentHashMap<ChunkFutureTask,ChunkFutureTask>> fitr = operatorFutures.values().iterator(); while (fitr.hasNext()) { - final ChunkFutureTask f = fitr.next(); - - try { - - if (f.cancel(mayInterruptIfRunning)) - cancelled = true; + final ConcurrentHashMap<ChunkFutureTask,ChunkFutureTask> set = fitr.next(); - } finally { - -// fitr.remove(); - - } + for(ChunkFutureTask f : set.keySet()) { + if (f.cancel(mayInterruptIfRunning)) + cancelled = true; + + } + } return cancelled; Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java 2010-10-22 20:08:48 UTC (rev 3841) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java 2010-10-24 18:18:10 UTC (rev 3842) @@ -79,15 +79,14 @@ /** * Pipelined join operator for online (selective) queries. The pipeline join - * accepts chunks of binding sets from its left operand, combines each binding - * set in turn with the right operand to produce an "asBound" predicate, and - * then executes a nested indexed subquery against that asBound predicate, - * writing out a new binding set for each element returned by the asBound - * predicate which satisfies the join constraint. + * accepts chunks of binding sets from its operand, combines each binding set in + * turn with its {@link IPredicate} annotation to produce an "asBound" + * predicate, and then executes a nested indexed subquery against that asBound + * predicate, writing out a new binding set for each element returned by the + * asBound predicate which satisfies the join constraint. 
* <p> * Note: In order to support pipelining, query plans need to be arranged in a - * "left-deep" manner and there may not be intervening operators between the - * pipeline join operator and the {@link IPredicate} on which it will read. + * "left-deep" manner. * <p> * Note: In scale-out, the {@link PipelineJoin} is generally annotated as a * {@link BOpEvaluationContext#SHARDED} or {@link BOpEvaluationContext#HASHED} @@ -113,6 +112,12 @@ public interface Annotations extends PipelineOp.Annotations { + /** + * The {@link IPredicate} which is used to generate the + * {@link IAccessPath}s during the join. + */ + String PREDICATE = PipelineJoin.class.getName() + ".predicate"; + /** * An optional {@link IVariable}[] identifying the variables to be * retained in the {@link IBindingSet}s written out by the operator. @@ -249,7 +254,7 @@ * @param args * @param annotations */ - public PipelineJoin(final BOp[] args, NV[] annotations) { + public PipelineJoin(final BOp[] args, NV... annotations) { this(args, NV.asMap(annotations)); @@ -265,37 +270,17 @@ super(args, annotations); - if (arity() != 2) + if (arity() != 1) throw new IllegalArgumentException(); if (left() == null) throw new IllegalArgumentException(); - if (right() == null) - throw new IllegalArgumentException(); - } - /** - * @param left - * The left operand, which must be an {@link IBindingSet} - * pipeline operator, such as another {@link PipelineJoin}. - * @param right - * The right operand, which must be an {@link IPredicate}. - * - * @param annotations - */ - public PipelineJoin(final PipelineOp left, - final IPredicate<?> right, final Map<String, Object> annotations) { - - this(new BOp[] { left, right }, annotations); - - } - - /** - * The left hand operator, which is the previous join in the pipeline join - * path. - */ + /** + * The sole operand, which is the previous join in the pipeline join path. 
+ */ public PipelineOp left() { return (PipelineOp) get(0); @@ -303,28 +288,14 @@ } /** - * The right hand operator, which is the {@link IPredicate}. + * {@inheritDoc} + * + * @see Annotations#PREDICATE */ @SuppressWarnings("unchecked") - public IPredicate<E> right() { - - return (IPredicate<E>) get(1); - - } - - // /** - // * Returns {@link BOpEvaluationContext#SHARDED} - // */ - // @Override - // final public BOpEvaluationContext getEvaluationContext() { - // - // return BOpEvaluationContext.SHARDED; - // - // } - - public IPredicate<E> getPredicate() { + public IPredicate<E> getPredicate() { - return right(); + return (IPredicate<E>) getRequiredProperty(Annotations.PREDICATE); } @@ -408,7 +379,7 @@ final private Executor service; /** - * True iff the {@link #right} operand is an optional pattern (aka if + * True iff the {@link #predicate} operand is an optional pattern (aka if * this is a SPARQL style left join). */ final private boolean optional; @@ -420,18 +391,13 @@ */ final private IVariable<?>[] variablesToKeep; -// /** -// * The source for the binding sets. -// */ -// final BindingSetPipelineOp left; - /** * The source for the elements to be joined. */ - final private IPredicate<E> right; + final private IPredicate<E> predicate; /** - * The relation associated with the {@link #right} operand. + * The relation associated with the {@link #predicate} operand. 
*/ final private IRelation<E> relation; @@ -519,10 +485,8 @@ if (context == null) throw new IllegalArgumentException(); -// this.fed = context.getFederation(); this.joinOp = joinOp; -// this.left = joinOp.left(); - this.right = joinOp.right(); + this.predicate = joinOp.getPredicate(); this.constraints = joinOp.constraints(); this.maxParallel = joinOp.getMaxParallel(); if (maxParallel > 0) { @@ -536,7 +500,7 @@ this.optional = joinOp.isOptional(); this.variablesToKeep = joinOp.variablesToKeep(); this.context = context; - this.relation = context.getRelation(right); + this.relation = context.getRelation(predicate); this.source = context.getSource(); this.sink = context.getSink(); this.sink2 = context.getSink2(); @@ -932,7 +896,7 @@ final IBindingSet bindingSet = chunk[0]; // constrain the predicate to the given bindings. - IPredicate<E> predicate = right.asBound(bindingSet); + IPredicate<E> asBound = predicate.asBound(bindingSet); if (partitionId != -1) { @@ -947,11 +911,11 @@ * for an index partition. */ - predicate = predicate.setPartitionId(partitionId); + asBound = asBound.setPartitionId(partitionId); } - new JoinTask.AccessPathTask(predicate, Arrays.asList(chunk)) + new JoinTask.AccessPathTask(asBound, Arrays.asList(chunk)) .call(); } @@ -986,7 +950,7 @@ halted(); // constrain the predicate to the given bindings. - IPredicate<E> predicate = right.asBound(bindingSet); + IPredicate<E> asBound = predicate.asBound(bindingSet); if (partitionId != -1) { @@ -1001,12 +965,12 @@ * for an index partition. */ - predicate = predicate.setPartitionId(partitionId); + asBound = asBound.setPartitionId(partitionId); } // lookup the asBound predicate in the map. 
- Collection<IBindingSet> values = map.get(predicate); + Collection<IBindingSet> values = map.get(asBound); if (values == null) { @@ -1019,7 +983,7 @@ values = new LinkedList<IBindingSet>(); - map.put(predicate, values); + map.put(asBound, values); } else { @@ -1793,7 +1757,7 @@ bset = bset.clone(); // propagate bindings from the visited element. - if (context.bind(right, constraints, e, bset)) { + if (context.bind(predicate, constraints, e, bset)) { // optionally strip off unnecessary variables. bset = variablesToKeep == null ? bset : bset Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestPipelineUtility.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestPipelineUtility.java 2010-10-22 20:08:48 UTC (rev 3841) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestPipelineUtility.java 2010-10-24 18:18:10 UTC (rev 3842) @@ -104,17 +104,17 @@ })); @SuppressWarnings("unchecked") - final PipelineOp join1Op = new PipelineJoin(startOp, pred1Op, - NV.asMap(new NV[] { new NV(Predicate.Annotations.BOP_ID, - joinId1),// - })); + final PipelineOp join1Op = new PipelineJoin(new BOp[] { startOp }, + new NV(Predicate.Annotations.BOP_ID, joinId1),// + new NV(PipelineJoin.Annotations.PREDICATE, pred1Op) // + ); @SuppressWarnings("unchecked") - final PipelineOp join2Op = new PipelineJoin(join1Op, pred2Op, - NV.asMap(new NV[] { new NV(Predicate.Annotations.BOP_ID, - joinId2),// - })); - + final PipelineOp join2Op = new PipelineJoin(new BOp[] { join1Op }, // + new NV(Predicate.Annotations.BOP_ID, joinId2),// + new NV(PipelineJoin.Annotations.PREDICATE, pred2Op) // + ); + final PipelineOp queryPlan = join2Op; final Map<Integer,BOp> queryIndex = BOpUtility.getIndex(queryPlan); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java 
=================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java 2010-10-22 20:08:48 UTC (rev 3841) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java 2010-10-24 18:18:10 UTC (rev 3842) @@ -309,30 +309,29 @@ public void test_query_join1() throws Exception { final int startId = 1; - final int joinId = 2; - final int predId = 3; - final PipelineOp query = new PipelineJoin<E>( - // left - new StartOp(new BOp[] {}, NV.asMap(new NV[] {// - new NV(Predicate.Annotations.BOP_ID, startId),// - new NV(SliceOp.Annotations.EVALUATION_CONTEXT, - BOpEvaluationContext.CONTROLLER),// - })), - // right - new Predicate<E>(new IVariableOrConstant[] { - new Constant<String>("Mary"), Var.var("value") }, NV - .asMap(new NV[] {// - new NV(Predicate.Annotations.RELATION_NAME, - new String[] { namespace }),// - new NV(Predicate.Annotations.BOP_ID, predId),// - new NV(Predicate.Annotations.TIMESTAMP,ITx.READ_COMMITTED),// - })), - // join annotations - NV.asMap(new NV[] { // - new NV(Predicate.Annotations.BOP_ID, joinId),// - })// - ); + final int joinId = 2; + final int predId = 3; + final StartOp startOp = new StartOp(new BOp[] {}, NV.asMap(new NV[] {// + new NV(Predicate.Annotations.BOP_ID, startId),// + new NV(SliceOp.Annotations.EVALUATION_CONTEXT, + BOpEvaluationContext.CONTROLLER),// + })); + + final Predicate<E> pred = new Predicate<E>(new IVariableOrConstant[] { + new Constant<String>("Mary"), Var.var("value") }, NV + .asMap(new NV[] {// + new NV(Predicate.Annotations.RELATION_NAME, + new String[] { namespace }),// + new NV(Predicate.Annotations.BOP_ID, predId),// + new NV(Predicate.Annotations.TIMESTAMP, + ITx.READ_COMMITTED),// + })); + + final PipelineOp query = new PipelineJoin<E>(new BOp[] { startOp },// + new NV(Predicate.Annotations.BOP_ID, joinId),// + new NV(PipelineJoin.Annotations.PREDICATE, pred)); + // the expected solution. 
final IBindingSet[] expected = new IBindingSet[] {// new ArrayBindingSet(// @@ -434,15 +433,10 @@ ITx.READ_COMMITTED),// })); - final PipelineJoin<E> joinOp = new PipelineJoin<E>( - startOp/* left */, predOp/* right */, - // join annotations - NV.asMap(new NV[] { // - new NV(Predicate.Annotations.BOP_ID, joinId),// -// new NV(PipelineOp.Annotations.CHUNK_CAPACITY, 1),// -// new NV(PipelineOp.Annotations.CHUNK_OF_CHUNKS_CAPACITY, 1),// - })// - ); + final PipelineJoin<E> joinOp = new PipelineJoin<E>( + new BOp[] { startOp },// + new NV(Predicate.Annotations.BOP_ID, joinId),// + new NV(PipelineJoin.Annotations.PREDICATE, predOp)); final SliceOp sliceOp = new SliceOp(new BOp[] { joinOp }, // slice annotations @@ -868,13 +862,10 @@ ITx.READ_COMMITTED),// })); - final PipelineJoin<E> joinOp = new PipelineJoin<E>(startOp/* left */, - predOp/* right */, - // join annotations - NV.asMap(new NV[] { // - new NV(Predicate.Annotations.BOP_ID, joinId),// - })// - ); + final PipelineJoin<E> joinOp = new PipelineJoin<E>( + new BOp[] { startOp },// + new NV(Predicate.Annotations.BOP_ID, joinId),// + new NV(PipelineJoin.Annotations.PREDICATE, predOp)); final PipelineOp query = new SliceOp(new BOp[] { joinOp }, // slice annotations @@ -1002,18 +993,16 @@ // R.primaryKeyOrder),// })); - final PipelineJoin<E> joinOp = new PipelineJoin<E>(startOp/* left */, - predOp/* right */, - // join annotations - NV.asMap(new NV[] {// - new NV(Predicate.Annotations.BOP_ID, joinId),// - // impose constraint on the join. - new NV(PipelineJoin.Annotations.CONSTRAINTS, - new IConstraint[] { new EQConstant(y, - new Constant<String>("Paul")) }),// - })// - ); - + final PipelineJoin<E> joinOp = new PipelineJoin<E>( + new BOp[] { startOp },// + new NV(Predicate.Annotations.BOP_ID, joinId),// + new NV(PipelineJoin.Annotations.PREDICATE, predOp),// + // impose constraint on the join. 
+ new NV(PipelineJoin.Annotations.CONSTRAINTS, + new IConstraint[] { new EQConstant(y, + new Constant<String>("Paul")) })// + ); + final PipelineOp query = new SliceOp(new BOp[] { joinOp }, // slice annotations NV.asMap(new NV[] {// @@ -1170,19 +1159,17 @@ new NV(Predicate.Annotations.TIMESTAMP, ITx.READ_COMMITTED),// })); - final PipelineOp join1Op = new PipelineJoin<E>(// - startOp, pred1Op,// - NV.asMap(new NV[] {// - new NV(Predicate.Annotations.BOP_ID, joinId1),// - })); + final PipelineOp join1Op = new PipelineJoin<E>(// + new BOp[] { startOp },// + new NV(Predicate.Annotations.BOP_ID, joinId1),// + new NV(PipelineJoin.Annotations.PREDICATE, pred1Op)); - final PipelineOp join2Op = new PipelineJoin<E>(// - join1Op, pred2Op,// - NV.asMap(new NV[] {// - new NV(Predicate.Annotations.BOP_ID, joinId2),// - })); + final PipelineOp join2Op = new PipelineJoin<E>(// + new BOp[] { join1Op },// + new NV(Predicate.Annotations.BOP_ID, joinId2),// + new NV(PipelineJoin.Annotations.PREDICATE, pred2Op)); - final PipelineOp query = join2Op; + final PipelineOp query = join2Op; // start the query. final UUID queryId = UUID.randomUUID(); @@ -1472,24 +1459,21 @@ })); final PipelineOp join1Op = new PipelineJoin<E>(// - startOp, pred1Op,// - NV.asMap(new NV[] {// + new BOp[]{startOp},// new NV(Predicate.Annotations.BOP_ID, joinId1),// - })); + new NV(PipelineJoin.Annotations.PREDICATE,pred1Op)); - final PipelineOp join2Op = new PipelineJoin<E>(// - join1Op, pred2Op,// - NV.asMap(new NV[] {// - new NV(Predicate.Annotations.BOP_ID, joinId2),// - // constraint x == z - new NV(PipelineJoin.Annotations.CONSTRAINTS,new IConstraint[]{ - new EQ(x,z) - }), - // join is optional. - new NV(PipelineJoin.Annotations.OPTIONAL,true),// - // optional target is the same as the default target. 
- new NV(PipelineOp.Annotations.ALT_SINK_REF,sliceId),// - })); + final PipelineOp join2Op = new PipelineJoin<E>(// + new BOp[] { join1Op },// + new NV(Predicate.Annotations.BOP_ID, joinId2),// + new NV(PipelineJoin.Annotations.PREDICATE, pred2Op),// + // constraint x == z + new NV(PipelineJoin.Annotations.CONSTRAINTS, + new IConstraint[] { new EQ(x, z) }), + // join is optional. + new NV(PipelineJoin.Annotations.OPTIONAL, true),// + // optional target is the same as the default target. + new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId)); final PipelineOp sliceOp = new SliceOp(// new BOp[]{join2Op}, @@ -1843,18 +1827,16 @@ new NV(ConditionalRoutingOp.Annotations.CONDITION, condition), })); - final PipelineOp join1Op = new PipelineJoin<E>(// - cond, pred1Op,// - NV.asMap(new NV[] {// - new NV(Predicate.Annotations.BOP_ID, joinId1),// - })); + final PipelineOp join1Op = new PipelineJoin<E>(// + new BOp[] { cond }, // + new NV(Predicate.Annotations.BOP_ID, joinId1),// + new NV(PipelineJoin.Annotations.PREDICATE, pred1Op)); + + final PipelineOp join2Op = new PipelineJoin<E>(// + new BOp[] { join1Op },// + new NV(Predicate.Annotations.BOP_ID, joinId2),// + new NV(PipelineJoin.Annotations.PREDICATE, pred2Op)); - final PipelineOp join2Op = new PipelineJoin<E>(// - join1Op, pred2Op, // - NV.asMap(new NV[] {// - new NV(Predicate.Annotations.BOP_ID, joinId2),// - })); - final PipelineOp sliceOp = new SliceOp(// new BOp[]{join2Op}, NV.asMap(new NV[] {// Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestFederatedQueryEngine.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestFederatedQueryEngine.java 2010-10-22 20:08:48 UTC (rev 3841) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestFederatedQueryEngine.java 2010-10-24 18:18:10 UTC (rev 3842) @@ -544,17 +544,14 @@ new NV( Predicate.Annotations.REMOTE_ACCESS_PATH,false), })); - 
final PipelineJoin<E> joinOp = new PipelineJoin<E>(startOp/* left */, - predOp/* right */, - // join annotations - NV.asMap(new NV[] {// - new NV(Predicate.Annotations.BOP_ID, joinId),// - // Note: shard-partitioned joins! - new NV( Predicate.Annotations.EVALUATION_CONTEXT, - BOpEvaluationContext.SHARDED),// - })// - ); - + final PipelineJoin<E> joinOp = new PipelineJoin<E>( + new BOp[] { startOp },// + new NV(Predicate.Annotations.BOP_ID, joinId),// + new NV(PipelineJoin.Annotations.PREDICATE, predOp),// + // Note: shard-partitioned joins! + new NV(Predicate.Annotations.EVALUATION_CONTEXT, + BOpEvaluationContext.SHARDED)); + final PipelineOp query = new SliceOp(new BOp[] { joinOp }, // slice annotations NV.asMap(new NV[] {// @@ -712,20 +709,17 @@ new NV( Predicate.Annotations.REMOTE_ACCESS_PATH,false), })); - final PipelineJoin<E> joinOp = new PipelineJoin<E>(startOp/* left */, - predOp/* right */, - // join annotations - NV.asMap(new NV[] {// - new NV(Predicate.Annotations.BOP_ID, joinId),// - // Note: shard-partitioned joins! - new NV( Predicate.Annotations.EVALUATION_CONTEXT, - BOpEvaluationContext.SHARDED),// - // impose constraint on the join. - new NV(PipelineJoin.Annotations.CONSTRAINTS, - new IConstraint[] { new EQConstant(y, - new Constant<String>("Paul")) }),// - })// - ); + final PipelineJoin<E> joinOp = new PipelineJoin<E>( + new BOp[] { startOp },// + new NV(Predicate.Annotations.BOP_ID, joinId),// + new NV(PipelineJoin.Annotations.PREDICATE, predOp), + // Note: shard-partitioned joins! + new NV(Predicate.Annotations.EVALUATION_CONTEXT, + BOpEvaluationContext.SHARDED),// + // impose constraint on the join. 
+ new NV(PipelineJoin.Annotations.CONSTRAINTS, + new IConstraint[] { new EQConstant(y, + new Constant<String>("Paul")) })); final PipelineOp query = new SliceOp(new BOp[] { joinOp }, // slice annotations @@ -865,17 +859,14 @@ ITx.READ_COMMITTED),// })); - final PipelineJoin<E> joinOp = new PipelineJoin<E>(startOp/* left */, - predOp/* right */, - // join annotations - NV.asMap(new NV[] {// - new NV(Predicate.Annotations.BOP_ID, joinId),// - // Note: shard-partitioned joins! - new NV( Predicate.Annotations.EVALUATION_CONTEXT, - BOpEvaluationContext.SHARDED),// - })// - ); - + final PipelineJoin<E> joinOp = new PipelineJoin<E>( + new BOp[] { startOp },// + new NV(Predicate.Annotations.BOP_ID, joinId),// + new NV(PipelineJoin.Annotations.PREDICATE, predOp),// + // Note: shard-partitioned joins! + new NV(Predicate.Annotations.EVALUATION_CONTEXT, + BOpEvaluationContext.SHARDED)); + final PipelineOp query = new SliceOp(new BOp[] { joinOp }, // slice annotations NV.asMap(new NV[] {// @@ -1030,22 +1021,20 @@ })); final PipelineOp join1Op = new PipelineJoin<E>(// - startOp, pred1Op,// - NV.asMap(new NV[] {// - new NV(Predicate.Annotations.BOP_ID, joinId1),// - // Note: shard-partitioned joins! - new NV( Predicate.Annotations.EVALUATION_CONTEXT, - BOpEvaluationContext.SHARDED),// - })); + new BOp[]{startOp},// + new NV(Predicate.Annotations.BOP_ID, joinId1),// + new NV(PipelineJoin.Annotations.PREDICATE,pred1Op),// + // Note: shard-partitioned joins! + new NV( Predicate.Annotations.EVALUATION_CONTEXT, + BOpEvaluationContext.SHARDED)); - final PipelineOp join2Op = new PipelineJoin<E>(// - join1Op, pred2Op,// - NV.asMap(new NV[] {// - new NV(Predicate.Annotations.BOP_ID, joinId2),// - // Note: shard-partitioned joins! 
- new NV( Predicate.Annotations.EVALUATION_CONTEXT, - BOpEvaluationContext.SHARDED),// - })); + final PipelineOp join2Op = new PipelineJoin<E>(// + new BOp[] { join1Op },// + new NV(Predicate.Annotations.BOP_ID, joinId2),// + new NV(PipelineJoin.Annotations.PREDICATE, pred2Op),// + // Note: shard-partitioned joins! + new NV(Predicate.Annotations.EVALUATION_CONTEXT, + BOpEvaluationContext.SHARDED)); final PipelineOp query = new SliceOp(new BOp[] { join2Op }, NV.asMap(new NV[] {// @@ -1228,31 +1217,28 @@ new NV(Predicate.Annotations.TIMESTAMP, ITx.READ_COMMITTED),// })); - final PipelineOp join1Op = new PipelineJoin<E>(// - startOp, pred1Op,// - NV.asMap(new NV[] {// - new NV(Predicate.Annotations.BOP_ID, joinId1),// - // Note: shard-partitioned joins! - new NV( Predicate.Annotations.EVALUATION_CONTEXT, - BOpEvaluationContext.SHARDED),// - })); + final PipelineOp join1Op = new PipelineJoin<E>(// + new BOp[] { startOp },// + new NV(Predicate.Annotations.BOP_ID, joinId1),// + new NV(PipelineJoin.Annotations.PREDICATE, pred1Op),// + // Note: shard-partitioned joins! + new NV(Predicate.Annotations.EVALUATION_CONTEXT, + BOpEvaluationContext.SHARDED)); - final PipelineOp join2Op = new PipelineJoin<E>(// - join1Op, pred2Op,// - NV.asMap(new NV[] {// - new NV(Predicate.Annotations.BOP_ID, joinId2),// - // Note: shard-partitioned joins! - new NV( Predicate.Annotations.EVALUATION_CONTEXT, - BOpEvaluationContext.SHARDED),// - // constraint x == z - new NV(PipelineJoin.Annotations.CONSTRAINTS,new IConstraint[]{ - new EQ(x,z) - }), - // join is optional. - new NV(PipelineJoin.Annotations.OPTIONAL,true),// - // optional target is the same as the default target. - new NV(PipelineOp.Annotations.ALT_SINK_REF,sliceId),// - })); + final PipelineOp join2Op = new PipelineJoin<E>(// + new BOp[] { join1Op },// + new NV(Predicate.Annotations.BOP_ID, joinId2),// + new NV(PipelineJoin.Annotations.PREDICATE, pred2Op),// + // Note: shard-partitioned joins! 
+ new NV(Predicate.Annotations.EVALUATION_CONTEXT, + BOpEvaluationContext.SHARDED),// + // constraint x == z + new NV(PipelineJoin.Annotations.CONSTRAINTS, + new IConstraint[] { new EQ(x, z) }), + // join is optional. + new NV(PipelineJoin.Annotations.OPTIONAL, true),// + // optional target is the same as the default target. + new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId)); final PipelineOp sliceOp = new SliceOp(// new BOp[]{join2Op}, Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/join/TestPipelineJoin.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/join/TestPipelineJoin.java 2010-10-22 20:08:48 UTC (rev 3841) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/join/TestPipelineJoin.java 2010-10-24 18:18:10 UTC (rev 3842) @@ -171,27 +171,26 @@ final int startId = 1; final int joinId = 2; final int predId = 3; - final PipelineJoin<E> query = new PipelineJoin<E>( - // left - new CopyOp(new BOp[] {}, NV.asMap(new NV[] {// - new NV(Predicate.Annotations.BOP_ID, startId),// - })), - // right - new Predicate<E>(new IVariableOrConstant[] { - new Constant<String>("Mary"), Var.var("x") }, NV - .asMap(new NV[] {// - new NV(Predicate.Annotations.RELATION_NAME, - new String[] { namespace }),// - new NV(Predicate.Annotations.BOP_ID, predId),// - new NV(Predicate.Annotations.TIMESTAMP, ITx.READ_COMMITTED),// - })), - // join annotations - NV - .asMap(new NV[] { new NV(Predicate.Annotations.BOP_ID, - joinId),// - })// - ); + + final BOp startOp = new CopyOp(new BOp[] {}, NV.asMap(new NV[] {// + new NV(Predicate.Annotations.BOP_ID, startId),// + })); + final Predicate<E> predOp = new Predicate<E>(new IVariableOrConstant[] { + new Constant<String>("Mary"), Var.var("x") }, NV + .asMap(new NV[] {// + new NV(Predicate.Annotations.RELATION_NAME, + new String[] { namespace }),// + new NV(Predicate.Annotations.BOP_ID, predId),// + new 
NV(Predicate.Annotations.TIMESTAMP, + ITx.READ_COMMITTED),// + })); + + final PipelineJoin<E> query = new PipelineJoin<E>( + new BOp[] { startOp },// + new NV(Predicate.Annotations.BOP_ID, joinId),// + new NV(PipelineJoin.Annotations.PREDICATE, predOp)); + // the expected solutions. final IBindingSet[] expected = new IBindingSet[] {// new ArrayBindingSet(// @@ -260,29 +259,28 @@ final int startId = 1; final int joinId = 2; final int predId = 3; - final PipelineJoin<E> query = new PipelineJoin<E>( - // left - new CopyOp(new BOp[] {}, NV.asMap(new NV[] {// - new NV(BOpBase.Annotations.BOP_ID, startId),// - })), - // right - new Predicate<E>( - new IVariableOrConstant[] { new Constant<String>("Mary"), y },// - NV.asMap(new NV[] {// - new NV( - Predicate.Annotations.RELATION_NAME, - new String[] { namespace }),// - new NV(Predicate.Annotations.BOP_ID, - predId),// - new NV(Predicate.Annotations.TIMESTAMP, ITx.READ_COMMITTED),// - })), - // join annotations + + final BOp startOp = new CopyOp(new BOp[] {}, NV.asMap(new NV[] {// + new NV(BOpBase.Annotations.BOP_ID, startId),// + })); + + final Predicate<E> predOp = new Predicate<E>( + new IVariableOrConstant[] { new Constant<String>("Mary"), y },// NV.asMap(new NV[] {// - new NV(BOpBase.Annotations.BOP_ID, joinId),// - new NV( PipelineJoin.Annotations.CONSTRAINTS, - new IConstraint[] { new INBinarySearch<String>( - y, set) }) })// - ); + new NV( + Predicate.Annotations.RELATION_NAME, + new String[] { namespace }),// + new NV(Predicate.Annotations.BOP_ID, + predId),// + new NV(Predicate.Annotations.TIMESTAMP, ITx.READ_COMMITTED),// + })); + + final PipelineJoin<E> query = new PipelineJoin<E>( + new BOp[] { startOp },// + new NV(BOpBase.Annotations.BOP_ID, joinId),// + new NV(PipelineJoin.Annotations.PREDICATE, predOp),// + new NV( PipelineJoin.Annotations.CONSTRAINTS, + new IConstraint[] { new INBinarySearch<String>(y, set) })); // the expected solution (just one). 
final IBindingSet[] expected = new IBindingSet[] {// @@ -352,32 +350,29 @@ final Var<String> y = Var.var("y"); final int startId = 1; - final int joinId = 2; - final int predId = 3; - final PipelineJoin<E> query = new PipelineJoin<E>( - // left - new CopyOp(new BOp[] {}, NV.asMap(new NV[] {// - new NV(BOpBase.Annotations.BOP_ID, startId),// - })), - // right - new Predicate<E>( - new IVariableOrConstant[] { x, y },// - NV.asMap(new NV[] {// - new NV( - Predicate.Annotations.RELATION_NAME, - new String[] { namespace }),// - new NV(Predicate.Annotations.BOP_ID, - predId),// - new NV(Predicate.Annotations.TIMESTAMP, - ITx.READ_COMMITTED),// - })), - // join annotations - NV.asMap(new NV[] {// - new NV(BOpBase.Annotations.BOP_ID, joinId),// - new NV(PipelineJoin.Annotations.SELECT,new IVariable[]{y})// - })// - ); + final int joinId = 2; + final int predId = 3; + final BOp startOp = new CopyOp(new BOp[] {}, NV.asMap(new NV[] {// + new NV(BOpBase.Annotations.BOP_ID, startId),// + })); + + final Predicate<E> predOp = new Predicate<E>(new IVariableOrConstant[] { + x, y },// + NV.asMap(new NV[] {// + new NV(Predicate.Annotations.RELATION_NAME, + new String[] { namespace }),// + new NV(Predicate.Annotations.BOP_ID, predId),// + new NV(Predicate.Annotations.TIMESTAMP, + ITx.READ_COMMITTED),// + })); + + final PipelineJoin<E> query = new PipelineJoin<E>( + new BOp[] { startOp },// + new NV(BOpBase.Annotations.BOP_ID, joinId),// + new NV(PipelineJoin.Annotations.PREDICATE, predOp),// + new NV(PipelineJoin.Annotations.SELECT, new IVariable[] { y })); + /* * The expected solutions. 
*/ @@ -458,31 +453,25 @@ final int joinId = 2; final int predId = 3; - final PipelineJoin<E> query = new PipelineJoin<E>( - // left - new CopyOp(new BOp[] {}, NV.asMap(new NV[] {// - new NV(Predicate.Annotations.BOP_ID, startId),// - })), - // right - new Predicate<E>(new IVariableOrConstant[] { - new Constant<String>("Mary"), x }, NV - .asMap(new NV[] {// - new NV(Predicate.Annotations.RELATION_NAME, - new String[] { namespace }),// - new NV(Predicate.Annotations.BOP_ID, predId),// - new NV(Predicate.Annotations.TIMESTAMP, ITx.READ_COMMITTED),// - })), - // join annotations - NV - .asMap(new NV[] { // - new NV(BOpBase.Annotations.BOP_ID, - joinId), - new NV(PipelineJoin.Annotations.OPTIONAL, - Boolean.TRUE),// -// - })// - ); + final BOp startOp = new CopyOp(new BOp[] {}, NV.asMap(new NV[] {// + new NV(Predicate.Annotations.BOP_ID, startId),// + })); + final Predicate<E> pred = new Predicate<E>(new IVariableOrConstant[] { + new Constant<String>("Mary"), x }, NV.asMap(new NV[] {// + new NV(Predicate.Annotations.RELATION_NAME, + new String[] { namespace }),// + new NV(Predicate.Annotations.BOP_ID, predId),// + new NV(Predicate.Annotations.TIMESTAMP, + ITx.READ_COMMITTED),// + })); + + final PipelineJoin<E> query = new PipelineJoin<E>( + new BOp[] { startOp }, // + new NV(BOpBase.Annotations.BOP_ID, joinId),// + new NV(PipelineJoin.Annotations.PREDICATE, pred),// + new NV(PipelineJoin.Annotations.OPTIONAL, Boolean.TRUE)); + /* * Setup the source with two initial binding sets. One has nothing bound * and will join with (Mary,x:=John) and (Mary,x:=Paul). 
The other has @@ -565,33 +554,27 @@ final int startId = 1; final int joinId = 2; - final int predId = 3; + final int predId = 3; - final PipelineJoin<E> query = new PipelineJoin<E>( - // left - new CopyOp(new BOp[] {}, NV.asMap(new NV[] {// - new NV(Predicate.Annotations.BOP_ID, startId),// - })), - // right - new Predicate<E>(new IVariableOrConstant[] { - new Constant<String>("Mary"), x }, NV - .asMap(new NV[] {// - new NV(Predicate.Annotations.RELATION_NAME, - new String[] { namespace }),// - new NV(Predicate.Annotations.BOP_ID, predId),// - new NV(Predicate.Annotations.TIMESTAMP, ITx.READ_COMMITTED),// - })), - // join annotations - NV - .asMap(new NV[] { // - new NV(BOpBase.Annotations.BOP_ID, - joinId), - new NV(PipelineJoin.Annotations.OPTIONAL, - Boolean.TRUE),// -// - })// - ); + final BOp startOp = new CopyOp(new BOp[] {}, NV.asMap(new NV[] {// + new NV(Predicate.Annotations.BOP_ID, startId),// + })); + final Predicate<E> pred = new Predicate<E>(new IVariableOrConstant[] { + new Constant<String>("Mary"), x }, NV.asMap(new NV[] {// + new NV(Predicate.Annotations.RELATION_NAME, + new String[] { namespace }),// + new NV(Predicate.Annotations.BOP_ID, predId),// + new NV(Predicate.Annotations.TIMESTAMP, + ITx.READ_COMMITTED),// + })); + + final PipelineJoin<E> query = new PipelineJoin<E>( + new BOp[] { startOp },// + new NV(BOpBase.Annotations.BOP_ID, joinId),// + new NV(PipelineJoin.Annotations.PREDICATE, pred),// + new NV(PipelineJoin.Annotations.OPTIONAL, Boolean.TRUE)); + /* * Setup the source with two initial binding sets. One has nothing bound * and will join with (Mary,x:=John) and (Mary,x:=Paul). 
The other has Modified: branches/QUADS_QUERY_BRANCH/bigdata-jini/src/test/com/bigdata/bop/fed/jini/TestJiniFederatedQueryEngine.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-jini/src/test/com/bigdata/bop/fed/jini/TestJiniFederatedQueryEngine.java 2010-10-22 20:08:48 UTC (rev 3841) +++ branches/QUADS_QUERY_BRANCH/bigdata-jini/src/test/com/bigdata/bop/fed/jini/TestJiniFederatedQueryEngine.java 2010-10-24 18:18:10 UTC (rev 3842) @@ -453,41 +453,45 @@ final int joinId = 2; final int predId = 3; final int sliceId = 4; - final PipelineOp query = - new SliceOp(new BOp[]{new PipelineJoin<E>( - // left - new StartOp(new BOp[] {}, NV.asMap(new NV[] {// - new NV(Predicate.Annotations.BOP_ID, startId),// - new NV(SliceOp.Annotations.EVALUATION_CONTEXT, - BOpEvaluationContext.CONTROLLER),// - })), - // right - new Predicate<E>(new IVariableOrConstant[] { - new Constant<String>("Mary"), Var.var("value") }, NV - .asMap(new NV[] {// - new NV(Predicate.Annotations.RELATION_NAME, - new String[] { namespace }),// - // Note: local access path! - new NV( Predicate.Annotations.REMOTE_ACCESS_PATH,false), - new NV(Predicate.Annotations.BOP_ID, predId),// - new NV... [truncated message content] |
From: <tho...@us...> - 2010-10-22 20:08:54
|
Revision: 3841 http://bigdata.svn.sourceforge.net/bigdata/?rev=3841&view=rev Author: thompsonbry Date: 2010-10-22 20:08:48 +0000 (Fri, 22 Oct 2010) Log Message: ----------- Replaced use of LinkedHashMap with ConcurrentHashMap to avoid problems with concurrent modification arising from an iterator over a set whose contents are being changed from within a subroutine in the same thread (versus a second thread). Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java 2010-10-22 19:58:41 UTC (rev 3840) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java 2010-10-22 20:08:48 UTC (rev 3841) @@ -452,7 +452,7 @@ this.operatorFutures = new ConcurrentHashMap<BSBundle, ChunkFutureTask>(); - this.operatorQueues = new LinkedHashMap<BSBundle, BlockingQueue<IChunkMessage<IBindingSet>>>(); + this.operatorQueues = new ConcurrentHashMap<BSBundle, BlockingQueue<IChunkMessage<IBindingSet>>>(); /* * Setup the BOpStats object for each pipeline operator in the query. This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2010-10-22 19:58:51
|
Revision: 3840 http://bigdata.svn.sourceforge.net/bigdata/?rev=3840&view=rev Author: thompsonbry Date: 2010-10-22 19:58:41 +0000 (Fri, 22 Oct 2010) Log Message: ----------- Added support for sampling from a local access path. I still need to add support for sampling in scale-out. This is in service of adaptive query optimization. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/AccessPath.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestAll.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleIndex.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestSampleIndex.java Removed Paths: ------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/AbstractSampleIndex.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleLocalBTree.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleLocalShard.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestSampleLocalBTree.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestSampleLocalShard.java Deleted: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/AbstractSampleIndex.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/AbstractSampleIndex.java 2010-10-22 19:47:04 UTC (rev 3839) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/AbstractSampleIndex.java 2010-10-22 19:58:41 UTC (rev 3840) @@ -1,125 +0,0 @@ -/** - -Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. - -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. 
- -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ -/* - * Created on Aug 16, 2010 - */ - -package com.bigdata.bop.ap; - -import com.bigdata.bop.AbstractAccessPathOp; -import com.bigdata.bop.BOp; -import com.bigdata.bop.IBindingSet; -import com.bigdata.bop.IPredicate; -import com.bigdata.bop.NV; -import com.bigdata.btree.IIndex; -import com.bigdata.relation.accesspath.IAccessPath; - -/** - * Abstract base class for sampling operator for an {@link IIndex}. - * - * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id$ - * @param <E> - * The generic type of the elements materialized from that index. - * - * @todo Implement sample operator. E.g., sampleRange(fromKey,toKey,limit). This - * could be on {@link IIndex} or on {@link IAccessPath}. For a shard view, - * it must proportionally select from among the ordered components of the - * view. For a hash table it would be sample(limit) since range based - * operations are not efficient. - * <p> - * This should accept an index, not a predicate (for RDF we determine the - * index an analysis of the bound and unbound arguments on the predicate - * and always have a good index, but this is not true in the general - * case). When the index is remote, it should be executed at the remote - * index. - * - * @todo This needs to operation on element chunks, not {@link IBindingSet} - * chunks. It also may not require pipelining. - */ -abstract public class AbstractSampleIndex<E> extends AbstractAccessPathOp<E> { - - /** - * - */ - private static final long serialVersionUID = 1L; - - /** - * Known annotations. 
- */ - public interface Annotations extends BOp.Annotations { - /** - * The sample limit. - */ - String LIMIT = "limit"; - } - - protected AbstractSampleIndex(final IPredicate<E> pred, final int limit) { - - super(new BOp[] { pred }, NV.asMap(new NV[] {// - new NV(Annotations.LIMIT, Integer.valueOf(limit)) // - })); - - if (pred == null) - throw new IllegalArgumentException(); - - if (limit <= 0) - throw new IllegalArgumentException(); - - switch (getEvaluationContext()) { - case HASHED: - case SHARDED: - break; - default: - throw new UnsupportedOperationException( - Annotations.EVALUATION_CONTEXT + "=" - + getEvaluationContext()); - } - - } - - @SuppressWarnings("unchecked") - public IPredicate<E> pred() { - - return (IPredicate<E>) get(0); - - } - - public int limit() { - - return (Integer) getRequiredProperty(Annotations.LIMIT); - - } - -// /** -// * This is a shard wise operator. -// */ -// @Override -// public BOpEvaluationContext getEvaluationContext() { -// -// return BOpEvaluationContext.SHARDED; -// -// } - -} Copied: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleIndex.java (from rev 3756, branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/AbstractSampleIndex.java) =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleIndex.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleIndex.java 2010-10-22 19:58:41 UTC (rev 3840) @@ -0,0 +1,451 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Aug 16, 2010 + */ + +package com.bigdata.bop.ap; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.Map; +import java.util.concurrent.Callable; + +import com.bigdata.bop.AbstractAccessPathOp; +import com.bigdata.bop.BOp; +import com.bigdata.bop.BOpContextBase; +import com.bigdata.bop.IPredicate; +import com.bigdata.btree.AbstractBTree; +import com.bigdata.btree.ILeafCursor; +import com.bigdata.btree.ILinearList; +import com.bigdata.btree.IRangeQuery; +import com.bigdata.btree.ITuple; +import com.bigdata.btree.ITupleCursor; +import com.bigdata.btree.filter.Advancer; +import com.bigdata.btree.view.FusedView; +import com.bigdata.relation.IRelation; +import com.bigdata.relation.accesspath.AccessPath; +import com.bigdata.relation.accesspath.IAccessPath; +import com.bigdata.relation.rule.IAccessPathExpander; +import com.bigdata.striterator.IKeyOrder; + +import cutthecrap.utils.striterators.IFilter; + +/** + * Sampling operator for the {@link IAccessPath} implied by an + * {@link IPredicate}. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id: AbstractSampleIndex.java 3672 2010-09-28 23:39:42Z thompsonbry + * $ + * @param <E> + * The generic type of the elements materialized from that index. + * + * @todo This is a basic operator which is designed to support adaptive query + * optimization. 
However, there are a lot of possible semantics for + * sampling, including: uniform distribution, randomly distribution, tuple + * at a time versus clustered (sampling with leaves), adaptive sampling + * until the sample reflects some statistical property of the underlying + * population, etc. + */ +public class SampleIndex<E> extends AbstractAccessPathOp<E> { + + /** + * + */ + private static final long serialVersionUID = 1L; + + /** + * Known annotations. + */ + public interface Annotations extends BOp.Annotations { + + /** + * The sample limit (default {@value #DEFAULT_LIMIT}). + */ + String LIMIT = "limit"; + + int DEFAULT_LIMIT = 100; + + /** + * The {@link IPredicate} describing the access path to be sampled + * (required). + */ + String PREDICATE = SampleIndex.class.getName() + ".predicate"; + + } + + public SampleIndex(SampleIndex<E> op) { + + super(op); + + } + + public SampleIndex(BOp[] args, Map<String, Object> annotations) { + + super(args, annotations); + + } + + public int limit() { + + return getProperty(Annotations.LIMIT, Annotations.DEFAULT_LIMIT); + + } + + @SuppressWarnings("unchecked") + public IPredicate<E> getPredicate() { + + return (IPredicate<E>) getRequiredProperty(Annotations.PREDICATE); + + } + + /** + * Return a sample from the access path associated with the + * {@link Annotations#PREDICATE}. + */ + public E[] eval(final BOpContextBase context) { + + try { + return new SampleTask(context).call(); + } catch (Exception e) { + throw new RuntimeException(e); + } + + } + + /** + * Sample an {@link IAccessPath}. + * + * FIXME This needs to handle each of the following conditions: + * <p> + * Timestamp {read-historical, read-committed, read-write tx, unisolated}<br> + * Index view {standalone, partitioned,global view of partitioned}<br> + * + * @todo The general approach uses the {@link ILinearList} interface to take + * evenly distributed or randomly distributed samples from the + * underlying index. 
This is done using an {@link IFilter} which is + * evaluated local to the index. This works whether or not the access + * path is using a partitioned view of the index. + * <p> + * When sampling an index shard the {@link ILinearList} API is not + * defined for the {@link FusedView}. Since this sampling operator + * exists for the purposes of estimating the cardinality of an access + * path, we can dispense with the fused view and collect a number of + * samples from each component of that view which is proportional to + * the range count of the view divided by the range count of the + * component index. This may cause tuples which have since been + * deleted to become visible, but this should not cause problems when + * estimating the cardinality of a join path as long as we always + * report the actual tuples from the fused view in the case where the + * desired sample size is LTE the estimated range count of the access + * path. + * + * @todo Better performance could be realized by accepting all tuples in a + * leaf. This requires a sensitivity to the leaf boundaries which + * might be obtained with an {@link ITupleCursor} extension interface + * for local indices or with the {@link ILeafCursor} interface if that + * can be exposed from a sufficiently low level {@link ITupleCursor} + * implementation. However, when they are further constraints layered + * onto the access path by the {@link IPredicate} it may be that such + * clustered (leaf at once) sampling is not practical. + * + * @todo When sampling a global view of a partitioned index, we should focus + * the sample on a subset of the index partitions in order to + * "cluster" the effort. This can of course introduce bias. However, + * if there are a lot of index partitions then the sample will of + * necessity be very small in proportion to the data volume and the + * opportunity for bias will be correspondingly large. 
+ * + * @todo If there is an {@link IAccessPathExpander} then + */ + private class SampleTask implements Callable<E[]> { + + private final BOpContextBase context; + + SampleTask(final BOpContextBase context) { + + this.context = context; + + } + + /** Return a sample from the access path. */ + public E[] call() throws Exception { + + return sample(limit(), getPredicate()).getSample(); + + } + + /** + * Return a sample from the access path. + * + * @param limit + * @return + */ + public AccessPathSample<E> sample(final int limit, + IPredicate<E> predicate) { + + final IRelation<E> relation = context.getRelation(predicate); + + // @todo assumes raw AP. + final AccessPath<E> accessPath = (AccessPath<E>) context + .getAccessPath(relation, predicate); + + final long rangeCount = accessPath.rangeCount(false/* exact */); + + if (limit > rangeCount) { + + /* + * The sample will contain everything in the access path. + */ + return new AccessPathSample<E>(limit, accessPath); + + } + + /* + * Add the CURSOR and PARALLEL flags to the predicate. + * + * @todo turn off REVERSE if specified. + */ + final int flags = predicate.getProperty( + IPredicate.Annotations.FLAGS, + IPredicate.Annotations.DEFAULT_FLAGS) + | IRangeQuery.CURSOR + | IRangeQuery.PARALLEL; + + predicate = (IPredicate<E>) predicate.setProperty( + IPredicate.Annotations.FLAGS, flags); + + /* + * Add advancer to collect sample. + */ + predicate = ((Predicate<E>) predicate) + .addIndexLocalFilter(new SampleAdvancer<E>(//rangeCount, + limit, accessPath.getFromKey(), accessPath + .getToKey())); + + return new AccessPathSample<E>(limit, context.getAccessPath( + relation, predicate)); + + } + + } + + /** + * An advancer pattern which is designed to take evenly distributed samples + * from an index. The caller specifies the #of tuples to be skipped after + * each tuple visited. That number should be computed based on the estimated + * range count of the index and the desired sample size. 
This can fail to + * gather the desired number of sample if additional filters are applied + * which further restrict the elements selected by the predicate. However, + * it will still faithfully represent the expected cardinality of the + * sampled access path. + * + * @author tho...@us... + * + * @param <E> + * The generic type of the elements visited by that access path. + */ + private static class SampleAdvancer<E> extends Advancer<E> { + + private static final long serialVersionUID = 1L; + + /** The desired total limit on the sample. */ + private final int limit; + + private final byte[] /*fromKey,*/ toKey; + + /* + * Transient data. This gets initialized when we visit the first tuple. + */ + + /** The #of tuples to be skipped after every tuple visited. */ + private transient int skipCount; + /** The #of tuples accepted so far. */ + private transient int nread = 0; + /** The inclusive lower bound of the first tuple actually visited. */ + private transient int fromIndex; + /** The exclusive upper bound of the last tuple which could be visited. */ + private transient int toIndex; + + /** + * + * @param limit + * The #of samples to visit. + */ + public SampleAdvancer(final int limit, final byte[] fromKey, + final byte[] toKey) { + + this.limit = limit; + this.toKey = toKey; + } + + /** + * @todo This is taking evenly spaced samples. It is much more efficient + * to take clusters of samples when you can accept the bias. + * Taking a clustered sample really requires knowing where the + * leaf boundaries are in the index, e.g., using + * {@link ILeafCursor}. + */ + @Override + protected void advance(final ITuple<E> tuple) { + + final AbstractBTree ndx = (AbstractBTree) src.getIndex(); + + final int currentIndex = ndx.indexOf(tuple.getKey()); + + if (nread == 0) { + + // inclusive lower bound. + fromIndex = currentIndex; + + // exclusive upper bound. + toIndex = toKey == null ? 
ndx.getEntryCount() : ndx + .indexOf(toKey); + + final int rangeCount = (toIndex - fromIndex); + + skipCount = Math.max(1, rangeCount / limit); + + // minus one since src.next() already consumed one tuple. + skipCount -= 1; + +// System.err.println("limit=" + limit + ", rangeCount=" +// + rangeCount + ", skipCount=" + skipCount); + + } + + nread++; + + if (skipCount > 0) { + + /* + * If the skip count is positive, then skip over N tuples. + */ + + final int nextIndex = Math.min(ndx.getEntryCount() - 1, + currentIndex + skipCount); + + src.seek(ndx.keyAt(nextIndex)); + + } + + } + + } // class SampleAdvancer + + /** + * A sample from an access path. + * + * @param <E> + * The generic type of the elements visited by that access + * path. + * + * @author tho...@us... + */ + public static class AccessPathSample<E> implements Serializable { + + private static final long serialVersionUID = 1L; + + private final IPredicate<E> pred; + private final IKeyOrder<E> keyOrder; + private final int limit; + private final E[] sample; + + /** + * Constructor populates the sample using the caller's + * {@link IAccessPath#iterator()}. The caller is responsible for setting + * up the {@link IAccessPath} such that it provides an efficient sample + * of the access path with the appropriate constraints. + * + * @param limit + * @param accessPath + */ + private AccessPathSample(final int limit, + final IAccessPath<E> accessPath) { + + if (limit <= 0) + throw new IllegalArgumentException(); + + if (accessPath == null) + throw new IllegalArgumentException(); + + this.pred = accessPath.getPredicate(); + + this.keyOrder = accessPath.getKeyOrder(); + + this.limit = limit; + + // drain the access path iterator. 
+ final ArrayList<E> tmp = new ArrayList<E>(limit); + + int nsamples = 0; + + final Iterator<E> src = accessPath.iterator(0L/* offset */, limit, + limit/* capacity */); + + while (src.hasNext() && nsamples < limit) { + + tmp.add(src.next()); + + nsamples++; + + } + + // convert to an array of the appropriate type. + sample = tmp.toArray((E[]) java.lang.reflect.Array.newInstance( + tmp.get(0).getClass(), tmp.size())); + + } + + public IPredicate<E> getPredicate() { + return pred; + } + + public boolean isEmpty() { + return sample != null; + } + + public int sampleSize() { + return sample == null ? 0 : sample.length; + } + + public int limit() { + return limit; + } + + /** + * The sample. + * + * @return The sample -or- <code>null</code> if the sample was + * empty. + */ + public E[] getSample() { + return sample; + } + + } // AccessPathSample + +} Deleted: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleLocalBTree.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleLocalBTree.java 2010-10-22 19:47:04 UTC (rev 3839) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleLocalBTree.java 2010-10-22 19:58:41 UTC (rev 3840) @@ -1,95 +0,0 @@ -package com.bigdata.bop.ap; - -import java.util.concurrent.Callable; -import java.util.concurrent.FutureTask; - -import com.bigdata.bop.BOpContext; -import com.bigdata.bop.IPredicate; -import com.bigdata.btree.AbstractBTree; -import com.bigdata.relation.accesspath.IBlockingBuffer; - -/** - * Sampling operator for an {@link AbstractBTree}. 
- * - * @author <a href="mailto:tho...@us...">Bryan - * Thompson</a> - */ -public class SampleLocalBTree<E> extends AbstractSampleIndex<E> { - - /** - * - */ - private static final long serialVersionUID = 1L; - - public SampleLocalBTree(final IPredicate<E> pred, final int limit) { - - super(pred, limit); - - } - - public FutureTask<Void> eval(final BOpContext<E> context) { - - if (context.getPartitionId() != -1) { - // Must not be specific to a shard. - throw new UnsupportedOperationException(); - } - - return new FutureTask<Void>(new LocalBTreeSampleTask(context)); - - } - - /** - * Sample an {@link AbstractBTree}. - */ - private class LocalBTreeSampleTask implements - Callable<Void> { - - private final BOpContext<E> context; - - private final IBlockingBuffer<E[]> sink; - - LocalBTreeSampleTask(final BOpContext<E> context) { - - this.context = context; - - this.sink = context.getSink(); - - } - - public Void call() throws Exception { - - /* - * FIXME Decide how we are going to resolve the appropriate index - * for the predicate. This could go through - * IJoinNexus.getTailRelationView() and - * IJoinNexus.getTailAccessPath(). Those are just going through the - * locator. Review how the actual access path is selected versus the - * IKeyOrder specified on the IPredicate. If the IKeyOrder of - * interest is on the IPredicate, then why not just use that? - */ - -// final IPredicate<E> pred = pred(); -// -// final String relationName = pred.getOnlyRelationName(); -// -// final IRelation<E> rel = (IRelation<E>) joinNexus.getIndexManager() -// .getResourceLocator().locate(relationName, -// joinNexus.getReadTimestamp()); -// -// final IAccessPath<E> accessPath = rel.getAccessPath(pred); - - /* - * FIXME Sample N randomly chosen indices or evenly selected? - * - * Note: If there are only 100 leaves and we sample evenly, that - * could result in reading all the leaves. However, when the - * B+Tree is large we will only touch a few leaves even with - * uniform sampling. 
- */ - throw new UnsupportedOperationException(); - - } - - } // class LocalBTreeSampleTask - -} Deleted: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleLocalShard.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleLocalShard.java 2010-10-22 19:47:04 UTC (rev 3839) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleLocalShard.java 2010-10-22 19:58:41 UTC (rev 3840) @@ -1,87 +0,0 @@ -package com.bigdata.bop.ap; - -import java.util.concurrent.Callable; -import java.util.concurrent.Future; -import java.util.concurrent.FutureTask; - -import com.bigdata.bop.BOpContext; -import com.bigdata.bop.IPredicate; -import com.bigdata.btree.AbstractBTree; -import com.bigdata.relation.IRelation; -import com.bigdata.relation.accesspath.IAccessPath; -import com.bigdata.relation.accesspath.IBlockingBuffer; - -/** - * Sampling operator for a shard view. - * - * @author <a href="mailto:tho...@us...">Bryan - * Thompson</a> - */ -public class SampleLocalShard<E> extends AbstractSampleIndex<E> { - - /** - * - */ - private static final long serialVersionUID = 1L; - - public SampleLocalShard(final IPredicate<E> pred, final int limit) { - - super(pred,limit); - - } - - /* - * Note: This is done at evaluation time, local to the data. - */ - public FutureTask<Void> eval(final BOpContext<E> context) { - - if (context.getPartitionId() == -1) { - // Must be specific to a shard. - throw new UnsupportedOperationException(); - } - - return new FutureTask<Void>(new LocalShardSampleTask(context)); - - } - - /** - * Sample an {@link AbstractBTree}. 
- */ - private class LocalShardSampleTask implements Callable<Void> { - - private final BOpContext<E> context; - private final IBlockingBuffer<E[]> sink; - - LocalShardSampleTask(final BOpContext<E> context) { - - this.context = context; - - this.sink = context.getSink(); - - } - - public Void call() throws Exception { - - final IPredicate<E> pred = pred(); - - final IRelation<E> view = context.getRelation(pred); - - final IAccessPath<E> accessPath = view.getAccessPath(pred); - - /* - * FIXME Sample N tuples based on a uniform offset distribution, - * discarding duplicates or tuples which are deleted in their - * most recent revision. - * - * Note: If there are only 100 leaves and we sample evenly, that - * could result in reading all the leaves. However, when the - * B+Tree is large we will only touch a few leaves even with - * uniform sampling. - */ - throw new UnsupportedOperationException(); - - } - - } // class LocalShardSampleTask - -} Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/AccessPath.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/AccessPath.java 2010-10-22 19:47:04 UTC (rev 3839) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/AccessPath.java 2010-10-22 19:58:41 UTC (rev 3840) @@ -1583,11 +1583,15 @@ if (partitionCount == 0) { - /* - * SWAG in case zero partition count is reported (I am not sure that - * this code path is possible). - */ - return new ScanCostReport(0L/* rangeCount */, partitionCount, 100/* millis */); +// /* +// * SWAG in case zero partition count is reported (I am not sure that +// * this code path is possible). +// */ +// return new ScanCostReport(0L/* rangeCount */, partitionCount, 100/* millis */); + /* + * Should never be "zero" partition count. 
+ */ + throw new AssertionError(); } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestAll.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestAll.java 2010-10-22 19:47:04 UTC (rev 3839) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestAll.java 2010-10-22 19:58:41 UTC (rev 3840) @@ -24,8 +24,6 @@ package com.bigdata.bop.ap; -import com.bigdata.bop.ap.filter.TestDistinctFilter; - import junit.framework.Test; import junit.framework.TestCase; import junit.framework.TestSuite; @@ -72,12 +70,9 @@ /* * Sampling an access path. */ - - // test sampling from an AbstractBTree. - suite.addTestSuite(TestSampleLocalBTree.class); - // test sampling from an FusedView. - suite.addTestSuite(TestSampleLocalBTree.class); + // test sampling form an index. + suite.addTestSuite(TestSampleIndex.class); return suite; Copied: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestSampleIndex.java (from rev 3756, branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestSampleLocalBTree.java) =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestSampleIndex.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestSampleIndex.java 2010-10-22 19:58:41 UTC (rev 3840) @@ -0,0 +1,234 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Aug 19, 2010 + */ + +package com.bigdata.bop.ap; + +import java.text.NumberFormat; +import java.util.Arrays; +import java.util.Properties; +import java.util.Random; + +import junit.framework.TestCase2; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.BOpContextBase; +import com.bigdata.bop.IPredicate; +import com.bigdata.bop.IVariable; +import com.bigdata.bop.NV; +import com.bigdata.bop.Var; +import com.bigdata.journal.BufferMode; +import com.bigdata.journal.ITx; +import com.bigdata.journal.Journal; +import com.bigdata.striterator.ChunkedArrayIterator; + +/** + * Test suite for {@link SampleIndex}. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id: TestSampleLocalBTree.java 3665 2010-09-28 16:53:22Z thompsonbry + * $ + * + * FIXME Just like {@link TestPredicateAccessPath}, this test suite + * needs to cover all of the combinations of global views of + * partitioned and unpartitioned indices. + */ +public class TestSampleIndex extends TestCase2 { + + /** + * + */ + public TestSampleIndex() { + } + + /** + * @param name + */ + public TestSampleIndex(String name) { + super(name); + } + + @Override + public Properties getProperties() { + + final Properties p = new Properties(super.getProperties()); + + p.setProperty(Journal.Options.BUFFER_MODE, BufferMode.Transient + .toString()); + + return p; + + } + + static private final String namespace = "ns"; + + Journal jnl; + + R rel; + + public void setUp() throws Exception { + + jnl = new Journal(getProperties()); + + } + + /** + * Create and populate relation in the {@link #namespace}. + * + * @return The #of distinct entries. 
+ */ + private int loadData(final int scale) { + + final String[] names = new String[] { "John", "Mary", "Saul", "Paul", + "Leon", "Jane", "Mike", "Mark", "Jill", "Jake", "Alex", "Lucy" }; + + final Random rnd = new Random(); + + // #of distinct instances of each name. + final int populationSize = Math.max(10, (int) Math.ceil(scale / 10.)); + + // #of trailing zeros for each name. + final int nzeros = 1 + (int) Math.ceil(Math.log10(populationSize)); + +// System.out.println("scale=" + scale + ", populationSize=" +// + populationSize + ", nzeros=" + nzeros); + + final NumberFormat fmt = NumberFormat.getIntegerInstance(); + fmt.setMinimumIntegerDigits(nzeros); + fmt.setMaximumIntegerDigits(nzeros); + fmt.setGroupingUsed(false); + + // create the relation. + final R rel = new R(jnl, namespace, ITx.UNISOLATED, new Properties()); + rel.create(); + + // data to insert. + final E[] a = new E[scale]; + + for (int i = 0; i < scale; i++) { + + final String n1 = names[rnd.nextInt(names.length)] + + fmt.format(rnd.nextInt(populationSize)); + + final String n2 = names[rnd.nextInt(names.length)] + + fmt.format(rnd.nextInt(populationSize)); + +// System.err.println("i=" + i + ", n1=" + n1 + ", n2=" + n2); + + a[i] = new E(n1, n2); + + } + + // sort before insert for efficiency. + Arrays.sort(a,R.primaryKeyOrder.getComparator()); + + // insert data (the records are not pre-sorted). + final long ninserts = rel.insert(new ChunkedArrayIterator<E>(a.length, a, null/* keyOrder */)); + + // Do commit since not scale-out. + jnl.commit(); + + // should exist as of the last commit point. + this.rel = (R) jnl.getResourceLocator().locate(namespace, + ITx.READ_COMMITTED); + + assertNotNull(rel); + + return (int) ninserts; + + } + + public void tearDown() throws Exception { + + if (jnl != null) { + jnl.destroy(); + jnl = null; + } + + // clear reference. 
+ rel = null; + + } + + /** + * Unit test verifies some aspects of a sample taken from a local index + * (primarily that the sample respects the limit). + */ + public void test_something() { + + final int scale = 10000; + + final int nrecords = loadData(scale); + + final IVariable<?> x = Var.var("x"); + + final IVariable<?> y = Var.var("y"); + + final IPredicate<E> predicate = new Predicate<E>(new BOp[] { x, y }, + new NV(IPredicate.Annotations.RELATION_NAME, + new String[] { namespace }),// + new NV(IPredicate.Annotations.TIMESTAMP, ITx.READ_COMMITTED)// + ); + + final BOpContextBase context = new BOpContextBase(null/* fed */, jnl/* indexManager */); + + final int[] limits = new int[] { // + 1, 9, 19, 100, 217, 900,// + nrecords, + nrecords + 1 + }; + + for (int limit : limits) { + + final SampleIndex<E> sampleOp = new SampleIndex<E>( + new BOp[0], + NV + .asMap( + // + new NV(SampleIndex.Annotations.PREDICATE, + predicate),// + new NV(SampleIndex.Annotations.LIMIT, limit)// + )); + + final E[] a = sampleOp.eval(context); + +// System.err.println("limit=" + limit + ", nrecords=" + nrecords +// + ", nsamples=" + a.length); +// +// for (int i = 0; i < a.length && i < 10; i++) { +// System.err.println("a[" + i + "]=" + a[i]); +// } + + final int nexpected = Math.min(nrecords, limit); + + assertEquals("#samples (limit=" + limit + ", nrecords=" + nrecords + + ", nexpected=" + nexpected + ")", nexpected, a.length); + + } + + } + +} Deleted: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestSampleLocalBTree.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestSampleLocalBTree.java 2010-10-22 19:47:04 UTC (rev 3839) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestSampleLocalBTree.java 2010-10-22 19:58:41 UTC (rev 3840) @@ -1,59 +0,0 @@ -/** - -Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. 
- -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ -/* - * Created on Aug 19, 2010 - */ - -package com.bigdata.bop.ap; - -import com.bigdata.bop.ap.SampleLocalBTree; - -import junit.framework.TestCase2; - -/** - * Test suite for {@link SampleLocalBTree}. - * - * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id$ - */ -public class TestSampleLocalBTree extends TestCase2 { - - /** - * - */ - public TestSampleLocalBTree() { - } - - /** - * @param name - */ - public TestSampleLocalBTree(String name) { - super(name); - } - - public void test_something() { - fail("write tests"); - } - -} Deleted: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestSampleLocalShard.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestSampleLocalShard.java 2010-10-22 19:47:04 UTC (rev 3839) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestSampleLocalShard.java 2010-10-22 19:58:41 UTC (rev 3840) @@ -1,59 +0,0 @@ -/** - -Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. - -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... 
- -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ -/* - * Created on Aug 19, 2010 - */ - -package com.bigdata.bop.ap; - -import com.bigdata.bop.ap.SampleLocalShard; - -import junit.framework.TestCase2; - -/** - * Test suite for {@link SampleLocalShard}. - * - * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id$ - */ -public class TestSampleLocalShard extends TestCase2 { - - /** - * - */ - public TestSampleLocalShard() { - } - - /** - * @param name - */ - public TestSampleLocalShard(String name) { - super(name); - } - - public void test_something() { - fail("write tests"); - } - -} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2010-10-22 19:47:10
|
Revision: 3839 http://bigdata.svn.sourceforge.net/bigdata/?rev=3839&view=rev Author: thompsonbry Date: 2010-10-22 19:47:04 +0000 (Fri, 22 Oct 2010) Log Message: ----------- javadoc edit Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/MultiSourceSequentialAsynchronousIterator.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/MultiSourceSequentialAsynchronousIterator.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/MultiSourceSequentialAsynchronousIterator.java 2010-10-22 19:45:33 UTC (rev 3838) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/MultiSourceSequentialAsynchronousIterator.java 2010-10-22 19:47:04 UTC (rev 3839) @@ -133,6 +133,14 @@ } } + /** + * {@inheritDoc} + * + * @todo Due to the inherent non-atomicity of the while(hasNext()) next() + * idiom, it is possible for {@link #hasNext()} to report true and for + * {@link #next()} to throw {@link NoSuchElementException} if the + * iterator has been concurrently closed. + */ public E next() { while (true) { final IAsynchronousIterator<E> tmp = nextSource(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2010-10-22 19:45:42
|
Revision: 3838 http://bigdata.svn.sourceforge.net/bigdata/?rev=3838&view=rev Author: thompsonbry Date: 2010-10-22 19:45:33 +0000 (Fri, 22 Oct 2010) Log Message: ----------- Reorganized RunningQuery in order to have (a) operator tasks generate chunks incrementally (this avoids problems with deadlock when the operator writes onto a bounded queue and reduces the latency required to produce each solution); and (b) have operator tasks drain their work queue in order to get better efficiency when the producer is leading. These changes should all benefit scale-out as well as scale-up. Scale-up will also benefit from chaining the operators together (rather than passing around IChunkMessages) but I have not yet made that optimization. I have tested this change set against: - TestBigdataSailWithQuads - LUBM U10 - BSBM 1M No obvious lock contention was visible with BSBM 1M. No obvious hotspots were revealed by a sampling profiler. I am going to test at a larger scale on a workstation next so I can compare performance to the trunk baseline. 
Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngineTestAnnotations.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunState.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/FederatedRunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/NIOChunkMessage.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/ThickChunkMessage.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/BlockingBuffer.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/bset/TestConditionalRoutingOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/bset/TestCopyBindingSets.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestNIOChunkMessage.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestThickChunkMessage.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/join/TestPipelineJoin.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/solutions/TestDistinctBindingSets.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/solutions/TestMemorySortOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/solutions/TestSliceOp.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/BlockingBufferWithStats.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/IChunkHandler.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/StandaloneChunkHandler.java 
branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/FederationChunkHandler.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java 2010-10-22 17:53:21 UTC (rev 3837) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java 2010-10-22 19:45:33 UTC (rev 3838) @@ -37,8 +37,6 @@ import com.bigdata.relation.accesspath.IAccessPath; import com.bigdata.relation.accesspath.IAsynchronousIterator; import com.bigdata.relation.accesspath.IBlockingBuffer; -import com.bigdata.relation.accesspath.IMultiSourceAsynchronousIterator; -import com.bigdata.relation.accesspath.MultiSourceSequentialAsynchronousIterator; import com.bigdata.service.IBigdataFederation; /** @@ -57,7 +55,8 @@ private final BOpStats stats; - private final IMultiSourceAsynchronousIterator<E[]> source; +// private final IMultiSourceAsynchronousIterator<E[]> source; + private final IAsynchronousIterator<E[]> source; private final IBlockingBuffer<E[]> sink; @@ -98,25 +97,25 @@ return source; } - /** - * Attach another source. The decision to attach the source is mutex with - * respect to the decision that the source reported by {@link #getSource()} - * is exhausted. - * - * @param source - * The source. - * - * @return <code>true</code> iff the source was attached. - */ - public boolean addSource(IAsynchronousIterator<E[]> source) { +// /** +// * Attach another source. The decision to attach the source is mutex with +// * respect to the decision that the source reported by {@link #getSource()} +// * is exhausted. +// * +// * @param source +// * The source. +// * +// * @return <code>true</code> iff the source was attached. 
+// */ +// public boolean addSource(IAsynchronousIterator<E[]> source) { +// +// if (source == null) +// throw new IllegalArgumentException(); +// +// return this.source.add(source); +// +// } - if (source == null) - throw new IllegalArgumentException(); - - return this.source.add(source); - - } - /** * Where to write the output of the operator. * @@ -202,7 +201,8 @@ throw new IllegalArgumentException(); this.partitionId = partitionId; this.stats = stats; - this.source = new MultiSourceSequentialAsynchronousIterator<E[]>(source); + this.source = source; +// this.source = new MultiSourceSequentialAsynchronousIterator<E[]>(source); this.sink = sink; this.sink2 = sink2; // may be null } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java 2010-10-22 17:53:21 UTC (rev 3837) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java 2010-10-22 19:45:33 UTC (rev 3838) @@ -161,31 +161,31 @@ } - /** - * Instantiate a buffer suitable as a sink for this operator. The buffer - * will be provisioned based on the operator annotations. - * <p> - * Note: if the operation swallows binding sets from the pipeline (such as - * operators which write on the database) then the operator MAY return an - * immutable empty buffer. - * - * @param stats - * The statistics on this object will automatically be updated as - * elements and chunks are output onto the returned buffer. - * - * @return The buffer. - */ - public IBlockingBuffer<IBindingSet[]> newBuffer(final BOpStats stats) { +// /** +// * Instantiate a buffer suitable as a sink for this operator. The buffer +// * will be provisioned based on the operator annotations. 
+// * <p> +// * Note: if the operation swallows binding sets from the pipeline (such as +// * operators which write on the database) then the operator MAY return an +// * immutable empty buffer. +// * +// * @param stats +// * The statistics on this object will automatically be updated as +// * elements and chunks are output onto the returned buffer. +// * +// * @return The buffer. +// */ +// public IBlockingBuffer<IBindingSet[]> newBuffer(final BOpStats stats) { +// +// if (stats == null) +// throw new IllegalArgumentException(); +// +// return new BlockingBufferWithStats<IBindingSet[]>( +// getChunkOfChunksCapacity(), getChunkCapacity(), +// getChunkTimeout(), Annotations.chunkTimeoutUnit, stats); +// +// } - if (stats == null) - throw new IllegalArgumentException(); - - return new BlockingBufferWithStats<IBindingSet[]>( - getChunkOfChunksCapacity(), getChunkCapacity(), - getChunkTimeout(), Annotations.chunkTimeoutUnit, stats); - - } - /** * Return a {@link FutureTask} which computes the operator against the * evaluation context. The caller is responsible for executing the @@ -205,77 +205,4 @@ */ abstract public FutureTask<Void> eval(BOpContext<IBindingSet> context); - private static class BlockingBufferWithStats<E> extends BlockingBuffer<E> { - - private final BOpStats stats; - - /** - * @param chunkOfChunksCapacity - * @param chunkCapacity - * @param chunkTimeout - * @param chunkTimeoutUnit - * @param stats - */ - public BlockingBufferWithStats(int chunkOfChunksCapacity, - int chunkCapacity, long chunkTimeout, - TimeUnit chunkTimeoutUnit, final BOpStats stats) { - - super(chunkOfChunksCapacity, chunkCapacity, chunkTimeout, - chunkTimeoutUnit); - - this.stats = stats; - - } - - /** - * Overridden to track {@link BOpStats#unitsOut} and - * {@link BOpStats#chunksOut}. - * <p> - * Note: {@link BOpStats#chunksOut} will report the #of chunks added to - * this buffer. 
However, the buffer MAY combine chunks either on add() - * or when drained by the iterator so the actual #of chunks read back - * from the iterator MAY differ. - * <p> - * {@inheritDoc} - */ - @Override - public boolean add(final E e, final long timeout, final TimeUnit unit) - throws InterruptedException { - - final boolean ret = super.add(e, timeout, unit); - - if (e.getClass().getComponentType() != null) { - - stats.unitsOut.add(((Object[]) e).length); - - } else { - - stats.unitsOut.increment(); - - } - - stats.chunksOut.increment(); - - return ret; - - } - - /** - * You can uncomment a line in this method to see who is closing the - * buffer. - * <p> - * {@inheritDoc} - */ - @Override - public void close() { - -// if (isOpen()) -// log.error(toString(), new RuntimeException("STACK TRACE")); - - super.close(); - - } - - } - } Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/BlockingBufferWithStats.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/BlockingBufferWithStats.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/BlockingBufferWithStats.java 2010-10-22 19:45:33 UTC (rev 3838) @@ -0,0 +1,90 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Oct 22, 2010 + */ + +package com.bigdata.bop.engine; + +import java.util.concurrent.TimeUnit; + +import com.bigdata.bop.BufferAnnotations; +import com.bigdata.bop.PipelineOp; +import com.bigdata.relation.accesspath.BlockingBuffer; + +/** + * Extended to use the {@link BufferAnnotations} to provision the + * {@link BlockingBuffer} and to track the {@link BOpStats} as chunks are added + * to the buffer. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public class BlockingBufferWithStats<E> extends BlockingBuffer<E> { + + private final BOpStats stats; + + public BlockingBufferWithStats(final PipelineOp op, final BOpStats stats) { + + super(op.getChunkOfChunksCapacity(), op.getChunkCapacity(), op + .getChunkTimeout(), BufferAnnotations.chunkTimeoutUnit); + + this.stats = stats; + + } + + /** + * Overridden to track {@link BOpStats#unitsOut} and + * {@link BOpStats#chunksOut}. + * <p> + * Note: {@link BOpStats#chunksOut} will report the #of chunks added to this + * buffer. However, the buffer MAY combine chunks either on add() or when + * drained by the iterator so the actual #of chunks read back from the + * iterator MAY differ. 
+ * <p> + * {@inheritDoc} + */ + @Override + public boolean add(final E e, final long timeout, final TimeUnit unit) + throws InterruptedException { + + final boolean ret = super.add(e, timeout, unit); + + if (e.getClass().getComponentType() != null) { + + stats.unitsOut.add(((Object[]) e).length); + + } else { + + stats.unitsOut.increment(); + + } + + stats.chunksOut.increment(); + + return ret; + + } + +} Property changes on: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/BlockingBufferWithStats.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/IChunkHandler.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/IChunkHandler.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/IChunkHandler.java 2010-10-22 19:45:33 UTC (rev 3838) @@ -0,0 +1,76 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Oct 22, 2010 + */ + +package com.bigdata.bop.engine; + +import com.bigdata.bop.IBindingSet; + +/** + * Interface dispatches an {@link IBindingSet}[] chunk generated by a running + * operator task. Each task may produce zero or more such chunks. The chunks may + * be combined together by the caller in order to have "chunkier" processing by + * this interface. The interface is responsible for generating the appropriate + * {@link IChunkMessage}(s) for each {@link IBindingSet}[] chunk. In standalone + * there is a one-to-one relationship between input chunks and output messages. + * In scale-out, we map each {@link IBindingSet} over the shard(s) for the next + * operator, which is a many-to-one mapping. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public interface IChunkHandler { + + /** + * Take an {@link IBindingSet}[] chunk generated by some pass over an + * operator and make it available to the target operator. How this is done + * depends on whether the query is running against a standalone database or + * the scale-out database. + * <p> + * Note: The return value is used as part of the termination criteria for + * the query which depends on (a) the #of running operator tasks and (b) the + * #of {@link IChunkMessage}s generated (available) and consumed. The return + * value of this method increases the #of {@link IChunkMessage} available to + * the query. + * + * @param query + * The query. + * @param bopId + * The operator which wrote on the sink. + * @param sinkId + * The identifier of the target operator. + * @param chunk + * The intermediate results to be passed to that target operator. + * + * @return The #of {@link IChunkMessage} sent. This will always be ONE (1) + * for scale-up. 
For scale-out, there will be at least one + * {@link IChunkMessage} per index partition over which the + * intermediate results were mapped. + */ + int handleChunk(RunningQuery query, int bopId, int sinkId, + IBindingSet[] chunk); + +} Property changes on: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/IChunkHandler.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngineTestAnnotations.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngineTestAnnotations.java 2010-10-22 17:53:21 UTC (rev 3837) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngineTestAnnotations.java 2010-10-22 19:45:33 UTC (rev 3838) @@ -54,13 +54,15 @@ * {@link RunState} termination conditions linked to having multiple * {@link IChunkMessage}s. * <p> - * Note: Just controlling the - * {@link PipelineOp.Annotations#CHUNK_CAPACITY} and - * {@link PipelineOp.Annotations#CHUNK_OF_CHUNKS_CAPACITY} is not - * enough to force the {@link QueryEngine} to run the an operator once per - * source chunk. The {@link QueryEngine} normally combines chunks together. - * You MUST also specify this annotation in order for the query engine to - * send multiple {@link IChunkMessage} rather than just one. + * Note: Just controlling the {@link PipelineOp.Annotations#CHUNK_CAPACITY} + * and {@link PipelineOp.Annotations#CHUNK_OF_CHUNKS_CAPACITY} is not enough + * to force the {@link QueryEngine} to run the an operator once per source + * chunk. The {@link QueryEngine} normally combines chunks together. You + * MUST also specify this annotation in order for the query engine to send + * multiple {@link IChunkMessage} rather than just one. + * + * @deprecated Support for this is no longer present. 
It was lost when the + * {@link StandaloneChunkHandler} was written. */ String ONE_MESSAGE_PER_CHUNK = QueryEngineTestAnnotations.class.getName() + ".oneMessagePerChunk"; Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunState.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunState.java 2010-10-22 17:53:21 UTC (rev 3837) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunState.java 2010-10-22 19:45:33 UTC (rev 3838) @@ -185,7 +185,7 @@ * readily exposed as {@link Map} object. If we were to expose the map, it * would have to be via a get(key) style interface. */ - /* private */final Map<Integer/* bopId */, AtomicLong/* availableChunkCount */> availableMap = new ConcurrentHashMap<Integer, AtomicLong>(); + /* private */final Map<Integer/* bopId */, AtomicLong/* availableChunkCount */> availableMap = new LinkedHashMap<Integer, AtomicLong>(); /** * A collection reporting on the #of instances of a given {@link BOp} which @@ -412,71 +412,71 @@ } - /** - * Update the {@link RunState} to indicate that the data in the - * {@link IChunkMessage} was attached to an already running task for the - * target operator. - * - * @param msg - * @param runningOnServiceId - * @return <code>true</code> if this is the first time we will evaluate the - * op. - * - * @throws IllegalArgumentException - * if the argument is <code>null</code>. - * @throws TimeoutException - * if the deadline for the query has passed. 
- */ - synchronized - public void addSource(final IChunkMessage<?> msg, - final UUID runningOnServiceId) throws TimeoutException { - - if (msg == null) - throw new IllegalArgumentException(); - - if (allDone.get()) - throw new IllegalStateException(ERR_QUERY_HALTED); - - if (deadline < System.currentTimeMillis()) - throw new TimeoutException(ERR_DEADLINE); - - nsteps.incrementAndGet(); - - final int bopId = msg.getBOpId(); - final int nmessages = 1; - - if (runningMap.get(bopId) == null) { - /* - * Note: There is a race condition in RunningQuery such that it is - * possible to add a 2nd source to an operator task before the task - * has begun to execute. Since the task calls startOp() once it - * begins to execute, this means that addSource() can be ordered - * before startOp() for the same task. This code block explicitly - * allows this condition and sets a 0L in the runningMap for the - * [bopId]. - */ - AtomicLong n = runningMap.get(bopId); - if (n == null) - runningMap.put(bopId, n = new AtomicLong()); -// throw new AssertionError(ERR_OP_NOT_STARTED + " msg=" + msg -// + ", this=" + this); - } - - messagesConsumed(bopId, nmessages); - - if (TableLog.tableLog.isInfoEnabled()) { - TableLog.tableLog.info(getTableRow("addSrc", runningOnServiceId, - bopId, msg.getPartitionId(), nmessages/* fanIn */, - null/* cause */, null/* stats */)); - } - - if (log.isInfoEnabled()) - log.info("startOp: " + toString() + " : bop=" + bopId); - - if (log.isTraceEnabled()) - log.trace(msg.toString()); - - } +// /** +// * Update the {@link RunState} to indicate that the data in the +// * {@link IChunkMessage} was attached to an already running task for the +// * target operator. +// * +// * @param msg +// * @param runningOnServiceId +// * @return <code>true</code> if this is the first time we will evaluate the +// * op. +// * +// * @throws IllegalArgumentException +// * if the argument is <code>null</code>. 
+// * @throws TimeoutException +// * if the deadline for the query has passed. +// */ +// synchronized +// public void addSource(final IChunkMessage<?> msg, +// final UUID runningOnServiceId) throws TimeoutException { +// +// if (msg == null) +// throw new IllegalArgumentException(); +// +// if (allDone.get()) +// throw new IllegalStateException(ERR_QUERY_HALTED); +// +// if (deadline < System.currentTimeMillis()) +// throw new TimeoutException(ERR_DEADLINE); +// +// nsteps.incrementAndGet(); +// +// final int bopId = msg.getBOpId(); +// final int nmessages = 1; +// +// if (runningMap.get(bopId) == null) { +// /* +// * Note: There is a race condition in RunningQuery such that it is +// * possible to add a 2nd source to an operator task before the task +// * has begun to execute. Since the task calls startOp() once it +// * begins to execute, this means that addSource() can be ordered +// * before startOp() for the same task. This code block explicitly +// * allows this condition and sets a 0L in the runningMap for the +// * [bopId]. 
+// */ +// AtomicLong n = runningMap.get(bopId); +// if (n == null) +// runningMap.put(bopId, n = new AtomicLong()); +//// throw new AssertionError(ERR_OP_NOT_STARTED + " msg=" + msg +//// + ", this=" + this); +// } +// +// messagesConsumed(bopId, nmessages); +// +// if (TableLog.tableLog.isInfoEnabled()) { +// TableLog.tableLog.info(getTableRow("addSrc", runningOnServiceId, +// bopId, msg.getPartitionId(), nmessages/* fanIn */, +// null/* cause */, null/* stats */)); +// } +// +// if (log.isInfoEnabled()) +// log.info("startOp: " + toString() + " : bop=" + bopId); +// +// if (log.isTraceEnabled()) +// log.trace(msg.toString()); +// +// } /** * Update the {@link RunState} to reflect the post-condition of the Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java 2010-10-22 17:53:21 UTC (rev 3837) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java 2010-10-22 19:45:33 UTC (rev 3838) @@ -28,6 +28,9 @@ package com.bigdata.bop.engine; import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.LinkedList; +import java.util.List; import java.util.Map; import java.util.UUID; import java.util.concurrent.BlockingQueue; @@ -36,7 +39,7 @@ import java.util.concurrent.ExecutionException; import java.util.concurrent.Future; import java.util.concurrent.FutureTask; -import java.util.concurrent.LinkedBlockingDeque; +import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.RejectedExecutionException; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; @@ -55,13 +58,12 @@ import com.bigdata.bop.NoSuchBOpException; import com.bigdata.bop.PipelineOp; import com.bigdata.bop.solutions.SliceOp; -import com.bigdata.io.DirectBufferPoolAllocator.IAllocationContext; import 
com.bigdata.journal.IIndexManager; import com.bigdata.journal.ITx; -import com.bigdata.relation.accesspath.BlockingBuffer; import com.bigdata.relation.accesspath.IAsynchronousIterator; import com.bigdata.relation.accesspath.IBlockingBuffer; -import com.bigdata.relation.accesspath.MultiplexBlockingBuffer; +import com.bigdata.relation.accesspath.IMultiSourceAsynchronousIterator; +import com.bigdata.relation.accesspath.MultiSourceSequentialAsynchronousIterator; import com.bigdata.service.IBigdataFederation; import com.bigdata.striterator.ICloseableIterator; import com.bigdata.util.concurrent.Haltable; @@ -85,6 +87,12 @@ * controller is attempted on some other {@link IQueryPeer}. */ static protected final String ERR_NOT_CONTROLLER = "Operator only permitted on the query controller"; + + /** + * Error message used when a request is made after the query has stopped + * executing. + */ + static protected final String ERR_QUERY_DONE = "Query is no longer running"; /** * The class executing the query on this node. @@ -141,67 +149,66 @@ * A collection of (bopId,partitionId) keys mapped onto a collection of * operator task evaluation contexts for currently executing operators for * this query. - * - * @todo Futures are not being cleared from this collection as operators - * complete. This should be done systematically in order to ensure - * that any allocations associated with an operator task execution are - * released in a timely manner for long-running operators. (In fact, - * the {@link IAllocationContext} should take care of most of the - * issues here but we could still wind up with a lot of entries in - * this map in scale-out where there can be up to one per bop per - * shard in a given query.) */ private final ConcurrentHashMap<BSBundle, ChunkFutureTask> operatorFutures; /** + * A map of unbounded work queues for each (bopId,partitionId). Empty queues + * are removed from the map. + * <p> + * The map is guarded by the {@link #lock}. 
+ */ + private final Map<BSBundle, BlockingQueue<IChunkMessage<IBindingSet>>> operatorQueues; + + /** * The runtime statistics for each {@link BOp} in the query and * <code>null</code> unless this is the query controller. */ final private ConcurrentHashMap<Integer/* bopId */, BOpStats> statsMap; - /** - * When running in stand alone, we can chain together the operators and have - * much higher throughput. Each operator has an {@link BlockingBuffer} which - * is essentially its input queue. The operator will drain its input queue - * using {@link BlockingBuffer#iterator()}. - * <p> - * Each operator closes its {@link IBlockingBuffer} sink(s) once its own - * source has been closed and it has finished processing that source. Since - * multiple producers can target the same operator, we need a means to - * ensure that the source for the target operator is not closed until each - * producer which targets that operator has closed its corresponding sink. - * <p> - * In order to support this many-to-one producer/consumer pattern, we wrap - * the input queue (a {@link BlockingBuffer}) for each operator having - * multiple sources with a {@link MultiplexBlockingBuffer}. This class gives - * each producer their own view on the underlying {@link BlockingBuffer}. - * The underlying {@link BlockingBuffer} will not be closed until all - * source(s) have closed their view of that buffer. This collection keeps - * track of the {@link MultiplexBlockingBuffer} wrapping the - * {@link BlockingBuffer} which is the input queue for each operator. - * <p> - * The input queues themselves are {@link BlockingBuffer} objects. Those - * objects are available from this map using - * {@link MultiplexBlockingBuffer#getBackingBuffer()}. These buffers are - * pre-allocated by {@link #populateInputBufferMap(BOp)}. - * {@link #startTasks(BOp)} is responsible for starting the operator tasks - * in a "back-to-front" order. 
{@link #startQuery(IChunkMessage)} kicks off - * the query and invokes {@link #startTasks(BOp)} to chain the input queues - * and output queues together (when so chained, the output queues are skins - * over the input queues obtained from {@link MultiplexBlockingBuffer}). - * - * FIXME The inputBufferMap will let us construct consumer producer chains - * where the consumer _waits_ for all producer(s) which target the consumer - * to close the sink associated with that consumer. Unlike when attaching an - * {@link IChunkMessage} to an already running operator, the consumer will - * NOT terminate (due to lack up input) until each running producer - * terminating that consumer terminates. This will improve concurrency, - * result in fewer task instances, and have better throughput than attaching - * a chunk to an already running task. However, in scale-out we will have - * tasks running on different nodes so we can not always chain together the - * producer and consumer in this tightly integrated manner. - */ - final private ConcurrentHashMap<Integer/*operator*/, MultiplexBlockingBuffer<IBindingSet[]>/*inputQueue*/> inputBufferMap; +// /** +// * When running in stand alone, we can chain together the operators and have +// * much higher throughput. Each operator has an {@link BlockingBuffer} which +// * is essentially its input queue. The operator will drain its input queue +// * using {@link BlockingBuffer#iterator()}. +// * <p> +// * Each operator closes its {@link IBlockingBuffer} sink(s) once its own +// * source has been closed and it has finished processing that source. Since +// * multiple producers can target the same operator, we need a means to +// * ensure that the source for the target operator is not closed until each +// * producer which targets that operator has closed its corresponding sink. 
+// * <p> +// * In order to support this many-to-one producer/consumer pattern, we wrap +// * the input queue (a {@link BlockingBuffer}) for each operator having +// * multiple sources with a {@link MultiplexBlockingBuffer}. This class gives +// * each producer their own view on the underlying {@link BlockingBuffer}. +// * The underlying {@link BlockingBuffer} will not be closed until all +// * source(s) have closed their view of that buffer. This collection keeps +// * track of the {@link MultiplexBlockingBuffer} wrapping the +// * {@link BlockingBuffer} which is the input queue for each operator. +// * <p> +// * The input queues themselves are {@link BlockingBuffer} objects. Those +// * objects are available from this map using +// * {@link MultiplexBlockingBuffer#getBackingBuffer()}. These buffers are +// * pre-allocated by {@link #populateInputBufferMap(BOp)}. +// * {@link #startTasks(BOp)} is responsible for starting the operator tasks +// * in a "back-to-front" order. {@link #startQuery(IChunkMessage)} kicks off +// * the query and invokes {@link #startTasks(BOp)} to chain the input queues +// * and output queues together (when so chained, the output queues are skins +// * over the input queues obtained from {@link MultiplexBlockingBuffer}). +// * +// * FIXME The inputBufferMap will let us construct consumer producer chains +// * where the consumer _waits_ for all producer(s) which target the consumer +// * to close the sink associated with that consumer. Unlike when attaching an +// * {@link IChunkMessage} to an already running operator, the consumer will +// * NOT terminate (due to lack up input) until each running producer +// * terminating that consumer terminates. This will improve concurrency, +// * result in fewer task instances, and have better throughput than attaching +// * a chunk to an already running task. 
However, in scale-out we will have +// * tasks running on different nodes so we can not always chain together the +// * producer and consumer in this tightly integrated manner. +// */ +// final private ConcurrentHashMap<Integer/*operator*/, MultiplexBlockingBuffer<IBindingSet[]>/*inputQueue*/> inputBufferMap; /** * The buffer used for the overall output of the query pipeline. @@ -244,14 +251,14 @@ */ final AtomicBoolean didQueryTearDown = new AtomicBoolean(false); - /** - * The chunks available for immediate processing (they must have been - * materialized). - * <p> - * Note: This is package private so it will be visible to the - * {@link QueryEngine}. - */ - final/* private */BlockingQueue<IChunkMessage<IBindingSet>> chunksIn = new LinkedBlockingDeque<IChunkMessage<IBindingSet>>(); +// /** +// * The chunks available for immediate processing (they must have been +// * materialized). +// * <p> +// * Note: This is package private so it will be visible to the +// * {@link QueryEngine}. +// */ +// final/* private */BlockingQueue<IChunkMessage<IBindingSet>> chunksIn = new LinkedBlockingDeque<IChunkMessage<IBindingSet>>(); /** * Set the query deadline. The query will be cancelled when the deadline is @@ -368,6 +375,21 @@ } /** + * Lookup and return the {@link BOp} with that identifier using an index. + * + * @param bopId + * The identifier. + * + * @return The {@link BOp} -or- <code>null</code> if no {@link BOp} was + * found in the query with for that identifier. + */ + public BOp getBOp(final int bopId) { + + return bopIndex.get(bopId); + + } + + /** * @param queryEngine * The {@link QueryEngine} on which the query is running. In * scale-out, a query is typically instantiated on many @@ -430,6 +452,8 @@ this.operatorFutures = new ConcurrentHashMap<BSBundle, ChunkFutureTask>(); + this.operatorQueues = new LinkedHashMap<BSBundle, BlockingQueue<IChunkMessage<IBindingSet>>>(); + /* * Setup the BOpStats object for each pipeline operator in the query. 
*/ @@ -445,7 +469,8 @@ final BOpStats queryStats = statsMap.get(query.getId()); - queryBuffer = query.newBuffer(queryStats); + queryBuffer = new BlockingBufferWithStats<IBindingSet[]>(query, + queryStats); queryIterator = new QueryResultIterator<IBindingSet[]>(this, queryBuffer.iterator()); @@ -467,31 +492,31 @@ } - if(!queryEngine.isScaleOut()) { - /* - * Since the query engine is using the stand alone database mode we - * will now setup the input queues for each operator. Those queues - * will be used by each operator which targets a given operator. - * Each operator will start once and will run until all of its - * source(s) are closed. - * - * This allocates the buffers in a top-down manner (this is the - * reverse of the pipeline evaluation order). Allocation halts at if - * we reach an operator without children (e.g., StartOp) or an - * operator which is a CONTROLLER (Union). (If allocation does not - * halt at those boundaries then we can allocate buffers which will - * not be used. On the one hand, the StartOp receives a message - * containing the chunk to be evaluated. On the other hand, the - * buffers are not shared between the parent and a subquery so - * allocation within the subquery is wasted. This is also true for - * the [statsMap].) - */ - inputBufferMap = null; -// inputBufferMap = new ConcurrentHashMap<Integer, MultiplexBlockingBuffer<IBindingSet[]>>(); -// populateInputBufferMap(query); - } else { - inputBufferMap = null; - } +// if(!queryEngine.isScaleOut()) { +// /* +// * Since the query engine is using the stand alone database mode we +// * will now setup the input queues for each operator. Those queues +// * will be used by each operator which targets a given operator. +// * Each operator will start once and will run until all of its +// * source(s) are closed. +// * +// * This allocates the buffers in a top-down manner (this is the +// * reverse of the pipeline evaluation order). 
Allocation halts at if +// * we reach an operator without children (e.g., StartOp) or an +// * operator which is a CONTROLLER (Union). (If allocation does not +// * halt at those boundaries then we can allocate buffers which will +// * not be used. On the one hand, the StartOp receives a message +// * containing the chunk to be evaluated. On the other hand, the +// * buffers are not shared between the parent and a subquery so +// * allocation within the subquery is wasted. This is also true for +// * the [statsMap].) +// */ +// inputBufferMap = null; +//// inputBufferMap = new ConcurrentHashMap<Integer, MultiplexBlockingBuffer<IBindingSet[]>>(); +//// populateInputBufferMap(query); +// } else { +// inputBufferMap = null; +// } } @@ -562,140 +587,97 @@ // // } - /** - * Take a chunk generated by some pass over an operator and make it - * available to the target operator. How this is done depends on whether the - * query is running against a standalone database or the scale-out database. - * <p> - * Note: The return value is used as part of the termination criteria for - * the query. - * <p> - * The default implementation supports a standalone database. The generated - * chunk is left on the Java heap and handed off synchronously using - * {@link QueryEngine#acceptChunk(IChunkMessage)}. That method will queue - * the chunk for asynchronous processing. - * - * @param bop - * The operator which wrote on the sink. - * @param sinkId - * The identifier of the target operator. - * @param sink - * The intermediate results to be passed to that target operator. - * - * @return The #of {@link IChunkMessage} sent. This will always be ONE (1) - * for scale-up. For scale-out, there will be at least one - * {@link IChunkMessage} per index partition over which the - * intermediate results were mapped. 
- */ - protected <E> int handleOutputChunk(final BOp bop, final int sinkId, - final IBlockingBuffer<IBindingSet[]> sink) { +// /** +// * Take a chunk generated by some pass over an operator and make it +// * available to the target operator. How this is done depends on whether the +// * query is running against a standalone database or the scale-out database. +// * <p> +// * Note: The return value is used as part of the termination criteria for +// * the query. +// * <p> +// * The default implementation supports a standalone database. The generated +// * chunk is left on the Java heap and handed off synchronously using +// * {@link QueryEngine#acceptChunk(IChunkMessage)}. That method will queue +// * the chunk for asynchronous processing. +// * +// * @param bop +// * The operator which wrote on the sink. +// * @param sinkId +// * The identifier of the target operator. +// * @param sink +// * The intermediate results to be passed to that target operator. +// * +// * @return The #of {@link IChunkMessage} sent. This will always be ONE (1) +// * for scale-up. For scale-out, there will be at least one +// * {@link IChunkMessage} per index partition over which the +// * intermediate results were mapped. +// */ +// protected <E> int handleOutputChunk(final BOp bop, final int sinkId, +// final IBlockingBuffer<IBindingSet[]> sink) { +// +// if (bop == null) +// throw new IllegalArgumentException(); +// +// if (sink == null) +// throw new IllegalArgumentException(); +// +// if (inputBufferMap != null && inputBufferMap.get(sinkId) != null) { +// /* +// * FIXME The sink is just a wrapper for the input buffer so we do +// * not need to do anything to propagate the data from one operator +// * to the next. +// */ +// return 0; +// } +// +// /* +// * Note: The partitionId will always be -1 in scale-up. 
+// */ +// final int partitionId = -1; +// +// final boolean oneMessagePerChunk = bop.getProperty( +// QueryEngineTestAnnotations.ONE_MESSAGE_PER_CHUNK, +// QueryEngineTestAnnotations.DEFAULT_ONE_MESSAGE_PER_CHUNK); +// +// if (oneMessagePerChunk) { +// +// final IAsynchronousIterator<IBindingSet[]> itr = sink.iterator(); +// +// int nchunks = 0; +// +// while (itr.hasNext()) { +// +// final IBlockingBuffer<IBindingSet[]> tmp = new BlockingBuffer<IBindingSet[]>( +// 1); +// +// tmp.add(itr.next()); +// +// tmp.close(); +// +// final LocalChunkMessage<IBindingSet> chunk = new LocalChunkMessage<IBindingSet>( +// clientProxy, queryId, sinkId, partitionId, tmp +// .iterator()); +// +// queryEngine.acceptChunk(chunk); +// +// nchunks++; +// +// } +// +// return nchunks; +// +// } +// +// final LocalChunkMessage<IBindingSet> chunk = new LocalChunkMessage<IBindingSet>( +// clientProxy, queryId, sinkId, partitionId, sink.iterator()); +// +// queryEngine.acceptChunk(chunk); +// +// return 1; +// +// } - if (bop == null) - throw new IllegalArgumentException(); - - if (sink == null) - throw new IllegalArgumentException(); - - if (inputBufferMap != null && inputBufferMap.get(sinkId) != null) { - /* - * FIXME The sink is just a wrapper for the input buffer so we do - * not need to do anything to propagate the data from one operator - * to the next. - */ - return 0; - } - - /* - * Note: The partitionId will always be -1 in scale-up. 
- */ - final int partitionId = -1; - - final boolean oneMessagePerChunk = bop.getProperty( - QueryEngineTestAnnotations.ONE_MESSAGE_PER_CHUNK, - QueryEngineTestAnnotations.DEFAULT_ONE_MESSAGE_PER_CHUNK); - - if (oneMessagePerChunk) { - - final IAsynchronousIterator<IBindingSet[]> itr = sink.iterator(); - - int nchunks = 0; - - while (itr.hasNext()) { - - final IBlockingBuffer<IBindingSet[]> tmp = new BlockingBuffer<IBindingSet[]>( - 1); - - tmp.add(itr.next()); - - tmp.close(); - - final LocalChunkMessage<IBindingSet> chunk = new LocalChunkMessage<IBindingSet>( - clientProxy, queryId, sinkId, partitionId, tmp - .iterator()); - - queryEngine.acceptChunk(chunk); - - nchunks++; - - } - - return nchunks; - - } - - final LocalChunkMessage<IBindingSet> chunk = new LocalChunkMessage<IBindingSet>( - clientProxy, queryId, sinkId, partitionId, sink.iterator()); - - queryEngine.acceptChunk(chunk); - - return 1; - - } - /** - * Make a chunk of binding sets available for consumption by the query. - * <p> - * Note: this is invoked by {@link QueryEngine#acceptChunk(IChunkMessage)} - * - * @param msg - * The chunk. - * - * @todo Does this method really need the {@link #lock}? I doubt it since - * {@link #chunksIn} is thread-safe. - */ - protected void acceptChunk(final IChunkMessage<IBindingSet> msg) { - - if (msg == null) - throw new IllegalArgumentException(); - - if (!msg.isMaterialized()) - throw new IllegalStateException(); - - lock.lock(); - - try { - - // verify still running. - if (future.isDone()) { - throw new RuntimeException("Query is done", future.getCause()); - } - - // add chunk to be consumed. - chunksIn.add(msg); - - if (log.isDebugEnabled()) - log - .debug("chunksIn.size()=" + chunksIn.size() + ", msg=" - + msg); - } finally { - - lock.unlock(); - - } - - } - - /** * Invoked once by the query controller with the initial * {@link IChunkMessage} which gets the query moving. 
*/ @@ -949,155 +931,254 @@ } +// /** +// * Consume zero or more chunks in the input queue for this query. The +// * chunk(s) will either be assigned to an already running task for the +// * target operator or they will be assigned to new tasks. +// * +// * FIXME Drain the input queue, assigning any chunk waiting to a task. If +// * the task is already running, then add the chunk to that task. Otherwise +// * start a new task. +// */ +// protected void consumeChunk() { +// final IChunkMessage<IBindingSet> msg = chunksIn.poll(); +// if (msg == null) +// return; +// try { +// if (!msg.isMaterialized()) +// throw new IllegalStateException(); +// if (log.isTraceEnabled()) +// log.trace("Accepted chunk: " + msg); +// final BSBundle bundle = new BSBundle(msg.getBOpId(), msg +// .getPartitionId()); +//// /* +//// * Look for instance of this task which is already running. +//// */ +//// final ChunkFutureTask chunkFutureTask = operatorFutures.get(bundle); +//// if (!queryEngine.isScaleOut() && chunkFutureTask != null) { +//// /* +//// * Attempt to atomically attach the message as another src. +//// */ +//// if (chunkFutureTask.chunkTask.context.addSource(msg +//// .getChunkAccessor().iterator())) { +//// /* +//// * @todo I've commented this out for now. I am not convinced +//// * that we need to update the RunState when accepting +//// * another message into a running task. This would only +//// * matter if haltOp() reported the #of consumed messages, +//// * but RunState.haltOp() just decrements the #of available +//// * messages by one which balances startOp(). Just because we +//// * attach more messages dynamically does not mean that we +//// * need to report that back to the query controller as long +//// * as haltOp() balances startOp(). +//// */ +////// lock.lock(); +////// try { +////// /* +////// * message was added to a running task. 
+////// * +////// * FIXME This needs to be an RMI in scale-out back to +////// * the query controller so it can update the #of +////// * messages which are being consumed by this task. +////// * However, doing RMI here will add latency into the +////// * thread submitting tasks for evaluation and the +////// * coordination overhead of addSource() in scale-out may +////// * be too high. However, if we do not combine sources in +////// * scale-out then we may have too much overhead in terms +////// * of the #of running tasks with few tuples per task. +////// * Another approach is the remote async iterator with +////// * multiple sources (parallel multi source iterator). +////// * +////// * FIXME This code path is NOT being taken in scale-out +////// * right now since it would not get the message to the +////// * query controller. We will need to add addSource() to +////// * IQueryClient parallel to startOp() and haltOp() for +////// * this to work. +////// */ +////// runState.addSource(msg, queryEngine.getServiceUUID()); +////// return; +////// } finally { +////// lock.unlock(); +////// } +//// } +//// } +// // wrap runnable. +// final ChunkFutureTask ft = new ChunkFutureTask(new ChunkTask(msg)); +// /* +// * FIXME Rather than queue up a bunch of operator tasks for the same +// * (bopId,partitionId), this blocks until the current operator task +// * is done and then submits the new one. This prevents us from +// * allocating 100s of threads for complex queries and prevents us +// * from losing track of the Futures of those tasks. However, since +// * this is happening in the caller's thread the QueryEngine is not +// * making any progress while we are blocked. A pattern which hooks +// * the Future and then submits the next task (such as the +// * LatchedExecutor) would fix this. This might have to be one +// * LatchedExecutor per pipeline operator. 
+// */ +// FutureTask<Void> existing = operatorFutures.putIfAbsent(bundle, ft); +// if (existing != null) { +// existing.get(); +// if (!operatorFutures.remove(bundle, existing)) +// throw new AssertionError(); +// if (operatorFutures.put(bundle, ft) != null) +// throw new AssertionError(); +// } +//// // add to list of active futures for this query. +//// if (operatorFutures.put(bundle, ft) != null) { +//// /* +//// * Note: This can cause the FutureTask to be accessible (above) +//// * before startOp() has been called for that ChunkTask (the +//// * latter occurs when the chunk task actually runs.) This a race +//// * condition has been resolved in RunState by allowing +//// * addSource() even when there is no registered task running for +//// * that [bopId]. +//// * +//// * FIXME This indicates that we have more than one future for +//// * the same (bopId,shardId). When this is true we are losing +//// * track of Futures with the consequence that we can not +//// * properly cancel them. Instead of losing track like this, we +//// * should be targeting the running operator instance with the +//// * new chunk. This needs to be done atomically, e.g., using the +//// * [lock]. +//// * +//// * Even if we only have one task per operator in standalone and +//// * we attach chunks to an already running task in scale-out, +//// * there is still the possibility in scale-out that a task may +//// * have closed its source but still be running, in which case we +//// * would lose the Future for the already running task when we +//// * start a new task for the new chunk for the target operator. +//// */ +//// // throw new AssertionError(); +//// } +// // submit task for execution (asynchronous). +// queryEngine.execute(ft); +// } catch (Throwable ex) { +// // halt query. +// throw new RuntimeException(halt(ex)); +// } +// } + /** - * Consume zero or more chunks in the input queue for this query. 
The - * chunk(s) will either be assigned to an already running task for the - * target operator or they will be assigned to new tasks. + * Make a chunk of binding sets available for consumption by the query. + * <p> + * Note: this is invoked by {@link QueryEngine#acceptChunk(IChunkMessage)} * - * FIXME Drain the input queue, assigning any chunk waiting to a task. If - * the task is already running, then add the chunk to that task. Otherwise - * start a new task. + * @param msg + * The chunk. */ - protected void consumeChunk() { - final IChunkMessage<IBindingSet> msg = chunksIn.poll(); + protected void acceptChunk(final IChunkMessage<IBindingSet> msg) { + if (msg == null) - return; + throw new IllegalArgumentException(); + + if (!msg.isMaterialized()) + throw new IllegalStateException(); + + final BSBundle bundle = new BSBundle(msg.getBOpId(), msg + .getPartitionId()); + + lock.lock(); + try { - if (!msg.isMaterialized()) - throw new IllegalStateException(); - if (log.isTraceEnabled()) - log.trace("Accepted chunk: " + msg); - final BSBundle bundle = new BSBundle(msg.getBOpId(), msg - .getPartitionId()); + + // verify still running. + if (future.isDone()) + throw new RuntimeException(ERR_QUERY_DONE, future.getCause()); + + BlockingQueue<IChunkMessage<IBindingSet>> queue = operatorQueues + .get(bundle); + + if (queue == null) { + + queue = new LinkedBlockingQueue<IChunkMessage<IBindingSet>>(/* unbounded */); + + operatorQueues.put(bundle, queue); + + } + + queue.add(msg); + + } finally { + + lock.unlock(); + + } + + } + + /** + * Examines the input queue for each (bopId,partitionId). If there is work + * available and no task is currently running, then drain the work queue and + * submit a task to consume that work. + */ + protected void consumeChunk() { + lock.lock(); + try { + for(BSBundle bundle : operatorQueues.keySet()) { + scheduleNext(bundle); + } + } finally { + lock.unlock(); + } + } + + /** + * Examine the input queue for the (bopId,partitionId). 
If there is work + * available and no task is currently running, then drain the work queue and + * submit a task to consume that work. + * + * @param bundle + * The (bopId,partitionId). + * + * @return <code>true</code> if a new task was started. + */ + private boolean scheduleNext(final BSBundle bundle) { + if (bundle == null) + throw new IllegalArgumentException(); + lock.lock(); + try { + // Make sure the query is still running. + future.halted(); + // Is there a Future for this (bopId,partitionId)? + final ChunkFutureTask cft = operatorFutures.get(bundle); + if (cft != null && !cft.isDone()) { + // already running. + return false; + } + // Remove the work queue for that (bopId,partitionId). + final BlockingQueue<IChunkMessage<IBindingSet>> queue = operatorQueues + .remove(bundle); + if (queue == null || queue.isEmpty()) { + // no work + return false; + } + // Drain the work queue. + final List<IChunkMessage<IBindingSet>> messages = new LinkedList<IChunkMessage<IBindingSet>>(); + queue.drainTo(messages); + final int nmessages = messages.size(); /* - * Look for instance of this task which is already running. + * Combine the messages into a single source to be consumed by a + * task. */ - final ChunkFutureTask chunkFutureTask = operatorFutures.get(bundle); - if (!queryEngine.isScaleOut() && chunkFutureTask != null) { - /* - * Attempt to atomically attach the message as another src. - */ - if (chunkFutureTask.chunkTask.context.addSource(msg - ... [truncated message content] |
From: <tho...@us...> - 2010-10-22 17:53:28
|
Revision: 3837 http://bigdata.svn.sourceforge.net/bigdata/?rev=3837&view=rev Author: thompsonbry Date: 2010-10-22 17:53:21 +0000 (Fri, 22 Oct 2010) Log Message: ----------- javadoc edit Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/util/concurrent/LatchedExecutor.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/util/concurrent/LatchedExecutor.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/util/concurrent/LatchedExecutor.java 2010-10-22 11:59:15 UTC (rev 3836) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/util/concurrent/LatchedExecutor.java 2010-10-22 17:53:21 UTC (rev 3837) @@ -53,8 +53,11 @@ /** * A thread-safe blocking queue of pending tasks. + * + * @todo The capacity of this queue does not of necessity need to be + * unbounded. */ - private final BlockingQueue<Runnable> queue = new LinkedBlockingDeque<Runnable>(); + private final BlockingQueue<Runnable> queue = new LinkedBlockingDeque<Runnable>(/*unbounded*/); private final int nparallel; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2010-10-22 11:59:24
|
Revision: 3836 http://bigdata.svn.sourceforge.net/bigdata/?rev=3836&view=rev Author: thompsonbry Date: 2010-10-22 11:59:15 +0000 (Fri, 22 Oct 2010) Log Message: ----------- Relocated the IBindingSet implementations and test suites into the com.bigdata.bop.bindingSet package in preparation for developing additional interfaces and classes related to the packaging of arrays of binding sets and large blocks of binding sets for more efficient federated query. I've left IBindingSet in place for the moment as moving it will touch even more files. The com.bigdata.bop.bset package contains operators for copying (and conditional copying) of binding sets. It should probably be renamed to reduce confusion with the package for the binding set implementation classes. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IBindingSet.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/IChunkMessage.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunState.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/StartOpMessage.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/FederatedRunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/DistinctBindingSetOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/Rule.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/AbstractJoinNexus.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestAll.java 
branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestPredicate.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/bset/TestConditionalRoutingOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/bset/TestCopyBindingSets.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/constraint/TestEQ.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/constraint/TestEQConstant.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/constraint/TestINConstraint.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/constraint/TestInBinarySearch.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/constraint/TestNE.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/constraint/TestNEConstant.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/constraint/TestOR.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestUnion.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine_Slice.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestRunState.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestFederatedQueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestNIOChunkMessage.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestThickChunkMessage.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/shards/TestMapBindingSetsOverShards.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/join/TestPipelineJoin.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/solutions/TestDistinctBindingSets.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/solutions/TestMemorySortOp.java 
branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/solutions/TestSliceOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/relation/rule/TestRule.java branches/QUADS_QUERY_BRANCH/bigdata-jini/src/test/com/bigdata/bop/fed/jini/TestJiniFederatedQueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/rules/AbstractRuleDistinctTermScan.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/rules/FastClosure.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOAccessPath.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/AbstractTripleStore.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/rules/TestSlice.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/spo/TestSPORelation.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/spo/TestSPOStarJoin.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/Rule2BOpUtility.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bindingSet/ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bindingSet/ArrayBindingSet.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bindingSet/EmptyBindingSet.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bindingSet/HashBindingSet.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/bindingSet/ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/bindingSet/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/bindingSet/TestArrayBindingSet.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/bindingSet/TestHashBindingSet.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/bindingSet/TestIBindingSet.java Removed Paths: ------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ArrayBindingSet.java 
branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/EmptyBindingSet.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/HashBindingSet.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestArrayBindingSet.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestHashBindingSet.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestIBindingSet.java Deleted: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ArrayBindingSet.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ArrayBindingSet.java 2010-10-21 17:23:06 UTC (rev 3835) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ArrayBindingSet.java 2010-10-22 11:59:15 UTC (rev 3836) @@ -1,471 +0,0 @@ -/* - -Copyright (C) SYSTAP, LLC 2006-2008. All rights reserved. - -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -*/ -/* - * Created on Jun 20, 2008 - */ - -package com.bigdata.bop; - -import java.util.Arrays; -import java.util.Collections; -import java.util.Iterator; -import java.util.Map; -import java.util.Map.Entry; - -import org.apache.log4j.Logger; - -/** - * An {@link IBindingSet} backed by an dense array (no gaps). 
This - * implementation is more efficient for fixed or small N (N LTE ~20). It simples - * scans the array looking for the variable using references tests for equality. - * Since the #of variables is generally known in advance this can be faster and - * lighter than {@link HashBindingSet} for most applications. - * - * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id$ - */ -public class ArrayBindingSet implements IBindingSet { - - private static final long serialVersionUID = -6468905602211956490L; - - private static final Logger log = Logger.getLogger(ArrayBindingSet.class); - - /** - * A dense array of the bound variables. - */ - private final IVariable[] vars; - /** - * A dense array of the values bound to the variables (correlated with - * {@link #vars}). - */ - private final IConstant[] vals; - - private int nbound = 0; - - /** - * Copy constructor. - */ - protected ArrayBindingSet(final ArrayBindingSet bindingSet) { - - if (bindingSet == null) - throw new IllegalArgumentException(); - - nbound = bindingSet.nbound; - - vars = bindingSet.vars.clone(); - - vals = bindingSet.vals.clone(); - - } - - /** - * Initialized with the given bindings (assumes for efficiency that all - * elements of bound arrays are non-<code>null</code> and that no - * variables are duplicated). - * - * @param vars - * The variables. - * @param vals - * Their bound values. 
- */ - public ArrayBindingSet(final IVariable[] vars, final IConstant[] vals) { - - if (vars == null) - throw new IllegalArgumentException(); - - if (vals == null) - throw new IllegalArgumentException(); - - if(vars.length != vals.length) - throw new IllegalArgumentException(); - - // for (int i = 0; i < vars.length; i++) { - // - // if (vars[i] == null) - // throw new IllegalArgumentException(); - // - // if (vals[i] == null) - // throw new IllegalArgumentException(); - // - // } - - this.vars = vars; - - this.vals = vals; - - this.nbound = vars.length; - - } - - /** - * Initialized with the given capacity. - * - * @param capacity - * The capacity. - * - * @throws IllegalArgumentException - * if the <i>capacity</i> is negative. - */ - public ArrayBindingSet(final int capacity) { - - if (capacity < 0) - throw new IllegalArgumentException(); - - vars = new IVariable[capacity]; - - vals = new IConstant[capacity]; - - } - - public Iterator<IVariable> vars() { - - return Collections.unmodifiableList(Arrays.asList(vars)).iterator(); - - } - - /** - * Iterator does not support either removal or concurrent modification of - * the binding set. - */ - public Iterator<Map.Entry<IVariable,IConstant>> iterator() { - - return new BindingSetIterator(); - - } - - private class BindingSetIterator implements Iterator<Map.Entry<IVariable,IConstant>> { - - private int i = 0; - - public boolean hasNext() { - - return i < nbound; - - } - - public Entry<IVariable, IConstant> next() { - - // the index whose bindings are being returned. 
- final int index = i++; - - return new Map.Entry<IVariable, IConstant>() { - - public IVariable getKey() { - - return vars[index]; - - } - - public IConstant getValue() { - - return vals[index]; - - } - - public IConstant setValue(IConstant value) { - - if (value == null) - throw new IllegalArgumentException(); - - final IConstant t = vals[index]; - - vals[index] = value; - - return t; - - } - - }; - - } - - public void remove() { - - throw new UnsupportedOperationException(); - - } - - } - - public int size() { - - return nbound; - - } - - public void clearAll() { - - for (int i = nbound - 1; nbound > 0; i--, nbound--) { - - vars[i] = null; - - vals[i] = null; - - } - - // clear the hash code. - hash = 0; - - assert nbound == 0; - - } - - /** - * Since the array is dense (no gaps), {@link #clear(IVariable)} requires - * that we copy down any remaining elements in the array by one position. - */ - public void clear(final IVariable var) { - - if (var == null) - throw new IllegalArgumentException(); - - for (int i = 0; i < nbound; i++) { - - if (vars[i] == var) { - - final int nremaining = nbound-(i+1); - - if (nremaining >= 0) { - - // Copy down to close up the gap! - System.arraycopy(vars, i+1, vars, i, nremaining); - - System.arraycopy(vals, i+1, vals, i, nremaining); - - } else { - - // Just clear the reference. - - vars[i] = null; - - vals[i] = null; - - } - - // clear the hash code. 
- hash = 0; - - nbound--; - - break; - - } - - } - - } - - public IConstant get(final IVariable var) { - - if (var == null) - throw new IllegalArgumentException(); - - for (int i = 0; i < nbound; i++) { - - if (vars[i] == var) { - - return vals[i]; - - } - - } - - return null; - - } - - public boolean isBound(final IVariable var) { - - return get(var) != null; - - } - - public void set(final IVariable var, final IConstant val) { - - if (var == null) - throw new IllegalArgumentException(); - - if (val == null) - throw new IllegalArgumentException(); - - if (log.isTraceEnabled()) { - - log.trace("var=" + var + ", val=" + val + ", nbound=" + nbound - + ", capacity=" + vars.length); - - } - - for (int i = 0; i < nbound; i++) { - - if (vars[i] == var) { - - vals[i] = val; - - // clear the hash code. - hash = 0; - - return; - - } - - } - - vars[nbound] = var; - - vals[nbound] = val; - - // clear the hash code. - hash = 0; - - nbound++; - - } - - public String toString() { - - final StringBuilder sb = new StringBuilder(); - - sb.append("{"); - - for(int i=0; i<nbound; i++) { - - if(i>0) sb.append(", "); - - sb.append(vars[i]); - - sb.append("="); - - sb.append(vals[i]); - - } - - sb.append("}"); - - return sb.toString(); - - } - - public ArrayBindingSet clone() { - - return new ArrayBindingSet(this); - - } - - /** - * Return a shallow copy of the binding set, eliminating unecessary - * variables. 
- */ - public ArrayBindingSet copy(final IVariable[] variablesToKeep) { - - // bitflag for the old binding set - final boolean[] keep = new boolean[nbound]; - - // for each var in the old binding set, see if we need to keep it - for (int i = 0; i < nbound; i++) { - - final IVariable v = vars[i]; - - keep[i] = false; - for (IVariable k : variablesToKeep) { - if (v == k) { - keep[i] = true; - break; - } - } - - } - - // allocate the new vars - final IVariable[] newVars = new IVariable[vars.length]; - - // allocate the new vals - final IConstant[] newVals = new IConstant[vals.length]; - - // fill in the new binding set based on the keep bitflag - int newbound = 0; - for (int i = 0; i < nbound; i++) { - if (keep[i]) { - newVars[newbound] = vars[i]; - newVals[newbound] = vals[i]; - newbound++; - } - } - - ArrayBindingSet bs = new ArrayBindingSet(newVars, newVals); - bs.nbound = newbound; - - return bs; - - } - - public boolean equals(final Object t) { - - if (this == t) - return true; - - if(!(t instanceof IBindingSet)) - return false; - - final IBindingSet o = (IBindingSet)t; - - if (nbound != o.size()) - return false; - - for(int i=0; i<nbound; i++) { - - IConstant<?> o_val = o.get ( vars [ i ] ) ; - if ( null == o_val || !vals[i].equals( o_val )) - return false; - - } - - return true; - - } - - public int hashCode() { - - if (hash == 0) { - - int result = 0; - - for (int i = 0; i < nbound; i++) { - - if (vals[i] == null) - continue; - - result ^= vals[i].hashCode(); - - } - - hash = result; - - } - return hash; - - } - private int hash; - -} Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java 2010-10-21 17:23:06 UTC (rev 3835) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java 2010-10-22 11:59:15 UTC (rev 3836) @@ -315,6 +315,8 @@ * The element. 
* @param bindingSet * The binding set, which is modified as a side-effect. + * + * @todo This appears to be unused, in which case it should be dropped. */ final public void bind(final IVariable<?>[] vars, final IElement e, final IBindingSet bindingSet) { Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java 2010-10-21 17:23:06 UTC (rev 3835) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java 2010-10-22 11:59:15 UTC (rev 3836) @@ -516,6 +516,40 @@ } /** + * Return the effective default sink. + * + * @param bop + * The operator. + * @param p + * The parent of that operator, if any. + * + * @todo unit tests. + */ + static public Integer getEffectiveDefaultSink(final BOp bop, final BOp p) { + + if (bop == null) + throw new IllegalArgumentException(); + + Integer sink; + + // Explicitly specified sink? + sink = (Integer) bop.getProperty(PipelineOp.Annotations.SINK_REF); + + if (sink == null) { + if (p == null) { + // No parent, so no sink. + return null; + } + // The parent is the sink. + sink = (Integer) p + .getRequiredProperty(BOp.Annotations.BOP_ID); + } + + return sink; + + } + + /** * Combine chunks drawn from an iterator into a single chunk. This is useful * when materializing intermediate results for an all-at-once operator. * Deleted: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/EmptyBindingSet.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/EmptyBindingSet.java 2010-10-21 17:23:06 UTC (rev 3835) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/EmptyBindingSet.java 2010-10-22 11:59:15 UTC (rev 3836) @@ -1,158 +0,0 @@ -/* - -Copyright (C) SYSTAP, LLC 2006-2008. All rights reserved. 
- -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -*/ -/* - * Created on Sep 10, 2008 - */ - -package com.bigdata.bop; - -import java.io.ObjectStreamException; -import java.io.Serializable; -import java.util.Iterator; -import java.util.Map.Entry; - -import cutthecrap.utils.striterators.EmptyIterator; - -/** - * An immutable empty binding set. - * - * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id$ - */ -final public class EmptyBindingSet implements IBindingSet, Serializable { - - /** - * - */ - private static final long serialVersionUID = 4270590461117389862L; - - /** - * Immutable singleton. - */ - public static transient final EmptyBindingSet INSTANCE = new EmptyBindingSet(); - - private EmptyBindingSet() { - - } - - /** - * @todo Clone returns the same object, which is immutable. Since we use - * clone when binding, it might be better to return a mutable object. 
- */ - public EmptyBindingSet clone() { - - return this; - - } - - public EmptyBindingSet copy(IVariable[] variablesToDrop) { - - return this; - - } - - public void clear(IVariable var) { - throw new UnsupportedOperationException(); - } - - public void clearAll() { - throw new UnsupportedOperationException(); - } - - @SuppressWarnings("unchecked") - public Iterator<Entry<IVariable, IConstant>> iterator() { - - return EmptyIterator.DEFAULT; - - } - - public void set(IVariable var, IConstant val) { - throw new UnsupportedOperationException(); - } - - public int size() { - return 0; - } - - public boolean equals(final Object t) { - - if (this == t) - return true; - - if (!(t instanceof IBindingSet)) - return false; - - final IBindingSet o = (IBindingSet) t; - - if (o.size() == 0) - return true; - - return false; - - } - - /** - * The hash code of an empty binding set is always zero. - */ - public int hashCode() { - - return 0; - - } - - public IConstant get(IVariable var) { - - if (var == null) - throw new IllegalArgumentException(); - - return null; - - } - - public boolean isBound(IVariable var) { - - if (var == null) - throw new IllegalArgumentException(); - - return false; - - } - - /** - * Imposes singleton pattern during object de-serialization. - */ - private Object readResolve() throws ObjectStreamException { - - return EmptyBindingSet.INSTANCE; - - } - - public Iterator<IVariable> vars() { - - return EmptyIterator.DEFAULT; - - } - -} Deleted: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/HashBindingSet.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/HashBindingSet.java 2010-10-21 17:23:06 UTC (rev 3835) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/HashBindingSet.java 2010-10-22 11:59:15 UTC (rev 3836) @@ -1,319 +0,0 @@ -/* - -Copyright (C) SYSTAP, LLC 2006-2008. All rights reserved. 
- -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -*/ -/* - * Created on Jun 19, 2008 - */ - -package com.bigdata.bop; - -import java.util.Collections; -import java.util.HashMap; -import java.util.Iterator; -import java.util.LinkedHashMap; -import java.util.LinkedList; -import java.util.Map; -import java.util.Map.Entry; - -/** - * {@link IBindingSet} backed by a {@link HashMap}. - * - * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id$ - * - * @todo Since {@link Var}s allow reference testing, a faster implementation - * could be written based on a {@link LinkedList}. Just scan the list - * until the entry is found with the desired {@link Var} reference and - * then return it. - */ -public class HashBindingSet implements IBindingSet { - - private static final long serialVersionUID = -2989802566387532422L; - - /** - * Note: A {@link LinkedHashMap} provides a fast iterator, which we use a - * bunch. - */ - private LinkedHashMap<IVariable, IConstant> map; - - /** - * New empty binding set. - */ - public HashBindingSet() { - - map = new LinkedHashMap<IVariable, IConstant>(); - - } - - /** - * Copy constructor. - * - * @param src - */ - protected HashBindingSet(final HashBindingSet src) { - - map = new LinkedHashMap<IVariable, IConstant>(src.map); - - } - - /** - * Copy constructor. 
- * - * @param src - */ - public HashBindingSet(final IBindingSet src) { - - map = new LinkedHashMap<IVariable, IConstant>(src.size()); - - final Iterator<Map.Entry<IVariable, IConstant>> itr = src.iterator(); - - while (itr.hasNext()) { - - final Map.Entry<IVariable, IConstant> e = itr.next(); - - map.put(e.getKey(), e.getValue()); - - } - - } - - public HashBindingSet(final IVariable[] vars, final IConstant[] vals) { - - if (vars == null) - throw new IllegalArgumentException(); - - if (vals == null) - throw new IllegalArgumentException(); - - if (vars.length != vals.length) - throw new IllegalArgumentException(); - - map = new LinkedHashMap<IVariable, IConstant>(vars.length); - - for (int i = 0; i < vars.length; i++) { - - map.put(vars[i], vals[i]); - - } - - } - - public boolean isBound(final IVariable var) { - - if (var == null) - throw new IllegalArgumentException(); - - return map.containsKey(var); - - } - - public IConstant get(final IVariable var) { - - if (var == null) - throw new IllegalArgumentException(); - - return map.get(var); - - } - - public void set(final IVariable var, final IConstant val) { - - if (var == null) - throw new IllegalArgumentException(); - - if (val == null) - throw new IllegalArgumentException(); - - map.put(var,val); - - // clear the hash code. - hash = 0; - - } - - public void clear(final IVariable var) { - - if (var == null) - throw new IllegalArgumentException(); - - map.remove(var); - - // clear the hash code. - hash = 0; - - } - - public void clearAll() { - - map.clear(); - - // clear the hash code. 
- hash = 0; - - } - - public String toString() { - - final StringBuilder sb = new StringBuilder(); - - sb.append("{ "); - - int i = 0; - - final Iterator<Map.Entry<IVariable, IConstant>> itr = map.entrySet() - .iterator(); - - while (itr.hasNext()) { - - if (i > 0) - sb.append(", "); - - final Map.Entry<IVariable, IConstant> entry = itr.next(); - - sb.append(entry.getKey()); - - sb.append("="); - - sb.append(entry.getValue()); - - i++; - - } - - sb.append(" }"); - - return sb.toString(); - - } - - /** - * Iterator does not support removal, set, or concurrent modification. - */ - public Iterator<Entry<IVariable, IConstant>> iterator() { - - return Collections.unmodifiableMap(map).entrySet().iterator(); - - } - - public Iterator<IVariable> vars() { - - return Collections.unmodifiableSet(map.keySet()).iterator(); - - } - - public int size() { - - return map.size(); - - } - - public HashBindingSet clone() { - - return new HashBindingSet( this ); - - } - - /** - * Return a shallow copy of the binding set, eliminating unecessary - * variables. 
- */ - public HashBindingSet copy(final IVariable[] variablesToKeep) { - - final HashBindingSet bs = new HashBindingSet(); - - for (IVariable<?> var : variablesToKeep) { - - final IConstant<?> val = map.get(var); - - if (val != null) { - - bs.map.put(var, val); - - } - - } - - return bs; - - } - - public boolean equals(final Object t) { - - if (this == t) - return true; - - if(!(t instanceof IBindingSet)) - return false; - - final IBindingSet o = (IBindingSet) t; - - if (size() != o.size()) - return false; - - final Iterator<Map.Entry<IVariable,IConstant>> itr = map.entrySet().iterator(); - - while(itr.hasNext()) { - - final Map.Entry<IVariable,IConstant> entry = itr.next(); - - final IVariable<?> var = entry.getKey(); - - final IConstant<?> val = entry.getValue(); - -// if (!o.isBound(vars[i])) -// return false; - IConstant<?> o_val = o.get ( var ) ; - if (null == o_val || !val.equals(o_val)) - return false; - - } - - return true; - - } - - public int hashCode() { - - if (hash == 0) { - - int result = 0; - - for(IConstant<?> c : map.values()) { - - if (c == null) - continue; - - result ^= c.hashCode(); - - } - - hash = result; - - } - return hash; - - } - private int hash; - -} Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IBindingSet.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IBindingSet.java 2010-10-21 17:23:06 UTC (rev 3835) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IBindingSet.java 2010-10-22 11:59:15 UTC (rev 3836) @@ -32,45 +32,10 @@ import java.util.Iterator; import java.util.Map; -import com.bigdata.relation.rule.IRule; -import com.bigdata.relation.rule.Rule; -import com.bigdata.relation.rule.eval.RuleState; - /** * Interface for a set of bindings. The set of variables values is extensible * and the bound values are loosely typed. 
* - * @todo The variable positions in a binding set can be assigned an index by the - * order in which they are encountered across the predicates when the - * predicates are considered in execution order. This gives us a dense - * index in [0:nvars-1]. The index can be into an array. When the bindings - * are of a primitive type, as they are for the RDF DB, that array can be - * an array of the primitive type, e.g., long[nvars]. - * <p> - * This change would require that the singleton factory for a variable was - * on the {@link Rule} (different rules would have different index - * assignments), it would require predicates to be cloned into a - * {@link Rule} so that the variables possessed the necessary index - * assignment, and that index assignment would have to be late - once the - * evaluation order was determined, so maybe the Rule is cloned into the - * {@link RuleState} once we have the evaluation order. - * <p> - * There would also need to be a type-specific means for copying bindings - * from a visited element into a bindingSet if a want to avoid autoboxing. - * <p> - * The {@link IConstant} interface might have to disappear for this as - * well. I am not convinced that it adds much. - * <p> - * To obtain a {@link Var} you MUST go to the {@link IVariable} factory on - * the {@link IRule}. (It is easy to find violators since all vars are - * currently assigned by a single factory.) - * <p> - * Since we sometimes do not have access to the rule that generated the - * bindings, we would also require the ability to retrieve a binding by - * the name of the variable (this case arises when the rule is generated - * dynamically in a manner that is not visible to the consumer of the - * bindings, e.g., the match rule of the RDF DB). - * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ */ @@ -177,9 +142,6 @@ * does not dependent on the order in which the bindings are iterated over. 
* The hash code reflects the current state of the bindings and must be * recomputed if the bindings are changed. - * - * @todo the test suites should be enhanced to verify the contract for - * {@link IBindingSet#hashCode()} */ public int hashCode(); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java 2010-10-21 17:23:06 UTC (rev 3835) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java 2010-10-22 11:59:15 UTC (rev 3836) @@ -31,7 +31,6 @@ import java.util.Map; import com.bigdata.bop.AbstractAccessPathOp; -import com.bigdata.bop.ArrayBindingSet; import com.bigdata.bop.BOp; import com.bigdata.bop.Constant; import com.bigdata.bop.IBindingSet; @@ -41,6 +40,7 @@ import com.bigdata.bop.IVariable; import com.bigdata.bop.IVariableOrConstant; import com.bigdata.bop.NV; +import com.bigdata.bop.bindingSet.ArrayBindingSet; import com.bigdata.relation.accesspath.ElementFilter; import com.bigdata.relation.accesspath.IElementFilter; import com.bigdata.relation.rule.IAccessPathExpander; Copied: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bindingSet/ArrayBindingSet.java (from rev 3802, branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ArrayBindingSet.java) =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bindingSet/ArrayBindingSet.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bindingSet/ArrayBindingSet.java 2010-10-22 11:59:15 UTC (rev 3836) @@ -0,0 +1,475 @@ +/* + +Copyright (C) SYSTAP, LLC 2006-2008. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... 
+ +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +*/ +/* + * Created on Jun 20, 2008 + */ + +package com.bigdata.bop.bindingSet; + +import java.util.Arrays; +import java.util.Collections; +import java.util.Iterator; +import java.util.Map; +import java.util.Map.Entry; + +import org.apache.log4j.Logger; + +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IConstant; +import com.bigdata.bop.IVariable; + +/** + * An {@link IBindingSet} backed by an dense array (no gaps). This + * implementation is more efficient for fixed or small N (N LTE ~20). It simples + * scans the array looking for the variable using references tests for equality. + * Since the #of variables is generally known in advance this can be faster and + * lighter than {@link HashBindingSet} for most applications. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public class ArrayBindingSet implements IBindingSet { + + private static final long serialVersionUID = -6468905602211956490L; + + private static final Logger log = Logger.getLogger(ArrayBindingSet.class); + + /** + * A dense array of the bound variables. + */ + private final IVariable[] vars; + /** + * A dense array of the values bound to the variables (correlated with + * {@link #vars}). + */ + private final IConstant[] vals; + + private int nbound = 0; + + /** + * Copy constructor. 
+ */ + protected ArrayBindingSet(final ArrayBindingSet bindingSet) { + + if (bindingSet == null) + throw new IllegalArgumentException(); + + nbound = bindingSet.nbound; + + vars = bindingSet.vars.clone(); + + vals = bindingSet.vals.clone(); + + } + + /** + * Initialized with the given bindings (assumes for efficiency that all + * elements of bound arrays are non-<code>null</code> and that no + * variables are duplicated). + * + * @param vars + * The variables. + * @param vals + * Their bound values. + */ + public ArrayBindingSet(final IVariable[] vars, final IConstant[] vals) { + + if (vars == null) + throw new IllegalArgumentException(); + + if (vals == null) + throw new IllegalArgumentException(); + + if(vars.length != vals.length) + throw new IllegalArgumentException(); + + // for (int i = 0; i < vars.length; i++) { + // + // if (vars[i] == null) + // throw new IllegalArgumentException(); + // + // if (vals[i] == null) + // throw new IllegalArgumentException(); + // + // } + + this.vars = vars; + + this.vals = vals; + + this.nbound = vars.length; + + } + + /** + * Initialized with the given capacity. + * + * @param capacity + * The capacity. + * + * @throws IllegalArgumentException + * if the <i>capacity</i> is negative. + */ + public ArrayBindingSet(final int capacity) { + + if (capacity < 0) + throw new IllegalArgumentException(); + + vars = new IVariable[capacity]; + + vals = new IConstant[capacity]; + + } + + public Iterator<IVariable> vars() { + + return Collections.unmodifiableList(Arrays.asList(vars)).iterator(); + + } + + /** + * Iterator does not support either removal or concurrent modification of + * the binding set. 
+ */ + public Iterator<Map.Entry<IVariable,IConstant>> iterator() { + + return new BindingSetIterator(); + + } + + private class BindingSetIterator implements Iterator<Map.Entry<IVariable,IConstant>> { + + private int i = 0; + + public boolean hasNext() { + + return i < nbound; + + } + + public Entry<IVariable, IConstant> next() { + + // the index whose bindings are being returned. + final int index = i++; + + return new Map.Entry<IVariable, IConstant>() { + + public IVariable getKey() { + + return vars[index]; + + } + + public IConstant getValue() { + + return vals[index]; + + } + + public IConstant setValue(IConstant value) { + + if (value == null) + throw new IllegalArgumentException(); + + final IConstant t = vals[index]; + + vals[index] = value; + + return t; + + } + + }; + + } + + public void remove() { + + throw new UnsupportedOperationException(); + + } + + } + + public int size() { + + return nbound; + + } + + public void clearAll() { + + for (int i = nbound - 1; nbound > 0; i--, nbound--) { + + vars[i] = null; + + vals[i] = null; + + } + + // clear the hash code. + hash = 0; + + assert nbound == 0; + + } + + /** + * Since the array is dense (no gaps), {@link #clear(IVariable)} requires + * that we copy down any remaining elements in the array by one position. + */ + public void clear(final IVariable var) { + + if (var == null) + throw new IllegalArgumentException(); + + for (int i = 0; i < nbound; i++) { + + if (vars[i] == var) { + + final int nremaining = nbound-(i+1); + + if (nremaining >= 0) { + + // Copy down to close up the gap! + System.arraycopy(vars, i+1, vars, i, nremaining); + + System.arraycopy(vals, i+1, vals, i, nremaining); + + } else { + + // Just clear the reference. + + vars[i] = null; + + vals[i] = null; + + } + + // clear the hash code. 
+ hash = 0; + + nbound--; + + break; + + } + + } + + } + + public IConstant get(final IVariable var) { + + if (var == null) + throw new IllegalArgumentException(); + + for (int i = 0; i < nbound; i++) { + + if (vars[i] == var) { + + return vals[i]; + + } + + } + + return null; + + } + + public boolean isBound(final IVariable var) { + + return get(var) != null; + + } + + public void set(final IVariable var, final IConstant val) { + + if (var == null) + throw new IllegalArgumentException(); + + if (val == null) + throw new IllegalArgumentException(); + + if (log.isTraceEnabled()) { + + log.trace("var=" + var + ", val=" + val + ", nbound=" + nbound + + ", capacity=" + vars.length); + + } + + for (int i = 0; i < nbound; i++) { + + if (vars[i] == var) { + + vals[i] = val; + + // clear the hash code. + hash = 0; + + return; + + } + + } + + vars[nbound] = var; + + vals[nbound] = val; + + // clear the hash code. + hash = 0; + + nbound++; + + } + + public String toString() { + + final StringBuilder sb = new StringBuilder(); + + sb.append("{"); + + for(int i=0; i<nbound; i++) { + + if(i>0) sb.append(", "); + + sb.append(vars[i]); + + sb.append("="); + + sb.append(vals[i]); + + } + + sb.append("}"); + + return sb.toString(); + + } + + public ArrayBindingSet clone() { + + return new ArrayBindingSet(this); + + } + + /** + * Return a shallow copy of the binding set, eliminating unecessary + * variables. 
+ */ + public ArrayBindingSet copy(final IVariable[] variablesToKeep) { + + // bitflag for the old binding set + final boolean[] keep = new boolean[nbound]; + + // for each var in the old binding set, see if we need to keep it + for (int i = 0; i < nbound; i++) { + + final IVariable v = vars[i]; + + keep[i] = false; + for (IVariable k : variablesToKeep) { + if (v == k) { + keep[i] = true; + break; + } + } + + } + + // allocate the new vars + final IVariable[] newVars = new IVariable[vars.length]; + + // allocate the new vals + final IConstant[] newVals = new IConstant[vals.length]; + + // fill in the new binding set based on the keep bitflag + int newbound = 0; + for (int i = 0; i < nbound; i++) { + if (keep[i]) { + newVars[newbound] = vars[i]; + newVals[newbound] = vals[i]; + newbound++; + } + } + + ArrayBindingSet bs = new ArrayBindingSet(newVars, newVals); + bs.nbound = newbound; + + return bs; + + } + + public boolean equals(final Object t) { + + if (this == t) + return true; + + if(!(t instanceof IBindingSet)) + return false; + + final IBindingSet o = (IBindingSet)t; + + if (nbound != o.size()) + return false; + + for(int i=0; i<nbound; i++) { + + IConstant<?> o_val = o.get ( vars [ i ] ) ; + if ( null == o_val || !vals[i].equals( o_val )) + return false; + + } + + return true; + + } + + public int hashCode() { + + if (hash == 0) { + + int result = 0; + + for (int i = 0; i < nbound; i++) { + + if (vals[i] == null) + continue; + + result ^= vals[i].hashCode(); + + } + + hash = result; + + } + return hash; + + } + private int hash; + +} Copied: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bindingSet/EmptyBindingSet.java (from rev 3802, branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/EmptyBindingSet.java) =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bindingSet/EmptyBindingSet.java (rev 0) +++ 
branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bindingSet/EmptyBindingSet.java 2010-10-22 11:59:15 UTC (rev 3836) @@ -0,0 +1,162 @@ +/* + +Copyright (C) SYSTAP, LLC 2006-2008. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +*/ +/* + * Created on Sep 10, 2008 + */ + +package com.bigdata.bop.bindingSet; + +import java.io.ObjectStreamException; +import java.io.Serializable; +import java.util.Iterator; +import java.util.Map.Entry; + +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IConstant; +import com.bigdata.bop.IVariable; + +import cutthecrap.utils.striterators.EmptyIterator; + +/** + * An immutable empty binding set. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +final public class EmptyBindingSet implements IBindingSet, Serializable { + + /** + * + */ + private static final long serialVersionUID = 4270590461117389862L; + + /** + * Immutable singleton. + */ + public static transient final EmptyBindingSet INSTANCE = new EmptyBindingSet(); + + private EmptyBindingSet() { + + } + + /** + * @todo Clone returns the same object, which is immutable. Since we use + * clone when binding, it might be better to return a mutable object. 
+ */ + public EmptyBindingSet clone() { + + return this; + + } + + public EmptyBindingSet copy(IVariable[] variablesToDrop) { + + return this; + + } + + public void clear(IVariable var) { + throw new UnsupportedOperationException(); + } + + public void clearAll() { + throw new UnsupportedOperationException(); + } + + @SuppressWarnings("unchecked") + public Iterator<Entry<IVariable, IConstant>> iterator() { + + return EmptyIterator.DEFAULT; + + } + + public void set(IVariable var, IConstant val) { + throw new UnsupportedOperationException(); + } + + public int size() { + return 0; + } + + public boolean equals(final Object t) { + + if (this == t) + return true; + + if (!(t instanceof IBindingSet)) + return false; + + final IBindingSet o = (IBindingSet) t; + + if (o.size() == 0) + return true; + + return false; + + } + + /** + * The hash code of an empty binding set is always zero. + */ + public int hashCode() { + + return 0; + + } + + public IConstant get(IVariable var) { + + if (var == null) + throw new IllegalArgumentException(); + + return null; + + } + + public boolean isBound(IVariable var) { + + if (var == null) + throw new IllegalArgumentException(); + + return false; + + } + + /** + * Imposes singleton pattern during object de-serialization. 
+ */ + private Object readResolve() throws ObjectStreamException { + + return EmptyBindingSet.INSTANCE; + + } + + public Iterator<IVariable> vars() { + + return EmptyIterator.DEFAULT; + + } + +} Copied: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bindingSet/HashBindingSet.java (from rev 3802, branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/HashBindingSet.java) =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bindingSet/HashBindingSet.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bindingSet/HashBindingSet.java 2010-10-22 11:59:15 UTC (rev 3836) @@ -0,0 +1,324 @@ +/* + +Copyright (C) SYSTAP, LLC 2006-2008. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +*/ +/* + * Created on Jun 19, 2008 + */ + +package com.bigdata.bop.bindingSet; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.LinkedList; +import java.util.Map; +import java.util.Map.Entry; + +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IConstant; +import com.bigdata.bop.IVariable; +import com.bigdata.bop.Var; + +/** + * {@link IBindingSet} backed by a {@link HashMap}. 
+ * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + * + * @todo Since {@link Var}s allow reference testing, a faster implementation + * could be written based on a {@link LinkedList}. Just scan the list + * until the entry is found with the desired {@link Var} reference and + * then return it. + */ +public class HashBindingSet implements IBindingSet { + + private static final long serialVersionUID = -2989802566387532422L; + + /** + * Note: A {@link LinkedHashMap} provides a fast iterator, which we use a + * bunch. + */ + private LinkedHashMap<IVariable, IConstant> map; + + /** + * New empty binding set. + */ + public HashBindingSet() { + + map = new LinkedHashMap<IVariable, IConstant>(); + + } + + /** + * Copy constructor. + * + * @param src + */ + protected HashBindingSet(final HashBindingSet src) { + + map = new LinkedHashMap<IVariable, IConstant>(src.map); + + } + + /** + * Copy constructor. + * + * @param src + */ + public HashBindingSet(final IBindingSet src) { + + map = new LinkedHashMap<IVariable, IConstant>(src.size()); + + final Iterator<Map.Entry<IVariable, IConstant>> itr = src.iterator(); + + while (itr.hasNext()) { + + final Map.Entry<IVariable, IConstant> e = itr.next(); + + map.put(e.getKey(), e.getValue()); + + } + + } + + public HashBindingSet(final IVariable[] vars, final IConstant[] vals) { + + if (vars == null) + throw new IllegalArgumentException(); + + if (vals == null) + throw new IllegalArgumentException(); + + if (vars.length != vals.length) + throw new IllegalArgumentException(); + + map = new LinkedHashMap<IVariable, IConstant>(vars.length); + + for (int i = 0; i < vars.length; i++) { + + map.put(vars[i], vals[i]); + + } + + } + + public boolean isBound(final IVariable var) { + + if (var == null) + throw new IllegalArgumentException(); + + return map.containsKey(var); + + } + + public IConstant get(final IVariable var) { + + if (var == null) + throw new IllegalArgumentException(); + + return map.get(var); 
+ + } + + public void set(final IVariable var, final IConstant val) { + + if (var == null) + throw new IllegalArgumentException(); + + if (val == null) + throw new IllegalArgumentException(); + + map.put(var,val); + + // clear the hash code. + hash = 0; + + } + + public void clear(final IVariable var) { + + if (var == null) + throw new IllegalArgumentException(); + + map.remove(var); + + // clear the hash code. + hash = 0; + + } + + public void clearAll() { + + map.clear(); + + // clear the hash code. + hash = 0; + + } + + public String toString() { + + final StringBuilder sb = new StringBuilder(); + + sb.append("{ "); + + int i = 0; + + final Iterator<Map.Entry<IVariable, IConstant>> itr = map.entrySet() + .iterator(); + + while (itr.hasNext()) { + + if (i > 0) + sb.append(", "); + + final Map.Entry<IVariable, IConstant> entry = itr.next(); + + sb.append(entry.getKey()); + + sb.append("="); + + sb.append(entry.getValue()); + + i++; + + } + + sb.append(" }"); + + return sb.toString(); + + } + + /** + * Iterator does not support removal, set, or concurrent modification. + */ + public Iterator<Entry<IVariable, IConstant>> iterator() { + + return Collections.unmodifiableMap(map).entrySet().iterator(); + + } + + public Iterator<IVariable> vars() { + + return Collections.unmodifiableSet(map.keySet()).iterator(); + + } + + public int size() { + + return map.size(); + + } + + public HashBindingSet clone() { + + return new HashBindingSet( this ); + + } + + /** + * Return a shallow copy of the binding set, eliminating unecessary + * variables. 
+ */ + public HashBindingSet copy(final IVariable[] variablesToKeep) { + + final HashBindingSet bs = new HashBindingSet(); + + for (IVariable<?> var : variablesToKeep) { + + final IConstant<?> val = map.get(var); + + if (val != null) { + + bs.map.put(var, val); + + } + + } + + return bs; + + } + + public boolean equals(final Object t) { + + if (this == t) + return true; + + if(!(t instanceof IBindingSet)) + return false; + + final IBindingSet o = (IBindingSet) t; + + if (size() != o.size()) + return false; + + final Iterator<Map.Entry<IVariable,IConstant>> itr = map.entrySet().iterator(); + + while(itr.hasNext()) { + + final Map.Entry<IVariable,IConstant> entry = itr.next(... [truncated message content] |
From: <ble...@us...> - 2010-10-21 17:23:14
|
Revision: 3835 http://bigdata.svn.sourceforge.net/bigdata/?rev=3835&view=rev Author: blevine218 Date: 2010-10-21 17:23:06 +0000 (Thu, 21 Oct 2010) Log Message: ----------- added true and false test cases for ConditionalRoutingOp Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java 2010-10-20 18:39:05 UTC (rev 3834) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java 2010-10-21 17:23:06 UTC (rev 3835) @@ -1623,15 +1623,262 @@ } /** - * @todo Write unit tests for the {@link ConditionalRoutingOp}? + * Unit test for {@link ConditionalRoutingOp}. This test case tests when the + * condition is true (the joins are not skipped) in which case the test + * (and results) are essentially identical to test_query_join2(). */ - public void test_query_join2_conditionalRouting() { + public void test_query_join2_conditionalRoutingTrue() throws Exception { + int startId = 1; + int joinId1 = 2; + int joinId2 = 3; + + IConstraint condition = new EQConstant(Var.var("x"), new Constant<String>("Mary")); + RunningQuery runningQuery = initQueryWithConditionalRoutingOp(condition, startId, joinId1, joinId2); - fail("write test"); + // verify solutions. + { + // the expected solution. + final IBindingSet[] expected = new IBindingSet[] {// + new ArrayBindingSet(// + new IVariable[] { Var.var("x"), Var.var("y"), Var.var("z")},// + new IConstant[] { new Constant<String>("Mary"), + new Constant<String>("Paul"), + new Constant<String>("Leon")}// + ) }; + assertSameSolutions(expected, runningQuery.iterator()); + + } + + // Wait until the query is done. 
+ runningQuery.get(); + final Map<Integer, BOpStats> statsMap = runningQuery.getStats(); + { + // validate the stats map. + assertNotNull(statsMap); + log.info(statsMap.toString()); + assertEquals(5, statsMap.size()); + } + + // validate the stats for the start operator. + { + final BOpStats stats = statsMap.get(startId); + assertNotNull(stats); + if (log.isInfoEnabled()) + log.info("start: " + stats.toString()); + + // verify query solution stats details. + assertEquals(1L, stats.chunksIn.get()); + assertEquals(1L, stats.unitsIn.get()); + assertEquals(1L, stats.unitsOut.get()); + assertEquals(1L, stats.chunksOut.get()); + } + + // validate the stats for the 1st join operator. + { + final BOpStats stats = statsMap.get(joinId1); + assertNotNull(stats); + if (log.isInfoEnabled()) + log.info("join1: " + stats.toString()); + + // verify query solution stats details. + assertEquals(1L, stats.chunksIn.get()); + assertEquals(1L, stats.unitsIn.get()); + assertEquals(1L, stats.unitsOut.get()); + assertEquals(1L, stats.chunksOut.get()); + } + + // validate the stats for the 2nd join operator. + { + final BOpStats stats = statsMap.get(joinId2); + assertNotNull(stats); + if (log.isInfoEnabled()) + log.info("join2: " + stats.toString()); + + // verify query solution stats details. + assertEquals(1L, stats.chunksIn.get()); + assertEquals(1L, stats.unitsIn.get()); + assertEquals(1L, stats.unitsOut.get()); + assertEquals(1L, stats.chunksOut.get()); + } + } + + /** + * Unit test for {@link ConditionalRoutingOp}. This test case tests when the + * condition is false (the joins are skipped). + */ + public void test_query_join2_conditionalRoutingFalse() throws Exception { + int startId = 1; + int joinId1 = 2; + int joinId2 = 3; + + // 'x' is actually bound to "Mary" so this condition will be false. 
+ IConstraint condition = new EQConstant(Var.var("x"), new Constant<String>("Fred")); + + RunningQuery runningQuery = initQueryWithConditionalRoutingOp(condition, startId, joinId1, joinId2); + // verify solutions. + { + // the expected solution. + final IBindingSet[] expected = new IBindingSet[] {// + new ArrayBindingSet(// + new IVariable[] { Var.var("x")},// + new IConstant[] { new Constant<String>("Mary")}// + ) }; + + assertSameSolutions(expected, runningQuery.iterator()); + + } + + // Wait until the query is done. + runningQuery.get(); + final Map<Integer, BOpStats> statsMap = runningQuery.getStats(); + { + // validate the stats map. + assertNotNull(statsMap); + log.info(statsMap.toString()); + assertEquals(5, statsMap.size()); + } + + // validate the stats for the start operator. + { + final BOpStats stats = statsMap.get(startId); + assertNotNull(stats); + + if (log.isInfoEnabled()) + log.info("start: " + stats.toString()); + + // verify query solution stats details. + assertEquals(1L, stats.chunksIn.get()); + assertEquals(1L, stats.unitsIn.get()); + assertEquals(1L, stats.unitsOut.get()); + assertEquals(1L, stats.chunksOut.get()); + } + + // validate the stats for the 1st join operator. This will have been skipped + // and so the counts will be 0. + { + final BOpStats stats = statsMap.get(joinId1); + assertNotNull(stats); + if (log.isInfoEnabled()) + log.info("join1: " + stats.toString()); + + // verify query solution stats details. + assertEquals(0L, stats.chunksIn.get()); + assertEquals(0L, stats.unitsIn.get()); + assertEquals(0L, stats.unitsOut.get()); + assertEquals(0L, stats.chunksOut.get()); + } + + // validate the stats for the 2nd join operator. This will have been skipped + // and so the counts will be 0. + { + final BOpStats stats = statsMap.get(joinId2); + assertNotNull(stats); + if (log.isInfoEnabled()) + log.info("join2: " + stats.toString()); + + // verify query solution stats details. 
+ assertEquals(0L, stats.chunksIn.get()); + assertEquals(0L, stats.unitsIn.get()); + assertEquals(0L, stats.unitsOut.get()); + assertEquals(0L, stats.chunksOut.get()); + } + } + /** + * Helper method to initialize a BOp tree that includes a ConditionalRoutinOp + * + * @param condition the condition to be tested in the ConditionalalRoutingOp + * @param startId the bopId of the startOp + * @param joinId1 the bopId of the first join + * @param joinId2 the bopId of the second join + * + * @return the RunningQuery created + * + * @throws Exception + */ + private RunningQuery initQueryWithConditionalRoutingOp(IConstraint condition, int startId, int joinId1, int joinId2) throws Exception { + final int predId1 = 10; + final int predId2 = 11; + final int condId = 12; + final int sliceId = 13; + + final PipelineOp startOp = new StartOp(new BOp[] {}, + NV.asMap(new NV[] {// + new NV(Predicate.Annotations.BOP_ID, startId),// + new NV(SliceOp.Annotations.EVALUATION_CONTEXT, + BOpEvaluationContext.CONTROLLER),// + })); + + final Predicate<?> pred1Op = new Predicate<E>(new IVariableOrConstant[] { + Var.var("x"), Var.var("y") }, NV + .asMap(new NV[] {// + new NV(Predicate.Annotations.RELATION_NAME, + new String[] { namespace }),// + new NV(Predicate.Annotations.BOP_ID, predId1),// + new NV(Predicate.Annotations.TIMESTAMP, ITx.READ_COMMITTED),// + })); + + final Predicate<?> pred2Op = new Predicate<E>(new IVariableOrConstant[] { + Var.var("y"), Var.var("z") }, NV + .asMap(new NV[] {// + new NV(Predicate.Annotations.RELATION_NAME, + new String[] { namespace }),// + new NV(Predicate.Annotations.BOP_ID, predId2),// + new NV(Predicate.Annotations.TIMESTAMP, ITx.READ_COMMITTED),// + })); + + final ConditionalRoutingOp cond = new ConditionalRoutingOp(new BOp[]{startOp}, + NV.asMap(new NV[]{// + new NV(BOp.Annotations.BOP_ID,condId), + new NV(PipelineOp.Annotations.SINK_REF, joinId1), + new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId), + new 
NV(ConditionalRoutingOp.Annotations.CONDITION, condition), + })); + + final PipelineOp join1Op = new PipelineJoin<E>(// + cond, pred1Op,// + NV.asMap(new NV[] {// + new NV(Predicate.Annotations.BOP_ID, joinId1),// + })); + + final PipelineOp join2Op = new PipelineJoin<E>(// + join1Op, pred2Op, // + NV.asMap(new NV[] {// + new NV(Predicate.Annotations.BOP_ID, joinId2),// + })); + + final PipelineOp sliceOp = new SliceOp(// + new BOp[]{join2Op}, + NV.asMap(new NV[] {// + new NV(BOp.Annotations.BOP_ID, sliceId),// + new NV(BOp.Annotations.EVALUATION_CONTEXT, + BOpEvaluationContext.CONTROLLER),// + })); + + final PipelineOp query = sliceOp; + + // start the query. + final UUID queryId = UUID.randomUUID(); + final IChunkMessage<IBindingSet> initialChunkMessage; + { + final IBindingSet initialBindings = new HashBindingSet(); + initialBindings.set(Var.var("x"), new Constant<String>("Mary")); + + initialChunkMessage = new LocalChunkMessage<IBindingSet>(queryEngine, + queryId, startId,// + -1, // partitionId + newBindingSetIterator(initialBindings)); + } + + RunningQuery runningQuery = queryEngine.eval(queryId, query,initialChunkMessage); + + return runningQuery; + } + + /** * Verify the expected solutions. * * @param expected This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2010-10-20 18:39:13
|
Revision: 3834 http://bigdata.svn.sourceforge.net/bigdata/?rev=3834&view=rev Author: thompsonbry Date: 2010-10-20 18:39:05 +0000 (Wed, 20 Oct 2010) Log Message: ----------- Modified BOpContext to support IMultiSourceAsynchronousIterator so we can attach sources to already running tasks. Modified BOpContextBase to hold a hard reference to the Executor to avoid problems with errors reported up from the IIndexManager if it has been closed. Since the caller now has access to the Executor after the IIndexManager is closed, the relevant code in RunningQuery now sees a RejectedExecutionException rather than an IllegalStateException. Modified RunningQuery to attach new chunks to already running tasks, at least in standalone. There is more work that needs to be done here, which falls broadly under the category of performance optimizations of the query engine. This optimization is not yet available in scale-out because an RMI is necessary back to the controller and that should not happen in the QueryEngine's run Thread. There is a known problem in which high volume queries, such as LUBM Q9 on U10 or above, can block. What appears to be happening is that a join is running into a bounded queue (a BlockingBuffer with a limited capacity). The code needs to be modified either to use an unbounded queue (potentially backed by a direct ByteBuffer), or to emit multiple IChunkMessages (this option was historically used in scale-out), or to chain the consumers and producers together as we did historically in the trunk (this option is very efficient in standalone). 
Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContextBase.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunState.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/resources/logging/log4j.properties branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestFederatedQueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata-jini/src/test/com/bigdata/bop/fed/jini/TestJiniFederatedQueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java 2010-10-20 18:32:25 UTC (rev 3833) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java 2010-10-20 18:39:05 UTC (rev 3834) @@ -32,14 +32,14 @@ import com.bigdata.bop.engine.BOpStats; import com.bigdata.bop.engine.IChunkMessage; import com.bigdata.bop.engine.IRunningQuery; -import com.bigdata.bop.engine.RunningQuery; import com.bigdata.btree.ILocalBTreeView; import com.bigdata.journal.IIndexManager; import com.bigdata.relation.accesspath.IAccessPath; import com.bigdata.relation.accesspath.IAsynchronousIterator; import com.bigdata.relation.accesspath.IBlockingBuffer; +import com.bigdata.relation.accesspath.IMultiSourceAsynchronousIterator; +import com.bigdata.relation.accesspath.MultiSourceSequentialAsynchronousIterator; import 
com.bigdata.service.IBigdataFederation; -import com.ibm.icu.impl.ByteBuffer; /** * The evaluation context for the operator (NOT serializable). @@ -57,7 +57,7 @@ private final BOpStats stats; - private final IAsynchronousIterator<E[]> source; + private final IMultiSourceAsynchronousIterator<E[]> source; private final IBlockingBuffer<E[]> sink; @@ -93,28 +93,31 @@ /** * Where to read the data to be consumed by the operator. - * - * @todo Since joins now run from locally materialized data in all cases the - * API could be simplified somewhat given that we know that there will - * be a single "source" chunk of binding sets. Also, the reason for - * the {@link IAsynchronousIterator} here is that a downstream join - * could error (or satisfy a slice) and halt the upstream joins. That - * is being coordinated through the {@link RunningQuery} now. - * <p> - * It is not yet clear what the right API is for the source. The - * iterator model might be just fine, but might not need to be - * asynchronous and does not need to be closeable. - * <p> - * Perhaps the right thing is to expose an object with a richer API - * for obtaining various kinds of iterators or even access to the - * direct {@link ByteBuffer}s backing the data (for high volume joins, - * external merge sorts, etc). */ public final IAsynchronousIterator<E[]> getSource() { return source; } /** + * Attach another source. The decision to attach the source is mutex with + * respect to the decision that the source reported by {@link #getSource()} + * is exhausted. + * + * @param source + * The source. + * + * @return <code>true</code> iff the source was attached. + */ + public boolean addSource(IAsynchronousIterator<E[]> source) { + + if (source == null) + throw new IllegalArgumentException(); + + return this.source.add(source); + + } + + /** * Where to write the output of the operator. 
* * @see PipelineOp.Annotations#SINK_REF @@ -199,7 +202,7 @@ throw new IllegalArgumentException(); this.partitionId = partitionId; this.stats = stats; - this.source = source; + this.source = new MultiSourceSequentialAsynchronousIterator<E[]>(source); this.sink = sink; this.sink2 = sink2; // may be null } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContextBase.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContextBase.java 2010-10-20 18:32:25 UTC (rev 3833) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContextBase.java 2010-10-20 18:39:05 UTC (rev 3834) @@ -60,6 +60,11 @@ */ private final IIndexManager indexManager; + /** + * The executor service. + */ + private final Executor executor; + /** * The <strong>local</strong> {@link IIndexManager}. Query evaluation occurs * against the local indices. In scale-out, query evaluation proceeds shard @@ -88,13 +93,13 @@ * <em>local</em> {@link #getIndexManager() index manager}. */ public final Executor getExecutorService() { - return indexManager.getExecutorService(); + return executor; } public BOpContextBase(final QueryEngine queryEngine) { this(queryEngine.getFederation(), queryEngine.getIndexManager()); - + } /** @@ -119,6 +124,9 @@ this.fed = fed; this.indexManager = indexManager; + + this.executor = indexManager == null ? 
null : indexManager + .getExecutorService(); } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java 2010-10-20 18:32:25 UTC (rev 3833) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java 2010-10-20 18:39:05 UTC (rev 3834) @@ -38,7 +38,6 @@ import java.util.concurrent.FutureTask; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.PriorityBlockingQueue; -import java.util.concurrent.RejectedExecutionException; import java.util.concurrent.atomic.AtomicReference; import org.apache.log4j.Logger; @@ -366,6 +365,16 @@ } /** + * {@link QueryEngine}s are using with a singleton pattern. They must be + * torn down automatically once they are no longer reachable. + */ + @Override + protected void finalize() throws Throwable { + shutdownNow(); + super.finalize(); + } + + /** * The service on which we run the query engine. This is started by {@link #init()}. */ private final AtomicReference<ExecutorService> engineService = new AtomicReference<ExecutorService>(); @@ -430,35 +439,12 @@ private class QueryEngineTask implements Runnable { public void run() { if(log.isInfoEnabled()) - log.info("running: " + this); + log.info("Running: " + this); while (true) { try { final RunningQuery q = priorityQueue.take(); - final UUID queryId = q.getQueryId(); - if (q.isCancelled()) - continue; - final IChunkMessage<IBindingSet> chunk = q.chunksIn.poll(); - if (chunk == null) - continue; - if (log.isTraceEnabled()) - log.trace("Accepted chunk: " + chunk); - try { - // create task. - final FutureTask<?> ft = q.newChunkTask(chunk); - if (log.isDebugEnabled()) - log.debug("Running chunk: " + chunk); - // execute task. - execute(ft); - } catch (RejectedExecutionException ex) { - // shutdown of the pool (should be an unbounded - // pool). 
- log.warn("Dropping chunk: queryId=" + queryId); - continue; - } catch (Throwable ex) { - // halt that query. - q.halt(ex); - continue; - } + if (!q.isDone()) + q.consumeChunk(); } catch (InterruptedException e) { /* * Note: Uncomment the stack trace here if you want to find Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunState.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunState.java 2010-10-20 18:32:25 UTC (rev 3833) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunState.java 2010-10-20 18:39:05 UTC (rev 3834) @@ -32,6 +32,7 @@ import java.util.Arrays; import java.util.Collections; import java.util.Date; +import java.util.LinkedHashMap; import java.util.LinkedHashSet; import java.util.Map; import java.util.Set; @@ -195,7 +196,7 @@ * readily exposed as {@link Map} object. If we were to expose the map, it * would have to be via a get(key) style interface. */ - /* private */final Map<Integer/* bopId */, AtomicLong/* runningCount */> runningMap = new ConcurrentHashMap<Integer, AtomicLong>(); + /* private */final Map<Integer/* bopId */, AtomicLong/* runningCount */> runningMap = new LinkedHashMap<Integer, AtomicLong>(); /** * A collection of the operators which have executed at least once. @@ -367,9 +368,7 @@ /** * Update the {@link RunState} to indicate that the operator identified in * the {@link StartOpMessage} will execute and will consume the one or more - * {@link IChunkMessage}s. Both the total #of available messages and the #of - * messages available for that operator are incremented by - * {@link StartOpMessage#nmessages}. + * {@link IChunkMessage}s. * * @return <code>true</code> if this is the first time we will evaluate the * op. 
@@ -414,6 +413,72 @@ } /** + * Update the {@link RunState} to indicate that the data in the + * {@link IChunkMessage} was attached to an already running task for the + * target operator. + * + * @param msg + * @param runningOnServiceId + * @return <code>true</code> if this is the first time we will evaluate the + * op. + * + * @throws IllegalArgumentException + * if the argument is <code>null</code>. + * @throws TimeoutException + * if the deadline for the query has passed. + */ + synchronized + public void addSource(final IChunkMessage<?> msg, + final UUID runningOnServiceId) throws TimeoutException { + + if (msg == null) + throw new IllegalArgumentException(); + + if (allDone.get()) + throw new IllegalStateException(ERR_QUERY_HALTED); + + if (deadline < System.currentTimeMillis()) + throw new TimeoutException(ERR_DEADLINE); + + nsteps.incrementAndGet(); + + final int bopId = msg.getBOpId(); + final int nmessages = 1; + + if (runningMap.get(bopId) == null) { + /* + * Note: There is a race condition in RunningQuery such that it is + * possible to add a 2nd source to an operator task before the task + * has begun to execute. Since the task calls startOp() once it + * begins to execute, this means that addSource() can be ordered + * before startOp() for the same task. This code block explicitly + * allows this condition and sets a 0L in the runningMap for the + * [bopId]. 
+ */ + AtomicLong n = runningMap.get(bopId); + if (n == null) + runningMap.put(bopId, n = new AtomicLong()); +// throw new AssertionError(ERR_OP_NOT_STARTED + " msg=" + msg +// + ", this=" + this); + } + + messagesConsumed(bopId, nmessages); + + if (TableLog.tableLog.isInfoEnabled()) { + TableLog.tableLog.info(getTableRow("addSrc", runningOnServiceId, + bopId, msg.getPartitionId(), nmessages/* fanIn */, + null/* cause */, null/* stats */)); + } + + if (log.isInfoEnabled()) + log.info("startOp: " + toString() + " : bop=" + bopId); + + if (log.isTraceEnabled()) + log.trace(msg.toString()); + + } + + /** * Update the {@link RunState} to reflect the post-condition of the * evaluation of an operator against one or more {@link IChunkMessage}, * adjusting the #of messages available for consumption by the operator Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java 2010-10-20 18:32:25 UTC (rev 3833) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java 2010-10-20 18:39:05 UTC (rev 3834) @@ -31,6 +31,7 @@ import java.util.Map; import java.util.UUID; import java.util.concurrent.BlockingQueue; +import java.util.concurrent.Callable; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ExecutionException; import java.util.concurrent.Future; @@ -40,6 +41,7 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.locks.ReentrantLock; @@ -53,11 +55,13 @@ import com.bigdata.bop.NoSuchBOpException; import com.bigdata.bop.PipelineOp; import com.bigdata.bop.solutions.SliceOp; +import 
com.bigdata.io.DirectBufferPoolAllocator.IAllocationContext; import com.bigdata.journal.IIndexManager; import com.bigdata.journal.ITx; import com.bigdata.relation.accesspath.BlockingBuffer; import com.bigdata.relation.accesspath.IAsynchronousIterator; import com.bigdata.relation.accesspath.IBlockingBuffer; +import com.bigdata.relation.accesspath.MultiplexBlockingBuffer; import com.bigdata.service.IBigdataFederation; import com.bigdata.striterator.ICloseableIterator; import com.bigdata.util.concurrent.Haltable; @@ -116,6 +120,11 @@ /** The query. */ final private PipelineOp query; +// /** +// * @see QueryEngineTestAnnotations#COMBINE_RECEIVED_CHUNKS +// */ +// final protected boolean combineReceivedChunks; + /** * An index from the {@link BOp.Annotations#BOP_ID} to the {@link BOp}. This * index is generated by the constructor. It is immutable and thread-safe. @@ -129,10 +138,20 @@ final private Haltable<Void> future = new Haltable<Void>(); /** - * A collection of {@link Future}s for currently executing operators for + * A collection of (bopId,partitionId) keys mapped onto a collection of + * operator task evaluation contexts for currently executing operators for * this query. + * + * @todo Futures are not being cleared from this collection as operators + * complete. This should be done systematically in order to ensure + * that any allocations associated with an operator task execution are + * released in a timely manner for long-running operators. (In fact, + * the {@link IAllocationContext} should take care of most of the + * issues here but we could still wind up with a lot of entries in + * this map in scale-out where there can be up to one per bop per + * shard in a given query.) 
*/ - private final ConcurrentHashMap<BSBundle, Future<?>> operatorFutures = new ConcurrentHashMap<BSBundle, Future<?>>(); + private final ConcurrentHashMap<BSBundle, ChunkFutureTask> operatorFutures; /** * The runtime statistics for each {@link BOp} in the query and @@ -141,6 +160,50 @@ final private ConcurrentHashMap<Integer/* bopId */, BOpStats> statsMap; /** + * When running in stand alone, we can chain together the operators and have + * much higher throughput. Each operator has an {@link BlockingBuffer} which + * is essentially its input queue. The operator will drain its input queue + * using {@link BlockingBuffer#iterator()}. + * <p> + * Each operator closes its {@link IBlockingBuffer} sink(s) once its own + * source has been closed and it has finished processing that source. Since + * multiple producers can target the same operator, we need a means to + * ensure that the source for the target operator is not closed until each + * producer which targets that operator has closed its corresponding sink. + * <p> + * In order to support this many-to-one producer/consumer pattern, we wrap + * the input queue (a {@link BlockingBuffer}) for each operator having + * multiple sources with a {@link MultiplexBlockingBuffer}. This class gives + * each producer their own view on the underlying {@link BlockingBuffer}. + * The underlying {@link BlockingBuffer} will not be closed until all + * source(s) have closed their view of that buffer. This collection keeps + * track of the {@link MultiplexBlockingBuffer} wrapping the + * {@link BlockingBuffer} which is the input queue for each operator. + * <p> + * The input queues themselves are {@link BlockingBuffer} objects. Those + * objects are available from this map using + * {@link MultiplexBlockingBuffer#getBackingBuffer()}. These buffers are + * pre-allocated by {@link #populateInputBufferMap(BOp)}. + * {@link #startTasks(BOp)} is responsible for starting the operator tasks + * in a "back-to-front" order. 
{@link #startQuery(IChunkMessage)} kicks off + * the query and invokes {@link #startTasks(BOp)} to chain the input queues + * and output queues together (when so chained, the output queues are skins + * over the input queues obtained from {@link MultiplexBlockingBuffer}). + * + * FIXME The inputBufferMap will let us construct consumer producer chains + * where the consumer _waits_ for all producer(s) which target the consumer + * to close the sink associated with that consumer. Unlike when attaching an + * {@link IChunkMessage} to an already running operator, the consumer will + * NOT terminate (due to lack of input) until each running producer + * targeting that consumer terminates. This will improve concurrency, + * result in fewer task instances, and have better throughput than attaching + * a chunk to an already running task. However, in scale-out we will have + * tasks running on different nodes so we can not always chain together the + * producer and consumer in this tightly integrated manner. + */ + final private ConcurrentHashMap<Integer/*operator*/, MultiplexBlockingBuffer<IBindingSet[]>/*inputQueue*/> inputBufferMap; + + /** * The buffer used for the overall output of the query pipeline. * <p> * Note: This only exists on the query controller, and then only when the @@ -162,7 +225,9 @@ * A lock guarding various state changes. This guards changes to the * internal state of the {@link #runState} object. It is also used to * serialize requests to {@link #acceptChunk(IChunkMessage)} and - * {@link #cancel(boolean)}. + * {@link #cancel(boolean)} and to make an atomic decision concerning whether to + * attach a new {@link IChunkMessage} to an operator task which is already + * running or to start a new task for that message. 
* * @see RunState */ @@ -357,8 +422,14 @@ this.query = query; +// combineReceivedChunks = query.getProperty( +// QueryEngineTestAnnotations.COMBINE_RECEIVED_CHUNKS, +// QueryEngineTestAnnotations.DEFAULT_COMBINE_RECEIVED_CHUNKS); + this.bopIndex = BOpUtility.getIndex(query); + this.operatorFutures = new ConcurrentHashMap<BSBundle, ChunkFutureTask>(); + /* * Setup the BOpStats object for each pipeline operator in the query. */ @@ -366,8 +437,10 @@ runState = new RunState(this); - statsMap = createStatsMap(bopIndex); + statsMap = new ConcurrentHashMap<Integer, BOpStats>(); + populateStatsMap(query); + if (!query.isMutation()) { final BOpStats queryStats = statsMap.get(query.getId()); @@ -394,43 +467,101 @@ } + if(!queryEngine.isScaleOut()) { + /* + * Since the query engine is using the stand alone database mode we + * will now setup the input queues for each operator. Those queues + * will be used by each operator which targets a given operator. + * Each operator will start once and will run until all of its + * source(s) are closed. + * + * This allocates the buffers in a top-down manner (this is the + * reverse of the pipeline evaluation order). Allocation halts at if + * we reach an operator without children (e.g., StartOp) or an + * operator which is a CONTROLLER (Union). (If allocation does not + * halt at those boundaries then we can allocate buffers which will + * not be used. On the one hand, the StartOp receives a message + * containing the chunk to be evaluated. On the other hand, the + * buffers are not shared between the parent and a subquery so + * allocation within the subquery is wasted. This is also true for + * the [statsMap].) + */ + inputBufferMap = null; +// inputBufferMap = new ConcurrentHashMap<Integer, MultiplexBlockingBuffer<IBindingSet[]>>(); +// populateInputBufferMap(query); + } else { + inputBufferMap = null; + } + } /** - * Pre-populate a map with {@link BOpStats} objects for a query. 
- * - * @param bopIndex - * A map of the operators in the query which have assigned - * bopIds. - * - * @return A new map with an entry for each operator with a bopId which - * associates that operator with its {@link BOpStats} object. + * Pre-populate a map with {@link BOpStats} objects for the query. Operators + * in subqueries are not visited since they will be assigned {@link BOpStats} + * objects when they are run as a subquery. */ - static private ConcurrentHashMap<Integer, BOpStats> createStatsMap( - final Map<Integer, BOp> bopIndex) { + private void populateStatsMap(final BOp op) { - ConcurrentHashMap<Integer, BOpStats> statsMap = new ConcurrentHashMap<Integer, BOpStats>(); + if(!(op instanceof PipelineOp)) + return; + + final PipelineOp bop = (PipelineOp) op; - for (Map.Entry<Integer, BOp> e : bopIndex.entrySet()) { + final int bopId = bop.getId(); + + statsMap.put(bopId, bop.newStats()); - final int bopId = e.getKey(); - - final BOp tmp = e.getValue(); - - if ((tmp instanceof PipelineOp)) { - - final PipelineOp bop = (PipelineOp) tmp; - - statsMap.put(bopId, bop.newStats()); - + if (!op.getProperty(BOp.Annotations.CONTROLLER, + BOp.Annotations.DEFAULT_CONTROLLER)) { + /* + * Visit children, but not if this is a CONTROLLER operator since + * its children belong to a subquery. + */ + for (BOp t : op.args()) { + // visit children (recursion) + populateStatsMap(t); } - } - - return statsMap; - + } +// /** +// * Pre-populate a map with {@link MultiplexBlockingBuffer} objects for the +// * query. Operators in subqueries are not visited since they will be +// * assigned buffer objects when they are run as a subquery. Operators +// * without children are not visited since they can not be the targets of +// * some other operator and hence do not need to have an assigned input +// * buffer. 
+// */ +// private void populateInputBufferMap(final BOp op) { +// +// if(!(op instanceof PipelineOp)) +// return; +// +// if (op.arity() == 0) +// return; +// +// final PipelineOp bop = (PipelineOp) op; +// +// final int bopId = bop.getId(); +// +// inputBufferMap.put(bopId, new MultiplexBlockingBuffer<IBindingSet[]>( +// bop.newBuffer(statsMap.get(bopId)))); +// +// if (!op.getProperty(BOp.Annotations.CONTROLLER, +// BOp.Annotations.DEFAULT_CONTROLLER)) { +// /* +// * Visit children, but not if this is a CONTROLLER operator since +// * its children belong to a subquery. +// */ +// for (BOp t : op.args()) { +// // visit children (recursion) +// populateInputBufferMap(t); +// } +// } +// +// } + /** * Take a chunk generated by some pass over an operator and make it * available to the target operator. How this is done depends on whether the @@ -465,6 +596,15 @@ if (sink == null) throw new IllegalArgumentException(); + if (inputBufferMap != null && inputBufferMap.get(sinkId) != null) { + /* + * FIXME The sink is just a wrapper for the input buffer so we do + * not need to do anything to propagate the data from one operator + * to the next. + */ + return 0; + } + /* * Note: The partitionId will always be -1 in scale-up. */ @@ -519,6 +659,9 @@ * * @param msg * The chunk. + * + * @todo Does this method really need the {@link #lock}? I doubt it since + * {@link #chunksIn} is thread-safe. */ protected void acceptChunk(final IChunkMessage<IBindingSet> msg) { @@ -575,6 +718,11 @@ runState.startQuery(msg); +// if (inputBufferMap != null) { +// // Prestart a task for each operator. +// startTasks(query); +// } + } catch (TimeoutException ex) { halt(ex); @@ -587,6 +735,61 @@ } +// /** +// * Prestart a task for each operator. The operators are started in +// * back-to-front order (reverse pipeline evaluation order). The input queues +// * for the operators were created in by {@link #populateInputBufferMap(BOp)} +// * and are found in {@link #inputBufferMap}. 
The output queues for the +// * operators are skins over the output queues obtained from +// * {@link MultiplexBlockingBuffer}. +// * +// * @param op +// * The +// * +// * @see #inputBufferMap +// */ +// private void startTasks(final BOp op) { +// +// if(!(op instanceof PipelineOp)) +// return; +// +// if (op.arity() == 0) +// return; +// +// final PipelineOp bop = (PipelineOp) op; +// +// final int bopId = bop.getId(); +// +// final MultiplexBlockingBuffer<IBindingSet[]> inputBuffer = inputBufferMap +// .get(bopId); +// +// if (inputBuffer == null) +// throw new AssertionError("No input buffer? " + op); +// +// final IAsynchronousIterator<IBindingSet[]> src = inputBuffer +// .getBackingBuffer().iterator(); +// +// final ChunkTask chunkTask = new ChunkTask(bopId, -1/* partitionId */, +// src); +// +// final FutureTask<Void> futureTask = wrapChunkTask(chunkTask); +// +// queryEngine.execute(futureTask); +// +// if (!op.getProperty(BOp.Annotations.CONTROLLER, +// BOp.Annotations.DEFAULT_CONTROLLER)) { +// /* +// * Visit children, but not if this is a CONTROLLER operator since +// * its children belong to a subquery. +// */ +// for (BOp t : op.args()) { +// // visit children (recursion) +// startTasks(t); +// } +// } +// +// } + /** * Message provides notice that the operator has started execution and will * consume some specific number of binding set chunks. @@ -747,85 +950,290 @@ } /** - * Return a {@link FutureTask} which will consume the binding set chunk. The - * caller must run the {@link FutureTask}. + * Consume zero or more chunks in the input queue for this query. The + * chunk(s) will either be assigned to an already running task for the + * target operator or they will be assigned to new tasks. * - * @param chunk - * A chunk to be consumed. + * FIXME Drain the input queue, assigning any chunk waiting to a task. If + * the task is already running, then add the chunk to that task. Otherwise + * start a new task. 
*/ - @SuppressWarnings("unchecked") - protected FutureTask<Void> newChunkTask( - final IChunkMessage<IBindingSet> chunk) { - - // create runnable to evaluate a chunk for an operator and partition. - final ChunkTask chunkTask = new ChunkTask(chunk); - - // wrap runnable. - final FutureTask<Void> f2 = new FutureTask(chunkTask, null/* result */); - - final BSBundle bundle = new BSBundle(chunk.getBOpId(), chunk - .getPartitionId()); - - // add to list of active futures for this query. - if (operatorFutures.put(bundle, f2) != null) { + protected void consumeChunk() { + final IChunkMessage<IBindingSet> msg = chunksIn.poll(); + if (msg == null) + return; + try { + if (!msg.isMaterialized()) + throw new IllegalStateException(); + if (log.isTraceEnabled()) + log.trace("Accepted chunk: " + msg); + final BSBundle bundle = new BSBundle(msg.getBOpId(), msg + .getPartitionId()); /* - * FIXME This indicates that we have more than one future for the - * same (bopId,shardId). When this is true we are losing track of - * with the consequence that we can not properly cancel them. - * Instead of losing track like this, we should be targeting the - * running operator instance with the new chunk. This needs to be - * done atomically. + * Look for instance of this task which is already running. */ -// throw new AssertionError(); + final ChunkFutureTask chunkFutureTask = operatorFutures.get(bundle); + if (!queryEngine.isScaleOut() && chunkFutureTask != null) { + /* + * Attempt to atomically attach the message as another src. + */ + if (chunkFutureTask.chunkTask.context.addSource(msg + .getChunkAccessor().iterator())) { + lock.lock(); + try { + /* + * message was added to a running task. + * + * FIXME This needs to be an RMI in scale-out back to + * the query controller so it can update the #of + * messages which are being consumed by this task. 
+ * However, doing RMI here will add latency into the + * thread submitting tasks for evaluation and the + * coordination overhead of addSource() in scale-out may + * be too high. However, if we do not combine sources in + * scale-out then we may have too much overhead in terms + * of the #of running tasks with few tuples per task. + * Another approach is the remote async iterator with + * multiple sources (parallel multi source iterator). + * + * FIXME This code path is NOT being taken in scale-out + * right now since it would not get the message to the + * query controller. We will need to add addSource() to + * IQueryClient parallel to startOp() and haltOp() for + * this to work. + */ + runState.addSource(msg, queryEngine.getServiceUUID()); + return; + } finally { + lock.unlock(); + } + } + } + // wrap runnable. + final ChunkFutureTask ft = new ChunkFutureTask(new ChunkTask(msg)); + // add to list of active futures for this query. + if (operatorFutures.put(bundle, ft) != null) { + /* + * Note: This can cause the FutureTask to be accessible (above) + * before startOp() has been called for that ChunkTask (the + * latter occurs when the chunk task actually runs.) This a race + * condition has been resolved in RunState by allowing + * addSource() even when there is no registered task running for + * that [bopId]. + * + * FIXME This indicates that we have more than one future for + * the same (bopId,shardId). When this is true we are losing + * track of Futures with the consequence that we can not + * properly cancel them. Instead of losing track like this, we + * should be targeting the running operator instance with the + * new chunk. This needs to be done atomically, e.g., using the + * [lock]. 
+ * + * Even if we only have one task per operator in standalone and + * we attach chunks to an already running task in scale-out, + * there is still the possibility in scale-out that a task may + * have closed its source but still be running, in which case we + * would lose the Future for the already running task when we + * start a new task for the new chunk for the target operator. + */ + // throw new AssertionError(); + } + // submit task for execution (asynchronous). + queryEngine.execute(ft); + } catch (Throwable ex) { + // halt query. + throw new RuntimeException(halt(ex)); } - - // return : caller will execute. - return f2; - } - - /* - * @todo Possible class to give us more information about a running operator - * so we can attach a new chunk to the source for a running instance. An - * alternative is to attach the same sinks to each instance of the operator, - * but then we get into trouble with the operator implementations which will - * close their sinks when they get to the bottom of their processing loop. - */ -// private static class RunningFutureContext { + +// /** +// * Return a {@link FutureTask} which will consume the binding set chunk. The +// * caller must run the {@link FutureTask}. +// * +// * @param chunk +// * A chunk to be consumed. +// */ +// private FutureTask<Void> newChunkTask( +// final IChunkMessage<IBindingSet> chunk) { // -// private final Future<Void> f; -// private final BOpContext<IBindingSet> context; -// private final ChunkTask chunkTask; +// if (!chunk.isMaterialized()) +// throw new IllegalStateException(); // -// public RunningFutureContext(final Future<Void> f, -// final BOpContext<IBindingSet> context, final ChunkTask chunkTask) { -// this.f = f; -// this.context = context; -// this.chunkTask = chunkTask; -// } +// // create runnable to evaluate a chunk for an operator and partition. 
+// final ChunkTask chunkTask = new ChunkTask(chunk); +// +//// return wrapChunkTask(chunkTask); +//// +//// } +//// +//// protected FutureTask<Void> wrapChunkTask(final ChunkTask chunkTask) { +// +// final BSBundle bundle = new BSBundle(chunkTask.bopId, +// chunkTask.partitionId); // -// public void addMessage(final IChunkMessage<IBindingSet> msg) { -// context.getSource(); -// throw new UnsupportedOperationException(); +// // wrap runnable. +// final ChunkFutureTask f2 = new ChunkFutureTask(chunkTask); +// +// // add to list of active futures for this query. +// if (operatorFutures.put(bundle, f2) != null) { +// /* +// * FIXME This indicates that we have more than one future for the +// * same (bopId,shardId). When this is true we are losing track of +// * Futures with the consequence that we can not properly cancel +// * them. Instead of losing track like this, we should be targeting +// * the running operator instance with the new chunk. This needs to +// * be done atomically, e.g., using the [lock]. +// * +// * Even if we only have one task per operator in standalone and we +// * attach chunks to an already running task in scale-out, there is +// * still the possibility in scale-out that a task may have closed +// * its source but still be running, in which case we would lose the +// * Future for the already running task when we start a new task for +// * the new chunk for the target operator. +// */ +//// throw new AssertionError(); // } -// +// +// // return : caller will execute. +// return f2; +// // } + + /** + * A {@link FutureTask} which exposes the {@link ChunkTask} which is being + * evaluated. + */ + private class ChunkFutureTask extends FutureTask<Void> { + + public final ChunkTask chunkTask; + + public ChunkFutureTask(final ChunkTask chunkTask) { + +// super(chunkTask, null/* result */); + + // Note: wraps chunk task to ensure source and sinks get closed. 
+ super(new ChunkTaskWrapper(chunkTask), null/* result */); + + this.chunkTask = chunkTask; + + } + + } + + /** + * Wraps the {@link ChunkTask} and handles various handshaking with the + * {@link RunningQuery} and the {@link RunState}. Since starting and + * stopping a {@link ChunkTask} requires handshaking with the query + * controller, it is important that these actions take place once the task + * has been submitted - otherwise they would be synchronous in the loop + * which consumes available chunks and generates new {@link ChunkTask}s. + */ + private class ChunkTaskWrapper implements Runnable { + + private final ChunkTask t; + + public ChunkTaskWrapper(final ChunkTask chunkTask) { + + if (chunkTask == null) + throw new IllegalArgumentException(); + + this.t = chunkTask; + + } + + public void run() { + final UUID serviceId = queryEngine.getServiceUUID(); + final int messagesIn = 1; // accepted one IChunkMessage. FIXME + // Problem when chaining buffers? + try { + /* + * Note: This is potentially an RMI back to the controller. It + * is invoked from within the running task in order to remove + * the latency for that RMI from the thread which submits tasks + * to consume chunks. + */ + clientProxy.startOp(new StartOpMessage(queryId, t.bopId, + t.partitionId, serviceId, messagesIn)); + t.call(); + // Send message to controller. + final HaltOpMessage msg = new HaltOpMessage(queryId, t.bopId, + t.partitionId, serviceId, null/* cause */, t.sinkId, + t.sinkMessagesOut.get(), t.altSinkId, + t.altSinkMessagesOut.get(), t.context.getStats()); + try { + t.context.getExecutorService().execute( + new SendHaltMessageTask(clientProxy, msg, + RunningQuery.this)); + } catch (RejectedExecutionException ex) { + // e.g., service is shutting down. + log.error("Could not send message: " + msg, ex); + } + } catch (Throwable ex1) { + + // Log an error. 
+ log.error("queryId=" + queryId + ", bopId=" + t.bopId, ex1); + + /* + * Mark the query as halted on this node regardless of whether + * we are able to communicate with the query controller. + * + * Note: Invoking halt(t) here will log an error. This logged + * error message is necessary in order to catch errors in + * clientProxy.haltOp() (above and below). + */ + final Throwable firstCause = halt(ex1); + + final HaltOpMessage msg = new HaltOpMessage(queryId, t.bopId, + t.partitionId, serviceId, firstCause, t.sinkId, + t.sinkMessagesOut.get(), t.altSinkId, + t.altSinkMessagesOut.get(), t.context.getStats()); + try { + /* + * Queue a task to send the halt message to the query + * controller. + */ + t.context.getExecutorService().execute( + new SendHaltMessageTask(clientProxy, msg, + RunningQuery.this)); + } catch (RejectedExecutionException ex) { + // e.g., service is shutting down. + log.warn("Could not send message: " + msg, ex); + } catch (Throwable ex) { + log + .error("Could not send message: " + msg + " : " + + ex, ex); + } + + } + + } + + } +// final BOpContext<?> context = chunkTask.context; +// context.getSource().close(); +// if (context.getSink() != null) { +// context.getSink().close(); +// } +// if (context.getSink2() != null) { +// context.getSink2().close(); +// } + /** * Runnable evaluates an operator for some chunk of inputs. In scale-out, * the operator may be evaluated against some partition of a scale-out * index. */ - private class ChunkTask implements Runnable { + private class ChunkTask implements Callable<Void> { /** Alias for the {@link ChunkTask}'s logger. */ private final Logger log = chunkTaskLog; - /** - * The message with the materialized chunk to be consumed by the - * operator. - */ - final IChunkMessage<IBindingSet> msg; +// /** +// * The message with the materialized chunk to be consumed by the +// * operator. +// */ +// final IChunkMessage<IBindingSet> msg; /** The index of the bop which is being evaluated. 
*/ private final int bopId; @@ -882,13 +1290,27 @@ */ private final FutureTask<Void> ft; + /** #of chunk messages out to sink. */ + final AtomicInteger sinkMessagesOut = new AtomicInteger(0); + + /** #of chunk messages out to altSink. */ + final AtomicInteger altSinkMessagesOut = new AtomicInteger(0); + /** - * Create a task to consume a chunk. This looks up the {@link BOp} which - * is the target for the message in the {@link RunningQuery#bopIndex}, - * creates the sink(s) for the {@link BOp}, creates the - * {@link BOpContext} for that {@link BOp}, and wraps the value returned - * by {@link PipelineOp#eval(BOpContext)} in order to handle - * the outputs written on those sinks. + * A human readable representation of the {@link ChunkTask}'s state. + */ + public String toString() { + return "ChunkTask" + // + "{query=" + queryId + // + ",bopId=" + bopId + // + ",partitionId=" + partitionId + // + ",sinkId=" + sinkId + // + ",altSinkId=" + altSinkId + // + "}"; + } + + /** + * Create a task to consume a chunk. * * @param msg * A message containing the materialized chunk and metadata @@ -900,18 +1322,48 @@ */ public ChunkTask(final IChunkMessage<IBindingSet> msg) { - if (msg == null) - throw new IllegalArgumentException(); + this(msg.getBOpId(), msg.getPartitionId(), msg.getChunkAccessor() + .iterator()); + + } + +// /** +// * Alternative constructor used when chaining the operators together in +// * standalone. The input queue of an operator is wrapped and used as the +// * output queue of each operator which targets that operator as either +// * its default or alternative sink. +// */ +// public ChunkTask(final int bopId) { +// +// this(bopId, -1/* partitionId */, inputBufferMap.get(bopId) +// .getBackingBuffer().iterator()); +// +// } + + /** + * Core implementation. 
+ * <p> + * This looks up the {@link BOp} which is the target for the message in + * the {@link RunningQuery#bopIndex}, creates the sink(s) for the + * {@link BOp}, creates the {@link BOpContext} for that {@link BOp}, and + * wraps the value returned by {@link PipelineOp#eval(BOpContext)} in + * order to handle the outputs written on those sinks. + * + * @param bopId + * The operator to which the message was addressed. + * @param partitionId + * The partition identifier to which the message was + * addressed. + * @param source + * Where the task will read its inputs. + */ + public ChunkTask(final int bopId, final int partitionId, + final IAsynchronousIterator<IBindingSet[]> src) { + + this.bopId = bopId; - if (!msg.isMaterialized()) - throw new IllegalStateException(); + this.partitionId = partitionId; - this.msg = msg; - - bopId = msg.getBOpId(); - - partitionId = msg.getPartitionId(); - bop = bopIndex.get(bopId); if (bop == null) @@ -947,13 +1399,6 @@ + bop); } -// if (sinkId != null && altSinkId != null -// && sinkId.intValue() == altSinkId.intValue()) { -// throw new RuntimeException( -// "The primary and alternative sink may not be the same operator: " -// + bop); -// } - /* * Setup the BOpStats object. For some operators, e.g., SliceOp, * this MUST be the same object across all invocations of that @@ -965,34 +1410,30 @@ * since that would cause double counting when the same object is * used for each invocation of the operator. * - * @todo If we always pass in a shared stats object then we will - * have live reporting on all instances of the task evaluating each - * operator in the query but there could be more contention for the - * counters. However, if we chain the operators together then we are - * likely to run one task instance per operator, at least in - * standalone. Try it w/ always shared and see if there is a hot - * spot? 
+ * Note: By using a shared stats object we have live reporting on + * all instances of the task which are being evaluated on the query + * controller (tasks running on peers always have distinct stats + * objects and those stats are aggregated when the task finishes). */ final BOpStats stats; - if (((PipelineOp) bop).isSharedState()) { -// final BOpStats foo = op.newStats(); -// final BOpStats bar = statsMap.putIfAbsent(bopId, foo); -// stats = (bar == null ? foo : bar); + if (((PipelineOp) bop).isSharedState() || statsMap != null) { + // shared stats object. stats = statsMap.get(bopId); } else { + // distinct stats objects, aggregated as each task finishes. stats = op.newStats(); } assert stats != null; - sink = (p == null ? queryBuffer : op.newBuffer(stats)); + sink = (p == null ? queryBuffer : newBuffer(op, sinkId, stats)); altSink = altSinkId == null ? null - : altSinkId.equals(sinkId) ? sink : op.newBuffer(stats); + : altSinkId.equals(sinkId) ? sink : newBuffer(op, sinkId, + stats); // context : @todo pass in IChunkMessage or IChunkAccessor context = new BOpContext<IBindingSet>(RunningQuery.this, - partitionId, stats, msg.getChunkAccessor().iterator(), - sink, altSink); + partitionId, stats, src, sink, altSink); // FutureTask for operator execution (not running yet). if ((ft = op.eval(context)) == null) @@ -1001,6 +1442,38 @@ } /** + * Factory returns the {@link IBlockingBuffer} on which the operator + * should write its outputs which target the specified <i>sinkId</i>. + * + * @param op + * The operator whose evaluation task is being constructed. + * @param sinkId + * The identifier for an operator which which the task will + * write its solutions (either the primary or alternative + * sink). + * @param stats + * The statistics object for the evaluation of the operator. + * + * @return The buffer on which the operator should write outputs which + * target that sink. 
+ */ + private IBlockingBuffer<IBindingSet[]> newBuffer(final PipelineOp op, + final int sinkId, final BOpStats stats) { + + final MultiplexBlockingBuffer<IBindingSet[]> factory = inputBufferMap == null ? null + : inputBufferMap.get(sinkId); + + if (factory != null) { + + return factory.newInstance(); + + } + + return op.newBuffer(stats); + + } + + /** * Return the effective default sink. * * @param bop @@ -1031,102 +1504,42 @@ return sink; } - + /** * Evaluate the {@link IChunkMessage}. */ - public void run() { - final UUID serviceId = queryEngine.getServiceUUID(); - final int messagesIn = 1; // accepted one IChunkMessage. - int sinkMessagesOut = 0; // #of chunk messages out to sink. - int altSinkMessagesOut = 0; // #of chunk messages out to altSink. - try { - clientProxy.startOp(new StartOpMessage(queryId, bopId, - partitionId, serviceId, messagesIn)); - if (log.isDebugEnabled()) - log.debug("Running chunk: " + msg); - ft.run(); // run - ft.get(); // verify success - if (sink != null && sink != queryBuffer && !sink.isEmpty()) { - if (sinkId == null) - throw new RuntimeException("sinkId not defined: bopId=" - + bopId + ", query=" - + BOpUtility.toString(query)); - /* - * Handle sink output, sending appropriate chunk message(s). - * - * Note: This maps output over shards/nodes in s/o. - */ - sinkMessagesOut += handleOutputChunk(bop, sinkId, sink); - } - if (altSink != null && altSink != queryBuffer - && !altSink.isEmpty()) { - if (altSinkId == null) - throw new RuntimeException( - "altSinkId not defined: bopId=" + bopId - + ", query=" - + BOpUtility.toString(query)); - /* - * Handle alt sink output, sending appropriate chunk - * message(s). - * - * Note: This maps output over shards/nodes in s/o. - */ - altSinkMessagesOut += handleOutputChunk(bop, altSinkId, - altSink); - } - // Send message to controller. 
- try { - final HaltOpMessage msg = new HaltOpMessage(queryId, bopId, - partitionId, serviceId, null/* cause */, sinkId, - sinkMessagesOut, altSinkId, altSinkMessagesOut, - context.getStats()); - context.getExecutorService().execute( - new SendHaltMessageTask(clientProxy, msg, - RunningQuery.this)); - } catch (RejectedExecutionException ex) { - // e.g., service is shutting down. - log.error("Could not send message: " + msg, ex); - } - } catch (Throwable t) { - - // Log an error. - log.error("queryId=" + queryId + ", bopId=" + bopId, t); - + public Void call() throws Exception { + if (log.isDebugEnabled()) + log.debug("Running chunk: " + this); + ft.run(); // run + ft.get(); // verify success + if (sink != null && sink != queryBuffer && !sink.isEmpty()) { + if (sinkId == null) + throw new RuntimeException("sinkId not defined: bopId=" + + bopId + ", query=" + BOpUtility.toString(query)); /* - * Mark the query as halted on this node regardless of whether - * we are able to communicate with the query controller. + * Handle sink output, sending appropriate chunk message(s). * - * Note: Invoking halt(t) here will log an error. This logged - * error message is necessary in order to catch errors in - * clientProxy.haltOp() (above and below). + * Note: This maps output over shards/nodes in s/o. */ - final Throwable firstCause = halt(t); - - try { - /* - * Queue a task to send the halt message to the query - * controller. - */ - final HaltOpMessage msg = new HaltOpMessage(queryId, bopId, - partitionId, serviceId, firstCause, sinkId, - sinkMessagesOut, altSinkId, altSinkMessagesOut, - context.getStats()); - context.getExecutorService().execute( - new SendHaltMessageTask(clientProxy, msg, - RunningQuery.this)); - } catch (RejectedExecutionException ex) { - // e.g., service is shutting down. 
- log.error("Could not send message: " + msg, ex); - } catch (Throwable ex) { - log.error("Could not send message: " + msg + " : " - + ex, ex); - } - + sinkMessagesOut.addAndGet(handleOutputChunk(bop, sinkId, sink)); } + if (altSink != null && altSink != queryBuffer && !altSink.isEmpty()) { + if (altSinkId == null) + throw new RuntimeException("altSinkId not defined: bopId=" + + bopId + ", query=" + BOpUtility.toString(query)); + /* + * Handle alt sink output, sending appropriate chunk message(s). + * + * Note: This maps output over shards/nodes in s/o. + */ + altSinkMessagesOut.addAndGet(handleOutputChunk(bop, altSinkId, + altSink)); + } +... [truncated message content] |
From: <tho...@us...> - 2010-10-20 18:32:32
|
Revision: 3833 http://bigdata.svn.sourceforge.net/bigdata/?rev=3833&view=rev Author: thompsonbry Date: 2010-10-20 18:32:25 +0000 (Wed, 20 Oct 2010) Log Message: ----------- Modified the QueryEngineFactory to use a singleton pattern and modified the BigdataSail to NOT shutdown the QueryEngine when the BigdataSail is shutdown. This fixes a problem where the NanoSparqlServer was creating one QueryEngine per query. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/QueryEngineFactory.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSail.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/QueryEngineFactory.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/QueryEngineFactory.java 2010-10-20 18:31:10 UTC (rev 3832) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/QueryEngineFactory.java 2010-10-20 18:32:25 UTC (rev 3833) @@ -34,16 +34,18 @@ import java.util.UUID; import com.bigdata.bop.engine.QueryEngine; +import com.bigdata.cache.ConcurrentWeakValueCache; import com.bigdata.journal.BufferMode; import com.bigdata.journal.IIndexManager; import com.bigdata.journal.Journal; +import com.bigdata.service.IBigdataClient; import com.bigdata.service.IBigdataFederation; import com.bigdata.service.ManagedResourceService; import com.bigdata.service.ResourceService; import com.bigdata.util.config.NicUtil; /** - * Factory for a query controller. + * Singleton factory for a query controller. * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ @@ -51,36 +53,86 @@ public class QueryEngineFactory { /** - * New instance for standalone or scale-out. + * Weak value cache to enforce the singleton pattern for standalone + * journals. 
+ */ + private static ConcurrentWeakValueCache<Journal, QueryEngine> standaloneQECache = new ConcurrentWeakValueCache<Journal, QueryEngine>(); + + /** + * Weak value cache to enforce the singleton pattern for + * {@link IBigdataClient}s (the data services are query engine peers rather + * than controllers and handle their own query engine initialization so as + * to expose their resources to other peers). + */ + private static ConcurrentWeakValueCache<IBigdataFederation<?>, FederatedQueryEngine> federationQECache = new ConcurrentWeakValueCache<IBigdataFederation<?>, FederatedQueryEngine>(); + + /** + * Singleton factory for standalone or scale-out. * * @param indexManager * The database. * * @return The query controller. */ - static public QueryEngine newQueryController(final IIndexManager indexManager) { + static public QueryEngine getQueryController(final IIndexManager indexManager) { if (indexManager instanceof IBigdataFederation<?>) { - return newFederatedQueryController((IBigdataFederation<?>) indexManager); + return getFederatedQueryController((IBigdataFederation<?>) indexManager); } - return newStandaloneQueryController((Journal) indexManager); + return getStandaloneQueryController((Journal) indexManager); } /** - * New query controller for standalone. + * Singleton factory for standalone. * * @param indexManager * The journal. * * @return The query controller. 
*/ - static public QueryEngine newStandaloneQueryController( + static public QueryEngine getStandaloneQueryController( final Journal indexManager) { + if (indexManager == null) + throw new IllegalArgumentException(); + + QueryEngine queryEngine = standaloneQECache.get(indexManager); + + if (queryEngine == null) { + + synchronized (standaloneQECache) { + + if ((queryEngine = standaloneQECache.get(indexManager)) == null) { + + queryEngine = newStandaloneQueryEngine(indexManager); + + standaloneQECache.put(indexManager, queryEngine); + + } + + } + + } + + return queryEngine; + + } + + /** + * Initialize a new query engine for the journal. + * + * @param indexManager + * The journal. + * + * @return The new query engine. + */ + private static QueryEngine newStandaloneQueryEngine( + final Journal indexManager) { + final QueryEngine queryEngine = new QueryEngine(indexManager); queryEngine.init(); @@ -88,7 +140,7 @@ return queryEngine; } - + /** * New query controller for scale-out. * @@ -99,16 +151,53 @@ * * @todo parameterize the local resource service and temporary storage. */ - static public FederatedQueryEngine newFederatedQueryController( + static public FederatedQueryEngine getFederatedQueryController( final IBigdataFederation<?> fed) { + if (fed == null) + throw new IllegalArgumentException(); + + FederatedQueryEngine queryEngine = federationQECache.get(fed); + + if (queryEngine == null) { + + synchronized (federationQECache) { + + if ((queryEngine = federationQECache.get(fed)) == null) { + + queryEngine = newFederatedQueryEngine(fed); + + federationQECache.put(fed, queryEngine); + + } + + } + + } + + return queryEngine; + + } + + /** + * Initialize a new query engine for the federation. + * + * @param fed + * The federation. + * + * @return The new query engine. 
+ */ + private static FederatedQueryEngine newFederatedQueryEngine( + final IBigdataFederation<?> fed) { + + final FederatedQueryEngine queryEngine; + // The local resource service for the query controller. ManagedResourceService queryEngineResourceService = null; // The local persistence store for the query controller. Journal queryEngineStore = null; - final FederatedQueryEngine queryEngine; try { // Create index manager for the query controller. @@ -116,10 +205,11 @@ final Properties p = new Properties(); - p.setProperty(Journal.Options.BUFFER_MODE, BufferMode.Temporary - .toString()); + p.setProperty(Journal.Options.BUFFER_MODE, + BufferMode.Temporary.toString()); - p.setProperty(Journal.Options.CREATE_TEMP_FILE, "true"); + p.setProperty(Journal.Options.CREATE_TEMP_FILE, + "true"); queryEngineStore = new Journal(p); @@ -129,12 +219,14 @@ { queryEngineResourceService = new ManagedResourceService( new InetSocketAddress(InetAddress - .getByName(NicUtil.getIpAddress("default.nic", - "default", true/* loopbackOk */)), 0/* port */ + .getByName(NicUtil.getIpAddress( + "default.nic", "default", + true/* loopbackOk */)), 0/* port */ ), 0/* requestServicePoolSize */) { @Override - protected File getResource(UUID uuid) throws Exception { + protected File getResource(UUID uuid) + throws Exception { // Will not serve up files. return null; } @@ -142,8 +234,9 @@ } // create the query controller. - queryEngine = new FederatedQueryController(fed.getServiceUUID(), - fed, queryEngineStore, queryEngineResourceService); + queryEngine = new FederatedQueryController(fed + .getServiceUUID(), fed, queryEngineStore, + queryEngineResourceService); } catch (Throwable t) { @@ -160,9 +253,9 @@ queryEngine.init(); return queryEngine; - + } - + /** * Implementation manages its own local storage and resource service. 
*/ Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSail.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSail.java 2010-10-20 18:31:10 UTC (rev 3832) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSail.java 2010-10-20 18:32:25 UTC (rev 3833) @@ -923,7 +923,7 @@ namespaces = Collections.synchronizedMap(new LinkedHashMap<String, String>()); - queryEngine = QueryEngineFactory.newQueryController(database + queryEngine = QueryEngineFactory.getQueryController(database .getIndexManager()); } @@ -996,9 +996,14 @@ public void shutDown() throws SailException { assertOpen(); + + /* + * Note: DO NOT shutdown the query engine. It is shared by all + * operations against the same backing Journal or IBigdataFederation + * within this JVM! + */ +// queryEngine.shutdown(); - queryEngine.shutdown(); - super.shutDown(); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2010-10-20 18:31:16
|
Revision: 3832 http://bigdata.svn.sourceforge.net/bigdata/?rev=3832&view=rev Author: thompsonbry Date: 2010-10-20 18:31:10 +0000 (Wed, 20 Oct 2010) Log Message: ----------- Cleaner message in AbstractJournal when a journal backed by a transient resource has been asynchronously closed. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/journal/AbstractJournal.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/journal/AbstractJournal.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/journal/AbstractJournal.java 2010-10-20 18:30:43 UTC (rev 3831) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/journal/AbstractJournal.java 2010-10-20 18:31:10 UTC (rev 3832) @@ -1801,7 +1801,7 @@ if (!_bufferStrategy.isOpen()) { - throw new IllegalStateException("file=" + getFile()); + throw new IllegalStateException((getFile()==null?"transient":"file=" + getFile())); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2010-10-20 18:30:49
|
Revision: 3831 http://bigdata.svn.sourceforge.net/bigdata/?rev=3831&view=rev Author: thompsonbry Date: 2010-10-20 18:30:43 +0000 (Wed, 20 Oct 2010) Log Message: ----------- Added generic type parameter to the termCache. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/LexiconRelation.java Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/LexiconRelation.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/LexiconRelation.java 2010-10-20 18:30:25 UTC (rev 3830) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/LexiconRelation.java 2010-10-20 18:30:43 UTC (rev 3831) @@ -2117,7 +2117,7 @@ * The {@link ILexiconConfiguration} instance, which will determine how * terms are encoded and decoded in the key space. */ - private final ILexiconConfiguration lexiconConfiguration; + private final ILexiconConfiguration<BigdataValue> lexiconConfiguration; /** * Constant for the {@link LexiconRelation} namespace component. This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2010-10-20 18:30:33
|
Revision: 3830 http://bigdata.svn.sourceforge.net/bigdata/?rev=3830&view=rev Author: thompsonbry Date: 2010-10-20 18:30:25 +0000 (Wed, 20 Oct 2010) Log Message: ----------- Added a MultiplexBlockingBuffer. This is a factory pattern which may be used to share the same backing BlockingBuffer among many producers. Each producer receives a skin for the backing buffer. The backing buffer is only closed once each producer closes their skin. Added an IMultiSourceAsynchronousIterator interface for an IAsynchronousIterator which can consume multiple sources. There is one implementation in this commit, which allows the producer to attach another source. This is used to assign a chunk to a task which is already running. There is another version which handles multiple concurrent producers, but its implementation is not yet finished. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/relation/accesspath/TestAll.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/IMultiSourceAsynchronousIterator.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/MultiSourceSequentialAsynchronousIterator.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/MultiplexBlockingBuffer.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/relation/accesspath/TestMultiSourceSequentialAsynchronousIterator.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/relation/accesspath/TestMultiplexBlockingBuffer.java Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/IMultiSourceAsynchronousIterator.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/IMultiSourceAsynchronousIterator.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/IMultiSourceAsynchronousIterator.java 
2010-10-20 18:30:25 UTC (rev 3830) @@ -0,0 +1,53 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Oct 19, 2010 + */ + +package com.bigdata.relation.accesspath; + +/** + * An interface which permits new sources to be attached dynamically. The + * decision to accept a new source via {@link #add(IAsynchronousIterator)} or to + * {@link IMultiSourceAsynchronousIterator#close()} the iterator must be atomic. + * In particular, it is illegal for a source to be accepted after the iterator + * has been closed. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public interface IMultiSourceAsynchronousIterator<E> extends + IAsynchronousIterator<E> { + + /** + * Add a source. If the iterator already reports that it is closed then the + * new source can not be added and this method will return false. + * + * @param src + * The source. + * @return <code>true</code> iff the source could be added. 
+ */ + boolean add(IAsynchronousIterator<E> src); + +} Property changes on: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/IMultiSourceAsynchronousIterator.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/MultiSourceSequentialAsynchronousIterator.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/MultiSourceSequentialAsynchronousIterator.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/MultiSourceSequentialAsynchronousIterator.java 2010-10-20 18:30:25 UTC (rev 3830) @@ -0,0 +1,186 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Oct 19, 2010 + */ + +package com.bigdata.relation.accesspath; + +import java.util.NoSuchElementException; +import java.util.Queue; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.locks.ReentrantLock; + + +/** + * Class allows new sources to be attached dynamically. 
If the existing sources + * are drained then the iterator will {@link #close()} itself so that new + * sources can no longer be attached. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public class MultiSourceSequentialAsynchronousIterator<E> implements + IMultiSourceAsynchronousIterator<E> { + + private final ReentrantLock lock = new ReentrantLock(); + + private final Queue<IAsynchronousIterator<E>> sources = new LinkedBlockingQueue<IAsynchronousIterator<E>>(); + + /** + * The current inner iterator. When <code>null</code> the outer iterator has + * been closed and will not deliver any more results and will not accept any + * new sources. + * <p> + * Note: This can be asynchronously closed if the application invokes + * {@link #close()}. Methods which test on this can not assume that it will + * be non-<code>null</code> the next time they check unless they are holding + * the {@link #lock}. Methods which do not obtain the lock can offer a + * weaker atomicity by copying the reference to a local variable and then + * testing that variable. + */ + private volatile IAsynchronousIterator<E> current; + + public MultiSourceSequentialAsynchronousIterator(final IAsynchronousIterator<E> src) { + current = src; + } + + public void close() { + lock.lock(); + try { + current = null; + sources.clear(); + } finally { + lock.unlock(); + } + } + + public boolean add(final IAsynchronousIterator<E> src) { + if (src == null) + throw new IllegalArgumentException(); + lock.lock(); + try { + if (current == null) + return false; + sources.add(src); + return true; + } finally { + lock.unlock(); + } + } + + /** + * If the current source is not exhausted, then return it immediately. + * Otherwise, return the next source which is not exhausted. If no such + * sources are available, then {@link #close()} the iterator. 
The decision + * to accept another source or to close the iterator is made atomic by the + * use of the {@link #lock} in this method and in {@link #close()}. + * + * @return The next source -or- <code>null</code> if there are no sources + * available. + */ + private IAsynchronousIterator<E> nextSource() { + final IAsynchronousIterator<E> tmp = current; + if (tmp == null) + return null; + if (!tmp.isExhausted()) + return current; // Note: MAY be asynchronously cleared! + // current is known to be [null]. + lock.lock(); + try { + // remove the head of the queue (non-blocking) + while ((current = sources.poll()) != null) { + if (!current.isExhausted()) + return current; + } + // no more sources with data, close while holding lock. + close(); + return null; + } finally { + lock.unlock(); + } + } + + public boolean hasNext() { + while (true) { + final IAsynchronousIterator<E> tmp = nextSource(); + if (tmp == null) + return false; + if (tmp.hasNext()) + return true; + } + } + + public E next() { + while (true) { + final IAsynchronousIterator<E> tmp = nextSource(); + if (tmp == null) + throw new NoSuchElementException(); + if (tmp.hasNext()) + return tmp.next(); + } + } + + public void remove() { + throw new UnsupportedOperationException(); + } + + public boolean isExhausted() { + return nextSource() == null; + } + + public boolean hasNext(final long timeout, final TimeUnit unit) + throws InterruptedException { + final long begin = System.nanoTime(); + final long nanos = unit.toNanos(timeout); + long remaining = nanos; + while (remaining > 0) { + final IAsynchronousIterator<E> tmp = nextSource(); + if (tmp == null) + return false; + if (tmp.hasNext(remaining, TimeUnit.NANOSECONDS)) + return true; + remaining = nanos - (System.nanoTime() - begin); + } + // timeout. 
+ return false; + } + + public E next(final long timeout, final TimeUnit unit) + throws InterruptedException { + final long begin = System.nanoTime(); + final long nanos = unit.toNanos(timeout); + long remaining = nanos; + while (true) { + final IAsynchronousIterator<E> tmp = nextSource(); + if (tmp == null) + return null; + if (tmp.hasNext(remaining, TimeUnit.NANOSECONDS)) + return tmp.next(); + remaining = nanos - (System.nanoTime() - begin); + } + } + +} Property changes on: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/MultiSourceSequentialAsynchronousIterator.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/MultiplexBlockingBuffer.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/MultiplexBlockingBuffer.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/MultiplexBlockingBuffer.java 2010-10-20 18:30:25 UTC (rev 3830) @@ -0,0 +1,212 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Oct 7, 2010 + */ + +package com.bigdata.relation.accesspath; + +import java.util.Iterator; +import java.util.LinkedHashSet; +import java.util.concurrent.Future; +import java.util.concurrent.locks.ReentrantLock; + + +/** + * A factory for skins which may be used to multiplex writes against a + * {@link BlockingBuffer}. Each skin writes through to the backing + * {@link BlockingBuffer} but may be closed independently of the backing + * {@link BlockingBuffer}. This allows multiple producers to share a single + * {@link BlockingBuffer} as long as they use a subset of the + * {@link IBlockingBuffer} API (they can not set the {@link Future} on the + * objects returned by this factory or obtain its + * {@link IBlockingBuffer#iterator()}). + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + * + * @todo Does this need to close automatically when the last open inner buffer + * is closed or should it be closed explicitly and close all inner buffers + * when it is closed? + */ +public class MultiplexBlockingBuffer<E> { + + /** The delegate. */ + private final IBlockingBuffer<E> b; + + /** Lock guarding internal state. */ + private final ReentrantLock lock = new ReentrantLock(); + + /** The set of opened buffers which have not yet been closed. */ + private final LinkedHashSet<IBlockingBuffer<E>> set = new LinkedHashSet<IBlockingBuffer<E>>(); + + /** The #of currently open buffers. */ + private int counter = 0; + + public MultiplexBlockingBuffer(final IBlockingBuffer<E> b) { + if (b == null) + throw new IllegalArgumentException(); + this.b = b; + } + + public boolean isOpen() { + return b.isOpen(); + } + + public IBlockingBuffer<E> newInstance() { + lock.lock(); + try { + if(!isOpen())// ??? 
+ throw new BufferClosedException(); + final IBlockingBuffer<E> n = new InnerBlockingBuffer(); + if (!set.add(n)) + throw new AssertionError(); + counter++; + return n; + } finally { + lock.unlock(); + } + } + + public void flushAndCloseAll() { + lock.lock(); + try { + final Iterator<IBlockingBuffer<E>> itr = set.iterator(); + while(itr.hasNext()) { + final IBlockingBuffer<E> n = itr.next(); + n.close(); + } + assert counter == 0; + b.flush(); + b.close(); + } finally { + lock.unlock(); + } + } + + /** + * The {@link IBlockingBuffer} reference provided to the constructor. + */ + public IBlockingBuffer<E> getBackingBuffer() { + return b; + } + + /** + * Inner "skin" writes through to the backing buffer shared by all skins. + * <p> + * Note: This inner class does not support several of the + * {@link IBlockingBuffer} methods whose semantics are likely to cause + * problems when interpreted in the light of a skin over a shared buffer. + * The only way these methods could be given clear semantics is if the skin + * were actually a full {@link BlockingBuffer} which was coupled to the + * shared buffer. However, that involves double buffering and double copying + * and I do not think that this is worth it. 
+ */ + private class InnerBlockingBuffer implements IBlockingBuffer<E> { + + public InnerBlockingBuffer() { + } + + private boolean innerBufferOpen = true; + + public IAsynchronousIterator<E> iterator() { + throw new UnsupportedOperationException(); + } + + public void setFuture(Future future) { + throw new UnsupportedOperationException(); + } + + public void abort(final Throwable cause) { + lock.lock(); + try { + if (!innerBufferOpen) + throw new BufferClosedException(); + b.abort(cause); + } finally { + lock.unlock(); + } + } + + public void close() { + lock.lock(); + try { + if (!innerBufferOpen) + return; + innerBufferOpen = false; + if (!set.remove(this)) + throw new AssertionError(); + counter--; + if (counter == 0) { + /* + * Note: We flush the backing buffer before we close it in + * case it has anything buffered. This covers the normal case, + * which is where the caller has already invoked flush() on + * this skin and should not create any harm otherwise. + */ + b.flush(); + b.close(); + } + } finally { + lock.unlock(); + } + } + + public Future getFuture() { + return b.getFuture(); + } + + public boolean isOpen() { + return innerBufferOpen && b.isOpen(); + } + + public long flush() { + /* + * Nothing to flush. The target is flushed when the outer class is + * closed. 
+ */ + return 0; + } + + public void add(E e) { + if (!innerBufferOpen) + throw new BufferClosedException(); + b.add(e); + } + + public boolean isEmpty() { + return b.isEmpty(); + } + + public void reset() { + throw new UnsupportedOperationException(); + } + + public int size() { + return b.size(); + } + + } + +} Property changes on: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/MultiplexBlockingBuffer.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/relation/accesspath/TestAll.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/relation/accesspath/TestAll.java 2010-10-20 18:27:22 UTC (rev 3829) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/relation/accesspath/TestAll.java 2010-10-20 18:30:25 UTC (rev 3830) @@ -77,11 +77,17 @@ suite.addTestSuite(TestBlockingBufferWithChunks.class); suite.addTestSuite(TestBlockingBufferWithChunksDeque.class); - + suite.addTestSuite(TestUnsynchronizedArrayBuffer.class); suite.addTestSuite(TestUnsynchronizedUnboundedChunkBuffer.class); + suite.addTestSuite(TestMultiplexBlockingBuffer.class); + + suite.addTestSuite(TestMultiSourceSequentialAsynchronousIterator.class); + + //suite.addTestSuite(TestMultiSourceParallelAsynchronousIterator.class); + return suite; } Added: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/relation/accesspath/TestMultiSourceSequentialAsynchronousIterator.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/relation/accesspath/TestMultiSourceSequentialAsynchronousIterator.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/relation/accesspath/TestMultiSourceSequentialAsynchronousIterator.java 2010-10-20 18:30:25 UTC (rev 3830) @@ 
-0,0 +1,171 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Oct 6, 2010 + */ + +package com.bigdata.relation.accesspath; + +import java.util.concurrent.TimeUnit; + +import com.bigdata.relation.accesspath.IAsynchronousIterator; +import com.bigdata.relation.accesspath.ThickAsynchronousIterator; + +import junit.framework.TestCase2; + +/** + * Test suite for the {@link MultiSourceSequentialAsynchronousIterator}. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public class TestMultiSourceSequentialAsynchronousIterator extends TestCase2 { + + public TestMultiSourceSequentialAsynchronousIterator() { + + } + + public TestMultiSourceSequentialAsynchronousIterator(String name) { + super(name); + } + + private final IAsynchronousIterator<String> emptyIterator() { + return new ThickAsynchronousIterator<String>(new String[]{}); + } + + public void test1() throws InterruptedException { + + // empty iterator. + final MultiSourceSequentialAsynchronousIterator<String> itr = new MultiSourceSequentialAsynchronousIterator<String>( + emptyIterator()); + +// // nothing available yet. 
+// assertFalse(itr.hasNext(1, TimeUnit.MILLISECONDS)); +// assertNull(itr.next(1, TimeUnit.MILLISECONDS)); + + // add an empty chunk. + assertTrue(itr.add(new ThickAsynchronousIterator<String>( + new String[] {}))); + +// // still nothing available yet. +// assertFalse(itr.hasNext(1, TimeUnit.MILLISECONDS)); +// assertNull(itr.next(1, TimeUnit.MILLISECONDS)); + + // add a non-empty chunk. + assertTrue(itr.add(new ThickAsynchronousIterator<String>( + new String[] { "a" }))); + + // reports data available and visits data. + assertTrue(itr.hasNext(1, TimeUnit.MILLISECONDS)); + assertEquals("a", itr.next(1, TimeUnit.MILLISECONDS)); + + // add a non-empty chunk. + assertTrue(itr.add(new ThickAsynchronousIterator<String>( + new String[] { "b" }))); + + // reports data available and visits data. + assertTrue(itr.hasNext()); + assertEquals("b", itr.next()); + + // close the iterator. + itr.close(); + + // iterator reports nothing available. + assertFalse(itr.hasNext()); + assertFalse(itr.hasNext(1, TimeUnit.MILLISECONDS)); + assertNull(itr.next(1, TimeUnit.MILLISECONDS)); + + // can not add more sources. + assertFalse(itr.add(new ThickAsynchronousIterator<String>( + new String[] { "b" }))); + + } + + public void test2() throws InterruptedException { + + // empty iterator. + final MultiSourceSequentialAsynchronousIterator<String> itr = new MultiSourceSequentialAsynchronousIterator<String>( + emptyIterator()); + + // add a non-empty chunk. + assertTrue(itr.add(new ThickAsynchronousIterator<String>( + new String[] { "a" }))); + + // add a non-empty chunk. + assertTrue(itr.add(new ThickAsynchronousIterator<String>( + new String[] { "b" }))); + + // reports data available and visits data. + assertTrue(itr.hasNext()); + assertEquals("a", itr.next()); + assertTrue(itr.hasNext()); + assertEquals("b", itr.next()); + + // another read on the iterator causes it to be closed. + assertFalse(itr.hasNext()); + + // can not add more sources. 
+ assertFalse(itr.add(new ThickAsynchronousIterator<String>( + new String[] { "b" }))); + + } + + /** + * Verify that the iterator notices if it is asynchronously closed. + * + * @throws InterruptedException + */ + public void test3() throws InterruptedException { + + // empty iterator. + final MultiSourceSequentialAsynchronousIterator<String> itr = new MultiSourceSequentialAsynchronousIterator<String>( + emptyIterator()); + + new Thread() { + + public void run() { + try { + log.info("Will wait on iterator."); + if (itr.hasNext(2000, TimeUnit.MILLISECONDS)) + fail("Iterator should not visit anything."); + } catch (Throwable t) { + log.error(t, t); + } + } + + }.start(); + + log.info("Sleeping..."); + Thread.sleep(500/*milliseconds.*/); + + log.info("Will close iterator."); + itr.close(); + + // can not add more sources. + assertFalse(itr.add(new ThickAsynchronousIterator<String>( + new String[] { "b" }))); + + } + +} Property changes on: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/relation/accesspath/TestMultiSourceSequentialAsynchronousIterator.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Added: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/relation/accesspath/TestMultiplexBlockingBuffer.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/relation/accesspath/TestMultiplexBlockingBuffer.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/relation/accesspath/TestMultiplexBlockingBuffer.java 2010-10-20 18:30:25 UTC (rev 3830) @@ -0,0 +1,127 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... 
+ +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Oct 8, 2010 + */ + +package com.bigdata.relation.accesspath; + +import junit.framework.TestCase2; + +import com.bigdata.relation.accesspath.BlockingBuffer; +import com.bigdata.relation.accesspath.BufferClosedException; +import com.bigdata.relation.accesspath.IBlockingBuffer; + +/** + * Test suite for {@link MultiplexBlockingBuffer}. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public class TestMultiplexBlockingBuffer extends TestCase2 { + + /** + * + */ + public TestMultiplexBlockingBuffer() { + + } + + /** + * @param name + */ + public TestMultiplexBlockingBuffer(String name) { + super(name); + + } + + public void test_multiplex() { + + final IBlockingBuffer<String> buffer = new BlockingBuffer<String>(); + + final MultiplexBlockingBuffer<String> multiplex = new MultiplexBlockingBuffer<String>(buffer); + + // buffer is open and empty. + assertTrue(buffer.isOpen()); + assertTrue(buffer.isEmpty()); + + // multiplex is open. + assertTrue(multiplex.isOpen()); + + final IBlockingBuffer<String> skin1 = multiplex.newInstance(); + + final IBlockingBuffer<String> skin2 = multiplex.newInstance(); + + // buffer is open and empty. + assertTrue(buffer.isOpen()); + assertTrue(buffer.isEmpty()); + + // multiplex is open. 
+ assertTrue(multiplex.isOpen()); + + skin1.add("a"); + skin1.flush(); + skin1.close(); + try { + skin1.add("a2"); + fail("Expecting: " + BufferClosedException.class); + } catch (BufferClosedException ex) { + if (log.isInfoEnabled()) + log.info("Ignoring expected exception: " + ex); + } + + // buffer is open but no longer empty. + assertTrue(buffer.isOpen()); + assertFalse(buffer.isEmpty()); + + // multiplex is open. + assertTrue(multiplex.isOpen()); + + skin2.add("b"); + skin2.add("c"); + skin2.flush(); + + // buffer is open but not empty. + assertTrue(buffer.isOpen()); + assertFalse(buffer.isEmpty()); + + // multiplex is open. + assertTrue(multiplex.isOpen()); + + // close the last open skin. + skin2.close(); + + // buffer is closed but not empty. + assertFalse(buffer.isOpen()); + assertFalse(buffer.isEmpty()); + + // multiplex closed. + assertFalse(multiplex.isOpen()); + + // verify the data. + assertSameIterator(new String[]{"a","b","c"}, buffer.iterator()); + + } + +} Property changes on: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/relation/accesspath/TestMultiplexBlockingBuffer.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2010-10-20 18:27:28
|
Revision: 3829 http://bigdata.svn.sourceforge.net/bigdata/?rev=3829&view=rev Author: thompsonbry Date: 2010-10-20 18:27:22 +0000 (Wed, 20 Oct 2010) Log Message: ----------- Optimization in AccessPath. It had been modified such that it was no longer hitting the cached index references in SPORelation and LexiconRelation. This fixes that. There is a non-javadoc note in the code which indicates that this fix is a bit of a kludge, but it is the same known kludge which we have been relying on for a long time now - caching of the index references on the concrete IRelation class implementations. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/AccessPath.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/AccessPath.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/AccessPath.java 2010-10-20 18:25:26 UTC (rev 3828) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/AccessPath.java 2010-10-20 18:27:22 UTC (rev 3829) @@ -425,11 +425,21 @@ // The predicate is not constrained to an index partition. pmd = null; - // Obtain the index. - final String fqn = AbstractRelation.getFQN(relation, keyOrder); - - ndx = AbstractRelation.getIndex(indexManager, fqn, timestamp); - + /* + * Obtain the index. + * + * FIXME The getIndex(IKeyOrder) code path is optimized by + * SPORelation and LexiconRelation. However, we should have + * automatic caching of the index references to avoid the + * significant penalty of going down to the commitRecordIndex and + * Name2Addr each time we need to resolve an index. (Scale-out has + * separate caching for this in IndexManager.) 
+ */ + ndx = relation.getIndex(keyOrder); +// final String fqn = AbstractRelation.getFQN(relation, keyOrder); +// +// ndx = AbstractRelation.getIndex(indexManager, fqn, timestamp); + if (ndx == null) { throw new RuntimeException("No such index: relation=" This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2010-10-20 18:25:32
|
Revision: 3828 http://bigdata.svn.sourceforge.net/bigdata/?rev=3828&view=rev Author: thompsonbry Date: 2010-10-20 18:25:26 +0000 (Wed, 20 Oct 2010) Log Message: ----------- Added toString() and code edit on equals() Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/BSBundle.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/BSBundle.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/BSBundle.java 2010-10-20 18:25:09 UTC (rev 3827) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/BSBundle.java 2010-10-20 18:25:26 UTC (rev 3828) @@ -41,12 +41,12 @@ public final int shardId; public String toString() { - + return super.toString() + "{bopId=" + bopId + ",shardId=" + shardId + "}"; - + } - + public BSBundle(final int bopId, final int shardId) { this.bopId = bopId; @@ -65,16 +65,17 @@ } public boolean equals(final Object o) { - + if (this == o) return true; - + if (!(o instanceof BSBundle)) return false; - - return bopId == ((BSBundle) o).bopId - && shardId == ((BSBundle) o).shardId; - + + final BSBundle t = (BSBundle) o; + + return bopId == t.bopId && shardId == t.shardId; + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2010-10-20 18:25:15
|
Revision: 3827 http://bigdata.svn.sourceforge.net/bigdata/?rev=3827&view=rev Author: thompsonbry Date: 2010-10-20 18:25:09 +0000 (Wed, 20 Oct 2010) Log Message: ----------- Changed the order of Q2 and Q9 such that Q2 now runs first (it tends to be faster). Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata-perf/lubm/src/resources/config/config.query.sparql Modified: branches/QUADS_QUERY_BRANCH/bigdata-perf/lubm/src/resources/config/config.query.sparql =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-perf/lubm/src/resources/config/config.query.sparql 2010-10-20 18:24:39 UTC (rev 3826) +++ branches/QUADS_QUERY_BRANCH/bigdata-perf/lubm/src/resources/config/config.query.sparql 2010-10-20 18:25:09 UTC (rev 3827) @@ -130,6 +130,19 @@ ?x a ub:Student } +[query2] +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> +PREFIX ub: <http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl#> +SELECT ?x ?y ?z +WHERE{ + ?x a ub:GraduateStudent . + ?y a ub:University . + ?z a ub:Department . + ?x ub:memberOf ?z . + ?z ub:subOrganizationOf ?y . + ?x ub:undergraduateDegreeFrom ?y +} + [query9] PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> PREFIX ub: <http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl#> @@ -142,16 +155,3 @@ ?y ub:teacherOf ?z . ?x ub:takesCourse ?z . } - -[query2] -PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> -PREFIX ub: <http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl#> -SELECT ?x ?y ?z -WHERE{ - ?x a ub:GraduateStudent . - ?y a ub:University . - ?z a ub:Department . - ?x ub:memberOf ?z . - ?z ub:subOrganizationOf ?y . - ?x ub:undergraduateDegreeFrom ?y -} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2010-10-20 18:24:45
|
Revision: 3826 http://bigdata.svn.sourceforge.net/bigdata/?rev=3826&view=rev Author: thompsonbry Date: 2010-10-20 18:24:39 +0000 (Wed, 20 Oct 2010) Log Message: ----------- The log4j.properties file should be in ant-build/bin, not ant-build/classes. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata-perf/lubm/build.xml Modified: branches/QUADS_QUERY_BRANCH/bigdata-perf/lubm/build.xml =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-perf/lubm/build.xml 2010-10-20 14:09:18 UTC (rev 3825) +++ branches/QUADS_QUERY_BRANCH/bigdata-perf/lubm/build.xml 2010-10-20 18:24:39 UTC (rev 3826) @@ -54,14 +54,14 @@ <exclude name="**/*.java" /> <exclude name="**/package.html" /> </fileset> - <!-- copy log4j configuration file. --> - <fileset dir="${lubm.dir}/src/resources/logging" /> </copy> <copy toDir="${build.dir}/bin"> <!-- copy benchmark data and queries. --> <fileset dir="${lubm.dir}/src/resources/config" /> <!-- copy the journal configuration file. --> <fileset file="${lubm.dir}/*.properties" /> + <!-- copy log4j configuration file. --> + <fileset dir="${lubm.dir}/src/resources/logging" /> </copy> </target> This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2010-10-20 14:09:25
|
Revision: 3825 http://bigdata.svn.sourceforge.net/bigdata/?rev=3825&view=rev Author: thompsonbry Date: 2010-10-20 14:09:18 +0000 (Wed, 20 Oct 2010) Log Message: ----------- Reverting to the old behavior for a query which contains a single statement pattern. This will get the wrong answer for default graph queries against quads, but it does better overall. We are continuing to look into this issue. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl.java Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl.java 2010-10-19 20:46:00 UTC (rev 3824) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl.java 2010-10-20 14:09:18 UTC (rev 3825) @@ -1829,6 +1829,46 @@ } } + /** + * Override evaluation of StatementPatterns to recognize magic search + * predicate. 
+ */ + @Override + public CloseableIteration<BindingSet, QueryEvaluationException> evaluate( + final StatementPattern sp, final BindingSet bindings) + throws QueryEvaluationException { + + if (log.isDebugEnabled()) { + log.debug("evaluating statement pattern:\n" + sp); + } + + // check for magic search + final Var predVar = sp.getPredicateVar(); + final Value predValue = getVarValue(predVar, bindings); + if (BD.SEARCH.equals(predValue)) { + final Var ovar = sp.getObjectVar(); + final Value oval = getVarValue(ovar, bindings); + if (oval == null) { + throw new QueryEvaluationException(BD.SEARCH + + " : object must be bound."); + } + if (!(oval instanceof Literal)) { + throw new QueryEvaluationException(BD.SEARCH + + " : object must be literal."); + } + final Literal lit = (Literal) oval; + if (lit.getDatatype() != null) { + throw new QueryEvaluationException(BD.SEARCH + + " : object is datatype literal."); + } + return search(sp.getSubjectVar(), lit.getLanguage(), + lit.getLabel(), bindings, sp.getScope()); + } + + return super.evaluate(sp, bindings); + + } + // /** // * Override evaluation of StatementPatterns to recognize magic search // * predicate. @@ -1838,72 +1878,32 @@ // final StatementPattern sp, final BindingSet bindings) // throws QueryEvaluationException { // +// // no check against the nativeJoins property here because we are simply +// // using the native execution model to take care of magic searches. 
+// // if (log.isDebugEnabled()) { // log.debug("evaluating statement pattern:\n" + sp); // } // -// // check for magic search -// final Var predVar = sp.getPredicateVar(); -// final Value predValue = getVarValue(predVar, bindings); -// if (BD.SEARCH.equals(predValue)) { -// final Var ovar = sp.getObjectVar(); -// final Value oval = getVarValue(ovar, bindings); -// if (oval == null) { -// throw new QueryEvaluationException(BD.SEARCH -// + " : object must be bound."); -// } -// if (!(oval instanceof Literal)) { -// throw new QueryEvaluationException(BD.SEARCH -// + " : object must be literal."); -// } -// final Literal lit = (Literal) oval; -// if (lit.getDatatype() != null) { -// throw new QueryEvaluationException(BD.SEARCH -// + " : object is datatype literal."); -// } -// return search(sp.getSubjectVar(), lit.getLanguage(), -// lit.getLabel(), bindings, sp.getScope()); +// final IStep query = createNativeQuery(sp); +// +// if (query == null) { +// return new EmptyIteration<BindingSet, QueryEvaluationException>(); // } +// +// try { // -// return super.evaluate(sp, bindings); +// return execute(query, bindings); +// +// } catch (Exception ex) { +// +// throw new QueryEvaluationException(ex); +// +// } // // } - /** - * Override evaluation of StatementPatterns to recognize magic search - * predicate. - */ - @Override - public CloseableIteration<BindingSet, QueryEvaluationException> evaluate( - final StatementPattern sp, final BindingSet bindings) - throws QueryEvaluationException { - - // no check against the nativeJoins property here because we are simply - // using the native execution model to take care of magic searches. 
- - if (log.isDebugEnabled()) { - log.debug("evaluating statement pattern:\n" + sp); - } - - final IStep query = createNativeQuery(sp); - - if (query == null) { - return new EmptyIteration<BindingSet, QueryEvaluationException>(); - } - try { - - return execute(query, bindings); - - } catch (Exception ex) { - - throw new QueryEvaluationException(ex); - - } - - } - - /** * Evaluates the {@link BD#SEARCH} magic predicate as a full-text search * against the index literal in the database, binding <i>svar</i> to each This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |