From: <tho...@us...> - 2014-01-10 23:06:49
|
Revision: 7769 http://bigdata.svn.sourceforge.net/bigdata/?rev=7769&view=rev Author: thompsonbry Date: 2014-01-10 23:06:42 +0000 (Fri, 10 Jan 2014) Log Message: ----------- fix for an RTO test case where the RTO was run inside of a named subquery. The test harness now checks all queries that were reported to the listener for the RTO's data. Modified Paths: -------------- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/rto/AbstractRTOTestCase.java branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/rto/TestRTO_BSBM.java Added Paths: ----------- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/rto/TestRTO_FOAF.java Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/rto/AbstractRTOTestCase.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/rto/AbstractRTOTestCase.java 2014-01-10 22:55:17 UTC (rev 7768) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/rto/AbstractRTOTestCase.java 2014-01-10 23:06:42 UTC (rev 7769) @@ -28,6 +28,8 @@ package com.bigdata.rdf.sparql.ast.eval.rto; import java.util.Arrays; +import java.util.LinkedHashSet; +import java.util.Set; import java.util.UUID; import org.apache.log4j.Logger; @@ -149,7 +151,7 @@ protected static class MyQueryListener implements IRunningQueryListener { private final UUID queryId; - private volatile IRunningQuery q; + private final Set<IRunningQuery> queries = new LinkedHashSet<IRunningQuery>(); public MyQueryListener(final UUID queryId) { @@ -163,22 +165,23 @@ @Override public void notify(final IRunningQuery q) { - if(q.getQueryId().equals(queryId)) { +// if(q.getQueryId().equals(queryId)) { - this.q = q; + queries.add(q); - } +// } } - public IRunningQuery getRunningQuery() { + /** + * Return each {@link IRunningQuery} that was 
noticed by this listener. + */ + public Set<IRunningQuery> getRunningQueries() { - final IRunningQuery q = this.q; - - if (q == null) + if (queries.isEmpty()) fail("Not found."); - return q; + return queries; } @@ -223,13 +226,37 @@ final PipelineOp queryPlan = astContainer.getQueryPlan(); + /* + * Note: Some queries may have more than one JoinGraph instance. They + * will throw an exception here. You can (a) turn off all but one of the + * places where the RTO is running; (b) modify the test harness to be + * more general and verify each of the RTO instances that actually ran; + * or (c) move that query into a part of the test suite that is only + * concerned with getting the right answer and not verifying that the + * join ordering remains consistent in CI runs. + */ final JoinGraph joinGraph = BOpUtility.getOnly(queryPlan, JoinGraph.class); assertNotNull(joinGraph); - // The join path selected by the RTO. - final Path path = joinGraph.getPath(l.getRunningQuery()); + /* + * The join path selected by the RTO. + * + * Note: The RTO might be running inside of a named subquery. If so, + * then the Path is not attached to the main query. This is why we have + * to check each query that was noticed by our listener. + */ + final Path path; + { + Path tmp = null; + for (IRunningQuery q : l.getRunningQueries()) { + tmp = joinGraph.getPath(q); + if (tmp != null) + break; + } + path = tmp; + } // Verify that a path was attached to the query. 
assertNotNull(path); Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/rto/TestRTO_BSBM.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/rto/TestRTO_BSBM.java 2014-01-10 22:55:17 UTC (rev 7768) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/rto/TestRTO_BSBM.java 2014-01-10 23:06:42 UTC (rev 7769) @@ -29,16 +29,28 @@ import java.util.Properties; -import com.bigdata.bop.engine.IRunningQuery; -import com.bigdata.bop.joinGraph.rto.Path; import com.bigdata.rdf.axioms.NoAxioms; import com.bigdata.rdf.sail.BigdataSail; -import com.bigdata.rdf.sparql.ast.eval.OutOfOrderEvaluationException; /** * Data driven test suite for the Runtime Query Optimizer (RTO) using BSBM data * and queries based on BSBM. * <p> + * Note: BSBM is parameterized. We can generate more queries against the pc100 + * data set easily enough. In principle, those queries might exhibit different + * correlations. However, the pc100 data set may be too small for any + * interesting correlations. In fact, it may be too small since the vertex + * estimates and cutoff joins may be exact before the RTO is run. If so, + * then we need to go back and use a larger data set. However, the specific + * parameterized queries will remain valid against larger data sets since BSBM + * only adds more data when generating a larger data set. Of course, the number + * of solutions for the queries may change. + * <p> + * Note: BSBM uses a lot of filters, subgroups, and sub-selects. As we build up + * coverage for those constructions in the RTO, it will handle more of the + * query. As a result, the observed join orders (and even the #of joins that are + * considered) are likely to change. + * <p> * Note: Q6 is no longer run in BSBM (the query was dropped). * <p> * Note: Q9 is a simple DESCRIBE (too simple for the RTO).
Sample query is: @@ -68,22 +80,6 @@ * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id: TestBasicQuery.java 6440 2012-08-14 17:57:33Z thompsonbry $ - * - * TODO BSBM uses a lot of filters, subgroups, and sub-selects. As we - * build up coverage for those constructions in the RTO, it will handle - * more of the query. As a result, the observed join orders (and even - * the #of joins that are considered) are likely to change. - * - * TODO BSBM is parameterized. We can generate more queries against the - * pc100 data set easily enough. In priciple, those queries might - * exhibit different correlations. However, the pc100 data set may be - * too small for any interesting correlations. In fact, it may be too - * small since the vertex estimates and cutoff joins may be exact - * before the RTO is run running. If so, then we need to go back and - * use a larger data set. However, the specific parameterized queries - * will remain valid against larger data sets since BSBM only adds more - * data when generating a larger data set. Of course, the number of - * solutions for the queries may change. */ public class TestRTO_BSBM extends AbstractRTOTestCase { @@ -236,11 +232,6 @@ /** * BSBM Q7 on the pc100 data set. - * - * FIXME This fails because the RTO is running in a named subquery. The test - * harness is looking in the wrong place (it is looking on the wrong - * {@link IRunningQuery}) and therefore it fails to find the {@link Path} - * computed by the RTO. */ public void test_BSBM_Q7_pc100() throws Exception { @@ -255,11 +246,10 @@ * Verify that the runtime optimizer produced the expected join path. */ - // FIXME The join order is unknown. 
- final int[] expected = new int[] { 1, 3, 2, 5, 4, 7, 6 }; + final int[] expected = new int[] { 13, 12, 14, 10, 11, 15, 16 }; assertSameJoinOrder(expected, helper); - + } /** Added: branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/rto/TestRTO_FOAF.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/rto/TestRTO_FOAF.java (rev 0) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/rto/TestRTO_FOAF.java 2014-01-10 23:06:42 UTC (rev 7769) @@ -0,0 +1,225 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2011. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Sep 4, 2011 + */ + +package com.bigdata.rdf.sparql.ast.eval.rto; + +import java.util.Properties; + +import com.bigdata.rdf.axioms.NoAxioms; +import com.bigdata.rdf.sail.BigdataSail; + +/** + * Data driven test suite for the Runtime Query Optimizer (RTO) using quads-mode + * FOAF data. 
+ * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id: TestBasicQuery.java 6440 2012-08-14 17:57:33Z thompsonbry $ + */ +public class TestRTO_FOAF extends AbstractRTOTestCase { + +// private final static Logger log = Logger.getLogger(TestRTO_LUBM.class); + + /** + * + */ + public TestRTO_FOAF() { + } + + /** + * @param name + */ + public TestRTO_FOAF(String name) { + super(name); + } + + /** + * Data files for 3-degrees of separation starting with a crawl of TBL's foaf + * card. + */ + private static final String[] dataFiles = new String[] { // data files + "bigdata-rdf/src/resources/data/foaf/data-0.nq.gz",// + "bigdata-rdf/src/resources/data/foaf/data-1.nq.gz",// + "bigdata-rdf/src/resources/data/foaf/data-2.nq.gz",// + };// + + @Override + public Properties getProperties() { + + // Note: clone to avoid modifying!!! + final Properties properties = (Properties) super.getProperties().clone(); + + properties.setProperty(BigdataSail.Options.QUADS_MODE, "true"); + + properties.setProperty(BigdataSail.Options.AXIOMS_CLASS, + NoAxioms.class.getName()); + + return properties; + + } + + /** + * Find all friends of a friend. + * + * <pre> + * PREFIX foaf: <http://xmlns.com/foaf/0.1/> + * SELECT ?x ?z (count(?y) as ?connectionCount) + * (sample(?xname2) as ?xname) + * (sample(?zname2) as ?zname) + * WHERE { + * ?x foaf:knows ?y . + * ?y foaf:knows ?z . + * FILTER NOT EXISTS { ?x foaf:knows ?z } . + * FILTER ( !sameTerm(?x,?z)) . + * OPTIONAL { ?x rdfs:label ?xname2 } . + * OPTIONAL { ?z rdfs:label ?zname2 } . + * } + * GROUP BY ?x ?z + * </pre> + * + * FIXME This example is not complex enough to run through the RTO. This may + * change when we begin to handle OPTIONALs. However, the FILTER NOT EXISTS + * would also need to be handled to make this work since otherwise the query + * would remain 2 required SPs with a simple FILTER, a sub-SELECT (for the FILTER + * NOT EXISTS) and then two simple OPTIONALs.
+ */ + public void test_FOAF_Q1() throws Exception { + + final TestHelper helper = new TestHelper(// + "rto/FOAF-Q1", // testURI, + "rto/FOAF-Q1.rq",// queryFileURL + dataFiles,// + "rto/FOAF-Q1.srx"// resultFileURL + ); + + /* + * Verify that the runtime optimizer produced the expected join path. + */ + + final int[] expected = new int[] { 2, 4, 1, 3, 5 }; + + assertSameJoinOrder(expected, helper); + + } + + /** + * Find all friends of a friend having at least N indirect connections. + * + * <pre> + * PREFIX foaf: <http://xmlns.com/foaf/0.1/> + * SELECT ?x ?z (count(?y) as ?connectionCount) + * (sample(?xname2) as ?xname) + * (sample(?zname2) as ?zname) + * WHERE { + * ?x foaf:knows ?y . + * ?y foaf:knows ?z . + * FILTER NOT EXISTS { ?x foaf:knows ?z } . + * FILTER ( !sameTerm(?x,?z)) . + * OPTIONAL { ?x rdfs:label ?xname2 } . + * OPTIONAL { ?z rdfs:label ?zname2 } . + * } + * GROUP BY ?x ?z + * HAVING (?connectionCount > 1) + * </pre> + * + * FIXME This example is not complex enough to run through the RTO. This may + * change when we begin to handle OPTIONALs. However, the FILTER NOT EXISTS + * would also need to be handled to make this work since otherwise the query + * would remain 2 required SPs with a simple FILTER, a sub-SELECT (for the FILTER + * NOT EXISTS) and then two simple OPTIONALs.
+ * } + * WHERE { + * + * # Control all RTO parameters for repeatable behavior. + * hint:Query hint:optimizer "Runtime". + * hint:Query hint:RTO-sampleType "DENSE". + * hint:Query hint:RTO-limit "100". + * hint:Query hint:RTO-nedges "1". + * + * ?u a foaf:Person . + * ?u foaf:knows ?v . + * OPTIONAL { ?u rdfs:label ?name } . + * } + * LIMIT 100 + * </pre> + * + * FIXME This example is not complex enough to run through the RTO. This + * might change when we handle the OPTIONAL join inside of the RTO, however + * it would remain 2 required JOINS and an OPTIONAL join and there is no + * reason to run that query through the RTO. The query plan will always be + * the most selective vertex, then the other vertex, then the OPTIONAL JOIN. + * This is fully deterministic based on inspection on the query and the + * range counts. The RTO is not required. + */ + public void test_FOAF_Q10() throws Exception { + + final TestHelper helper = new TestHelper(// + "rto/FOAF-Q10", // testURI, + "rto/FOAF-Q10.rq",// queryFileURL + dataFiles,// + "rto/FOAF-Q10.srx"// resultFileURL + ); + + /* + * Verify that the runtime optimizer produced the expected join path. + */ + + final int[] expected = new int[] { 2, 4, 1, 3, 5 }; + + assertSameJoinOrder(expected, helper); + + } + +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |