From: <mrp...@us...> - 2011-01-13 17:55:21
|
Revision: 4085 http://bigdata.svn.sourceforge.net/bigdata/?rev=4085&view=rev Author: mrpersonick Date: 2011-01-13 17:55:15 +0000 (Thu, 13 Jan 2011) Log Message: ----------- added the option to inline datetimes Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm/RWStore.properties branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm/WORMStore.properties Modified: branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm/RWStore.properties =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm/RWStore.properties 2011-01-13 15:57:45 UTC (rev 4084) +++ branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm/RWStore.properties 2011-01-13 17:55:15 UTC (rev 4085) @@ -48,11 +48,14 @@ #com.bigdata.rdf.store.AbstractTripleStore.termCache.capacity=50000 com.bigdata.rdf.store.AbstractTripleStore.axiomsClass=com.bigdata.rdf.axioms.NoAxioms +# xsd:DateTime needs to be inlined for BSBM +com.bigdata.rdf.store.AbstractTripleStore.inlineDateTimes=true + ## ## Sail options. ## -com.bigdata.rdf.sail.BigdataSail.truthMaintenance=false +com.bigdata.rdf.sail.truthMaintenance=false # 10000 is default. -com.bigdata.rdf.sail.BigdataSail.bufferCapacity=100000 +com.bigdata.rdf.sail.bufferCapacity=100000 Modified: branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm/WORMStore.properties =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm/WORMStore.properties 2011-01-13 15:57:45 UTC (rev 4084) +++ branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm/WORMStore.properties 2011-01-13 17:55:15 UTC (rev 4085) @@ -27,11 +27,14 @@ #com.bigdata.rdf.store.AbstractTripleStore.termCache.capacity=50000 com.bigdata.rdf.store.AbstractTripleStore.axiomsClass=com.bigdata.rdf.axioms.NoAxioms +# xsd:DateTime needs to be inlined for BSBM +com.bigdata.rdf.store.AbstractTripleStore.inlineDateTimes=true + ## ## Sail options. 
## -com.bigdata.rdf.sail.BigdataSail.truthMaintenance=false +com.bigdata.rdf.sail.truthMaintenance=false # 10000 is default. -com.bigdata.rdf.sail.BigdataSail.bufferCapacity=100000 +com.bigdata.rdf.sail.bufferCapacity=100000 This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2011-01-16 21:09:39
|
Revision: 4116 http://bigdata.svn.sourceforge.net/bigdata/?rev=4116&view=rev Author: thompsonbry Date: 2011-01-16 21:09:33 +0000 (Sun, 16 Jan 2011) Log Message: ----------- Updated the bsbm3 notes. Added alternative version of Q5 with better performance (2x) and a copy of the original version of Q5 in case people lose track of which is which. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm3/README.txt Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm/src/resources/bsbm-data/queries/query5-explicit-order.txt branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm/src/resources/bsbm-data/queries/query5-original.txt branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm3/bsbmtools/queries/explore/query5-explicit-order.txt branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm3/bsbmtools/queries/explore/query5-original.txt Added: branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm/src/resources/bsbm-data/queries/query5-explicit-order.txt =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm/src/resources/bsbm-data/queries/query5-explicit-order.txt (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm/src/resources/bsbm-data/queries/query5-explicit-order.txt 2011-01-16 21:09:33 UTC (rev 4116) @@ -0,0 +1,22 @@ +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> +PREFIX bsbm: <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/> +PREFIX BIGDATA_QUERY_HINTS: <http://www.bigdata.com/queryHints#com.bigdata.rdf.sail.QueryHints.optimizer=None> + +SELECT DISTINCT ?product ?productLabel +WHERE { + + %ProductXYZ% bsbm:productPropertyNumeric2 ?origProperty2 . + %ProductXYZ% bsbm:productPropertyNumeric1 ?origProperty1 . + %ProductXYZ% bsbm:productFeature ?prodFeature . + ?product bsbm:productFeature ?prodFeature . + ?product bsbm:productPropertyNumeric1 ?simProperty1 . 
+ FILTER (?simProperty1 < (?origProperty1 + 120) && ?simProperty1 > (?origProperty1 - 120)) + ?product bsbm:productPropertyNumeric2 ?simProperty2 . + FILTER (?simProperty2 < (?origProperty2 + 170) && ?simProperty2 > (?origProperty2 - 170)) + ?product rdfs:label ?productLabel . + FILTER (%ProductXYZ% != ?product) + +} +ORDER BY ?productLabel +LIMIT 5 \ No newline at end of file Added: branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm/src/resources/bsbm-data/queries/query5-original.txt =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm/src/resources/bsbm-data/queries/query5-original.txt (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm/src/resources/bsbm-data/queries/query5-original.txt 2011-01-16 21:09:33 UTC (rev 4116) @@ -0,0 +1,19 @@ +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> +PREFIX bsbm: <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/> + +SELECT DISTINCT ?product ?productLabel +WHERE { + ?product rdfs:label ?productLabel . + FILTER (%ProductXYZ% != ?product) + %ProductXYZ% bsbm:productFeature ?prodFeature . + ?product bsbm:productFeature ?prodFeature . + %ProductXYZ% bsbm:productPropertyNumeric1 ?origProperty1 . + ?product bsbm:productPropertyNumeric1 ?simProperty1 . + FILTER (?simProperty1 < (?origProperty1 + 120) && ?simProperty1 > (?origProperty1 - 120)) + %ProductXYZ% bsbm:productPropertyNumeric2 ?origProperty2 . + ?product bsbm:productPropertyNumeric2 ?simProperty2 . 
+ FILTER (?simProperty2 < (?origProperty2 + 170) && ?simProperty2 > (?origProperty2 - 170)) +} +ORDER BY ?productLabel +LIMIT 5 \ No newline at end of file Modified: branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm3/README.txt =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm3/README.txt 2011-01-16 20:47:09 UTC (rev 4115) +++ branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm3/README.txt 2011-01-16 21:09:33 UTC (rev 4116) @@ -54,3 +54,33 @@ There are a variety of other ant tasks in that directory which may be used to run load and run the BSBM qualification data set, etc. + +Performance should be extremely good for the reduced query mix, which can be +enabled by editing: + + bigdata-perf/bsbm3/bsbmtools/queries/explore/ignoreQueries + +For the reduced query mix, "ignoreQueries" should contain "5 6". For the full +query mix, it should be an empty file (the reduced query mix is enabled by +default in SVN). + +Notes on the queries: + +The static query optimizer and vectored pipelined joins do a great job on most +of the BSBM queries. However, there are two queries which do not do so well out +of the box: + +Query 5 has a bad join plan using the static query optimizer. Good performance +for query 5 can be achieved by replacing the contents of: + + bigdata-perf/bsbm3/bsbmtools/queries/explore/query5.txt +with: + bigdata-perf/bsbm3/bsbmtools/queries/explore/query5-explicit-order.txt + +The original version of query5 has also been saved as query5-original.txt + +Query 6 uses a REGEX filter. Bigdata does not have index support for REGEX, +so this winds up visiting a lot of data and then filtering using the REGEX. This +drags the overall performance down dramatically. It is possible to integrate +bigdata with Lucene, which does support indexed regular expressions, but that is +not something which works out of the box. 
Added: branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm3/bsbmtools/queries/explore/query5-explicit-order.txt =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm3/bsbmtools/queries/explore/query5-explicit-order.txt (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm3/bsbmtools/queries/explore/query5-explicit-order.txt 2011-01-16 21:09:33 UTC (rev 4116) @@ -0,0 +1,22 @@ +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> +PREFIX bsbm: <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/> +PREFIX BIGDATA_QUERY_HINTS: <http://www.bigdata.com/queryHints#com.bigdata.rdf.sail.QueryHints.optimizer=None> + +SELECT DISTINCT ?product ?productLabel +WHERE { + + %ProductXYZ% bsbm:productPropertyNumeric2 ?origProperty2 . + %ProductXYZ% bsbm:productPropertyNumeric1 ?origProperty1 . + %ProductXYZ% bsbm:productFeature ?prodFeature . + ?product bsbm:productFeature ?prodFeature . + ?product bsbm:productPropertyNumeric1 ?simProperty1 . + FILTER (?simProperty1 < (?origProperty1 + 120) && ?simProperty1 > (?origProperty1 - 120)) + ?product bsbm:productPropertyNumeric2 ?simProperty2 . + FILTER (?simProperty2 < (?origProperty2 + 170) && ?simProperty2 > (?origProperty2 - 170)) + ?product rdfs:label ?productLabel . 
+ FILTER (%ProductXYZ% != ?product) + +} +ORDER BY ?productLabel +LIMIT 5 \ No newline at end of file Added: branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm3/bsbmtools/queries/explore/query5-original.txt =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm3/bsbmtools/queries/explore/query5-original.txt (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm3/bsbmtools/queries/explore/query5-original.txt 2011-01-16 21:09:33 UTC (rev 4116) @@ -0,0 +1,19 @@ +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> +PREFIX bsbm: <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/> + +SELECT DISTINCT ?product ?productLabel +WHERE { + ?product rdfs:label ?productLabel . + FILTER (%ProductXYZ% != ?product) + %ProductXYZ% bsbm:productFeature ?prodFeature . + ?product bsbm:productFeature ?prodFeature . + %ProductXYZ% bsbm:productPropertyNumeric1 ?origProperty1 . + ?product bsbm:productPropertyNumeric1 ?simProperty1 . + FILTER (?simProperty1 < (?origProperty1 + 120) && ?simProperty1 > (?origProperty1 - 120)) + %ProductXYZ% bsbm:productPropertyNumeric2 ?origProperty2 . + ?product bsbm:productPropertyNumeric2 ?simProperty2 . + FILTER (?simProperty2 < (?origProperty2 + 170) && ?simProperty2 > (?origProperty2 - 170)) +} +ORDER BY ?productLabel +LIMIT 5 \ No newline at end of file This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2011-05-15 17:46:11
|
Revision: 4500 http://bigdata.svn.sourceforge.net/bigdata/?rev=4500&view=rev Author: thompsonbry Date: 2011-05-15 17:46:05 +0000 (Sun, 15 May 2011) Log Message: ----------- Removed reference to the older version of the NanoSparqlServer (in the sail.bench package). Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm/build.properties branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm/src/resources/logging/log4j.properties Modified: branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm/build.properties =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm/build.properties 2011-05-15 17:45:52 UTC (rev 4499) +++ branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm/build.properties 2011-05-15 17:46:05 UTC (rev 4500) @@ -42,9 +42,9 @@ # The #of products, which implies the #of triples. The relationship is linear. # E.g.: 10=5k; 2785=1M; 70812=25M; 284826=100M; 566496=200M; 1132992=400M, etc. #bsbm.pc=10 -#bsbm.pc=2785 +bsbm.pc=2785 #bsbm.pc=70812 -bsbm.pc=284826 +#bsbm.pc=284826 #bsbm.pc=566496 # The namespace of the KB instance (multiple KBs can be in the same database). @@ -140,11 +140,11 @@ #bsbm.seed=919191 # Test with random seed (the seed is taken from the system clock). This is good for "cold cache" tests. -#bsbm.seed=random +bsbm.seed=random # Use a specific seed (hot disk cache run with only JVM tuning effects). #bsbm.seed=1273687925860 -bsbm.seed=919191 +#bsbm.seed=1273687925861 # # Profiler parameters. @@ -181,8 +181,8 @@ #gcdebug=-XX:+PrintGCDetails -XX:+PrintGCTimeStamps -Xloggc:jvm_gc.log # The record cache (empty for the default cache). 
-#cache= -cache=-Dcom.bigdata.LRUNexus.enabled=false +cache= +#cache=-Dcom.bigdata.LRUNexus.enabled=false #cache=-Dcom.bigdata.LRUNexus.class=com.bigdata.cache.BCHMGlobalLRU2 -Dcom.bigdata.LRUNexus.threadLocalBuffers=true #cache=-Dcom.bigdata.LRUNexus.class=com.bigdata.cache.BCHMGlobalLRU -Dcom.bigdata.LRUNexus.accessPolicy=LIRS -Dcom.bigdata.LRUNexus.limitingCapacity=2000000 #-Dcom.bigdata.LRUNexus.class=com.bigdata.cache.StoreAndAddressLRUCache @@ -190,6 +190,9 @@ # ## -Dcom.bigdata.LRUNexus.percentHeap=.1 +# Option is required in some pre-1.6.0_18 JVMs to avoid problems with lost monitor events. +membar= +#membar=-XX:+UseMembar + # all jvm args for query. -queryJvmArgs=-server -Xmx${bsbm.maxMem} ${gcopts} ${gcdebug} ${profiler} ${cache} -Dlog4j.configuration=file:log4j.properties -# -Dlog4j.debug +queryJvmArgs=-server -Xmx${bsbm.maxMem} -showversion ${membar} ${gcopts} ${gcdebug} ${profiler} ${cache} -Dlog4j.configuration=file:log4j.properties Modified: branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm/src/resources/logging/log4j.properties =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm/src/resources/logging/log4j.properties 2011-05-15 17:45:52 UTC (rev 4499) +++ branches/QUADS_QUERY_BRANCH/bigdata-perf/bsbm/src/resources/logging/log4j.properties 2011-05-15 17:46:05 UTC (rev 4500) @@ -17,7 +17,7 @@ #log4j.logger.com.bigdata.rdf.sail.BigdataSail=INFO #log4j.logger.com.bigdata.rdf.sail.BigdataEvaluationStrategyImpl2=INFO -#log4j.logger.com.bigdata.rdf.sail.bench.NanoSparqlServer=INFO +#log4j.logger.com.bigdata.rdf.sail.webapp.NanoSparqlServer=INFO log4j.logger.com.bigdata.relation.accesspath.BlockingBuffer=ERROR # My Stuff This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |