From: <mar...@us...> - 2010-09-10 14:26:41
Revision: 3527 http://bigdata.svn.sourceforge.net/bigdata/?rev=3527&view=rev Author: martyncutcher Date: 2010-09-10 14:26:34 +0000 (Fri, 10 Sep 2010) Log Message: ----------- Add stats output for RWStore allocations Modified Paths: -------------- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/AllocBlock.java branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/Allocator.java branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/BlobAllocator.java branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/FixedAllocator.java branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java branches/JOURNAL_HA_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/bench/NanoSparqlServer.java Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/AllocBlock.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/AllocBlock.java 2010-09-09 17:17:21 UTC (rev 3526) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/AllocBlock.java 2010-09-10 14:26:34 UTC (rev 3527) @@ -27,6 +27,7 @@ import java.util.ArrayList; import com.bigdata.io.writecache.WriteCacheService; +import com.bigdata.rwstore.RWStore.AllocationStats; /** * Bit maps for an allocator. The allocator is a bit map managed as int[]s. @@ -189,10 +190,17 @@ return allocBits; } - public String getStats() { + public String getStats(AllocationStats stats) { final int total = m_ints * 32; final int allocBits = getAllocBits(); + if (stats != null) { + stats.m_reservedSlots += total; + stats.m_filledSlots += allocBits; + + return ""; + } + return " - start addr : " + RWStore.convertAddr(m_addr) + " [" + allocBits + "::" + total + "]"; } Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/Allocator.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/Allocator.java 2010-09-09 17:17:21 UTC (rev 3526) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/Allocator.java 2010-09-10 14:26:34 UTC (rev 3527) @@ -28,7 +28,9 @@ import java.util.ArrayList; import java.util.concurrent.atomic.AtomicLong; +import com.bigdata.rwstore.RWStore.AllocationStats; + public interface Allocator extends Comparable { public int getBlockSize(); public void setIndex(int index); @@ -50,7 +52,7 @@ public void addAddresses(ArrayList addrs); public int getRawStartAddr(); public int getIndex(); - public void appendShortStats(StringBuffer str); + public void appendShortStats(StringBuilder str, AllocationStats[] stats); public boolean canImmediatelyFree(int addr, int size, IAllocationContext context); } \ No newline at end of file Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/BlobAllocator.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/BlobAllocator.java 2010-09-09 17:17:21 UTC (rev 3526) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/BlobAllocator.java 2010-09-10 14:26:34 UTC (rev 3527) @@ -7,6 +7,7 @@ import java.util.ArrayList; import java.util.concurrent.atomic.AtomicLong; +import com.bigdata.rwstore.RWStore.AllocationStats; import com.bigdata.util.ChecksumUtility; /** @@ -297,7 +298,7 @@ return m_hdrs[hdrIndex]; } - public void appendShortStats(StringBuffer str) { + public void appendShortStats(StringBuilder str, AllocationStats[] stats) { 
str.append("Index: " + m_index + ", address: " + getStartAddr() + ", BLOB\n"); } Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/FixedAllocator.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/FixedAllocator.java 2010-09-09 17:17:21 UTC (rev 3526) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/FixedAllocator.java 2010-09-10 14:26:34 UTC (rev 3527) @@ -30,6 +30,7 @@ import org.apache.log4j.Logger; +import com.bigdata.rwstore.RWStore.AllocationStats; import com.bigdata.util.ChecksumUtility; /** @@ -334,7 +335,7 @@ if (block.m_addr == 0) { break; } - sb.append(block.getStats() + "\r\n"); + sb.append(block.getStats(null) + "\r\n"); counter.addAndGet(block.getAllocBits() * m_size); } @@ -489,14 +490,26 @@ return m_index; } - public void appendShortStats(StringBuffer str) { - str.append("Index: " + m_index + ", " + m_size); + public void appendShortStats(StringBuilder str, AllocationStats[] stats) { + + int si = -1; + + if (stats == null) { + str.append("Index: " + m_index + ", " + m_size); + } else { + for (int i = 0; i < stats.length; i++) { + if (m_size == stats[i].m_blockSize) { + si = i; + break; + } + } + } Iterator<AllocBlock> blocks = m_allocBlocks.iterator(); while (blocks.hasNext()) { AllocBlock block = blocks.next(); if (block.m_addr != 0) { - str.append(block.getStats()); + str.append(block.getStats(si == -1 ? null : stats[si])); } else { break; } Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java 2010-09-09 17:17:21 UTC (rev 3526) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java 2010-09-10 14:26:34 UTC (rev 3527) @@ -634,7 +634,7 @@ // clearOutstandingDeferrels(deferredFreeListAddr, deferredFreeListEntries); if (log.isTraceEnabled()) { - final StringBuffer str = new StringBuffer(); + final StringBuilder str = new StringBuilder(); this.showAllocators(str); log.trace(str); } @@ -778,7 +778,7 @@ } if (false) { - StringBuffer tmp = new StringBuffer(); + StringBuilder tmp = new StringBuilder(); showAllocators(tmp); System.out.println("Allocators: " + tmp.toString()); @@ -2076,16 +2076,50 @@ } + public static class AllocationStats { + public AllocationStats(int i) { + m_blockSize = i; + } + long m_blockSize; + long m_reservedSlots; + long m_filledSlots; + } /** * Utility debug outputing the allocator array, showing index, start * address and alloc type/size + * + * Collected statistics are against each Allocation Block size: + * total number of slots | store size + * number of filled slots | store used */ - public void showAllocators(StringBuffer str) { + public void showAllocators(StringBuilder str) { + AllocationStats[] stats = new AllocationStats[m_allocSizes.length]; + for (int i = 0; i < stats.length; i++) { + stats[i] = new AllocationStats(m_allocSizes[i]*64); + } Iterator allocs = m_allocs.iterator(); while (allocs.hasNext()) { Allocator alloc = (Allocator) allocs.next(); - alloc.appendShortStats(str); + alloc.appendShortStats(str, stats); } + + // Append Summary + str.append("\n-------------------------\n"); + str.append("RWStore Allocation Summary\n"); + str.append("-------------------------\n"); + long treserved = 0; + long tfilled = 0; + for (int i = 0; i < stats.length; i++) { + str.append("Allocation: " + 
stats[i].m_blockSize); + long reserved = stats[i].m_reservedSlots * stats[i].m_blockSize; + treserved += reserved; + str.append(", reserved: " + reserved); + long filled = stats[i].m_filledSlots * stats[i].m_blockSize; + tfilled += filled; + str.append(", filled: " + filled); + str.append("\n"); + } + str.append("Total - file: " + convertAddr(m_fileSize) + ", reserved: " + treserved + ", filled: " + tfilled + "\n"); } public ArrayList getStorageBlockAddresses() { Modified: branches/JOURNAL_HA_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/bench/NanoSparqlServer.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/bench/NanoSparqlServer.java 2010-09-09 17:17:21 UTC (rev 3526) +++ branches/JOURNAL_HA_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/bench/NanoSparqlServer.java 2010-09-10 14:26:34 UTC (rev 3527) @@ -75,11 +75,13 @@ import com.bigdata.LRUNexus; import com.bigdata.btree.IndexMetadata; import com.bigdata.journal.AbstractJournal; +import com.bigdata.journal.IBufferStrategy; import com.bigdata.journal.IIndexManager; import com.bigdata.journal.IJournal; import com.bigdata.journal.ITransactionService; import com.bigdata.journal.ITx; import com.bigdata.journal.Journal; +import com.bigdata.journal.RWStrategy; import com.bigdata.journal.TimestampUtility; import com.bigdata.rdf.sail.BigdataSail; import com.bigdata.rdf.sail.BigdataSailGraphQuery; @@ -89,6 +91,7 @@ import com.bigdata.rdf.store.AbstractTripleStore; import com.bigdata.relation.AbstractResource; import com.bigdata.relation.RelationSchema; +import com.bigdata.rwstore.RWStore; import com.bigdata.service.AbstractDistributedFederation; import com.bigdata.service.AbstractFederation; import com.bigdata.service.IBigdataFederation; @@ -363,6 +366,17 @@ } // sb.append(tripleStore.predicateUsage()); + + if (tripleStore.getIndexManager() instanceof Journal) { + Journal journal = (Journal) tripleStore.getIndexManager(); + IBufferStrategy strategy = journal.getBufferStrategy(); + if (strategy instanceof RWStrategy) { + RWStore store = ((RWStrategy) strategy).getRWStore(); + + store.showAllocators(sb); + + } + } } catch (Throwable t) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
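[Editor's note] The summary appended by showAllocators() reduces to simple per-size arithmetic: each AllocationStats entry counts reserved and filled slots for one allocation size, and the byte figures are just slot counts multiplied by the slot size. Below is a minimal, standalone sketch of that rollup; the field names mirror the AllocationStats members in the patch, but the class, sample counts, and main() are hypothetical and not part of RWStore.

// Illustrative rollup of the per-size statistics gathered by showAllocators().
public class AllocationSummarySketch {

    static class AllocationStats {
        final long m_blockSize;   // slot size in bytes for this allocator size
        long m_reservedSlots;     // slots reserved on the backing file ("store size")
        long m_filledSlots;       // slots currently holding data ("store used")
        AllocationStats(long blockSize) { m_blockSize = blockSize; }
    }

    public static void main(String[] args) {
        // Sample figures, chosen only for illustration.
        AllocationStats[] stats = { new AllocationStats(64), new AllocationStats(128) };
        stats[0].m_reservedSlots = 8192; stats[0].m_filledSlots = 5000;
        stats[1].m_reservedSlots = 4096; stats[1].m_filledSlots = 1024;

        long treserved = 0, tfilled = 0;
        StringBuilder str = new StringBuilder();
        for (AllocationStats s : stats) {
            final long reserved = s.m_reservedSlots * s.m_blockSize;
            final long filled = s.m_filledSlots * s.m_blockSize;
            treserved += reserved;
            tfilled += filled;
            str.append("Allocation: " + s.m_blockSize + ", reserved: " + reserved
                    + ", filled: " + filled + "\n");
        }
        str.append("Total - reserved: " + treserved + ", filled: " + tfilled + "\n");
        System.out.print(str);
    }
}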
From: <tho...@us...> - 2010-09-13 15:17:03
Revision: 3535 http://bigdata.svn.sourceforge.net/bigdata/?rev=3535&view=rev Author: thompsonbry Date: 2010-09-13 15:16:56 +0000 (Mon, 13 Sep 2010) Log Message: ----------- Modified the WriteCacheService to log cache evictions @ INFO. Modified the bsbm ant script and properties to locate the correct log4j configuration file. Fixed reporting for nclean and perhaps hitRate for the write cache / write cache service Modified Paths: -------------- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/io/writecache/WriteCache.java branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/io/writecache/WriteCacheService.java branches/JOURNAL_HA_BRANCH/bigdata-perf/bsbm/RWStore.properties branches/JOURNAL_HA_BRANCH/bigdata-perf/bsbm/build.properties branches/JOURNAL_HA_BRANCH/bigdata-perf/bsbm/build.xml Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/io/writecache/WriteCache.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/io/writecache/WriteCache.java 2010-09-13 14:53:57 UTC (rev 3534) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/io/writecache/WriteCache.java 2010-09-13 15:16:56 UTC (rev 3535) @@ -48,6 +48,7 @@ import org.apache.log4j.Logger; import com.bigdata.btree.IndexSegmentBuilder; +import com.bigdata.counters.CAT; import com.bigdata.counters.CounterSet; import com.bigdata.counters.Instrument; import com.bigdata.io.DirectBufferPool; @@ -792,7 +793,7 @@ if ((md = recordMap.get(offset)) == null) { // The record is not in this write cache. - counters.nmiss.incrementAndGet(); + counters.nmiss.increment(); return null; } @@ -843,7 +844,7 @@ } - counters.nhit.incrementAndGet(); + counters.nhit.increment(); if (log.isTraceEnabled()) { log.trace(show(dst, "read bytes")); @@ -1329,12 +1330,12 @@ /** * #of read requests that are satisfied by the write cache. */ - public final AtomicLong nhit = new AtomicLong(); + public final CAT nhit = new CAT(); /** * The #of read requests that are not satisfied by the write cache. */ - public final AtomicLong nmiss = new AtomicLong(); + public final CAT nmiss = new CAT(); /* * write on the cache. Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/io/writecache/WriteCacheService.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/io/writecache/WriteCacheService.java 2010-09-13 14:53:57 UTC (rev 3534) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/io/writecache/WriteCacheService.java 2010-09-13 15:16:56 UTC (rev 3535) @@ -628,10 +628,23 @@ try { cleanList.add(cache); cleanListNotEmpty.signalAll(); - counters.get().nclean = dirtyList.size(); + counters.get().nclean = cleanList.size(); } finally { cleanListLock.unlock(); } + if(log.isInfoEnabled()) { + final WriteCacheServiceCounters tmp = counters.get(); + final long nhit = tmp.nhit.get(); + final long ntests = nhit + tmp.nmiss.get(); + final double hitRate=(ntests == 0L ? 
0d : (double) nhit / ntests); + log.info("WriteCacheService: bufferSize=" + + buffers[0].capacity() + ",nbuffers=" + + tmp.nbuffers + ",nclean=" + tmp.nclean + + ",ndirty=" + tmp.ndirty + ",maxDirty=" + + tmp.maxdirty + ",nflush=" + tmp.nflush + + ",nwrite=" + tmp.nwrite + ",hitRate=" + + hitRate); + } } catch (InterruptedException t) { /* @@ -1394,8 +1407,8 @@ public boolean write(final long offset, final ByteBuffer data, final int chk, final boolean useChecksum) throws InterruptedException, IllegalStateException { - if (log.isInfoEnabled()) { - log.info("offset: " + offset + ", length: " + data.limit() + if (log.isTraceEnabled()) { + log.trace("offset: " + offset + ", length: " + data.limit() + ", chk=" + chk + ", useChecksum=" + useChecksum); } @@ -1675,8 +1688,8 @@ protected boolean writeLargeRecord(final long offset, final ByteBuffer data, final int chk, final boolean useChecksum) throws InterruptedException, IllegalStateException { - if (log.isInfoEnabled()) { - log.info("offset: " + offset + ", length: " + data.limit() + ", chk=" + chk + ", useChecksum=" + if (log.isTraceEnabled()) { + log.trace("offset: " + offset + ", length: " + data.limit() + ", chk=" + chk + ", useChecksum=" + useChecksum); } @@ -1905,6 +1918,9 @@ if (cache == null) { // No match. + + counters.get().nmiss.increment(); + return null; } Modified: branches/JOURNAL_HA_BRANCH/bigdata-perf/bsbm/RWStore.properties =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata-perf/bsbm/RWStore.properties 2010-09-13 14:53:57 UTC (rev 3534) +++ branches/JOURNAL_HA_BRANCH/bigdata-perf/bsbm/RWStore.properties 2010-09-13 15:16:56 UTC (rev 3535) @@ -13,6 +13,21 @@ com.bigdata.btree.writeRetentionQueue.capacity=4000 com.bigdata.btree.BTree.branchingFactor=128 +# Reduce the branching factor for the lexicon since BSBM uses a lot of long +# literals. Note that you have to edit this override to specify the namespace +# into which the BSBM data will be loaded. +com.bigdata.namespace.BSBM_284826.lex.TERM2ID.com.bigdata.btree.BTree.branchingFactor=32 +com.bigdata.namespace.BSBM_284826.lex.ID2TERM.com.bigdata.btree.BTree.branchingFactor=32 + +# Override the #of write cache buffers. +com.bigdata.journal.AbstractJournal.writeCacheBufferCount=12 + +# Note: You must override the buffer capacity in build.xml on the +# "run-load" target, but this would give you 10M write cache buffers +# if you placed that override there. +# +# -Dcom.bigdata.io.DirectBufferPool.bufferCapacity=10485760 + # 200M initial extent. com.bigdata.journal.AbstractJournal.initialExtent=209715200 com.bigdata.journal.AbstractJournal.maximumExtent=209715200 Modified: branches/JOURNAL_HA_BRANCH/bigdata-perf/bsbm/build.properties =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata-perf/bsbm/build.properties 2010-09-13 14:53:57 UTC (rev 3534) +++ branches/JOURNAL_HA_BRANCH/bigdata-perf/bsbm/build.properties 2010-09-13 15:16:56 UTC (rev 3535) @@ -57,9 +57,9 @@ # Laptop #bsbm.baseDir=d:/bigdata-perf-analysis/bsbm/bsbm_${bsbm.pc} # Server -#bsbm.baseDir=/nas/data/bsbm/bsbm_${bsbm.pc} +bsbm.baseDir=/nas/data/bsbm/bsbm_${bsbm.pc} # Windows 2008 Server -bsbm.baseDir=c:/usr/local/data/bsbm/bsbm_${bsbm.pc} +#bsbm.baseDir=c:/usr/local/data/bsbm/bsbm_${bsbm.pc} # Where to put the XML results files. bsbm.resultsDir=${bsbm.baseDir}/.. @@ -71,12 +71,12 @@ bsbm.outputType=nt # Specify ".gz" or ".zip" if pre-generated files have been compressed. 
-bsbm.compressType= -#bsbm.compressType=".gz" +#bsbm.compressType= +bsbm.compressType=".gz" # Which mode to use for the Journal. (DiskRW or DiskWORM) -#journalMode=RW -journalMode=WORM +journalMode=RW +#journalMode=WORM # The name of the file containing the generated RDF data without the filename extension. bsbm.outputFile=${bsbm.baseDir}/dataset @@ -89,11 +89,11 @@ #bsbm.journalFile=${bsbm.baseDir}/bigdata-bsbm.worm #bsbm.journalFile=${bsbm.baseDir}/bigdata-bsbm.jnl # Note: This is on the large volume. -#bsbm.journalFile=/data/bsbm/bsbm_${bsbm.pc}/bigdata-bsbm.${journalMode}.jnl +bsbm.journalFile=/data/bsbm/bsbm_${bsbm.pc}/bigdata-bsbm.${journalMode}.jnl # Windows 2008 Server: SSD. #bsbm.journalFile=e:/data/bsbm/bsbm_${bsbm.pc}/bigdata-bsbm.${journalMode}.jnl # Windows 2008 Server: SAS. -bsbm.journalFile=f:/data/bsbm/bsbm_${bsbm.pc}/bigdata-bsbm.${journalMode}.jnl +#bsbm.journalFile=f:/data/bsbm/bsbm_${bsbm.pc}/bigdata-bsbm.${journalMode}.jnl # # Qualification of the system under test. @@ -144,7 +144,7 @@ # Use a specific seed (hot disk cache run with only JVM tuning effects). #bsbm.seed=1273687925860 -bsbm.seed=1273687925861 +bsbm.seed=919191 # # Profiler parameters. @@ -167,7 +167,7 @@ profiler=${profilerAgent} ${profilerAgentOptions} # Configure GC. -gcopts= +#gcopts= #gcopts=-verbose:gc #gcopts=-XX:+UseConcMarkSweepGC -XX:+CMSIncrementalMode gcopts=-XX:+UseParallelOldGC @@ -191,4 +191,5 @@ ## -Dcom.bigdata.LRUNexus.percentHeap=.1 # all jvm args for query. -queryJvmArgs=-server -Xmx${bsbm.maxMem} ${gcopts} ${gcdebug} ${profiler} ${cache} -Dlog4j.configuration=log4j.properties +queryJvmArgs=-server -Xmx${bsbm.maxMem} ${gcopts} ${gcdebug} ${profiler} ${cache} -Dlog4j.configuration=file:log4j.properties +# -Dlog4j.debug Modified: branches/JOURNAL_HA_BRANCH/bigdata-perf/bsbm/build.xml =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata-perf/bsbm/build.xml 2010-09-13 14:53:57 UTC (rev 3534) +++ branches/JOURNAL_HA_BRANCH/bigdata-perf/bsbm/build.xml 2010-09-13 15:16:56 UTC (rev 3535) @@ -50,14 +50,14 @@ <exclude name="**/*.java" /> <exclude name="**/package.html" /> </fileset> - <!-- copy log4j configuration file. --> - <fileset dir="${bsbm.dir}/src/resources/logging" /> </copy> <copy toDir="${build.dir}/bin"> <!-- copy benchmark data and queries. --> <fileset dir="${bsbm.dir}/src/resources/bsbm-data" /> <!-- copy the journal configuration file. --> <fileset file="${bsbm.dir}/*.properties" /> + <!-- copy log4j configuration file. --> + <fileset dir="${bsbm.dir}/src/resources/logging" /> </copy> </target> @@ -144,7 +144,10 @@ <java classname="com.bigdata.rdf.store.DataLoader" fork="true" failonerror="true" dir="${build.dir}/bin"> <arg line="-namespace ${bsbm.namespace} ${bsbm.journalPropertyFile} ${bsbm.outputFile}.${bsbm.outputType}${bsbm.compressType}" /> <!-- specify/override the journal file name. --> - <jvmarg line="${queryJvmArgs} -Dcom.bigdata.journal.AbstractJournal.file=${bsbm.journalFile}" /> + <jvmarg line="${queryJvmArgs} -Dcom.bigdata.journal.AbstractJournal.file=${bsbm.journalFile} + -Dcom.bigdata.rdf.store.DataLoader.bufferCapacity=1000000 + -Dcom.bigdata.io.DirectBufferPool.bufferCapacity=10485760 + " /> <classpath> <path refid="runtime.classpath" /> </classpath> This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
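[Editor's note] The switch from AtomicLong to CAT for nhit/nmiss trades an exact point-in-time count for cheap, low-contention increments on the hot read path, and the hit rate logged at cache eviction is guarded against division by zero exactly as in the patch. The sketch below uses java.util.concurrent.atomic.LongAdder as a stand-in for bigdata's CAT class; it illustrates the accounting only and is not the WriteCache implementation.

import java.util.concurrent.atomic.LongAdder;

public class HitRateSketch {

    final LongAdder nhit = new LongAdder();   // reads satisfied by the write cache
    final LongAdder nmiss = new LongAdder();  // reads not satisfied by the write cache

    void recordRead(boolean satisfiedByCache) {
        if (satisfiedByCache) nhit.increment(); else nmiss.increment();
    }

    double hitRate() {
        final long nhits = nhit.sum();
        final long ntests = nhits + nmiss.sum();
        // Guard against division by zero before any reads have been recorded.
        return ntests == 0L ? 0d : (double) nhits / ntests;
    }

    public static void main(String[] args) {
        HitRateSketch c = new HitRateSketch();
        c.recordRead(true); c.recordRead(true); c.recordRead(false);
        System.out.println("hitRate=" + c.hitRate()); // 0.666...
    }
}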
From: <mar...@us...> - 2010-10-08 11:51:53
Revision: 3751 http://bigdata.svn.sourceforge.net/bigdata/?rev=3751&view=rev Author: martyncutcher Date: 2010-10-08 11:51:47 +0000 (Fri, 08 Oct 2010) Log Message: ----------- add debug output to isolate test failures Modified Paths: -------------- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/io/writecache/WriteCacheService.java branches/JOURNAL_HA_BRANCH/bigdata/src/test/com/bigdata/io/TestFileChannelUtility.java branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/rules/TestDatabaseAtOnceClosure.java Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/io/writecache/WriteCacheService.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/io/writecache/WriteCacheService.java 2010-10-08 01:45:59 UTC (rev 3750) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/io/writecache/WriteCacheService.java 2010-10-08 11:51:47 UTC (rev 3751) @@ -1411,6 +1411,9 @@ log.trace("offset: " + offset + ", length: " + data.limit() + ", chk=" + chk + ", useChecksum=" + useChecksum); } + + if (!open) + throw new IllegalStateException("WriteCacheService has been closed"); if (offset < 0) throw new IllegalArgumentException(); @@ -1907,6 +1910,8 @@ * Not open. Return [null] rather than throwing an exception per the * contract for this implementation. */ + log.warn("Reading from closed writeCacheService"); + return null; } Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/test/com/bigdata/io/TestFileChannelUtility.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/test/com/bigdata/io/TestFileChannelUtility.java 2010-10-08 01:45:59 UTC (rev 3750) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/test/com/bigdata/io/TestFileChannelUtility.java 2010-10-08 11:51:47 UTC (rev 3751) @@ -358,7 +358,7 @@ final RandomAccessFile source = new RandomAccessFile(sourceFile, "rw"); - final RandomAccessFile target = new RandomAccessFile(sourceFile, "rw"); + final RandomAccessFile target = new RandomAccessFile(targetFile, "rw"); try { @@ -377,6 +377,7 @@ // write ground truth onto the file. FileChannelUtility.writeAll(source.getChannel(), ByteBuffer .wrap(expected), 0L/* pos */); + target.setLength(FILE_SIZE); // do a bunch of trials of random transfers. for(int trial=0; trial<1000; trial++) { Modified: branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/rules/TestDatabaseAtOnceClosure.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/rules/TestDatabaseAtOnceClosure.java 2010-10-08 01:45:59 UTC (rev 3750) +++ branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/rules/TestDatabaseAtOnceClosure.java 2010-10-08 11:51:47 UTC (rev 3751) @@ -619,12 +619,15 @@ String baseURI; try { + + System.out.println("looking for " + resource); is = new FileInputStream(new File(resource)); baseURI = new File(resource).toURI().toString(); } catch (FileNotFoundException ex) { + System.out.println("no file, retrieving from resource"); is = getClass().getResourceAsStream(resource); baseURI = getClass().getResource(resource).toURI() .toString(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
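[Editor's note] Besides the debug output, this revision makes the closed-service behavior of WriteCacheService explicit: a write against a closed service fails fast, while a read simply reports a miss by returning null, per the contract cited in the existing comment. A simplified stand-in for that asymmetry (not the actual service):

public class ClosedServiceSketch {

    private volatile boolean open = true;

    public void write(long offset, byte[] data) {
        if (!open)
            throw new IllegalStateException("WriteCacheService has been closed");
        // ... buffer the record ...
    }

    public byte[] read(long offset) {
        if (!open) {
            // Not open: return null rather than throwing, per the read contract.
            return null;
        }
        // ... look up the record; null also means "not in cache" ...
        return null;
    }

    public void close() { open = false; }
}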
From: <tho...@us...> - 2010-11-04 19:51:13
Revision: 3896 http://bigdata.svn.sourceforge.net/bigdata/?rev=3896&view=rev Author: thompsonbry Date: 2010-11-04 19:51:05 +0000 (Thu, 04 Nov 2010) Log Message: ----------- IndexMetadata - Raised the maximum branching factor to 4k. AbstractLocalTripleStore - Modified log information to report the average record size for a B+Tree. DataLoader - Added "-verbose" option to show the performance counters and details from the indices and the store. ConcurrencyManager, Journal, RWStrategy, RWStore - fixed some issues related to performance counter reporting. Modified Paths: -------------- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/btree/IndexMetadata.java branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/journal/AbstractLocalTransactionManager.java branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/journal/ConcurrencyManager.java branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/journal/Journal.java branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/journal/RWStrategy.java branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/inf/ClosureStats.java branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/AbstractLocalTripleStore.java branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/DataLoader.java Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/btree/IndexMetadata.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/btree/IndexMetadata.java 2010-11-04 17:06:22 UTC (rev 3895) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/btree/IndexMetadata.java 2010-11-04 19:51:05 UTC (rev 3896) @@ -271,7 +271,7 @@ /** * A reasonable maximum branching factor for a {@link BTree}. */ - int MAX_BTREE_BRANCHING_FACTOR = 1024; + int MAX_BTREE_BRANCHING_FACTOR = 4196; /** * A reasonable maximum branching factor for an {@link IndexSegment}. Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/journal/AbstractLocalTransactionManager.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/journal/AbstractLocalTransactionManager.java 2010-11-04 17:06:22 UTC (rev 3895) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/journal/AbstractLocalTransactionManager.java 2010-11-04 19:51:05 UTC (rev 3896) @@ -308,27 +308,22 @@ throw new RuntimeException(msg, cause); } - - /** - * Return interesting statistics about the transaction manager. - */ - synchronized public CounterSet getCounters() { - - if (countersRoot == null) { - countersRoot = new CounterSet(); + /** + * Return interesting statistics about the transaction manager. 
+ */ + public CounterSet getCounters() { - countersRoot.addCounter("#active", new Instrument<Integer>() { - protected void sample() { - setValue(activeTx.size()); - } - }); + final CounterSet countersRoot = new CounterSet(); - } - - return countersRoot; - - } - private CounterSet countersRoot; - + countersRoot.addCounter("#active", new Instrument<Integer>() { + protected void sample() { + setValue(activeTx.size()); + } + }); + + return countersRoot; + + } + } Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/journal/ConcurrencyManager.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/journal/ConcurrencyManager.java 2010-11-04 17:06:22 UTC (rev 3895) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/journal/ConcurrencyManager.java 2010-11-04 19:51:05 UTC (rev 3896) @@ -1002,9 +1002,9 @@ */ synchronized public CounterSet getCounters() { - if (countersRoot == null){ +// if (countersRoot == null){ - countersRoot = new CounterSet(); + CounterSet countersRoot = new CounterSet(); // elapsed time since the service started (milliseconds). countersRoot.addCounter("elapsed", @@ -1050,12 +1050,12 @@ } - } +// } return countersRoot; } - private CounterSet countersRoot; +// private CounterSet countersRoot; /** * Submit a task (asynchronous). Tasks will execute asynchronously in the Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/journal/Journal.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/journal/Journal.java 2010-11-04 17:06:22 UTC (rev 3895) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/journal/Journal.java 2010-11-04 19:51:05 UTC (rev 3896) @@ -283,20 +283,15 @@ public CounterSet getCounters() { -// if (counters == null) { - final CounterSet counters = super.getCounters(); counters.attach(concurrencyManager.getCounters()); counters.attach(localTransactionManager.getCounters()); -// } - return counters; } -// private CounterSet counters; /* * IResourceManager Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/journal/RWStrategy.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/journal/RWStrategy.java 2010-11-04 17:06:22 UTC (rev 3895) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/journal/RWStrategy.java 2010-11-04 19:51:05 UTC (rev 3896) @@ -293,7 +293,7 @@ public CounterSet getCounters() { - return m_store.getStoreCounters().getCounters(); + return m_store.getCounters(); } Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java 2010-11-04 17:06:22 UTC (rev 3895) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java 2010-11-04 19:51:05 UTC (rev 3896) @@ -3853,6 +3853,8 @@ /** * Return interesting information about the write cache and file operations. + * + * @todo allocations data? user extent allocated? user extent used? etc. 
*/ public CounterSet getCounters() { Modified: branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/inf/ClosureStats.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/inf/ClosureStats.java 2010-11-04 17:06:22 UTC (rev 3895) +++ branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/inf/ClosureStats.java 2010-11-04 19:51:05 UTC (rev 3896) @@ -73,10 +73,16 @@ } + public long triplesPerSecond() { + + return ((long) (((double) mutationCount.get()) / ((double) elapsed.get()) * 1000d)); + + } + public String toString() { return getClass().getSimpleName() + "{mutationCount=" + mutationCount.estimate_get() - + ", elapsed=" + elapsed.estimate_get() + "ms}"; + + ", elapsed=" + elapsed.estimate_get() + "ms, rate="+triplesPerSecond()+"}"; } Modified: branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/AbstractLocalTripleStore.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/AbstractLocalTripleStore.java 2010-11-04 17:06:22 UTC (rev 3895) +++ branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/AbstractLocalTripleStore.java 2010-11-04 19:51:05 UTC (rev 3896) @@ -90,10 +90,11 @@ final long nodesWritten = btreeCounters.getNodesWritten(); final long leavesWritten = btreeCounters.getLeavesWritten(); final long bytesWritten = btreeCounters.getBytesWritten(); + final long bytesPerRecord = bytesWritten/(nodesWritten+leavesWritten); - sb.append((first ? "" : ", ") + fqn + "{nodes=" + nodesWritten - + ",leaves=" + leavesWritten + ", bytes=" + bytesWritten - + "}"); + sb.append((first ? "" : ", ") + fqn + "{nodes=" + nodesWritten + + ",leaves=" + leavesWritten + ", bytes=" + bytesWritten + + ", averageBytesPerRecord=" + bytesPerRecord + "}"); first = false; @@ -113,10 +114,11 @@ final long nodesWritten = btreeCounters.getNodesWritten(); final long leavesWritten = btreeCounters.getLeavesWritten(); final long bytesWritten = btreeCounters.getBytesWritten(); + final long bytesPerRecord = bytesWritten/(nodesWritten+leavesWritten); - sb.append((first ? "" : ", ") + fqn + "{nodes=" + nodesWritten - + ",leaves=" + leavesWritten + ", bytes=" + bytesWritten - + "}"); + sb.append((first ? "" : ", ") + fqn + "{nodes=" + nodesWritten + + ",leaves=" + leavesWritten + ", bytes=" + bytesWritten + + ", averageBytesPerRecord=" + bytesPerRecord + "}"); first = false; Modified: branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/DataLoader.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/DataLoader.java 2010-11-04 17:06:22 UTC (rev 3895) +++ branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/DataLoader.java 2010-11-04 19:51:05 UTC (rev 3896) @@ -49,6 +49,7 @@ import com.bigdata.journal.ITx; import com.bigdata.journal.Journal; +import com.bigdata.journal.RWStrategy; import com.bigdata.rdf.inf.ClosureStats; import com.bigdata.rdf.inf.TruthMaintenance; import com.bigdata.rdf.lexicon.LexiconRelation; @@ -1190,7 +1191,7 @@ * support multiple data files within a single archive. * * @param args - * [-closure][-namespace <i>namespace</i>] propertyFile (fileOrDir)+ + * [-closure][-verbose][-namespace <i>namespace</i>] propertyFile (fileOrDir)+ * * @throws IOException */ @@ -1199,6 +1200,7 @@ // default namespace. 
String namespace = "kb"; boolean doClosure = false; + boolean verbose = false; RDFFormat rdfFormat = null; String baseURI = null; @@ -1226,6 +1228,10 @@ doClosure = true; + } else if (arg.equals("-verbose")) { + + verbose = true; + } else { System.err.println("Unknown argument: " + arg); @@ -1335,8 +1341,10 @@ jnl = new Journal(properties); - final long firstOffset = jnl.getRootBlockView().getNextOffset(); - + // #of bytes on the journal before (user extent). +// final long firstOffset = jnl.getRootBlockView().getNextOffset(); + final long userData0 = jnl.getBufferStrategy().size(); + System.out.println("Journal file: "+jnl.getFile()); AbstractTripleStore kb = (AbstractTripleStore) jnl @@ -1368,9 +1376,19 @@ dataLoader.endSource(); System.out.println("Load: " + totals); - - if (dataLoader.closureEnum == ClosureEnum.None && doClosure) { + + if (dataLoader.closureEnum == ClosureEnum.None && doClosure) { + if (verbose) { + + System.out.println(jnl.getCounters().toString()); + + System.out + .println(((AbstractLocalTripleStore) dataLoader.database) + .getLocalBTreeBytesWritten( + new StringBuilder()).toString()); + } + System.out.println("Computing closure."); final ClosureStats stats = dataLoader.doClosure(); @@ -1378,13 +1396,38 @@ System.out.println("Closure: "+stats.toString()); } + + jnl.commit(); + + if (verbose) { + + System.out.println(jnl.getCounters().toString()); + + System.out + .println(((AbstractLocalTripleStore) dataLoader.database) + .getLocalBTreeBytesWritten(new StringBuilder()) + .toString()); + + if (jnl.getBufferStrategy() instanceof RWStrategy) { + + final StringBuilder sb = new StringBuilder(); + + ((RWStrategy) jnl.getBufferStrategy()).getRWStore() + .showAllocators(sb); + + System.out.println(sb); + + } + + } + + // #of bytes on the journal (user data only). + final long userData1 = jnl.getBufferStrategy().size(); - jnl.commit(); - - final long lastOffset = jnl.getRootBlockView().getNextOffset(); + // #of bytes written (user data only) + final long bytesWritten = (userData1 - userData0); - System.out.println("Wrote: " + (lastOffset - firstOffset) - + " bytes."); + System.out.println("Wrote: " + bytesWritten + " bytes."); final long elapsedTotal = System.currentTimeMillis() - begin; @@ -1404,7 +1447,7 @@ private static void usage() { - System.err.println("usage: [-namespace namespace] propertyFile (fileOrDir)+"); + System.err.println("usage: [-closure][-verbose][-namespace namespace] propertyFile (fileOrDir)+"); System.exit(1); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
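[Editor's note] The arithmetic behind the new reporting is small: ClosureStats.triplesPerSecond() scales mutationCount by elapsed milliseconds, and DataLoader now reports bytes written as the growth of the buffer strategy's user extent across the load rather than the root-block offset delta. A standalone sketch of both calculations; the zero-elapsed guard is an addition for the sketch (the patch divides directly), and the user-extent figures are assumed values, with the closure numbers taken from the U50 run quoted in the next revision's log.

public class LoaderMathSketch {

    static long triplesPerSecond(long mutationCount, long elapsedMillis) {
        if (elapsedMillis == 0L)
            return 0L; // guard added for the sketch only
        return (long) (((double) mutationCount) / ((double) elapsedMillis) * 1000d);
    }

    public static void main(String[] args) {
        // 1,699,274 inferences in 71,662 ms, as reported for the U50 closure.
        System.out.println(triplesPerSecond(1699274L, 71662L)); // 23712

        // Bytes written is simply the growth of the user extent across the load.
        final long userData0 = 209_715_200L; // getBufferStrategy().size() before (assumed)
        final long userData1 = 241_172_480L; // size() after commit (assumed)
        System.out.println("Wrote: " + (userData1 - userData0) + " bytes.");
    }
}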
From: <tho...@us...> - 2010-11-07 12:46:43
Revision: 3906 http://bigdata.svn.sourceforge.net/bigdata/?rev=3906&view=rev Author: thompsonbry Date: 2010-11-07 12:46:37 +0000 (Sun, 07 Nov 2010) Log Message: ----------- Reduced a variety of defaults in order to reduce the heap demand associated with join processing on larger data sets. IChunkedIterator.DEFAULT_CHUNK_SIZE = 100;//was 10000; BlockingBuffer.DEFAULT_PRODUCER_QUEUE_CAPACITY = 10; // was 5000 BlockingBuffer.DEFAULT_MINIMUM_CHUNK_SIZE = 100; // was 10000 AbstractResource.DEFAULT_CHUNK_OF_CHUNKS_CAPACITY = "10"; // was 1000 AbstractTipleStore.DEFAULT_TERM_CACHE_CAPACITY = "5000"; // was 50000 AbstractAccessPath#1030 modified to pass in the chunkCapacity. final BlockingBuffer<R[]> buffer = new BlockingBuffer<R[]>( chunkOfChunksCapacity,chunkCapacity,10,TimeUnit.MILLISECONDS); and AbstractResource.DEFAULT_FULLY_BUFFERED_READ_THRESHOLD = "200";//""+20*Bytes.kilobyte32; Load of U50 is unchanged when compared with the baseline. [java] Load: 6890949 stmts added in 123.422 secs, rate= 55831, commitLatency=0ms However, closure is significantly slower (compare with 30707). Closure performance can not be related to the lexicon, so this must be either the queue capacity or the chunk capacity. [java] Closure: ClosureStats{mutationCount=1699274, elapsed=71662ms, rate=23712} Total time: 3 minutes 17 seconds There is very little impact on query: (compare with 10569 for ~ 4k pages from above). [java] ### Finished testing BIGDATA_SPARQL_ENDPOINT ### [java] BIGDATA_SPARQL_ENDPOINT #trials=10 #parallel=1 [java] query Time Result# [java] query1 46 4 [java] query3 25 6 [java] query4 63 34 [java] query5 59 719 [java] query7 24 61 [java] query8 189 6463 [java] query10 26 0 [java] query11 26 0 [java] query12 34 0 [java] query13 28 0 [java] query14 2952 393730 [java] query6 3218 430114 [java] query9 2958 8627 [java] query2 740 130 [java] Total 10388 However, when looking at U1000 there is a significant benefit for query: [java] Load: 138318723 stmts added in 7559.498 secs, rate= 18297, commitLatency=0ms [java] Closure: ClosureStats{mutationCount=34082911, elapsed=2909594ms, rate=11713} [java] ### Finished testing BIGDATA_SPARQL_ENDPOINT ### [java] BIGDATA_SPARQL_ENDPOINT #trials=10 #parallel=1 [java] query Time Result# [java] query1 69 4 [java] query3 33 6 [java] query4 67 34 [java] query5 66 719 [java] query7 34 61 [java] query8 231 6463 [java] query10 26 0 [java] query11 27 0 [java] query12 28 0 [java] query13 23 0 [java] query14 69907 7924765 (versus 124545) [java] query6 74343 8653646 (versus 130354) [java] query9 76161 172632 (versus 125518) [java] query2 368962 2528 (versus inconsistent due to backed out change to AbstractBTree.touch()) [java] Total 589977 This commit therefore improves query performance on larger LUBM data sets, but has a known negative impact on U50 closure and an unknown impact on LUBM U1000 closure. Closure warrants additional investigation. BSBM 100M performance with these changes and the following settings is as follows (this is the reduced query mix without query 3): com.bigdata.btree.writeRetentionQueue.capacity=4000 com.bigdata.btree.BTree.branchingFactor=128 # Reduce the branching factor for the lexicon since BSBM uses a lot of long # literals. Note that you have to edit this override to specify the namespace # into which the BSBM data will be loaded. com.bigdata.namespace.BSBM_284826.lex.TERM2ID.com.bigdata.btree.BTree.branchingFactor=32 com.bigdata.namespace.BSBM_284826.lex.ID2TERM.com.bigdata.btree.BTree.branchingFactor=32 # 4k pages. 
com.bigdata.namespace.BSBM_284826.spo.POS.com.bigdata.btree.BTree.branchingFactor=970 com.bigdata.namespace.BSBM_284826.spo.SPO.com.bigdata.btree.BTree.branchingFactor=512 com.bigdata.namespace.BSBM_284826.spo.OSP.com.bigdata.btree.BTree.branchingFactor=470 # Override the #of write cache buffers. com.bigdata.journal.AbstractJournal.writeCacheBufferCount=12 Cold JVM run immediately after data load: 98-99% disk utilization. [java] QMpH: 7515.78 query mixes per hour Hot JVM, cold disk: 98-99% disk utilization. [java] QMpH: 6459.97 query mixes per hour Hot JVM, hot disk: ~4% utilization. [java] QMpH: 40213.81 query mixes per hour Modified Paths: -------------- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/relation/AbstractResource.java branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/AbstractAccessPath.java branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/BlockingBuffer.java branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/striterator/IChunkedIterator.java branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/AbstractLocalTripleStore.java branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/AbstractTripleStore.java Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/relation/AbstractResource.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/relation/AbstractResource.java 2010-11-07 12:39:08 UTC (rev 3905) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/relation/AbstractResource.java 2010-11-07 12:46:37 UTC (rev 3906) @@ -222,7 +222,7 @@ /** * Default for {@link #CHUNK_OF_CHUNKS_CAPACITY} */ - String DEFAULT_CHUNK_OF_CHUNKS_CAPACITY = "1000"; + String DEFAULT_CHUNK_OF_CHUNKS_CAPACITY = "10"; // was 1000 /** * <p> @@ -275,7 +275,7 @@ * * @todo figure out how good this value is. */ - String DEFAULT_FULLY_BUFFERED_READ_THRESHOLD = ""+20*Bytes.kilobyte32; + String DEFAULT_FULLY_BUFFERED_READ_THRESHOLD = "200";//""+20*Bytes.kilobyte32; /** * When <code>true</code> ({@value #DEFAULT_FORCE_SERIAL_EXECUTION}), Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/AbstractAccessPath.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/AbstractAccessPath.java 2010-11-07 12:39:08 UTC (rev 3905) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/AbstractAccessPath.java 2010-11-07 12:46:37 UTC (rev 3906) @@ -33,6 +33,7 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.Future; import java.util.concurrent.RejectedExecutionException; +import java.util.concurrent.TimeUnit; import org.apache.log4j.Logger; @@ -1027,7 +1028,7 @@ * once the elements were materialized on the client. 
*/ final BlockingBuffer<R[]> buffer = new BlockingBuffer<R[]>( - chunkOfChunksCapacity); + chunkOfChunksCapacity,chunkCapacity,10,TimeUnit.MILLISECONDS); final ExecutorService executorService = indexManager .getExecutorService(); Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/BlockingBuffer.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/BlockingBuffer.java 2010-11-07 12:39:08 UTC (rev 3905) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/BlockingBuffer.java 2010-11-07 12:46:37 UTC (rev 3906) @@ -167,12 +167,14 @@ * The default capacity for the internal {@link Queue} on which elements (or * chunks of elements) are buffered. */ - public static transient final int DEFAULT_PRODUCER_QUEUE_CAPACITY = 5000; +// public static transient final int DEFAULT_PRODUCER_QUEUE_CAPACITY = 5000; + public static transient final int DEFAULT_PRODUCER_QUEUE_CAPACITY = 10; // was 5000 /** * The default minimum chunk size for the chunk combiner. */ - public static transient final int DEFAULT_MINIMUM_CHUNK_SIZE = 10000; +// public static transient final int DEFAULT_MINIMUM_CHUNK_SIZE = 10000; + public static transient final int DEFAULT_MINIMUM_CHUNK_SIZE = 100; // was 10000 /** * The default timeout in milliseconds during which chunks of elements may @@ -381,7 +383,12 @@ final int minimumChunkSize, final long chunkTimeout, final TimeUnit chunkTimeoutUnit, final boolean ordered) { - if (queue == null) + if (minimumChunkSize >= 1000 || queue.remainingCapacity() >= 1000) + log.fatal(new RuntimeException("queueCapacity=" + + queue.remainingCapacity() + ", minimumChunkSize=" + + minimumChunkSize)); + + if (queue == null) throw new IllegalArgumentException(); if (minimumChunkSize < 0) { Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/striterator/IChunkedIterator.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/striterator/IChunkedIterator.java 2010-11-07 12:39:08 UTC (rev 3905) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/striterator/IChunkedIterator.java 2010-11-07 12:46:37 UTC (rev 3906) @@ -59,7 +59,7 @@ /** * The default chunk size. */ - int DEFAULT_CHUNK_SIZE = 10000; + int DEFAULT_CHUNK_SIZE = 100;//was 10000; /** * The next element available from the iterator. Modified: branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/AbstractLocalTripleStore.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/AbstractLocalTripleStore.java 2010-11-07 12:39:08 UTC (rev 3905) +++ branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/AbstractLocalTripleStore.java 2010-11-07 12:46:37 UTC (rev 3906) @@ -90,7 +90,9 @@ final long nodesWritten = btreeCounters.getNodesWritten(); final long leavesWritten = btreeCounters.getLeavesWritten(); final long bytesWritten = btreeCounters.getBytesWritten(); - final long bytesPerRecord = bytesWritten/(nodesWritten+leavesWritten); + final long totalWritten = (nodesWritten + leavesWritten); + final long bytesPerRecord = totalWritten == 0 ? 0 : bytesWritten + / (nodesWritten + leavesWritten); sb.append((first ? 
"" : ", ") + fqn + "{nodes=" + nodesWritten + ",leaves=" + leavesWritten + ", bytes=" + bytesWritten Modified: branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/AbstractTripleStore.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/AbstractTripleStore.java 2010-11-07 12:39:08 UTC (rev 3905) +++ branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/AbstractTripleStore.java 2010-11-07 12:46:37 UTC (rev 3906) @@ -572,7 +572,7 @@ String TERM_CACHE_CAPACITY = AbstractTripleStore.class.getName() + ".termCache.capacity"; - String DEFAULT_TERM_CACHE_CAPACITY = "50000"; + String DEFAULT_TERM_CACHE_CAPACITY = "5000"; // was 50000 /** * The name of the class that will establish the pre-defined This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2010-12-20 23:39:27
Revision: 4025 http://bigdata.svn.sourceforge.net/bigdata/?rev=4025&view=rev Author: thompsonbry Date: 2010-12-20 23:39:20 +0000 (Mon, 20 Dec 2010) Log Message: ----------- Modifications to the FullTextIndex to provide support for exact matches (finally) in addition to prefix matches and to disable the fieldId field for new RDF database instances (since that field was unused and just wasting space in the indices). Modified Paths: -------------- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/btree/keys/IKeyBuilder.java branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/btree/keys/KeyBuilder.java branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/FullTextIndex.java branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/Hit.java branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/Hiterator.java branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/ReadIndexTask.java branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/TermFrequencyData.java branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/TermMetadata.java branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/TokenBuffer.java branches/JOURNAL_HA_BRANCH/bigdata/src/test/com/bigdata/search/TestKeyBuilder.java branches/JOURNAL_HA_BRANCH/bigdata/src/test/com/bigdata/search/TestPrefixSearch.java branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/BigdataRDFFullTextIndex.java branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/AbstractTripleStore.java Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/btree/keys/IKeyBuilder.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/btree/keys/IKeyBuilder.java 2010-12-20 12:06:07 UTC (rev 4024) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/btree/keys/IKeyBuilder.java 2010-12-20 23:39:20 UTC (rev 4025) @@ -212,7 +212,7 @@ * <p> * Note: While the ASCII encoding happens to use one byte for each character * that is NOT true of the Unicode encoding. The space requirements for the - * Unicode encoding depend on the text, the Local, the collator strength, + * Unicode encoding depend on the text, the Locale, the collator strength, * and the collator decomposition mode. * <p> * Note: The <i>successor</i> option is designed to encapsulate some Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/btree/keys/KeyBuilder.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/btree/keys/KeyBuilder.java 2010-12-20 12:06:07 UTC (rev 4024) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/btree/keys/KeyBuilder.java 2010-12-20 23:39:20 UTC (rev 4025) @@ -627,7 +627,7 @@ } - final public KeyBuilder append(double d) { + final public KeyBuilder append(final double d) { // performance tweak. if (len + 8 > buf.length) ensureCapacity(len+8); @@ -648,7 +648,7 @@ } - static public double decodeDouble(byte[] key,int off) { + static public double decodeDouble(final byte[] key,final int off) { long v = decodeLong(key, off); @@ -663,7 +663,7 @@ } - final public KeyBuilder append(float f) { + final public KeyBuilder append(final float f) { // performance tweak. 
if (len + 4 > buf.length) ensureCapacity(len+4); Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/FullTextIndex.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/FullTextIndex.java 2010-12-20 12:06:07 UTC (rev 4024) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/FullTextIndex.java 2010-12-20 23:39:20 UTC (rev 4025) @@ -76,7 +76,6 @@ import com.bigdata.journal.IIndexManager; import com.bigdata.journal.IResourceLock; import com.bigdata.journal.ITx; -import com.bigdata.journal.TemporaryStore; import com.bigdata.journal.TimestampUtility; import com.bigdata.relation.AbstractRelation; import com.bigdata.relation.accesspath.IAccessPath; @@ -344,6 +343,17 @@ String INDEXER_TIMEOUT = "indexer.timeout"; String DEFAULT_INDEXER_TIMEOUT = "1000"; + + /** + * When <code>true</code>, the <code>fieldId</code> is stored as part of + * the key. When <code>false</code>, each key will be four bytes + * shorter. Applications which do not use <code>fieldId</code> are + * encouraged to disable it when creating the {@link FullTextIndex}. + */ + String FIELDS_ENABLED = FullTextIndex.class.getName() + + ".fieldsEnabled"; + + String DEFAULT_FIELDS_ENABLED = "true"; } @@ -367,6 +377,21 @@ private final long timeout; /** + * @see Options#FIELDS_ENABLED + */ + private final boolean fieldsEnabled; + + /** + * Return the value configured by the {@link Options#FIELDS_ENABLED} + * property. + */ + public boolean isFieldsEnabled() { + + return fieldsEnabled; + + } + + /** * The basename of the search index. */ public static final transient String NAME_SEARCH = "search"; @@ -434,6 +459,16 @@ } + { + + fieldsEnabled = Boolean.parseBoolean(properties.getProperty( + Options.FIELDS_ENABLED, Options.DEFAULT_FIELDS_ENABLED)); + + if (log.isInfoEnabled()) + log.info(Options.FIELDS_ENABLED + "=" + fieldsEnabled); + + } + /* * Note: defer resolution of the index. */ @@ -950,7 +985,7 @@ return tokenStream; } - + /** * Create a key for a term. * @@ -959,21 +994,26 @@ * @param token * The token whose key will be formed. * @param successor - * When <code>true</code> the successor of the token's text - * will be encoded into the key. This is useful when forming the + * When <code>true</code> the successor of the token's text will + * be encoded into the key. This is useful when forming the * <i>toKey</i> in a search. + * @param fieldsEnabled + * When <code>true</code> the <code>fieldId</code> will be + * included as a component in the generated key. When + * <code>false</code> it will not be present in the generated + * key. * @param docId - * The document identifier - use {@link Long#MIN_VALUE} when forming a - * search key. + * The document identifier - use {@link Long#MIN_VALUE} when + * forming a search key. * @param fieldId - * The field identifier - use {@link Integer#MIN_VALUE} when forming a - * search key. + * The field identifier - use {@link Integer#MIN_VALUE} when + * forming a search key. * * @return The key. 
*/ static protected byte[] getTokenKey(final IKeyBuilder keyBuilder, - final String termText, final boolean successor, final long docId, - final int fieldId) { + final String termText, final boolean successor, + final boolean fieldsEnabled, final long docId, final int fieldId) { keyBuilder.reset(); @@ -982,14 +1022,16 @@ keyBuilder.append(docId); - keyBuilder.append(fieldId); + if (fieldsEnabled) + keyBuilder.append(fieldId); final byte[] key = keyBuilder.getKey(); if (log.isDebugEnabled()) { - log.debug("{" + termText + "," + docId + "," + fieldId - + "}, successor=" + (successor?"true ":"false") + ", key=" + log.debug("{" + termText + "," + docId + + (fieldsEnabled ? "," + fieldId : "") + "}, successor=" + + (successor ? "true " : "false") + ", key=" + BytesUtil.toString(key)); } @@ -1146,15 +1188,15 @@ * The collection of hits is scored and hits that fail a threshold are * discarded. The remaining hits are placed into a total order and the * caller is returned an iterator which can read from that order. If the - * operation is interrupted, then only those {@link IHit}s that have - * already been computed will be returned. + * operation is interrupted, then only those {@link IHit}s that have already + * been computed will be returned. * * @param query * The query (it will be parsed into tokens). * @param languageCode * The language code that should be used when tokenizing the - * query -or- <code>null</code> to use the default - * {@link Locale}). + * query -or- <code>null</code> to use the default {@link Locale} + * ). * @param minCosine * The minimum cosine that will be returned. * @param maxRank @@ -1169,22 +1211,17 @@ * * @return The hit list. * - * @todo note that we can not incrementally materialize the search results - * since they are being delivered in rank order and the search - * algorithm achieves rank order by a post-search sort. mg4j supports + * @todo Note: we can not incrementally materialize the search results since + * they are being delivered in rank order and the search algorithm + * achieves rank order by a post-search sort. mg4j supports * incremental evaluation and would be a full-featured replacement for * this package. * - * @todo manage the life of the result sets and perhaps serialize them onto - * an index backed by a {@link TemporaryStore}. The fromIndex/toIndex - * might be with respect to that short-term result set. Reclaim result - * sets after N seconds. - * - * @todo consider other kinds of queries that we might write here. For + * @todo Consider other kinds of queries that we might write here. For * example, full text search should support AND OR NOT operators for * tokens. * - * @todo allow search within field(s). This will be a filter on the range + * @todo Allow search within field(s). This will be a filter on the range * iterator that is sent to the data service such that the search * terms are visited only when they occur in the matching field(s). */ @@ -1331,7 +1368,7 @@ log.info("Done: " + nhits + " hits in " + elapsed + "ms"); /* - * Note: The caller will only see those documents which satisify both + * Note: The caller will only see those documents which satisfy both * constraints (minCosine and maxRank). Results below a threshold will * be pruned. Any relevant results exceeding the maxRank will be pruned. 
*/ Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/Hit.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/Hit.java 2010-12-20 12:06:07 UTC (rev 4024) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/Hit.java 2010-12-20 23:39:20 UTC (rev 4025) @@ -14,17 +14,17 @@ */ public class Hit implements IHit, Comparable<Hit>{ - final protected static Logger log = Logger.getLogger(Hit.class); + final private static transient Logger log = Logger.getLogger(Hit.class); - /** - * True iff the {@link #log} level is INFO or less. - */ - final protected static boolean INFO = log.isInfoEnabled(); - - /** - * True iff the {@link #log} level is DEBUG or less. - */ - final protected static boolean DEBUG = log.isDebugEnabled(); +// /** +// * True iff the {@link #log} level is INFO or less. +// */ +// final protected static boolean INFO = log.isInfoEnabled(); +// +// /** +// * True iff the {@link #log} level is DEBUG or less. +// */ +// final protected static boolean DEBUG = log.isDebugEnabled(); /** note: defaults to an illegal value. */ private long docId = -1; @@ -47,7 +47,7 @@ } - void setDocId(long docId) { + void setDocId(final long docId) { this.docId = docId; @@ -77,13 +77,13 @@ /** * Adds another component to the cosine. */ - public void add(String term, double weight) { + public void add(final String term, final double weight) { cosine += weight; nterms ++; - if(DEBUG) { + if(log.isDebugEnabled()) { log.debug("docId=" + docId + ", term: " + term + ", nterms=" + nterms + ", weight=" + weight + ", cosine=" + cosine); @@ -102,7 +102,7 @@ * Sorts {@link Hit}s into decreasing cosine order with ties broken by the * the <code>docId</code>. */ - public int compareTo(Hit o) { + public int compareTo(final Hit o) { if (cosine < o.cosine) return 1; Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/Hiterator.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/Hiterator.java 2010-12-20 12:06:07 UTC (rev 4024) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/Hiterator.java 2010-12-20 23:39:20 UTC (rev 4025) @@ -100,7 +100,7 @@ /** * The #of hits (approximate). * - * @todo differentiate between the #of hits and the #of hits that satisify + * @todo differentiate between the #of hits and the #of hits that satisfy * the minCosine and maxRank criteria * * @todo this and other search engine metadata (elapsed time) might go on a @@ -144,7 +144,7 @@ if(!hasNext()) throw new NoSuchElementException(); - A tmp = nextHit; + final A tmp = nextHit; nextHit = null; @@ -171,9 +171,6 @@ /** * @throws UnsupportedOperationException - * - * @todo should this even be supported? it makes no sense unless you can - * restart the iterator. 
*/ public void remove() { Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/ReadIndexTask.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/ReadIndexTask.java 2010-12-20 12:06:07 UTC (rev 4024) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/ReadIndexTask.java 2010-12-20 23:39:20 UTC (rev 4025) @@ -35,7 +35,7 @@ */ public class ReadIndexTask implements Callable<Object> { - final protected static Logger log = Logger.getLogger(ReadIndexTask.class); + final private static Logger log = Logger.getLogger(ReadIndexTask.class); // /** // * True iff the {@link #log} level is INFO or less. @@ -48,8 +48,10 @@ // final protected static boolean DEBUG = log.isDebugEnabled(); private final String queryTerm; - private final boolean prefixMatch; +// private final boolean prefixMatch; +// private final int exactMatchLength; private final double queryTermWeight; + private final boolean fieldsEnabled; // private final FullTextIndex searchEngine; private final ConcurrentHashMap<Long, Hit> hits; private final ITupleIterator itr; @@ -95,36 +97,69 @@ this.queryTerm = termText; - this.prefixMatch = prefixMatch; +// this.prefixMatch = prefixMatch; this.queryTermWeight = queryTermWeight; + this.fieldsEnabled = searchEngine.isFieldsEnabled(); + // this.searchEngine = searchEngine; this.hits = hits; final IKeyBuilder keyBuilder = searchEngine.getKeyBuilder(); + +// if (!prefixMatch) { +// /* +// * Figure out how many bytes are in the Unicode sort key for the +// * termText. In order to be an exact match, the visited tuples may +// * not have more than this many bytes before the start of the docId +// * field. (It is not possible for them to have fewer bytes since the +// * Unicode sort key prefix length will be the same for both the +// * fromKey and the toKey. The Unicode sort key for the toKey is +// * formed by adding one to the LSB position). +// */ +// +// keyBuilder +// .appendText(termText, true/* unicode */, false/* successor */); +// +// exactMatchLength = keyBuilder.getLength(); +// +// } else { +// +// // ignored. +// exactMatchLength = -1; +// +// } + /* + * FIXME This would appear to start in the middle of the docId and + * fieldId value space since I would assume that Long.MIN_VALUE is the + * first docId. + */ final byte[] fromKey = FullTextIndex.getTokenKey(keyBuilder, termText, - false/* successor */, 0L/* docId */, 0/* fieldId */); + false/* successor */, fieldsEnabled, Long.MIN_VALUE/* docId */, + Integer.MIN_VALUE/* fieldId */); final byte[] toKey; // FIXME prefixMatch can not be turned off right now. -// if (prefixMatch) { + if (prefixMatch) { /* * Accepts anything starting with the search term. E.g., given * "bro", it will match "broom" and "brown" but not "break". */ + toKey = FullTextIndex.getTokenKey(keyBuilder, termText, + true/* successor */, fieldsEnabled, Long.MIN_VALUE/* docId */, + Integer.MIN_VALUE/* fieldId */); + } else { + /* + * Accepts only those entries that exactly match the search term. + */ toKey = FullTextIndex.getTokenKey(keyBuilder, termText, - true/* successor */, Long.MIN_VALUE/* docId */, Integer.MIN_VALUE/* fieldId */); -// } else { -// /* -// * Accepts only those entries that exactly match the search term. 
-// */ -// toKey = FullTextIndex.getTokenKey(keyBuilder, termText+"\0", -// false/* successor */, 0L/* docId */, 0/* fieldId */); -// } + false/* successor */, fieldsEnabled, + Long.MAX_VALUE/* docId */, Integer.MAX_VALUE/* fieldId */); + } if (log.isDebugEnabled()) log.debug // System.err.println @@ -161,7 +196,8 @@ while (itr.hasNext()) { - if (t.isInterrupted()) { + // don't test for interrupted on each result -- too much work. + if (nhits % 100 == 0 && t.isInterrupted()) { if (log.isInfoEnabled()) log.info("Interrupted: queryTerm=" + queryTerm + ", nhits=" @@ -182,10 +218,29 @@ // - Bytes.SIZEOF_LONG /*docId*/ - Bytes.SIZEOF_INT/*fieldId*/); final ByteArrayBuffer kbuf = tuple.getKeyBuffer(); + + /* + * The byte offset of the docId in the key. + * + * Note: This is also the byte length of the match on the unicode + * sort key, which appears at the head of the key. + */ + final int docIdOffset = kbuf.limit() - Bytes.SIZEOF_LONG /* docId */ + - (fieldsEnabled ? Bytes.SIZEOF_INT/* fieldId */: 0); + +// if (!prefixMatch && docIdOffset != exactMatchLength) { +// +// /* +// * The Unicode sort key associated with this tuple is longer +// * than the given token - hence it can not be an exact match. +// */ +// +// continue; +// +// } // decode the document identifier. - final long docId = KeyBuilder.decodeLong(kbuf.array(), kbuf.limit() - - Bytes.SIZEOF_LONG /*docId*/ - Bytes.SIZEOF_INT/*fieldId*/); + final long docId = KeyBuilder.decodeLong(kbuf.array(), docIdOffset); /* * Extract the term frequency and normalized term-frequency (term Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/TermFrequencyData.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/TermFrequencyData.java 2010-12-20 12:06:07 UTC (rev 4024) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/TermFrequencyData.java 2010-12-20 23:39:20 UTC (rev 4025) @@ -27,8 +27,9 @@ */ final public HashMap<String,TermMetadata> terms = new HashMap<String,TermMetadata>(); - public TermFrequencyData(long docId, int fieldId, String token) { - + public TermFrequencyData(final long docId, final int fieldId, + final String token) { + this.docId = docId; this.fieldId = fieldId; @@ -43,9 +44,10 @@ * @param token * The token. * - * @return true iff the termText did not previously exist for this {@link TermFrequencyData}. + * @return true iff the termText did not previously exist for this + * {@link TermFrequencyData}. */ - public boolean add(String token) { + public boolean add(final String token) { final String termText = token; @@ -127,7 +129,7 @@ for(TermMetadata md : terms.values()) { - int termFreq = md.termFreq(); + final int termFreq = md.termFreq(); md.localTermWeight = (double)termFreq / magnitude; Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/TermMetadata.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/TermMetadata.java 2010-12-20 12:06:07 UTC (rev 4024) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/TermMetadata.java 2010-12-20 23:39:20 UTC (rev 4025) @@ -2,8 +2,6 @@ import java.util.ArrayList; -import org.apache.lucene.analysis.Token; - /** * Mutable metadata for the occurrences of a term within a field of some * document. @@ -33,8 +31,7 @@ */ public double localTermWeight; - // @todo make private? 
- ArrayList<String> occurrences = new ArrayList<String>(); + private final ArrayList<String> occurrences = new ArrayList<String>(); /** * Add an occurrence. Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/TokenBuffer.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/TokenBuffer.java 2010-12-20 12:06:07 UTC (rev 4024) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/TokenBuffer.java 2010-12-20 23:39:20 UTC (rev 4025) @@ -3,10 +3,7 @@ import java.util.Arrays; import java.util.Iterator; -import org.apache.log4j.Level; import org.apache.log4j.Logger; -import org.apache.lucene.analysis.Token; -import org.apache.lucene.analysis.TokenStream; import com.bigdata.btree.keys.IKeyBuilder; import com.bigdata.btree.keys.KV; @@ -319,12 +316,12 @@ final String termText = termMetadata.termText(); final byte[] key = FullTextIndex.getTokenKey(keyBuilder, termText, - false/* successor */, docId, fieldId); + false/* successor */, textIndexer.isFieldsEnabled(), docId, fieldId); if(log.isDebugEnabled()) { log.debug("{" + termText + "," + docId + "," + fieldId + "}: #occurences=" - + termMetadata.occurrences.size()); + + termMetadata.termFreq()); } final byte[] val = textIndexer.getTokenValue(buf, termMetadata); Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/test/com/bigdata/search/TestKeyBuilder.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/test/com/bigdata/search/TestKeyBuilder.java 2010-12-20 12:06:07 UTC (rev 4024) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/test/com/bigdata/search/TestKeyBuilder.java 2010-12-20 23:39:20 UTC (rev 4025) @@ -33,7 +33,6 @@ import junit.framework.TestCase2; import com.bigdata.btree.BytesUtil; -import com.bigdata.btree.ITupleSerializer; import com.bigdata.btree.keys.IKeyBuilder; import com.bigdata.btree.keys.KeyBuilder; import com.bigdata.search.FullTextIndex.Options; @@ -43,8 +42,6 @@ * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ - * - * @todo write tests in which the docId is a negative long integer. */ public class TestKeyBuilder extends TestCase2 { @@ -94,36 +91,35 @@ private IKeyBuilder keyBuilder; /** - * @todo this test needs to populate an index with terms that would match on - * a prefix match and then verify that they do match and that terms - * that are not prefix matches do not match. - */ - public void test_prefixMatch_unicode() { - - } - - /** * Unit test verifies the relative sort order of a term and its successor, * of a prefix of that term and its successor, and that the prefix and the * successor of the prefix are ordered before and after the term and its * successor respectively. */ public void test_keyOrder() { - + + doKeyOrderTest(-1L/*docId*/, 0/*fieldId*/, true/*fieldsEnabled*/); + doKeyOrderTest(0L/*docId*/, 0/*fieldId*/, true/*fieldsEnabled*/); + doKeyOrderTest(1L/*docId*/, 12/*fieldId*/, true/*fieldsEnabled*/); + + doKeyOrderTest(-1L/*docId*/, 0/*fieldId*/, false/*fieldsEnabled*/); + doKeyOrderTest(0L/*docId*/, 0/*fieldId*/, false/*fieldsEnabled*/); + doKeyOrderTest(1L/*docId*/, 0/*fieldId*/, false/*fieldsEnabled*/); + + } + + protected void doKeyOrderTest(final long docId, final int fieldId, + final boolean fieldsEnabled) { + final IKeyBuilder keyBuilder = getKeyBuilder(); - final long docId = 0L; - - final int fieldId = 0; - - // the full term. 
final byte[] k0 = FullTextIndex.getTokenKey(keyBuilder, "brown", - false/* successor */, docId, fieldId); + false/* successor */, fieldsEnabled, docId, fieldId); // the successor of the full term. final byte[] k0s = FullTextIndex.getTokenKey(keyBuilder, "brown", - true/* successor */, docId, fieldId); + true/* successor */, fieldsEnabled, docId, fieldId); // verify sort key order for the full term and its successor. assertTrue(BytesUtil.compareBytes(k0, k0s) < 0); @@ -131,11 +127,11 @@ // a prefix of that term. final byte[] k1 = FullTextIndex.getTokenKey(keyBuilder, "bro", - false/* successor */, docId, fieldId); + false/* successor */, fieldsEnabled, docId, fieldId); // the successor of that prefix. final byte[] k1s = FullTextIndex.getTokenKey(keyBuilder, "bro", - true/* successor */, docId, fieldId); + true/* successor */, fieldsEnabled, docId, fieldId); // verify sort key order for prefix and its successor. assertTrue(BytesUtil.compareBytes(k0, k0s) < 0); @@ -184,76 +180,4 @@ } -/* - * @todo Finish the exact match test. - */ -// /** -// * @todo this test needs to populate an index with terms that would match if -// * we were allowing a prefix match and then verify that the terms are -// * NOT matched. it should also verify that terms that are exact -// * matches are matched. -// * -// * @todo also test ability to extract the docId and fieldId from the key. -// * -// * @todo refactor into an {@link ITupleSerializer}. -// * -// * @todo make the fieldId optional in the key. this needs to be part of the -// * state of the {@link ITupleSerializer}. -// */ -// public void test_exactMatch_unicode() { -// -// final IKeyBuilder keyBuilder = getKeyBuilder(); -// -// final long docId = 0L; -// -// final int fieldId = 0; -// -// -// // the full term. -// final byte[] termSortKey = FullTextIndex.getTokenKey(keyBuilder, "brown", -// false/* successor */, docId, fieldId); -// -// // the successor of the full term allowing prefix matches. -// final byte[] termPrefixMatchSuccessor = FullTextIndex.getTokenKey(keyBuilder, "brown", -// true/* successor */, docId, fieldId); -// -//// // the successor of the full term for an exact match. -//// final byte[] termExactMatchSuccessor = FullTextIndex.getTokenKey( -//// keyBuilder, "brown \0", true/* successor */, docId, fieldId); -//// -//// /* -//// * verify sort key order for the full term and its prefix match -//// * successor. -//// */ -//// LT(termSortKey, termPrefixMatchSuccessor); -// -//// /* -//// * verify sort key for the full term orders before its exact match -//// * successor. -//// */ -//// LT(termSortKey, termExactMatchSuccessor); -// -// // term that is longer than the full term. -// final byte[] longerTermSortKey = FullTextIndex.getTokenKey(keyBuilder, -// "browns", false/* successor */, docId, fieldId); -// -// // verify sort order for the full term and the longer term. -// LT(termSortKey, longerTermSortKey); -// -// /* -// * verify longer term is less than the prefix match successor of the -// * full term. -// */ -// LT(longerTermSortKey, termPrefixMatchSuccessor); -// -//// /* -//// * verify longer term is greater than the exact match successor of the -//// * full term. 
-//// */ -//// GT(longerTermSortKey, termExactMatchSuccessor); -// -// fail("finish test"); -// -// } - } Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/test/com/bigdata/search/TestPrefixSearch.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/test/com/bigdata/search/TestPrefixSearch.java 2010-12-20 12:06:07 UTC (rev 4024) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/test/com/bigdata/search/TestPrefixSearch.java 2010-12-20 23:39:20 UTC (rev 4025) @@ -36,8 +36,10 @@ import com.bigdata.journal.ProxyTestCase; /** - * Unit test for prefix search. Prefix search allows a query "bro" to match - * "brown" rather than requiring an exact match on the search term(s). + * Unit test for prefix and exact match searches. Prefix search allows a query + * "bro" to match "brown" rather than requiring an exact match on the search + * term(s). Exact match searches should only visit tuples which match the full + * length of the token (once encoded as a Unicode sort key). * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ @@ -148,12 +150,28 @@ } /* + * Search (one term, prefix match on that term in both documents + * (the prefix match is an exact match in this case)). + */ + { + + final Hiterator itr = ndx + .search("brown", languageCode, false/* prefixMatch */); + + if (INFO) + log.info("hits:" + itr); + + assertEquals(2, itr.size()); + + } + + /* * Search (one term, exact match on that term in both documents). */ { final Hiterator itr = ndx - .search("brown", languageCode, false/*prefixMatch*/); + .search("brown", languageCode, true/* prefixMatch */); if(INFO) log.info("hits:" + itr); @@ -176,18 +194,65 @@ } /* + * Search (one term, no exact match on that term). + */ + { + + final Hiterator itr = ndx + .search("bro", languageCode, false/* prefixMatch */); + + if (INFO) + log.info("hits:" + itr); + + assertEquals(0, itr.size()); + + } + + /* * Search (one term, prefix match on that term in one document). */ { - final Hiterator itr = ndx.search("qui", languageCode); + final Hiterator itr = ndx + .search("qui", languageCode, true/* prefixMatch */); - if(INFO) log.info("hits:" + itr); + if (INFO) + log.info("hits:" + itr); assertEquals(1, itr.size()); } + /* + * Search (one term, no exact match on that term). + */ + { + + final Hiterator itr = ndx + .search("qui", languageCode, false/* prefixMatch */); + + if (INFO) + log.info("hits:" + itr); + + assertEquals(0, itr.size()); + + } + + /* + * Search (one term, exact match on that term in one document). 
+ */ + { + + final Hiterator itr = ndx + .search("quick", languageCode, false/* prefixMatch */); + + if (INFO) + log.info("hits:" + itr); + + assertEquals(1, itr.size()); + + } + } finally { indexManager.destroy(); Modified: branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/BigdataRDFFullTextIndex.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/BigdataRDFFullTextIndex.java 2010-12-20 12:06:07 UTC (rev 4024) +++ branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/BigdataRDFFullTextIndex.java 2010-12-20 23:39:20 UTC (rev 4025) @@ -37,7 +37,6 @@ import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.model.BigdataValue; import com.bigdata.rdf.store.AbstractTripleStore; -import com.bigdata.rdf.store.IRawTripleStore; import com.bigdata.search.FullTextIndex; import com.bigdata.search.TokenBuffer; @@ -94,7 +93,9 @@ } - public void index(int capacity, Iterator<BigdataValue> valuesIterator) { + public void index(final int capacity, + final Iterator<BigdataValue> valuesIterator) { + final TokenBuffer buffer = new TokenBuffer(capacity, this); int n = 0; @@ -141,8 +142,7 @@ final IV termId = val.getIV(); - assert termId != null; // the termId must have been - // assigned. + assert termId != null; // the termId must have been assigned. // don't bother text indexing inline values for now if (termId.isInline()) { Modified: branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/AbstractTripleStore.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/AbstractTripleStore.java 2010-12-20 12:06:07 UTC (rev 4024) +++ branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/AbstractTripleStore.java 2010-12-20 23:39:20 UTC (rev 4025) @@ -880,7 +880,7 @@ String DEFAULT_INLINE_BNODES = "false"; - /** + /** * Set up database to inline date/times directly into the statement * indices rather than using the lexicon to map them to term identifiers * and back. Date times will be converted to UTC, then stored as @@ -894,14 +894,14 @@ String DEFAULT_INLINE_DATE_TIMES = "false"; /** - * The name of the {@link IExtensionFactory} class. The implementation - * MUST declare a constructor that accepts an - * {@link IDatatypeURIResolver} as its only argument. The - * {@link IExtension}s constructed by the factory need a resolver to - * resolve datatype URIs to term identifiers in the database. - * - * @see #DEFAULT_EXTENSION_FACTORY_CLASS - */ + * The name of the {@link IExtensionFactory} class. The implementation + * MUST declare a constructor that accepts an + * {@link IDatatypeURIResolver} as its only argument. The + * {@link IExtension}s constructed by the factory need a resolver to + * resolve datatype URIs to term identifiers in the database. + * + * @see #DEFAULT_EXTENSION_FACTORY_CLASS + */ String EXTENSION_FACTORY_CLASS = AbstractTripleStore.class.getName() + ".extensionFactoryClass"; @@ -1255,6 +1255,20 @@ // set property that will let the contained relations locate their container. tmp.setProperty(RelationSchema.CONTAINER, getNamespace()); + if (Boolean.valueOf(tmp.getProperty(Options.TEXT_INDEX, + Options.DEFAULT_TEXT_INDEX))) { + + /* + * If the text index is enabled for a new kb instance, then disable + * the fieldId component of the full text index key since it is not + * used by the RDF database and will just waste space in the index. 
+ * + * Note: Also see below where this is set on the global row store. + */ + tmp.setProperty(FullTextIndex.Options.FIELDS_ENABLED, "false"); + + } + final IResourceLock resourceLock = acquireExclusiveLock(); try { @@ -1336,7 +1350,7 @@ ((BaseAxioms)axioms).init(); } - + /* * Update the global row store to set the axioms and the * vocabulary objects. @@ -1354,6 +1368,14 @@ // vocabulary. map.put(TripleStoreSchema.VOCABULARY, vocab); + if (lexiconRelation.isTextIndex()) { + /* + * Per the logic and commentary at the top of create(), + * disable this option on the global row store. + */ + map.put(FullTextIndex.Options.FIELDS_ENABLED, "false"); + } + // Write the map on the row store. getIndexManager().getGlobalRowStore().write( RelationSchema.INSTANCE, map); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
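The net effect of revision 4025 is twofold. First, the full text key becomes {Unicode sort key of the token}{docId}[{fieldId}], with the fieldId component omitted when FIELDS_ENABLED is false (which AbstractTripleStore now forces for new RDF KB instances, since the RDF database never uses the field). Second, prefix versus exact match is decided purely by the scan range handed to ReadIndexTask: the fromKey always encodes the token with the minimum docId/fieldId, while the toKey encodes either the successor of the token (prefix match) or the token itself with the maximum docId/fieldId (exact match). The toy sketch below illustrates that range selection with a plain TreeMap and a made-up string encoding; the class, method names, and encoding are invented for illustration only, while the real code uses IKeyBuilder Unicode sort keys and getTokenKey() as shown in the diff above.

import java.util.NavigableMap;
import java.util.TreeMap;

public class KeyRangeSketch {

    // Toy token key: term text, a separator, then a zero-padded docId.
    // (The real index uses Unicode sort keys and binary longs/ints; this
    // encoding only orders correctly for non-negative docIds.)
    static String key(final String term, final long docId) {
        return term + '\u0000' + String.format("%019d", docId);
    }

    // Successor of a term for prefix scans: append a code point larger than
    // anything that can follow the prefix in this toy encoding.
    static String successor(final String term) {
        return term + '\uffff';
    }

    public static void main(final String[] args) {
        final NavigableMap<String, String> ndx = new TreeMap<String, String>();
        ndx.put(key("break", 1), "break");
        ndx.put(key("broom", 2), "broom");
        ndx.put(key("brown", 3), "brown");

        // Prefix match on "bro": accepts "broom" and "brown" but not "break".
        System.out.println(ndx.subMap(key("bro", 0), true,
                successor("bro"), false).values());

        // Exact match on "bro": only keys whose term component is exactly
        // "bro". Here, none.
        System.out.println(ndx.subMap(key("bro", 0), true,
                key("bro", Long.MAX_VALUE), true).values());
    }
}

Run as-is this prints [broom, brown] for the prefix scan and an empty collection for the exact scan, which mirrors the new assertions in TestPrefixSearch (two hits for a prefix search on the term, zero hits for an exact search on a mere prefix such as "bro").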
From: <tho...@us...> - 2010-12-21 15:06:06
Revision: 4027 http://bigdata.svn.sourceforge.net/bigdata/?rev=4027&view=rev Author: thompsonbry Date: 2010-12-21 15:05:59 +0000 (Tue, 21 Dec 2010) Log Message: ----------- Bug fix to AbstractJournal where it was using a local FileMetadata object rather than the instance field and thus was not reporting the FileMetadata reference to DumpJournal. Added a NAMESPACE option to the BigdataSail. Modified Paths: -------------- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/journal/AbstractJournal.java branches/JOURNAL_HA_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSail.java Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/journal/AbstractJournal.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/journal/AbstractJournal.java 2010-12-21 13:55:51 UTC (rev 4026) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/journal/AbstractJournal.java 2010-12-21 15:05:59 UTC (rev 4027) @@ -880,7 +880,7 @@ } else { - final FileMetadata fileMetadata = FileMetadata.createInstance( + /*final FileMetadata*/ fileMetadata = FileMetadata.createInstance( properties, !(this instanceof Journal), quorumToken); final BufferMode bufferMode = fileMetadata.bufferMode; Modified: branches/JOURNAL_HA_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSail.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSail.java 2010-12-21 13:55:51 UTC (rev 4026) +++ branches/JOURNAL_HA_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSail.java 2010-12-21 15:05:59 UTC (rev 4027) @@ -340,7 +340,6 @@ public static final String DEFAULT_ALLOW_AUTO_COMMIT = "false"; - /** * Options (default <code>false</code>) creates the SPO relation with * isolatable indices to allow read/write transactions. @@ -360,6 +359,15 @@ public static final String DEFAULT_STAR_JOINS = "false"; + /** + * Option specifies the namespace of the designed KB instance (default + * {@value #DEFAULT_NAMESPACE}). + */ + public static final String NAMESPACE = BigdataSail.class.getPackage() + .getName()+ ".namespace"; + + public static final String DEFAULT_NAMESPACE = "kb"; + } /** @@ -621,8 +629,10 @@ final ITransactionService txService = journal.getTransactionManager().getTransactionService(); - final String namespace = "kb"; - + final String namespace = properties.getProperty( + BigdataSail.Options.NAMESPACE, + BigdataSail.Options.DEFAULT_NAMESPACE); + // throws an exception if there are inconsistent properties checkProperties(properties); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
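Two independent fixes land in revision 4027. The AbstractJournal hunk simply stops shadowing the fileMetadata instance field with a local variable, so the FileMetadata reference is visible to DumpJournal again. The BigdataSail hunk makes the previously hard-coded "kb" namespace configurable. A minimal sketch of the new option follows; the literal property key mirrors the Options.NAMESPACE definition above (the BigdataSail package name plus ".namespace"), and handing the Properties to the BigdataSail constructor is the assumed, conventional usage rather than anything introduced by this commit.

import java.util.Properties;

public class NamespaceConfigSketch {
    public static void main(final String[] args) {
        final Properties properties = new Properties();
        // Prefer BigdataSail.Options.NAMESPACE in real code; the literal key
        // is spelled out here only to keep the sketch self-contained.
        properties.setProperty("com.bigdata.rdf.sail.namespace", "myKb");
        // These properties would then be passed to the BigdataSail
        // constructor (assumed usage). When the key is absent, the sail
        // falls back to Options.DEFAULT_NAMESPACE, i.e. "kb".
        System.out.println(properties);
    }
}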
From: <mrp...@us...> - 2011-01-19 02:03:30
Revision: 4127 http://bigdata.svn.sourceforge.net/bigdata/?rev=4127&view=rev Author: mrpersonick Date: 2011-01-19 02:03:23 +0000 (Wed, 19 Jan 2011) Log Message: ----------- added support for exact match and prefix match from sparql Modified Paths: -------------- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/FullTextIndex.java branches/JOURNAL_HA_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/FreeTextSearchExpander.java branches/JOURNAL_HA_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestSearchQuery.java Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/FullTextIndex.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/FullTextIndex.java 2011-01-18 21:59:35 UTC (rev 4126) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/FullTextIndex.java 2011-01-19 02:03:23 UTC (rev 4127) @@ -28,8 +28,6 @@ package com.bigdata.search; -import info.aduna.i18n.languagetag.IanaLanguageTag; - import java.io.IOException; import java.io.Reader; import java.io.StringReader; Modified: branches/JOURNAL_HA_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/FreeTextSearchExpander.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/FreeTextSearchExpander.java 2011-01-18 21:59:35 UTC (rev 4126) +++ branches/JOURNAL_HA_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/FreeTextSearchExpander.java 2011-01-19 02:03:23 UTC (rev 4127) @@ -14,6 +14,7 @@ import com.bigdata.rdf.internal.TermId; import com.bigdata.rdf.internal.VTE; import com.bigdata.rdf.internal.XSDDoubleIV; +import com.bigdata.rdf.lexicon.ITextIndexer; import com.bigdata.rdf.model.BigdataValue; import com.bigdata.rdf.spo.ISPO; import com.bigdata.rdf.spo.SPO; @@ -21,7 +22,6 @@ import com.bigdata.rdf.spo.SPOPredicate; import com.bigdata.rdf.store.AbstractTripleStore; import com.bigdata.rdf.store.BD; -import com.bigdata.rdf.store.IRawTripleStore; import com.bigdata.relation.accesspath.IAccessPath; import com.bigdata.relation.rule.IPredicate; import com.bigdata.relation.rule.ISolutionExpander; @@ -139,14 +139,28 @@ if (hiterator == null) { assert database!=null; assert query != null; - if (database.getLexiconRelation().getSearchEngine() == null) + + final ITextIndexer textNdx = + database.getLexiconRelation().getSearchEngine(); + + if (textNdx == null) throw new UnsupportedOperationException( "No free text index?"); + // final long begin = System.nanoTime(); - hiterator = database.getLexiconRelation() - .getSearchEngine().search(query.getLabel(), + + String s = query.getLabel(); + final boolean prefixMatch; + if (s.indexOf('*') >= 0) { + prefixMatch = true; + s = s.replaceAll("\\*", ""); + } else { + prefixMatch = false; + } + + hiterator = textNdx.search(s, query.getLanguage(), - false/* prefixMatch */, + prefixMatch, minRelevance == null ? 0d : minRelevance.doubleValue()/* minCosine */, maxHits == null ? 
10000 : maxHits.intValue()+1/* maxRank */, 1000L/* timeout */, TimeUnit.MILLISECONDS); Modified: branches/JOURNAL_HA_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestSearchQuery.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestSearchQuery.java 2011-01-18 21:59:35 UTC (rev 4126) +++ branches/JOURNAL_HA_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestSearchQuery.java 2011-01-19 02:03:23 UTC (rev 4127) @@ -925,6 +925,138 @@ } + { // exact match + + final String searchQuery = "brown cow"; + final double minRelevance = 0.0d; + + final String query = + "select ?s ?o ?score " + + "where " + + "{ " + + " ?s <"+RDFS.LABEL+"> ?o . " + + " ?o <"+BD.SEARCH+"> \""+searchQuery+"\" . " + + " ?o <"+BD.RELEVANCE+"> ?score . " + +// " ?o <"+BD.MIN_RELEVANCE+"> \""+minRelevance+"\" . " + +// " ?o <"+BD.MAX_HITS+"> \"5\" . " + + " filter regex(?o, \""+searchQuery+"\") " + + "} " + + "order by desc(?score)"; + + log.info("\n"+query); + + final TupleQuery tupleQuery = + cxn.prepareTupleQuery(QueryLanguage.SPARQL, query); + tupleQuery.setIncludeInferred(true /* includeInferred */); + TupleQueryResult result = tupleQuery.evaluate(); + + int i = 0; + while (result.hasNext()) { + log.info(i++ + ": " + result.next().toString()); + } + assertTrue("wrong # of results: " + i, i == 2); + + result = tupleQuery.evaluate(); + + Collection<BindingSet> answer = new LinkedList<BindingSet>(); + + final ITextIndexer search = + sail.getDatabase().getLexiconRelation().getSearchEngine(); + final Hiterator<IHit> hits = + search.search(searchQuery, + null, // languageCode + false, // prefixMatch + minRelevance, // minCosine + 10000, // maxRank (=maxResults + 1) + 1000L, // timeout + TimeUnit.MILLISECONDS // unit + ); + + while (hits.hasNext()) { + final IHit hit = hits.next(); + final IV id = new TermId(VTE.LITERAL, hit.getDocId()); + final Literal score = vf.createLiteral(hit.getCosine()); + final URI s = uris.get(id); + final Literal o = literals.get(id); + if (!o.getLabel().contains(searchQuery)) + continue; + final BindingSet bs = createBindingSet( + new BindingImpl("s", s), + new BindingImpl("o", o), + new BindingImpl("score", score)); + log.info(bs); + answer.add(bs); + } + + compare(result, answer); + + } + + { // prefix match + + final String searchQuery = "bro*"; + final double minRelevance = 0.0d; + + final String query = + "select ?s ?o ?score " + + "where " + + "{ " + + " ?s <"+RDFS.LABEL+"> ?o . " + + " ?o <"+BD.SEARCH+"> \""+searchQuery+"\" . " + + " ?o <"+BD.RELEVANCE+"> ?score . " + +// " ?o <"+BD.MIN_RELEVANCE+"> \""+minRelevance+"\" . " + +// " ?o <"+BD.MAX_HITS+"> \"5\" . 
" + +// " filter regex(?o, \""+searchQuery+"\") " + + "} " + + "order by desc(?score)"; + + log.info("\n"+query); + + final TupleQuery tupleQuery = + cxn.prepareTupleQuery(QueryLanguage.SPARQL, query); + tupleQuery.setIncludeInferred(true /* includeInferred */); + TupleQueryResult result = tupleQuery.evaluate(); + + int i = 0; + while (result.hasNext()) { + log.info(i++ + ": " + result.next().toString()); + } + assertTrue("wrong # of results: " + i, i == 3); + + result = tupleQuery.evaluate(); + + Collection<BindingSet> answer = new LinkedList<BindingSet>(); + + final ITextIndexer search = + sail.getDatabase().getLexiconRelation().getSearchEngine(); + final Hiterator<IHit> hits = + search.search(searchQuery, + null, // languageCode + true, // prefixMatch + minRelevance, // minCosine + 10000, // maxRank (=maxResults + 1) + 1000L, // timeout + TimeUnit.MILLISECONDS // unit + ); + + while (hits.hasNext()) { + final IHit hit = hits.next(); + final IV id = new TermId(VTE.LITERAL, hit.getDocId()); + final Literal score = vf.createLiteral(hit.getCosine()); + final URI s = uris.get(id); + final Literal o = literals.get(id); + final BindingSet bs = createBindingSet( + new BindingImpl("s", s), + new BindingImpl("o", o), + new BindingImpl("score", score)); + log.info(bs); + answer.add(bs); + } + + compare(result, answer); + + } + } finally { cxn.close(); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |