From: <tho...@us...> - 2013-11-13 17:19:07
Revision: 7537 http://bigdata.svn.sourceforge.net/bigdata/?rev=7537&view=rev Author: thompsonbry Date: 2013-11-13 17:19:00 +0000 (Wed, 13 Nov 2013) Log Message: ----------- Refactored IIndexManagerCallable out of the HAGlue interface. Some changes related to DumpJournal and TestDumpJournal for an as-yet unresolved issue with the inability to run DumpJournal with a concurrent mutation on the Journal. Modified Paths: -------------- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/ha/HAGlue.java branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/ha/IndexManagerCallable.java branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/journal/AbstractJournal.java branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/journal/DumpJournal.java branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/journal/TestDumpJournal.java Added Paths: ----------- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/ha/IIndexManagerCallable.java Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/ha/HAGlue.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/ha/HAGlue.java 2013-11-13 17:14:29 UTC (rev 7536) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/ha/HAGlue.java 2013-11-13 17:19:00 UTC (rev 7537) @@ -25,11 +25,9 @@ import java.io.FileNotFoundException; import java.io.IOException; -import java.io.Serializable; import java.rmi.Remote; import java.security.DigestException; import java.security.NoSuchAlgorithmException; -import java.util.concurrent.Callable; import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; @@ -48,7 +46,6 @@ import com.bigdata.ha.msg.IHASnapshotRequest; import com.bigdata.ha.msg.IHASnapshotResponse; import com.bigdata.journal.AbstractJournal; -import com.bigdata.journal.IIndexManager; import com.bigdata.journal.jini.ha.HAJournalServer; import com.bigdata.quorum.AsynchronousQuorumCloseException; import com.bigdata.quorum.QuorumException; @@ -299,7 +296,6 @@ Future<Void> rebuildFromLeader(IHARemoteRebuildRequest req) throws IOException; - /** * Run the caller's task on the service. * @@ -314,35 +310,5 @@ */ public <T> Future<T> submit(IIndexManagerCallable<T> callable, boolean asyncFuture) throws IOException; - - - public interface IIndexManagerCallable<T> extends Serializable, Callable<T> { - - /** - * Invoked before the task is executed to provide a reference to the - * {@link IIndexManager} on which it is executing. - * - * @param indexManager - * The index manager on the service. - * - * @throws IllegalArgumentException - * if the argument is <code>null</code> - * @throws IllegalStateException - * if {@link #setIndexManager(IIndexManager)} has already been - * invoked and was set with a different value. - */ - void setIndexManager(IIndexManager indexManager); - - /** - * Return the {@link IIndexManager}. - * - * @return The data service and never <code>null</code>. - * - * @throws IllegalStateException - * if {@link #setIndexManager(IIndexManager)} has not been invoked. 
- */ - IIndexManager getIndexManager(); - - } } Added: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/ha/IIndexManagerCallable.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/ha/IIndexManagerCallable.java (rev 0) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/ha/IIndexManagerCallable.java 2013-11-13 17:19:00 UTC (rev 7537) @@ -0,0 +1,66 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +package com.bigdata.ha; + +import java.io.Serializable; +import java.util.concurrent.Callable; + +import com.bigdata.journal.IIndexManager; + +/** + * Interface allows arbitrary tasks to be submitted to an {@link HAGlue} service + * for evaluation. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @param <T> + */ +public interface IIndexManagerCallable<T> extends Serializable, Callable<T> { + + /** + * Invoked before the task is executed to provide a reference to the + * {@link IIndexManager} on which it is executing. + * + * @param indexManager + * The index manager on the service. + * + * @throws IllegalArgumentException + * if the argument is <code>null</code> + * @throws IllegalStateException + * if {@link #setIndexManager(IIndexManager)} has already been + * invoked and was set with a different value. + */ + void setIndexManager(IIndexManager indexManager); + + /** + * Return the {@link IIndexManager}. + * + * @return The data service and never <code>null</code>. + * + * @throws IllegalStateException + * if {@link #setIndexManager(IIndexManager)} has not been + * invoked. 
+ */ + IIndexManager getIndexManager(); + +} Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/ha/IndexManagerCallable.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/ha/IndexManagerCallable.java 2013-11-13 17:14:29 UTC (rev 7536) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/ha/IndexManagerCallable.java 2013-11-13 17:19:00 UTC (rev 7537) @@ -2,7 +2,6 @@ import org.apache.log4j.Logger; -import com.bigdata.ha.HAGlue.IIndexManagerCallable; import com.bigdata.journal.IIndexManager; import com.bigdata.journal.jini.ha.HAJournal; Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/journal/AbstractJournal.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/journal/AbstractJournal.java 2013-11-13 17:14:29 UTC (rev 7536) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/journal/AbstractJournal.java 2013-11-13 17:19:00 UTC (rev 7537) @@ -100,6 +100,7 @@ import com.bigdata.ha.HAGlue; import com.bigdata.ha.HAStatusEnum; import com.bigdata.ha.HATXSGlue; +import com.bigdata.ha.IIndexManagerCallable; import com.bigdata.ha.IJoinedAndNonJoinedServices; import com.bigdata.ha.JoinedAndNonJoinedServices; import com.bigdata.ha.PrepareRequest; @@ -3106,8 +3107,8 @@ */ private final long commitToken; - /** The #of bytes on the journal as of the previous commit point. */ - private final long byteCountBefore; +// /** The #of bytes on the journal as of the previous commit point. */ +// private final long byteCountBefore; /** * The commit counter that will be assigned to the new commit point. @@ -3150,8 +3151,8 @@ this.old = store._rootBlock; - // #of bytes on the journal as of the previous commit point. - this.byteCountBefore = store._rootBlock.getNextOffset(); +// // #of bytes on the journal as of the previous commit point. +// this.byteCountBefore = store._rootBlock.getNextOffset(); this.newCommitCounter = old.getCommitCounter() + 1; @@ -5857,12 +5858,14 @@ /* * We also need to discard any active read/write tx since there - * is no longer a quorum and a read/write tx was running on the + * is no longer a quorum. This will hit both read-only + * transactions running on any service (not necessarily the + * leader) and read/write transactions if this service was the * old leader. * - * We do not need to discard read-only tx since the committed - * state should remain valid even when a quorum is lost. - * However, it would be a bit odd to leave read-only + * Note: We do not need to discard read-only tx since the + * committed state should remain valid even when a quorum is + * lost. However, it would be a bit odd to leave read-only * transactions running if you could not start a new read-only * because the quorum is not met. */ @@ -5874,7 +5877,17 @@ * * FIXME HA : Abort the unisolated connection? (esp for group * commit and the NSS level SPARQL and REST API unisolated - * operations). + * operations). Maybe we can wrap the execute of the UpdateTask + * and the execution of the REST Mutation API methods in a + * well-known ThreadGuard and then do interruptAll() to force + * the cancelation of any running task? 
We could also wrap any + * IIndexManagerCallable in HAGlue.submit() with a FutureTask + * implementation that uses the appropriate ThreadGuard to + * ensure that any unisolated tasks are cancelled (that is + * actually overkill since it would not differentiate TX based + * operations from unisolated operations - we could also use + * that ThreadGuard in AbstractTask). Add unit tests for both + * UPDATE and other REST mutation methods. * * @see <a * href="https://sourceforge.net/apps/trac/bigdata/ticket/753" Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/journal/DumpJournal.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/journal/DumpJournal.java 2013-11-13 17:14:29 UTC (rev 7536) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/journal/DumpJournal.java 2013-11-13 17:19:00 UTC (rev 7537) @@ -381,6 +381,18 @@ final boolean dumpHistory, final boolean dumpPages, final boolean dumpIndices, final boolean showTuples) { +// Note: This does not fix the issue. +// /** +// * Start a transaction. This will bracket all index access and protect +// * the data on the journal from concurrent recycling. +// * +// * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/762"> +// * DumpJournal does not protect against concurrent updates (NSS) +// * </a> +// */ +// final long tx = journal.newTx(ITx.READ_COMMITTED); +// try { +// final FileMetadata fmd = journal.getFileMetadata(); if (fmd != null) { @@ -600,6 +612,9 @@ dumpPages, dumpIndices, showTuples); } +// } finally { +// journal.abort(tx); +// } } @@ -614,7 +629,7 @@ } - public void dumpGlobalRowStore(final PrintWriter out) { + private void dumpGlobalRowStore(final PrintWriter out) { final SparseRowStore grs = journal.getGlobalRowStore(journal .getLastCommitTime()); @@ -826,7 +841,7 @@ * * @return */ - public String dumpRawRecord(final long addr) { + private String dumpRawRecord(final long addr) { if (journal.getBufferStrategy() instanceof IRWStrategy) { /** @@ -984,6 +999,7 @@ } } case Stream: + @SuppressWarnings("unused") final Stream stream = (Stream) ndx; /* * Note: We can't do anything here with a Stream, but we do @@ -1004,41 +1020,4 @@ } - /** - * Return the data in the buffer. - */ - public static byte[] getBytes(ByteBuffer buf) { - - if (buf.hasArray() && buf.arrayOffset() == 0 && buf.position() == 0 - && buf.limit() == buf.capacity()) { - - /* - * Return the backing array. - */ - - return buf.array(); - - } - - /* - * Copy the expected data into a byte[] using a read-only view on the - * buffer so that we do not mess with its position, mark, or limit. 
- */ - final byte[] a; - { - - buf = buf.asReadOnlyBuffer(); - - final int len = buf.remaining(); - - a = new byte[len]; - - buf.get(a); - - } - - return a; - - } - } Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/journal/TestDumpJournal.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/journal/TestDumpJournal.java 2013-11-13 17:14:29 UTC (rev 7536) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/journal/TestDumpJournal.java 2013-11-13 17:19:00 UTC (rev 7537) @@ -29,15 +29,23 @@ package com.bigdata.journal; import java.io.IOException; +import java.util.LinkedList; +import java.util.List; import java.util.UUID; +import java.util.concurrent.Callable; import java.util.concurrent.ExecutionException; +import java.util.concurrent.FutureTask; +import java.util.concurrent.TimeUnit; import com.bigdata.btree.AbstractBTreeTestCase; import com.bigdata.btree.BTree; import com.bigdata.btree.HTreeIndexMetadata; import com.bigdata.btree.IndexMetadata; import com.bigdata.btree.keys.KV; +import com.bigdata.concurrent.FutureTaskMon; import com.bigdata.htree.HTree; +import com.bigdata.rwstore.IRWStrategy; +import com.bigdata.util.concurrent.LatchedExecutor; /** * Test suite for {@link DumpJournal}. @@ -66,8 +74,10 @@ /** * @param name */ - public TestDumpJournal(String name) { + public TestDumpJournal(final String name) { + super(name); + } /** @@ -361,4 +371,254 @@ } + /** + * Unit test for {@link DumpJournal} with concurrent updates against the + * backing store. This is intended primarily to detect failures to protect + * against the recycling model associated with the {@link IRWStrategy}. + * + * @throws Exception + * + * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/762"> + * DumpJournal does not protect against concurrent updates (NSS) </a> + */ + public void test_dumpJournal_concurrent_updates() throws Exception { + + final String PREFIX = "testIndex#"; + final int NUM_INDICES = 4; + + Journal src = getStore(getProperties()); + + try { + + for (int i = 0; i < NUM_INDICES; i++) { + + // register an index + final String name = PREFIX + i; + + src.registerIndex(new IndexMetadata(name, UUID.randomUUID())); + { + + // lookup the index. + final BTree ndx = src.getIndex(name); + + // #of tuples to write. + final int ntuples = r.nextInt(1000); + + // generate random data. + final KV[] a = AbstractBTreeTestCase + .getRandomKeyValues(ntuples); + + // write tuples (in random order) + for (KV kv : a) { + + ndx.insert(kv.key, kv.val); + + if (r.nextInt(100) < 10) { + + // randomly increment the counter (10% of the time). + ndx.getCounter().incrementAndGet(); + + } + + } + + } + + } + + // commit the journal (!) + src.commit(); + + /** + * Task to run various DumpJournal requests. 
+ */ + final class DumpTask implements Callable<Void> { + + private final Journal src; + + public DumpTask(final Journal src) { + + this.src = src; + + } + @Override + public Void call() throws Exception { + + new DumpJournal(src) + .dumpJournal(false/* dumpHistory */, + true/* dumpPages */, + false/* dumpIndices */, false/* showTuples */); + + new DumpJournal(src) + .dumpJournal(true/* dumpHistory */, + true/* dumpPages */, true/* dumpIndices */, + false/* showTuples */); + + // test again w/o dumpPages + new DumpJournal(src) + .dumpJournal(true/* dumpHistory */, + false/* dumpPages */, + true/* dumpIndices */, false/* showTuples */); + + return (Void) null; + + } + + } + + final class UpdateTask implements Callable<Void> { + + private final Journal src; + + public UpdateTask(final Journal src) { + + this.src = src; + + } + @Override + public Void call() throws Exception { + + /* + * Now write some more data, going through a series of commit + * points. This let's us check access to historical commit points. + */ + for (int j = 0; j < 10; j++) { + + for (int i = 0; i < NUM_INDICES; i++) { + + // register an index + final String name = PREFIX + i; + + // lookup the index. + final BTree ndx = src.getIndex(name); + + // #of tuples to write. + final int ntuples = r.nextInt(1000); + + // generate random data. + final KV[] a = AbstractBTreeTestCase + .getRandomKeyValues(ntuples); + + // write tuples (in random order) + for (KV kv : a) { + + ndx.insert(kv.key, kv.val); + + if (r.nextInt(100) < 10) { + + // randomly increment the counter (10% of the time). + ndx.getCounter().incrementAndGet(); + + } + + } + + } + + log.info("Will commit"); + src.commit(); + log.info("Did commit"); + + } + + return (Void) null; + } + } + + final List<FutureTask<Void>> tasks1 = new LinkedList<FutureTask<Void>>(); + final List<FutureTask<Void>> tasks2 = new LinkedList<FutureTask<Void>>(); + final List<FutureTask<Void>> tasksAll = new LinkedList<FutureTask<Void>>(); + + // Setup executor that limits parallelism. + final LatchedExecutor executor1 = new LatchedExecutor( + src.getExecutorService(), 1/* nparallel */); + + // Setup executor that limits parallelism. + final LatchedExecutor executor2 = new LatchedExecutor( + src.getExecutorService(), 1/* nparallel */); + + try { + + // Tasks to run. + tasks1.add(new FutureTaskMon<Void>(new DumpTask(src))); + tasks1.add(new FutureTaskMon<Void>(new DumpTask(src))); + tasks1.add(new FutureTaskMon<Void>(new DumpTask(src))); + + tasks2.add(new FutureTaskMon<Void>(new UpdateTask(src))); + tasks2.add(new FutureTaskMon<Void>(new UpdateTask(src))); + tasks2.add(new FutureTaskMon<Void>(new UpdateTask(src))); + + // Schedule the tasks. + for (FutureTask<Void> ft : tasks1) + executor1.execute(ft); + for (FutureTask<Void> ft : tasks2) + executor2.execute(ft); + + log.info("Blocking for futures"); + + // Wait for tasks. + tasksAll.addAll(tasks1); + tasksAll.addAll(tasks2); + int ndone = 0; + for (FutureTask<Void> ft : tasksAll) { + + /* + * Check Future. + * + * Note: sanity check for test termination w/ timeout. + */ + + try { + ft.get(2, TimeUnit.MINUTES); + } catch (ExecutionException ex) { + log.error("ndone=" + ndone, ex); + throw ex; + } + + log.info("ndone=" + ndone); + ndone++; + } + + } finally { + + // Ensure tasks are terminated. + for (FutureTask<Void> ft : tasksAll) { + + ft.cancel(true/*mayInterruptIfRunning*/); + + } + + } + + if (src.isStable()) { + + src = reopenStore(src); + + // Try running the DumpTask again. 
new DumpTask(src).call(); + + } + + } finally { + + src.destroy(); + + } + + } + /** Stress test to look for different failure modes. */ + public void _testStress_dumpJournal_concurrent_updates() throws Exception { + final int LIMIT = 20; + for (int i = 0; i < LIMIT; i++) { + if (i > 1) + setUp(); + try { + test_dumpJournal_concurrent_updates(); + } catch (Exception ex) { + log.fatal("FAILURE: i=" + i + ", cause=" + ex); + } + if (i + 1 < LIMIT) + tearDown(); + } + } }
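For reference, a minimal sketch (not part of this commit; the package, class, and helper names are illustrative assumptions) of a task implementing the relocated com.bigdata.ha.IIndexManagerCallable interface and submitted through the HAGlue#submit() method whose signature appears in the diff above:

package com.example.ha; // hypothetical package

import java.util.concurrent.Future;

import com.bigdata.ha.HAGlue;
import com.bigdata.ha.IIndexManagerCallable;
import com.bigdata.journal.IIndexManager;

/** Illustrative task: reports which IIndexManager implementation the remote service runs. */
public class ReportIndexManagerTask implements IIndexManagerCallable<String> {

    private static final long serialVersionUID = 1L;

    /** Set by the service before call(); transient because the reference is service-local. */
    private transient IIndexManager indexManager;

    @Override
    public void setIndexManager(final IIndexManager indexManager) {
        if (indexManager == null)
            throw new IllegalArgumentException();
        if (this.indexManager != null && this.indexManager != indexManager)
            throw new IllegalStateException("Already bound to a different IIndexManager.");
        this.indexManager = indexManager;
    }

    @Override
    public IIndexManager getIndexManager() {
        if (indexManager == null)
            throw new IllegalStateException("setIndexManager() has not been invoked.");
        return indexManager;
    }

    @Override
    public String call() throws Exception {
        // Runs on the remote service against its local IIndexManager.
        return getIndexManager().getClass().getName();
    }

    /** Usage sketch: haGlue is a proxy for the remote HAGlue service. */
    static String run(final HAGlue haGlue) throws Exception {
        final Future<String> ft = haGlue.submit(
                new ReportIndexManagerTask(), true/* asyncFuture */);
        return ft.get();
    }
}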
From: <tho...@us...> - 2014-03-16 11:24:16
Revision: 7985 http://sourceforge.net/p/bigdata/code/7985 Author: thompsonbry Date: 2014-03-16 11:24:12 +0000 (Sun, 16 Mar 2014) Log Message: ----------- I have modified the DefaultNodeCoder in the 1.3.0 development and maintenance branch to explicitly look for a 0L in the valid childAddr slots and throw an exception. This will prevent bad data from becoming durable. I do not observe any problems in the local B+Tree test suite. I am committing this change to CI for feedback from full CI runs. See #855 (Child identity is not persistent). Modified Paths: -------------- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/btree/data/DefaultNodeCoder.java branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/btree/data/AbstractNodeDataRecordTestCase.java Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/btree/data/DefaultNodeCoder.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/btree/data/DefaultNodeCoder.java 2014-03-16 11:04:19 UTC (rev 7984) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/btree/data/DefaultNodeCoder.java 2014-03-16 11:24:12 UTC (rev 7985) @@ -43,6 +43,7 @@ import com.bigdata.io.AbstractFixedByteArrayBuffer; import com.bigdata.io.DataOutputBuffer; import com.bigdata.rawstore.Bytes; +import com.bigdata.rawstore.IRawStore; /** * Default implementation for immutable {@link INodeData} records. @@ -217,9 +218,18 @@ // childAddr[] : @todo code childAddr[] (needs IAddressManager if store aware coding). // final int O_childAddr = buf.pos(); for (int i = 0; i <= nkeys; i++) { + + /* + * See #855 (Child identity is not persistent). + */ + final long childAddr = node.getChildAddr(i); - buf.putLong(node.getChildAddr(i)); + if (childAddr == IRawStore.NULL) + throw new AssertionError("Child is not persistent: index=" + i + + " out of " + nkeys + " entries, " + node.toString()); + buf.putLong(childAddr); + } // final int O_childEntryCount = buf.pos(); Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/btree/data/AbstractNodeDataRecordTestCase.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/btree/data/AbstractNodeDataRecordTestCase.java 2014-03-16 11:04:19 UTC (rev 7984) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/btree/data/AbstractNodeDataRecordTestCase.java 2014-03-16 11:24:12 UTC (rev 7985) @@ -79,6 +79,9 @@ final long minimumVersionTimestamp = 0L; final long maximumVersionTimestamp = 0L; + // Must not be 0L. See #855. + childAddr[0] = 12L; + final INodeData expected = new MockNodeData(new ReadOnlyKeysRaba(nkeys, keys), spannedTupleCount, childAddr, childEntryCount, hasVersionTimestamps, minimumVersionTimestamp, @@ -104,6 +107,9 @@ final long minimumVersionTimestamp = System.currentTimeMillis(); final long maximumVersionTimestamp = System.currentTimeMillis() + 20; + // Must not be 0L. See #855. + childAddr[0] = 12L; + final INodeData expected = new MockNodeData(new ReadOnlyKeysRaba(nkeys, keys), spannedTupleCount, childAddr, childEntryCount, hasVersionTimestamps, minimumVersionTimestamp, This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
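A condensed sketch of the invariant this change enforces (simplified from the DefaultNodeCoder#encode hunk above; "buf", "node", and "nkeys" stand for the coder's output buffer, the INodeData record being coded, and its key count):

// A B+Tree node with nkeys keys has nkeys + 1 child addresses, hence "<=".
for (int i = 0; i <= nkeys; i++) {

    final long childAddr = node.getChildAddr(i);

    // See #855 (Child identity is not persistent). IRawStore.NULL is 0L,
    // meaning the child was never written to the backing store; fail fast
    // at encode time rather than letting the bad record become durable.
    if (childAddr == IRawStore.NULL)
        throw new AssertionError("Child is not persistent: index=" + i);

    buf.putLong(childAddr);
}

Correspondingly, mock node records in the test suite must now populate every valid childAddr slot with a non-zero address, which is why the tests above set childAddr[0] = 12L.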
From: <jer...@us...> - 2014-05-08 02:48:47
Revision: 8228 http://sourceforge.net/p/bigdata/code/8228 Author: jeremy_carroll Date: 2014-05-08 02:48:41 +0000 (Thu, 08 May 2014) Log Message: ----------- Tests for Language Range Modified Paths: -------------- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/search/ConfigurableAnalyzerFactory.java branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/search/TestAll.java Added Paths: ----------- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/search/TestLanguageRange.java Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/search/ConfigurableAnalyzerFactory.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/search/ConfigurableAnalyzerFactory.java 2014-05-08 01:52:09 UTC (rev 8227) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/search/ConfigurableAnalyzerFactory.java 2014-05-08 02:48:41 UTC (rev 8228) @@ -126,7 +126,7 @@ public class ConfigurableAnalyzerFactory implements IAnalyzerFactory { final private static transient Logger log = Logger.getLogger(ConfigurableAnalyzerFactory.class); - private static class LanguageRange implements Comparable<LanguageRange> { + static class LanguageRange implements Comparable<LanguageRange> { private final String range[]; private final String full; @@ -173,6 +173,10 @@ public int hashCode() { return full.hashCode(); } + + public boolean extendedFilterMatch(String langTag) { + return extendedFilterMatch(langTag.toLowerCase(Locale.ROOT).split("-")); + } // See RFC 4647, 3.3.2 public boolean extendedFilterMatch(String[] language) { Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/search/TestAll.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/search/TestAll.java 2014-05-08 01:52:09 UTC (rev 8227) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/search/TestAll.java 2014-05-08 02:48:41 UTC (rev 8228) @@ -72,6 +72,8 @@ // search backed by EDS. suite.addTest(proxySuite(new TestEDS("EDS Search"),"EDS")); + + suite.addTestSuite(TestLanguageRange.class); return suite; Added: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/search/TestLanguageRange.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/search/TestLanguageRange.java (rev 0) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/search/TestLanguageRange.java 2014-05-08 02:48:41 UTC (rev 8228) @@ -0,0 +1,70 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2014. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on May 7, 2014 + */ +package com.bigdata.search; + +import com.bigdata.search.ConfigurableAnalyzerFactory.LanguageRange; + +import junit.framework.TestCase2; + +public class TestLanguageRange extends TestCase2 { + + public TestLanguageRange() { + } + + + public TestLanguageRange(String name) { + super(name); + } + + private void match(String range, String lang) { + LanguageRange lr = new LanguageRange(range.toLowerCase()); + assertTrue(lr.extendedFilterMatch(lang)); + } + + private void nomatch(String range, String lang) { + LanguageRange lr = new LanguageRange(range.toLowerCase()); + assertFalse(lr.extendedFilterMatch(lang)); + } + + + public void testRFC4647() { + for (String range: new String[]{"de-DE", "de-*-DE"}) { + match(range, "de-DE"); + match(range, "de-Latn-DE"); + match(range, "de-Latf-DE"); + match(range, "de-DE-x-goethe"); + match(range, "de-Latn-DE-1996"); + match(range, "de-Deva-DE-1996"); + nomatch(range, "de"); + nomatch(range, "de-x-DE"); + nomatch(range, "de-Deva"); + } + + } + + +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
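To make the test matrix above concrete, a usage sketch in the style of TestLanguageRange (which lives in the com.bigdata.search package, so the package-private LanguageRange class is visible; note that the range string must be lower-cased by the caller):

LanguageRange lr = new LanguageRange("de-*-de");

lr.extendedFilterMatch("de-DE");      // true: the wildcard may match zero subtags
lr.extendedFilterMatch("de-Latn-DE"); // true: the wildcard consumes the script subtag
lr.extendedFilterMatch("de");         // false: no subtag left to match the trailing "de"
lr.extendedFilterMatch("de-x-DE");    // false: the singleton "x" stops extended filtering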
From: <jer...@us...> - 2014-05-10 11:52:28
Revision: 8263 http://sourceforge.net/p/bigdata/code/8263 Author: jeremy_carroll Date: 2014-05-10 11:52:25 +0000 (Sat, 10 May 2014) Log Message: ----------- Cleaning up of ConfigurableAnalyzerFactory, adding TermCompletionAnalyzer, deprecating DefaultAnalyzerFactory Finishing of trac 912, work on 915 Unit tests for the old and new behaviors This merges the branch TEXT_ANALYZERS. Modified Paths: -------------- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/search/ConfigurableAnalyzerFactory.java branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/search/DefaultAnalyzerFactory.java branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/search/AbstractAnalyzerFactoryTest.java branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/search/AbstractSearchTest.java branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/search/NonEnglishExamples.java branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/search/TestAll.java branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/search/TestConfigurableAsDefaultAnalyzerFactory.java branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/search/TestDefaultAnalyzerFactory.java Added Paths: ----------- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/search/TermCompletionAnalyzer.java branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/search/AbstractDefaultAnalyzerFactoryTest.java branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/search/TestConfigurableAnalyzerFactory.java branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/search/TestUnconfiguredAnalyzerFactory.java Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/search/ConfigurableAnalyzerFactory.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/search/ConfigurableAnalyzerFactory.java 2014-05-10 02:56:35 UTC (rev 8262) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/search/ConfigurableAnalyzerFactory.java 2014-05-10 11:52:25 UTC (rev 8263) @@ -66,6 +66,7 @@ * Supported classes included all the natural language specific classes from Lucene, and also: * <ul> * <li>{@link PatternAnalyzer} + * <li>{@link TermCompletionAnalyzer} * <li>{@link KeywordAnalyzer} * <li>{@link SimpleAnalyzer} * <li>{@link StopAnalyzer} @@ -76,7 +77,6 @@ * <ul> * <li>no arguments * <li>{@link Version} - * <li>{@link Set} (of strings, the stop words) * <li>{@link Version}, {@link Set} * </ul> * is usable. If the class has a static method named <code>getDefaultStopSet()</code> then this is assumed @@ -89,19 +89,17 @@ * abbreviate to <code>c.b.s.C</code> in this documentation. * Properties from {@link Options} apply to the factory. * <p> - * - * If there are no such properties at all then the property {@link Options#INCLUDE_DEFAULTS} is set to true, - * and the behavior of this class is the same as the legacy {@link DefaultAnalyzerFactory}. - * <p> * Other properties, from {@link AnalyzerOptions} start with * <code>c.b.s.C.analyzer.<em>language-range</em></code> where <code><em>language-range</em></code> conforms - * with the extended language range construct from RFC 4647, section 2.2. These are used to specify - * an analyzer for the given language range. + * with the extended language range construct from RFC 4647, section 2.2. + * There is an issue that bigdata does not allow '*' in property names, and we use the character '_' to + * substitute for '*' in extended language ranges in property names. 
+ * These are used to specify an analyzer for the given language range. * <p> * If no analyzer is specified for the language range <code>*</code> then the {@link StandardAnalyzer} is used. * <p> * Given any specific language, then the analyzer matching the longest configured language range, - * measured in number of subtags is used {@link #getAnalyzer(String, boolean)} + * measured in number of subtags is returned by {@link #getAnalyzer(String, boolean)} * In the event of a tie, the alphabetically first language range is used. * The algorithm to find a match is "Extended Filtering" as defined in section 3.3.2 of RFC 4647. * <p> @@ -113,11 +111,13 @@ * <dd>This uses whitespace to tokenize</dd> * <dt>{@link PatternAnalyzer}</dt> * <dd>This uses a regular expression to tokenize</dd> + * <dt>{@link TermCompletionAnalyzer}</dt> + * <dd>This uses up to three regular expressions to specify multiple tokens for each word, to address term completion use cases.</dd> * <dt>{@link EmptyAnalyzer}</dt> * <dd>This suppresses the functionality, by treating every expression as a stop word.</dd> * </dl> * there are in addition the language specific analyzers that are included - * by using the option {@link Options#INCLUDE_DEFAULTS} + * by using the option {@link Options#NATURAL_LANGUAGE_SUPPORT} * * * @author jeremycarroll @@ -126,11 +126,26 @@ public class ConfigurableAnalyzerFactory implements IAnalyzerFactory { final private static transient Logger log = Logger.getLogger(ConfigurableAnalyzerFactory.class); - static class LanguageRange implements Comparable<LanguageRange> { + /** + * This is an implementation of RFC 4647 language range, + * targetted at the specific needs within bigdata, and only + * supporting the extended filtering specified in section 3.3.2 + * <p> + * Language ranges are comparable so that + * sorting an array and then matching a language tag against each + * member of the array in sequence will give the longest match. + * i.e. the longer ranges come first. + * @author jeremycarroll + * + */ + public static class LanguageRange implements Comparable<LanguageRange> { private final String range[]; private final String full; - + /** + * Note range must be in lower case, this is not verified. + * @param range + */ public LanguageRange(String range) { this.range = range.split("-"); full = range; @@ -174,12 +189,22 @@ return full.hashCode(); } + /** + * This implements the algoirthm of section 3.3.2 of RFC 4647 + * as modified with the observation about private use tags + * in <a href="http://lists.w3.org/Archives/Public/www-international/2014AprJun/0084"> + * this message</a>. + * + * + * @param langTag The RFC 5646 Language tag in lower case + * @return The result of the algorithm + */ public boolean extendedFilterMatch(String langTag) { return extendedFilterMatch(langTag.toLowerCase(Locale.ROOT).split("-")); } // See RFC 4647, 3.3.2 - public boolean extendedFilterMatch(String[] language) { + boolean extendedFilterMatch(String[] language) { // RFC 4647 step 2 if (!matchSubTag(language[0], range[0])) { return false; @@ -227,13 +252,14 @@ */ public interface Options { /** - * By setting this option to true, then the behavior of the legacy {@link DefaultAnalyzerFactory} - * is added, and may be overridden by the settings of the user. + * By setting this option to true, then all the known Lucene Analyzers for natural + * languages are used for a range of language tags. + * These settings may then be overridden by the settings of the user. 
* Specifically the following properties are loaded, prior to loading the * user's specification (with <code>c.b.s.C</code> expanding to * <code>com.bigdata.search.ConfigurableAnalyzerFactory</code>) <pre> -c.b.s.C.analyzer.*.like=eng +c.b.s.C.analyzer._.like=eng c.b.s.C.analyzer.por.analyzerClass=org.apache.lucene.analysis.br.BrazilianAnalyzer c.b.s.C.analyzer.pt.like=por c.b.s.C.analyzer.zho.analyzerClass=org.apache.lucene.analysis.cn.ChineseAnalyzer @@ -265,18 +291,13 @@ * * */ - String INCLUDE_DEFAULTS = ConfigurableAnalyzerFactory.class.getName() + ".includeDefaults"; + String NATURAL_LANGUAGE_SUPPORT = ConfigurableAnalyzerFactory.class.getName() + ".naturalLanguageSupport"; /** * This is the prefix to all properties configuring the individual analyzers. */ String ANALYZER = ConfigurableAnalyzerFactory.class.getName() + ".analyzer."; -/** - * If there is no configuration at all, then the defaults are included, - * but any configuration at all totally replaces the defaults, unless - * {@link #INCLUDE_DEFAULTS} - * is explicitly set to true. - */ - String DEFAULT_INCLUDE_DEFAULTS = "false"; + + String DEFAULT_NATURAL_LANGUAGE_SUPPORT = "false"; } /** * Options understood by analyzers created by {@link ConfigurableAnalyzerFactory}. @@ -286,7 +307,9 @@ /** * If specified this is the fully qualified name of a subclass of {@link Analyzer} * that has appropriate constructors. - * Either this or {@link #LIKE} or {@link #PATTERN} must be specified for each language range. + * This is set implicitly if some of the options below are selected (for example {@link #PATTERN}). + * For each configured language range, if it is not set, either explicitly or implicitly, then + * {@link #LIKE} must be specified. */ String ANALYZER_CLASS = "analyzerClass"; @@ -326,16 +349,52 @@ String STOPWORDS_VALUE_NONE = "none"; /** - * If this property is present then the analyzer being used is a - * {@link PatternAnalyzer} and the value is the pattern to use. + * The value of the pattern parameter to + * {@link PatternAnalyzer#PatternAnalyzer(Version, Pattern, boolean, Set)} * (Note the {@link Pattern#UNICODE_CHARACTER_CLASS} flag is enabled). * It is an error if a different analyzer class is specified. */ - String PATTERN = ".pattern"; + String PATTERN = "pattern"; + /** + * The value of the wordBoundary parameter to + * {@link TermCompletionAnalyzer#TermCompletionAnalyzer(Pattern, Pattern, Pattern, boolean)} + * (Note the {@link Pattern#UNICODE_CHARACTER_CLASS} flag is enabled). + * It is an error if a different analyzer class is specified. + */ + String WORD_BOUNDARY = "wordBoundary"; + /** + * The value of the subWordBoundary parameter to + * {@link TermCompletionAnalyzer#TermCompletionAnalyzer(Pattern, Pattern, Pattern, boolean)} + * (Note the {@link Pattern#UNICODE_CHARACTER_CLASS} flag is enabled). + * It is an error if a different analyzer class is specified. + */ + String SUB_WORD_BOUNDARY = "subWordBoundary"; + /** + * The value of the softHyphens parameter to + * {@link TermCompletionAnalyzer#TermCompletionAnalyzer(Pattern, Pattern, Pattern, boolean)} + * (Note the {@link Pattern#UNICODE_CHARACTER_CLASS} flag is enabled). + * It is an error if a different analyzer class is specified. + */ + String SOFT_HYPHENS = "softHyphens"; + /** + * The value of the alwaysRemoveSoftHypens parameter to + * {@link TermCompletionAnalyzer#TermCompletionAnalyzer(Pattern, Pattern, Pattern, boolean)} + * (Note the {@link Pattern#UNICODE_CHARACTER_CLASS} flag is enabled). 
+ * It is an error if a different analyzer class is specified. + */ + String ALWAYS_REMOVE_SOFT_HYPHENS = "alwaysRemoveSoftHyphens"; + + boolean DEFAULT_ALWAYS_REMOVE_SOFT_HYPHENS = false; + + /** + * The default sub-word boundary is a pattern that never matches, + * i.e. there are no sub-word boundaries. + */ + Pattern DEFAULT_SUB_WORD_BOUNDARY = Pattern.compile("(?!)"); } - private static final String DEFAULT_PROPERTIES = + private static final String ALL_LUCENE_NATURAL_LANGUAGES = "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.*.like=eng\n" + "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.por.analyzerClass=org.apache.lucene.analysis.br.BrazilianAnalyzer\n" + "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.pt.like=por\n" + @@ -365,33 +424,67 @@ "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.eng.analyzerClass=org.apache.lucene.analysis.standard.StandardAnalyzer\n" + "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.en.like=eng\n"; + private static final String LUCENE_STANDARD_ANALYZER = + "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.*.analyzerClass=org.apache.lucene.analysis.standard.StandardAnalyzer\n"; + + /** + * This comment describes the implementation of {@link ConfigurableAnalyzerFactory}. + * The only method in the interface is {@link ConfigurableAnalyzerFactory#getAnalyzer(String, boolean)}, + * a map is used from language tag to {@link AnalyzerPair}, where the pair contains + * an {@link Analyzer} both with and without stopwords configured (some times these two analyzers are identical, + * if, for example, stop words are not supported or not required). + * <p> + * If there is no entry for the language tag in the map {@link ConfigurableAnalyzerFactory#langTag2AnalyzerPair}, + * then one is created, by walking down the array {@link ConfigurableAnalyzerFactory#config} of AnalyzerPairs + * until a matching one is found. + * <p> + * The bulk of the code in this class is invoked from the constructor in order to set up this + * {@link ConfigurableAnalyzerFactory#config} array. For example, all of the subclasses of {@link AnalyzerPair}s, + * are simply to call the appropriate constructor in the appropriate way: the difficulty is that many subclasses + * of {@link Analyzer} have constructors with different signatures, and our code needs to navigate each sort. + * @author jeremycarroll + * + */ private static class AnalyzerPair implements Comparable<AnalyzerPair>{ - private final LanguageRange range; + final LanguageRange range; private final Analyzer withStopWords; private final Analyzer withoutStopWords; + public Analyzer getAnalyzer(boolean filterStopwords) { + return filterStopwords ? withStopWords : withoutStopWords; + } + + public boolean extendedFilterMatch(String[] language) { + return range.extendedFilterMatch(language); + } + AnalyzerPair(String range, Analyzer withStopWords, Analyzer withOutStopWords) { this.range = new LanguageRange(range); this.withStopWords = withStopWords; this.withoutStopWords = withOutStopWords; } + /** + * This clone constructor implements {@link AnalyzerOptions#LIKE}. + * @param range + * @param copyMe + */ AnalyzerPair(String range, AnalyzerPair copyMe) { this.range = new LanguageRange(range); this.withStopWords = copyMe.withStopWords; this.withoutStopWords = copyMe.withoutStopWords; - } - - public Analyzer getAnalyzer(boolean filterStopwords) { - return filterStopwords ? 
withStopWords : withoutStopWords; - } - @Override - public String toString() { - return range.full + "=(" + withStopWords.getClass().getSimpleName() +")"; - } - + /** + * If we have a constructor, with arguments including a populated + * stop word set, then we can use it to make both the withStopWords + * analyzer, and the withoutStopWords analyzer. + * @param range + * @param cons A Constructor including a {@link java.util.Set} argument + * for the stop words. + * @param params The arguments to pass to the constructor including a populated stopword set. + * @throws Exception + */ AnalyzerPair(String range, Constructor<? extends Analyzer> cons, Object ... params) throws Exception { this(range, cons.newInstance(params), cons.newInstance(useEmptyStopWordSet(params))); } @@ -409,38 +502,52 @@ } return rslt; } + @Override + public String toString() { + return range.full + "=(" + withStopWords.getClass().getSimpleName() +")"; + } + + @Override public int compareTo(AnalyzerPair o) { return range.compareTo(o.range); } - - public boolean extendedFilterMatch(String[] language) { - return range.extendedFilterMatch(language); - } } + /** + * Used for Analyzer classes with a constructor with signature (Version, Set). + * @author jeremycarroll + * + */ private static class VersionSetAnalyzerPair extends AnalyzerPair { public VersionSetAnalyzerPair(ConfigOptionsToAnalyzer lro, Class<? extends Analyzer> cls) throws Exception { super(lro.languageRange, getConstructor(cls, Version.class, Set.class), Version.LUCENE_CURRENT, lro.getStopWords()); } } - + + /** + * Used for Analyzer classes which do not support stopwords and have a constructor with signature (Version). + * @author jeremycarroll + * + */ private static class VersionAnalyzerPair extends AnalyzerPair { - public VersionAnalyzerPair(String range, Class<? extends Analyzer> cls) throws Exception { super(range, getConstructor(cls, Version.class).newInstance(Version.LUCENE_CURRENT)); } } - + /** + * Special case code for {@link PatternAnalyzer} + * @author jeremycarroll + * + */ private static class PatternAnalyzerPair extends AnalyzerPair { - - public PatternAnalyzerPair(ConfigOptionsToAnalyzer lro, String pattern) throws Exception { + public PatternAnalyzerPair(ConfigOptionsToAnalyzer lro, Pattern pattern) throws Exception { super(lro.languageRange, getConstructor(PatternAnalyzer.class,Version.class,Pattern.class,Boolean.TYPE,Set.class), Version.LUCENE_CURRENT, - Pattern.compile(pattern, Pattern.UNICODE_CHARACTER_CLASS), + pattern, true, lro.getStopWords()); } @@ -451,6 +558,16 @@ * This class is initialized with the config options, using the {@link #setProperty(String, String)} * method, for a particular language range and works out which pair of {@link Analyzer}s * to use for that language range. + * <p> + * Instances of this class are only alive during the execution of + * {@link ConfigurableAnalyzerFactory#ConfigurableAnalyzerFactory(FullTextIndex)}, + * the life-cycle is: + * <ol> + * <li>The relveant config properties are applied, and are used to populate the fields. 
+ * <li>The fields are validated + * <li>An {@link AnalyzerPair} is constructed + * </ol> + * * @author jeremycarroll * */ @@ -459,9 +576,13 @@ String like; String className; String stopwords; - String pattern; + Pattern pattern; final String languageRange; AnalyzerPair result; + Pattern wordBoundary; + Pattern subWordBoundary; + Pattern softHyphens; + Boolean alwaysRemoveSoftHyphens; public ConfigOptionsToAnalyzer(String languageRange) { this.languageRange = languageRange; @@ -474,7 +595,7 @@ */ public Set<?> getStopWords() { - if (AnalyzerOptions.STOPWORDS_VALUE_NONE.equals(stopwords)) + if (doNotUseStopWords()) return Collections.EMPTY_SET; if (useDefaultStopWords()) { @@ -484,6 +605,10 @@ return getStopWordsForClass(stopwords); } + boolean doNotUseStopWords() { + return AnalyzerOptions.STOPWORDS_VALUE_NONE.equals(stopwords) || (stopwords == null && pattern != null); + } + protected Set<?> getStopWordsForClass(String clazzName) { Class<? extends Analyzer> analyzerClass = getAnalyzerClass(clazzName); try { @@ -500,9 +625,13 @@ } protected boolean useDefaultStopWords() { - return stopwords == null || AnalyzerOptions.STOPWORDS_VALUE_DEFAULT.equals(stopwords); + return ( stopwords == null && pattern == null ) || AnalyzerOptions.STOPWORDS_VALUE_DEFAULT.equals(stopwords); } + /** + * The first step in the life-cycle, used to initialize the fields. + * @return true if the property was recognized. + */ public boolean setProperty(String shortProperty, String value) { if (shortProperty.equals(AnalyzerOptions.LIKE) ) { like = value; @@ -511,13 +640,24 @@ } else if (shortProperty.equals(AnalyzerOptions.STOPWORDS) ) { stopwords = value; } else if (shortProperty.equals(AnalyzerOptions.PATTERN) ) { - pattern = value; + pattern = Pattern.compile(value,Pattern.UNICODE_CHARACTER_CLASS); + } else if (shortProperty.equals(AnalyzerOptions.WORD_BOUNDARY) ) { + wordBoundary = Pattern.compile(value,Pattern.UNICODE_CHARACTER_CLASS); + } else if (shortProperty.equals(AnalyzerOptions.SUB_WORD_BOUNDARY) ) { + subWordBoundary = Pattern.compile(value,Pattern.UNICODE_CHARACTER_CLASS); + } else if (shortProperty.equals(AnalyzerOptions.SOFT_HYPHENS) ) { + softHyphens = Pattern.compile(value,Pattern.UNICODE_CHARACTER_CLASS); + } else if (shortProperty.equals(AnalyzerOptions.ALWAYS_REMOVE_SOFT_HYPHENS) ) { + alwaysRemoveSoftHyphens = Boolean.valueOf(value); } else { return false; } return true; } + /** + * The second phase of the life-cycle, used for sanity checking. 
+ */ public void validate() { if (pattern != null ) { if ( className != null && className != PatternAnalyzer.class.getName()) { @@ -525,6 +665,27 @@ } className = PatternAnalyzer.class.getName(); } + if (this.wordBoundary != null ) { + if ( className != null && className != TermCompletionAnalyzer.class.getName()) { + throw new RuntimeException("Bad Option: Language range "+languageRange + " with pattern propety for class "+ className); + } + className = TermCompletionAnalyzer.class.getName(); + + if ( subWordBoundary == null ) { + subWordBoundary = AnalyzerOptions.DEFAULT_SUB_WORD_BOUNDARY; + } + if ( alwaysRemoveSoftHyphens != null && softHyphens == null ) { + throw new RuntimeException("Bad option: Language range "+languageRange + ": must specify softHypens when setting alwaysRemoveSoftHyphens"); + } + if (softHyphens != null && alwaysRemoveSoftHyphens == null) { + alwaysRemoveSoftHyphens = AnalyzerOptions.DEFAULT_ALWAYS_REMOVE_SOFT_HYPHENS; + } + + } else if ( subWordBoundary != null || softHyphens != null || alwaysRemoveSoftHyphens != null || + TermCompletionAnalyzer.class.getName().equals(className) ) { + throw new RuntimeException("Bad option: Language range "+languageRange + ": must specify wordBoundary for TermCompletionAnalyzer"); + } + if (PatternAnalyzer.class.getName().equals(className) && pattern == null ) { throw new RuntimeException("Bad Option: Language range "+languageRange + " must specify pattern for PatternAnalyzer."); } @@ -537,21 +698,45 @@ } + /** + * The third and final phase of the life-cyle used for identifying + * the AnalyzerPair. + */ private AnalyzerPair construct() throws Exception { if (className == null) { return null; } if (pattern != null) { return new PatternAnalyzerPair(this, pattern); - - } + } + if (softHyphens != null) { + return new AnalyzerPair( + languageRange, + new TermCompletionAnalyzer( + wordBoundary, + subWordBoundary, + softHyphens, + alwaysRemoveSoftHyphens)); + } + if (wordBoundary != null) { + return new AnalyzerPair( + languageRange, + new TermCompletionAnalyzer( + wordBoundary, + subWordBoundary)); + } final Class<? extends Analyzer> cls = getAnalyzerClass(); if (hasConstructor(cls, Version.class, Set.class)) { // RussianAnalyzer is missing any way to access stop words. - if (RussianAnalyzer.class.equals(cls) && useDefaultStopWords()) { - return new AnalyzerPair(languageRange, new RussianAnalyzer(Version.LUCENE_CURRENT), new RussianAnalyzer(Version.LUCENE_CURRENT, Collections.EMPTY_SET)); + if (RussianAnalyzer.class.equals(cls)) { + if (useDefaultStopWords()) { + return new AnalyzerPair(languageRange, new RussianAnalyzer(Version.LUCENE_CURRENT), new RussianAnalyzer(Version.LUCENE_CURRENT, Collections.EMPTY_SET)); + } + if (doNotUseStopWords()) { + return new AnalyzerPair(languageRange, new RussianAnalyzer(Version.LUCENE_CURRENT, Collections.EMPTY_SET)); + } } return new VersionSetAnalyzerPair(this, cls); } @@ -569,6 +754,29 @@ throw new RuntimeException("Bad option: cannot find constructor for class " + className + " for language range " + languageRange); } + /** + * Also part of the third phase of the life-cycle, following the {@link AnalyzerOptions#LIKE} + * properties. 
+ * @param depth + * @param max + * @param analyzers + * @return + */ + AnalyzerPair followLikesToAnalyzerPair(int depth, int max, + Map<String, ConfigOptionsToAnalyzer> analyzers) { + if (result == null) { + if (depth == max) { + throw new RuntimeException("Bad configuration: - 'like' loop for language range " + languageRange); + } + ConfigOptionsToAnalyzer next = analyzers.get(like); + if (next == null) { + throw new RuntimeException("Bad option: - 'like' not found for language range " + languageRange+ " (not found: '"+ like +"')"); + } + result = new AnalyzerPair(languageRange, next.followLikesToAnalyzerPair(depth+1, max, analyzers)); + } + return result; + } + protected Class<? extends Analyzer> getAnalyzerClass() { return getAnalyzerClass(className); } @@ -587,22 +795,6 @@ void setAnalyzerPair(AnalyzerPair ap) { result = ap; } - - AnalyzerPair followLikesToAnalyzerPair(int depth, int max, - Map<String, ConfigOptionsToAnalyzer> analyzers) { - if (result == null) { - if (depth == max) { - throw new RuntimeException("Bad configuration: - 'like' loop for language range " + languageRange); - } - ConfigOptionsToAnalyzer next = analyzers.get(like); - if (next == null) { - throw new RuntimeException("Bad option: - 'like' not found for language range " + languageRange+ " (not found: '"+ like +"')"); - } - result = new AnalyzerPair(languageRange, next.followLikesToAnalyzerPair(depth+1, max, analyzers)); - } - return result; - } - } private final AnalyzerPair config[]; @@ -615,12 +807,19 @@ * strategy so the code will still work on the {@link #MAX_LANG_CACHE_SIZE}+1 th entry. */ private static final int MAX_LANG_CACHE_SIZE = 500; + private String defaultLanguage; private final FullTextIndex<?> fullTextIndex; + /** + * Builds a new ConfigurableAnalyzerFactory. + * @param fullTextIndex + */ public ConfigurableAnalyzerFactory(final FullTextIndex<?> fullTextIndex) { + // A description of the operation of this method is found on AnalyzerPair and + // ConfigOptionsToAnalyzer. // despite our name, we actually make all the analyzers now, and getAnalyzer method is merely a lookup. 
if (fullTextIndex == null) @@ -717,9 +916,9 @@ while (en.hasMoreElements()) { String prop = (String)en.nextElement(); - if (prop.equals(Options.INCLUDE_DEFAULTS)) continue; + if (prop.equals(Options.NATURAL_LANGUAGE_SUPPORT)) continue; if (prop.startsWith(Options.ANALYZER)) { - String languageRangeAndProperty[] = prop.substring(Options.ANALYZER.length()).split("[.]"); + String languageRangeAndProperty[] = prop.substring(Options.ANALYZER.length()).replaceAll("_","*").split("[.]"); if (languageRangeAndProperty.length == 2) { String languageRange = languageRangeAndProperty[0].toLowerCase(Locale.US); // Turkish "I" could create a problem @@ -745,25 +944,29 @@ protected Properties initProperties() { final Properties parentProperties = fullTextIndex.getProperties(); Properties myProps; - if (Boolean.getBoolean(parentProperties.getProperty(Options.INCLUDE_DEFAULTS, Options.DEFAULT_INCLUDE_DEFAULTS))) { - myProps = defaultProperties(); + if (Boolean.valueOf(parentProperties.getProperty( + Options.NATURAL_LANGUAGE_SUPPORT, + Options.DEFAULT_NATURAL_LANGUAGE_SUPPORT))) { + + myProps = loadPropertyString(ALL_LUCENE_NATURAL_LANGUAGES); + + } else if (hasPropertiesForStarLanguageRange(parentProperties)){ + + myProps = new Properties(); + } else { - myProps = new Properties(); + + myProps = loadPropertyString(LUCENE_STANDARD_ANALYZER); } copyRelevantProperties(fullTextIndex.getProperties(), myProps); - - if (myProps.isEmpty()) { - return defaultProperties(); - } else { - return myProps; - } + return myProps; } - protected Properties defaultProperties() { + Properties loadPropertyString(String props) { Properties rslt = new Properties(); try { - rslt.load(new StringReader(DEFAULT_PROPERTIES)); + rslt.load(new StringReader(props)); } catch (IOException e) { throw new RuntimeException("Impossible - well clearly not!", e); } @@ -780,6 +983,17 @@ } } + private boolean hasPropertiesForStarLanguageRange(Properties from) { + Enumeration<?> en = from.propertyNames(); + while (en.hasMoreElements()) { + String prop = (String)en.nextElement(); + if (prop.startsWith(Options.ANALYZER+"_.") + || prop.startsWith(Options.ANALYZER+"*.")) { + return true; + } + } + return false; + } @Override public Analyzer getAnalyzer(String languageCode, boolean filterStopwords) { Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/search/DefaultAnalyzerFactory.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/search/DefaultAnalyzerFactory.java 2014-05-10 02:56:35 UTC (rev 8262) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/search/DefaultAnalyzerFactory.java 2014-05-10 11:52:25 UTC (rev 8263) @@ -29,7 +29,6 @@ import java.util.Collections; import java.util.HashMap; -import java.util.HashSet; import java.util.Locale; import java.util.Map; import java.util.Set; @@ -52,11 +51,21 @@ import com.bigdata.btree.keys.KeyBuilder; /** - * Default implementation registers a bunch of {@link Analyzer}s for various - * language codes and then serves the appropriate {@link Analyzer} based on - * the specified language code. + * This is the default implementation but should be regarded as legacy since + * it fails to use the correct {@link Analyzer} for almost all languages (other than + * English). 
It uses the correct natural language analyzer only for literals tagged with + * certain three letter ISO 639 codes: + * "por", "deu", "ger", "zho", "chi", "jpn", "kor", "ces", "cze", "dut", "nld", "gre", "ell", + * "fra", "fre", "rus" and "tha". All other tags are treated as English. + * These codes do not work if they are used with subtags, e.g. "ger-AT" is treated as English. + * No two letter code, other than "en" works correctly: note that the W3C and + * IETF recommend the use of the two letter forms instead of the three letter forms. * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @deprecated Using {@link ConfigurableAnalyzerFactory} with + * the {@link ConfigurableAnalyzerFactory.Options#NATURAL_LANGUAGE_SUPPORT} + * uses the appropriate natural language analyzers for the two letter codes + * and for tags which include sub-tags. * @version $Id$ */ public class DefaultAnalyzerFactory implements IAnalyzerFactory { Added: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/search/TermCompletionAnalyzer.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/search/TermCompletionAnalyzer.java (rev 0) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/search/TermCompletionAnalyzer.java 2014-05-10 11:52:25 UTC (rev 8263) @@ -0,0 +1,248 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2014. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on May 8, 2014 by Jeremy J. Carroll, Syapse Inc. + */ +package com.bigdata.search; + +import java.io.IOException; +import java.io.Reader; +import java.io.StringReader; +import java.nio.CharBuffer; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.KeywordAnalyzer; +import org.apache.lucene.analysis.tokenattributes.TermAttribute; + + +/** + * An analyzer intended for the term-completion use case; particularly + * for technical vocabularies and concept schemes. + * + * <p> + * This analyzer generates several index terms for each word in the input. + * These are intended to match short sequences (e.g. three or more) characters + * of user-input, to then give the user a drop-down list of matching terms. + * <p> + * This can be set up to address issues like matching <q>half-time</q> when the user types + * <q>tim</q> or if the user types <q>halft</q> (treating the hyphen as a soft hyphen); or + * to match <q>TermCompletionAnalyzer</q> when the user types <q>Ana</q> + * <p> + * In contrast, the Lucene Analyzers are mainly geared around the free text search use + * case. + * <p> + * The intended use cases will typical involve a prefix query of the form: + * <pre> + * ?t bds:search "prefix*" . 
+ * </pre>
+ * to find all literals in the selected graphs which are indexed by a term starting in <q>prefix</q>,
+ * so the problem this class addresses is finding the appropriate index terms to allow
+ * matching, at sensible points, mid-way through words (such as at hyphens).
+ * <p>
+ * To get maximum effectiveness it may be best to use private language subtags (see RFC 5646),
+ * e.g. <code>"x-term"</code>,
+ * which are mapped to this class by {@link ConfigurableAnalyzerFactory} for
+ * the data being loaded into the store, and linked to some very simple process
+ * like {@link KeywordAnalyzer} for queries which are tagged with a different language tag
+ * that is only used for <code>bds:search</code>, e.g. <code>"x-query"</code>.
+ * The above prefix query then becomes:
+ * <pre>
+ * ?t bds:search "prefix*"@x-query .
+ * </pre>
+ *
+ * @author jeremycarroll
+ *
+ */
+public class TermCompletionAnalyzer extends Analyzer {
+
+    private final Pattern wordBoundary;
+    private final Pattern subWordBoundary;
+
+    private final Pattern discard;
+    private final boolean alwaysDiscard;
+
+    /**
+     * Divide the input into words and short tokens
+     * as with {@link #TermCompletionAnalyzer(Pattern, Pattern)}.
+     * Each term is generated, and then an additional term
+     * is generated with softHyphens (defined by the pattern)
+     * removed. If the alwaysRemoveSoftHypens flag is true,
+     * then the first term (before the removal) is suppressed.
+     *
+     * @param wordBoundary The definition of space (e.g. " ")
+     * @param subWordBoundary Also index after matches to this (e.g. "-")
+     * @param softHyphens Discard these characters from matches
+     * @param alwaysRemoveSoftHypens If false the discard step is optional.
+     */
+    public TermCompletionAnalyzer(Pattern wordBoundary,
+            Pattern subWordBoundary,
+            Pattern softHyphens,
+            boolean alwaysRemoveSoftHypens) {
+        this.wordBoundary = wordBoundary;
+        this.subWordBoundary = subWordBoundary;
+        if (softHyphens != null) {
+            discard = softHyphens;
+            alwaysDiscard = alwaysRemoveSoftHypens;
+        } else {
+            discard = Pattern.compile("(?!)"); // never matches
+            alwaysDiscard = true;
+        }
+    }
+
+    /**
+     * Divide the input into words, separated by the wordBoundary,
+     * and return a token for each whole word, and then
+     * generate further tokens for each word by removing prefixes
+     * up to and including each successive match of
+     * subWordBoundary.
+     * @param wordBoundary
+     * @param subWordBoundary
+     */
+    public TermCompletionAnalyzer(Pattern wordBoundary,
+            Pattern subWordBoundary) {
+        this(wordBoundary, subWordBoundary, null, true);
+    }
+
+    @Override
+    public TokenStream tokenStream(String ignoredFieldName, Reader reader) {
+        return new TermCompletionTokenStream((StringReader)reader);
+    }
+
+    /**
+     * This class has three processes going on,
+     * all driven from the {@link #incrementToken()} method:
+     *
+     * - the iteration over the words in the input: the words are identified
+     *   in the constructor, and the iteration is performed by {@link #nextWord()}
+     *
+     * - the subword boundaries are identified in {@link #next()};
+     *   we then set up {@link #found} to contain the most
+     *   recently found subword
+     *
+     * - the soft hyphen discarding is processed in {@link #maybeDiscardHyphens()}
+     *
+     * - if we are not {@link #alwaysDiscard}ing then {@link #afterDiscard}
+     *   can be set to null to return the non-discarded version on the next cycle.
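+     *
+     * As an illustration of the net effect, assuming wordBoundary " ",
+     * subWordBoundary "-" and softHyphens "-": the input <q>half-time</q>
+     * yields the index terms <q>halftime</q> and <q>time</q> when
+     * alwaysRemoveSoftHypens is true, and <q>halftime</q>, <q>half-time</q>
+     * and <q>time</q> when it is false.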
+ * + */ + private class TermCompletionTokenStream extends TokenStream { + + final String[] words; + final TermAttribute termAtt; + + + + char currentWord[] = new char[]{}; + Matcher softMatcher; + int currentWordIx = -1; + + + int charPos = 0; + private String afterDiscard; + private CharBuffer found; + + public TermCompletionTokenStream(StringReader reader) { + termAtt = addAttribute(TermAttribute.class); + try { + reader.mark(Integer.MAX_VALUE); + int length = (int) reader.skip(Integer.MAX_VALUE); + reader.reset(); + char fileContent[] = new char[length]; + reader.read(fileContent); + words = wordBoundary.split(new String(fileContent)); + } catch (IOException e) { + throw new RuntimeException("Impossible",e); + } + } + + @Override + public boolean incrementToken() throws IOException { + if ( next() ) { + if (afterDiscard != null) { + int lg = afterDiscard.length(); + afterDiscard.getChars(0, lg, termAtt.termBuffer(), 0); + termAtt.setTermLength(lg); + } else { + int lg = found.length(); + found.get(termAtt.termBuffer(), 0, lg); + termAtt.setTermLength(lg); + } + return true; + } else { + return false; + } + } + + private boolean next() { + if (currentWordIx >= words.length) { + return false; + } + if (!alwaysDiscard) { + // Last match was the discarded version, + // now do the non-discard version. + if (afterDiscard != null) { + afterDiscard = null; + return true; + } + } + afterDiscard = null; + if (charPos + 1 < currentWord.length && softMatcher.find(charPos+1)) { + charPos = softMatcher.end(); + maybeDiscardHyphens(); + return true; + } else { + return nextWord(); + } + } + + void maybeDiscardHyphens() { + found = CharBuffer.wrap(currentWord, charPos, currentWord.length - charPos); + Matcher discarding = discard.matcher(found); + if (discarding.find()) { + afterDiscard = discarding.replaceAll(""); + } + } + + private boolean nextWord() { + currentWordIx++; + if (currentWordIx >= words.length) { + return false; + } + currentWord = words[currentWordIx].toCharArray(); + termAtt.resizeTermBuffer(currentWord.length); + charPos = 0; + softMatcher = subWordBoundary.matcher(words[currentWordIx]); + maybeDiscardHyphens(); + return true; + } + + } + +} Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/search/AbstractAnalyzerFactoryTest.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/search/AbstractAnalyzerFactoryTest.java 2014-05-10 02:56:35 UTC (rev 8262) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/search/AbstractAnalyzerFactoryTest.java 2014-05-10 11:52:25 UTC (rev 8263) @@ -22,151 +22,25 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* - * Created on May 7, 2014 + * Created on May 9, 2014 */ package com.bigdata.search; -import java.io.IOException; -import java.io.StringReader; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; - public abstract class AbstractAnalyzerFactoryTest extends AbstractSearchTest { - public AbstractAnalyzerFactoryTest() { + public AbstractAnalyzerFactoryTest() { } - - public AbstractAnalyzerFactoryTest(String arg0) { - super(arg0); + + public AbstractAnalyzerFactoryTest(String arg0) { + super(arg0); } - - public void setUp() throws Exception { - super.setUp(); - init(getExtraProperties()); - } - abstract String[] getExtraProperties(); - - private Analyzer getAnalyzer(String lang, boolean 
filterStopWords) { - return getNdx().getAnalyzer(lang, filterStopWords); + + @Override + public void setUp() throws Exception { + super.setUp(); + init(getExtraProperties()); } - - private void comparisonTest(String lang, - boolean stopWordsSignificant, - String text, - String spaceSeparated) throws IOException { - compareTokenStream(getAnalyzer(lang, stopWordsSignificant), text, - spaceSeparated.split(" ")); //$NON-NLS-1$ - } - private void compareTokenStream(Analyzer a, String text, String expected[]) throws IOException { - TokenStream s = a.tokenStream(null, new StringReader(text)); - int ix = 0; - while (s.incrementToken()) { - final TermAttribute term = s.getAttribute(TermAttribute.class); - final String word = term.term(); - assertTrue(ix < expected.length); - assertEquals(word, expected[ix++]); - } - assertEquals(ix, expected.length); - } - - public void testEnglishFilterStopWords() throws IOException { - for (String lang: new String[]{ "eng", null, "" }) { //$NON-NLS-1$ //$NON-NLS-2$ - comparisonTest(lang, - true, - "The test to end all tests! Forever.", //$NON-NLS-1$ - "test end all tests forever" //$NON-NLS-1$ - ); - } - } - public void testEnglishNoFilter() throws IOException { - for (String lang: new String[]{ "eng", null, "" }) { //$NON-NLS-1$ //$NON-NLS-2$ - comparisonTest(lang, - false, - "The test to end all tests! Forever.", //$NON-NLS-1$ - "the test to end all tests forever" //$NON-NLS-1$ - ); - } - } - - // Note we careful use a three letter language code for german. - // 'de' is more standard, but the DefaultAnalyzerFactory does not - // implement 'de' correctly. - public void testGermanFilterStopWords() throws IOException { - comparisonTest("ger", //$NON-NLS-1$ - true, - NonEnglishExamples.getString("AbstractAnalyzerFactoryTest.10") + //$NON-NLS-1$ - NonEnglishExamples.getString("AbstractAnalyzerFactoryTest.11"), //$NON-NLS-1$ - NonEnglishExamples.getString("AbstractAnalyzerFactoryTest.12") //$NON-NLS-1$ - ); - - } + abstract String[] getExtraProperties(); - // Note we careful use a three letter language code for Russian. - // 'ru' is more standard, but the DefaultAnalyzerFactory does not - // implement 'ru' correctly. - public void testRussianFilterStopWords() throws IOException { - comparisonTest("rus", //$NON-NLS-1$ - true, - // I hope this is not offensive text. 
- NonEnglishExamples.getString("AbstractAnalyzerFactoryTest.14") + //$NON-NLS-1$ - NonEnglishExamples.getString("AbstractAnalyzerFactoryTest.15"), //$NON-NLS-1$ - NonEnglishExamples.getString("AbstractAnalyzerFactoryTest.16") //$NON-NLS-1$ - ); - - } - public void testGermanNoStopWords() throws IOException { - comparisonTest("ger", //$NON-NLS-1$ - false, - NonEnglishExamples.getString("AbstractAnalyzerFactoryTest.18") + //$NON-NLS-1$ - NonEnglishExamples.getString("AbstractAnalyzerFactoryTest.19"), //$NON-NLS-1$ - NonEnglishExamples.getString("AbstractAnalyzerFactoryTest.20") //$NON-NLS-1$ - ); - - } - public void testRussianNoStopWords() throws IOException { - comparisonTest("rus", //$NON-NLS-1$ - false, - NonEnglishExamples.getString("AbstractAnalyzerFactoryTest.22") + //$NON-NLS-1$ - NonEnglishExamples.getString("AbstractAnalyzerFactoryTest.23"), //$NON-NLS-1$ - NonEnglishExamples.getString("AbstractAnalyzerFactoryTest.24") //$NON-NLS-1$ - ); - - } - public void testJapanese() throws IOException { - for (boolean filterStopWords: new Boolean[]{true, false}) { - comparisonTest("jpn", //$NON-NLS-1$ - filterStopWords, - NonEnglishExamples.getString("AbstractAnalyzerFactoryTest.26"), //$NON-NLS-1$ - NonEnglishExamples.getString("AbstractAnalyzerFactoryTest.27") + //$NON-NLS-1$ - NonEnglishExamples.getString("AbstractAnalyzerFactoryTest.28") + //$NON-NLS-1$ - NonEnglishExamples.getString("AbstractAnalyzerFactoryTest.29")); //$NON-NLS-1$ - } - } - public void testConfiguredLanguages() { - checkConfig("BrazilianAnalyzer", "por", "pt"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ - checkConfig("ChineseAnalyzer", "zho", "chi", "zh"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ - checkConfig("CJKAnalyzer", "jpn", "ja", "kor", "ko"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ //$NON-NLS-5$ - checkConfig("CzechAnalyzer", "ces", "cze", "cs"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ - checkConfig("DutchAnalyzer", "dut", "nld", "nl"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ - checkConfig("GermanAnalyzer", "deu", "ger", "de"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ - checkConfig("GreekAnalyzer", "gre", "ell", "el"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ - checkConfig("RussianAnalyzer", "rus", "ru"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ - checkConfig("ThaiAnalyzer", "th", "tha"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ - checkConfig("StandardAnalyzer", "en", "eng", "", null); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ - } - - private void checkConfig(String classname, String ...langs) { - for (String lang:langs) { - // The DefaultAnalyzerFactory only works for language tags of length exactly three. 
-// if (lang != null && lang.length()==3) - { - assertEquals(classname, getAnalyzer(lang,true).getClass().getSimpleName()); - assertEquals(classname, getAnalyzer(lang+NonEnglishExamples.getString("AbstractAnalyzerFactoryTest.0"),true).getClass().getSimpleName()); //$NON-NLS-1$ - } - } - - } } Copied: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/search/AbstractDefaultAnalyzerFactoryTest.java (from rev 8253, branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/search/AbstractAnalyzerFactoryTest.java) =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/search/AbstractDefaultAnalyzerFactoryTest.java (rev 0) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/search/AbstractDefaultAnalyzerFactoryTest.java 2014-05-10 11:52:25 UTC (rev 8263) @@ -0,0 +1,133 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2014. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on May 7, 2014 + */ +package com.bigdata.search; + +import java.io.IOException; + + +public abstract class AbstractDefaultAnalyzerFactoryTest extends AbstractAnalyzerFactoryTest { + + public AbstractDefaultAnalyzerFactoryTest() { + } + + public AbstractDefaultAnalyzerFactoryTest(String arg0) { + super(arg0); + } + + public void testEnglishFilterStopWords() throws IOException { + for (String lang: new String[]{ "eng", null, "" }) { //$NON-NLS-1$ //$NON-NLS-2$ + comparisonTest(lang, + true, + "The test to end all tests! Forever.", //$NON-NLS-1$ + "test end all tests forever" //$NON-NLS-1$ + ); + } + } + public void testEnglishNoFilter() throws IOException { + for (String lang: new String[]{ "eng", null, "" }) { //$NON-NLS-1$ //$NON-NLS-2$ + comparisonTest(lang, + false, + "The test to end all tests! Forever.", //$NON-NLS-1$ + "the test to end all tests forever" //$NON-NLS-1$ + ); + } + } + + // Note we careful use a three letter language code for german. + // 'de' is more standard, but the DefaultAnalyzerFactory does not + // implement 'de' correctly. + public void testGermanFilterStopWords() throws IOException { + comparisonTest("ger", //$NON-NLS-1$ + true, + NonEnglishExamples.getString("AbstractAnalyzerFactoryTest.10") + //$NON-NLS-1$ + NonEnglishExamples.getString("AbstractAnalyzerFactoryTest.11"), //$NON-NLS-1$ + NonEnglishExamples.getString("AbstractAnalyzerFactoryTest.12") //$NON-NLS-1$ + ); + + } + + // Note we careful use a three letter language code for Russian. + // 'ru' is more standard, but the DefaultAnalyzerFactory does not + // implement 'ru' correctly. + public void testRussianFilterStopWords() throws IOException { + comparisonTest("rus", //$NON-NLS-1$ + true, + // I hope this is not offensive text. 
+ NonEnglishExamples.getString("AbstractAnalyzerFactoryTest.14") + //$NON-NLS-1$ + NonEnglishExamples.getString("AbstractAnalyzerFactoryTest.15"), //$NON-NLS-1$ + NonEnglishExamples.getString("AbstractAnalyzerFactoryTest.16") //$NON-NLS-1$ + ); + + } + public void testGermanNoStopWords() throws IOException { + comparisonTest("ger", //$NON-NLS-1$ + false, + NonEnglishExamples.getString("AbstractAnalyzerFactoryTest.18") + //$NON-NLS-1$ + NonEnglishExamples.getString("AbstractAnalyzerFactoryTest.19"), //$NON-NLS-1$ + NonEnglishExamples.getString("AbstractAnalyzerFactoryTest.20") //$NON-NLS-1$ + ); + + } + public void testRussianNoStopWords() throws IOException { + comparisonTest("rus", //$NON-NLS-1$ + false, + NonEnglishExamples.getString("AbstractAnalyzerFactoryTest.22") + //$NON-NLS-1$ + NonEnglishExamples.getString("AbstractAnalyzerFactoryTest.23"), //$NON-NLS-1$ + NonEnglishExamples.getString("AbstractAnalyzerFactoryTest.24") //$NON-NLS-1$ + ); + + } + public void testJapanese() throws IOException { + for (boolean filterStopWords: new Boolean[]{true, false}) { + comparisonTest("jpn", //$NON-NLS-1$ + filterStopWords, + NonEnglishExamples.getString("AbstractAnalyzerFactoryTest.26"), //$NON-NLS-1$ + NonEnglishExamples.getString("AbstractAnalyzerFactoryTest.27") + //$NON-NLS-1$ + NonEnglishExamples.getString("AbstractAnalyzerFactoryTest.28") + //$NON-NLS-1$ + NonEnglishExamples.getString("AbstractAnalyzerFactoryTest.29")); //$NON-NLS-1$ + } + } + public void testConfiguredLanguages() { + checkConfig("BrazilianAnalyzer", "por", "pt"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ + checkConfig("ChineseAnalyzer", "zho", "chi", "zh"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ + checkConfig("CJKAnalyzer", "jpn", "ja", "kor", "ko"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ //$NON-NLS-5$ + checkConfig("CzechAnalyzer", "ces", "cze", "cs"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ + checkConfig("DutchAnalyzer", "dut", "nld", "nl"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ + checkConfig("GermanAnalyzer", "deu", "ger", "de"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ + checkConfig("GreekAnalyzer", "gre", "ell", "el"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ + checkConfig("RussianAnalyzer", "rus", "ru"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ + checkConfig("ThaiAnalyzer", "th", "tha"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ + checkConfig("StandardAnalyzer", "en", "eng", "", null); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ + } + + @Override + protected void checkConfig(String classname, String ...langs) { + checkConfig(isBroken(), classname, langs); + + } + abstract boolean isBroken() ; +} Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/search/AbstractSearchTest.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/search/AbstractSearchTest.java 2014-05-10 02:56:35 UTC (rev 8262) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/search/AbstractSearchTest.java 2014-05-10 11:52:25 UTC (rev 8263) @@ -26,8 +26,14 @@ */ package com.bigdata.search; +import java.io.IOException; +import java.io.StringReader; import java.util.Properties; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.TermAttribute; + import com.bigdata.journal.IIndexManager; import com.bigdata.journal.ITx; import 
com.bigdata.journal.ProxyTestCase; @@ -62,7 +68,7 @@ } FullTextIndex<Long> createFullTextIndex(String namespace, String ...propertyValuePairs) { - return createFullTextIndex(namespace, getProperties(), propertyValuePairs); + return createFullTextIndex(namespace, (Properties)getProperties().clone(), propertyValuePairs); } public void tearDown() throws Exception { @@ -92,4 +98,65 @@ return properties; } + protected Analyzer getAnalyzer(String lang, boolean filterStopWords) { + return getNdx().getAnalyzer(lang, filterStopWords); + } + + protected void comparisonTest(String lang, boolean filterStopWords, String text, String spaceSeparated) + throws IOException { + if (spaceSeparated == null) { + String rslt = getTokenStream(getAnalyzer(lang, filterStopWords), text); + throw new RuntimeException("Got \"" + rslt+ "\""); + } + compareTokenStream(getAnalyzer(lang, filterStopWords), text, + split(spaceSeparated)); //$NON-NLS-1$ + } + + private String[] split(String spaceSeparated) { + if (spaceSeparated.length()==0) { + return new String[0]; + } + return spaceSeparated.split(" "); + } + + protected String getTokenStream(Analyzer a, String text) throws IOException { + StringBuffer sb = new StringBuffer(); + TokenStream s = a.tokenStream(null, new StringReader(text)); + while (s.incrementToken()) { + final TermAttribute term = s.getAttribute(TermAttribute.class); + if (sb.length()!=0) { + sb.append(" "); + } + sb.append(term.term()); + } + return sb.toString(); + } + + private void compareTokenStream(Analyzer a, String text, String expected[]) throws IOException { + TokenStream s = a.tokenStream(null, new StringReader(text)); + int ix = 0; + while (s.incrementToken()) { + final TermAttribute term = s.getAttribute(TermAttribute.class); + final String word = term.term(); + assertTrue(ix < expected.length); + assertEquals(expected[ix++], word); + } + assertEquals(ix, expected.length); + } + + protected void checkConfig(boolean threeLetterOnly, String classname, String ...langs) { + for (String lang:langs) { + // The DefaultAnalyzerFactory only works for language tags of length exactly three. + if ((!threeLetterOnly) || (lang != null && lang.length()==3)) { + assertEquals(classname, getAnalyzer(lang,true).getClass().getSimpleName()); + if (!threeLetterOnly) { + assertEquals(classname, getAnalyzer(lang+"-x-foobar",true).getClass().getSimpleName()); //$NON-NLS-1$ + } + } + } + } + protected void checkConfig(String classname, String ...langs) { + checkConfig(false, classname, langs); + } + } Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/search/NonEnglishExamples.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/search/NonEnglishExamples.java 2014-05-10 02:56:35 UTC (rev 8262) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/search/NonEnglishExamples.java 2014-05-10 11:52:25 UTC (rev 8263) @@ -1,3 +1,29 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2014. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on May 7, 2014 by Jeremy J. Carroll, Syapse Inc. + */ package com.bigdata.search; import java.util.MissingResourceException; Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/search/TestAll.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/search/TestAll.java 2014-05-10 02:56:35 UTC (rev 8262) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/search/TestAll.java 2014-05-10 11:52:25 UTC (rev 8263) @@ -114,6 +114,8 @@ // which is intended to be the same as the intended // behavior of DefaultAnalyzerFactory suite.addTestSuite(TestConfigurableAsDefaultAnalyzerFactory.class); + suite.addTestSuite(TestConfigurableAnalyzerFactory.class); + suite.addTestSuite(TestUnconfiguredAnalyzerFactory.class); return suite; } Added: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/search/TestConfigurableAnalyzerFactory.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/search/TestConfigurableAnalyzerFactory.java (rev 0) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/search/TestConfigurableAnalyzerFactory.java 2014-05-10 11:52:25 UTC (rev 8263) @@ -0,0 +1,244 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2014. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Fr... [truncated message content] |
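For reference, the analyzer configuration exercised by these tests is given as journal
properties; a minimal sketch (the language ranges and analyzer choices below are
illustrative only, not a recommended setup):

  com.bigdata.search.ConfigurableAnalyzerFactory.analyzer._.analyzerClass=org.apache.lucene.analysis.standard.StandardAnalyzer
  com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.de.analyzerClass=org.apache.lucene.analysis.de.GermanAnalyzer
  com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.de-ch.like=de

An underscore may be written in place of "*" in a property key (the factory rewrites
"_" to "*"), so the first line above configures the default language range.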
From: <jer...@us...> - 2014-05-11 15:45:20
|
Revision: 8270 http://sourceforge.net/p/bigdata/code/8270 Author: jeremy_carroll Date: 2014-05-11 15:45:17 +0000 (Sun, 11 May 2014) Log Message: ----------- Restructured ConfigurableAnalyzerFactory to allow lazy one-time initialization with weak caching by UUID of namespace, giving clearer lifecycle management Modified Paths: -------------- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/search/ConfigurableAnalyzerFactory.java branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/search/TermCompletionAnalyzer.java branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/search/TestConfigurableAnalyzerFactory.java branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/search/TestLanguageRange.java Added Paths: ----------- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/search/ConfiguredAnalyzerFactory.java branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/search/LanguageRange.java branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/search/NeedsConfiguringAnalyzerFactory.java Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/search/ConfigurableAnalyzerFactory.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/search/ConfigurableAnalyzerFactory.java 2014-05-11 15:08:38 UTC (rev 8269) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/search/ConfigurableAnalyzerFactory.java 2014-05-11 15:45:17 UTC (rev 8270) @@ -27,17 +27,9 @@ package com.bigdata.search; import java.io.IOException; +import java.io.Reader; import java.io.StringReader; -import java.lang.reflect.Constructor; -import java.util.Arrays; -import java.util.Collections; -import java.util.Enumeration; -import java.util.HashMap; -import java.util.Locale; -import java.util.Map; -import java.util.Properties; import java.util.Set; -import java.util.concurrent.ConcurrentHashMap; import java.util.regex.Pattern; import org.apache.log4j.Logger; @@ -45,15 +37,14 @@ import org.apache.lucene.analysis.KeywordAnalyzer; import org.apache.lucene.analysis.SimpleAnalyzer; import org.apache.lucene.analysis.StopAnalyzer; +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.WhitespaceAnalyzer; import org.apache.lucene.analysis.miscellaneous.PatternAnalyzer; -import org.apache.lucene.analysis.ru.RussianAnalyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.util.Version; -import com.bigdata.btree.keys.IKeyBuilder; -import com.bigdata.btree.keys.KeyBuilder; - /** * This class can be used with the bigdata properties file to specify * which {@link Analyzer}s are used for which languages. @@ -127,127 +118,6 @@ final private static transient Logger log = Logger.getLogger(ConfigurableAnalyzerFactory.class); /** - * This is an implementation of RFC 4647 language range, - * targetted at the specific needs within bigdata, and only - * supporting the extended filtering specified in section 3.3.2 - * <p> - * Language ranges are comparable so that - * sorting an array and then matching a language tag against each - * member of the array in sequence will give the longest match. - * i.e. the longer ranges come first. 
- * @author jeremycarroll - * - */ - public static class LanguageRange implements Comparable<LanguageRange> { - - private final String range[]; - private final String full; - /** - * Note range must be in lower case, this is not verified. - * @param range - */ - public LanguageRange(String range) { - this.range = range.split("-"); - full = range; - } - - @Override - public int compareTo(LanguageRange o) { - if (equals(o)) { - return 0; - } - int diff = o.range.length - range.length; - if (diff != 0) { - // longest first - return diff; - } - if (range.length == 1) { - // * last - if (range[0].equals("*")) { - return 1; - } - if (o.range[0].equals("*")) { - return -1; - } - } - // alphabetically - for (int i=0; i<range.length; i++) { - diff = range[i].compareTo(o.range[i]); - if (diff != 0) { - return diff; - } - } - throw new RuntimeException("Impossible - supposedly"); - } - - @Override - public boolean equals(Object o) { - return (o instanceof LanguageRange) && ((LanguageRange)o).full.equals(full); - } - @Override - public int hashCode() { - return full.hashCode(); - } - - /** - * This implements the algoirthm of section 3.3.2 of RFC 4647 - * as modified with the observation about private use tags - * in <a href="http://lists.w3.org/Archives/Public/www-international/2014AprJun/0084"> - * this message</a>. - * - * - * @param langTag The RFC 5646 Language tag in lower case - * @return The result of the algorithm - */ - public boolean extendedFilterMatch(String langTag) { - return extendedFilterMatch(langTag.toLowerCase(Locale.ROOT).split("-")); - } - - // See RFC 4647, 3.3.2 - boolean extendedFilterMatch(String[] language) { - // RFC 4647 step 2 - if (!matchSubTag(language[0], range[0])) { - return false; - } - int rPos = 1; - int lPos = 1; - // variant step - for private use flags - if (language[0].equals("x") && range[0].equals("*")) { - lPos = 0; - } - // RFC 4647 step 3 - while (rPos < range.length) { - // step 3A - if (range[rPos].equals("*")) { - rPos ++; - continue; - } - // step 3B - if (lPos >= language.length) { - return false; - } - // step 3C - if (matchSubTag(language[lPos], range[rPos])) { - lPos++; - rPos++; - continue; - } - if (language[lPos].length()==1) { - return false; - } - lPos++; - } - // RFC 4647 step 4 - return true; - } - - // RFC 4647, 3.3.2, step 1 - private boolean matchSubTag(String langSubTag, String rangeSubTag) { - return langSubTag.equals(rangeSubTag) || "*".equals(rangeSubTag); - } - - } - /** * Options understood by the {@link ConfigurableAnalyzerFactory}. 
*/ public interface Options { @@ -394,638 +264,55 @@ } - private static final String ALL_LUCENE_NATURAL_LANGUAGES = - "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.*.like=eng\n" + - "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.por.analyzerClass=org.apache.lucene.analysis.br.BrazilianAnalyzer\n" + - "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.pt.like=por\n" + - "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.zho.analyzerClass=org.apache.lucene.analysis.cn.ChineseAnalyzer\n" + - "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.chi.like=zho\n" + - "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.zh.like=zho\n" + - "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.jpn.analyzerClass=org.apache.lucene.analysis.cjk.CJKAnalyzer\n" + - "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.ja.like=jpn\n" + - "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.kor.like=jpn\n" + - "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.ko.like=kor\n" + - "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.ces.analyzerClass=org.apache.lucene.analysis.cz.CzechAnalyzer\n" + - "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.cze.like=ces\n" + - "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.cs.like=ces\n" + - "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.dut.analyzerClass=org.apache.lucene.analysis.nl.DutchAnalyzer\n" + - "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.nld.like=dut\n" + - "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.nl.like=dut\n" + - "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.deu.analyzerClass=org.apache.lucene.analysis.de.GermanAnalyzer\n" + - "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.ger.like=deu\n" + - "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.de.like=deu\n" + - "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.gre.analyzerClass=org.apache.lucene.analysis.el.GreekAnalyzer\n" + - "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.ell.like=gre\n" + - "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.el.like=gre\n" + - "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.rus.analyzerClass=org.apache.lucene.analysis.ru.RussianAnalyzer\n" + - "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.ru.like=rus\n" + - "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.tha.analyzerClass=org.apache.lucene.analysis.th.ThaiAnalyzer\n" + - "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.th.like=tha\n" + - "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.eng.analyzerClass=org.apache.lucene.analysis.standard.StandardAnalyzer\n" + - "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.en.like=eng\n"; - - private static final String LUCENE_STANDARD_ANALYZER = - "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.*.analyzerClass=org.apache.lucene.analysis.standard.StandardAnalyzer\n"; - - /** - * This comment describes the implementation of {@link ConfigurableAnalyzerFactory}. - * The only method in the interface is {@link ConfigurableAnalyzerFactory#getAnalyzer(String, boolean)}, - * a map is used from language tag to {@link AnalyzerPair}, where the pair contains - * an {@link Analyzer} both with and without stopwords configured (some times these two analyzers are identical, - * if, for example, stop words are not supported or not required). 
- * <p> - * If there is no entry for the language tag in the map {@link ConfigurableAnalyzerFactory#langTag2AnalyzerPair}, - * then one is created, by walking down the array {@link ConfigurableAnalyzerFactory#config} of AnalyzerPairs - * until a matching one is found. - * <p> - * The bulk of the code in this class is invoked from the constructor in order to set up this - * {@link ConfigurableAnalyzerFactory#config} array. For example, all of the subclasses of {@link AnalyzerPair}s, - * are simply to call the appropriate constructor in the appropriate way: the difficulty is that many subclasses - * of {@link Analyzer} have constructors with different signatures, and our code needs to navigate each sort. - * @author jeremycarroll - * - */ - private static class AnalyzerPair implements Comparable<AnalyzerPair>{ - final LanguageRange range; - private final Analyzer withStopWords; - private final Analyzer withoutStopWords; - - public Analyzer getAnalyzer(boolean filterStopwords) { - return filterStopwords ? withStopWords : withoutStopWords; - } - - public boolean extendedFilterMatch(String[] language) { - return range.extendedFilterMatch(language); - } - - AnalyzerPair(String range, Analyzer withStopWords, Analyzer withOutStopWords) { - this.range = new LanguageRange(range); - this.withStopWords = withStopWords; - this.withoutStopWords = withOutStopWords; - } - - /** - * This clone constructor implements {@link AnalyzerOptions#LIKE}. - * @param range - * @param copyMe - */ - AnalyzerPair(String range, AnalyzerPair copyMe) { - this.range = new LanguageRange(range); - this.withStopWords = copyMe.withStopWords; - this.withoutStopWords = copyMe.withoutStopWords; - } - - /** - * If we have a constructor, with arguments including a populated - * stop word set, then we can use it to make both the withStopWords - * analyzer, and the withoutStopWords analyzer. - * @param range - * @param cons A Constructor including a {@link java.util.Set} argument - * for the stop words. - * @param params The arguments to pass to the constructor including a populated stopword set. - * @throws Exception - */ - AnalyzerPair(String range, Constructor<? extends Analyzer> cons, Object ... params) throws Exception { - this(range, cons.newInstance(params), cons.newInstance(useEmptyStopWordSet(params))); - } - AnalyzerPair(String range, Analyzer stopWordsNotSupported) { - this(range, stopWordsNotSupported, stopWordsNotSupported); - } - private static Object[] useEmptyStopWordSet(Object[] params) { - Object rslt[] = new Object[params.length]; - for (int i=0; i<params.length; i++) { - if (params[i] instanceof Set) { - rslt[i] = Collections.EMPTY_SET; - } else { - rslt[i] = params[i]; - } - } - return rslt; - } - - @Override - public String toString() { - return range.full + "=(" + withStopWords.getClass().getSimpleName() +")"; - } - - @Override - public int compareTo(AnalyzerPair o) { - return range.compareTo(o.range); - } - } - - - /** - * Used for Analyzer classes with a constructor with signature (Version, Set). - * @author jeremycarroll - * - */ - private static class VersionSetAnalyzerPair extends AnalyzerPair { - public VersionSetAnalyzerPair(ConfigOptionsToAnalyzer lro, - Class<? extends Analyzer> cls) throws Exception { - super(lro.languageRange, getConstructor(cls, Version.class, Set.class), Version.LUCENE_CURRENT, lro.getStopWords()); - } - } - - /** - * Used for Analyzer classes which do not support stopwords and have a constructor with signature (Version). 
- * @author jeremycarroll - * - */ - private static class VersionAnalyzerPair extends AnalyzerPair { - public VersionAnalyzerPair(String range, Class<? extends Analyzer> cls) throws Exception { - super(range, getConstructor(cls, Version.class).newInstance(Version.LUCENE_CURRENT)); - } - } - - /** - * Special case code for {@link PatternAnalyzer} - * @author jeremycarroll - * - */ - private static class PatternAnalyzerPair extends AnalyzerPair { - public PatternAnalyzerPair(ConfigOptionsToAnalyzer lro, Pattern pattern) throws Exception { - super(lro.languageRange, getConstructor(PatternAnalyzer.class,Version.class,Pattern.class,Boolean.TYPE,Set.class), - Version.LUCENE_CURRENT, - pattern, - true, - lro.getStopWords()); - } - } - - - /** - * This class is initialized with the config options, using the {@link #setProperty(String, String)} - * method, for a particular language range and works out which pair of {@link Analyzer}s - * to use for that language range. - * <p> - * Instances of this class are only alive during the execution of - * {@link ConfigurableAnalyzerFactory#ConfigurableAnalyzerFactory(FullTextIndex)}, - * the life-cycle is: - * <ol> - * <li>The relveant config properties are applied, and are used to populate the fields. - * <li>The fields are validated - * <li>An {@link AnalyzerPair} is constructed - * </ol> - * - * @author jeremycarroll - * - */ - private static class ConfigOptionsToAnalyzer { - - String like; - String className; - String stopwords; - Pattern pattern; - final String languageRange; - AnalyzerPair result; - Pattern wordBoundary; - Pattern subWordBoundary; - Pattern softHyphens; - Boolean alwaysRemoveSoftHyphens; - - public ConfigOptionsToAnalyzer(String languageRange) { - this.languageRange = languageRange; - } - - /** - * This is called only when we have already identified that - * the class does support stopwords. - * @return - */ - public Set<?> getStopWords() { - - if (doNotUseStopWords()) - return Collections.EMPTY_SET; - - if (useDefaultStopWords()) { - return getStopWordsForClass(className); - } - - return getStopWordsForClass(stopwords); - } - - boolean doNotUseStopWords() { - return AnalyzerOptions.STOPWORDS_VALUE_NONE.equals(stopwords) || (stopwords == null && pattern != null); - } - - protected Set<?> getStopWordsForClass(String clazzName) { - Class<? extends Analyzer> analyzerClass = getAnalyzerClass(clazzName); - try { - return (Set<?>) analyzerClass.getMethod("getDefaultStopSet").invoke(null); - } catch (Exception e) { - if (StandardAnalyzer.class.equals(analyzerClass)) { - return StandardAnalyzer.STOP_WORDS_SET; - } - if (StopAnalyzer.class.equals(analyzerClass)) { - return StopAnalyzer.ENGLISH_STOP_WORDS_SET; - } - throw new RuntimeException("Failed to find stop words from " + clazzName + " for language range "+languageRange); - } - } - - protected boolean useDefaultStopWords() { - return ( stopwords == null && pattern == null ) || AnalyzerOptions.STOPWORDS_VALUE_DEFAULT.equals(stopwords); - } - - /** - * The first step in the life-cycle, used to initialize the fields. - * @return true if the property was recognized. 
- */ - public boolean setProperty(String shortProperty, String value) { - if (shortProperty.equals(AnalyzerOptions.LIKE) ) { - like = value; - } else if (shortProperty.equals(AnalyzerOptions.ANALYZER_CLASS) ) { - className = value; - } else if (shortProperty.equals(AnalyzerOptions.STOPWORDS) ) { - stopwords = value; - } else if (shortProperty.equals(AnalyzerOptions.PATTERN) ) { - pattern = Pattern.compile(value,Pattern.UNICODE_CHARACTER_CLASS); - } else if (shortProperty.equals(AnalyzerOptions.WORD_BOUNDARY) ) { - wordBoundary = Pattern.compile(value,Pattern.UNICODE_CHARACTER_CLASS); - } else if (shortProperty.equals(AnalyzerOptions.SUB_WORD_BOUNDARY) ) { - subWordBoundary = Pattern.compile(value,Pattern.UNICODE_CHARACTER_CLASS); - } else if (shortProperty.equals(AnalyzerOptions.SOFT_HYPHENS) ) { - softHyphens = Pattern.compile(value,Pattern.UNICODE_CHARACTER_CLASS); - } else if (shortProperty.equals(AnalyzerOptions.ALWAYS_REMOVE_SOFT_HYPHENS) ) { - alwaysRemoveSoftHyphens = Boolean.valueOf(value); - } else { - return false; - } - return true; - } - - /** - * The second phase of the life-cycle, used for sanity checking. - */ - public void validate() { - if (pattern != null ) { - if ( className != null && className != PatternAnalyzer.class.getName()) { - throw new RuntimeException("Bad Option: Language range "+languageRange + " with pattern propety for class "+ className); - } - className = PatternAnalyzer.class.getName(); - } - if (this.wordBoundary != null ) { - if ( className != null && className != TermCompletionAnalyzer.class.getName()) { - throw new RuntimeException("Bad Option: Language range "+languageRange + " with pattern propety for class "+ className); - } - className = TermCompletionAnalyzer.class.getName(); - - if ( subWordBoundary == null ) { - subWordBoundary = AnalyzerOptions.DEFAULT_SUB_WORD_BOUNDARY; - } - if ( alwaysRemoveSoftHyphens != null && softHyphens == null ) { - throw new RuntimeException("Bad option: Language range "+languageRange + ": must specify softHypens when setting alwaysRemoveSoftHyphens"); - } - if (softHyphens != null && alwaysRemoveSoftHyphens == null) { - alwaysRemoveSoftHyphens = AnalyzerOptions.DEFAULT_ALWAYS_REMOVE_SOFT_HYPHENS; - } - - } else if ( subWordBoundary != null || softHyphens != null || alwaysRemoveSoftHyphens != null || - TermCompletionAnalyzer.class.getName().equals(className) ) { - throw new RuntimeException("Bad option: Language range "+languageRange + ": must specify wordBoundary for TermCompletionAnalyzer"); - } - - if (PatternAnalyzer.class.getName().equals(className) && pattern == null ) { - throw new RuntimeException("Bad Option: Language range "+languageRange + " must specify pattern for PatternAnalyzer."); - } - if ( (like != null) == (className != null) ) { - throw new RuntimeException("Bad Option: Language range "+languageRange + " must specify exactly one of implementation class or like."); - } - if (stopwords != null && like != null) { - throw new RuntimeException("Bad Option: Language range "+languageRange + " must not specify stopwords with like."); - } - - } - - /** - * The third and final phase of the life-cyle used for identifying - * the AnalyzerPair. 
- */ - private AnalyzerPair construct() throws Exception { - if (className == null) { - return null; - } - if (pattern != null) { - return new PatternAnalyzerPair(this, pattern); - } - if (softHyphens != null) { - return new AnalyzerPair( - languageRange, - new TermCompletionAnalyzer( - wordBoundary, - subWordBoundary, - softHyphens, - alwaysRemoveSoftHyphens)); - } - if (wordBoundary != null) { - return new AnalyzerPair( - languageRange, - new TermCompletionAnalyzer( - wordBoundary, - subWordBoundary)); - } - final Class<? extends Analyzer> cls = getAnalyzerClass(); - - if (hasConstructor(cls, Version.class, Set.class)) { - - // RussianAnalyzer is missing any way to access stop words. - if (RussianAnalyzer.class.equals(cls)) { - if (useDefaultStopWords()) { - return new AnalyzerPair(languageRange, new RussianAnalyzer(Version.LUCENE_CURRENT), new RussianAnalyzer(Version.LUCENE_CURRENT, Collections.EMPTY_SET)); - } - if (doNotUseStopWords()) { - return new AnalyzerPair(languageRange, new RussianAnalyzer(Version.LUCENE_CURRENT, Collections.EMPTY_SET)); - } - } - return new VersionSetAnalyzerPair(this, cls); - } - - if (stopwords != null && !stopwords.equals(AnalyzerOptions.STOPWORDS_VALUE_NONE)) { - throw new RuntimeException("Bad option: language range: " + languageRange + " stopwords are not supported by " + className); - } - if (hasConstructor(cls, Version.class)) { - return new VersionAnalyzerPair(languageRange, cls); - } - - if (hasConstructor(cls)) { - return new AnalyzerPair(languageRange, cls.newInstance()); - } - throw new RuntimeException("Bad option: cannot find constructor for class " + className + " for language range " + languageRange); - } - - /** - * Also part of the third phase of the life-cycle, following the {@link AnalyzerOptions#LIKE} - * properties. - * @param depth - * @param max - * @param analyzers - * @return - */ - AnalyzerPair followLikesToAnalyzerPair(int depth, int max, - Map<String, ConfigOptionsToAnalyzer> analyzers) { - if (result == null) { - if (depth == max) { - throw new RuntimeException("Bad configuration: - 'like' loop for language range " + languageRange); - } - ConfigOptionsToAnalyzer next = analyzers.get(like); - if (next == null) { - throw new RuntimeException("Bad option: - 'like' not found for language range " + languageRange+ " (not found: '"+ like +"')"); - } - result = new AnalyzerPair(languageRange, next.followLikesToAnalyzerPair(depth+1, max, analyzers)); - } - return result; - } - - protected Class<? extends Analyzer> getAnalyzerClass() { - return getAnalyzerClass(className); - } - - @SuppressWarnings("unchecked") - protected Class<? extends Analyzer> getAnalyzerClass(String className2) { - final Class<? extends Analyzer> cls; - try { - cls = (Class<? extends Analyzer>) Class.forName(className2); - } catch (ClassNotFoundException e) { - throw new RuntimeException("Bad option: cannot find class " + className2 + " for language range " + languageRange, e); - } - return cls; - } - - void setAnalyzerPair(AnalyzerPair ap) { - result = ap; - } - } - - private final AnalyzerPair config[]; - - private final Map<String, AnalyzerPair> langTag2AnalyzerPair = new ConcurrentHashMap<String, AnalyzerPair>(); - /** - * While it would be very unusual to have more than 500 different language tags in a store - * it is possible - we use a max size to prevent a memory explosion, and a naive caching - * strategy so the code will still work on the {@link #MAX_LANG_CACHE_SIZE}+1 th entry. 
+ * Initialization is a little tricky, because on the very first + * call to the constructor with a new namespace or a new journal + * the fullTextIndex is not ready for use. + * Therefore we delegate to an unconfigured object + * which on the first call to {@link NeedsConfiguringAnalyzerFactory#getAnalyzer(String, boolean)} + * does the configuration and replaces itself here with a + * {@link ConfiguredAnalyzerFactory} */ - private static final int MAX_LANG_CACHE_SIZE = 500; + IAnalyzerFactory delegate; - - private String defaultLanguage; - private final FullTextIndex<?> fullTextIndex; - - /** * Builds a new ConfigurableAnalyzerFactory. * @param fullTextIndex */ public ConfigurableAnalyzerFactory(final FullTextIndex<?> fullTextIndex) { - // A description of the operation of this method is found on AnalyzerPair and - // ConfigOptionsToAnalyzer. - // despite our name, we actually make all the analyzers now, and getAnalyzer method is merely a lookup. - - if (fullTextIndex == null) - throw new IllegalArgumentException(); - - this.fullTextIndex = fullTextIndex; - - final Properties properties = initProperties(); - - final Map<String, ConfigOptionsToAnalyzer> analyzers = new HashMap<String, ConfigOptionsToAnalyzer>(); - - properties2analyzers(properties, analyzers); - - if (!analyzers.containsKey("*")) { - throw new RuntimeException("Bad config: must specify behavior on language range '*'"); - } - - for (ConfigOptionsToAnalyzer a: analyzers.values()) { - a.validate(); - } - - try { - for (ConfigOptionsToAnalyzer a: analyzers.values()) { - a.setAnalyzerPair(a.construct()); - } - } catch (Exception e) { - throw new RuntimeException("Cannot construct ConfigurableAnalyzerFactory", e); - } - int sz = analyzers.size(); - for (ConfigOptionsToAnalyzer a: analyzers.values()) { - a.followLikesToAnalyzerPair(0, sz, analyzers); - } - - config = new AnalyzerPair[sz]; - int i = 0; - for (ConfigOptionsToAnalyzer a: analyzers.values()) { - config[i++] = a.result; - } - Arrays.sort(config); - if (log.isInfoEnabled()) { - StringBuilder sb = new StringBuilder(); - sb.append("Installed text Analyzer's: "); - for (AnalyzerPair ap: config) { - sb.append(ap.toString()); - sb.append(", "); - } - log.info(sb.toString()); - } + delegate = new NeedsConfiguringAnalyzerFactory(this, fullTextIndex); } - private String getDefaultLanguage(final FullTextIndex<?> fullTextIndex) { - - final IKeyBuilder keyBuilder = fullTextIndex.getKeyBuilder(); + static int loggerIdCounter = 0; + @Override + public Analyzer getAnalyzer(final String languageCode, boolean filterStopwords) { - if (keyBuilder.isUnicodeSupported()) { - - // The configured local for the database. - final Locale locale = ((KeyBuilder) keyBuilder) - .getSortKeyGenerator().getLocale(); - - // The analyzer for that locale. 
- return locale.getLanguage(); - + final Analyzer unlogged = delegate.getAnalyzer(languageCode, filterStopwords); + if (log.isDebugEnabled()) { + return new Analyzer() { + @Override + public TokenStream tokenStream(final String fieldName, final Reader reader) { + final int id = loggerIdCounter++; + final String term = TermCompletionAnalyzer.getStringReaderContents((StringReader)reader); + log.debug(id + " " + languageCode +" **"+term+"**"); + return new TokenFilter(unlogged.tokenStream(fieldName, reader)){ + + TermAttribute attr = addAttribute(TermAttribute.class); + @Override + public boolean incrementToken() throws IOException { + if (input.incrementToken()) { + log.debug(id + " |"+attr.term()+"|"); + return true; + } + return false; + }}; + } + }; } else { - // Rule, Britannia! - return "en"; - + return unlogged; } - } - private String getDefaultLanguage() { - if (defaultLanguage == null) { - defaultLanguage = getDefaultLanguage(fullTextIndex); - } - return defaultLanguage; - } - - private static boolean hasConstructor(Class<? extends Analyzer> cls, Class<?> ... parameterTypes) { - return getConstructor(cls, parameterTypes) != null; - } - - protected static Constructor<? extends Analyzer> getConstructor(Class<? extends Analyzer> cls, - Class<?>... parameterTypes) { - try { - return cls.getConstructor(parameterTypes); - } catch (NoSuchMethodException | SecurityException e) { - return null; - } - } - - private void properties2analyzers(Properties props, Map<String, ConfigOptionsToAnalyzer> analyzers) { - Enumeration<?> en = props.propertyNames(); - while (en.hasMoreElements()) { - - String prop = (String)en.nextElement(); - if (prop.equals(Options.NATURAL_LANGUAGE_SUPPORT)) continue; - if (prop.startsWith(Options.ANALYZER)) { - String languageRangeAndProperty[] = prop.substring(Options.ANALYZER.length()).replaceAll("_","*").split("[.]"); - if (languageRangeAndProperty.length == 2) { - - String languageRange = languageRangeAndProperty[0].toLowerCase(Locale.US); // Turkish "I" could create a problem - String shortProperty = languageRangeAndProperty[1]; - String value = props.getProperty(prop); - log.info("Setting language range: " + languageRange + "/" + shortProperty + " = " + value); - ConfigOptionsToAnalyzer cons = analyzers.get(languageRange); - if (cons == null) { - cons = new ConfigOptionsToAnalyzer(languageRange); - analyzers.put(languageRange, cons); - } - if (cons.setProperty(shortProperty, value)) { - continue; - } - } - } - - log.warn("Failed to process configuration property: " + prop); - } - } - protected Properties initProperties() { - final Properties parentProperties = fullTextIndex.getProperties(); - Properties myProps; - if (Boolean.valueOf(parentProperties.getProperty( - Options.NATURAL_LANGUAGE_SUPPORT, - Options.DEFAULT_NATURAL_LANGUAGE_SUPPORT))) { - - myProps = loadPropertyString(ALL_LUCENE_NATURAL_LANGUAGES); - - } else if (hasPropertiesForStarLanguageRange(parentProperties)){ - - myProps = new Properties(); - - } else { - - myProps = loadPropertyString(LUCENE_STANDARD_ANALYZER); - } - - copyRelevantProperties(fullTextIndex.getProperties(), myProps); - return myProps; - } - - Properties loadPropertyString(String props) { - Properties rslt = new Properties(); - try { - rslt.load(new StringReader(props)); - } catch (IOException e) { - throw new RuntimeException("Impossible - well clearly not!", e); - } - return rslt; - } - - private void copyRelevantProperties(Properties from, Properties to) { - Enumeration<?> en = from.propertyNames(); - while (en.hasMoreElements()) { 
-            String prop = (String)en.nextElement();
-            if (prop.startsWith(ConfigurableAnalyzerFactory.class.getName())) {
-                to.setProperty(prop, from.getProperty(prop));
-            }
-        }
-    }
-
-    private boolean hasPropertiesForStarLanguageRange(Properties from) {
-        Enumeration<?> en = from.propertyNames();
-        while (en.hasMoreElements()) {
-            String prop = (String)en.nextElement();
-            if (prop.startsWith(Options.ANALYZER+"_.")
-                    || prop.startsWith(Options.ANALYZER+"*.")) {
-                return true;
-            }
-        }
-        return false;
-    }
-
-    @Override
-    public Analyzer getAnalyzer(String languageCode, boolean filterStopwords) {
-
-        if (languageCode == null || languageCode.equals("")) {
-
-            languageCode = getDefaultLanguage();
-        }
-
-        AnalyzerPair pair = langTag2AnalyzerPair.get(languageCode);
-
-        if (pair == null) {
-            pair = lookupPair(languageCode);
-
-            // naive cache - clear everything if cache is full
-            if (langTag2AnalyzerPair.size() == MAX_LANG_CACHE_SIZE) {
-                langTag2AnalyzerPair.clear();
-            }
-            // there is a race condition below, but we don't care who wins.
-            langTag2AnalyzerPair.put(languageCode, pair);
-        }
-
-        return pair.getAnalyzer(filterStopwords);
-
-    }
-
-    private AnalyzerPair lookupPair(String languageCode) {
-        String language[] = languageCode.split("-");
-        for (AnalyzerPair p: config) {
-            if (p.extendedFilterMatch(language)) {
-                return p;
-            }
-        }
-        throw new RuntimeException("Impossible - supposedly - did not match '*'");
-    }
-}

Added: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/search/ConfiguredAnalyzerFactory.java
===================================================================
--- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/search/ConfiguredAnalyzerFactory.java	(rev 0)
+++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/search/ConfiguredAnalyzerFactory.java	2014-05-11 15:45:17 UTC (rev 8270)
@@ -0,0 +1,161 @@
+/**
+
+Copyright (C) SYSTAP, LLC 2006-2014.  All rights reserved.
+
+Contact:
+     SYSTAP, LLC
+     4501 Tower Road
+     Greensboro, NC 27410
+     lic...@bi...
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+/*
+ * Created on May 6, 2014 by Jeremy J. Carroll, Syapse Inc.
+ */
+package com.bigdata.search;

+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+
+import org.apache.lucene.analysis.Analyzer;
+
+import com.bigdata.search.ConfigurableAnalyzerFactory.AnalyzerOptions;
+/**
+ * This comment describes the implementation of {@link ConfiguredAnalyzerFactory}.
+ * The only method in the interface is {@link ConfiguredAnalyzerFactory#getAnalyzer(String, boolean)};
+ * a map is used from language tag to {@link AnalyzerPair}, where the pair contains
+ * an {@link Analyzer} both with and without stopwords configured (sometimes these two analyzers are identical,
+ * if, for example, stop words are not supported or not required).
+ * <p> + * If there is no entry for the language tag in the map {@link ConfiguredAnalyzerFactory#langTag2AnalyzerPair}, + * then one is created, by walking down the array {@link ConfiguredAnalyzerFactory#config} of AnalyzerPairs + * until a matching one is found. + * @author jeremycarroll + * + */ +class ConfiguredAnalyzerFactory implements IAnalyzerFactory { + + + /** + * These provide a mapping from a language range to a pair of Analyzers + * and sort with the best-match (i.e. longest match) first. + * @author jeremycarroll + * + */ + protected static class AnalyzerPair implements Comparable<AnalyzerPair>{ + final LanguageRange range; + private final Analyzer withStopWords; + private final Analyzer withoutStopWords; + + public Analyzer getAnalyzer(boolean filterStopwords) { + return filterStopwords ? withStopWords : withoutStopWords; + } + + public boolean extendedFilterMatch(String[] language) { + return range.extendedFilterMatch(language); + } + + AnalyzerPair(String range, Analyzer withStopWords, Analyzer withOutStopWords) { + this.range = new LanguageRange(range); + this.withStopWords = withStopWords; + this.withoutStopWords = withOutStopWords; + } + + /** + * This clone constructor implements {@link AnalyzerOptions#LIKE}. + * @param range + * @param copyMe + */ + AnalyzerPair(String range, AnalyzerPair copyMe) { + this(range, copyMe.withStopWords, copyMe.withoutStopWords); + } + + @Override + public String toString() { + return range.full + "=(" + withStopWords.getClass().getSimpleName() +")"; + } + + @Override + public int compareTo(AnalyzerPair o) { + return range.compareTo(o.range); + } + } + + + private final AnalyzerPair config[]; + + /** + * This caches the result of looking up a lang tag in the + * config of language ranges. + */ + private final Map<String, AnalyzerPair> langTag2AnalyzerPair = new ConcurrentHashMap<String, AnalyzerPair>();; + + /** + * While it would be very unusual to have more than 500 different language tags in a store + * it is possible - we use a max size to prevent a memory explosion, and a naive caching + * strategy so the code will still work on the {@link #MAX_LANG_CACHE_SIZE}+1 th entry. + */ + private static final int MAX_LANG_CACHE_SIZE = 500; + + + private final String defaultLanguage; + /** + * Builds a new ConfigurableAnalyzerFactory. + * @param fullTextIndex + */ + public ConfiguredAnalyzerFactory(AnalyzerPair config[], String defaultLanguage) { + this.config = config; + this.defaultLanguage = defaultLanguage; + } + + private String getDefaultLanguage() { + return defaultLanguage; + } + + @Override + public Analyzer getAnalyzer(String languageCode, boolean filterStopwords) { + + if (languageCode == null || languageCode.equals("")) { + + languageCode = getDefaultLanguage(); + } + + AnalyzerPair pair = langTag2AnalyzerPair.get(languageCode); + + if (pair == null) { + pair = lookupPair(languageCode); + + // naive cache - clear everything if cache is full + if (langTag2AnalyzerPair.size() == MAX_LANG_CACHE_SIZE) { + langTag2AnalyzerPair.clear(); + } + // there is a race condition below, but we don't care who wins. 
+ langTag2AnalyzerPair.put(languageCode, pair); + } + + return pair.getAnalyzer(filterStopwords); + + } + + private AnalyzerPair lookupPair(String languageCode) { + String language[] = languageCode.split("-"); + for (AnalyzerPair p: config) { + if (p.extendedFilterMatch(language)) { + return p; + } + } + throw new RuntimeException("Impossible - supposedly - did not match '*'"); + } +} Added: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/search/LanguageRange.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/search/LanguageRange.java (rev 0) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/search/LanguageRange.java 2014-05-11 15:45:17 UTC (rev 8270) @@ -0,0 +1,126 @@ +package com.bigdata.search; + +import java.util.Locale; + + +/** + * This is an implementation of RFC 4647 language range, + * targetted at the specific needs within bigdata, and only + * supporting the extended filtering specified in section 3.3.2 + * <p> + * Language ranges are comparable so that + * sorting an array and then matching a language tag against each + * member of the array in sequence will give the longest match. + * i.e. the longer ranges come first. + * @author jeremycarroll + * + */ +public class LanguageRange implements Comparable<LanguageRange> { + + private final String range[]; + final String full; + /** + * Note range must be in lower case, this is not verified. + * @param range + */ + public LanguageRange(String range) { + this.range = range.split("-"); + full = range; + } + + @Override + public int compareTo(LanguageRange o) { + if (equals(o)) { + return 0; + } + int diff = o.range.length - range.length; + if (diff != 0) { + // longest first + return diff; + } + if (range.length == 1) { + // * last + if (range[0].equals("*")) { + return 1; + } + if (o.range[0].equals("*")) { + return -1; + } + } + // alphabetically + for (int i=0; i<range.length; i++) { + diff = range[i].compareTo(o.range[i]); + if (diff != 0) { + return diff; + } + } + throw new RuntimeException("Impossible - supposedly"); + } + + @Override + public boolean equals(Object o) { + return (o instanceof LanguageRange) && ((LanguageRange)o).full.equals(full); + } + @Override + public int hashCode() { + return full.hashCode(); + } + + /** + * This implements the algoirthm of section 3.3.2 of RFC 4647 + * as modified with the observation about private use tags + * in <a href="http://lists.w3.org/Archives/Public/www-international/2014AprJun/0084"> + * this message</a>. 
+ * + * + * @param langTag The RFC 5646 Language tag in lower case + * @return The result of the algorithm + */ + public boolean extendedFilterMatch(String langTag) { + return extendedFilterMatch(langTag.toLowerCase(Locale.ROOT).split("-")); + } + + // See RFC 4647, 3.3.2 + boolean extendedFilterMatch(String[] language) { + // RFC 4647 step 2 + if (!matchSubTag(language[0], range[0])) { + return false; + } + int rPos = 1; + int lPos = 1; + // variant step - for private use flags + if (language[0].equals("x") && range[0].equals("*")) { + lPos = 0; + } + // RFC 4647 step 3 + while (rPos < range.length) { + // step 3A + if (range[rPos].equals("*")) { + rPos ++; + continue; + } + // step 3B + if (lPos >= language.length) { + return false; + } + // step 3C + if (matchSubTag(language[lPos], range[rPos])) { + lPos++; + rPos++; + continue; + } + if (language[lPos].length()==1) { + return false; + } + lPos++; + } + // RFC 4647 step 4 + return true; + } + + // RFC 4647, 3.3.2, step 1 + private boolean matchSubTag(String langSubTag, String rangeSubTag) { + return langSubTag.equals(rangeSubTag) || "*".equals(rangeSubTag); + } + +} \ No newline at end of file Copied: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/search/NeedsConfiguringAnalyzerFactory.java (from rev 8263, branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/search/ConfigurableAnalyzerFactory.java) =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/search/NeedsConfiguringAnalyzerFactory.java (rev 0) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/search/NeedsConfiguringAnalyzerFactory.java 2014-05-11 15:45:17 UTC (rev 8270) @@ -0,0 +1,649 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2014. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on May 6, 2014 by Jeremy J. Carroll, Syapse Inc. 
+ */ +package com.bigdata.search; + +import java.io.IOException; +import java.io.StringReader; +import java.lang.reflect.Constructor; +import java.util.Arrays; +import java.util.Collections; +import java.util.Enumeration; +import java.util.HashMap; +import java.util.Locale; +import java.util.Map; +import java.util.Properties; +import java.util.Set; +import java.util.UUID; +import java.util.WeakHashMap; +import java.util.regex.Pattern; + +import org.apache.log4j.Logger; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.StopAnalyzer; +import org.apache.lucene.analysis.miscellaneous.PatternAnalyzer; +import org.apache.lucene.analysis.ru.RussianAnalyzer; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.util.Version; + +import com.bigdata.btree.keys.IKeyBuilder; +import com.bigdata.btree.keys.KeyBuilder; +import com.bigdata.search.ConfigurableAnalyzerFactory.AnalyzerOptions; +import com.bigdata.search.ConfigurableAnalyzerFactory.Options; + + +/** + * <p> + * The bulk of the code in this class is invoked from {@link #init()} to set up the array of + * {@link ConfiguredAnalyzerFactory.AnalyzerPair}s. For example, all of the subclasses of {@link AnalyzerPair}s, + * are simply to call the appropriate constructor in the appropriate way: the difficulty is that many subclasses + * of {@link Analyzer} have constructors with different signatures, and our code needs to navigate each sort. + * @author jeremycarroll + * + */ +class NeedsConfiguringAnalyzerFactory implements IAnalyzerFactory { + final private static transient Logger log = Logger.getLogger(NeedsConfiguringAnalyzerFactory.class); + + /** + * We create only one {@link ConfiguredAnalyzerFactory} per namespace + * and store it here. The UUID is stable and allows us to side-step lifecycle + * issues such as creation and destruction of namespaces, potentially with different properties. + * We use a WeakHashMap to ensure that after the destruction of a namespace we clean up. + * We have to synchronize this for thread safety. 
+ */ + private static final Map<UUID, ConfiguredAnalyzerFactory> allConfigs = + Collections.synchronizedMap(new WeakHashMap<UUID, ConfiguredAnalyzerFactory>()); + + + private static final String ALL_LUCENE_NATURAL_LANGUAGES = + "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.*.like=eng\n" + + "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.por.analyzerClass=org.apache.lucene.analysis.br.BrazilianAnalyzer\n" + + "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.pt.like=por\n" + + "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.zho.analyzerClass=org.apache.lucene.analysis.cn.ChineseAnalyzer\n" + + "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.chi.like=zho\n" + + "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.zh.like=zho\n" + + "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.jpn.analyzerClass=org.apache.lucene.analysis.cjk.CJKAnalyzer\n" + + "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.ja.like=jpn\n" + + "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.kor.like=jpn\n" + + "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.ko.like=kor\n" + + "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.ces.analyzerClass=org.apache.lucene.analysis.cz.CzechAnalyzer\n" + + "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.cze.like=ces\n" + + "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.cs.like=ces\n" + + "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.dut.analyzerClass=org.apache.lucene.analysis.nl.DutchAnalyzer\n" + + "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.nld.like=dut\n" + + "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.nl.like=dut\n" + + "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.deu.analyzerClass=org.apache.lucene.analysis.de.GermanAnalyzer\n" + + "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.ger.like=deu\n" + + "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.de.like=deu\n" + + "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.gre.analyzerClass=org.apache.lucene.analysis.el.GreekAnalyzer\n" + + "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.ell.like=gre\n" + + "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.el.like=gre\n" + + "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.rus.analyzerClass=org.apache.lucene.analysis.ru.RussianAnalyzer\n" + + "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.ru.like=rus\n" + + "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.tha.analyzerClass=org.apache.lucene.analysis.th.ThaiAnalyzer\n" + + "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.th.like=tha\n" + + "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.eng.analyzerClass=org.apache.lucene.analysis.standard.StandardAnalyzer\n" + + "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.en.like=eng\n"; + + private static final String LUCENE_STANDARD_ANALYZER = + "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.*.analyzerClass=org.apache.lucene.analysis.standard.StandardAnalyzer\n"; + + static int loggerIdCounter = 0; + + /** + * This class and all its subclasses provide a variety of patterns + * for mapping from the various constructor patterns of subclasses + * of {@link Analyzer} to {@link ConfiguredAnalyzerFactory#AnalyzerPair}. 
+ * @author jeremycarroll + * + */ + private static class AnalyzerPair extends ConfiguredAnalyzerFactory.AnalyzerPair { + + AnalyzerPair(String range, Analyzer withStopWords, Analyzer withOutStopWords) { + super(range, withStopWords, withOutStopWords); + } + + /** + * This clone constructor implements {@link AnalyzerOptions#LIKE}. + * @param range + * @param copyMe + */ + AnalyzerPair(String range, AnalyzerPair copyMe) { + super(range, copyMe); + } + + /** + * If we have a constructor, with arguments including a populated + * stop word set, then we can use it to make both the withStopWords + * analyzer, and the withoutStopWords analyzer. + * @param range + * @param cons A Constructor including a {@link java.util.Set} argument + * for the stop words. + * @param params The arguments to pass to the constructor including a populated stopword set. + * @throws Exception + */ + AnalyzerPair(String range, Constructor<? extends Analyzer> cons, Object ... params) throws Exception { + this(range, cons.newInstance(params), cons.newInstance(useEmptyStopWordSet(params))); + } + AnalyzerPair(String range, Analyzer stopWordsNotSupported) { + this(range, stopWordsNotSupported, stopWordsNotSupported); + } + private static Object[] useEmptyStopWordSet(Object[] params) { + Object rslt[] = new Object[params.length]; + for (int i=0; i<params.length; i++) { + if (params[i] instanceof Set) { + rslt[i] = Collections.EMPTY_SET; + } else { + rslt[i] = params[i]; + } + } + return rslt; + } + + } + + + /** + * Used for Analyzer classes with a constructor with signature (Version, Set). + * @author jeremycarroll + * + */ + private static class VersionSetAnalyzerPair extends AnalyzerPair { + public VersionSetAnalyzerPair(ConfigOptionsToAnalyzer lro, + Class<? extends Analyzer> cls) throws Exception { + super(lro.languageRange, getConstructor(cls, Version.class, Set.class), Version.LUCENE_CURRENT, lro.getStopWords()); + } + } + + /** + * Used for Analyzer classes which do not support stopwords and have a constructor with signature (Version). + * @author jeremycarroll + * + */ + private static class VersionAnalyzerPair extends AnalyzerPair { + public VersionAnalyzerPair(String range, Class<? extends Analyzer> cls) throws Exception { + super(range, getConstructor(cls, Version.class).newInstance(Version.LUCENE_CURRENT)); + } + } + + /** + * Special case code for {@link PatternAnalyzer} + * @author jeremycarroll + * + */ + private static class PatternAnalyzerPair extends AnalyzerPair { + public PatternAnalyzerPair(ConfigOptionsToAnalyzer lro, Pattern pattern) throws Exception { + super(lro.languageRange, getConstructor(PatternAnalyzer.class,Version.class,Pattern.class,Boolean.TYPE,Set.class), + Version.LUCENE_CURRENT, + pattern, + true, + lro.getStopWords()); + } + } + + + + /** + * This class is initialized with the config options, using the {@link #setProperty(String, String)} + * method, for a particular language range and works out which pair of {@link Analyzer}s + * to use for that language range. + * <p> + * Instances of this class are only alive during the execution of + * {@link NeedsConfiguringAnalyzerFactory#ConfigurableAnalyzerFactory(FullTextIndex)}, + * the life-cycle is: + * <ol> + * <li>The relveant config properties are applied, and are used to populate the fields. 
+ * <li>The fields are validated + * <li>An {@link AnalyzerPair} is constructed + * </ol> + * + * @author jeremycarroll + * + */ + private static class ConfigOptionsToAnalyzer { + + String like; + String className; + String stopwords; + Pattern pattern; + final String languageRange; + AnalyzerPair result; + Pattern wordBoundary; + Pattern subWordBoundary; + Pattern softHyphens; + Boolean alwaysRemoveSoftHyphens; + + public ConfigOptionsToAnalyzer(String languageRange) { + this.languageRange = languageRange; + } + + /** + * This is called only when we have already identified that + * the class does support stopwords. + * @return + */ + public Set<?> getStopWords() { + + if (doNotUseStopWords()) + return Collections.EMPTY_SET; + + if (useDefaultStopWords()) { + return getStopWordsForClass(className); + } + + return getStopWordsForClass(stopwords); + } + + boolean doNotUseStopWords() { + return AnalyzerOptions.STOPWORDS_VALUE_NONE.equals(stopwords) || (stopwords == null && pattern != null); + } + + protected Set<?> getStopWordsForClass(String clazzName) { + Class<? extends Analyzer> analyzerClass = getAnalyzerClass(clazzName); + try { + return (Set<?>) analyzerClass.getMethod("getDefaultStopSet").invoke(null); + } catch (Exception e) { + if (StandardAnalyzer.class.equals(analyzerClass)) { + return StandardAnalyzer.STOP_WORDS_SET; + } + if (StopAnalyzer.class.equals(analyzerClass)) { + return StopAnalyzer.ENGLISH_STOP_WORDS_SET; + } + throw new RuntimeException("Failed to find stop words from " + clazzName + " for language range "+languageRange); + } + } + + protected boolean useDefaultStopWords() { + return ( stopwords == null && pattern == null ) || AnalyzerOptions.STOPWORDS_VALUE_DEFAULT.equals(stopwords); + } + + /** + * The first step in the life-cycle, used to initialize the fields. + * @return true if the property was recognized. + */ + public boolean setProperty(String shortProperty, String value) { + if (shortProperty.equals(AnalyzerOptions.LIKE) ) { + like = value; + } else if (shortProperty.equals(AnalyzerOptions.ANALYZER_CLASS) ) { + className = value; + } else if (shortProperty.equals(AnalyzerOptions.STOPWORDS) ) { + stopwords = value; + } else if (shortProperty.equals(AnalyzerOptions.PATTERN) ) { + pattern = Pattern.compile(value,Pattern.UNICODE_CHARACTER_CLASS); + } else if (shortProperty.equals(AnalyzerOptions.WORD_BOUNDARY) ) { + wordBoundary = Pattern.compile(value,Pattern.UNICODE_CHARACTER_CLASS); + } else if (shortProperty.equals(AnalyzerOptions.SUB_WORD_BOUNDARY) ) { + subWordBoundary = Pattern.compile(value,Pattern.UNICODE_CHARACTER_CLASS); + } else if (shortProperty.equals(AnalyzerOptions.SOFT_HYPHENS) ) { + softHyphens = Pattern.compile(value,Pattern.UNICODE_CHARACTER_CLASS); + } else if (shortProperty.equals(AnalyzerOptions.ALWAYS_REMOVE_SOFT_HYPHENS) ) { + alwaysRemoveSoftHyphens = Boolean.valueOf(value); + } else { + return false; + } + return true; + } + + /** + * The second phase of the life-cycle, used for sanity checking. 
+ */ + public void validate() { + if (pattern != null ) { + if ( className != null && className != PatternAnalyzer.class.getName()) { + throw new RuntimeException("Bad Option: Language range "+languageRange + " with pattern propety for class "+ className); + } + className = PatternAnalyzer.class.getName(); + } + if (this.wordBoundary != null ) { + if ( className != null && className != TermCompletionAnalyzer.class.getName()) { + throw new RuntimeException("Bad Option: Language range "+languageRange + " with pattern propety for class "+ className); + } + className = TermCompletionAnalyzer.class.getName(); + + if ( subWordBoundary == null ) { + subWordBoundary = AnalyzerOptions.DEFAULT_SUB_WORD_BOUNDARY; + } + if ( alwaysRemoveSoftHyphens != null && softHyphens == null ) { + throw new RuntimeException("Bad option: Language range "+languageRange + ": must specify softHypens when setting alwaysRemoveSoftHyphens"); + } + if (softHyphens != null && alwaysRemoveSoftHyphens == null) { + alwaysRemoveSoftHyphens = AnalyzerOptions.DEFAULT_ALWAYS_REMOVE_SOFT_HYPHENS; + } + + } else if ( subWordBoundary != null || softHyphens != null || alwaysRemoveSoftHyphens != null || + TermCompletionAnalyzer.class.getName().equals(className) ) { + throw new RuntimeException("Bad option: Language range "+languageRange + ": must specify wordBoundary for TermCompletionAnalyzer"); + } + + if (PatternAnalyzer.class.getName().equals(className) && pattern == null ) { + throw new RuntimeException("Bad Option: Language range "+languageRange + " must specify pattern for PatternAnalyzer."); + } + if ( (like != null) == (className != null) ) { + throw new RuntimeException("Bad Option: Language range "+languageRange + " must specify exactly one of implementation class or like."); + } + if (stopwords != null && like != null) { + throw new RuntimeException("Bad Option: Language range "+languageRange + " must not specify stopwords with like."); + } + + } + + /** + * The third and final phase of the life-cyle used for identifying + * the AnalyzerPair. + */ + private AnalyzerPair construct() throws Exception { + if (className == null) { + return null; + } + if (pattern != null) { + return new PatternAnalyzerPair(this, pattern); + } + if (softHyphens != null) { + return new AnalyzerPair( + languageRange, + new TermCompletionAnalyzer( + wordBoundary, + subWordBoundary, + softHyphens, + alwaysRemoveSoftHyphens)); + } + if (wordBoundary != null) { + return new AnalyzerPair( + languageRange, + new TermCompletionAnalyzer( + wordBoundary, + subWordBoundary)); + } + final Class<? extends Analyzer> cls = getAnalyzerClass(); + + if (hasConstructor(cls, Version.class, Set.class)) { + + // RussianAnalyzer is missing any way to access stop words. 
+ if (RussianAnalyzer.class.equals(cls)) { + if (useDefaultStopWords()) { + return new AnalyzerPair(languageRange, new RussianAnalyzer(Version.LUCENE_CURRENT), new RussianAnalyzer(Version.LUCENE_CURRENT, Collections.EMPTY_SET)); + } + if (doNotUseStopWords()) { + return new AnalyzerPair(languageRange, new RussianAnalyzer(Version.LUCENE_CURRENT, Collections.EMPTY_SET)); + } + } + return new VersionSetAnalyzerPair(this, cls); + } + + if (stopwords != null && !stopwords.equals(AnalyzerOptions.STOPWORDS_VALUE_NONE)) { + throw new RuntimeException("Bad option: language range: " + languageRange + " stopwords are not supported by " + className); + } + if (hasConstructor(cls, Version.class)) { + return new VersionAnalyzerPair(languageRange, cls); + } + + if (hasConstructor(cls)) { + return new AnalyzerPair(languageRange, cls.newInstance()); + } + throw new RuntimeException("Bad option: cannot find constructor for class " + className + " for language range " + languageRange); + } + + /** + * Also part of the third phase of the life-cycle, following the {@link AnalyzerOptions#LIKE} + * properties. + * @param depth + * @param max + * @param analyzers + * @return + */ + AnalyzerPair followLikesToAnalyzerPair(int depth, int max, + Map<String, ConfigOptionsToAnalyzer> analyzers) { + if (result == null) { + if (depth == max) { + throw new RuntimeException("Bad configuration: - 'like' loop for language range " + languageRange); + } + ConfigOptionsToAnalyzer next = analyzers.get(like); + if (next == null) { + throw new RuntimeException("Bad ... [truncated message content] |
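For background on the configuration format handled above: NeedsConfiguringAnalyzerFactory recognizes properties of the form com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.<languageRange>.<option>, and an underscore may be written in place of "*" in the language range (properties2analyzers rewrites "_" to "*"). A minimal example configuration is sketched below; it uses only keys and analyzer class names that already appear in the built-in defaults quoted in this commit:

# Fall back to the Lucene StandardAnalyzer for any otherwise unmatched language tag.
com.bigdata.search.ConfigurableAnalyzerFactory.analyzer._.analyzerClass=org.apache.lucene.analysis.standard.StandardAnalyzer
# Use the RussianAnalyzer for "rus"; "ru" shares the analyzer pair built for "rus".
com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.rus.analyzerClass=org.apache.lucene.analysis.ru.RussianAnalyzer
com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.ru.like=rus

When the naturalLanguageSupport option is enabled, the full per-language table in ALL_LUCENE_NATURAL_LANGUAGES is loaded instead of the single StandardAnalyzer default.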
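The language-range matching used by the new LanguageRange class is the extended filtering of RFC 4647, section 3.3.2. The self-contained Java sketch below mirrors that algorithm; the class name, the main() method, and the inline lower-casing are illustrative only, and the committed code additionally special-cases private-use tags beginning with "x":

import java.util.Locale;

// Illustrative sketch only: mirrors LanguageRange.extendedFilterMatch(String[]).
public class ExtendedFilterSketch {

    // RFC 4647, 3.3.2, step 1: a subtag matches itself or the wildcard "*".
    private static boolean matchSubTag(final String langSubTag, final String rangeSubTag) {
        return langSubTag.equals(rangeSubTag) || "*".equals(rangeSubTag);
    }

    public static boolean matches(final String langTag, final String rangeTag) {
        final String[] language = langTag.toLowerCase(Locale.ROOT).split("-");
        final String[] range = rangeTag.toLowerCase(Locale.ROOT).split("-");
        // Step 2: the first subtags must match.
        if (!matchSubTag(language[0], range[0])) {
            return false;
        }
        int rPos = 1;
        int lPos = 1;
        // Step 3: try to match each remaining range subtag in order.
        while (rPos < range.length) {
            if (range[rPos].equals("*")) {
                rPos++;               // 3A: "*" matches zero or more subtags
                continue;
            }
            if (lPos >= language.length) {
                return false;         // 3B: language tag exhausted before the range
            }
            if (matchSubTag(language[lPos], range[rPos])) {
                lPos++; rPos++;       // 3C: subtags match, advance both
                continue;
            }
            if (language[lPos].length() == 1) {
                return false;         // 3D: a singleton subtag ends the comparison
            }
            lPos++;                   // 3E: skip the unmatched language subtag
        }
        return true;                  // Step 4: all range subtags consumed
    }

    public static void main(final String[] args) {
        System.out.println(matches("de-Latn-DE-1996", "de-*-DE")); // true
        System.out.println(matches("de-Deva", "de-*-DE"));         // false
        System.out.println(matches("de-x-DE", "de-*-DE"));         // false (singleton "x")
    }
}

Running main() prints true, false, false: the wildcard consumes the script subtag in the first case, a range subtag is left unmatched in the second, and the singleton "x" blocks skipping in the third.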
From: <mar...@us...> - 2014-06-30 11:32:35
Revision: 8508 http://sourceforge.net/p/bigdata/code/8508 Author: martyncutcher Date: 2014-06-30 11:32:23 +0000 (Mon, 30 Jun 2014) Log Message: ----------- For ticket #936, modified metabits demispace use to default to "false" and support an explicit, programmatic, request to toggle between fixed allocation and demispace. Provide a utility MetabitsUtil class with "main" to toggle from a command line invocation. Modified Paths: -------------- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/rwstore/RWStore.java branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/rwstore/TestRWJournal.java Added Paths: ----------- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/rwstore/MetabitsUtil.java Added: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/rwstore/MetabitsUtil.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/rwstore/MetabitsUtil.java (rev 0) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/rwstore/MetabitsUtil.java 2014-06-30 11:32:23 UTC (rev 8508) @@ -0,0 +1,105 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2014. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package com.bigdata.rwstore; + +import java.io.File; +import java.util.Properties; + +import com.bigdata.journal.BufferMode; +import com.bigdata.journal.RWStrategy; +import com.bigdata.journal.Journal; +import com.bigdata.journal.Journal.Options; +import com.bigdata.rawstore.IRawStore; + +/** + * A utility class to explicitly change the metabits storage to allow for + * compatibility with previous versions. + * <p> + * There is an option to use a demispace rather than standard allocations to + * support stores with large numbers of allocations. If such a store needs to be + * opened by an earlier code-base, then the store must be amended to store the + * metabits in a standard allocation. 
+ * + * @author Martyn Cutcher + * + */ +public class MetabitsUtil { + + static String getArg(final String[] args, final String arg, final String def) { + for (int p = 0; p < args.length; p += 2) { + if (arg.equals(args[p])) + return args[p + 1]; + } + + return def; + } + + static Journal getStore(String storeFile) { + + final Properties properties = new Properties(); + + properties.setProperty(Options.FILE, storeFile); + + properties.setProperty(Options.BUFFER_MODE, + BufferMode.DiskRW.toString()); + + return new Journal(properties);// .getBufferStrategy(); + + } + + /** + * Example usage: + * <p> + * MatabitsUtil -store "/path/store.jnl" -usedemispace true + */ + static public void main(final String[] args) { + final String store = getArg(args, "-store", null); + if (store == null) { + System.err.println("file must be specificed with -store"); + return; + } + final File file = new File(store); + if (!file.exists()) { + System.err.println("Specified file '" + store + "' not found"); + return; + } + + final boolean usedemi = "true".equals(getArg(args, "-usedemispace", + "true")); + + final Journal jnl = getStore(store); + + try { + final RWStore rws = ((RWStrategy) jnl.getBufferStrategy()) + .getStore(); + + if (rws.ensureMetabitsDemispace(usedemi)) { // changed + jnl.commit(); + } + } finally { + jnl.close(); + } + } + +} Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/rwstore/RWStore.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/rwstore/RWStore.java 2014-06-28 01:45:04 UTC (rev 8507) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/rwstore/RWStore.java 2014-06-30 11:32:23 UTC (rev 8508) @@ -342,7 +342,7 @@ */ String META_BITS_DEMI_SPACE = RWStore.class.getName() + ".metabitsDemispace"; - String DEFAULT_META_BITS_DEMI_SPACE = "true"; + String DEFAULT_META_BITS_DEMI_SPACE = "false"; /** * Defines the number of bits that must be free in a FixedAllocator for @@ -1478,11 +1478,17 @@ // Can handle minor store version incompatibility final int storeVersion = strBuf.readInt(); - if ((storeVersion & 0xFF00) != (cVersion & 0xFF00)) { + + switch ((storeVersion & 0xFF00)) { + case (cVersion & 0xFF00): + case (cVersionDemispace & 0xFF00): + break; + default: throw new IllegalStateException( "Incompatible RWStore header version: storeVersion=" + storeVersion + ", cVersion=" + cVersion); } + m_lastDeferredReleaseTime = strBuf.readLong(); if (strBuf.readInt() != cDefaultMetaBitsSize) { throw new IllegalStateException("Store opened with unsupported metabits size"); @@ -3031,7 +3037,7 @@ final FixedOutputStream str = new FixedOutputStream(buf); try { - str.writeInt(cVersion); + str.writeInt(m_metaBitsAddr > 0 ? cVersionDemispace : cVersion); str.writeLong(m_lastDeferredReleaseTime); str.writeInt(cDefaultMetaBitsSize); str.writeInt(m_allocSizes.length); @@ -3068,7 +3074,7 @@ * writeMetaBits(). */ //final long addr = physicalAddress(m_metaBitsAddr); - final long addr = ((long) m_metaBitsAddr) << ALLOCATION_SCALEUP; + final long addr = m_metaBitsAddr < 0 ? 
physicalAddress(m_metaBitsAddr) : ((long) m_metaBitsAddr) << ALLOCATION_SCALEUP; if (addr == 0) { throw new IllegalStateException("Invalid metabits address: " + m_metaBitsAddr); } @@ -3081,7 +3087,7 @@ // Similar to writeMetaBits, we are no longer writing to a FixedAllocator managed region, // so no latched address is provided - m_writeCacheService.write(addr, ByteBuffer.wrap(buf), 0/*chk*/, false/*useChecksum*/, 0 /*latchedAddr*/); + m_writeCacheService.write(addr, ByteBuffer.wrap(buf), 0/*chk*/, false/*useChecksum*/, m_metaBitsAddr < 0 ? m_metaBitsAddr : 0 /*latchedAddr*/); } catch (InterruptedException e) { throw new RuntimeException(e); } @@ -3134,42 +3140,69 @@ * that we do not need to reallocate the metabits region when we are * writing out the updated versions of the FixedAllocators). */ -// final long oldMetaBits = m_metaBitsAddr; -// final int oldMetaBitsSize = (m_metaBits.length + m_allocSizes.length + 1) * 4; -// m_metaBitsAddr = alloc(getRequiredMetaBitsStorage(), null); + if (m_metaBitsAddr > 0) { + // already using demi-space, remove from WCS + m_writeCacheService.removeWriteToAddr(convertAddr(-m_metaBitsAddr), 0); + } else { + final int reqmbc = getRequiredMetaBitsStorage(); + int nmbaddr = 0; + // if > max alloc or explicitly use the demi-space, then drop through for demi-space + if ((!m_useMetabitsDemispace) && reqmbc < m_maxFixedAlloc) { + nmbaddr = alloc(reqmbc, null); + } - /* - * If m_metaBitsAddr < 0 then was allocated from FixedAllocators (for existing-store compatibility) - */ - if (m_metaBitsAddr < 0) { - if (physicalAddress(m_metaBitsAddr) == 0) { - throw new IllegalStateException("Returned MetaBits Address not valid!"); - } - - final int oldMetaBitsSize = (m_metaBits.length + m_allocSizes.length + 1) * 4; - // Call immediateFree - no need to defer freeof metaBits, this - // has to stop somewhere! - // No more allocations must be made - immediateFree((int) m_metaBitsAddr, oldMetaBitsSize); - - m_metaBitsAddr = 0; - } - + // If existing allocation, then free it + if (m_metaBitsAddr < 0) { + + final int oldMetaBitsSize = (m_metaBits.length + + m_allocSizes.length + 1) * 4; + + // Call immediateFree - no need to defer freeof metaBits, this + // has to stop somewhere! + // No more allocations must be made + immediateFree((int) m_metaBitsAddr, oldMetaBitsSize); + + } + + m_metaBitsAddr = nmbaddr; + } + if (m_metaBitsAddr == 0) { // Allocate special region to be able to store maximum metabits (128k of 2 64K demi-space // Must be aligned on 128K boundary and allocations are made in units of 64K. + // + // May need to extend the file for teh demi-space! 
while (m_nextAllocation % 2 != 0) { m_nextAllocation--; } m_metaBitsAddr = -m_nextAllocation; // must be positive to differentiate from FixedAllocator address m_nextAllocation -= 2; // allocate 2 * 64K - } else { // remove previous write from WCS - m_writeCacheService.removeWriteToAddr(convertAddr(-m_metaBitsAddr), 0); + + // Check for file extension + while (m_nextAllocation <= m_fileSize) { + extendFile(); + } + + if (log.isInfoEnabled()) + log.info("Using Demi-space metabits"); } - // Now "toggle" m_metaBitsAddr - 64K boundary - m_metaBitsAddr ^= 0x01; // toggle zero or 64K offset + if (m_metaBitsAddr > 0) { // Demi-Space + // Now "toggle" m_metaBitsAddr - 64K boundary + m_metaBitsAddr ^= 0x01; // toggle zero or 64K offset + } + if (log.isDebugEnabled()) { + final long mbaddr; + if (m_metaBitsAddr < 0) { + mbaddr = physicalAddress((int) m_metaBitsAddr); + } else { + mbaddr = convertAddr(-m_metaBitsAddr); // maximum 48 bit address range + } + + log.debug("Writing metabits at " + mbaddr); + } + // There must be no buffered deferred frees // assert m_deferredFreeOut.getBytesWritten() == 0; @@ -3451,8 +3484,10 @@ * Versions * 0x0300 - extended header to include reserved ints * 0x0400 - removed explicit BlobAllocators + * 0x0500 - using metaBits demi-space */ final private int cVersion = 0x0400; + final private int cVersionDemispace = 0x0500; /** * cReservedMetaBits is the reserved space in the metaBits header @@ -4233,31 +4268,41 @@ * * @return long representation of metaBitsAddr PLUS the size */ - public long getMetaBitsAddr() { - assert m_metaBitsAddr > 0; - - // long ret = physicalAddress((int) m_metaBitsAddr); - long ret = convertAddr(-m_metaBitsAddr); // maximum 48 bit address range - ret <<= 16; - - // include space for version, allocSizes and deferred free info AND cDefaultMetaBitsSize - final int metaBitsSize = cMetaHdrFields + m_metaBits.length + m_allocSizes.length; - ret += metaBitsSize; - - if (log.isTraceEnabled()) - log.trace("Returning metabitsAddr: " + ret + ", for " - + m_metaBitsAddr + " - " + m_metaBits.length + ", " - + metaBitsSize); + public long getMetaBitsAddr() { + long ret = 0; - return ret; - } + if (m_metaBitsAddr < 0) { + ret = physicalAddress((int) m_metaBitsAddr); + } else { + // long ret = physicalAddress((int) m_metaBitsAddr); + ret = convertAddr(-m_metaBitsAddr); // maximum 48 bit address range + } + ret <<= 16; + // include space for version, allocSizes and deferred free info AND + // cDefaultMetaBitsSize + final int metaBitsSize = cMetaHdrFields + m_metaBits.length + + m_allocSizes.length; + ret += metaBitsSize; + + if (log.isTraceEnabled()) + log.trace("Returning metabitsAddr: " + ret + ", for " + + m_metaBitsAddr + " - " + m_metaBits.length + ", " + + metaBitsSize); + + return ret; + } + /** * - * @return the address of the metaBits demi-space + * @return the address of the metaBits */ - public long getMetaBitsDemiSpace() { - return convertAddr(-m_metaBitsAddr); + public long getMetaBitsStoreAddress() { + if (m_metaBitsAddr < 0) { + return physicalAddress((int) m_metaBitsAddr); + } else { + return convertAddr(-m_metaBitsAddr); // maximum 48 bit address range + } } /** @@ -7188,6 +7233,31 @@ return ret; } + + /** + * Forces a reset of the metabits allocation on the next commit. + * <p> + * Note that a side-effect of this is that there will be a memory leak + * of either a FixedAllocation slot or an existing demi-space. + * <p> + * @param useDemispace + * @return whether the storage has been modified. 
+ */ + public boolean ensureMetabitsDemispace(final boolean useDemispace) { + final boolean isDemispace = m_metaBitsAddr > 0; + + if (isDemispace != useDemispace || m_useMetabitsDemispace != useDemispace) { + m_useMetabitsDemispace = useDemispace; + + m_metaBitsAddr = 0; + + m_recentAlloc = true; // force commit + + return true; + } else { + return false; + } + } // public void prepareForRebuild(final HARebuildRequest req) { // assert m_rebuildRequest == null; Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/rwstore/TestRWJournal.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/rwstore/TestRWJournal.java 2014-06-28 01:45:04 UTC (rev 8507) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/rwstore/TestRWJournal.java 2014-06-30 11:32:23 UTC (rev 8508) @@ -1540,23 +1540,116 @@ public void test_metaAlloc() { Journal store = (Journal) getStore(); - try { + try { - final RWStrategy bs = (RWStrategy) store.getBufferStrategy(); + final RWStrategy bs = (RWStrategy) store.getBufferStrategy(); - final RWStore rw = bs.getStore(); - long realAddr = 0; - for (int i = 0; i < 100000; i++) { - int allocAddr = rw.metaAlloc(); + final RWStore rw = bs.getStore(); + long realAddr = 0; + for (int r = 0; r < 100; r++) { + for (int i = 0; i < 1000; i++) { + int allocAddr = rw.metaAlloc(); + + realAddr = rw.metaBit2Addr(allocAddr); + } + rw.commit(); + } - realAddr = rw.metaBit2Addr(allocAddr); - } - if(log.isInfoEnabled())log.info("metaAlloc lastAddr: " + realAddr); + if (log.isInfoEnabled()) + log.info("metaAlloc lastAddr: " + realAddr); } finally { store.destroy(); } } + + /** + * Tests the MetabitsUtil to switch the demispace. + * + * If the address is a demispace then addr % 64K == 0. 
+ * + * If the address is NOT a demispace then it should be less than first + * demispace + */ + public void test_metabitsDemispace() { + Journal store = (Journal) getStore(); + try { + RWStrategy bs = (RWStrategy) store.getBufferStrategy(); + RWStore rw = bs.getStore(); + final String fname = rw.getStoreFile().getAbsolutePath(); + + store.commit(); + + final long fa1 = rw.getMetaBitsStoreAddress(); + + rw.ensureMetabitsDemispace(true); + store.commit(); + + final long ds1 = rw.getMetaBitsStoreAddress(); + + assertTrue((ds1 & 0xFFFF) == 0); // MOD 64K + assertTrue(ds1 > fa1); + + rw.ensureMetabitsDemispace(false); + store.commit(); + + final long fa2 = rw.getMetaBitsStoreAddress(); + + assertTrue(ds1 > fa2); + + rw.ensureMetabitsDemispace(true); + store.commit(); + + final long ds2 = rw.getMetaBitsStoreAddress(); + + assertTrue((ds2 & 0xFFFF) == 0); + assertTrue(ds2 > ds1); + + // Now use MetaBitsUtil + + store.close(); + + MetabitsUtil.main(new String[] { "-store", fname, "-usedemispace", "false"}); + + store = getExplicitStore(fname); + + bs = (RWStrategy) store.getBufferStrategy(); + rw = bs.getStore(); + final long fa3 = rw.getMetaBitsStoreAddress(); + + assertTrue(fa3 < ds1); + + store.close(); + + MetabitsUtil.main(new String[] { "-store", fname, "-usedemispace", "true"}); + + store = getExplicitStore(fname); + + bs = (RWStrategy) store.getBufferStrategy(); + rw = bs.getStore(); + + final long ds3 = rw.getMetaBitsStoreAddress(); + assertTrue((ds3 & 0xFFFF) == 0); + assertTrue(ds3 > ds2); + + } finally { + store.destroy(); + } + } + + Journal getExplicitStore(String storeFile) { + + final Properties properties = new Properties(); + + properties.setProperty(Options.FILE, storeFile); + + properties.setProperty(Options.BUFFER_MODE, + BufferMode.DiskRW.toString()); + + return new Journal(properties);// .getBufferStrategy(); + + } + static class DummyAllocationContext implements IAllocationContext { } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
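A usage sketch for the new utility, driving it from Java exactly as the new test_metabitsDemispace test does. This is not shipped code: the store path is an invented example, and the alignment check mirrors the test's assertion that demi-space addresses are 64K-aligned:

import java.util.Properties;

import com.bigdata.journal.BufferMode;
import com.bigdata.journal.Journal;
import com.bigdata.journal.Journal.Options;
import com.bigdata.journal.RWStrategy;
import com.bigdata.rwstore.MetabitsUtil;
import com.bigdata.rwstore.RWStore;

public class MetabitsToggleExample {

    public static void main(final String[] args) {

        final String file = "/var/data/store.jnl"; // example path only

        // Same effect as the command line:
        //   MetabitsUtil -store /var/data/store.jnl -usedemispace true
        MetabitsUtil.main(new String[] { "-store", file, "-usedemispace", "true" });

        // Reopen the journal and report where the metabits now live.
        final Properties p = new Properties();
        p.setProperty(Options.FILE, file);
        p.setProperty(Options.BUFFER_MODE, BufferMode.DiskRW.toString());

        final Journal jnl = new Journal(p);
        try {
            final RWStore rws = ((RWStrategy) jnl.getBufferStrategy()).getStore();
            final long addr = rws.getMetaBitsStoreAddress();
            // Demi-space addresses are 64K-aligned; FixedAllocator addresses are not.
            System.out.println("metabits @" + addr
                    + ((addr & 0xFFFFL) == 0 ? " (demi-space)" : " (fixed allocation)"));
        } finally {
            jnl.close();
        }
    }
}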
From: <mar...@us...> - 2014-07-03 10:21:32
Revision: 8521 http://sourceforge.net/p/bigdata/code/8521 Author: martyncutcher Date: 2014-07-03 10:21:20 +0000 (Thu, 03 Jul 2014) Log Message: ----------- Added CommitState to ensure that the RWStore is able to reset/abort following an error following or during the RWStore.commit() call - ticket #973. Modified Paths: -------------- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/rwstore/RWStore.java branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/rwstore/TestRWJournal.java Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/rwstore/RWStore.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/rwstore/RWStore.java 2014-07-02 22:44:27 UTC (rev 8520) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/rwstore/RWStore.java 2014-07-03 10:21:20 UTC (rev 8521) @@ -343,7 +343,7 @@ String META_BITS_DEMI_SPACE = RWStore.class.getName() + ".metabitsDemispace"; String DEFAULT_META_BITS_DEMI_SPACE = "false"; - + /** * Defines the number of bits that must be free in a FixedAllocator for * it to be added to the free list. This is used to ensure a level @@ -824,8 +824,7 @@ m_metaBits = new int[m_metaBitsSize]; m_metaTransientBits = new int[m_metaBitsSize]; - - + m_quorum = quorum; m_fd = fileMetadata.file; @@ -1488,7 +1487,6 @@ "Incompatible RWStore header version: storeVersion=" + storeVersion + ", cVersion=" + cVersion); } - m_lastDeferredReleaseTime = strBuf.readLong(); if (strBuf.readInt() != cDefaultMetaBitsSize) { throw new IllegalStateException("Store opened with unsupported metabits size"); @@ -2880,15 +2878,19 @@ // } /** - * The semantics of reset are to revert unisolated writes to committed state. - * + * The semantics of reset are to revert unisolated writes to committed + * state. + * <p> * Unisolated writes must also be removed from the write cache. - * + * <p> * The AllocBlocks of the FixedAllocators maintain the state to determine * the correct reset behavior. - * + * <p> * If the store is using DirectFixedAllocators then an IllegalStateException - * is thrown + * is thrown. + * <p> + * If there is an active {@link #m_commitStateRef}, then this indicates a + * failure after the {@link RWStore#commit()} had "succeeded". */ public void reset() { @@ -2899,7 +2901,16 @@ try { assertOpen(); // assertNoRebuild(); + + final CommitState commitState = m_commitStateRef + .getAndSet(null/* newValue */); + + if (commitState != null) { + commitState.reset(); // restore state values on RWStore. + + } + boolean isolatedWrites = false; /** * Clear all allocators, not just dirty allocators, since we also @@ -3101,23 +3112,89 @@ return requiresCommit(); } -// static final float s_version = 3.0f; -// -// public String getVersionString() { -// return "RWStore " + s_version; -// } + /** + * Object recording the undo state for the {@link RWStore#commit()} ... + * {@link RWStore#postCommit()} sequence. The {@link CommitState} must + * either {@link CommitState#commit()} or {@link CommitState#reset()}. Those + * {@link CommitState} methods are invoked out of the corresponding + * {@link RWStore} methods. + * + * @see <a href="http://trac.bigdata.com/ticket/973" >RWStore commit is not + * robust to internal failure.</a> + */ + private class CommitState { + /* + * Critical pre-commit state that must be restored if a commit is + * discarded. 
+ */ + private final int m_lastCommittedNextAllocation; + private final long m_storageStatsAddr; + private final int m_metaBitsAddr; + CommitState() { + // retain copy of critical pre-commit state + if (!m_allocationWriteLock.isHeldByCurrentThread()) + throw new IllegalMonitorStateException(); + m_lastCommittedNextAllocation = RWStore.this.m_committedNextAllocation; + m_storageStatsAddr = RWStore.this.m_storageStatsAddr; + m_metaBitsAddr = RWStore.this.m_metaBitsAddr; + } + + void postCommit() { + + // NOP + + } + + /** Reset pre-commit state to support reset/abort/rollback. */ + void reset() { + if (!m_allocationWriteLock.isHeldByCurrentThread()) + throw new IllegalMonitorStateException(); + RWStore.this.m_storageStatsAddr = m_storageStatsAddr; + RWStore.this.m_committedNextAllocation = m_lastCommittedNextAllocation; + RWStore.this.m_metaBitsAddr = m_metaBitsAddr; + } + + } + + /** + * @see <a href="http://trac.bigdata.com/ticket/973" >RWStore commit is not + * robust to internal failure.</a> + */ + private final AtomicReference<CommitState> m_commitStateRef = new AtomicReference<CommitState>(); + + /** + * Package private method used by the test suite. + */ + void clearCommitStateRef() { + + m_commitStateRef.set(null/* newValue */); + + } + + @Override public void commit() { assertOpen(); // assertNoRebuild(); checkCoreAllocations(); - // take allocation lock to prevent other threads allocating during commit + // take allocation lock to prevent other threads allocating during commit m_allocationWriteLock.lock(); try { + /* + * Create a transient object to retain values of previous + * commitState to support abort/reset/rollback if requested after + * this commit() is requested. + */ + if (!m_commitStateRef.compareAndSet(null/* expect */, + new CommitState())) { + throw new IllegalStateException( + "RWStore commitState found, incomplete previous commit must be rolled back/aborted"); + } + // final int totalFreed = checkDeferredFrees(true, journal); // free now if possible // // if (totalFreed > 0 && log.isInfoEnabled()) { @@ -3150,20 +3227,20 @@ if ((!m_useMetabitsDemispace) && reqmbc < m_maxFixedAlloc) { nmbaddr = alloc(reqmbc, null); } - + // If existing allocation, then free it - if (m_metaBitsAddr < 0) { - + if (m_metaBitsAddr < 0) { + final int oldMetaBitsSize = (m_metaBits.length + m_allocSizes.length + 1) * 4; - // Call immediateFree - no need to defer freeof metaBits, this - // has to stop somewhere! - // No more allocations must be made - immediateFree((int) m_metaBitsAddr, oldMetaBitsSize); - - } - + // Call immediateFree - no need to defer freeof metaBits, this + // has to stop somewhere! 
+ // No more allocations must be made + immediateFree((int) m_metaBitsAddr, oldMetaBitsSize); + + } + m_metaBitsAddr = nmbaddr; } @@ -3188,8 +3265,8 @@ } if (m_metaBitsAddr > 0) { // Demi-Space - // Now "toggle" m_metaBitsAddr - 64K boundary - m_metaBitsAddr ^= 0x01; // toggle zero or 64K offset + // Now "toggle" m_metaBitsAddr - 64K boundary + m_metaBitsAddr ^= 0x01; // toggle zero or 64K offset } if (log.isDebugEnabled()) { @@ -3199,7 +3276,7 @@ } else { mbaddr = convertAddr(-m_metaBitsAddr); // maximum 48 bit address range } - + log.debug("Writing metabits at " + mbaddr); } @@ -3254,9 +3331,6 @@ throw new RuntimeException(e); } - // Now remember the committed next allocation that will be checked in reset() - m_committedNextAllocation = m_nextAllocation; - // Should not write rootBlock, this is responsibility of client // to provide control // writeFileSpec(); @@ -3291,12 +3365,13 @@ log.debug(showAllocatorList()); } - + } /** * {@inheritDoc} */ + @Override public Lock getCommitLock() { return m_allocationWriteLock; @@ -3308,11 +3383,25 @@ * <p> * Commits the FixedAllocator bits */ + @Override public void postCommit() { if (!m_allocationWriteLock.isHeldByCurrentThread()) throw new IllegalMonitorStateException(); + final CommitState commitState = m_commitStateRef.getAndSet(null/* newValue */); + + if (commitState == null) { + + throw new IllegalStateException( + "No current CommitState found on postCommit"); + + } else { + + commitState.postCommit(); + + } + for (FixedAllocator fa : m_commitList) { fa.postCommit(); @@ -3323,6 +3412,7 @@ } + @Override public int checkDeferredFrees(final AbstractJournal journal) { if (journal == null) @@ -4150,7 +4240,7 @@ try { if (addr >= 0) { - + return addr & 0xFFFFFFE0; } else { @@ -4277,31 +4367,31 @@ * * @return long representation of metaBitsAddr PLUS the size */ - public long getMetaBitsAddr() { + public long getMetaBitsAddr() { long ret = 0; - + if (m_metaBitsAddr < 0) { ret = physicalAddress((int) m_metaBitsAddr); } else { - // long ret = physicalAddress((int) m_metaBitsAddr); + // long ret = physicalAddress((int) m_metaBitsAddr); ret = convertAddr(-m_metaBitsAddr); // maximum 48 bit address range } - ret <<= 16; - + ret <<= 16; + // include space for version, allocSizes and deferred free info AND // cDefaultMetaBitsSize final int metaBitsSize = cMetaHdrFields + m_metaBits.length + m_allocSizes.length; - ret += metaBitsSize; + ret += metaBitsSize; + + if (log.isTraceEnabled()) + log.trace("Returning metabitsAddr: " + ret + ", for " + + m_metaBitsAddr + " - " + m_metaBits.length + ", " + + metaBitsSize); - if (log.isTraceEnabled()) - log.trace("Returning metabitsAddr: " + ret + ", for " - + m_metaBitsAddr + " - " + m_metaBits.length + ", " - + metaBitsSize); + return ret; + } - return ret; - } - /** * * @return the address of the metaBits @@ -5178,7 +5268,7 @@ checkRootBlock(rootBlock); assertOpen(); - + if (log.isTraceEnabled()) { log.trace("Writing new rootblock with commitCounter: " + rootBlock.getCommitCounter() + ", commitRecordAddr: " @@ -7242,7 +7332,7 @@ return ret; } - + /** * Forces a reset of the metabits allocation on the next commit. 
* <p> Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/rwstore/TestRWJournal.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/rwstore/TestRWJournal.java 2014-07-02 22:44:27 UTC (rev 8520) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/rwstore/TestRWJournal.java 2014-07-03 10:21:20 UTC (rev 8521) @@ -27,24 +27,16 @@ package com.bigdata.rwstore; -import java.io.EOFException; import java.io.File; import java.io.IOException; -import java.io.InputStream; -import java.io.ObjectInputStream; -import java.io.ObjectOutputStream; import java.nio.ByteBuffer; import java.util.ArrayList; -import java.util.Arrays; import java.util.Properties; import java.util.Random; import java.util.TreeMap; import java.util.UUID; import java.util.concurrent.ExecutorService; import java.util.concurrent.Future; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.zip.GZIPInputStream; -import java.util.zip.GZIPOutputStream; import junit.extensions.proxy.ProxyTestSuite; import junit.framework.Test; @@ -577,8 +569,6 @@ * * @author <a href="mailto:tho...@us...">Bryan * Thompson</a> - * @version $Id: TestRWJournal.java 4010 2010-12-16 12:44:43Z martyncutcher - * $ */ public static class TestRawStore extends AbstractRestartSafeTestCase { @@ -1540,116 +1530,23 @@ public void test_metaAlloc() { Journal store = (Journal) getStore(); - try { + try { - final RWStrategy bs = (RWStrategy) store.getBufferStrategy(); + final RWStrategy bs = (RWStrategy) store.getBufferStrategy(); - final RWStore rw = bs.getStore(); - long realAddr = 0; - for (int r = 0; r < 100; r++) { - for (int i = 0; i < 1000; i++) { - int allocAddr = rw.metaAlloc(); - - realAddr = rw.metaBit2Addr(allocAddr); - } - rw.commit(); + final RWStore rw = bs.getStore(); + long realAddr = 0; + for (int i = 0; i < 100000; i++) { + int allocAddr = rw.metaAlloc(); + + realAddr = rw.metaBit2Addr(allocAddr); } - - if (log.isInfoEnabled()) - log.info("metaAlloc lastAddr: " + realAddr); + if(log.isInfoEnabled())log.info("metaAlloc lastAddr: " + realAddr); } finally { store.destroy(); } } - - /** - * Tests the MetabitsUtil to switch the demispace. - * - * If the address is a demispace then addr % 64K == 0. 
- * - * If the address is NOT a demispace then it should be less than first - * demispace - */ - public void test_metabitsDemispace() { - Journal store = (Journal) getStore(); - try { - RWStrategy bs = (RWStrategy) store.getBufferStrategy(); - RWStore rw = bs.getStore(); - final String fname = rw.getStoreFile().getAbsolutePath(); - - store.commit(); - - final long fa1 = rw.getMetaBitsStoreAddress(); - - rw.ensureMetabitsDemispace(true); - store.commit(); - - final long ds1 = rw.getMetaBitsStoreAddress(); - - assertTrue((ds1 & 0xFFFF) == 0); // MOD 64K - assertTrue(ds1 > fa1); - - rw.ensureMetabitsDemispace(false); - store.commit(); - - final long fa2 = rw.getMetaBitsStoreAddress(); - - assertTrue(ds1 > fa2); - - rw.ensureMetabitsDemispace(true); - store.commit(); - - final long ds2 = rw.getMetaBitsStoreAddress(); - - assertTrue((ds2 & 0xFFFF) == 0); - assertTrue(ds2 > ds1); - - // Now use MetaBitsUtil - - store.close(); - - MetabitsUtil.main(new String[] { "-store", fname, "-usedemispace", "false"}); - - store = getExplicitStore(fname); - - bs = (RWStrategy) store.getBufferStrategy(); - rw = bs.getStore(); - final long fa3 = rw.getMetaBitsStoreAddress(); - - assertTrue(fa3 < ds1); - - store.close(); - - MetabitsUtil.main(new String[] { "-store", fname, "-usedemispace", "true"}); - - store = getExplicitStore(fname); - - bs = (RWStrategy) store.getBufferStrategy(); - rw = bs.getStore(); - - final long ds3 = rw.getMetaBitsStoreAddress(); - assertTrue((ds3 & 0xFFFF) == 0); - assertTrue(ds3 > ds2); - - } finally { - store.destroy(); - } - } - - Journal getExplicitStore(String storeFile) { - - final Properties properties = new Properties(); - - properties.setProperty(Options.FILE, storeFile); - - properties.setProperty(Options.BUFFER_MODE, - BufferMode.DiskRW.toString()); - - return new Journal(properties);// .getBufferStrategy(); - - } - static class DummyAllocationContext implements IAllocationContext { } @@ -2104,7 +2001,136 @@ store.destroy(); } } + + /** + * Verify that we correctly restore the RWStore commit state if + * {@link RWStore#commit()} is followed by {@link RWStore#reset()} + * rather than {@link RWStore#postCommit()}. + * + * @see <a href="http://trac.bigdata.com/ticket/973" >RWStore commit is + * not robust to internal failure.</a> + */ + public void test_commitState() { + Journal store = (Journal) getStore(); + try { + + final RWStrategy bs = (RWStrategy) store.getBufferStrategy(); + + final RWStore rws = bs.getStore(); + + final long addr = bs.write(randomData(78)); + + // do 1st half of the RWStore commit protocol. + rws.commit(); + + // then discard write set. + store.abort(); + + assertFalse(bs.isCommitted(addr)); // rolled back + + // now cycle standard commit to confirm correct reset + for (int c = 0; c < 50; c++) { + bs.write(randomData(78)); + store.commit(); + } + + + } finally { + store.destroy(); + } + } + /** + * Test verifies that a failure to retain the commit state in + * {@link RWStore#commit()} will cause problems if the write set is + * discarded by {@link RWStore#reset()} such that subsequent write sets + * run into persistent addressing errors. + * + * @see <a href="http://trac.bigdata.com/ticket/973" >RWStore commit is + * not robust to internal failure.</a> + */ + public void test_commitStateError() { + Journal store = (Journal) getStore(); + try { + + RWStrategy bs = (RWStrategy) store.getBufferStrategy(); + + RWStore rws = bs.getStore(); + + final long addr = bs.write(randomData(78)); + + // do first half of the RWStore protocol. 
+ rws.commit(); + + /* + * remove the commit state such that subsequent abort()/reset() + * will fail to correctly restore the pre-commit state. + */ + rws.clearCommitStateRef(); + + // abort() succeeds because it is allowed even if commit() was + // not called. + store.abort(); + + assertFalse(bs.isCommitted(addr)); // rolled back + + try { + // now cycle standard commit to force an error from bad reset + for (int c = 0; c < 50; c++) { + bs.write(randomData(78)); + store.commit(); + } + fail("Expected failure"); + } catch (Exception e) { + // expected + log.info("Expected!"); + } + + } finally { + store.destroy(); + } + } + + /** + * Verify that a double-commit causes an illegal state exception. + * Further verify that an {@link RWStore#reset()} allwos us to then + * apply and commit new write sets. + * + * @see <a href="http://trac.bigdata.com/ticket/973" >RWStore commit is + * not robust to internal failure.</a> + */ + public void test_commitStateIllegal() { + final Journal store = (Journal) getStore(); + try { + + final RWStrategy bs = (RWStrategy) store.getBufferStrategy(); + + final RWStore rws = bs.getStore(); + + bs.write(randomData(78)); + + rws.commit(); + + try { + store.commit(); + + fail("Expected failure"); + } catch (Exception ise) { + if (InnerCause.isInnerCause(ise, IllegalStateException.class)) { + store.abort(); + + store.commit(); + } else { + fail("Unexpected Exception"); + } + } + + + } finally { + store.destroy(); + } + } + public void test_allocCommitFreeWithHistory() { Journal store = (Journal) getStore(4); try { @@ -3052,8 +3078,6 @@ * * @author <a href="mailto:tho...@us...">Bryan * Thompson</a> - * @version $Id: TestRWJournal.java 4010 2010-12-16 12:44:43Z martyncutcher - * $ */ public static class TestMROW extends AbstractMROWTestCase { @@ -3065,15 +3089,11 @@ super(name); } - protected IRawStore getStore(String storeFile) { + protected IRawStore getStore() { final Properties properties = getProperties(); - if (storeFile == null) { - properties.setProperty(Options.CREATE_TEMP_FILE, "true"); - } else { - properties.setProperty(Options.FILE, storeFile); - } + properties.setProperty(Options.CREATE_TEMP_FILE, "true"); properties.setProperty(Options.DELETE_ON_EXIT, "true"); @@ -3085,86 +3105,6 @@ } - protected IRawStore getStore() { - - return getStore(null); // no file provided by default - - } - - static long getLongArg(final String[] args, final String arg, final long def) { - final String sv = getArg(args, arg, null); - - return sv == null ? 
def : Long.parseLong(sv); - } - - static String getArg(final String[] args, final String arg, final String def) { - for (int p = 0; p < args.length; p+=2) { - if (arg.equals(args[p])) - return args[p+1]; - } - - return def; - } - - /** - * Stress variant to support multiple parameterised runs - * - * Arguments - * - * -file - optional explicit file path - * -clients - reader threads - * -nwrites - number of records written - * -reclen - size of record written - * -ntrials - number of readers - * -nreads - number of reads made by each reader - * -nruns - number of times to repeat process with reopen each time - */ - public static void main(final String[] args) throws Exception { - final TestMROW test = new TestMROW("main"); - - final String storeFile = getArg(args, "-file", null); - - Journal store = (Journal) test.getStore(storeFile); - try { - - final long timeout = 20; - - final int nclients = (int) getLongArg(args, "-clients", 20); // 20 - - final long nwrites = getLongArg(args, "-nwrites", 100000); //1000000; - - final int writeDelayMillis = 1; - - final long ntrials = getLongArg(args, "-ntrials", 100000); // 100000; - - final int reclen = (int) getLongArg(args, "-reclen", 128); // 128; - - final long nreads = getLongArg(args, "-nreads", 1000); // 1000; - - final long nruns = getLongArg(args, "-nruns", 1); // 1000; - - final AtomicInteger nerr = new AtomicInteger(); - - for (int i = 0; i < nruns; i++) { - doMROWTest(store, nwrites, writeDelayMillis, timeout, nclients, - ntrials, reclen, nreads, nerr, true /*readAll*/); - - store.commit(); - - store = (new TestRWJournal()).reopenStore(store); - - System.out.println("Completed run: " + i); - } - - } finally { - - if (storeFile == null) - store.destroy(); - - } - - } - } /** @@ -3172,8 +3112,6 @@ * * @author <a href="mailto:tho...@us...">Bryan * Thompson</a> - * @version $Id: TestRWJournal.java 4010 2010-12-16 12:44:43Z martyncutcher - * $ */ public static class TestMRMW extends AbstractMRMWTestCase { @@ -3208,8 +3146,6 @@ * * @author <a href="mailto:tho...@us...">Bryan * Thompson</a> - * @version $Id: TestRWJournal.java 4010 2010-12-16 12:44:43Z martyncutcher - * $ */ public static class TestInterrupts extends AbstractInterruptsTestCase { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
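The commit-state tests added above all hinge on one contract: RWStore.commit() performs only the first half of the commit protocol, after which the write set must either be completed by postCommit() or rolled back by reset(), and reset() can only succeed if commit() retained the pre-commit state. The toy state machine below reduces that contract to a few lines; it is a sketch with invented names (CommitProtocolSketch and its members are not bigdata API), not the RWStore implementation.

import java.util.ArrayList;
import java.util.List;

public class CommitProtocolSketch {

    private final List<String> committed = new ArrayList<String>();
    private final List<String> writeSet = new ArrayList<String>();
    private List<String> commitState; // snapshot retained by commit()

    void write(final String record) { writeSet.add(record); }

    // First half of the protocol: retain enough state to undo the commit.
    void commit() {
        commitState = new ArrayList<String>(committed);
        committed.addAll(writeSet);
    }

    // Second half, success path: the commit becomes durable.
    void postCommit() {
        commitState = null;
        writeSet.clear();
    }

    // Second half, failure path: restore the retained pre-commit state.
    void reset() {
        if (commitState != null) {
            committed.clear();
            committed.addAll(commitState);
            commitState = null;
        }
        writeSet.clear();
    }

    public static void main(final String[] args) {
        final CommitProtocolSketch s = new CommitProtocolSketch();
        s.write("A");
        s.commit(); // first half only
        s.reset();  // roll back instead of postCommit()
        System.out.println(s.committed); // [] -- "A" was rolled back
    }
}

Dropping the retained state before reset() is the failure mode that test_commitStateError() provokes with clearCommitStateRef(): reset() then has nothing to restore, and subsequent commits run into persistent addressing errors.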
From: <tho...@us...> - 2014-07-15 13:01:43
Revision: 8549 http://sourceforge.net/p/bigdata/code/8549 Author: thompsonbry Date: 2014-07-15 13:01:35 +0000 (Tue, 15 Jul 2014)
Log Message:
-----------
Bug fix to the query deadline support. The remaining time until the next check of the deadline queue was computed incorrectly. This resulted in a progressive increase in the time until the next deadline check. The logic in QueryEngine.QueryTask.run() to check the deadline queue has been fixed. The code has been converted to use nanoseconds rather than milliseconds for checking deadlines. The deadline check code now runs every 100ms, as intended. We should recheck hot query performance for large concurrent query workloads to ensure that this does not translate into a query penalty (it should not, as the code is polling the queue of query chunks available for processing and will grab a chunk before 100ms has expired if one is available).

See #242 (Deadlines do not play well with GROUP_BY, ORDER_BY, etc.)
See #772 (Query timeout only checked at operator start/stop)
See #865 (OutOfMemoryError instead of Timeout for SPARQL Property Paths)

Modified Paths:
--------------
    branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/engine/AbstractRunningQuery.java
    branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/engine/IRunningQuery.java
    branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/engine/QueryDeadline.java
    branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java
    branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/bop/engine/TestQueryDeadlineOrder.java
    branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java

Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/engine/AbstractRunningQuery.java
===================================================================
--- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/engine/AbstractRunningQuery.java	2014-07-14 22:21:43 UTC (rev 8548)
+++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/engine/AbstractRunningQuery.java	2014-07-15 13:01:35 UTC (rev 8549)
@@ -99,7 +99,6 @@
  * first result when compared with pipelined evaluation.
  *
  * @author <a href="mailto:tho...@us...">Bryan Thompson</a>
- * @version $Id$
  */
 abstract public class AbstractRunningQuery implements IRunningQuery {

Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/engine/IRunningQuery.java
===================================================================
--- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/engine/IRunningQuery.java	2014-07-14 22:21:43 UTC (rev 8548)
+++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/engine/IRunningQuery.java	2014-07-15 13:01:35 UTC (rev 8549)
@@ -114,8 +114,8 @@
     Map<Integer/* bopId */, BOpStats> getStats();

     /**
-     * Return the query deadline (the time at which it will terminate regardless
-     * of its run state).
+     * Return the query deadline in milliseconds (the time at which it will
+     * terminate regardless of its run state).
      *
      * @return The query deadline (milliseconds since the epoch) and
      *         {@link Long#MAX_VALUE} if no explicit deadline was specified.
Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/engine/QueryDeadline.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/engine/QueryDeadline.java 2014-07-14 22:21:43 UTC (rev 8548) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/engine/QueryDeadline.java 2014-07-15 13:01:35 UTC (rev 8549) @@ -19,9 +19,9 @@ class QueryDeadline implements Comparable<QueryDeadline> { /** - * The deadline for this query. + * The deadline for this query (in nanoseconds). */ - final long deadline; + final long deadlineNanos; /** * A reference to the query. @@ -33,14 +33,14 @@ /** * - * @param deadline - * The deadline. + * @param deadlineNanos + * The deadline for this query (in nanoseconds). * @param query * The query. */ - public QueryDeadline(final long deadline, final AbstractRunningQuery query) { + public QueryDeadline(final long deadlineNanos, final AbstractRunningQuery query) { - this.deadline = deadline; + this.deadlineNanos = deadlineNanos; this.queryRef = new WeakReference<AbstractRunningQuery>(query); @@ -61,8 +61,8 @@ */ @Override public int compareTo(final QueryDeadline o) { - final long d0 = this.deadline; - final long d1 = o.deadline; + final long d0 = this.deadlineNanos; + final long d1 = o.deadlineNanos; if (d0 < d1) return -1; if (d0 > d1) @@ -74,13 +74,13 @@ * Check the deadline on the query. If the query is not terminated and the * deadline has expired, then the query is terminated as a side-effect. * - * @param now + * @param nowNanosIsIgnored * A current timestamp. * * @return <code>null</code> if the query is terminated and * <code>this</code> if the query is not terminated. */ - QueryDeadline checkDeadline(final long now) { + QueryDeadline checkDeadline(final long nowNanosIsIgnored) { final AbstractRunningQuery q = queryRef.get(); Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java 2014-07-14 22:21:43 UTC (rev 8548) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java 2014-07-15 13:01:35 UTC (rev 8549) @@ -195,7 +195,6 @@ * query manager task for the terminated join. * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id$ * * @todo Expander patterns will continue to exist until we handle the standalone * backchainers in a different manner for scale-out so add support for @@ -669,15 +668,17 @@ throw new IllegalArgumentException(); } - deadlineQueue.add(new QueryDeadline(deadline, query)); + final long deadlineNanos = TimeUnit.MILLISECONDS.toNanos(deadline); + deadlineQueue.add(new QueryDeadline(deadlineNanos, query)); + } /** * Scan the priority queue of queries with a specified deadline, halting any * queries whose deadline has expired. */ - static private void checkDeadlines(final long now, + static private void checkDeadlines(final long nowNanos, final PriorityBlockingQueue<QueryDeadline> deadlineQueue) { /* @@ -690,7 +691,7 @@ * Check the head of the deadline queue for any queries whose * deadline has expired. */ - checkHeadOfDeadlineQueue(now, deadlineQueue); + checkHeadOfDeadlineQueue(nowNanos, deadlineQueue); if (deadlineQueue.size() > DEADLINE_QUEUE_SCAN_SIZE) { @@ -698,7 +699,7 @@ * Scan the deadline queue, removing entries for expired * queries. 
      */
-            scanDeadlineQueue(now, deadlineQueue);
+            scanDeadlineQueue(nowNanos, deadlineQueue);

         }

@@ -710,7 +711,7 @@
      * Check the head of the deadline queue for any queries whose deadline has
      * expired.
      */
-    static private void checkHeadOfDeadlineQueue(final long now,
+    static private void checkHeadOfDeadlineQueue(final long nowNanos,
             final PriorityBlockingQueue<QueryDeadline> deadlineQueue) {

         QueryDeadline x;

@@ -719,7 +720,7 @@
         while ((x = deadlineQueue.poll()) != null) {

             // test for query done or deadline expired.
-            if (x.checkDeadline(now) == null) {
+            if (x.checkDeadline(nowNanos) == null) {

                 /*
                  * This query is known to be done. It was removed from the
@@ -731,7 +732,7 @@

             }

-            if (x.deadline > now) {
+            if (x.deadlineNanos > nowNanos) {

                 /*
                  * This query has not yet reached its deadline. That means that
@@ -757,7 +758,7 @@
      * has not been reached. Therefore, periodically, we need to scan the queue
      * and clear out entries for terminated queries.
      */
-    static private void scanDeadlineQueue(final long now,
+    static private void scanDeadlineQueue(final long nowNanos,
             final PriorityBlockingQueue<QueryDeadline> deadlineQueue) {

         final List<QueryDeadline> c = new ArrayList<QueryDeadline>(
@@ -770,7 +771,7 @@

         for (QueryDeadline x : c) {

-            if (x.checkDeadline(now) != null) {
+            if (x.checkDeadline(nowNanos) != null) {

                 // return this query to the deadline queue.
                 deadlineQueue.add(x);

@@ -939,27 +940,31 @@
             if(log.isInfoEnabled())
                 log.info("Running: " + this);
             try {
-                long mark = System.currentTimeMillis();
-                long remaining = DEADLINE_CHECK_MILLIS;
+                final long deadline = TimeUnit.MILLISECONDS
+                        .toNanos(DEADLINE_CHECK_MILLIS);
+                long mark = System.nanoTime();
+                long remaining = deadline;
                 while (true) {
                     try {
+                        //log.warn("Polling deadline queue: remaining="+remaining+", deadlineCheckMillis="+DEADLINE_CHECK_MILLIS);
                         final AbstractRunningQuery q = priorityQueue.poll(
-                                remaining, TimeUnit.MILLISECONDS);
-                        final long now = System.currentTimeMillis();
-                        if ((remaining = now - mark) < 0) {
+                                remaining, TimeUnit.NANOSECONDS);
+                        final long now = System.nanoTime();
+                        if ((remaining = deadline - (now - mark)) < 0) {
+                            //log.error("Checking deadline queue");
                             /*
                              * Check for queries whose deadline is expired.
-                             *
+                             *
                              * Note: We only do this every DEADLINE_CHECK_MILLIS
                              * and then reset [mark] and [remaining].
-                             *
+                             *
                              * Note: In queue.poll(), we wait only up to
                              * the [remaining] time before the next check in
                              * queue.poll().
                              */
                             checkDeadlines(now, deadlineQueue);
                             mark = now;
-                            remaining = DEADLINE_CHECK_MILLIS;
+                            remaining = deadline;
                         }
                         // Consume chunk already on queue for this query.
if (q != null && !q.isDone()) Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/bop/engine/TestQueryDeadlineOrder.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/bop/engine/TestQueryDeadlineOrder.java 2014-07-14 22:21:43 UTC (rev 8548) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/bop/engine/TestQueryDeadlineOrder.java 2014-07-15 13:01:35 UTC (rev 8549) @@ -25,6 +25,7 @@ import java.util.Properties; import java.util.UUID; +import java.util.concurrent.TimeUnit; import junit.framework.TestCase2; @@ -155,20 +156,26 @@ final AbstractRunningQuery runningQuery1 = queryEngine.eval(UUID.randomUUID(), query1, new ListBindingSet()); - runningQuery1.setDeadline(now + 10000); + final long deadline1Millis = now + 10000/* millis */; + runningQuery1.setDeadline(deadline1Millis); + Thread.sleep(2); final AbstractRunningQuery runningQuery2 = queryEngine.eval(UUID.randomUUID(), query2, new ListBindingSet()); - runningQuery2.setDeadline(now + 20000); + final long deadline2Millis = now + 20000/* millis */; + + runningQuery2.setDeadline(deadline2Millis); final QueryDeadline queryDeadline1 = new QueryDeadline( - runningQuery1.getDeadline(), runningQuery1); + TimeUnit.MILLISECONDS.toNanos(runningQuery1.getDeadline()), + runningQuery1); final QueryDeadline queryDeadline2 = new QueryDeadline( - runningQuery2.getDeadline(), runningQuery2); + TimeUnit.MILLISECONDS.toNanos(runningQuery2.getDeadline()), + runningQuery2); // The earlier deadline is LT the later deadline. assertTrue(queryDeadline1.compareTo(queryDeadline2) < 0); @@ -180,6 +187,15 @@ assertEquals(0, queryDeadline1.compareTo(queryDeadline1)); assertEquals(0, queryDeadline2.compareTo(queryDeadline2)); + /* + * Verify that the query deadline (millis) was converted to nanos for + * QueryDeadline object. + */ + assertEquals(TimeUnit.MILLISECONDS.toNanos(deadline1Millis), + queryDeadline1.deadlineNanos); + assertEquals(TimeUnit.MILLISECONDS.toNanos(deadline2Millis), + queryDeadline2.deadlineNanos); + } } Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java 2014-07-14 22:21:43 UTC (rev 8548) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java 2014-07-15 13:01:35 UTC (rev 8549) @@ -87,7 +87,6 @@ * </pre> * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id$ * * @see TestFederatedQueryEngine */ This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
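The substance of the fix is the arithmetic on [remaining] in QueryEngine.QueryTask.run(): the old code assigned (now - mark) to [remaining], so each pass waited longer before the next deadline check, while the fixed code assigns (interval - (now - mark)) and polls in nanoseconds. A minimal standalone sketch of the corrected pattern (PeriodicCheckLoop and its work queue are illustrative stand-ins, not the QueryEngine code itself):

import java.util.concurrent.BlockingQueue;
import java.util.concurrent.TimeUnit;

public class PeriodicCheckLoop {

    // The periodic check interval, converted once to nanoseconds.
    private static final long CHECK_INTERVAL_NANOS = TimeUnit.MILLISECONDS
            .toNanos(100);

    public static void pollLoop(final BlockingQueue<Runnable> workQueue)
            throws InterruptedException {
        long mark = System.nanoTime();
        long remaining = CHECK_INTERVAL_NANOS;
        while (true) {
            // Wait for work, but never past the next periodic check.
            final Runnable r = workQueue.poll(remaining, TimeUnit.NANOSECONDS);
            final long now = System.nanoTime();
            if ((remaining = CHECK_INTERVAL_NANOS - (now - mark)) < 0) {
                checkDeadlines(now); // runs every ~100ms, as intended
                mark = now;
                remaining = CHECK_INTERVAL_NANOS;
            }
            if (r != null)
                r.run(); // work is consumed promptly, between checks
        }
    }

    private static void checkDeadlines(final long nowNanos) {
        // placeholder: scan the deadline queue, halting expired queries.
    }
}

Because poll() returns as soon as work arrives, the 100ms check interval does not delay query processing; it only bounds how stale a deadline check can be.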
From: <tho...@us...> - 2014-07-16 15:50:48
Revision: 8557 http://sourceforge.net/p/bigdata/code/8557 Author: thompsonbry Date: 2014-07-16 15:50:37 +0000 (Wed, 16 Jul 2014)
Log Message:
-----------
I have added support for hierarchical locking of the resources in a namespace to the AbstractTask class. There is a new test suite for hierarchical locking support.

We need to improve the GIST capabilities of AbstractTask before we can use it with RDF KBs. Right now, it only supports the B+Tree class and makes assumptions about ILocalBTreeView as the index type. The named solution set mechanisms use the HTree and Stream objects as well. Similar GIST support was introduced to the Journal about a year ago.

The journal, service, and NSS test suites are green.

See #585 GIST (Generalized Indices)

Modified Paths:
--------------
    branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/journal/AbstractTask.java
    branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/journal/TestJournalBasics.java
    branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/service/TestAll.java

Added Paths:
-----------
    branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/journal/TestHierarchicalLockingTasks.java

Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/journal/AbstractTask.java
===================================================================
--- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/journal/AbstractTask.java	2014-07-16 15:37:58 UTC (rev 8556)
+++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/journal/AbstractTask.java	2014-07-16 15:50:37 UTC (rev 8557)
@@ -387,10 +387,12 @@
         for(String s : resource) {

             final Name2Addr.Entry tmp = name2Addr.getEntry(s);
+
+            if (tmp != null) {

-            if(tmp != null) {
-
                 /*
+                 * Exact match on a named index.
+                 *
                  * Add a read-only copy of the entry with additional state
                  * for tracking registration and dropping of named indices.
                  *
@@ -400,6 +402,37 @@

                 n2a.put(s, new Entry(tmp));

+            } else {
+
+                /**
+                 * Add a read-only copy of the Name2Addr entry for all
+                 * entries spanned by that namespace. This provides the
+                 * additional state for tracking registration and dropping
+                 * of named indices and also supports hierarchical locking
+                 * patterns.
+                 *
+                 * Note: We do NOT fetch the indices here, just copy their
+                 * last checkpoint metadata from Name2Addr.
+                 *
+                 * @see <a
+                 *      href="http://trac.bigdata.com/ticket/566"
+                 *      > Concurrent unisolated operations against multiple
+                 *      KBs </a>
+                 */
+
+                final Iterator<String> itr = Name2Addr.indexNameScan(s,
+                        name2Addr);
+
+                while (itr.hasNext()) {
+
+                    final String t = itr.next();
+
+                    final Name2Addr.Entry tmp2 = name2Addr.getEntry(t);
+
+                    n2a.put(t, new Entry(tmp2));
+
+                }
+
             }

         }

@@ -517,7 +550,7 @@
      * delegated to this method. First, the task can use this method directly.
      * Second, the task can use {@link #getJournal()} and then use
      * {@link IJournal#getIndex(String)} on that journal, which is simply
-     * delegated to this method. See {@link IsolatedActionJournal}.
+     * delegated to this method. See {@link IsolatedActionJournal}.
      *
      * @param name
      *            The name of the index.
@@ -534,8 +567,12 @@
      *
      * @return The index.
      *
-     * @todo modify to return <code>null</code> if the index is not
-     *       registered?
+     * @todo modify to return <code>null</code> if the index is not registered?
+     *
+     *       FIXME GIST. This will throw a ClassCastException if the returned
+     *       index is an ILocalBTreeView.
+ * + * @see http://trac.bigdata.com/ticket/585 (GIST) */ @Override synchronized final public ILocalBTreeView getIndex(final String name) { @@ -955,7 +992,7 @@ * @author <a href="mailto:tho...@us...">Bryan * Thompson</a> * - * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/675" > + * @see <a href="http://trac.bigdata.com/ticket/675" > * Flush indices in parallel during checkpoint to reduce IO latency</a> */ private class CheckpointIndexTask implements Callable<Void> { @@ -1003,7 +1040,7 @@ * * @return The elapsed time in nanoseconds for this operation. * - * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/675" + * @see <a href="http://trac.bigdata.com/ticket/675" * >Flush indices in parallel during checkpoint to reduce IO * latency</a> */ @@ -1315,13 +1352,18 @@ * The transaction identifier -or- {@link ITx#UNISOLATED} IFF the * operation is NOT isolated by a transaction -or- * <code> - timestamp </code> to read from the most recent commit - * point not later than the absolute value of <i>timestamp</i> - * (a historical read). + * point not later than the absolute value of <i>timestamp</i> (a + * historical read). * @param resource * The resource on which the task will operate. E.g., the names * of the index. When the task is an unisolated write task an * exclusive lock will be requested on the named resource and the * task will NOT run until it has obtained that lock. + * <p> + * The name may identify either a namespace or a concrete index + * object. If a concrete index object is discovered, only that + * index is isolated. Otherwise all indices having the same + * prefix as the namespace are isolated. */ protected AbstractTask(final IConcurrencyManager concurrencyManager, final long timestamp, final String resource) { @@ -1346,6 +1388,11 @@ * task an exclusive lock will be requested on each named * resource and the task will NOT run until it has obtained those * lock(s). + * <p> + * The name may identify either a namespace or a concrete index + * object. If a concrete index object is discovered, only that + * index is isolated. Otherwise all indices having the same + * prefix as the namespace are isolated. */ protected AbstractTask(final IConcurrencyManager concurrencyManager, final long timestamp, final String[] resource) { @@ -1453,7 +1500,7 @@ if (transactionManager.getTx(timestamp) == null) { - /* + /** * Start tx on this data service. * * FIXME This should be passing the [readsOnCommitTime] into @@ -1464,11 +1511,11 @@ * submitted primarily for the clustered database * deployment. * - * @see https://sourceforge.net/apps/trac/bigdata/ticket/266 + * @see http://trac.bigdata.com/ticket/266 * (refactor native long tx id to thin object) * * @see <a - * href="http://sourceforge.net/apps/trac/bigdata/ticket/546" + * href="http://trac.bigdata.com/ticket/546" * > Add cache for access to historical index views on the * Journal by name and commitTime. </a> */ @@ -1588,41 +1635,40 @@ } /** - * FIXME GROUP_COMMIT: Supporting this requires us to support - * efficient scans of the indices in Name2Addr having the prefix - * values declared by [resources] since getIndex(name) will fail if - * the Name2Addr entry has not been buffered within the [n2a] cache. + * Look for a prefix that spans one or more resources. 
* - * @see <a - * href="http://sourceforge.net/apps/trac/bigdata/ticket/753" > - * HA doLocalAbort() should interrupt NSS requests and + * Note: Supporting this requires us to support efficient scans of + * the indices in Name2Addr since getIndex(name) will fail if the + * Name2Addr entry has not been buffered within the [n2a] cache. + * + * @see <a href="http://trac.bigdata.com/ticket/753" > HA + * doLocalAbort() should interrupt NSS requests and * AbstractTasks </a> - * @see <a - * href="- http://sourceforge.net/apps/trac/bigdata/ticket/566" - * > Concurrent unisolated operations against multiple KBs </a> + * @see <a href="http://trac.bigdata.com/ticket/566" > Concurrent + * unisolated operations against multiple KBs </a> */ -// if (theRequestedResource.startsWith(theDeclaredResource)) { -// -// // Possible prefix match. -// -// if (theRequestedResource.charAt(theDeclaredResource.length()) == '.') { -// -// /* -// * Prefix match. -// * -// * E.g., name:="kb.spo.osp" and the task declared the -// * resource "kb". In this case, "kb" is a PREFIX of the -// * declared resource and the next character is the separator -// * character for the resource names (this last point is -// * important to avoid unintended contention between -// * namespaces such as "kb" and "kb1"). -// */ -// return true; -// -// } -// -// } + if (theRequestedResource.startsWith(theDeclaredResource)) { + + // Possible prefix match. + + if (theRequestedResource.charAt(theDeclaredResource.length()) == '.') { + /* + * Prefix match. + * + * E.g., name:="kb.spo.osp" and the task declared the + * resource "kb". In this case, "kb" is a PREFIX of the + * declared resource and the next character is the separator + * character for the resource names (this last point is + * important to avoid unintended contention between + * namespaces such as "kb" and "kb1"). + */ + return true; + + } + + } + } return false; @@ -2488,7 +2534,7 @@ * FIXME GIST : Support registration of index types other than BTree * (HTree, Stream, etc). * - * @see https://sourceforge.net/apps/trac/bigdata/ticket/585 (GIST) + * @see http://trac.bigdata.com/ticket/585 (GIST) */ throw new UnsupportedOperationException(); @@ -2522,14 +2568,14 @@ * Note: access to an unisolated index is governed by the AbstractTask. */ @Override - public ICheckpointProtocol getUnisolatedIndex(String name) { + public ICheckpointProtocol getUnisolatedIndex(final String name) { try { /* * FIXME GIST. This will throw a ClassCastException if the * returned index is an ILocalBTreeView. * - * @see https://sourceforge.net/apps/trac/bigdata/ticket/585 (GIST) + * @see http://trac.bigdata.com/ticket/585 (GIST) */ return (ICheckpointProtocol) AbstractTask.this.getIndex(name); @@ -2599,7 +2645,7 @@ * in a ClassCastException. * * @see <a - * href="https://sourceforge.net/apps/trac/bigdata/ticket/585" + * href="http://trac.bigdata.com/ticket/585" * > GIST </a> */ return (ICheckpointProtocol) resourceManager.getIndex(name, commitTime); Added: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/journal/TestHierarchicalLockingTasks.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/journal/TestHierarchicalLockingTasks.java (rev 0) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/journal/TestHierarchicalLockingTasks.java 2014-07-16 15:50:37 UTC (rev 8557) @@ -0,0 +1,378 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. 
+
+Contact:
+     SYSTAP, LLC
+     4501 Tower Road
+     Greensboro, NC 27410
+     lic...@bi...
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+/*
+ * Created on Oct 15, 2007
+ */
+
+package com.bigdata.journal;
+
+import java.util.UUID;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.Future;
+
+import com.bigdata.btree.IIndex;
+import com.bigdata.btree.ILocalBTreeView;
+import com.bigdata.btree.IndexMetadata;
+
+/**
+ * Test suite for hierarchical locking of indices based on namespace prefixes.
+ * This test suite was introduced as part of the support for group commit for
+ * the RDF database layer in the non-scale-out modes. In order to be able to run
+ * RDF operations using job-based concurrency, we need to predeclare the
+ * namespace (rather than the individual indices) and then isolate all indices
+ * spanned by that namespace.
+ *
+ * @author <a href="mailto:tho...@us...">Bryan Thompson</a>
+ *
+ * @see <a href="http://sourceforge.net/apps/trac/bigdata/ticket/566" >
+ *      Concurrent unisolated operations against multiple KBs </a>
+ *
+ *      FIXME GROUP COMMIT (hierarchical locking): We need to test each
+ *      different kind of index access here (unisolated, isolated by a
+ *      read/write tx, isolated by a read-only tx, and read-historical (no
+ *      isolation)).
+ */
+public class TestHierarchicalLockingTasks extends ProxyTestCase<Journal> {
+
+    /**
+     *
+     */
+    public TestHierarchicalLockingTasks() {
+        super();
+    }
+
+    /**
+     * @param name
+     */
+    public TestHierarchicalLockingTasks(final String name) {
+        super(name);
+    }
+
+    /**
+     * Test creates several named indices, some of which have a shared namespace
+     * prefix, and verifies that a lock declared for the namespace prefix permits
+     * the task to access any index in that namespace. The test also verifies
+     * that an index that does not share the namespace prefix may not be
+     * accessed by the task.
+     *
+     * @throws InterruptedException
+     * @throws ExecutionException
+     */
+    public void test_hierarchicalLocking_001() throws InterruptedException,
+            ExecutionException {
+
+        // namespace for declared locks.
+        final String[] namespace = new String[] { "foo" };
+
+        // indices that we create, write on, and verify.
+        final String[] allowed_indices = new String[] { "foo.bar", "foo.baz" };
+
+        // indices that are outside of the declared namespace. we verify that we
+        // cannot access these indices.
+        final String[] disallowed_indices = new String[] { "goo", "goo.bar" };
+
+        final Journal journal = getStore();
+
+        try {
+
+            /*
+             * Create indices that we should not be able to see when holding
+             * just the lock for the namespace.
+ */ + for (int i = 0; i < disallowed_indices.length; i++) { + + final String name = disallowed_indices[i]; + + journal.submit(new RegisterIndexTask(journal + .getConcurrencyManager(), name, new IndexMetadata(name, + UUID.randomUUID()))).get(); + + } + + /* + * Create indices that we should be able to see when holding the + * lock for the namespace. + */ + for (int i = 0; i < allowed_indices.length; i++) { + + final String name = allowed_indices[i]; + + journal.submit(new RegisterIndexTask(journal + .getConcurrencyManager(), name, new IndexMetadata(name, + UUID.randomUUID()))).get(); + + } + + /* + * Submit task holding the lock for the namespace and verify that we + * can see the indices that are spanned by the namespace, but not + * those that are not spanned by the namespace. + */ + + final Future<Void> ft = journal + .submit(new AbstractTask<Void>(journal + .getConcurrencyManager(), ITx.UNISOLATED, namespace) { + + @Override + protected Void doTask() throws Exception { + + // Verify access to the indices in that namespace. + for (String name : allowed_indices) { + + assertNotNull(name, getIndex(name)); + + } + + /* + * Verify no access to the indices outside of that + * namespace. + */ + for (String name : disallowed_indices) { + + try { + // Attempt to access index. + getIndex(name); + fail("Expecting: " + + IllegalStateException.class + + " for " + name); + } catch (IllegalStateException ex) { + // log and ignore expected exception. + if (log.isInfoEnabled()) + log.info("Ignoring expected exception"); + } + + } + + // Done. + return null; + } + }); + + try { + // Await outcome. + ft.get(); + } finally { + ft.cancel(true/* mayInterruptIfRunning */); + } + + } finally { + + journal.destroy(); + + } + + } + + /** + * Unit test for hierarchical locking verifies that we can declared a + * namespace for a task which then creates multiple indices spanned by that + * namespace. + * + * @throws InterruptedException + * @throws ExecutionException + */ + public void test_hierarchicalLocking_create_destroy() + throws InterruptedException, ExecutionException { + + // namespace for declared locks. + final String[] namespace = new String[] { "foo" }; + + // indices that we create, write on, and verify. + final String[] allowed_indices = new String[] { "foo.bar", "foo.baz" }; + + final Journal journal = getStore(); + + try { + + final UUID[] uuids = journal.submit( + new AbstractTask<UUID[]>(journal.getConcurrencyManager(), + ITx.UNISOLATED, namespace) { + + @Override + protected UUID[] doTask() throws Exception { + + final UUID[] indexUUIDs = new UUID[allowed_indices.length]; + + /* + * Create indices that we should be able to see when + * holding the lock for the namespace. + */ + for (int i = 0; i < allowed_indices.length; i++) { + + final String name = allowed_indices[i]; + + IIndex ndx = getJournal().getIndex(name); + + if (ndx != null) { + + final UUID indexUUID = ndx.getIndexMetadata().getIndexUUID(); + + if (log.isInfoEnabled()) + log.info("Index exists: name=" + name + ", indexUUID=" + indexUUID); + + indexUUIDs[i] = indexUUID; + + } + + // register the index. + ndx = getJournal().registerIndex( + name, + new IndexMetadata(name, UUID + .randomUUID())); + + final UUID indexUUID = ndx.getIndexMetadata().getIndexUUID(); + + if (log.isInfoEnabled()) + log.info("Registered index: name=" + name + ", class=" + + ndx.getClass() + ", indexUUID=" + indexUUID); + + indexUUIDs[i] = indexUUID; + + } + + // Done + return indexUUIDs; + } + + }).get(); + + // should be non-null. 
+ assertNotNull(uuids); + + // Should be non-null. + for (UUID uuid : uuids) { + + assertNotNull(uuid); + + } + + /* + * Verify access to the newly created indices. + */ + journal.submit( + new AbstractTask<Void>(journal.getConcurrencyManager(), + ITx.UNISOLATED, namespace) { + + @Override + protected Void doTask() throws Exception { + + /* + * Verify access to the newly created indices. + */ + for (int i = 0; i < allowed_indices.length; i++) { + + final String name = allowed_indices[i]; + + final ILocalBTreeView ndx = getIndex(name); + + assertNotNull(ndx); + + assertEquals(0L, ndx.rangeCount()); + + // add a key. + ndx.insert(new byte[] {}, + new byte[] { (byte) i }); + + } + + // Done + return null; + } + + }).get(); + + /* + * Destroy the newly created indices. + */ + journal.submit( + new AbstractTask<Void>(journal.getConcurrencyManager(), + ITx.UNISOLATED, namespace) { + + @Override + protected Void doTask() throws Exception { + + /* + * Create indices that we should be able to see when + * holding the lock for the namespace. + */ + for (int i = 0; i < allowed_indices.length; i++) { + + final String name = allowed_indices[i]; + + getJournal().dropIndex(name); + + } + + // Done + return null; + } + + }).get(); + + /* + * Verify that the indices can no longer be accessed. + */ + journal.submit( + new AbstractTask<Void>(journal.getConcurrencyManager(), + ITx.UNISOLATED, namespace) { + + @Override + protected Void doTask() throws Exception { + + /* + * Verify no access to the dropped indices. + */ + for (int i = 0; i < allowed_indices.length; i++) { + + final String name = allowed_indices[i]; + + try { + // Attempt to access index. + getIndex(name); + fail("Expecting: " + + IllegalStateException.class + + " for " + name); + } catch (IllegalStateException ex) { + // log and ignore expected exception. + if (log.isInfoEnabled()) + log.info("Ignoring expected exception"); + } + + } + + // Done + return null; + } + + }).get(); + + } finally { + + journal.destroy(); + + } + + } + +} Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/journal/TestJournalBasics.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/journal/TestJournalBasics.java 2014-07-16 15:37:58 UTC (rev 8556) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/journal/TestJournalBasics.java 2014-07-16 15:50:37 UTC (rev 8557) @@ -125,15 +125,25 @@ // suite.addTestSuite(TestAddDropIndexTask.class); // test writing on one or more unisolated indices and verify read back after the commit. suite.addTestSuite(TestUnisolatedWriteTasks.class); + // test suite for hierarchical locking (namespace prefixes). + suite.addTestSuite(TestHierarchicalLockingTasks.class); + // test suite for GIST operations using group commit. +// suite.addTestSuite(TestGISTTasks.class); // stress test of throughput when lock contention serializes unisolated writers. suite.addTestSuite(StressTestLockContention.class); // stress test of group commit. suite.addTestSuite(StressTestGroupCommit.class); // stress tests of writes on unisolated named indices using ConcurrencyManager. suite.addTestSuite(StressTestConcurrentUnisolatedIndices.class); - // stress tests of writes on unisolated named indices using UnisolatedReadWriteIndex. - suite.addTestSuite(StressTestConcurrentUnisolatedIndices.class); /* + * Stress tests of writes on unisolated named indices using + * UnisolatedReadWriteIndex. 
+ * + * FIXME This test appears to cause #343 (Stochastic assert in + * AbstractBTree#writeNodeOrLeaf() in CI) + */ +// suite.addTestSuite(StressTestUnisolatedReadWriteIndex.class); + /* * Stress test of concurrent transactions. * * Note: transactions use unisolated operations on the live indices when Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/service/TestAll.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/service/TestAll.java 2014-07-16 15:37:58 UTC (rev 8556) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/service/TestAll.java 2014-07-16 15:50:37 UTC (rev 8557) @@ -28,8 +28,6 @@ package com.bigdata.service; - - import junit.framework.Test; import junit.framework.TestCase; import junit.framework.TestSuite; @@ -38,7 +36,6 @@ * Test suite for embedded services. * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id$ */ public class TestAll extends TestCase { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
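The heart of the hierarchical locking change is the prefix test re-enabled in AbstractTask: a declared resource spans a requested index name when it is an exact match, or when it is a proper prefix followed by the '.' separator (which is what keeps "kb" from accidentally contending with "kb1"). A minimal sketch of just that rule (PrefixLockCheck and spans() are illustrative names, not the bigdata API):

public class PrefixLockCheck {

    static boolean spans(final String declared, final String requested) {
        if (requested.equals(declared))
            return true; // exact match on a named index.
        // Namespace prefix match: the next character must be the '.'
        // separator to avoid contention between e.g. "kb" and "kb1".
        return requested.startsWith(declared)
                && requested.charAt(declared.length()) == '.';
    }

    public static void main(final String[] args) {
        System.out.println(spans("kb", "kb.spo.osp")); // true: in the namespace
        System.out.println(spans("kb", "kb"));         // true: exact match
        System.out.println(spans("kb", "kb1.spo"));    // false: sibling "kb1"
        System.out.println(spans("kb", "goo.bar"));    // false: unrelated index
    }
}

This is also why TestHierarchicalLockingTasks declares the lock "foo" and then expects IllegalStateException for "goo" and "goo.bar": neither is spanned by the declared namespace.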
From: <tho...@us...> - 2014-07-16 16:05:43
Revision: 8559 http://sourceforge.net/p/bigdata/code/8559 Author: thompsonbry Date: 2014-07-16 16:05:36 +0000 (Wed, 16 Jul 2014) Log Message: ----------- code cleanup Modified Paths: -------------- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/journal/IIndexManager.java branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/journal/TestHierarchicalLockingTasks.java Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/journal/IIndexManager.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/journal/IIndexManager.java 2014-07-16 16:01:00 UTC (rev 8558) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/journal/IIndexManager.java 2014-07-16 16:05:36 UTC (rev 8559) @@ -34,7 +34,6 @@ * Interface for managing named indices. * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id$ */ public interface IIndexManager extends IIndexStore, ICounterSetAccess { Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/journal/TestHierarchicalLockingTasks.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/journal/TestHierarchicalLockingTasks.java 2014-07-16 16:01:00 UTC (rev 8558) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/journal/TestHierarchicalLockingTasks.java 2014-07-16 16:05:36 UTC (rev 8559) @@ -29,7 +29,6 @@ import java.util.UUID; import java.util.concurrent.ExecutionException; -import java.util.concurrent.Future; import com.bigdata.btree.IIndex; import com.bigdata.btree.ILocalBTreeView; @@ -130,9 +129,9 @@ * those that are not spanned by the namespace. */ - final Future<Void> ft = journal - .submit(new AbstractTask<Void>(journal - .getConcurrencyManager(), ITx.UNISOLATED, namespace) { + journal.submit( + new AbstractTask<Void>(journal.getConcurrencyManager(), + ITx.UNISOLATED, namespace) { @Override protected Void doTask() throws Exception { @@ -167,15 +166,8 @@ // Done. return null; } - }); + }).get(); - try { - // Await outcome. - ft.get(); - } finally { - ft.cancel(true/* mayInterruptIfRunning */); - } - } finally { journal.destroy(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
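For context on this cleanup: when the caller blocks on get() immediately and simply propagates any failure, as these tests do, the explicit Future with a cancel-on-exit block adds little, and the direct form reads more clearly. An illustrative comparison of the two patterns using plain java.util.concurrent (not the bigdata API):

import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

public class SubmitPatterns {

    public static void main(final String[] args) throws Exception {
        final ExecutorService exec = Executors.newSingleThreadExecutor();
        try {
            // Before: explicit Future with cancel-on-exit.
            final Future<Integer> ft = exec.submit(new Callable<Integer>() {
                public Integer call() { return 42; }
            });
            try {
                System.out.println(ft.get());
            } finally {
                ft.cancel(true/* mayInterruptIfRunning */);
            }
            // After: equivalent when the result is always awaited in place.
            System.out.println(exec.submit(new Callable<Integer>() {
                public Integer call() { return 43; }
            }).get());
        } finally {
            exec.shutdown();
        }
    }
}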
From: <mar...@us...> - 2014-07-17 15:46:28
Revision: 8568 http://sourceforge.net/p/bigdata/code/8568 Author: martyncutcher Date: 2014-07-17 15:46:25 +0000 (Thu, 17 Jul 2014) Log Message: ----------- Add allocation strategy to reduce IOPs by allocating small slots from sparsely allocated regions. This enables the BufferedWrite to join adjacent slot writes into a single IO. Ticket #986 Modified Paths: -------------- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/io/writecache/BufferedWrite.java branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/io/writecache/IBufferedWriter.java branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/io/writecache/WriteCacheCounters.java branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/rwstore/FixedAllocator.java branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/rwstore/RWStore.java branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/rwstore/TestRWJournal.java Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/io/writecache/BufferedWrite.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/io/writecache/BufferedWrite.java 2014-07-17 14:57:42 UTC (rev 8567) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/io/writecache/BufferedWrite.java 2014-07-17 15:46:25 UTC (rev 8568) @@ -31,10 +31,12 @@ import com.bigdata.counters.CAT; import com.bigdata.counters.CounterSet; +import com.bigdata.counters.Instrument; import com.bigdata.io.DirectBufferPool; import com.bigdata.io.FileChannelUtility; import com.bigdata.io.IBufferAccess; import com.bigdata.io.IReopenChannel; +import com.bigdata.rwstore.RWStore; /** * The BufferedWrite merges/elides sorted scattered writes to minimize IO @@ -88,12 +90,7 @@ */ private long m_endAddr = 0; - /* - * Counters. - */ - private final CAT m_dataBytes = new CAT(); - private final CAT m_dataWrites = new CAT(); - private final CAT m_fileWrites = new CAT(); + private final RWStore.StoreCounters<?> m_storeCounters; public BufferedWrite(final IBufferedWriter store) throws InterruptedException { @@ -102,6 +99,8 @@ m_store = store; + m_storeCounters = m_store.getStoreCounters(); + m_data.set( DirectBufferPool.INSTANCE.acquire() ); } @@ -162,7 +161,7 @@ public int write(final long offset, final ByteBuffer data, final IReopenChannel<FileChannel> opener) throws IOException { - m_dataWrites.increment(); + m_storeCounters.bufferDataWrites++; final int data_len = data.remaining(); final int slot_len = m_store.getSlotSize(data_len); @@ -239,12 +238,12 @@ } // increment by the amount of data currently in the buffer. - m_dataBytes.add( m_data.position() ); + m_storeCounters.bufferDataBytes += m_data.position(); // write out the data in the buffer onto the backing channel. 
m_data.flip(); final int nwrites = FileChannelUtility.writeAll(opener, m_data, m_startAddr); - m_fileWrites.add(nwrites); + m_storeCounters.bufferFileWrites += nwrites; reset(); @@ -280,19 +279,19 @@ public String getStats(final StringBuffer buf, final boolean reset) { - final String ret = "BufferedWrites, data: " + m_dataWrites + ", file: " + m_fileWrites + ", bytes: " + m_dataBytes; + final String ret = "BufferedWrites, data: " + m_storeCounters.bufferDataWrites + ", file: " + m_storeCounters.bufferFileWrites + ", bytes: " + m_storeCounters.bufferDataBytes; if (buf != null) { buf.append(ret + "\n"); } if (reset) { - m_dataBytes.set(0L); - m_fileWrites.set(0L); - m_dataWrites.set(0L); + m_storeCounters.bufferFileWrites = 0; + m_storeCounters.bufferDataWrites = 0; + m_storeCounters.bufferDataBytes = 0; } return ret; - } + } } Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/io/writecache/IBufferedWriter.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/io/writecache/IBufferedWriter.java 2014-07-17 14:57:42 UTC (rev 8567) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/io/writecache/IBufferedWriter.java 2014-07-17 15:46:25 UTC (rev 8568) @@ -1,7 +1,11 @@ package com.bigdata.io.writecache; +import com.bigdata.rwstore.RWStore; + public interface IBufferedWriter { int getSlotSize(int data_len); + RWStore.StoreCounters<?> getStoreCounters(); + } Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/io/writecache/WriteCacheCounters.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/io/writecache/WriteCacheCounters.java 2014-07-17 14:57:42 UTC (rev 8567) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/io/writecache/WriteCacheCounters.java 2014-07-17 15:46:25 UTC (rev 8568) @@ -182,7 +182,7 @@ setValue(elapsedWriteNanos / 1000000000.); } }); - + return root; } // getCounters() Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/rwstore/FixedAllocator.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/rwstore/FixedAllocator.java 2014-07-17 14:57:42 UTC (rev 8567) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/rwstore/FixedAllocator.java 2014-07-17 15:46:25 UTC (rev 8568) @@ -49,7 +49,7 @@ public class FixedAllocator implements Allocator { private static final Logger log = Logger.getLogger(FixedAllocator.class); - + private final int cModAllocation = 1 << RWStore.ALLOCATION_SCALEUP; private final int cMinAllocation = cModAllocation * 1; // must be multiple of cModAllocation @@ -204,20 +204,26 @@ private ArrayList m_freeList; public void setFreeList(final ArrayList list) { - m_freeList = list; + if (m_freeList != list) { + m_freeList = list; + m_freeWaiting = true; + } - if (!m_pendingContextCommit && hasFree()) { - m_freeList.add(this); - m_freeWaiting = false; + if (!m_pendingContextCommit && hasFree() && meetsSmallSlotThreshold()) { + addToFreeList(); } + } /** * To support postHACommit an allocator can be removed from the current freelist */ void removeFromFreeList() { - if (m_freeList != null) + if (m_freeList != null) { + // log.warn("Removing allocator " + m_index + " from free list"); m_freeList.remove(this); + m_freeWaiting = true; + } } @@ -405,9 +411,13 @@ final int calcFree = calcFreeBits(); final int calcLiveFree = 
calcLiveFreeBits(); - return m_freeBits == calcFree + final boolean ret = m_freeBits == calcFree && (m_freeBits + m_freeTransients) == calcLiveFree; + + if (!ret) + throw new AssertionError("m_free: " + m_freeBits + ", calcFree: " + calcFree); + return ret; } // read does not read in m_size since this is read to determine the class of @@ -466,6 +476,13 @@ private int m_startAddr = 0; private int m_endAddr = 0; + + /** + * For "small slot" allocators the allocation search is + * always from bit areas with less than a maximum density to + * ensure that writes have better locality. + */ + int m_allocIndex = -1; /** * The #of int32 values in a single {@link AllocBlock} region. The @@ -533,6 +550,51 @@ } /** + * find the allocationIndex of first "sparsely committed" AllocBlock. + * + * Checks the committed bits of all the AllocBlocks until one is found with + * > 50% free (or less than 50% allocated) of the committed bits. + * @param store + * @param i + */ + void resetAllocIndex() { + resetAllocIndex(0); + } + + void resetAllocIndex(final int start) { + m_allocIndex = start; + + if (m_size <= 1024) { + for (int a = m_allocIndex/m_bitSize; a < m_allocBlocks.size(); a++) { + final AllocBlock ab = m_allocBlocks.get(a); + + checkBlock(ab); + + for (int i = (m_allocIndex%m_bitSize); i < m_bitSize; i++) { + // first check if transients are already full + if (ab.m_transients[i] != 0xFFFFFFFF) { + // then check maximum 50% commit allocated + if (Integer.bitCount(ab.m_commit[i]) < 16) { + final AllocBlock abr = m_allocBlocks.get(m_allocIndex/m_bitSize); + assert abr == ab; + + return; + } + } + m_allocIndex++; + } + } + + // must remove from free list if we cannot set the alloc Index for a small slot + if (start == 0) { + removeFromFreeList(); + } else { + resetAllocIndex(0); + } + } + } + + /** * This determines the size of the reservation required in terms of * the number of ints each holding bits for 32 slots. * @@ -693,6 +755,7 @@ try { if (log.isDebugEnabled()) checkBits(); + if (((AllocBlock) m_allocBlocks.get(block)) .freeBit(offset % nbits, m_sessionActive && !overideSession)) { // bit adjust @@ -721,7 +784,7 @@ if (log.isDebugEnabled()) checkBits(); - return true; + return true; } else if (addr >= m_startAddr && addr < m_endAddr) { final Iterator<AllocBlock> iter = m_allocBlocks.iterator(); @@ -755,112 +818,227 @@ private void checkFreeList() { if (m_freeWaiting && !m_pendingContextCommit) { - if (m_freeBits >= m_store.cDefaultFreeBitsThreshold) { - m_freeWaiting = false; + if (meetsSmallSlotThreshold()) { - if (log.isDebugEnabled()) - log.debug("Returning Allocator to FreeList - " + m_size); + addToFreeList(); - m_freeList.add(this); + resetAllocIndex(0); } } } + + private void addToFreeList() { + assert m_freeWaiting; + + m_freeWaiting = false; + m_freeList.add(this); + m_allocIndex = -1; + + if (log.isDebugEnabled()) + log.debug("Returning Allocator to FreeList - " + m_size); + } + + private boolean meetsSmallSlotThreshold() { + // check threshold for all slots + if (m_freeBits < m_store.cDefaultFreeBitsThreshold) { + return false; + } + + // then check for small slots + if (m_size <= m_store.cSmallSlot) { // it's a small slotSMALL_SLOT_TYPE + return m_freeBits > m_store.cSmallSlotThreshold; + } else { + return true; + } + } /** * The introduction of IAllocationContexts has added some complexity to * the older concept of a free list. With AllocationContexts it is * possibly for allocator to have free space available but this being - * restricted to a specific AllocaitonContext. 
The RWStore alloc method - * must therefore handle the + * restricted to a specific AllocationContext. + * <p> + * In addition to the standard free allocation search we want to add a + * "density" restriction for small slots to encourage the aggregation + * of writes (by increasing the likelihood of sibling slot allocation). + * <p> + * There is some "Do What I mean" complexity here, with difficulty in + * determining a good rule to identify an initial allocation point. There + * is a danger of significantly reducing the allocation efficiency of + * short transactions if we too naively check committed bit density. We + * should only do this when identifying the initial allocation, and when + * the allocIndex is incremented. */ - public int alloc(final RWStore store, final int size, final IAllocationContext context) { + public int alloc(final RWStore store, final int size, + final IAllocationContext context) { try { - if (size <= 0) - throw new IllegalArgumentException( - "Allocate requires positive size, got: " + size); + if (size <= 0) + throw new IllegalArgumentException( + "Allocate requires positive size, got: " + size); - if (size > m_size) - throw new IllegalArgumentException( - "FixedAllocator with slots of " + m_size - + " bytes requested allocation for "+ size + " bytes"); + if (size > m_size) + throw new IllegalArgumentException( + "FixedAllocator with slots of " + m_size + + " bytes requested allocation for " + size + + " bytes"); - int addr = -1; + if (m_freeBits == 0) { + throw new IllegalStateException("Request to allocate from " + m_size + "byte slot FixedAllocator with zero bits free - should not be on the Free List"); + } + + int addr = -1; + + // Special allocation for small slots + if (m_size <= m_store.cSmallSlot) { + return allocFromIndex(size); + } - final Iterator<AllocBlock> iter = m_allocBlocks.iterator(); - int count = -1; - while (addr == -1 && iter.hasNext()) { - count++; + final Iterator<AllocBlock> iter = m_allocBlocks.iterator(); + int count = -1; + while (addr == -1 && iter.hasNext()) { + count++; - final AllocBlock block = iter.next(); - if (block.m_addr == 0) { - int blockSize = 32 * m_bitSize; + final AllocBlock block = iter.next(); + checkBlock(block); + + addr = block.alloc(m_size); + } + + if (addr != -1) { + + addr += 3; // Tweak to ensure non-zero address for offset 0 + + if (--m_freeBits == 0) { + if (log.isTraceEnabled()) + log.trace("Remove from free list"); + removeFromFreeList(); + + // Should have been first on list, now check for first + if (m_freeList.size() > 0) { + if (log.isDebugEnabled()) { + final FixedAllocator nxt = (FixedAllocator) m_freeList + .get(0); + log.debug("Freelist head: " + nxt.getSummaryStats()); + } + } + } + + addr += (count * 32 * m_bitSize); + + final int value = -((m_index << RWStore.OFFSET_BITS) + addr); + if (m_statsBucket != null) { - m_statsBucket.addSlots(blockSize); + m_statsBucket.allocate(size); } - blockSize *= m_size; - blockSize >>= RWStore.ALLOCATION_SCALEUP; - block.m_addr = grabAllocation(store, blockSize); - if (log.isDebugEnabled()) - log.debug("Allocation block at " + block.m_addr + " of " + (blockSize << 16) + " bytes"); + return value; + } else { + StringBuilder sb = new StringBuilder(); + sb.append("FixedAllocator returning null address, with freeBits: " + + m_freeBits + "\n"); - if (m_startAddr == 0) { - m_startAddr = block.m_addr; + for (AllocBlock ab : m_allocBlocks) { + sb.append(ab.show() + "\n"); } - m_endAddr = block.m_addr - blockSize; + + log.error(sb); + + return 0; } - addr = 
 			block.alloc(m_size);
+		} finally {
+			if (log.isDebugEnabled())
+				checkBits();
 		}
+	}
+
+	void checkBlock(final AllocBlock block) {
+		if (block.m_addr == 0) {
+			int blockSize = 32 * m_bitSize;
+			if (m_statsBucket != null) {
+				m_statsBucket.addSlots(blockSize);
+			}
+			blockSize *= m_size;
+			blockSize >>= RWStore.ALLOCATION_SCALEUP;
-		if (addr != -1) {
+			block.m_addr = grabAllocation(m_store, blockSize);
+			if (log.isDebugEnabled())
+				log.debug("Allocation block at " + block.m_addr
+						+ " of " + (blockSize << 16) + " bytes");
-			addr += 3; // Tweak to ensure non-zero address for offset 0
-
-			if (--m_freeBits == 0) {
-				if (log.isTraceEnabled())
-					log.trace("Remove from free list");
-				m_freeList.remove(this);
-				m_freeWaiting = true;
-
-				// Should have been first on list, now check for first
-				if (m_freeList.size() > 0) {
-					if (log.isDebugEnabled()) {
-						final FixedAllocator nxt = (FixedAllocator) m_freeList.get(0);
-						log.debug("Freelist head: " + nxt.getSummaryStats());
-					}
-				}
+			if (m_startAddr == 0) {
+				m_startAddr = block.m_addr;
 			}
+			m_endAddr = block.m_addr - blockSize;
+		}
-			addr += (count * 32 * m_bitSize);
-
-			final int value = -((m_index << RWStore.OFFSET_BITS) + addr);
+	}
+
+	int allocFromIndex(final int size) {
+
+		if (m_allocIndex == -1) {
+			resetAllocIndex();
-			if (m_statsBucket != null) {
-				m_statsBucket.allocate(size);
+			if (m_allocIndex == -1) {
+				throw new AssertionError("Unable to set AllocIndex with m_freeBits: " + m_freeBits);
 			}
-
-
-			return value;
-		} else {
-			StringBuilder sb = new StringBuilder();
-			sb.append("FixedAllocator returning null address, with freeBits: " + m_freeBits + "\n");
+		}
-			for (AllocBlock ab: m_allocBlocks) {
-				sb.append(ab.show() + "\n");
-			}
-
-			log.error(sb);
+		if (log.isDebugEnabled())
+			checkBits();
-			return 0;
+
+		if (m_freeBits != calcFreeBits()) {
+			final int calc = calcFreeBits();
+			throw new AssertionError("m_freeBits != calcFreeBits() : " + m_freeBits + "!=" + calc);
 		}
-	} finally {
-		if (log.isDebugEnabled())
-			checkBits();
+
+		// there MUST be bits free in the m_allocIndex block
+		final AllocBlock ab = m_allocBlocks.get(m_allocIndex/m_bitSize);
+
+		if (ab.m_addr == 0) {
+			throw new AssertionError("No allocation for AllocBlock with m_allocIndex: " + m_allocIndex);
 		}
+
+		final int abblock = m_allocIndex % m_bitSize;
+
+		assert ab.m_transients[abblock] != 0xFFFFFFFF; // not all set
+
+		final int bit = RWStore.fndBit(ab.m_transients[abblock]);
+
+		assert bit >= 0;
+
+		m_freeBits--;
+
+		final int abit = (abblock*32) + bit;
+		RWStore.setBit(ab.m_live, abit);
+		RWStore.setBit(ab.m_transients, abit);
+
+		// Note +3 for address tweak for special low order bits
+		final int addr = -((m_index << RWStore.OFFSET_BITS) + (m_allocIndex*32) + (bit + 3));
+
+		// Now check current index
+		if (ab.m_transients[abblock] == 0xFFFFFFFF) {
+			// find next allocIndex
+			resetAllocIndex(m_allocIndex+1);
+		}
+
+		if (m_freeBits != calcFreeBits()) {
+			throw new AssertionError("m_freeBits != calcFreeBits()");
+		}
+		// assert m_freeBits == calcFreeBits();
+
+		if (m_statsBucket != null) {
+			m_statsBucket.allocate(size);
+		}
+
+		return addr;
 	}

 	protected int grabAllocation(RWStore store, int blockSize) {
-		return store.allocBlock(blockSize);
+
+		final int ret = store.allocBlock(blockSize);
+
+		return ret;
 	}

 	public boolean hasFree() {
@@ -1040,9 +1218,16 @@
 		}

 		m_freeTransients = transientbits();
+		m_freeBits = calcFreeBits();
+		// Ensure allocIndex is reset
+		m_allocIndex = -1;
+		assert calcSessionFrees();

+		if (log.isDebugEnabled())
+			checkBits();
+
 		return isolatedWrites;
 	}

@@ -1145,6 +1330,8 @@
 		}

 		ab.setBitExternal(bit);
+
+		m_freeBits--;
 	}

 	public int getSlotSize() {
@@ -1231,8 +1418,8 @@
 		if (m_pendingContextCommit) {
 			m_pendingContextCommit = false;
-			if (hasFree()) {
-				m_freeList.add(this);
+			if (m_freeWaiting && meetsSmallSlotThreshold()) {
+				addToFreeList();
 			}
 		}

@@ -1241,15 +1428,17 @@
 			// Handle re-addition to free list once transient frees are
 			// added back
-			if (m_freeWaiting && m_freeBits >= m_store.cDefaultFreeBitsThreshold) {
-				m_freeList.add(this);
-				m_freeWaiting = false;
+			if (m_freeWaiting && meetsSmallSlotThreshold()) {
+				addToFreeList();
 			}

 			m_freeTransients = 0;
 		}

+		if (log.isDebugEnabled())
+			checkBits();
+
 	}

 	/*
Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/rwstore/RWStore.java
===================================================================
--- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/rwstore/RWStore.java	2014-07-17 14:57:42 UTC (rev 8567)
+++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/rwstore/RWStore.java	2014-07-17 15:46:25 UTC (rev 8568)
@@ -67,6 +67,7 @@
 import com.bigdata.btree.ITupleIterator;
 import com.bigdata.btree.IndexMetadata;
 import com.bigdata.cache.ConcurrentWeakValueCache;
+import com.bigdata.counters.CAT;
 import com.bigdata.counters.CounterSet;
 import com.bigdata.counters.Instrument;
 import com.bigdata.counters.striped.StripedCounters;
@@ -357,6 +358,24 @@
 		String DEFAULT_FREE_BITS_THRESHOLD = "300";

 		/**
+		 * Defines the maximum size of a slot that is considered a small slot.
+		 * <p>
+		 * Any slot equal to or less than this is considered a small slot and
+		 * its availability for allocation is restricted to ensure a high
+		 * chance that contiguous allocations can be made.
+		 * <p>
+		 * This is arranged by only returning small slot allocators to the free list
+		 * if they have greater than 50% available slots, and then only allocating
+		 * slots from sparse regions with >= 50% free/committed bits.
+		 * <p>
+		 * Small slot processing can be disabled by setting the smallSlotType to zero.
+		 */
+		String SMALL_SLOT_TYPE = RWStore.class.getName() + ".smallSlotType";
+
+		// String DEFAULT_SMALL_SLOT_TYPE = "1024"; // standard default
+		String DEFAULT_SMALL_SLOT_TYPE = "0"; // initial default to no special processing
+
+		/**
 		 * When <code>true</code>, scattered writes which are strictly ascending
 		 * will be coalesced within a buffer and written out as a single IO
 		 * (default {@value #DEFAULT_DOUBLE_BUFFER_WRITES}). This improves write
@@ -820,7 +839,16 @@
 			throw new IllegalArgumentException(Options.FREE_BITS_THRESHOLD
 					+ " : Must be between 1 and 5000");
 		}
-
+
+		cSmallSlot = Integer.valueOf(fileMetadata.getProperty(
+				Options.SMALL_SLOT_TYPE,
+				Options.DEFAULT_SMALL_SLOT_TYPE));
+
+		if (cSmallSlot < 0 || cSmallSlot > 2048) {
+			throw new IllegalArgumentException(Options.SMALL_SLOT_TYPE
+					+ " : Must be between 0 and 2048");
+		}
+
 		m_metaBits = new int[m_metaBitsSize];
 		m_metaTransientBits = new int[m_metaBitsSize];
@@ -2117,37 +2145,13 @@
 			// With a non-null WCS, the actual read should be via a callback to readRaw, it should not get here
 			// unless it is not possible to cache - but maybe even then the WCS should read into a temporary
 			// buffer
-			final long beginDisk = System.nanoTime();
-			// If checksum is required then the buffer should be sized to include checksum in final 4 bytes
+
+			// If checksum is required then the buffer should be sized to include checksum in final 4 bytes
 			final ByteBuffer bb = ByteBuffer.wrap(buf, offset, length);

 			// Use ReadRaw - should be the same read all
 			readRaw(paddr, bb);

-			// enable for debug
-			if (false) {//FIXME EXTENSION_LOCK REQUIRED FOR IO.
-				final byte[] nbuf = new byte[buf.length];
-				final ByteBuffer nbb = ByteBuffer.wrap(nbuf, offset, length);
-				FileChannelUtility.readAll(m_reopener, nbb, paddr);
-				if (!Arrays.equals(buf, nbuf))
-					throw new AssertionError();
-
-				m_diskReads++;
-				// Update counters.
-				final StoreCounters<?> c = (StoreCounters<?>) storeCounters.get()
-						.acquire();
-				try {
-					final int nbytes = length;
-					c.nreads++;
-					c.bytesRead += nbytes;
-					c.bytesReadFromDisk += nbytes;
-					c.elapsedReadNanos += (System.nanoTime() - begin);
-					c.elapsedDiskReadNanos += (System.nanoTime() - beginDisk);
-				} finally {
-					c.release();
-				}
-			}
-
 			final int chk = ChecksumUtility.getCHK().checksum(buf, offset, length-4);
 			// read checksum
 			final int tstchk = bb.getInt(offset + length-4);
 			if (chk != tstchk) {
@@ -2669,6 +2673,10 @@
 			}

 			final int addr = allocator.alloc(this, size, context);
+
+			if (addr == 0) {
+				throw new IllegalStateException("Free Allocator unable to allocate address: " + allocator.getSummaryStats());
+			}

 			if (allocator.isUnlocked() && !m_commitList.contains(allocator)) {
 				m_commitList.add(allocator);
@@ -3360,7 +3368,7 @@
 					+ m_metaBitsAddr + ", active contexts: " + m_contexts.size());

-			if (log.isDebugEnabled() && m_quorum.isHighlyAvailable()) {
+			if (log.isDebugEnabled() && m_quorum != null && m_quorum.isHighlyAvailable()) {
 				log.debug(showAllocatorList());

@@ -3624,6 +3632,10 @@
 	 */
 	final int cDefaultFreeBitsThreshold;

+	final int cSmallSlotThreshold = 4096; // debug test
+
+	int cSmallSlot = 1024; // @see Options#SMALL_SLOT_TYPE
+
 	/**
 	 * Each "metaBit" is a file region
 	 */
@@ -3952,7 +3964,7 @@
 	private void extendFile() {

 		final int adjust = -1200 + (m_fileSize / 10);
-
+
 		extendFile(adjust);
 	}

@@ -4050,12 +4062,21 @@
 	static int fndBit(final int[] bits, final int offset, final int size) {
 		final int eob = size + offset;

-		for (int i = offset; i < eob; i++) {
-			if (bits[i] != 0xFFFFFFFF) {
-				for (int k = 0; k < 32; k++) {
-					if ((bits[i] & (1 << k)) == 0) {
-						return (i * 32) + k;
-					}
+		for (int i = offset; i < eob; i++) {
+			final int b = fndBit(bits[i]);
+			if (b != -1) {
+				return (i * 32) + b;
+			}
+		}
+
+		return -1;
+	}
+
+	static int fndBit(final int bits) {
+		if (bits != 0xFFFFFFFF) {
+			for (int k = 0; k < 32; k++) {
+				if ((bits & (1 << k)) == 0) {
+					return k;
 				}
 			}
 		}
@@ -5493,19 +5514,27 @@
 		 * #of times one of the root blocks has been written.
 		 */
 		public volatile long nwriteRootBlock;
+
+		/**
+		 * buffer counters
+		 */
+		public volatile long bufferDataBytes;
+		public volatile long bufferDataWrites;
+		public volatile long bufferFileWrites;

 		/**
 		 * {@inheritDoc}
 		 */
 		public StoreCounters() {
-			super();
+			super();
 		}

-		/**
+		/**
 		 * {@inheritDoc}
 		 */
 		public StoreCounters(final int batchSize) {
 			super(batchSize);
+
 		}

 		/**
@@ -5602,7 +5631,6 @@
 			ntruncate = 0;
 			nreopen = 0;
 			nwriteRootBlock = 0;
-
 		}

 		@Override
@@ -5695,9 +5723,25 @@
 				}
 			});
+
 			} // IRawStore

-			// disk statistics
+			// BufferedWriter
+			final CounterSet bc = root.makePath("buffer");
+
+			bc.addCounter("ndataWrites", new Instrument<Long>() {
+				public void sample() {
+					setValue(bufferDataWrites);
+				}
+			});
+
+			bc.addCounter("nfileWrites", new Instrument<Long>() {
+				public void sample() {
+					setValue(bufferFileWrites);
+				}
+			});
+
+			// disk statistics
 			{
 				final CounterSet disk = root.makePath("disk");

@@ -6180,19 +6224,32 @@
 		final int position = dst.position();

 		try {

+			final long beginDisk = System.nanoTime();
+
 			// the offset into the disk file.
 			// final long pos = FileMetadata.headerSize0 + offset;
 			final long pos = offset;
+			final int length = dst.limit();

 			// read on the disk.
 			final int ndiskRead = FileChannelUtility.readAll(m_reopener, dst, pos);
+			m_diskReads += ndiskRead;
+
+			final long now = System.nanoTime();
+
 			// update performance counters.
 			final StoreCounters<?> c = (StoreCounters<?>) storeCounters
 					.get().acquire();
 			try {
 				c.ndiskRead += ndiskRead;
+				final int nbytes = length;
+				c.nreads++;
+				c.bytesRead += nbytes;
+				c.bytesReadFromDisk += nbytes;
+				c.elapsedReadNanos += now - beginDisk;
+				c.elapsedDiskReadNanos += now - beginDisk;
 			} finally {
 				c.release();
 			}
Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/rwstore/TestRWJournal.java
===================================================================
--- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/rwstore/TestRWJournal.java	2014-07-17 14:57:42 UTC (rev 8567)
+++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/rwstore/TestRWJournal.java	2014-07-17 15:46:25 UTC (rev 8568)
@@ -639,6 +639,26 @@

 		}

+		protected IRawStore getSmallSlotStore() {
+
+			return getSmallSlotStore(0);
+
+		}
+
+		protected IRawStore getSmallSlotStore(final int slotSize) {
+
+			final Properties properties = new Properties(getProperties());
+
+			properties.setProperty(
+					AbstractTransactionService.Options.MIN_RELEASE_AGE, "0");
+
+			properties.setProperty(
+					RWStore.Options.SMALL_SLOT_TYPE, "" + slotSize);
+
+			return getStore(properties);
+
+		}
+
 		protected Journal getStore(final long retentionMillis) {

 			final Properties properties = new Properties(getProperties());
@@ -830,6 +850,114 @@
 		}

 		/**
+		 * Ensures the allocation of unique addresses by asserting that each
+		 * new allocation maps to the expected contiguous physical address.
+		 */
+		public void test_addressingContiguous() {
+
+			final Journal store = (Journal) getStore();
+
+			try {
+
+				final RWStrategy bufferStrategy = (RWStrategy) store.getBufferStrategy();
+
+				final RWStore rw = bufferStrategy.getStore();
+				final int cSlotSize = 128;
+				final int cAllocSize = 99;
+
+				long pap = rw.physicalAddress(rw.alloc(cAllocSize, null));
+				for (int i = 0; i < 500000; i++) {
+					final int a = rw.alloc(cAllocSize, null);
+					final long pa = rw.physicalAddress(a);
+
+					if (pa != (pap+cSlotSize)) {
+						// for debug
+						rw.physicalAddress(a);
+						fail("Non-Contiguous slots: " + i + ", " + pa + "!=" + (pap+cSlotSize));
+					}
+
+					pap = pa;
+
+				}
+
+				store.commit();
+
+				final StringBuilder sb = new StringBuilder();
+				rw.showAllocators(sb);
+
+				log.warn(sb.toString());
+
+			} finally {
+
+				store.destroy();
+
+			}
+
+		}
+
+		/**
+		 * Tests the recycling of small slot allocators and outputs statistics
+		 * related to contiguous allocations indicative of reduced IOPS.
+		 */
+		public void test_smallSlotRecycling() {
+
+			final Journal store = (Journal) getSmallSlotStore(1024);
+
+			try {
+
+				final RWStrategy bufferStrategy = (RWStrategy) store.getBufferStrategy();
+
+				final RWStore rw = bufferStrategy.getStore();
+				final int cSlotSize = 128;
+				final int cAllocSize = 99;
+
+				int breaks = 0;
+				int contiguous = 0;
+
+				ArrayList<Integer> recycle = new ArrayList<Integer>();
+
+				long pap = rw.physicalAddress(rw.alloc(cAllocSize, null));
+				for (int i = 0; i < 500000; i++) {
+					final int a = rw.alloc(cSlotSize, null);
+					final long pa = rw.physicalAddress(a);
+
+					if (r.nextInt(7) < 5) { // more than 50% recycle
+						recycle.add(a);
+					}
+
+					if (pa == (pap+cSlotSize)) {
+						contiguous++;
+					} else {
+						breaks++;
+					}
+
+					pap = pa;
+
+					if (recycle.size() > 5000) {
+						log.warn("Transient Frees for immediate recycling");
+						for (int e : recycle) {
+							rw.free(e, cAllocSize);
+						}
+						recycle.clear();
+					}
+				}
+
+				store.commit();
+
+				final StringBuilder sb = new StringBuilder();
+				rw.showAllocators(sb);
+
+				log.warn("Contiguous: " + contiguous + ", breaks: " + breaks + "\n" + sb.toString());
+
+			} finally {
+
+				store.destroy();
+
+			}
+
+		}
+
+		/**
 		 * Basic allocation test to ensure the FixedAllocators are operating
 		 * efficiently.
 		 *
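For readers who want to try the new option: the patch reads RWStore.Options.SMALL_SLOT_TYPE ("com.bigdata.rwstore.RWStore.smallSlotType") at startup, accepts values from 0 to 2048, and ships with "0" (disabled). Below is a minimal sketch of opening a Journal with small slot processing enabled. The BufferMode, file path, and class name here are illustrative assumptions, not part of the patch; only the option constant and its range come from the diff above.

import java.util.Properties;

import com.bigdata.journal.BufferMode;
import com.bigdata.journal.Journal;
import com.bigdata.rwstore.RWStore;

public class SmallSlotConfigExample {

    public static void main(final String[] args) {

        final Properties properties = new Properties();

        // DiskRW selects the RWStore-backed buffer strategy.
        properties.setProperty(Journal.Options.BUFFER_MODE,
                BufferMode.DiskRW.toString());

        // Hypothetical journal file location.
        properties.setProperty(Journal.Options.FILE, "/tmp/smallslots.jnl");

        // Slots of 1024 bytes or less are treated as small slots; the
        // shipped default is "0", which disables the special processing.
        properties.setProperty(RWStore.Options.SMALL_SLOT_TYPE, "1024");

        final Journal store = new Journal(properties);
        try {
            // ... allocate, write, and commit as usual ...
            store.commit();
        } finally {
            store.destroy();
        }
    }
}

This mirrors the getSmallSlotStore(int) helper added to TestRWJournal.java, which sets the same property before opening the store.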
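The new single-word RWStore.fndBit(int) is the kernel of the free-bit search used by FixedAllocator.allocFromIndex(): it returns the index of the first clear (free) bit in a 32-bit allocation word, or -1 when the word is fully allocated. A stand-alone distillation for experimentation is below; the method body mirrors the patch, while the Integer.numberOfTrailingZeros variant at the end is an editor suggestion, not code from the revision.

public final class BitScanDemo {

    /** First clear (free) bit in a 32-bit word, or -1 if all bits are set. */
    static int fndBit(final int bits) {
        if (bits != 0xFFFFFFFF) {
            for (int k = 0; k < 32; k++) {
                if ((bits & (1 << k)) == 0) {
                    return k;
                }
            }
        }
        return -1;
    }

    public static void main(final String[] args) {
        System.out.println(fndBit(0));          // 0  : empty word, first bit free
        System.out.println(fndBit(0b0111));     // 3  : low three bits allocated
        System.out.println(fndBit(0xFFFFFFFF)); // -1 : word fully allocated
        // Branch-free equivalent for the non-full case: invert the word and
        // count trailing zeros.
        System.out.println(Integer.numberOfTrailingZeros(~0b0111)); // 3
    }
}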
From: <tho...@us...> - 2014-08-16 14:33:59
|
Revision: 8619
 http://sourceforge.net/p/bigdata/code/8619
Author: thompsonbry
Date: 2014-08-16 14:33:51 +0000 (Sat, 16 Aug 2014)
Log Message:
-----------
This has been traced to a query hint override to control the operator parallelism (maxParallel=2). While the HashIndexOp does c

Workaround: Bind the maxParallel query hint more closely to a specific join or other operator (sometimes difficult or impossible).

Workaround: Do not use the maxParallel query hint if the query has solution set hash joins (which appear if there is a nested complex optional join group, sub-select, INCLUDE, etc.)

Fix: Added ISingleThreadedOp interface and modified PipelineOp.getMaxParallel() to test for that interface and ignore the annotation when the interface is present on an operator. This interface was added to the following operators:

- HashIndexOp
- HTreeNamedSubqueryOp
- JVMNamedSubqueryOp
- HTreeHashJoinOp
- JVMHashJoinOp
- HTreeDistinctBindingSetsOp (also fixed unit tests that looked for UnsupportedOperationException rather than IllegalArgumentException)
- PipelinedAggregationOp (also fixed unit tests that looked for UnsupportedOperationException rather than IllegalArgumentException)

Other changes to:

- PipelineOp (added interface test and assertMaxParallelOne() method).

Note: Also reduced the #of threads for StressTestConcurrentUnisolatedIndices since it was failing in CI with "unable to create new native thread".

Modified Paths:
--------------
branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/PipelineOp.java
branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/controller/HTreeNamedSubqueryOp.java
branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/controller/JVMNamedSubqueryOp.java
branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/join/HTreeHashJoinOp.java
branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/join/HashIndexOp.java
branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/join/HashJoinOp.java
branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/join/JVMHashJoinOp.java
branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/solutions/HTreeDistinctBindingSetsOp.java
branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/solutions/PipelinedAggregationOp.java
branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/bop/join/TestJVMHashJoinOp.java
branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/bop/solutions/TestHTreeDistinctBindingSets.java
branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/journal/StressTestUnisolatedReadWriteIndex.java

Added Paths:
-----------
branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/ISingleThreadedOp.java

Added: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/ISingleThreadedOp.java
===================================================================
--- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/ISingleThreadedOp.java	                        (rev 0)
+++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/ISingleThreadedOp.java	2014-08-16 14:33:51 UTC (rev 8619)
@@ -0,0 +1,40 @@
+/*
+
+Copyright (C) SYSTAP, LLC 2006-2008.  All rights reserved.
+
+Contact:
+     SYSTAP, LLC
+     4501 Tower Road
+     Greensboro, NC 27410
+     lic...@bi...
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
+
+*/
+/*
+ * Created on Aug 26, 2010
+ */
+package com.bigdata.bop;
+
+/**
+ * Marker interface for an operator whose instances do not support concurrent
+ * execution.
+ *
+ * @author <a href="mailto:tho...@us...">Bryan Thompson</a>
+ *
+ * @see PipelineOp.Annotations#MAX_PARALLEL
+ */
+public interface ISingleThreadedOp {
+
+}

Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/PipelineOp.java
===================================================================
--- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/PipelineOp.java	2014-08-13 13:48:32 UTC (rev 8618)
+++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/PipelineOp.java	2014-08-16 14:33:51 UTC (rev 8619)
@@ -122,6 +122,8 @@
         * {@link #MAX_MESSAGES_PER_TASK} and {@link #PIPELINE_QUEUE_CAPACITY}
         * have less effect and performance tends to be best around a modest
         * value (10) for those annotations.
+        *
+        * @see ISingleThreadedOp
         */
        String MAX_PARALLEL = PipelineOp.class.getName() + ".maxParallel";

@@ -505,17 +507,49 @@
//
//    }

-    /**
-     * The maximum parallelism with which tasks may be evaluated for this
-     * operator (this is a per-shard limit in scale-out). A value of ONE (1)
-     * indicates that at most ONE (1) instance of this task may be executing in
-     * parallel for a given shard and may be used to indicate that the operator
-     * evaluation task is not thread-safe.
-     *
-     * @see Annotations#MAX_PARALLEL
-     */
+    /**
+     * Throws {@link IllegalArgumentException} if the
+     * {@link Annotations#MAX_PARALLEL} annotation is set to anything other
+     * than ONE (1).
+     */
+    final protected void assertMaxParallelOne() {
+
+        /*
+         * Note: Tests the annotation, not getMaxParallel(), since we want to
+         * make sure the annotation is valid and getMaxParallel() also tests for
+         * the ISingleThreadedOp interface.
+         */
+        if (getProperty(PipelineOp.Annotations.MAX_PARALLEL,
+                PipelineOp.Annotations.DEFAULT_MAX_PARALLEL) != 1) {
+
+            throw new IllegalArgumentException(
+                    PipelineOp.Annotations.MAX_PARALLEL + "="
+                            + getMaxParallel());
+
+        }
+
+    }
+
+    /**
+     * The maximum parallelism with which tasks may be evaluated for this
+     * operator (this is a per-shard limit in scale-out). A value of ONE (1)
+     * indicates that at most ONE (1) instance of this task may be executing in
+     * parallel for a given shard and may be used to indicate that the operator
+     * evaluation task is not thread-safe.
+     *
+     * @see Annotations#MAX_PARALLEL
+     * @see ISingleThreadedOp
+     *
+     * @see <a href="http://trac.bigdata.com/ticket/1002"> </a>
+     */
    final public int getMaxParallel() {

+        if(this instanceof ISingleThreadedOp) {
+
+            // Ignore the annotation value.
+            return 1;
+
+        }
+
        return getProperty(PipelineOp.Annotations.MAX_PARALLEL,
                PipelineOp.Annotations.DEFAULT_MAX_PARALLEL);

Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/controller/HTreeNamedSubqueryOp.java
===================================================================
--- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/controller/HTreeNamedSubqueryOp.java	2014-08-13 13:48:32 UTC (rev 8618)
+++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/controller/HTreeNamedSubqueryOp.java	2014-08-16 14:33:51 UTC (rev 8619)
@@ -40,6 +40,7 @@
 import com.bigdata.bop.BOpUtility;
 import com.bigdata.bop.IBindingSet;
 import com.bigdata.bop.IQueryAttributes;
+import com.bigdata.bop.ISingleThreadedOp;
 import com.bigdata.bop.IVariable;
 import com.bigdata.bop.NV;
 import com.bigdata.bop.PipelineOp;
@@ -73,7 +74,8 @@
 *
 * @author <a href="mailto:tho...@us...">Bryan Thompson</a>
 */
-public class HTreeNamedSubqueryOp extends PipelineOp implements INamedSubqueryOp {
+public class HTreeNamedSubqueryOp extends PipelineOp implements
+        INamedSubqueryOp, ISingleThreadedOp {

    static private final transient Logger log = Logger
            .getLogger(HTreeNamedSubqueryOp.class);
@@ -123,11 +125,7 @@
                    + getEvaluationContext());
        }

-        if (getMaxParallel() != 1) {
-            throw new IllegalArgumentException(
-                    PipelineOp.Annotations.MAX_PARALLEL + "="
-                            + getMaxParallel());
-        }
+        assertMaxParallelOne();

        if (!isAtOnceEvaluation())
            throw new IllegalArgumentException();

Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/controller/JVMNamedSubqueryOp.java
===================================================================
--- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/controller/JVMNamedSubqueryOp.java	2014-08-13 13:48:32 UTC (rev 8618)
+++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/controller/JVMNamedSubqueryOp.java	2014-08-16 14:33:51 UTC (rev 8619)
@@ -40,6 +40,7 @@
 import com.bigdata.bop.BOpUtility;
 import com.bigdata.bop.IBindingSet;
 import com.bigdata.bop.IQueryAttributes;
+import com.bigdata.bop.ISingleThreadedOp;
 import com.bigdata.bop.IVariable;
 import com.bigdata.bop.NV;
 import com.bigdata.bop.PipelineOp;
@@ -73,7 +74,8 @@
 *
 * @author <a href="mailto:tho...@us...">Bryan Thompson</a>
 */
-public class JVMNamedSubqueryOp extends PipelineOp implements INamedSubqueryOp {
+public class JVMNamedSubqueryOp extends PipelineOp implements INamedSubqueryOp,
+        ISingleThreadedOp {

    static private final transient Logger log = Logger
            .getLogger(JVMNamedSubqueryOp.class);
@@ -112,11 +114,7 @@
                    + getEvaluationContext());
        }

-        if (getMaxParallel() != 1) {
-            throw new IllegalArgumentException(
-                    PipelineOp.Annotations.MAX_PARALLEL + "="
-                            + getMaxParallel());
-        }
+        assertMaxParallelOne();

        if (!isAtOnceEvaluation())
            throw new IllegalArgumentException();

Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/join/HTreeHashJoinOp.java
===================================================================
--- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/join/HTreeHashJoinOp.java	2014-08-13 13:48:32 UTC (rev 8618)
+++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/join/HTreeHashJoinOp.java	2014-08-16 14:33:51 UTC (rev 8619)
@@ -33,6 +33,7 @@
 import com.bigdata.bop.BOpContext;
 import com.bigdata.bop.IBindingSet;
 import com.bigdata.bop.IPredicate;
+import com.bigdata.bop.ISingleThreadedOp;
 import com.bigdata.bop.NV;
 import com.bigdata.bop.PipelineOp;
 import com.bigdata.bop.controller.INamedSolutionSetRef;
@@ -94,9 +95,9 @@
 * @see HTreeHashJoinUtility
 *
 * @author <a href="mailto:tho...@us...">Bryan Thompson</a>
- * @version $Id$
 */
-public class HTreeHashJoinOp<E> extends HashJoinOp<E> {
+public class HTreeHashJoinOp<E> extends HashJoinOp<E> implements
+        ISingleThreadedOp {

    /**
    *
@@ -117,7 +118,7 @@

    }

-    public HTreeHashJoinOp(final BOp[] args, NV... annotations) {
+    public HTreeHashJoinOp(final BOp[] args, final NV... annotations) {

        this(args, NV.asMap(annotations));

@@ -132,9 +133,7 @@

        super(args, annotations);

-        if (getMaxParallel() != 1)
-            throw new UnsupportedOperationException(Annotations.MAX_PARALLEL
-                    + "=" + getMaxParallel());
+        assertMaxParallelOne();

// Note: This is no longer true.  It is now shared via the IQueryAttributes.
//        // shared state is used to share the hash table.

Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/join/HashIndexOp.java
===================================================================
--- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/join/HashIndexOp.java	2014-08-13 13:48:32 UTC (rev 8618)
+++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/join/HashIndexOp.java	2014-08-16 14:33:51 UTC (rev 8619)
@@ -38,6 +38,7 @@
 import com.bigdata.bop.BOpUtility;
 import com.bigdata.bop.IBindingSet;
 import com.bigdata.bop.IQueryAttributes;
+import com.bigdata.bop.ISingleThreadedOp;
 import com.bigdata.bop.IVariable;
 import com.bigdata.bop.NV;
 import com.bigdata.bop.PipelineOp;
@@ -76,7 +77,7 @@
 *
 * @author <a href="mailto:tho...@us...">Bryan Thompson</a>
 */
-abstract public class HashIndexOp extends PipelineOp {
+abstract public class HashIndexOp extends PipelineOp implements ISingleThreadedOp {

//    static private final transient Logger log = Logger
//            .getLogger(HashIndexOp.class);
@@ -144,15 +145,11 @@
//                    + getEvaluationContext());
//        }

-        if (getMaxParallel() != 1) {
-            /*
-             * Parallel evaluation is not allowed. This operator writes on an
-             * object that is not thread-safe for mutation.
-             */
-            throw new IllegalArgumentException(
-                    PipelineOp.Annotations.MAX_PARALLEL + "="
-                            + getMaxParallel());
-        }
+        /*
+         * This operator writes on an object that is not thread-safe for
+         * mutation.
+         */
+        assertMaxParallelOne();

        if (!isLastPassRequested()) {
            /*

Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/join/HashJoinOp.java
===================================================================
--- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/join/HashJoinOp.java	2014-08-13 13:48:32 UTC (rev 8618)
+++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/join/HashJoinOp.java	2014-08-16 14:33:51 UTC (rev 8619)
@@ -65,7 +65,6 @@
 * which join are output.
 *
 * @author <a href="mailto:tho...@us...">Bryan Thompson</a>
- * @version $Id$
 */
abstract public class HashJoinOp<E> extends PipelineOp implements
        IShardwisePipelineOp<E> {

Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/join/JVMHashJoinOp.java
===================================================================
--- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/join/JVMHashJoinOp.java	2014-08-13 13:48:32 UTC (rev 8618)
+++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/join/JVMHashJoinOp.java	2014-08-16 14:33:51 UTC (rev 8619)
@@ -34,6 +34,7 @@
 import com.bigdata.bop.HashMapAnnotations;
 import com.bigdata.bop.IBindingSet;
 import com.bigdata.bop.IPredicate;
+import com.bigdata.bop.ISingleThreadedOp;
 import com.bigdata.bop.NV;
 import com.bigdata.bop.controller.INamedSolutionSetRef;
 import com.bigdata.relation.accesspath.IAccessPath;
@@ -56,9 +57,8 @@
 * @see JVMHashJoinUtility
 *
 * @author <a href="mailto:tho...@us...">Bryan Thompson</a>
- * @version $Id$
 */
-public class JVMHashJoinOp<E> extends HashJoinOp<E> {
+public class JVMHashJoinOp<E> extends HashJoinOp<E> implements ISingleThreadedOp {

    /**
    *
@@ -94,9 +94,7 @@

        super(args, annotations);

-        if (getMaxParallel() != 1)
-            throw new UnsupportedOperationException(Annotations.MAX_PARALLEL
-                    + "=" + getMaxParallel());
+        assertMaxParallelOne();

        assertAtOnceJavaHeapOp();

Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/solutions/HTreeDistinctBindingSetsOp.java
===================================================================
--- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/solutions/HTreeDistinctBindingSetsOp.java	2014-08-13 13:48:32 UTC (rev 8618)
+++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/solutions/HTreeDistinctBindingSetsOp.java	2014-08-16 14:33:51 UTC (rev 8619)
@@ -9,6 +9,7 @@
 import com.bigdata.bop.HTreeAnnotations;
 import com.bigdata.bop.IBindingSet;
 import com.bigdata.bop.IQueryAttributes;
+import com.bigdata.bop.ISingleThreadedOp;
 import com.bigdata.bop.IVariable;
 import com.bigdata.bop.NV;
 import com.bigdata.bop.PipelineOp;
@@ -42,10 +43,9 @@
 * on the native heap and eventually the machine will begin to swap.
 *
 * @author <a href="mailto:tho...@us...">Bryan Thompson</a>
- * @version $Id: DistinctElementFilter.java 3466 2010-08-27 14:28:04Z
- *          thompsonbry $
 */
-public class HTreeDistinctBindingSetsOp extends PipelineOp {
+public class HTreeDistinctBindingSetsOp extends PipelineOp implements
+        ISingleThreadedOp {

//    private final static transient Logger log = Logger
//            .getLogger(DistinctBindingSetsWithHTreeOp.class);
@@ -96,9 +96,7 @@
                    + getEvaluationContext());
        }

-        if (getMaxParallel() != 1)
-            throw new UnsupportedOperationException(Annotations.MAX_PARALLEL
-                    + "=" + getMaxParallel());
+        assertMaxParallelOne();

//        // shared state is used to share the hash table.
//        if (!isSharedState()) {

Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/solutions/PipelinedAggregationOp.java
===================================================================
--- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/solutions/PipelinedAggregationOp.java	2014-08-13 13:48:32 UTC (rev 8618)
+++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/solutions/PipelinedAggregationOp.java	2014-08-16 14:33:51 UTC (rev 8619)
@@ -19,6 +19,7 @@
 import com.bigdata.bop.IBindingSet;
 import com.bigdata.bop.IConstant;
 import com.bigdata.bop.IConstraint;
+import com.bigdata.bop.ISingleThreadedOp;
 import com.bigdata.bop.IValueExpression;
 import com.bigdata.bop.IVariable;
 import com.bigdata.bop.PipelineOp;
@@ -58,10 +59,9 @@
 * the operator can still be invoked multiple times).
 *
 * @author <a href="mailto:tho...@us...">Bryan Thompson</a>
- * @version $Id: DistinctElementFilter.java 3466 2010-08-27 14:28:04Z
- *          thompsonbry $
 */
-public class PipelinedAggregationOp extends GroupByOp {
+public class PipelinedAggregationOp extends GroupByOp implements
+        ISingleThreadedOp {

    private final static transient Logger log = Logger
            .getLogger(PipelinedAggregationOp.class);
@@ -136,14 +136,11 @@
                    + "=" + isLastPassRequested());
        }

-        if (getMaxParallel() != 1) {
-            /*
-             * Note: The operator MUST be single threaded in order to receive
-             * the isLastInvocation notice.
-             */
-            throw new UnsupportedOperationException(Annotations.MAX_PARALLEL
-                    + "=" + getMaxParallel());
-        }
+        /*
+         * Note: The operator MUST be single threaded in order to receive the
+         * isLastInvocation notice.
+         */
+        assertMaxParallelOne();

    }

Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/bop/join/TestJVMHashJoinOp.java
===================================================================
--- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/bop/join/TestJVMHashJoinOp.java	2014-08-13 13:48:32 UTC (rev 8618)
+++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/bop/join/TestJVMHashJoinOp.java	2014-08-16 14:33:51 UTC (rev 8619)
@@ -201,8 +201,8 @@
                            new NV(PipelineOp.Annotations.MAX_PARALLEL, 2),//
                            namedSet,//
                    }));
-            fail("Expecting: " + UnsupportedOperationException.class);
-        } catch (UnsupportedOperationException ex) {
+            fail("Expecting: " + IllegalArgumentException.class);
+        } catch (IllegalArgumentException ex) {
            if (log.isInfoEnabled())
                log.info("Ignoring expected exception: " + ex);
        }

Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/bop/solutions/TestHTreeDistinctBindingSets.java
===================================================================
--- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/bop/solutions/TestHTreeDistinctBindingSets.java	2014-08-13 13:48:32 UTC (rev 8618)
+++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/bop/solutions/TestHTreeDistinctBindingSets.java	2014-08-16 14:33:51 UTC (rev 8619)
@@ -41,8 +41,6 @@
 * Unit tests for {@link HTreeDistinctBindingSetsOp}.
 *
 * @author <a href="mailto:tho...@us...">Bryan Thompson</a>
- * @version $Id: TestDistinctBindingSets.java 4259 2011-02-28 16:24:53Z
- *          thompsonbry $
 */
public class TestHTreeDistinctBindingSets extends
        AbstractDistinctSolutionsTestCase {
@@ -161,8 +159,8 @@
//                            new NV(PipelineOp.Annotations.MAX_PARALLEL,
//                                    1),//
                    }));
-            fail("Expecting: "+UnsupportedOperationException.class);
-        } catch(UnsupportedOperationException ex) {
+            fail("Expecting: "+IllegalArgumentException.class);
+        } catch(IllegalArgumentException ex) {
            if(log.isInfoEnabled())
                log.info("Ignoring expected exception: "+ex);
        }

Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/journal/StressTestUnisolatedReadWriteIndex.java
===================================================================
--- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/journal/StressTestUnisolatedReadWriteIndex.java	2014-08-13 13:48:32 UTC (rev 8618)
+++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/journal/StressTestUnisolatedReadWriteIndex.java	2014-08-16 14:33:51 UTC (rev 8619)
@@ -142,9 +142,9 @@
                3, // 3,// nresources // 20
                1, // minLocks
                2, // 5 // maxLocks // 3
-                1000, //5000, // ntrials // 1000
+                500, // ntrials // Note: fails in CI @ 1000 (java.lang.OutOfMemoryError: unable to create new native thread)
                3, // keyLen
-                1000, // 1000, // nops
+                1000, // nops
                0.02d,// failureRate
                0.10d // commitRate
                );
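To make the interplay between the marker interface, getMaxParallel(), and assertMaxParallelOne() concrete, here is a self-contained sketch of the same pattern. It deliberately avoids the bigdata classes so it runs without the jars: PipelineOp's annotation machinery is reduced to a plain Map, and names like Op and HashIndexLikeOp are illustrative stand-ins, not classes from the revision. The two methods mirror the PipelineOp code in the diff above.

import java.util.HashMap;
import java.util.Map;

public class SingleThreadedMarkerDemo {

    /** Plays the role of ISingleThreadedOp. */
    interface SingleThreadedOp {}

    static abstract class Op {

        static final String MAX_PARALLEL = "maxParallel";
        static final int DEFAULT_MAX_PARALLEL = 1;

        private final Map<String, Object> annotations;

        Op(final Map<String, Object> annotations) {
            this.annotations = annotations;
        }

        final int getProperty(final String name, final int defaultValue) {
            final Object v = annotations.get(name);
            return v == null ? defaultValue : (Integer) v;
        }

        /** Mirrors PipelineOp.getMaxParallel(): the marker wins over the annotation. */
        final int getMaxParallel() {
            if (this instanceof SingleThreadedOp) {
                return 1; // ignore the annotation value.
            }
            return getProperty(MAX_PARALLEL, DEFAULT_MAX_PARALLEL);
        }

        /** Mirrors assertMaxParallelOne(): reject an explicit maxParallel != 1. */
        final void assertMaxParallelOne() {
            if (getProperty(MAX_PARALLEL, DEFAULT_MAX_PARALLEL) != 1) {
                throw new IllegalArgumentException(MAX_PARALLEL + "="
                        + getProperty(MAX_PARALLEL, DEFAULT_MAX_PARALLEL));
            }
        }
    }

    /** Stands in for HashIndexOp and friends. */
    static class HashIndexLikeOp extends Op implements SingleThreadedOp {
        HashIndexLikeOp(final Map<String, Object> annotations) {
            super(annotations);
            // This operator writes on a structure that is not thread-safe.
            assertMaxParallelOne();
        }
    }

    public static void main(final String[] args) {

        // An explicit maxParallel=2 annotation is rejected at construction
        // time with IllegalArgumentException, which is exactly what the
        // updated unit tests now expect.
        final Map<String, Object> hinted = new HashMap<>();
        hinted.put(Op.MAX_PARALLEL, 2);
        try {
            new HashIndexLikeOp(hinted);
        } catch (IllegalArgumentException expected) {
            System.out.println("rejected: " + expected.getMessage());
        }

        // Without any annotation, the marker pins evaluation to one thread
        // regardless of what a query hint might otherwise request.
        System.out.println(new HashIndexLikeOp(new HashMap<>()).getMaxParallel()); // 1
    }
}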