From: <tho...@us...> - 2013-04-30 17:44:49
Revision: 7095 http://bigdata.svn.sourceforge.net/bigdata/?rev=7095&view=rev Author: thompsonbry Date: 2013-04-30 17:44:39 +0000 (Tue, 30 Apr 2013) Log Message: ----------- Moved the bounceZK and enterErrorState RMI methods from HAGlue into a HAGlueTest interface that is only exposed by the HAJournalTest class. This class is accessible from the HA CI test suite, but is not deployed. This is a much nicer way of exposing hooks for the test suite. The HA test suite is 100% green. Modified Paths: -------------- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/HAGlue.java branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/HAPipelineGlue.java branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractJournal.java branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournal.java branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA2JournalServer.java branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHAJournalServerOverride.java Added Paths: ----------- branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/HAJournalTest.java Removed Paths: ------------- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/HAGlueDelegate.java branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/MyHAJournal.java Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/HAGlue.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/HAGlue.java 2013-04-30 15:32:54 UTC (rev 7094) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/HAGlue.java 2013-04-30 17:44:39 UTC (rev 7095) @@ -74,28 +74,6 @@ */ /** - * This method may be issued to force the service to close and then reopen - * its zookeeper connection. This is a drastic action which will cause all - * <i>ephemeral</i> tokens for that service to be retracted from zookeeper. - * When the service reconnects, it will reestablish those connections. - * <p> - * Note: This method is intended primarily as an aid in writing various HA - * unit tests. - * - * @see http://wiki.apache.org/hadoop/ZooKeeper/FAQ#A4 - */ - public Future<Void> bounceZookeeperConnection() throws IOException; - - /** - * Force the end point to enter into an error state from which it will - * naturally move back into a consistent state. - * <p> - * Note: This method is intended primarily as an aid in writing various HA - * unit tests. - */ - public Future<Void> enterErrorState() throws IOException; - - /** * Await the service being ready to partitipate in an HA quorum. The * preconditions include: * <ol> Deleted: branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/HAGlueDelegate.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/HAGlueDelegate.java 2013-04-30 15:32:54 UTC (rev 7094) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/HAGlueDelegate.java 2013-04-30 17:44:39 UTC (rev 7095) @@ -1,309 +0,0 @@ -/** - -Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. - -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ -package com.bigdata.ha; - -import java.io.IOException; -import java.net.InetSocketAddress; -import java.rmi.RemoteException; -import java.security.DigestException; -import java.security.NoSuchAlgorithmException; -import java.util.UUID; -import java.util.concurrent.BrokenBarrierException; -import java.util.concurrent.Future; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.TimeoutException; - -import com.bigdata.ha.msg.IHA2PhaseAbortMessage; -import com.bigdata.ha.msg.IHA2PhaseCommitMessage; -import com.bigdata.ha.msg.IHA2PhasePrepareMessage; -import com.bigdata.ha.msg.IHADigestRequest; -import com.bigdata.ha.msg.IHADigestResponse; -import com.bigdata.ha.msg.IHAGatherReleaseTimeRequest; -import com.bigdata.ha.msg.IHAGlobalWriteLockRequest; -import com.bigdata.ha.msg.IHALogDigestRequest; -import com.bigdata.ha.msg.IHALogDigestResponse; -import com.bigdata.ha.msg.IHALogRequest; -import com.bigdata.ha.msg.IHALogRootBlocksRequest; -import com.bigdata.ha.msg.IHALogRootBlocksResponse; -import com.bigdata.ha.msg.IHANotifyReleaseTimeRequest; -import com.bigdata.ha.msg.IHANotifyReleaseTimeResponse; -import com.bigdata.ha.msg.IHAReadRequest; -import com.bigdata.ha.msg.IHAReadResponse; -import com.bigdata.ha.msg.IHARebuildRequest; -import com.bigdata.ha.msg.IHARootBlockRequest; -import com.bigdata.ha.msg.IHARootBlockResponse; -import com.bigdata.ha.msg.IHASendStoreResponse; -import com.bigdata.ha.msg.IHASnapshotDigestRequest; -import com.bigdata.ha.msg.IHASnapshotDigestResponse; -import com.bigdata.ha.msg.IHASnapshotRequest; -import com.bigdata.ha.msg.IHASnapshotResponse; -import com.bigdata.ha.msg.IHASyncRequest; -import com.bigdata.ha.msg.IHAWriteMessage; -import com.bigdata.ha.msg.IHAWriteSetStateRequest; -import com.bigdata.ha.msg.IHAWriteSetStateResponse; -import com.bigdata.quorum.AsynchronousQuorumCloseException; -import com.bigdata.quorum.QuorumException; - -/** - * Delegation pattern. - * - * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * - * @deprecated No longer used. 
- */ -public class HAGlueDelegate implements HAGlue { - - private final HAGlue delegate; - - public HAGlueDelegate(final HAGlue delegate) { - - if(delegate == null) - throw new IllegalArgumentException(); - - this.delegate = delegate; - - } - - @Override - public Future<Void> bounceZookeeperConnection() throws IOException { - return delegate.bounceZookeeperConnection(); - } - - @Override - public Future<Void> enterErrorState() throws IOException { - return delegate.enterErrorState(); - } - - @Override - public UUID getServiceId() throws IOException { - return delegate.getServiceId(); - } - - @Override - public Future<Boolean> prepare2Phase(IHA2PhasePrepareMessage msg) - throws IOException { - return delegate.prepare2Phase(msg); - } - - @Override - public Future<IHAReadResponse> readFromDisk( - IHAReadRequest readMessage) throws IOException { - return delegate.readFromDisk(readMessage); - } - - @Override - public InetSocketAddress getWritePipelineAddr() throws IOException { - return delegate.getWritePipelineAddr(); - } - - @Override - public IHARootBlockResponse getRootBlock(IHARootBlockRequest msg) throws IOException { - return delegate.getRootBlock(msg); - } - - @Override - public Future<Void> moveToEndOfPipeline() throws IOException { - return delegate.moveToEndOfPipeline(); - } - - @Override - public Future<Void> commit2Phase(IHA2PhaseCommitMessage commitMessage) - throws IOException { - return delegate.commit2Phase(commitMessage); - } - - @Override - public Future<Void> abort2Phase(IHA2PhaseAbortMessage abortMessage) - throws IOException { - return delegate.abort2Phase(abortMessage); - } - - @Override - public Future<Void> receiveAndReplicate(final IHASyncRequest req, - final IHAWriteMessage msg) throws IOException { - return delegate.receiveAndReplicate(req, msg); - } - - @Override - public UUID getServiceUUID() throws IOException { - return delegate.getServiceUUID(); - } - - @Override - public Class getServiceIface() throws IOException { - return delegate.getServiceIface(); - } - - @Override - public String getHostname() throws IOException { - return delegate.getHostname(); - } - - @Override - public String getServiceName() throws IOException { - return delegate.getServiceName(); - } - - @Override - public void destroy() throws RemoteException { - delegate.destroy(); - } - - @Override - public Future<Void> gatherMinimumVisibleCommitTime( - final IHAGatherReleaseTimeRequest req) throws IOException { - return delegate.gatherMinimumVisibleCommitTime(req); - } - - @Override - public IHANotifyReleaseTimeResponse notifyEarliestCommitTime( - final IHANotifyReleaseTimeRequest req) throws IOException, - InterruptedException, BrokenBarrierException { - return delegate.notifyEarliestCommitTime(req); - } - -// @Override -// public Future<Void> getTXSCriticalSectionLockOnLeader( -// final IHATXSLockRequest req) throws IOException { -// return delegate.getTXSCriticalSectionLockOnLeader(req); -// } - -// @Override -// public long nextTimestamp() throws IOException { -// return delegate.nextTimestamp(); -// } -// -// @Override -// public long newTx(long timestamp) throws IOException { -// return delegate.newTx(timestamp); -// } -// -// @Override -// public long commit(long tx) throws ValidationError, IOException { -// return delegate.commit(tx); -// } -// -// @Override -// public void abort(long tx) throws IOException { -// delegate.abort(tx); -// } -// -// @Override -// public void notifyCommit(long commitTime) throws IOException { -// delegate.notifyCommit(commitTime); -// } -// -// @Override -// 
public long getLastCommitTime() throws IOException { -// return delegate.getLastCommitTime(); -// } -// -// @Override -// public long getReleaseTime() throws IOException { -// return delegate.getReleaseTime(); -// } - - @Override - public IHALogRootBlocksResponse getHALogRootBlocksForWriteSet( - IHALogRootBlocksRequest msg) throws IOException { - return delegate.getHALogRootBlocksForWriteSet(msg); - } - - @Override - public Future<Void> sendHALogForWriteSet(IHALogRequest msg) - throws IOException { - return delegate.sendHALogForWriteSet(msg); - } - - @Override - public int getNSSPort() throws IOException { - return delegate.getNSSPort(); - } - - @Override - public RunState getRunState() throws IOException { - return delegate.getRunState(); - } - - @Override - public String getExtendedRunState() throws IOException { - return delegate.getExtendedRunState(); - } - - @Override - public HAStatusEnum getHAStatus() throws IOException { - return delegate.getHAStatus(); - } - - @Override - public Future<IHASendStoreResponse> sendHAStore(IHARebuildRequest msg) - throws IOException { - return delegate.sendHAStore(msg); - } - - @Override - public IHADigestResponse computeDigest(final IHADigestRequest req) - throws IOException, NoSuchAlgorithmException, DigestException { - return delegate.computeDigest(req); - } - - @Override - public IHALogDigestResponse computeHALogDigest(final IHALogDigestRequest req) - throws IOException, NoSuchAlgorithmException, DigestException { - return delegate.computeHALogDigest(req); - } - - @Override - public IHASnapshotDigestResponse computeHASnapshotDigest( - final IHASnapshotDigestRequest req) throws IOException, - NoSuchAlgorithmException, DigestException { - return delegate.computeHASnapshotDigest(req); - } - - @Override - public Future<Void> globalWriteLock(final IHAGlobalWriteLockRequest req) - throws IOException, TimeoutException, InterruptedException { - return delegate.globalWriteLock(req); - } - - @Override - public IHAWriteSetStateResponse getHAWriteSetState( - final IHAWriteSetStateRequest req) throws IOException { - return delegate.getHAWriteSetState(req); - } - - @Override - public long awaitHAReady(final long timeout, final TimeUnit unit) - throws IOException, InterruptedException, QuorumException, - AsynchronousQuorumCloseException, TimeoutException { - return delegate.awaitHAReady(timeout, unit); - } - - @Override - public Future<IHASnapshotResponse> takeSnapshot(final IHASnapshotRequest req) - throws IOException { - return delegate.takeSnapshot(req); - } - -} Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/HAPipelineGlue.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/HAPipelineGlue.java 2013-04-30 15:32:54 UTC (rev 7094) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/HAPipelineGlue.java 2013-04-30 17:44:39 UTC (rev 7095) @@ -95,7 +95,7 @@ * leave without a service leave, if services which are joined always * rejoin the write pipeline when the observe a pipeline leave, and if * we can force a service leave (by instructing the service to - * {@link #bounceZookeeperConnection()}) if the service fails to + * bounce the zookeeper connection) if the service fails to * rejoin the pipeline, then it would be easier to reorganize the * pipeline. 
[This would still make it possible for services to add * themselves to the pipeline without being joined with the quorum but Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractJournal.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractJournal.java 2013-04-30 15:32:54 UTC (rev 7094) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractJournal.java 2013-04-30 17:44:39 UTC (rev 7095) @@ -6558,28 +6558,28 @@ } - @Override - public Future<Void> bounceZookeeperConnection() { - final FutureTask<Void> ft = new FutureTaskMon<Void>(new Runnable() { - public void run() { - // NOP (not implemented at this layer). - } - }, null); - ft.run(); - return getProxy(ft); - } +// @Override +// public Future<Void> bounceZookeeperConnection() { +// final FutureTask<Void> ft = new FutureTaskMon<Void>(new Runnable() { +// public void run() { +// // NOP (not implemented at this layer). +// } +// }, null); +// ft.run(); +// return getProxy(ft); +// } +// +// @Override +// public Future<Void> enterErrorState() { +// final FutureTask<Void> ft = new FutureTaskMon<Void>(new Runnable() { +// public void run() { +// // NOP (not implemented at this layer). +// } +// }, null); +// ft.run(); +// return getProxy(ft); +// } - @Override - public Future<Void> enterErrorState() { - final FutureTask<Void> ft = new FutureTaskMon<Void>(new Runnable() { - public void run() { - // NOP (not implemented at this layer). - } - }, null); - ft.run(); - return getProxy(ft); - } - /** * {@inheritDoc} * <p> Modified: branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournal.java =================================================================== --- branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournal.java 2013-04-30 15:32:54 UTC (rev 7094) +++ branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournal.java 2013-04-30 17:44:39 UTC (rev 7095) @@ -103,7 +103,6 @@ import com.bigdata.journal.jini.ha.HAJournalServer.RunStateEnum; import com.bigdata.quorum.AsynchronousQuorumCloseException; import com.bigdata.quorum.Quorum; -import com.bigdata.quorum.zk.ZKQuorumImpl; import com.bigdata.service.AbstractTransactionService; import com.bigdata.service.jini.JiniClient; import com.bigdata.service.jini.RemoteAdministrable; @@ -1375,84 +1374,6 @@ } - @Override - public Future<Void> bounceZookeeperConnection() { - - final FutureTask<Void> ft = new FutureTaskMon<Void>( - new BounceZookeeperConnectionTask(), null/* result */); - - ft.run(); - - return getProxy(ft); - - } - - private class BounceZookeeperConnectionTask implements Runnable { - - @SuppressWarnings("rawtypes") - public void run() { - - if (getQuorum() instanceof ZKQuorumImpl) { - - // Note: Local method call on AbstractJournal. - final UUID serviceId = getServiceId(); - - try { - - haLog.warn("BOUNCING ZOOKEEPER CONNECTION: " - + serviceId); - - // Close the current connection (if any). - ((ZKQuorumImpl) getQuorum()).getZookeeper().close(); - - // Obtain a new connection. - ((ZKQuorumImpl) getQuorum()).getZookeeper(); - - haLog.warn("RECONNECTED TO ZOOKEEPER: " + serviceId); - - } catch (InterruptedException e) { - - // Propagate the interrupt. 
- Thread.currentThread().interrupt(); - - } - - } - } - - } - - @Override - public Future<Void> enterErrorState() { - - final FutureTask<Void> ft = new FutureTaskMon<Void>( - new EnterErrorStateTask(), null/* result */); - - ft.run(); - - return getProxy(ft); - - } - - private class EnterErrorStateTask implements Runnable { - - public void run() { - - @SuppressWarnings("unchecked") - final HAQuorumService<HAGlue, HAJournal> service = (HAQuorumService<HAGlue, HAJournal>) getQuorum() - .getClient(); - - // Note: Local method call on AbstractJournal. - final UUID serviceId = getServiceId(); - - haLog.warn("ENTERING ERROR STATE: " + serviceId); - - service.enterErrorState(); - - } - - } - /** * Note: The invocation layer factory is reused for each exported proxy (but * the exporter itself is paired 1:1 with the exported proxy). Copied: branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/HAJournalTest.java (from rev 7092, branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/MyHAJournal.java) =================================================================== --- branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/HAJournalTest.java (rev 0) +++ branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/HAJournalTest.java 2013-04-30 17:44:39 UTC (rev 7095) @@ -0,0 +1,209 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Oct 31, 2012 + */ +package com.bigdata.journal.jini.ha; + +import java.io.IOException; +import java.rmi.Remote; +import java.util.UUID; +import java.util.concurrent.Future; +import java.util.concurrent.FutureTask; + +import net.jini.config.Configuration; +import net.jini.config.ConfigurationException; + +import org.apache.log4j.Logger; + +import com.bigdata.concurrent.FutureTaskMon; +import com.bigdata.ha.HAGlue; +import com.bigdata.ha.QuorumService; +import com.bigdata.journal.jini.ha.HAJournalServer.HAQuorumService; +import com.bigdata.quorum.Quorum; +import com.bigdata.quorum.zk.ZKQuorumImpl; + +/** + * Class extends {@link HAJournal} and allows the unit tests to play various + * games with the services, simulating a variety of different kinds of problems. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + */ +public class HAJournalTest extends HAJournal { + + private static final Logger log = Logger.getLogger(HAJournal.class); + + public HAJournalTest(final HAJournalServer server, + final Configuration config, + final Quorum<HAGlue, QuorumService<HAGlue>> quorum) + throws ConfigurationException, IOException { + + super(server, config, quorum); + + } + + @Override + protected HAGlue newHAGlue(final UUID serviceId) { + + return new HAGlueTestService(serviceId); + + } + + /** + * A {@link Remote} interface for new methods published by the service. 
+ */ + public static interface HAGlueTest extends HAGlue { + + /** + * Logs a "hello world" message. + */ + public void helloWorld() throws IOException; + + /** + * Force the end point to enter into an error state from which it will + * naturally move back into a consistent state. + * <p> + * Note: This method is intended primarily as an aid in writing various HA + * unit tests. + */ + public Future<Void> enterErrorState() throws IOException; + + /** + * This method may be issued to force the service to close and then reopen + * its zookeeper connection. This is a drastic action which will cause all + * <i>ephemeral</i> tokens for that service to be retracted from zookeeper. + * When the service reconnects, it will reestablish those connections. + * <p> + * Note: This method is intended primarily as an aid in writing various HA + * unit tests. + * + * @see http://wiki.apache.org/hadoop/ZooKeeper/FAQ#A4 + */ + public Future<Void> bounceZookeeperConnection() throws IOException; + + } + + /** + * Class extends the public RMI interface of the {@link HAJournal}. + * <p> + * Note: Any new RMI methods must be (a) declared on an interface; and (b) + * must throw {@link IOException}. + */ + protected class HAGlueTestService extends HAJournal.HAGlueService implements + HAGlueTest { + + protected HAGlueTestService(final UUID serviceId) { + + super(serviceId); + + } + + @Override + public void helloWorld() throws IOException { + + log.warn("Hello world!"); + + } + + @Override + public Future<Void> enterErrorState() { + + final FutureTask<Void> ft = new FutureTaskMon<Void>( + new EnterErrorStateTask(), null/* result */); + + ft.run(); + + return getProxy(ft); + + } + + private class EnterErrorStateTask implements Runnable { + + public void run() { + + @SuppressWarnings("unchecked") + final HAQuorumService<HAGlue, HAJournal> service = (HAQuorumService<HAGlue, HAJournal>) getQuorum() + .getClient(); + + // Note: Local method call on AbstractJournal. + final UUID serviceId = getServiceId(); + + haLog.warn("ENTERING ERROR STATE: " + serviceId); + + service.enterErrorState(); + + } + + } + + @Override + public Future<Void> bounceZookeeperConnection() { + + final FutureTask<Void> ft = new FutureTaskMon<Void>( + new BounceZookeeperConnectionTask(), null/* result */); + + ft.run(); + + return getProxy(ft); + + } + + private class BounceZookeeperConnectionTask implements Runnable { + + @SuppressWarnings("rawtypes") + public void run() { + + if (getQuorum() instanceof ZKQuorumImpl) { + + // Note: Local method call on AbstractJournal. + final UUID serviceId = getServiceId(); + + try { + + haLog.warn("BOUNCING ZOOKEEPER CONNECTION: " + + serviceId); + + // Close the current connection (if any). + ((ZKQuorumImpl) getQuorum()).getZookeeper().close(); + + // Obtain a new connection. + ((ZKQuorumImpl) getQuorum()).getZookeeper(); + + haLog.warn("RECONNECTED TO ZOOKEEPER: " + serviceId); + + } catch (InterruptedException e) { + + // Propagate the interrupt. + Thread.currentThread().interrupt(); + + } + + } + } + + } + + } // HAGlueTestService + +} Deleted: branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/MyHAJournal.java =================================================================== --- branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/MyHAJournal.java 2013-04-30 15:32:54 UTC (rev 7094) +++ branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/MyHAJournal.java 2013-04-30 17:44:39 UTC (rev 7095) @@ -1,101 +0,0 @@ -/** - -Copyright (C) SYSTAP, LLC 2006-2007. 
All rights reserved. - -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ -/* - * Created on Oct 31, 2012 - */ -package com.bigdata.journal.jini.ha; - -import java.io.IOException; -import java.rmi.Remote; -import java.util.UUID; - -import org.apache.log4j.Logger; - -import net.jini.config.Configuration; -import net.jini.config.ConfigurationException; - -import com.bigdata.ha.HAGlue; -import com.bigdata.ha.QuorumService; -import com.bigdata.quorum.Quorum; - -/** - * Class extends {@link HAJournal}. - * - * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - */ -public class MyHAJournal extends HAJournal { - - private static final Logger log = Logger.getLogger(HAJournal.class); - - public MyHAJournal(final HAJournalServer server, - final Configuration config, - final Quorum<HAGlue, QuorumService<HAGlue>> quorum) - throws ConfigurationException, IOException { - - super(server, config, quorum); - - } - - @Override - protected HAGlue newHAGlue(final UUID serviceId) { - -// return super.newHAGlue(serviceId); - return new MyHAGlueService(serviceId); - - } - - /** - * A {@link Remote} interface for new methods published by the service. - */ - public static interface MyHAGlue extends HAGlue { - - public void helloWorld() throws IOException; - - } - - /** - * Class extends the public RMI interface of the {@link HAJournal}. - * <p> - * Note: Any new RMI methods must be (a) declared on an interface; and (b) - * must throw {@link IOException}. 
- */ - protected class MyHAGlueService extends HAJournal.HAGlueService implements - MyHAGlue { - - protected MyHAGlueService(final UUID serviceId) { - - super(serviceId); - - } - - @Override - public void helloWorld() throws IOException { - - log.warn("Hello world!"); - - } - - } - -} Modified: branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA2JournalServer.java =================================================================== --- branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA2JournalServer.java 2013-04-30 15:32:54 UTC (rev 7094) +++ branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA2JournalServer.java 2013-04-30 17:44:39 UTC (rev 7095) @@ -37,6 +37,7 @@ import com.bigdata.ha.HAStatusEnum; import com.bigdata.ha.msg.HARootBlockRequest; import com.bigdata.journal.IRootBlockView; +import com.bigdata.journal.jini.ha.HAJournalTest.HAGlueTest; import com.bigdata.quorum.Quorum; import com.bigdata.rdf.sail.webapp.client.RemoteRepository; @@ -66,7 +67,8 @@ return new String[]{ "com.bigdata.journal.jini.ha.HAJournalServer.restorePolicy=new com.bigdata.journal.jini.ha.DefaultRestorePolicy(0L,1,0)", - "com.bigdata.journal.jini.ha.HAJournalServer.snapshotPolicy=new com.bigdata.journal.jini.ha.NoSnapshotPolicy()" + "com.bigdata.journal.jini.ha.HAJournalServer.snapshotPolicy=new com.bigdata.journal.jini.ha.NoSnapshotPolicy()", + "com.bigdata.journal.jini.ha.HAJournalServer.HAJournalClass=\""+HAJournalTest.class.getName()+"\"" }; } @@ -74,7 +76,7 @@ public TestHA2JournalServer() { } - public TestHA2JournalServer(String name) { + public TestHA2JournalServer(final String name) { super(name); } @@ -214,11 +216,11 @@ if (leader.equals(serverA)) { - serverB.bounceZookeeperConnection().get(); + ((HAGlueTest) serverB).bounceZookeeperConnection().get(); } else { - serverA.bounceZookeeperConnection().get(); + ((HAGlueTest) serverA).bounceZookeeperConnection().get(); } @@ -317,7 +319,7 @@ // final UUID leaderId1 = leader.getServiceId(); - leader.bounceZookeeperConnection().get(); + ((HAGlueTest)leader).bounceZookeeperConnection().get(); // Wait for the quorum to break and then meet again. 
final long token2 = awaitNextQuorumMeet(token1); Modified: branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHAJournalServerOverride.java =================================================================== --- branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHAJournalServerOverride.java 2013-04-30 15:32:54 UTC (rev 7094) +++ branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHAJournalServerOverride.java 2013-04-30 17:44:39 UTC (rev 7095) @@ -33,7 +33,7 @@ import com.bigdata.ha.HAGlue; import com.bigdata.ha.HAStatusEnum; -import com.bigdata.journal.jini.ha.MyHAJournal.MyHAGlue; +import com.bigdata.journal.jini.ha.HAJournalTest.HAGlueTest; /** * Unit test of the ability to override the {@link HAJournal} implementation @@ -63,7 +63,7 @@ return new String[]{ "com.bigdata.journal.jini.ha.HAJournalServer.restorePolicy=new com.bigdata.journal.jini.ha.DefaultRestorePolicy(0L,1,0)", "com.bigdata.journal.jini.ha.HAJournalServer.snapshotPolicy=new com.bigdata.journal.jini.ha.NoSnapshotPolicy()", - "com.bigdata.journal.jini.ha.HAJournalServer.HAJournalClass=\"com.bigdata.journal.jini.ha.MyHAJournal\"" + "com.bigdata.journal.jini.ha.HAJournalServer.HAJournalClass=\""+HAJournalTest.class.getName()+"\"" }; } @@ -115,7 +115,7 @@ assertTrue(getSnapshotDirA().exists()); // Verify that we can invoke extension methods on the service. - ((MyHAGlue)serverA).helloWorld(); + ((HAGlueTest)serverA).helloWorld(); // serverA.enterErrorState().get(); //
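The r7095 change above boils down to a simple pattern: test-only RMI hooks are kept off the deployed HAGlue interface and are instead declared on a HAGlueTest sub-interface that only the service class exported by HAJournalTest implements, so the CI tests reach the hooks by casting the discovered proxy. A compilable miniature of that pattern is sketched below; the ExampleUsage class and its bounce() method are illustrative only and are not part of the commit, while the interface methods mirror those shown in the diff.

import java.io.IOException;
import java.rmi.Remote;
import java.util.concurrent.Future;

/** Stand-in for the deployed interface: production RMI methods only, no test hooks. */
interface HAGlue extends Remote {
}

/** Test-only extension, implemented solely by the service class exported by HAJournalTest. */
interface HAGlueTest extends HAGlue {
    void helloWorld() throws IOException;
    Future<Void> enterErrorState() throws IOException;
    Future<Void> bounceZookeeperConnection() throws IOException;
}

/** Illustrative caller: a CI test casts the discovered HAGlue proxy to the test interface. */
class ExampleUsage {
    static void bounce(final HAGlue serverB) throws Exception {
        ((HAGlueTest) serverB).bounceZookeeperConnection().get();
    }
}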
From: <tho...@us...> - 2013-05-03 16:35:52
Revision: 7100
          http://bigdata.svn.sourceforge.net/bigdata/?rev=7100&view=rev
Author:   thompsonbry
Date:     2013-05-03 16:35:41 +0000 (Fri, 03 May 2013)

Log Message:
-----------
- RWStore.postHACommit(): A lock ordering problem was causing a deadlock in testABCMultiTransactionFollowerReads(). The allocation lock must be taken before the extension lock. Javadoc update.
- RWStore.resetFromRootBlock(): The same lock ordering problem existed here. Javadoc update.
- RWStore: Added / updated javadoc for the allocation and extension locks and reviewed all locking patterns in RWStore.
- RWStore: getData() requires shared access to the allocators. That access must be MUTEX with allocation mutation in postHACommit(). Modified getData() to use the m_allocationReadLock for shared access (it was using m_extensionLock.readLock(), which is there to make file IO MUTEX with file extension). Note: The m_allocationReadLock COULD be pushed down into physicalAddress() since that is the entry point for readers to translate addresses. Should it?
- RWStore.freeDeferrals(): Modified the code to not release historical commit points if the key range scan on the commit record index would have a toKey LT the fromKey. This was done to support the HA TXS use case.
- RWStore.readRootBlock(): Modified to take the m_extensionLock.readLock() to protect against concurrent file extension.
- RWStore.readFromLatchedAddress(): Modified to take the allocator read lock since it accesses the allocators, and the m_extensionLock.readLock() to protect against concurrent file extension. This method is used by DumpJournal. DumpJournal can now be invoked from the NSS on a live Journal.
- RWStore.writeRaw(): Modified to take the ReadLock of the extensionLock to protect against concurrent file extension.
- RWStore.writeRawBuffer(): Removed the code that was taking the allocation lock. It is not required for file IO.
- WORMStrategy.writeRawBuffer(): Removed the code that was taking the file extension lock since it is always taken by writeOnChannel.
- RWStore.physicalAddress(): Must take the ReadLock of the allocationLock since this is a public method and it reads on the allocators.
- RWStore.getFixedAllocatorCount(): Must take the allocationReadLock.
- RWStore.getAllocatedBlocks(): Must take the allocationReadLock.
- RWStore.getFileStorage(): Must take the allocationReadLock.
- RWStore.getAllocatorSlots(): Must take the allocationReadLock.
- RWStore.computeDigest(): Made it easier to switch between the two digest methods (Old and Alt).
- TestJournalRW is green.
- TestWORMStrategy is green.
- TestHA3JournalStrategy.testABCMultiTransactionFollowerReads() is green locally (I have other changes locally pertaining to how and when the releaseTime is updated).
- Bug fix to TestHA3SnapshotPolicy. It needed to enable online disaster recovery.
- All HA tests are passing.
@see https://sourceforge.net/apps/trac/bigdata/ticket/530 (Journal HA) Modified Paths: -------------- branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/WORMStrategy.java branches/READ_CACHE/bigdata/src/java/com/bigdata/rwstore/RWStore.java branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA3SnapshotPolicy2.java Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/WORMStrategy.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/WORMStrategy.java 2013-05-03 12:11:49 UTC (rev 7099) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/WORMStrategy.java 2013-05-03 16:35:41 UTC (rev 7100) @@ -1167,7 +1167,7 @@ } - @Override + @Override public long getBlockSequence() { return lastBlockSequence; @@ -1238,8 +1238,8 @@ writeCacheService.close(); writeCacheService = newWriteCacheService(); } else { - writeCacheService.reset(); - writeCacheService.setExtent(extent); + writeCacheService.reset(); + writeCacheService.setExtent(extent); } } catch (InterruptedException e) { throw new RuntimeException(e); @@ -1545,10 +1545,10 @@ * @return the physical address of the offset provided */ private long offset2PhysicalAddress(final long offset) { - return offset + headerSize; - } + return offset + headerSize; + } - /** + /** * Read on the backing file. {@link ByteBuffer#remaining()} bytes will be * read into the caller's buffer, starting at the specified offset in the * backing file. @@ -1567,7 +1567,7 @@ final Lock readLock = extensionLock.readLock(); readLock.lock(); try { - final int startPos = dst.position(); + final int startPos = dst.position(); try { // the offset into the disk file. @@ -1635,23 +1635,23 @@ */ private FileChannel reopenChannel() throws IOException { - /* - * Note: This is basically a double-checked locking pattern. It is - * used to avoid synchronizing when the backing channel is already - * open. - */ - { - final RandomAccessFile tmp = raf; - if (tmp != null) { - final FileChannel channel = tmp.getChannel(); - if (channel.isOpen()) { - // The channel is still open. - return channel; - } - } - } + /* + * Note: This is basically a double-checked locking pattern. It is + * used to avoid synchronizing when the backing channel is already + * open. + */ + { + final RandomAccessFile tmp = raf; + if (tmp != null) { + final FileChannel channel = tmp.getChannel(); + if (channel.isOpen()) { + // The channel is still open. 
+ return channel; + } + } + } - synchronized (opener) { + synchronized (opener) { assertOpen(); @@ -1862,9 +1862,9 @@ offset = getOffset(addr); - final long paddr = offset2PhysicalAddress(offset); + final long paddr = offset2PhysicalAddress(offset); - boolean wroteOnCache = false; + boolean wroteOnCache = false; if (writeCacheService != null) { if (!writeCacheService.write(paddr, data, chk)) throw new AssertionError(); @@ -1952,9 +1952,9 @@ */ private final ByteBuffer _checkbuf; -// private HARebuildRequest m_rebuildRequest; +// private HARebuildRequest m_rebuildRequest; // -// private int m_rebuildSequence; +// private int m_rebuildSequence; /** * Make sure that the file is large enough to accept a write of @@ -2431,13 +2431,13 @@ super.closeForWrites(); // do not discard the write cache, just reset it to preserve - // read cache + // read cache // releaseWriteCache(); try { - writeCacheService.reset(); - } catch (InterruptedException e) { - throw new RuntimeException(e); - } + writeCacheService.reset(); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } } @@ -2466,7 +2466,7 @@ * of this method are ignored. */ @Override - public void delete(final long addr) { + public void delete(final long addr) { if (writeCacheService != null) { @@ -2517,44 +2517,49 @@ final int limit = bb.limit(); bb.position(limit); - // Flush the write in the write cache to the backing store. - final Lock readLock = extensionLock.readLock(); - readLock.lock(); - try { + /* Flush the write in the write cache to the backing store. + * + * Note: writeOnChannel() takes the extensionLock for us. + */ +// final Lock readLock = extensionLock.readLock(); +// readLock.lock(); +// try { + writeCache.flush(false/* force */); - - // install reads into readCache (if any) - writeCacheService.installReads(writeCache); - } finally { - readLock.unlock(); - } +// } finally { +// readLock.unlock(); +// } + + // install reads into readCache (if any) + writeCacheService.installReads(writeCache); + } @Override public Future<Void> sendHALogBuffer(final IHALogRequest req, - final IHAWriteMessage msg, final IBufferAccess b) - throws IOException, InterruptedException { + final IHAWriteMessage msg, final IBufferAccess b) + throws IOException, InterruptedException { - // read direct from store - final ByteBuffer clientBuffer = b.buffer(); - final int nbytes = msg.getSize(); - clientBuffer.position(0); - clientBuffer.limit(nbytes); + // read direct from store + final ByteBuffer clientBuffer = b.buffer(); + final int nbytes = msg.getSize(); + clientBuffer.position(0); + clientBuffer.limit(nbytes); readRaw(/*nbytes, */msg.getFirstOffset(), clientBuffer); - - assert clientBuffer.remaining() > 0 : "Empty buffer: " + clientBuffer; + + assert clientBuffer.remaining() > 0 : "Empty buffer: " + clientBuffer; - @SuppressWarnings("unchecked") - final QuorumPipeline<HAPipelineGlue> quorumMember = (QuorumPipeline<HAPipelineGlue>) quorum - .getMember(); + @SuppressWarnings("unchecked") + final QuorumPipeline<HAPipelineGlue> quorumMember = (QuorumPipeline<HAPipelineGlue>) quorum + .getMember(); - final Future<Void> remoteWriteFuture = quorumMember.replicate(req, msg, - clientBuffer); + final Future<Void> remoteWriteFuture = quorumMember.replicate(req, msg, + clientBuffer); - return remoteWriteFuture; - } + return remoteWriteFuture; + } @Override public Future<Void> sendRawBuffer(final IHARebuildRequest req, @@ -2863,38 +2868,38 @@ } } - @Override - public void writeRawBuffer(HARebuildRequest req, IHAWriteMessage msg, - ByteBuffer 
transfer) throws IOException { -// if (m_rebuildRequest == null) -// throw new IllegalStateException("Store is not in rebuild state"); -// -// if (m_rebuildSequence != msg.getSequence()) -// throw new IllegalStateException("Invalid sequence number for rebuild, expected: " + m_rebuildSequence + ", actual: " + msg.getSequence()); + @Override + public void writeRawBuffer(HARebuildRequest req, IHAWriteMessage msg, + ByteBuffer transfer) throws IOException { +// if (m_rebuildRequest == null) +// throw new IllegalStateException("Store is not in rebuild state"); +// +// if (m_rebuildSequence != msg.getSequence()) +// throw new IllegalStateException("Invalid sequence number for rebuild, expected: " + m_rebuildSequence + ", actual: " + msg.getSequence()); - FileChannelUtility.writeAll(this.opener, transfer, msg.getFirstOffset()); - -// m_rebuildSequence++; - } + FileChannelUtility.writeAll(this.opener, transfer, msg.getFirstOffset()); + +// m_rebuildSequence++; + } -// @Override -// public void prepareForRebuild(HARebuildRequest req) { -// assert m_rebuildRequest == null; -// -// m_rebuildRequest = req; -// m_rebuildSequence = 0; -// } +// @Override +// public void prepareForRebuild(HARebuildRequest req) { +// assert m_rebuildRequest == null; +// +// m_rebuildRequest = req; +// m_rebuildSequence = 0; +// } // -// @Override -// public void completeRebuild(final HARebuildRequest req, final IRootBlockView rbv) { -// assert m_rebuildRequest != null; -// -// assert m_rebuildRequest.equals(req); -// -// // TODO: reinit from file -// this.resetFromHARootBlock(rbv); -// -// m_rebuildRequest = null; -// } - +// @Override +// public void completeRebuild(final HARebuildRequest req, final IRootBlockView rbv) { +// assert m_rebuildRequest != null; +// +// assert m_rebuildRequest.equals(req); +// +// // TODO: reinit from file +// this.resetFromHARootBlock(rbv); +// +// m_rebuildRequest = null; +// } + } Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/rwstore/RWStore.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/rwstore/RWStore.java 2013-05-03 12:11:49 UTC (rev 7099) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/rwstore/RWStore.java 2013-05-03 16:35:41 UTC (rev 7100) @@ -53,7 +53,6 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReference; import java.util.concurrent.locks.Lock; -import java.util.concurrent.locks.ReentrantLock; import java.util.concurrent.locks.ReentrantReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock.ReadLock; import java.util.concurrent.locks.ReentrantReadWriteLock.WriteLock; @@ -267,27 +266,27 @@ */ public interface Options { - /** - * Option defines the Allocation block sizes for the RWStore. The values - * defined are multiplied by 64 to provide the actual allocations. The - * list of allocations should be ',' delimited and in increasing order. - * This array is written into the store so changing the values does not - * break older stores. For example, - * - * <pre> - * "1,2,4,8,116,32,64" - * </pre> - * - * defines allocations from 64 to 4K in size. It is a good to define - * block sizes on 4K boundaries as soon as possible to optimize IO. This - * is particularly relevant for SSDs. A 1K boundary is expressed as - * <code>16</code> in the allocation sizes, so a 4K boundary is - * expressed as <code>64</code> and an 8k boundary as <code>128</code>. - * <p> - * The default allocations are {@value #DEFAULT_ALLOCATION_SIZES}. 
- * - * @see #DEFAULT_ALLOCATION_SIZES - */ + /** + * Option defines the Allocation block sizes for the RWStore. The values + * defined are multiplied by 64 to provide the actual allocations. The + * list of allocations should be ',' delimited and in increasing order. + * This array is written into the store so changing the values does not + * break older stores. For example, + * + * <pre> + * "1,2,4,8,116,32,64" + * </pre> + * + * defines allocations from 64 to 4K in size. It is a good to define + * block sizes on 4K boundaries as soon as possible to optimize IO. This + * is particularly relevant for SSDs. A 1K boundary is expressed as + * <code>16</code> in the allocation sizes, so a 4K boundary is + * expressed as <code>64</code> and an 8k boundary as <code>128</code>. + * <p> + * The default allocations are {@value #DEFAULT_ALLOCATION_SIZES}. + * + * @see #DEFAULT_ALLOCATION_SIZES + */ String ALLOCATION_SIZES = RWStore.class.getName() + ".allocationSizes"; /** @@ -335,12 +334,12 @@ String DEFAULT_FREE_BITS_THRESHOLD = "300"; - /** - * When <code>true</code>, scattered writes which are strictly ascending - * will be coalesced within a buffer and written out as a single IO - * (default {@value #DEFAULT_DOUBLE_BUFFER_WRITES}). This improves write - * performance for SATA, SAS, and even SSD. - */ + /** + * When <code>true</code>, scattered writes which are strictly ascending + * will be coalesced within a buffer and written out as a single IO + * (default {@value #DEFAULT_DOUBLE_BUFFER_WRITES}). This improves write + * performance for SATA, SAS, and even SSD. + */ String DOUBLE_BUFFER_WRITES = RWStore.class.getName() + ".doubleBuffer"; String DEFAULT_DOUBLE_BUFFER_WRITES = "true"; @@ -370,19 +369,19 @@ private static final String ERR_WRITE_CACHE_CREATE = "Unable to create write cache service"; - /** - * The fixed size of any allocator on the disk in bytes. The #of allocations - * managed by an allocator is this value times 8 because each slot uses one - * bit in the allocator. When an allocator is allocated, the space on the - * persistent heap is reserved for all slots managed by that allocator. - * However, the {@link FixedAllocator} only incrementally allocates the - * {@link AllocBlock}s. - */ - static private final int ALLOC_BLOCK_SIZE = 1024; - -// // from 32 bits, need 13 to hold max offset of 8 * 1024, leaving 19 for number of blocks: 256K -// static final int BLOCK_INDEX_BITS = 19; /** + * The fixed size of any allocator on the disk in bytes. The #of allocations + * managed by an allocator is this value times 8 because each slot uses one + * bit in the allocator. When an allocator is allocated, the space on the + * persistent heap is reserved for all slots managed by that allocator. + * However, the {@link FixedAllocator} only incrementally allocates the + * {@link AllocBlock}s. + */ + static private final int ALLOC_BLOCK_SIZE = 1024; + +// // from 32 bits, need 13 to hold max offset of 8 * 1024, leaving 19 for number of blocks: 256K +// static final int BLOCK_INDEX_BITS = 19; + /** * The #of low bits in a latched address that encode the offset of the bit * in a {@link FixedAllocator}. The {@link FixedAllocator} will map the bit * onto an allocation slot. @@ -392,107 +391,107 @@ * order in which it was created. This is used to index into * {@link #m_allocs}, which are the {@link FixedAllocator}s. 
*/ - static final int OFFSET_BITS = 13; - static final int OFFSET_BITS_MASK = 0x1FFF; // was 0xFFFF - - static final int ALLOCATION_SCALEUP = 16; // multiplier to convert allocations based on minimum allocation of 32k - static private final int META_ALLOCATION = 8; // 8 * 32K is size of meta Allocation + static final int OFFSET_BITS = 13; + static final int OFFSET_BITS_MASK = 0x1FFF; // was 0xFFFF + + static final int ALLOCATION_SCALEUP = 16; // multiplier to convert allocations based on minimum allocation of 32k + static private final int META_ALLOCATION = 8; // 8 * 32K is size of meta Allocation - // If required, then allocate 1M direct buffers - private static final int cDirectBufferCapacity = 1024 * 1024; + // If required, then allocate 1M direct buffers + private static final int cDirectBufferCapacity = 1024 * 1024; - private int cMaxDirectBuffers = 20; // 20M of direct buffers - static final int cDirectAllocationOffset = 64 * 1024; + private int cMaxDirectBuffers = 20; // 20M of direct buffers + static final int cDirectAllocationOffset = 64 * 1024; - // /////////////////////////////////////////////////////////////////////////////////////// - // RWStore Data - // /////////////////////////////////////////////////////////////////////////////////////// + // /////////////////////////////////////////////////////////////////////////////////////// + // RWStore Data + // /////////////////////////////////////////////////////////////////////////////////////// - private final File m_fd; -// private RandomAccessFile m_raf; -// protected FileMetadata m_metadata; -// protected int m_transactionCount; -// private boolean m_committing; + private final File m_fd; +// private RandomAccessFile m_raf; +// protected FileMetadata m_metadata; +// protected int m_transactionCount; +// private boolean m_committing; // /** // * When <code>true</code> the allocations will not actually be recycled // * until after a store restart. When <code>false</code>, the allocations are // * recycled once they satisfy the history retention requirement. // */ -// private boolean m_preserveSession = false; -// private boolean m_readOnly; +// private boolean m_preserveSession = false; +// private boolean m_readOnly; - /** - * The UUID of the backing store. - * - * @see #initfromRootBlock(IRootBlockView) - * @see IRawStore#getUUID() - */ - private UUID m_storeUUID; - - /** - * lists of total alloc blocks. - * - * @todo examine concurrency and lock usage for {@link #m_alloc} and the - * rest of these lists. - */ - private final ArrayList<FixedAllocator> m_allocs; + /** + * The UUID of the backing store. + * + * @see #initfromRootBlock(IRootBlockView) + * @see IRawStore#getUUID() + */ + private UUID m_storeUUID; + + /** + * lists of total alloc blocks. + * + * @todo examine concurrency and lock usage for {@link #m_alloc} and the + * rest of these lists. + */ + private final ArrayList<FixedAllocator> m_allocs; - /** - * A fixed length array of lists of free {@link FixedAllocator}s with one - * entry in the array for each configured allocator size. An allocator is - * put onto this free list when it is initially created. When the store is - * opened, it will be added to this list if {@link Allocator#hasFree()} - * returns true. It will be removed when it has no free space remaining. It - * will be added back to the free list when its free slots exceeds a - * configured threshold. - */ - private ArrayList<FixedAllocator> m_freeFixed[]; - -// /** lists of free blob allocators. 
*/ - // private final ArrayList<BlobAllocator> m_freeBlobs; + /** + * A fixed length array of lists of free {@link FixedAllocator}s with one + * entry in the array for each configured allocator size. An allocator is + * put onto this free list when it is initially created. When the store is + * opened, it will be added to this list if {@link Allocator#hasFree()} + * returns true. It will be removed when it has no free space remaining. It + * will be added back to the free list when its free slots exceeds a + * configured threshold. + */ + private ArrayList<FixedAllocator> m_freeFixed[]; + +// /** lists of free blob allocators. */ + // private final ArrayList<BlobAllocator> m_freeBlobs; - /** lists of blocks requiring commitment. */ - private final ArrayList<FixedAllocator> m_commitList; + /** lists of blocks requiring commitment. */ + private final ArrayList<FixedAllocator> m_commitList; -// private WriteBlock m_writes; - - private final Quorum<?,?> m_quorum; - - /** - * The #of buffers that will be used by the {@link WriteCacheService}. - * - * @see com.bigdata.journal.Options#WRITE_CACHE_BUFFER_COUNT - */ - private final int m_writeCacheBufferCount; +// private WriteBlock m_writes; + + private final Quorum<?,?> m_quorum; + + /** + * The #of buffers that will be used by the {@link WriteCacheService}. + * + * @see com.bigdata.journal.Options#WRITE_CACHE_BUFFER_COUNT + */ + private final int m_writeCacheBufferCount; - /** - * @see com.bigdata.journal.Options#WRITE_CACHE_MIN_CLEAN_LIST_SIZE - */ + /** + * @see com.bigdata.journal.Options#WRITE_CACHE_MIN_CLEAN_LIST_SIZE + */ private final int m_minCleanListSize; - /** - * The #of read buffers that will be used by the {@link WriteCacheService}. - * - * @see com.bigdata.journal.Options#READ_CACHE_BUFFER_COUNT - */ - private final int m_readCacheBufferCount; + /** + * The #of read buffers that will be used by the {@link WriteCacheService}. + * + * @see com.bigdata.journal.Options#READ_CACHE_BUFFER_COUNT + */ + private final int m_readCacheBufferCount; /** * @see com.bigdata.journal.Options#WRITE_CACHE_COMPACTION_THRESHOLD */ - private final int m_compactionThreshold; - + private final int m_compactionThreshold; + /** * @see com.bigdata.journal.Options#HOT_CACHE_THRESHOLD */ - private final int m_hotCacheThreshold; - + private final int m_hotCacheThreshold; + /** * @see com.bigdata.journal.Options#HOT_CACHE_SIZE */ - private final int m_hotCacheSize; - + private final int m_hotCacheSize; + /** * Note: This is not final because we replace the {@link WriteCacheService} * during {@link #reset(long)} in order to propagate the then current quorum @@ -500,79 +499,110 @@ */ RWWriteCacheService m_writeCacheService; - /** - * The actual allocation sizes as read from the store. - * - * @see #DEFAULT_ALLOCATION_SIZES - */ - private int[] m_allocSizes; + /** + * The actual allocation sizes as read from the store. + * + * @see #DEFAULT_ALLOCATION_SIZES + */ + private int[] m_allocSizes; - /** - * The maximum allocation size (bytes). - */ + /** + * The maximum allocation size (bytes). + */ final int m_maxFixedAlloc; /** * The minimum allocation size (bytes). */ final int m_minFixedAlloc; - + /** * We allow blob headers so the maximum blob size is Integer.MAX_VALUE. */ final int m_maxBlobAllocSize = Integer.MAX_VALUE; - + /** - * This lock is used to exclude readers when the extent of the backing file - * is about to be changed. 
+ * This lock is used to exclude readers/writers performing IOs against the + * backing file when the extent of the backing file is about to be changed. + * Readers and writers take the {@link ReadLock}. The {@link WriteLock} is + * taken when the file extent must be changed. This is a workaround for an + * old (an unresolved as of February 2010) Sun bug. * <p> - * At present we use synchronized (this) for alloc/commitChanges and - * getData, since only alloc and commitChanges can cause a file extend, and - * only getData can read. + * Note: Any public method that ONLY takes the extensionLock MUST NOT make + * calls that could take the {@link #m_allocationLock}. This would cause a + * lock ordering problem. If both locks must be taken, then the + * {@link #m_allocationLock} MUST be taken first. + * + * @see http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6371642 + * @see #m_allocationLock + */ + final private ReentrantReadWriteLock m_extensionLock = new ReentrantReadWriteLock(); + + /** + * An explicit allocation lock supports exclusive access for allocator + * mutation and shared access for readers. * <p> - * By using an explicit extensionLock we can unsure that that the taking of - * the lock is directly related to the functionality, plus we can support - * concurrent reads. + * Note: You must hold the {@link #m_allocationReadLock} to read the + * allocators. * <p> - * You MUST hold the {@link #m_allocationLock} before acquiring the - * {@link ReentrantReadWriteLock#writeLock()} of the - * {@link #m_extensionLock}. + * Note: You must hold the {@link #m_allocationWriteLock} while allocating + * or clearing allocations. + * <p> + * Note: It is only when an allocation triggers a file extension that the + * {@link WriteLock} of the {@link #m_extensionLock} needs to be taken. + * + * TODO: There is scope to take advantage of the different allocator sizes + * and provide allocation locks on the fixed allocators. We will still need + * a store-wide allocation lock when creating new allocation areas, but + * significant contention may be avoided. */ - final private ReentrantReadWriteLock m_extensionLock = new ReentrantReadWriteLock(); - - /** - * An explicit allocation lock allows for reads concurrent with allocation - * requests. You must hold the allocation lock while allocating or clearing - * allocations. It is only when an allocation triggers a file extension that - * the write extensionLock needs to be taken. - * - * TODO: There is scope to take advantage of the different allocator sizes - * and provide allocation locks on the fixed allocators. We will still need - * a store-wide allocation lock when creating new allocation areas, but - * significant contention may be avoided. - */ final private ReentrantReadWriteLock m_allocationLock = new ReentrantReadWriteLock(); + /** + * Lock used for exclusive access to the allocators. + * <p> + * Note: Historically, this lock was only required for mutation and readers + * did not content for a lock. + */ final private WriteLock m_allocationWriteLock = m_allocationLock.writeLock(); + /** + * Lock used for shared access to allocators. + * <p> + * Note: Historically the allocators were unprotected for shared acccess + * (readers) and protected by a single lock for mutation (writes). Shared + * access by readers was safe since (a) old allocators were never replaced; + * and (b) readers had access only to committed data. 
+ * <p> + * This situation was changed when the {@link #postHACommit(IRootBlockView)} + * method was introduced since it could replace allocators in a manner that + * was not safe for shared access by readers. Methods that were historically + * using unprotected shared access now require protected shared access using + * this lock. + * + * @see #postHACommit(IRootBlockView) + * @see #getData(long, int) + * @see #getData(long, byte[]) + * @see #getData(long, byte[], int, int) + */ final private ReadLock m_allocationReadLock = m_allocationLock.readLock(); - /** - * The deferredFreeList is simply an array of releaseTime,freeListAddrs - * stored at commit. - * <p> - * Note that when the deferredFreeList is saved, ONLY thefreeListAddrs - * are stored, NOT the releaseTime. This is because on any open of - * the store, all deferredFrees can be released immediately. This - * mechanism may be changed in the future to enable explicit history - * retention, but if so a different header structure would be used since - * it would not be appropriate to retain a simple header linked to - * thousands if not millions of commit points. - */ + /** + * The deferredFreeList is simply an array of releaseTime,freeListAddrs + * stored at commit. + * <p> + * Note that when the deferredFreeList is saved, ONLY thefreeListAddrs + * are stored, NOT the releaseTime. This is because on any open of + * the store, all deferredFrees can be released immediately. This + * mechanism may be changed in the future to enable explicit history + * retention, but if so a different header structure would be used since + * it would not be appropriate to retain a simple header linked to + * thousands if not millions of commit points. + */ // * // * If the current txn list exceeds the MAX_DEFERRED_FREE then it is // * incrementally saved and a new list begun. The master list itself // * serves as a BLOB header when there is more than a single entry with // * the same txReleaseTime. -// private static final int MAX_DEFERRED_FREE = 4094; // fits in 16k block +// private static final int MAX_DEFERRED_FREE = 4094; // fits in 16k block private final long m_minReleaseAge; /** @@ -582,25 +612,25 @@ */ private int m_activeTxCount = 0; - private volatile long m_lastDeferredReleaseTime = 0L; -// private final ArrayList<Integer> m_currentTxnFreeList = new ArrayList<Integer>(); - private final PSOutputStream m_deferredFreeOut; + private volatile long m_lastDeferredReleaseTime = 0L; +// private final ArrayList<Integer> m_currentTxnFreeList = new ArrayList<Integer>(); + private final PSOutputStream m_deferredFreeOut; /** * Used to transparently re-open the backing channel if it has been closed * by an interrupt during an IO. */ - private final ReopenFileChannel m_reopener; + private final ReopenFileChannel m_reopener; - private volatile BufferedWrite m_bufferedWrite; - - /** - * Our StoreageStats objects - */ - private StorageStats m_storageStats; - private long m_storageStatsAddr = 0; - + private volatile BufferedWrite m_bufferedWrite; + /** + * Our StoreageStats objects + */ + private StorageStats m_storageStats; + private long m_storageStatsAddr = 0; + + /** * <code>true</code> iff the backing store is open. 
*/ private volatile boolean m_open = true; @@ -619,7 +649,7 @@ // private ConcurrentHashMap<Integer, String> m_blacklist = null; private ConcurrentHashMap<Integer, Long> m_lockAddresses = null; - class WriteCacheImpl extends WriteCache.FileChannelScatteredWriteCache { + class WriteCacheImpl extends WriteCache.FileChannelScatteredWriteCache { public WriteCacheImpl(final IBufferAccess buf, final boolean useChecksum, final boolean bufferHasData, @@ -661,23 +691,23 @@ // Added to enable debug of rare problem // FIXME: disable by removal once solved protected void registerWriteStatus(long offset, int length, char action) { - m_writeCacheService.debugAddrs(offset, length, action); + m_writeCacheService.debugAddrs(offset, length, action); } - + @Override protected void addAddress(int latchedAddr, int size) { - // No longer valid - // RWStore.this.addAddress(latchedAddr, size); + // No longer valid + // RWStore.this.addAddress(latchedAddr, size); } @Override protected void removeAddress(int latchedAddr) { - // No longer valid - // RWStore.this.removeAddress(latchedAddr); + // No longer valid + // RWStore.this.removeAddress(latchedAddr); } - }; - + }; + /** * The ALLOC_SIZES must be initialized from either the file or the * properties associated with the fileMetadataView @@ -722,50 +752,50 @@ } m_metaBits = new int[m_metaBitsSize]; - - m_metaTransientBits = new int[m_metaBitsSize]; - - + + m_metaTransientBits = new int[m_metaBitsSize]; + + m_quorum = quorum; - - m_fd = fileMetadata.file; - - // initialize striped performance counters for this store. + + m_fd = fileMetadata.file; + + // initialize striped performance counters for this store. this.storeCounters.set(new StoreCounters(10/* batchSize */)); - final IRootBlockView m_rb = fileMetadata.rootBlock; + final IRootBlockView m_rb = fileMetadata.rootBlock; - m_commitList = new ArrayList<FixedAllocator>(); + m_commitList = new ArrayList<FixedAllocator>(); - m_allocs = new ArrayList<FixedAllocator>(); - - // m_freeBlobs = new ArrayList<BlobAllocator>(); + m_allocs = new ArrayList<FixedAllocator>(); + + // m_freeBlobs = new ArrayList<BlobAllocator>(); - try { - final RandomAccessFile m_raf = fileMetadata.getRandomAccessFile(); - m_reopener = new ReopenFileChannel(m_fd, m_raf, "rw"); - } catch (IOException e1) { - throw new RuntimeException(e1); - } + try { + final RandomAccessFile m_raf = fileMetadata.getRandomAccessFile(); + m_reopener = new ReopenFileChannel(m_fd, m_raf, "rw"); + } catch (IOException e1) { + throw new RuntimeException(e1); + } - if (Boolean.valueOf(fileMetadata.getProperty( - Options.DOUBLE_BUFFER_WRITES, - Options.DEFAULT_DOUBLE_BUFFER_WRITES))) { - try { - m_bufferedWrite = new BufferedWrite(this); - } catch (InterruptedException e1) { - m_bufferedWrite = null; - } - } else { - m_bufferedWrite = null; - } + if (Boolean.valueOf(fileMetadata.getProperty( + Options.DOUBLE_BUFFER_WRITES, + Options.DEFAULT_DOUBLE_BUFFER_WRITES))) { + try { + m_bufferedWrite = new BufferedWrite(this); + } catch (InterruptedException e1) { + m_bufferedWrite = null; + } + } else { + m_bufferedWrite = null; + } - m_writeCacheBufferCount = fileMetadata.writeCacheBufferCount; - - m_readCacheBufferCount = Integer.valueOf(fileMetadata.getProperty( + m_writeCacheBufferCount = fileMetadata.writeCacheBufferCount; + + m_readCacheBufferCount = Integer.valueOf(fileMetadata.getProperty( com.bigdata.journal.Options.READ_CACHE_BUFFER_COUNT, com.bigdata.journal.Options.DEFAULT_READ_CACHE_BUFFER_COUNT)); - + if (log.isInfoEnabled()) 
log.info(com.bigdata.journal.Options.WRITE_CACHE_BUFFER_COUNT + "=" + m_writeCacheBufferCount); @@ -804,87 +834,87 @@ // m_writeCache = newWriteCache(); - try { + try { if (m_rb.getNextOffset() == 0) { // if zero then new file - setAllocations(fileMetadata); + setAllocations(fileMetadata); /* * FIXME Martyn, the code paths here are crazy complicated. * defaultInit() is also invoked from initFromRootBlock(). * Simplify this. BBT */ - m_storeUUID = m_rb.getUUID(); + m_storeUUID = m_rb.getUUID(); - defaultInit(); - - m_maxFixedAlloc = m_allocSizes[m_allocSizes.length-1]*64; - m_minFixedAlloc = m_allocSizes[0]*64; - - m_storageStats = new StorageStats(m_allocSizes); + defaultInit(); + + m_maxFixedAlloc = m_allocSizes[m_allocSizes.length-1]*64; + m_minFixedAlloc = m_allocSizes[0]*64; + + m_storageStats = new StorageStats(m_allocSizes); -// // Check for overwrite option and set overwrite buffer if -// // required -// if (Boolean.valueOf(fileMetadata.getProperty( -// Options.OVERWRITE_DELETE, -// Options.DEFAULT_OVERWRITE_DELETE))) { -// m_writeCache.setOverwriteBuffer(m_maxFixedAlloc); -// } - } else { - - initfromRootBlock(m_rb); - - m_maxFixedAlloc = m_allocSizes[m_allocSizes.length-1]*64; - m_minFixedAlloc = m_allocSizes[0]*64; +// // Check for overwrite option and set overwrite buffer if +// // required +// if (Boolean.valueOf(fileMetadata.getProperty( +// Options.OVERWRITE_DELETE, +// Options.DEFAULT_OVERWRITE_DELETE))) { +// m_writeCache.setOverwriteBuffer(m_maxFixedAlloc); +// } + } else { + + initfromRootBlock(m_rb); + + m_maxFixedAlloc = m_allocSizes[m_allocSizes.length-1]*64; + m_minFixedAlloc = m_allocSizes[0]*64; - if (m_storageStatsAddr != 0) { - final long statsAddr = m_storageStatsAddr >> 16; - final int statsLen = ((int) m_storageStatsAddr) & 0xFFFF; - final byte[] stats = new byte[statsLen + 4]; // allow for checksum - getData(statsAddr, stats); - final DataInputStream instr = new DataInputStream(new ByteArrayInputStream(stats)); - m_storageStats = new StorageStats(instr); - - for (FixedAllocator fa: m_allocs) { - m_storageStats.register(fa); - } - } else { - m_storageStats = new StorageStats(m_allocSizes); - } - + if (m_storageStatsAddr != 0) { + final long statsAddr = m_storageStatsAddr >> 16; + final int statsLen = ((int) m_storageStatsAddr) & 0xFFFF; + final byte[] stats = new byte[statsLen + 4]; // allow for checksum + getData(statsAddr, stats); + final DataInputStream instr = new DataInputStream(new ByteArrayInputStream(stats)); + m_storageStats = new StorageStats(instr); + + for (FixedAllocator fa: m_allocs) { + m_storageStats.register(fa); + } + } else { + m_storageStats = new StorageStats(m_allocSizes); + } + if (log.isTraceEnabled()) { final StringBuilder str = new StringBuilder(); this.showAllocators(str); log.trace(str); } - } - - // Maximum theoretically addressable file size is determined by the - // maximum allocator slot size multiplied by Integer.MAX_VALUE + } + + // Maximum theoretically addressable file size is determined by the + // maximum allocator slot size multiplied by Integer.MAX_VALUE // FIXME: do we want to constrain this as a system property? - m_maxFileSize = ((long) Integer.MAX_VALUE) * m_maxFixedAlloc; + m_maxFileSize = ((long) Integer.MAX_VALUE) * m_maxFixedAlloc; - // setup write cache AFTER init to ensure filesize is correct! + // setup write cache AFTER init to ensure filesize is correct! 
- m_writeCacheService = newWriteCache(); + m_writeCacheService = newWriteCache(); - final int maxBlockLessChk = m_maxFixedAlloc-4; + final int maxBlockLessChk = m_maxFixedAlloc-4; - assert m_maxFixedAlloc > 0; - - m_deferredFreeOut = PSOutputStream.getNew(this, m_maxFixedAlloc, null); + assert m_maxFixedAlloc > 0; + + m_deferredFreeOut = PSOutputStream.getNew(this, m_maxFixedAlloc, null); -// if (Boolean.valueOf(fileMetadata.getProperty( -// Options.MAINTAIN_BLACKLIST, -// Options.DEFAULT_MAINTAIN_BLACKLIST))) { -// m_blacklist = new ConcurrentHashMap<Integer, String>(); -// m_lockAddresses = new ConcurrentHashMap<Integer, Long>(); -// } +// if (Boolean.valueOf(fileMetadata.getProperty( +// Options.MAINTAIN_BLACKLIST, +// Options.DEFAULT_MAINTAIN_BLACKLIST))) { +// m_blacklist = new ConcurrentHashMap<Integer, String>(); +// m_lockAddresses = new ConcurrentHashMap<Integer, Long>(); +// } - } catch (IOException e) { - throw new StorageTerminalError("Unable to initialize store", e); - } - } + } catch (IOException e) { + throw new StorageTerminalError("Unable to initialize store", e); + } + } /** * Called from WriteCache.resetRecordMapFromBuffer @@ -901,59 +931,59 @@ * <i>latchedAddr</i> but the address itself should not yet be * allocated. */ - void addAddress(final int latchedAddr, final int size) { - // ignore zero address - if (latchedAddr == 0) - return; + void addAddress(final int latchedAddr, final int size) { + // ignore zero address + if (latchedAddr == 0) + return; - m_allocationWriteLock.lock(); - try { - FixedAllocator alloc = null; - try { - alloc = getBlock(latchedAddr); - } catch (final PhysicalAddressResolutionException par) { - // Must create new allocator - } - final int size2 = size < 0 ? -size : size; - if (alloc == null) { - final int i = fixedAllocatorIndex(size2); - final int block = 64 * m_allocSizes[i]; - final ArrayList<FixedAllocator> list = m_freeFixed[i]; - if (log.isTraceEnabled()) - log.trace("Creating new Allocator for address: " - + latchedAddr); + m_allocationWriteLock.lock(); + try { + FixedAllocator alloc = null; + try { + alloc = getBlock(latchedAddr); + } catch (final PhysicalAddressResolutionException par) { + // Must create new allocator + } + final int size2 = size < 0 ? -size : size; + if (alloc == null) { + final int i = fixedAllocatorIndex(size2); + final int block = 64 * m_allocSizes[i]; + final ArrayList<FixedAllocator> list = m_freeFixed[i]; + if (log.isTraceEnabled()) + log.trace("Creating new Allocator for address: " + + latchedAddr); - final FixedAllocator allocator = new FixedAllocator(this, block); + final FixedAllocator allocator = new FixedAllocator(this, block); - allocator.setFreeList(list); - allocator.setIndex(m_allocs.size()); + allocator.setFreeList(list); + allocator.setIndex(m_allocs.size()); - m_allocs.add(allocator); + m_allocs.add(allocator); - // Check correctly synchronized creation - assert allocator == getBlock(latchedAddr); + // Check correctly synchronized creation + assert allocator == getBlock(latchedAddr); - alloc = allocator; - } + alloc = allocator; + } - assert size2 <= alloc.getSlotSize(); + assert size2 <= alloc.getSlotSize(); - if (size > 0) { + if (size > 0) { - /* - * This is a real allocation. - */ + /* + * This is a real allocation. 
+ */ - alloc.setAddressExternal(latchedAddr); + alloc.setAddressExternal(latchedAddr); - } + } - } finally { + } finally { - m_allocationWriteLock.unlock(); + m_allocationWriteLock.unlock(); - } - } + } + } /** * Called from WriteCache.resetRecordMapFromBuffer @@ -962,40 +992,40 @@ * * @param latchedAddr */ - void removeAddress(final int latchedAddr) { - // ignore zero address - if (latchedAddr == 0) - return; + void removeAddress(final int latchedAddr) { + // ignore zero address + if (latchedAddr == 0) + return; - m_allocationWriteLock.lock(); - try { - // assert m_commitList.size() == 0; + m_allocationWriteLock.lock(); + try { + // assert m_commitList.size() == 0; - final FixedAllocator alloc = getBlockByAddress(latchedAddr); + final FixedAllocator alloc = getBlockByAddress(latchedAddr); - assert alloc != null; + assert alloc != null; - final int addrOffset = getOffset(latchedAddr); - if (alloc == null) { - throw new IllegalArgumentException( - "Invalid address provided to immediateFree: " - + latchedAddr); - } - final long pa = alloc.getPhysicalAddress(addrOffset); + final int addrOffset = getOffset(latchedAddr); + if (alloc == null) { + throw new IllegalArgumentException( + "Invalid address provided to immediateFree: " + + latchedAddr); + } + final long pa = alloc.getPhysicalAddress(addrOffset); - if (log.isTraceEnabled()) - log.trace("Freeing allocation at " + latchedAddr - + ", physical address: " + pa); + if (log.isTraceEnabled()) + log.trace("Freeing allocation at " + latchedAddr + + ", physical address: " + pa); - alloc.free(latchedAddr, 0, false); + alloc.free(latchedAddr, 0, false); - // assert m_commitList.size() == 0; - } finally { - m_allocationWriteLock.unlock(); - } - } - - /** + // assert m_commitList.size() == 0; + } finally { + m_allocationWriteLock.unlock(); + } + } + + /** * Create and return a new {@link RWWriteCacheService} instance. The caller * is responsible for closing out the old one and must be holding the * appropriate locks when it switches in the new instance. 
@@ -1033,7 +1063,7 @@ } } - private void setAllocations(final FileMetadata fileMetadata) + private void setAllocations(final FileMetadata fileMetadata) throws IOException { final String buckets = fileMetadata.getProperty( @@ -1052,30 +1082,30 @@ } private void defaultInit() throws IOException { - final int numFixed = m_allocSizes.length; + final int numFixed = m_allocSizes.length; - m_freeFixed = new ArrayList[numFixed]; + m_freeFixed = new ArrayList[numFixed]; - for (int i = 0; i < numFixed; i++) { - m_freeFixed[i] = new ArrayList<FixedAllocator>(); - } + for (int i = 0; i < numFixed; i++) { + m_freeFixed[i] = new ArrayList<FixedAllocator>(); + } - m_fileSize = convertFromAddr(m_fd.length()); - - // make space for meta-allocators - m_metaBits[0] = -1; - m_metaTransientBits[0] = -1; - m_nextAllocation = -(1 + META_ALLOCATION); // keep on a minimum 8K boundary - m_committedNextAllocation = m_nextAllocation; - - if (m_fileSize > m_nextAllocation) { - m_fileSize = m_nextAllocation; - } - - if (log.isInfoEnabled()) - log.info("Set default file extent " + convertAddr(m_fileSize)); - - m_reopener.raf.setLength(convertAddr(m_fileSize)); + m_fileSize = convertFromAddr(m_fd.length()); + + // make space for meta-allocators + m_metaBits[0] = -1; + m_metaTransientBits[0] = -1; + m_nextAllocation = -(1 + META_ALLOCATION); // keep on a minimum 8K boundary + m_committedNextAllocation = m_nextAllocation; + + if (m_fileSize > m_nextAllocation) { + m_fileSize = m_nextAllocation; + } + + if (log.isInfoEnabled()) + log.info("Set default file extent " + convertAddr(m_fileSize)); + + m_reopener.raf.setLength(convertAddr(m_fileSize)); } @@ -1104,71 +1134,71 @@ } } - /** - * Basic check on key root block validity - * - * @param rbv - */ - private void checkRootBlock(final IRootBlockView rbv) { - final long nxtOffset = rbv.getNextOffset(); - final int nxtalloc = -(int) (nxtOffset >> 32); + /** + * Basic check on key root block validity + * + * @param rbv + */ + private void checkRootBlock(final IRootBlockView rbv) { + final long nxtOffset = rbv.getNextOffset(); + final int nxtalloc = -(int) (nxtOffset >> 32); - final int metaBitsAddr = -(int) nxtOffset; + final int metaBitsAddr = -(int) nxtOffset; - final long metaAddr = rbv.getMetaStartAddr(); - final long rawMetaBitsAddr = rbv.getMetaBitsAddr(); - if (metaAddr == 0 || rawMetaBitsAddr == 0) { + final long metaAddr = rbv.getMetaStartAddr(); + final long rawMetaBitsAddr = rbv.getMetaBitsAddr(); + if (metaAddr == 0 || rawMetaBitsAddr == 0) { /* * possible when rolling back to empty file. 
*/ - log.warn("No meta allocation data included in root block for RWStore"); - } - - if (log.isTraceEnabled()) { + log.warn("No meta allocation data included in root block for RWStore"); + } + + if (log.isTraceEnabled()) { final int commitRecordAddr = (int) (rbv.getCommitRecordAddr() >> 32); log.trace("CommitRecord " + rbv.getCommitRecordAddr() + " at physical address: " + physicalAddress(commitRecordAddr)); } - - final long commitCounter = rbv.getCommitCounter(); + + final long commitCounter = rbv.getCommitCounter(); -// final int metaStartAddr = (int) -(metaAddr >> 32); // void -// final int fileSize = (int) -(metaAddr & 0xFFFFFFFF); +// final int metaStartAddr = (int) -(metaAddr >> 32); // void +// final int fileSize = (int) -(metaAddr & 0xFFFFFFFF); - if (log.isTraceEnabled()) - log.trace("m_allocation: " + nxtalloc + ", m_metaBitsAddr: " - + metaBitsAddr + ", m_commitCounter: " + commitCounter); - - } - - /* - * Utility to encapsulate RootBlock interpreation - */ - static class RootBlockInfo { - - static int nextAllocation(final IRootBlockView rb) { - final long nxtOffset = rb.getNextOffset(); + if (log.isTraceEnabled()) + log.trace("m_allocation: " + nxtalloc + ", m_metaBitsAddr: " + + metaBitsAddr + ", m_commitCounter: " + commitCounter); + + } + + /* + * Utility to encapsulate RootBlock interpreation + */ + static class RootBlockInfo { + + static int nextAllocation(final IRootBlockView rb) { + final long nxtOffset = rb.getNextOffset(); - // next allocation to be made (in -32K units). - final int ret = -(int) (nxtOffset >> 32); - + // next allocation to be made (in -32K units). + final int ret = -(int) (nxtOffset >> 32); + /* * Skip the first 32K in the file. The root blocks live here but * nothing else. */ - return ret == 0 ? -(1 + META_ALLOCATION) : ret; - } - - /* - * Meta-Allocations stored as {int address; int[8] bits}, so each block - * holds 8*32=256 allocation slots of 1K totaling 256K. - * - * The returned int array is a flattened list of these int[9] blocks - */ - static int[] metabits(final IRootBlockView rb, final ReopenFileChannel reopener) throws IOException { - final long rawmbaddr = rb.getMetaBitsAddr(); - + return ret == 0 ? -(1 + META_ALLOCATION) : ret; + } + + /* + * Meta-Allocations stored as {int address; int[8] bits}, so each block + * holds 8*32=256 allocation slots of 1K totaling 256K. + * + * The returned int array is a flattened list of these int[9] blocks + */ + static int[] metabits(final IRootBlockView rb, final ReopenFileChannel reopener) throws IOException { + final long rawmbaddr = rb.getMetaBitsAddr(); + /* * The #of int32 values in the metabits region. * @@ -1176,86 +1206,86 @@ * gives the #of int32 values in the metabits regions (up to 64k * int32 values). */ - final int metaBitsStore = (int) (rawmbaddr & 0xFFFF); - - - // The byte offset of the metabits region in the file. - final long pmaddr = rawmbaddr >> 16; - + final int metaBitsStore = (int) (rawmbaddr & 0xFFFF); + + + // The byte offset of the metabits region in the file. + final long pmaddr = rawmbaddr >> 16; + /* * Read the metabits block, including a header and the int32[] * that encodes both startAddrs and bit vectors. 
*/ final byte[] buf = new byte[metaBitsStore * 4]; - FileChannelUtility.readAll(reopener, ByteBuffer.wrap(buf), pmaddr); - - final DataInputStream strBuf = new DataInputStream(new ByteArrayInputStream(buf)); - - // Can handle minor store version incompatibility - strBuf.readInt(); // STORE VERSION - strBuf.readLong(); // Last Deferred Release Time - strBuf.readInt(); // cDefaultMetaBitsSize - - final int allocBlocks = strBuf.readInt(); - strBuf.readLong(); // m_storageStatsAddr + FileChannelUtility.readAll(reopener, ByteBuffer.wrap(buf), pmaddr); + + final DataInputStream strBuf = new DataInputStream(new ByteArrayInputStream(buf)); + + // Can handle minor store version incompatibility + strBuf.readInt(); // STORE VERSION + strBuf.readLong(); // Last Deferred Release Time + strBuf.readInt(); // cDefaultMetaBitsSize + + final int allocBlocks = strBuf.readInt(); + strBuf.readLong(); // m_storageStatsAddr - // step over those reserved ints + // step over those reserved ints for (int i = 0; i < cReservedMetaBits; i++) { strBuf.readInt(); } // step over the allocSizes - for (int i = 0; i < allocBlocks; i++) { - strBuf.readInt(); - } - final int metaBitsSize = metaBitsStore - allocBlocks - cMetaHdrFields; // allow for header fields - - // Must be multiple of 9 - assert metaBitsSize % 9 == 0; - - int[] ret = new int[metaBitsSize]; - for (int i = 0; i < metaBitsSize; i++) { - ret[i] = strBuf.readInt(); - } + for (int i = 0; i < allocBlocks; i++) { + strBuf.readInt(); + } + final int metaBitsSize = metaBitsStore - allocBlocks - cMetaHdrFields; // allow for header fields + + // Must be multiple of 9 + assert metaBitsSize % 9 == 0; + + int[] ret = new int[metaBitsSize]; + for (int i = 0; i < metaBitsSize; i++) { + ret[i] = strBuf.readInt(); + } - /* - * Meta-Allocations stored as {int address; int[8] bits}, so each block - * holds 8*32=256 allocation slots of 1K totaling 256K. - */ - return ret; - } - } - - /** - * Should be called where previously initFileSpec was used. - * - * Rather than reading from file, instead reads from the current root block. - * - * We use the rootBlock fields, nextOffset, metaStartAddr, metaBitsAddr. - * - * metaBitsAddr indicates where the meta allocation bits are. - * - * metaStartAddr is the offset in the file where the allocation blocks are - * allocated the long value also indicates the size of the allocation, such - * that the address plus the size is the "filesize". - * - * Note that metaBitsAddr must be an absolute address, with the low order 16 - * bits used to indicate the size. - * - * @throws IOException - */ - private void initfromRootBlock(final IRootBlockView rb) throws IOException { - // m_rb = m_fmv.getRootBlock(); - assert(rb != null); + /* + * Meta-Allocations stored as {int address; int[8] bits}, so each block + * holds 8*32=256 allocation slots of 1K totaling 256K. + */ + return ret; + } + } + + /** + * Should be called where previously initFileSpec was used. + * + * Rather than reading from file, instead reads from the current root block. + * + * We use the rootBlock fields, nextOffset, metaStartAddr, metaBitsAddr. + * + * metaBitsAddr indicates where the meta allocation bits are. + * + * metaStartAddr is the offset in the file where the allocation blocks are + * allocated the long value also indicates the size of the allocation, such + * that the address plus the size is the "filesize". + * + * Note that metaBitsAddr must be an absolute address, with the low order 16 + * bits used to indicate the size. 
+ * + * @throws IOException + */ + private void initfromRootBlock(final IRootBlockView rb) throws IOException { + // m_rb = m_fmv.getRootBlock(); + assert(rb != null); - m_storeUUID = rb.getUUID(); - - if (rb.getNextOffset() == 0) { + m_storeUUID = rb.getUUID(); + + if (rb.getNextOffset() == 0) { - defaultInit(); - - } else { + defaultInit(); + + } else { /* * The RWStore stores in IRootBlock.getNextOffset() two distinct @@ -1271,52 +1301,52 @@ * FixedAllocators in order to turn it into a byte offset on the * file. */ - final long nxtOffset = rb.getNextOffset(); + final long nxtOffset = rb.getNextOffset(); - // next allocation to be made (in -32K units). - m_nextAllocation = -(int) (nxtOffset >> 32); - - if (m_nextAllocation == 0) { + // next allocation to be made (in -32K units). + m_nextAllocation = -(int) (nxtOffset >> 32); + + if (m_nextAllocation == 0) { /* * Skip the first 32K in the file. The root blocks live here but * nothing else. */ - - m_nextAllocation = -(1 + META_ALLOCATION); - - } - - m_committedNextAllocation = m_nextAllocation; - - // latched offset of the metabits region. - m_metaBitsAddr = -(int) nxtOffset; - - if (log.isInfoEnabled()) { - log.info("MetaBitsAddr: " + m_metaBitsAddr); - } + + m_nextAllocation = -(1 + META_ALLOCATION); + + } + + ... [truncated message content] |
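[Editorial note: the following is a minimal, hypothetical sketch and is NOT part of the committed RWStore code. It only illustrates the lock-ordering discipline documented in the new javadoc above: the allocation lock is a ReentrantReadWriteLock taken BEFORE the extension lock, readers take shared locks, and the extension WriteLock is only taken when an allocation forces the backing file to grow. Class and helper names (findFreeSlot, extendFile, readFromFile) are invented for the sketch.]

import java.util.concurrent.locks.ReentrantReadWriteLock;

public class LockOrderingSketch {

    // Guards the file extent: readers/writers of the file take the read
    // lock; changing the file extent takes the write lock.
    private final ReentrantReadWriteLock extensionLock = new ReentrantReadWriteLock();

    // Guards the allocators: write lock for allocator mutation, read lock
    // for shared access by readers.
    private final ReentrantReadWriteLock allocationLock = new ReentrantReadWriteLock();

    /** Hypothetical allocation path: allocation lock FIRST, extension lock only if needed. */
    public long alloc(final int size) {
        allocationLock.writeLock().lock();            // (1) allocation lock is always taken first
        try {
            long addr = findFreeSlot(size);
            if (addr == -1L) {
                extensionLock.writeLock().lock();     // (2) extension WriteLock only when the file must grow
                try {
                    extendFile();
                } finally {
                    extensionLock.writeLock().unlock();
                }
                addr = findFreeSlot(size);
            }
            return addr;
        } finally {
            allocationLock.writeLock().unlock();
        }
    }

    /** Hypothetical read path: shared access to the allocators and to the file extent. */
    public void read(final long addr, final byte[] dst) {
        allocationLock.readLock().lock();             // shared access to the allocators
        try {
            extensionLock.readLock().lock();          // excludes a concurrent file extension during the IO
            try {
                readFromFile(addr, dst);
            } finally {
                extensionLock.readLock().unlock();
            }
        } finally {
            allocationLock.readLock().unlock();
        }
    }

    // Hypothetical helpers, present only to make the sketch self-contained.
    private long findFreeSlot(final int size) { return -1L; }
    private void extendFile() { }
    private void readFromFile(final long addr, final byte[] dst) { }
}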
From: <tho...@us...> - 2013-05-06 20:00:02
|
Revision: 7111
          http://bigdata.svn.sourceforge.net/bigdata/?rev=7111&view=rev
Author:   thompsonbry
Date:     2013-05-06 19:59:50 +0000 (Mon, 06 May 2013)

Log Message:
-----------
This commit resolves the problem for this ticket. The commit includes a significant refactor of some functionality in AbstractTransactionService and addresses the integration of the HA TXS and the Journal in more depth, paying special attention to when the releaseTime on the TXS must be updated through local commits and local transaction completions and when the releaseTime on the TXS must be updated through the release time consensus protocol.

- TestHA3SnapshotPolicy.testAB_snapshot_multipleTx_restore_validate() was failing because the logic to compute the effective release time for HA was using [now] rather than lastCommitTime as the default value for the readsOnCommitTime in AbstractTransactionService.getEffectiveReleaseTimeForHA(). That method was modified to accept the lastCommitTime from the caller. Guards were added to AbstractJournal.getEarliestVisibleCommitRecordForHA() to fail fast for cases where the caller's releaseTime is invalid.

- AbstractTransactionService.earliestOpenTxId was not being tracked. Added some unit test coverage for this. Refactored AbstractTransactionService.updateReleaseTime(), which was responsible for tracking this. Refactored TxId2CommitTimeIndex to store the txId as well as the readsOnCommitTime. Refactored assignTransactionIdentifier() and getStartTime() to return the TxState object directly rather than using an AtomicLong for a side-effect.

- AbstractTransactionService.updateReleaseTime() MUST be called when a tx is done. However, it was updating [releaseTime] on the TXS without regard to whether this is an HA TXS or not. This will cause the releaseTime to advance outside of the consensus protocol for an HA TXS, which will cause failures in HA.

- AbstractTransactionService.isReleaseTimeConsensusProtocol() was added to capture the dynamic distinction between a service whose TXS updates its own releaseTime as transactions complete and as the service goes through commit points, and a service whose releaseTime is updated by the consensus protocol. This distinction is dynamic (rather than static) because the service may leave or join with a met quorum at any point.

All TXS, Journal, and HA test suites are 100% green.
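[Editorial note: the following is a simplified, hypothetical sketch and is NOT the committed code. It only illustrates the dynamic distinction described in the last bullet: a service either maintains its own releaseTime as transactions complete and commits occur, or defers entirely to the HA release time consensus protocol, and it may switch between the two as it joins or leaves a met quorum. The class is invented; the method names follow the diff below.]

abstract class ReleaseTimeTrackingSketch {

    private long releaseTime = 0L;

    /** true iff the releaseTime is governed by the HA release time consensus protocol. */
    protected abstract boolean isReleaseTimeConsensusProtocol();

    /** Invoked on a local ("bare") commit when no transactions are active. */
    protected void updateReleaseTimeForBareCommit(final long commitTime) {
        if (isReleaseTimeConsensusProtocol()) {
            // HA leader or follower: only the consensus protocol may advance
            // the releaseTime, so a bare commit does nothing here.
            return;
        }
        // Non-HA, or an HA service that is NotReady: track the releaseTime
        // locally (the real logic also considers minReleaseAge and the
        // earliest active transaction).
        releaseTime = Math.max(releaseTime, Math.max(0L, commitTime - 1));
    }
}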
See https://sourceforge.net/apps/trac/bigdata/ticket/671 (Query on follower fails during UPDATE on leader) Modified Paths: -------------- branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractJournal.java branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/ITransactionService.java branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/ITx.java branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/Journal.java branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/JournalTransactionService.java branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/Tx.java branches/READ_CACHE/bigdata/src/java/com/bigdata/service/AbstractTransactionService.java branches/READ_CACHE/bigdata/src/java/com/bigdata/service/TxId2CommitTimeIndex.java branches/READ_CACHE/bigdata/src/test/com/bigdata/journal/TestTransactionService.java branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA3SnapshotPolicy.java Added Paths: ----------- branches/READ_CACHE/bigdata/src/java/com/bigdata/service/ITxState.java branches/READ_CACHE/bigdata/src/java/com/bigdata/service/ITxState0.java Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractJournal.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractJournal.java 2013-05-03 21:17:17 UTC (rev 7110) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractJournal.java 2013-05-06 19:59:50 UTC (rev 7111) @@ -3526,8 +3526,12 @@ try { - if (_rootBlock.getCommitCounter() == 0L) { + final long commitCounter = _rootBlock.getCommitCounter(); + final long lastCommitTime = _rootBlock.getLastCommitTime(); + + if (commitCounter == 0L) { + if (log.isTraceEnabled()) log.trace("No commit points"); @@ -3535,6 +3539,19 @@ return null; } + + if (releaseTime >= lastCommitTime) { + + /* + * The caller is querying with an effective releaseTime GTE the + * lastCommitTime. It is not valid to have a releaseTime GTE the + * current committed state. + */ + + throw new IllegalArgumentException("releaseTime(" + releaseTime + + ") >= lastCommitTime(" + lastCommitTime + ")"); + + } final CommitRecordIndex commitRecordIndex = _commitRecordIndex; @@ -3549,9 +3566,18 @@ * and this will return us the first record in the * CommitRecordIndex. */ + + final long effectiveTimestamp = releaseTime == 0L ? 1 : releaseTime; + final ICommitRecord commitRecord = commitRecordIndex - .findNext(releaseTime == 0L ? 1 : releaseTime); + .findNext(effectiveTimestamp); + if (commitRecord == null) + throw new AssertionError("commitCounter=" + commitCounter + + " but no commitRecord for releaseTime=" + releaseTime + + ", effectiveTimestamp=" + effectiveTimestamp + " :: " + + commitRecordIndex); + if (log.isTraceEnabled()) log.trace("releaseTime=" + releaseTime + ",commitRecord=" + commitRecord); @@ -6210,34 +6236,28 @@ try { - final long commitTime = commitMessage.getCommitTime(); - - if (haLog.isInfoEnabled()) - haLog.info("commitTime=" + commitTime + ", vote=" - + vote); + final IHA2PhasePrepareMessage prepareMessage = prepareRequest + .get(); - if (!vote.get()) { - - /* - * This service voted NO. It will not participate in - * the commit. - */ - - return; - - } - - final IHA2PhasePrepareMessage prepareMessage = prepareRequest.get(); - if (prepareMessage == null) throw new IllegalStateException(); - final IRootBlockView rootBlock = prepareMessage - .getRootBlock(); + // Note: Could throw ChecksumError. + final IRootBlockView rootBlock = prepareMessage == null ? 
null + : prepareMessage.getRootBlock(); + final long commitTime = commitMessage.getCommitTime(); + if (rootBlock == null) throw new IllegalStateException(); + if (haLog.isInfoEnabled()) + haLog.info("commitTime=" + + commitTime + + ", commitCounter=" + + prepareMessage.getRootBlock() + .getCommitCounter() + ", vote=" + vote); + if (rootBlock.getLastCommitTime() != commitTime) { /* * The commit time does not agree with the root @@ -6246,6 +6266,17 @@ throw new IllegalStateException(); } + if (!vote.get()) { + + /* + * This service voted NO. It will not participate in + * the commit. + */ + + return; + + } + // verify that the qourum has not changed. quorum.assertQuorum(rootBlock.getQuorumToken()); @@ -6299,13 +6330,31 @@ localService.purgeHALogs(rootBlock.getQuorumToken()); - } catch(Throwable t) { - - haLog.error("ERROR IN 2-PHASE COMMIT: " + t - + ", rootBlock=" + prepareRequest.get(), t); + } catch (Throwable t) { - quorum.getActor().serviceLeave(); + try { + haLog.error("ERROR IN 2-PHASE COMMIT: " + t + + ", rootBlock=" + + prepareRequest.get().getRootBlock(), t); + + } catch (Throwable t2) { + + log.error(t2, t2); + + } + + try { + + quorum.getActor().serviceLeave(); + + } catch (Throwable t2) { + + log.error(t2, t2); + + } + + // always rethrow the root cause exception. throw new RuntimeException(t); } finally { Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/ITransactionService.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/ITransactionService.java 2013-05-03 21:17:17 UTC (rev 7110) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/ITransactionService.java 2013-05-06 19:59:50 UTC (rev 7111) @@ -134,15 +134,22 @@ * <li>A timestamp (GT ZERO), which will result in a * read-historical (read-only) transaction that reads from the * most recent committed state whose commit timestamp is less - * than or equal to <i>timestamp</i>.</li> - * <li>The symbolic constant {@link ITx#READ_COMMITTED} to - * obtain a read-historical transaction reading from the most - * recently committed state of the database. The transaction will - * be assigned a start time corresponding to the most recent - * commit point of the database and will be a fully isolated - * read-only view of the state of the database as of that start - * time. (This is an atomic shorthand for - * newTx(getLastCommitTime())).</li> + * than or equal to <i>timestamp</i>. (As a special case, a + * timestamp that is GT the <code>lastCommitTime</code> will + * produce a read-only transaction that is reading on the + * <code>lastCommitTime</code> with snapshot isolation (new + * writes will not become visible in the view). This is basically + * the same behavior as specifying {@link ITx#READ_COMMITTED}. + * While perhaps counter-intuitive, this behavior is preferred to + * throwing an exception when a user attempts to read from a + * commit time GT the most recent commit point.).</li> + * <li>The symbolic constant {@link ITx#READ_COMMITTED} to obtain + * a read-historical transaction reading from the most recently + * committed state of the database. The transaction will be + * assigned a start time corresponding to the most recent commit + * point of the database and will be a fully isolated read-only + * view of the state of the database as of that start time. 
(This + * is an atomic shorthand for newTx(getLastCommitTime())).</li> * <li>{@link ITx#UNISOLATED} for a read-write transaction.</li> * </ul> * Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/ITx.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/ITx.java 2013-05-03 21:17:17 UTC (rev 7110) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/ITx.java 2013-05-06 19:59:50 UTC (rev 7111) @@ -23,10 +23,9 @@ */ package com.bigdata.journal; -import java.util.Date; - import com.bigdata.btree.ILocalBTreeView; import com.bigdata.btree.isolation.IsolatedFusedView; +import com.bigdata.service.ITxState; /** * <p> @@ -36,7 +35,7 @@ * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ */ -public interface ITx { +public interface ITx extends ITxState { /** * The constant that SHOULD used as the timestamp for an <em>unisolated</em> @@ -82,37 +81,37 @@ */ public static final long READ_COMMITTED = -1L; - /** - * The start time for the transaction as assigned by a centralized - * transaction manager service. Transaction start times are unique and also - * serve as transaction identifiers. Note that this is NOT the time at which - * a transaction begins executing on a specific journal as the same - * transaction may start at different moments on different journals and - * typically will only start on some journals rather than all. - * - * @return The transaction start time. - * - * @todo rename since the sign indicates read-only vs read-write? - */ - public long getStartTimestamp(); - - /** - * The timestamp of the commit point against which this transaction is - * reading. - * <p> - * Note: This is not currently available on a cluster. In that context, we - * wind up with the same timestamp for {@link #startTime} and - * {@link #readsOnCommitTime} which causes cache pollution for things which - * cache based on {@link #readsOnCommitTime}. - * - * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/266"> - * Refactor native long tx id to thin object</a> - * - * @see <a href="http://sourceforge.net/apps/trac/bigdata/ticket/546" > Add - * cache for access to historical index views on the Journal by name - * and commitTime. </a> - */ - public long getReadsOnCommitTime(); +// /** +// * The start time for the transaction as assigned by a centralized +// * transaction manager service. Transaction start times are unique and also +// * serve as transaction identifiers. Note that this is NOT the time at which +// * a transaction begins executing on a specific journal as the same +// * transaction may start at different moments on different journals and +// * typically will only start on some journals rather than all. +// * +// * @return The transaction start time. +// * +// * @todo rename since the sign indicates read-only vs read-write? +// */ +// public long getStartTimestamp(); +// +// /** +// * The timestamp of the commit point against which this transaction is +// * reading. +// * <p> +// * Note: This is not currently available on a cluster. In that context, we +// * wind up with the same timestamp for {@link #startTime} and +// * {@link #readsOnCommitTime} which causes cache pollution for things which +// * cache based on {@link #readsOnCommitTime}. 
+// * +// * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/266"> +// * Refactor native long tx id to thin object</a> +// * +// * @see <a href="http://sourceforge.net/apps/trac/bigdata/ticket/546" > Add +// * cache for access to historical index views on the Journal by name +// * and commitTime. </a> +// */ +// public long getReadsOnCommitTime(); // /** // * Return the timestamp assigned to this transaction by a centralized @@ -184,49 +183,49 @@ */ public boolean isEmptyWriteSet(); - /** - * A transaction is "active" when it is created and remains active until it - * prepares or aborts. An active transaction accepts READ, WRITE, DELETE, - * PREPARE and ABORT requests. - * - * @return True iff the transaction is active. - */ - public boolean isActive(); +// /** +// * A transaction is "active" when it is created and remains active until it +// * prepares or aborts. An active transaction accepts READ, WRITE, DELETE, +// * PREPARE and ABORT requests. +// * +// * @return True iff the transaction is active. +// */ +// public boolean isActive(); +// +// /** +// * A transaction is "prepared" once it has been successfully validated and +// * has fulfilled its pre-commit contract for a multi-stage commit protocol. +// * An prepared transaction accepts COMMIT and ABORT requests. +// * +// * @return True iff the transaction is prepared to commit. +// */ +// public boolean isPrepared(); +// +// /** +// * A transaction is "complete" once has either committed or aborted. A +// * completed transaction does not accept any requests. +// * +// * @return True iff the transaction is completed. +// */ +// public boolean isComplete(); +// +// /** +// * A transaction is "committed" iff it has successfully committed. A +// * committed transaction does not accept any requests. +// * +// * @return True iff the transaction is committed. +// */ +// public boolean isCommitted(); +// +// /** +// * A transaction is "aborted" iff it has successfully aborted. An aborted +// * transaction does not accept any requests. +// * +// * @return True iff the transaction is aborted. +// */ +// public boolean isAborted(); /** - * A transaction is "prepared" once it has been successfully validated and - * has fulfilled its pre-commit contract for a multi-stage commit protocol. - * An prepared transaction accepts COMMIT and ABORT requests. - * - * @return True iff the transaction is prepared to commit. - */ - public boolean isPrepared(); - - /** - * A transaction is "complete" once has either committed or aborted. A - * completed transaction does not accept any requests. - * - * @return True iff the transaction is completed. - */ - public boolean isComplete(); - - /** - * A transaction is "committed" iff it has successfully committed. A - * committed transaction does not accept any requests. - * - * @return True iff the transaction is committed. - */ - public boolean isCommitted(); - - /** - * A transaction is "aborted" iff it has successfully aborted. An aborted - * transaction does not accept any requests. - * - * @return True iff the transaction is aborted. - */ - public boolean isAborted(); - - /** * Return an isolated view onto a named index. The index will be isolated at * the same level as this transaction. Changes on the index will be made * restart-safe iff the transaction successfully commits. 
Writes on the Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/Journal.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/Journal.java 2013-05-03 21:17:17 UTC (rev 7110) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/Journal.java 2013-05-06 19:59:50 UTC (rev 7111) @@ -437,21 +437,6 @@ // Note: Local method call. timestampOnLeader = leadersValue.getTimestamp(); -// final long effectiveReleaseTimeForHA = getTransactionService() -// .getEffectiveReleaseTimeForHA(); -// -// final ICommitRecord commitRecord = getEarliestVisibleCommitRecordForHA(effectiveReleaseTimeForHA); -// -// final long commitTime = commitRecord == null ? 0L : commitRecord -// .getTimestamp(); -// -// final long commitCounter = commitRecord == null ? 0L : commitRecord -// .getCommitCounter(); -// -// this.leadersValue = new HANotifyReleaseTimeRequest( -// quorumService.getServiceId(), commitTime, commitCounter, -// timestampOnLeader); - /* * Only the followers will countDown() at the barrier. The leader * will await() until the barrier breaks. @@ -511,30 +496,22 @@ /** * Send an {@link IHAGatherReleaseTimeRequest} message to each follower. * Block until the responses are received. - * - * TODO Timeout on duration that we will wait for the followers to - * response? (Probably not, this is very similar to the 2-phase commit). - * - * FIXME HA TXS: Like the 2-phase commit, the overall protocol should - * succeed if we can get ((k+1)/2) services that do not fail this step. - * Thus for HA3, we should allow one error on a follower, the leader is - * sending the messages and is presumed to succeed, and one follower - * COULD fail without failing the protocol. If the protocol does fail we - * have to fail the commit, to getting this right is NECESSARY. At a - * mimimum, we must not fail if all joined services on entry to this - * method respond without failing (that is, succeed if no services fail - * during this protocol). [Review further whether we can allow the - * 2-phase commit to rely on a service that was not joined when we took - * this step. We probably can given that the serviceJoin() code path of - * the follower is MUTEX with the negotiation of the consensus - * releaseTime value so long as the service uses an appropriate - * releaseTime when it joins. Which it should do, but review this - * closely.] + * <p> + * Note: Like the 2-phase commit, the overall protocol should succeed if + * we can get <code>((k+1)/2)</code> services that do not fail this + * step. Thus for HA3, we should allow one error on a follower, the + * leader is sending the messages and is presumed to succeed, and one + * follower COULD fail without failing the protocol. If the protocol + * does fail we have to fail the commit, so getting this right is + * NECESSARY. At a mimimum, we must not fail if all joined services on + * entry to this method respond without failing (that is, succeed if no + * services fail during this protocol) - this is implemented. */ private void messageFollowers(final long token) throws IOException { getQuorum().assertLeader(token); + // Future for gather task for each follower. final List<Future<Void>> remoteFutures = new LinkedList<Future<Void>>(); try { @@ -600,16 +577,15 @@ * * FIXME HA TXS: A reset() here does not allow us to proceed * with the consensus protocol unless all services - * "vote yes". + * "vote yes". 
Thus, a single node failure during the + * release time consensus protocol will cause the commit to + * fail. */ barrier.reset(); } /* * If there were any errors, then throw an exception listing them. - * - * FIXME But only throw the exception if the errors were for a joined - * service. Otherwise just log. */ if (!causes.isEmpty()) { // Cancel remote futures. @@ -694,8 +670,13 @@ * effective release time based on the last commit time for the * store. * - * [TODO HA TXS: Review startup and serviceJoin() conditions to - * set the releaseTime.] + * Note: For HA, the releaseTime is updated by the consensus + * protocol once a quorum is met. Before the quorum meets (and + * before a service joins with a met quorum) each service will + * track its own releaseTime. Therefore, during startup, the + * quorum will be null or HAStatusEnum will be NotReady so the + * TXS will automatically track the release time until the + * service joins with a met quorum. */ if (log.isInfoEnabled()) @@ -813,8 +794,14 @@ final long consensusValue = barrierState.consensus .getCommitTime(); - setReleaseTime(Math.max(0L, consensusValue - 1)); + final long newReleaseTime = Math.max(0L, consensusValue - 1); + + if (log.isInfoEnabled()) + log.info("Advancing releaseTime on leader: " + + newReleaseTime); + setReleaseTime(newReleaseTime); + } finally { barrierLock.unlock(); @@ -822,39 +809,87 @@ } } - + /** * {@inheritDoc} * <p> - * Note: When we are using a 2-phase commit, the leader can not update - * the release time from commit() using this methods. It must rely on - * the consensus protocol to update the release time instead. + * Overridden to notice whether this service is using the consensus + * protocol to update the releaseTime or updating it automatically as + * transactions complete. * * @see <a href= * "https://sourceforge.net/apps/trac/bigdata/ticket/530#comment:116"> * Journal HA </a> */ @Override - protected void updateReleaseTimeForBareCommit(final long commitTime) { + protected boolean isReleaseTimeConsensusProtocol() { final HAStatusEnum haStatus = getHAStatus(); if (haStatus == null || haStatus == HAStatusEnum.NotReady) { - super.updateReleaseTimeForBareCommit(commitTime); - - } else { - /* - * Note: When we are using a 2-phase commit, the leader can not - * update the release time from commit() using this methods. It - * must rely on the consensus protocol to update the release - * time instead. + * Since we are not HA or this service is not HAReady, we will + * not use the consensus protocol to update the releaseTime. + * + * Therefore the releaseTime is updated here since we will not + * (actually, did not) run the consensus protocol to update it. */ + + return false; } + + /* + * Note: When we are using a 2-phase commit, the leader can not + * update the release time from commit() using this methods. It + * must rely on the consensus protocol to update the release + * time instead. + */ + return true; + } + +// /** +// * {@inheritDoc} +// * <p> +// * Note: When we are using a 2-phase commit, the leader can not update +// * the release time from commit() using this methods. It must rely on +// * the consensus protocol to update the release time instead. 
+// * +// * @see <a href= +// * "https://sourceforge.net/apps/trac/bigdata/ticket/530#comment:116"> +// * Journal HA </a> +// */ +// @Override +// protected void updateReleaseTimeForBareCommit(final long commitTime) { +// +// final HAStatusEnum haStatus = getHAStatus(); +// +// if (haStatus == null || haStatus == HAStatusEnum.NotReady) { +// +// /* +// * Since we are not HA or this service is not HAReady, we will +// * not use the consensus protocol to update the releaseTime. +// * +// * Therefore the releaseTime is updated here since we will not +// * (actually, did not) run the consensus protocol to update it. +// */ +// super.updateReleaseTimeForBareCommit(commitTime); +// +// } else { +// +// /* +// * Note: When we are using a 2-phase commit, the leader can not +// * update the release time from commit() using this methods. It +// * must rely on the consensus protocol to update the release +// * time instead. +// */ +// +// } +// +// } /** * {@inheritDoc} @@ -892,7 +927,7 @@ */ protected IHANotifyReleaseTimeRequest newHANotifyReleaseTimeRequest( final UUID serviceId) { - + // On AbstractTransactionService. final long effectiveReleaseTimeForHA = getEffectiveReleaseTimeForHA(); @@ -911,11 +946,13 @@ serviceId, commitTime, commitCounter, now); if (log.isTraceEnabled()) - log.trace("releaseTime=" + getReleaseTime() - + ", effectiveReleaseTimeForHA=" - + effectiveReleaseTimeForHA + ",rootBlock=" - + getRootBlockView() + ", req=" + req); - + log.trace("releaseTime=" + getReleaseTime()// + + ",effectiveReleaseTimeForHA=" + + effectiveReleaseTimeForHA // + + ",rootBlock=" + getRootBlockView() // + + ",req=" + req// + ); + return req; } @@ -994,30 +1031,6 @@ final IHANotifyReleaseTimeRequest req2 = newHANotifyReleaseTimeRequest(quorumService .getServiceId()); -// final long effectiveReleaseTimeForHA = getEffectiveReleaseTimeForHA(); -// -// final ICommitRecord commitRecord = getEarliestVisibleCommitRecordForHA(effectiveReleaseTimeForHA); -// -// final long commitCounter = commitRecord == null ? 0 -// : commitRecord.getCommitCounter(); -// -// final long commitTime = commitRecord == null ? 0 -// : commitRecord.getTimestamp(); -// -// final long timestampOnFollower = getLocalTransactionManager() -// .nextTimestamp(); -// -// final IHANotifyReleaseTimeRequest req2 = new HANotifyReleaseTimeRequest( -// quorumService.getServiceId(), commitTime, -// commitCounter, timestampOnFollower); -// -// if (log.isTraceEnabled()) -// log.trace("releaseTime=" + getReleaseTime() -// + ", effectiveReleaseTimeForHA=" -// + effectiveReleaseTimeForHA -// + ",rootBlock=" + getRootBlockView() -// + ", req=" + req); -// /* * RMI to leader. * @@ -1026,9 +1039,59 @@ final IHANotifyReleaseTimeResponse resp = leader .notifyEarliestCommitTime(req2); - // Update the release time on the follower - setReleaseTime(Math.max(0L, resp.getCommitTime() - 1)); + /* + * Now spot check the earliest active tx on this follower. + * We want to make sure that this tx is not reading against + * a commit point whose state would be released by the new + * consensus releaseTime. + * + * If everything is Ok, we update the releaseTime on the + * follower. + */ + lock.lock(); + + try { + + final TxState txState = getEarliestActiveTx(); + + if (txState != null + && txState.getReadsOnCommitTime() > resp + .getCommitTime()) { + + /* + * At least one transaction exists on the follower + * that is reading on a commit point which would be + * released by the new releaseTime. 
This is either a + * failure in the logic to compute the consensus + * releaseTime or a failure to exclude new + * transaction starts on the follower while + * computing the new consensus releaseTime. + */ + + throw new AssertionError( + "The releaseTime consensus would release an in use commit point" + + ": consensus=" + resp + + ", earliestActiveTx=" + txState); + + } + + final long newReleaseTime = Math.max(0L, + resp.getCommitTime() - 1); + + if (log.isInfoEnabled()) + log.info("Advancing releaseTime on follower: " + + newReleaseTime); + + // Update the releaseTime on the follower + setReleaseTime(newReleaseTime); + + } finally { + + lock.unlock(); + + } + // Done. return null; @@ -1204,17 +1267,6 @@ final long now = nextTimestamp(); - /* - * TODO Should we reject tx starts against future history? The - * AbstractTransactionService used to reject this case but that - * code has been commented out. Perhaps because of minor clock - * differences that could arise? - */ -// if (timestamp > now) { -// // Timestamp is in the future. -// throw new IllegalArgumentException(); -// } - final long minReleaseAge = getMinReleaseAge(); final long ageOfTxView = now - timestamp; @@ -1232,9 +1284,9 @@ */ { - final TxState state = getEarlestActiveTx(); + final TxState state = getEarliestActiveTx(); - if (state != null && state.readsOnCommitTime <= timestamp) { + if (state != null && state.getReadsOnCommitTime() <= timestamp) { // Start Tx. Commit point pinned by earliestActiveTx. return _newTx(timestamp); @@ -1364,7 +1416,7 @@ protected void activateTx(final TxState state) { if (txLog.isInfoEnabled()) txLog.info("OPEN : txId=" + state.tx - + ", readsOnCommitTime=" + state.readsOnCommitTime); + + ", readsOnCommitTime=" + state.getReadsOnCommitTime()); final IBufferStrategy bufferStrategy = Journal.this.getBufferStrategy(); if (bufferStrategy instanceof IHistoryManager) { final IRawTx tx = ((IHistoryManager)bufferStrategy).newTx(); @@ -1379,7 +1431,7 @@ protected void deactivateTx(final TxState state) { if (txLog.isInfoEnabled()) txLog.info("CLOSE: txId=" + state.tx - + ", readsOnCommitTime=" + state.readsOnCommitTime); + + ", readsOnCommitTime=" + state.getReadsOnCommitTime()); /* * Note: We need to deactivate the tx before RawTx.close() is * invoked otherwise the activeTxCount will never be zero inside Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/JournalTransactionService.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/JournalTransactionService.java 2013-05-03 21:17:17 UTC (rev 7110) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/JournalTransactionService.java 2013-05-06 19:59:50 UTC (rev 7111) @@ -82,7 +82,7 @@ */ new Tx(journal.getLocalTransactionManager(), journal, state.tx, - state.readsOnCommitTime); + state.getReadsOnCommitTime()); } Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/Tx.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/Tx.java 2013-05-03 21:17:17 UTC (rev 7110) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/Tx.java 2013-05-06 19:59:50 UTC (rev 7111) @@ -198,15 +198,14 @@ } + @Override public long getReadsOnCommitTime() { return readsOnCommitTime; } - - /** - * Return <code>true</code> iff this is a read-only transaction. 
- */ + + @Override public boolean isReadOnly() { return readOnly; Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/service/AbstractTransactionService.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/service/AbstractTransactionService.java 2013-05-03 21:17:17 UTC (rev 7110) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/service/AbstractTransactionService.java 2013-05-06 19:59:50 UTC (rev 7111) @@ -43,10 +43,13 @@ import org.apache.log4j.Logger; +import com.bigdata.btree.ITuple; +import com.bigdata.btree.ITupleIterator; import com.bigdata.config.LongValidator; import com.bigdata.counters.CounterSet; import com.bigdata.counters.ICounterSetAccess; import com.bigdata.counters.Instrument; +import com.bigdata.ha.HAStatusEnum; import com.bigdata.journal.ITransactionService; import com.bigdata.journal.ITx; import com.bigdata.journal.Journal; @@ -220,7 +223,7 @@ /** * A hash map containing all active transactions. A transaction that is * preparing will remain in this collection until it has completed (aborted - * or committed). + * or committed). The key is the txId of the transaction. * * @todo config param for the initial capacity of the map. * @todo config for the concurrency rating of the map. @@ -235,12 +238,13 @@ * internal synchronization APIs for the transaction service. * * @param tx - * The transaction identifier. + * The transaction identifier (the signed value, NOT the absolute + * value). * * @return The {@link TxState} -or- <code>null</code> if there is no such * active transaction. */ - final protected TxState getTxState(final long tx) { + protected TxState getTxState(final long tx) { return activeTx.get(tx); @@ -278,6 +282,7 @@ /** * Any state other than {@link TxServiceRunState#Halted}. */ + @Override public boolean isOpen() { return runState != TxServiceRunState.Halted; @@ -339,6 +344,7 @@ * until existing transactions (both read-write and read-only) are complete * (either aborted or committed). */ + @Override public void shutdown() { if(log.isInfoEnabled()) @@ -489,6 +495,7 @@ * exceptions from various methods, including {@link #nextTimestamp()}) * when the service halts. */ + @Override public void shutdownNow() { if(log.isInfoEnabled()) @@ -578,7 +585,7 @@ * Note: We are already holding the outer lock so we do not * need to acquire it here. */ - updateReleaseTime(Math.abs(state.tx)); + updateReleaseTime(Math.abs(state.tx), null/* deactivatedTx */); } @@ -607,6 +614,7 @@ * Immediate/fast shutdown of the service and then destroys any persistent * state associated with the service. */ + @Override synchronized public void destroy() { log.warn(""); @@ -678,7 +686,7 @@ * {@link #nextTimestamp()}. * <p> * Note: The transaction service will refuse to start new transactions whose - * timestamps are LTE to {@link #earliestOpenTxId}. + * timestamps are LTE to {@link #getReleaseTime()}. * * @throws RuntimeException * Wrapping {@link TimeoutException} if a timeout occurs @@ -722,15 +730,12 @@ try { - final AtomicLong readCommitTime = new AtomicLong(); + final TxState txState = assignTransactionIdentifier(timestamp); - final long tx = assignTransactionIdentifier(timestamp, - readCommitTime); + activateTx(txState); - activateTx(new TxState(tx, readCommitTime.get())); + return txState.tx; - return tx; - } catch(TimeoutException ex) { throw new RuntimeException(ex); @@ -838,33 +843,37 @@ * <p> * Note: The {@link #lock} is required in order to make atomic decisions * about the earliest active tx. 
Without the {@link #lock}, the tx could - * stop or a new tx could start, thereby invalidating the "easliest active" + * stop or a new tx could start, thereby invalidating the "earliest active" * guarantee. * * @throws IllegalMonitorStateException * unless the {@link #lock} is held by the caller. */ - protected TxState getEarlestActiveTx() { + protected TxState getEarliestActiveTx() { if (!lock.isHeldByCurrentThread()) throw new IllegalMonitorStateException(); - final TxState state = getTxState(earliestOpenTxId); +// final TxState state = getTxState(earliestOpenTxId); +// +// return state; - return state; + return earliestOpenTx; } /** - * The minimum over the absolute values of the active transactions and ZERO - * (0) if there are no open transactions. + * The earliest open transaction. * <p> - * Note: This is a transaction identifier. It is NOT the commitTime on which - * that transaction is reading. + * Note: This field is guarded by the {@link #lock}. However, it is declared + * <code>volatile</code> to provide visibility to {@link #getCounters()} + * without taking the lock. * - * @see https://sourceforge.net/apps/trac/bigdata/ticket/467 + * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/467" > + * IllegalStateException trying to access lexicon index using RWStore + * with recycling </a> */ - private volatile long earliestOpenTxId = 0L; + private volatile TxState earliestOpenTx = null; /** * {@inheritDoc} @@ -873,8 +882,8 @@ */ public long getReleaseTime() { - if (log.isDebugEnabled()) - log.debug("releaseTime=" + releaseTime + ", lastKnownCommitTime=" + if (log.isTraceEnabled()) + log.trace("releaseTime=" + releaseTime + ", lastKnownCommitTime=" + getLastCommitTime()); return releaseTime; @@ -901,53 +910,23 @@ /** * Sets the new release time. + * <p> + * Note: For a joined service in HA (the leader or a follower), the release + * time is set by the consensus protocol. Otherwise it is automatically + * maintained by {@link #updateReleaseTime(long, TxState)} and + * {@link #updateReleaseTimeForBareCommit(long)}. * * @param newValue * The new value. + * + * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/671"> + * Query on follower fails during UPDATE on leader </a> */ protected void setReleaseTime(final long newValue) { if (!lock.isHeldByCurrentThread()) throw new IllegalMonitorStateException(); - /* - * FIXME HA TXS: The HA release time consensus protocol relies on the - * evolving release time on each joined service. This means that the - * release times on a joined service can increase until we hit the next - * commit point on the leader. At that point, we look at the pinned - * history (minReleaseAge) and the earliest visible commit point (taking - * into account the local releaseTime) on each joined service. This - * gives us a consensus value for the earliest visible commit record on - * any service. That new release time is then propagated to the - * services. This CAN result in historical state being "invisible" on a - * follower and then "reappearing" after a commit since either the - * leader or another follower still has an earlier commit point pinned. - * - * This should be modified such that the releaseTime is ONLY updated in - * HA each time we go through a commit point. This means that the - * consensus protocol must discover the earliest active tx and its - * readsOnCommitTime rather than relying on the then current - * releaseTime. 
This is the only way to have a tx on one TXS in an HA - * replication cluster pin the history of all TXS services in that - * cluster. - * - * The necessary changes are to: - * - * - Journal.InnerJournalTransactionService.GatherTask.call(). This is - * where we decide the earliest visible commit record on the follower. - * - * - AbstractJournal.getEarliestVisibleCommitRecord(). This must use - * getEarliestActiveTx() while holding the TXS lock, so this method - * needs to be lifted onto the InnerJournalTransactionService. - * - * FIXME - AbstractTransactionService.updateReleaseTime(). This must not - * advance the releaseTime in HA unless it is done as part of the - * consensus protocol. Note: I have already modified - * updateReleaseTimeForBareCommit() which was having some similar - * issues. However, I do not believe that my changes are yet coherent or - * sufficient. - */ - final long oldValue = releaseTime; if (newValue < oldValue) { @@ -955,7 +934,6 @@ // throw new IllegalStateException("oldValue=" + oldValue // + ", newValue=" + newValue); - // FIXME HA TXS : Observe newV = oldV - 1? final String msg = "oldValue=" + oldValue + ", newValue=" + newValue; @@ -979,12 +957,29 @@ * <code>now - minReleaseAge</code> and the readsOnCommitTime of the * earliest active Tx. If the value would be negative, then ZERO (0L) is * reported instead. + * <p> + * Note: This duplicates logic in {@link #updateReleaseTime(long)}, but + * handles the special case in HA where the releaseTime is not being updated + * by {@link #updateReleaseTimeForBareCommit(long)}. * + * @return The effective release time. + * * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/623"> HA * TXS </a> + * + * @see #updateReleaseTime(long) */ protected long getEffectiveReleaseTimeForHA() { + if (minReleaseAge == Long.MAX_VALUE) { + + // All history is pinned. + return 0L; + + } + + final long lastCommitTime = getLastCommitTime(); + lock.lock(); try { @@ -992,31 +987,53 @@ final long now = _nextTimestamp(); // Find the earliest commit time pinned by an active tx. - final long readsOnCommitTime; + final long earliestTxReadsOnCommitTime; - final TxState txState = getEarlestActiveTx(); + final TxState txState = getEarliestActiveTx(); if (txState == null) { // No active tx. Use now. - readsOnCommitTime = now; + earliestTxReadsOnCommitTime = now; } else { // Earliest active tx. - readsOnCommitTime = txState.readsOnCommitTime; + earliestTxReadsOnCommitTime = txState.readsOnCommitTime; } + /* + * The release time will be the minimum of: + * + * a) The timestamp BEFORE the lastCommitTime. + * + * b) The timestamp BEFORE the earliestTxStartTime. + * + * c) minReleaseAge milliseconds in the past. + * + * Note: NEVER let go of the last commit time! + * + * @todo there is a fence post here for [now-minReleaseAge] when + * minReleaseAge is very large, e.g., Long#MAX_VALUE. This is caught + * above for that specific value, but other very large values could + * also cause problems. 
+ * + * @see https://sourceforge.net/apps/trac/bigdata/ticket/467 + */ final long effectiveReleaseTimeForHA = Math.min( - readsOnCommitTime - 1, now - minReleaseAge); + lastCommitTime - 1, + Math.min(earliestTxReadsOnCommitTime - 1, now + - minReleaseAge)); if (log.isDebugEnabled()) - log.debug("releaseTime=" + releaseTime + ", earliestActiveTx=" - + txState + ", readsOnCommitTime=" + readsOnCommitTime - + ", (now-minReleasAge)=" + (now - minReleaseAge) - + ", effectiveReleaseTimeForHA=" - + effectiveReleaseTimeForHA); + log.debug("releaseTime=" + releaseTime // + + ", lastCommitTime=" + lastCommitTime + + ", earliestActiveTx=" + txState// + + ", readsOnCommitTime=" + earliestTxReadsOnCommitTime// + + ", (now-minReleaseAge)=" + (now - minReleaseAge)// + + ": effectiveReleaseTimeForHA=" + effectiveReleaseTimeForHA// + ); return effectiveReleaseTimeForHA; @@ -1045,6 +1062,22 @@ if (!state.isActive()) throw new IllegalArgumentException(); + + if (this.earliestOpenTx == null + || Math.abs(state.tx) < Math.abs(this.earliestOpenTx.tx)) { + + /* + * This is the earliest open transaction. This is defined as the + * transaction whose readsOnCommitTime is LTE all other + * transactions and whose absolute txId value is LT all other + * transactions. Since we assign the txIds in intervals GTE the + * readsOnCommitTime and LT the next possible commit point, we + * can maintain this invariant by only comparing abs(txId). + */ + + this.earliestOpenTx = state; + + } activeTx.put(state.tx, state); @@ -1057,7 +1090,8 @@ * otherwise this will throw out an exception. */ - startTimeIndex.add(Math.abs(state.tx), state.readsOnCommitTime); +// startTimeIndex.add(Math.abs(state.tx), state.readsOnCommitTime); + startTimeIndex.add(state); } @@ -1074,7 +1108,7 @@ } if (log.isInfoEnabled()) - log.info(state.toString() + ", startCount=" + startCount + log.info(state.toString() + ", releaseTime="+releaseTime+", earliestActiveTx="+earliestOpenTx+", startCount=" + startCount + ", abortCount=" + abortCount + ", commitCount=" + commitCount + ", readOnlyActiveCount=" + readOnlyActiveCount + ", readWriteActiveCount=" @@ -1186,31 +1220,65 @@ // } } + + /** + * Return <code>true</code> iff the release time consensus protocol is being + * used to update the releaseTime (HA and this service is either a leader or + * a follower). Return <code>false</code> iff the service should locally + * manage its own release time (non-HA and HA when the service is + * {@link HAStatusEnum#NotReady}). + * <p> + * Note: When we are using a 2-phase commit, the leader can not update the + * release time from commit() using this methods. It must rely on the + * consensus protocol to update the release time instead. + * + * @see <a href= + * "https://sourceforge.net/apps/trac/bigdata/ticket/530#comment:116"> + * Journal HA </a> + */ + protected boolean isReleaseTimeConsensusProtocol() { + + return false; + + } /** * This method MUST be invoked each time a transaction completes with the * absolute value of the transaction identifier that has just been * deactivated. The method will remove the transaction entry in the ordered - * set of running transactions ({@link #startTimeIndex}). If the specified - * timestamp corresponds to the earliest running transaction, then the - * <code>releaseTime</code> will be updated and the new releaseTime will be - * set using {@link #setReleaseTime(long)}. + * set of running transactions ({@link #startTimeIndex}). 
* <p> - * Note that the {@link #startTimeIndex} keys are the absolute value of the - * transaction identifiers! The values are the commit times on which the - * corresponding transaction is reading. + * If the specified timestamp corresponds to the earliest running + * transaction, then the <code>releaseTime</code> will be updated and the + * new releaseTime will be set using {@link #setReleaseTime(long)}. For HA, + * the releaseTime is updated by a consensus protocol and the individual + * services MUST NOT advance their releaseTime as transactions complete. + * <p> + * Note: When we are using a 2-phase commit, the leader can not update the + * release time from commit() using this methods. It must rely on the + * consensus protocol to update the release time instead. * * @param timestamp * The absolute value of a transaction identifier that has just * been deactivated. + * @param deactivatedTx + * The transaction object that has been deactivated -or- + * <code>null</code> if there are known to be no active + * transactions remaining (e.g., startup and abortAll()). * + * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/467" > + * IllegalStateException trying to access lexicon index using RWStore + * with recycling </a> + * + * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/671"> + * Query on follower fails during UPDATE on leader </a> + * * @todo the {@link #startTimeIndex} could be used by * {@link #findUnusedTimestamp(long, long)} so that it could further * constrain its search within the half-open interval. - * - @see https://sourceforge.net/apps/trac/bigdata/ticket/467 */ - final protected void updateReleaseTime(final long timestamp) { + final protected void updateReleaseTime(final long timestamp, + final TxState deactivatedTx) { if (timestamp <= 0) throw new IllegalArgumentException(); @@ -1234,24 +1302,26 @@ // current value for the releaseTime. final long oldReleaseTime = this.releaseTime; - + /* * true iff the tx specified by the caller was the earliest running * transaction. */ final boolean isEarliestTx; - /* - * The earliest tx remaining now that the caller's tx is complete and - * [now] if there are no more running transactions. - */ - final long earliestTxStartTime; - /* - * The commit time on which the earliest remaining tx is reading and - * [now] if there are no more running transactions. - */ - final long earliestTxReadsOnCommitTime; +// /* +// * The earliest tx remaining now that the caller's tx is complete and +// * [now] if there are no more running transactions. +// */ +// final long earliestTxStartTimeX; +// /* +// * The commit time on which the earliest remaining tx is reading and +// * [now] if there are no more running transactions. +// */ +// final long earliestTxReadsOnCommitTimeX; + TxState earliestActiveTx = null; + synchronized (startTimeIndex) { // Note: ZERO (0) is the first tuple in the B+Tree. @@ -1264,35 +1334,89 @@ if (indexOf != -1) startTimeIndex.remove(timestamp); - if (!isEarliestTx) { +// if (!isEarliestTx) { +// +// // No change unless earliest tx terminates. +// return; +// +// } - // No change unless earliest tx terminates. - return; + if (startTimeIndex.getEntryCount() > 0) { + /* There are remaining entries in the [startTimeIndex]. Scan it for the earliestActiveTx remaining. + * + * Note: We need to handle a data race where the earliest active + * tx in the [startTimeIndex] has been concurrently deactivated + * (and removed from the [activeTx] map). 
This is done by + * scanning until we find the first active tx in the + * [startTimeIndex]. It will typically be the first entry. + * + * Note: transactions can not start or end while we are + * synchronized the [startTimeIndex]. + */ - } + @SuppressWarnings("rawtypes") + final ITupleIterator titr = startTimeIndex.rangeIterator(); - if (startTimeIndex.getEntryCount() > 0) { + while (titr.hasNext()) { - /* - * The start time associated with the earliest remaining tx. - */ - final byte[] key = startTimeIndex.keyAt(0L); - - earliestTxStartTime = startTimeIndex.decodeKey(key); + @SuppressWarnings("rawtypes") + final ITuple t = titr.next(); - /* - * The commit point on which that tx is reading. - ... [truncated message content] |
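For readers skimming the diff above, the revised HA release-time rule reduces to the following. This is a condensed, illustrative sketch only; the parameters are simplified stand-ins for the fields and accessors of AbstractTransactionService and this is not the committed method.

{{{
// Condensed sketch of the getEffectiveReleaseTimeForHA() logic visible in the
// diff above. Parameter names are illustrative stand-ins; not the committed code.
public class EffectiveReleaseTimeSketch {

    static long effectiveReleaseTimeForHA(final long now,
            final long lastCommitTime, final Long earliestTxReadsOnCommitTime,
            final long minReleaseAge) {

        if (minReleaseAge == Long.MAX_VALUE) {
            // All history is pinned.
            return 0L;
        }

        // With no active tx, the earliest pinned read point is [now].
        final long pinnedByTx = (earliestTxReadsOnCommitTime == null) ? now
                : earliestTxReadsOnCommitTime.longValue();

        /*
         * Minimum of: (a) the timestamp before the last commit time, (b) the
         * timestamp before the read point of the earliest active tx, and (c)
         * minReleaseAge milliseconds in the past. Never let go of the last
         * commit point.
         */
        return Math.min(lastCommitTime - 1,
                Math.min(pinnedByTx - 1, now - minReleaseAge));
    }
}
}}}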
From: <tho...@us...> - 2013-05-07 15:09:35
Revision: 7113 http://bigdata.svn.sourceforge.net/bigdata/?rev=7113&view=rev
Author: thompsonbry
Date: 2013-05-07 15:09:24 +0000 (Tue, 07 May 2013)

Log Message:
-----------
Checkpoint on refactor that will introduce an index over the HALog files to reduce the latency at the commit. This abstracts out a common base class for the SnapshotIndex and the HALogIndex. The HALogIndex is not yet in use. The next step is to see if I can get the test suite to pass with the alt HALogManager class.

See https://sourceforge.net/apps/trac/bigdata/ticket/670 (Accumulating HALog files cause latency for HA commit)

Modified Paths:
--------------
branches/READ_CACHE/bigdata/src/java/com/bigdata/btree/UnisolatedReadWriteIndex.java
branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/althalog/HALogFile.java
branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/althalog/HALogManager.java
branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/althalog/IHALogReader.java
branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/halog/HALogWriter.java
branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/halog/IHALogReader.java
branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/DefaultRestorePolicy.java
branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournal.java
branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournalServer.java
branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HARestore.java
branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/SnapshotIndex.java
branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/SnapshotManager.java
branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/AbstractHA3JournalServerTestCase.java
branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestAll.java
branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA3JournalServer.java
branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA3SnapshotPolicy2.java
branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestSnapshotIndex.java
branches/READ_CACHE/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/HAStatusServletUtil.java

Added Paths:
-----------
branches/READ_CACHE/bigdata/src/java/com/bigdata/btree/DelegateBTree.java
branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractCommitTimeIndex.java
branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/ICommitTimeEntry.java
branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HALogIndex.java
branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHALogIndex.java

Added: branches/READ_CACHE/bigdata/src/java/com/bigdata/btree/DelegateBTree.java
===================================================================
--- branches/READ_CACHE/bigdata/src/java/com/bigdata/btree/DelegateBTree.java (rev 0)
+++ branches/READ_CACHE/bigdata/src/java/com/bigdata/btree/DelegateBTree.java 2013-05-07 15:09:24 UTC (rev 7113)
@@ -0,0 +1,77 @@
+/* + +Copyright (C) SYSTAP, LLC 2006-2008. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +*/ +/* + * Created on Feb 20, 2008 + */ +package com.bigdata.btree; + +/** + * An object that delegates the {@link IIndex} and {@link ILinearList} + * interfaces. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + */ +public class DelegateBTree extends DelegateIndex implements ILinearList { + + private final ILinearList delegate; + + public DelegateBTree(final ILinearList ndx) { + + super((IIndex) ndx); + + this.delegate = ndx; + + } + + public DelegateBTree(final BTree btree) { + + super(btree); + + this.delegate = btree; + + } + + @Override + public long indexOf(final byte[] key) { + + return delegate.indexOf(key); + + } + + @Override + public byte[] keyAt(final long index) { + + return delegate.keyAt(index); + + } + + @Override + public byte[] valueAt(final long index) { + + return delegate.valueAt(index); + + } + +} Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/btree/UnisolatedReadWriteIndex.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/btree/UnisolatedReadWriteIndex.java 2013-05-07 11:25:20 UTC (rev 7112) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/btree/UnisolatedReadWriteIndex.java 2013-05-07 15:09:24 UTC (rev 7113) @@ -121,7 +121,7 @@ * @version $Id: UnisolatedReadWriteIndex.java 4054 2011-01-05 13:51:25Z * thompsonbry $ */ -public class UnisolatedReadWriteIndex implements IIndex { +public class UnisolatedReadWriteIndex implements IIndex, ILinearList { private static final Logger log = Logger.getLogger(UnisolatedReadWriteIndex.class); @@ -189,7 +189,7 @@ * * @return The acquired lock. 
*/ - protected Lock readLock() { + public Lock readLock() { final Lock readLock = readWriteLock.readLock(); @@ -863,4 +863,34 @@ } + @Override + public long indexOf(final byte[] key) { + final Lock lock = readLock(); + try { + return ndx.indexOf(key); + } finally { + lock.unlock(); + } + } + + @Override + public byte[] keyAt(final long index) { + final Lock lock = readLock(); + try { + return ndx.keyAt(index); + } finally { + lock.unlock(); + } + } + + @Override + public byte[] valueAt(final long index) { + final Lock lock = readLock(); + try { + return ndx.valueAt(index); + } finally { + lock.unlock(); + } + } + } Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/althalog/HALogFile.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/althalog/HALogFile.java 2013-05-07 11:25:20 UTC (rev 7112) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/althalog/HALogFile.java 2013-05-07 15:09:24 UTC (rev 7113) @@ -33,7 +33,6 @@ import java.nio.channels.FileChannel; import java.security.DigestException; import java.security.MessageDigest; -import java.util.Formatter; import java.util.concurrent.locks.Condition; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReadWriteLock; @@ -41,7 +40,6 @@ import org.apache.log4j.Logger; -import com.bigdata.btree.BytesUtil; import com.bigdata.ha.althalog.HALogManager.IHALogManagerCallback; import com.bigdata.ha.msg.IHAWriteMessage; import com.bigdata.io.DirectBufferPool; @@ -53,6 +51,7 @@ import com.bigdata.journal.RootBlockUtility; import com.bigdata.journal.RootBlockView; import com.bigdata.journal.StoreTypeEnum; +import com.bigdata.journal.jini.ha.CommitCounterUtility; import com.bigdata.rawstore.Bytes; import com.bigdata.util.ChecksumError; import com.bigdata.util.ChecksumUtility; @@ -173,9 +172,8 @@ public HALogFile(final IRootBlockView rbv, final IHALogManagerCallback callback) throws IOException { m_callback = callback; - final File hadir = m_callback.getHALogDir(); - m_haLogFile = new File(hadir, getHALogFileName(rbv.getCommitCounter()) - + IHALogReader.HA_LOG_EXT); + m_haLogFile = getHALogFileName(m_callback.getHALogDir(), + rbv.getCommitCounter()); if (m_haLogFile.exists()) throw new IllegalStateException("File already exists: " @@ -659,32 +657,34 @@ * @param commitCounter * @return */ - public static String getHALogFileName(final long commitCounter) { + public static File getHALogFileName(final File dir, final long commitCounter) { - /* - * Format the name of the log file. - * - * Note: The commit counter in the file name should be zero filled to 20 - * digits so we have the files in lexical order in the file system (for - * convenience). - */ - final String logFile; - { + return CommitCounterUtility.getCommitCounterFile(dir, commitCounter, + IHALogReader.HA_LOG_EXT); +// /* +// * Format the name of the log file. +// * +// * Note: The commit counter in the file name should be zero filled to 20 +// * digits so we have the files in lexical order in the file system (for +// * convenience). 
+// */ +// final String logFile; +// { +// +// final StringBuilder sb = new StringBuilder(); +// +// final Formatter f = new Formatter(sb); +// +// f.format("%020d" + IHALogReader.HA_LOG_EXT, commitCounter); +// f.flush(); +// f.close(); +// +// logFile = sb.toString(); +// +// } +// +// return logFile; - final StringBuilder sb = new StringBuilder(); - - final Formatter f = new Formatter(sb); - - f.format("%020d" + IHALogReader.HA_LOG_EXT, commitCounter); - f.flush(); - f.close(); - - logFile = sb.toString(); - - } - - return logFile; - } private class HALogAccess { Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/althalog/HALogManager.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/althalog/HALogManager.java 2013-05-07 11:25:20 UTC (rev 7112) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/althalog/HALogManager.java 2013-05-07 15:09:24 UTC (rev 7113) @@ -174,7 +174,7 @@ m_currentLock.unlock(); } - final File file = new File(m_halogdir, HALogFile.getHALogFileName(commitCounter)); + final File file = HALogFile.getHALogFileName(m_halogdir, commitCounter); final HALogFile halog = new HALogFile(file); return halog.getReader(); @@ -192,7 +192,7 @@ /* * Check the file exists first */ - final File file = new File(m_halogdir, HALogFile.getHALogFileName(commitCounter)); + final File file = HALogFile.getHALogFileName(m_halogdir, commitCounter); if (!file.exists()) throw new FileNotFoundException(); Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/althalog/IHALogReader.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/althalog/IHALogReader.java 2013-05-07 11:25:20 UTC (rev 7112) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/althalog/IHALogReader.java 2013-05-07 15:09:24 UTC (rev 7113) @@ -23,6 +23,8 @@ */ package com.bigdata.ha.althalog; +import java.io.File; +import java.io.FileFilter; import java.io.IOException; import java.nio.ByteBuffer; import java.security.DigestException; @@ -46,6 +48,29 @@ */ public static final String HA_LOG_EXT = ".ha-log"; + /** + * A {@link FileFilter} that visits all files ending with the + * {@link #HA_LOG_EXT} and the names of all direct child directories. This + * {@link FileFilter} may be used to establish recursive scans of the HALog + * directory. + */ + static public final FileFilter HALOG_FILTER = new FileFilter() { + + @Override + public boolean accept(final File f) { + + if (f.isDirectory()) { + + return true; + + } + + return f.getName().endsWith(HA_LOG_EXT); + + } + + }; + /** * Closes the Reader. 
* Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/halog/HALogWriter.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/halog/HALogWriter.java 2013-05-07 11:25:20 UTC (rev 7112) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/halog/HALogWriter.java 2013-05-07 15:09:24 UTC (rev 7113) @@ -45,6 +45,7 @@ import com.bigdata.journal.RootBlockUtility; import com.bigdata.journal.RootBlockView; import com.bigdata.journal.StoreTypeEnum; +import com.bigdata.journal.jini.ha.CommitCounterUtility; import com.bigdata.rawstore.Bytes; /** @@ -177,38 +178,45 @@ } - /** - * Return the local name of the HA Log file associated with the - * - * @param commitCounter - * @return - */ - public static String getHALogFileName(final long commitCounter) { + /** + * Return the HA Log file associated with the commit counter. + * + * @param dir + * The HALog directory. + * @param commitCounter + * The commit counter. + * + * @return The HALog {@link File}. + */ + public static File getHALogFileName(final File dir, + final long commitCounter) { - /* - * Format the name of the log file. - * - * Note: The commit counter in the file name should be zero filled to 20 - * digits so we have the files in lexical order in the file system (for - * convenience). - */ - final String logFile; - { + return CommitCounterUtility.getCommitCounterFile(dir, commitCounter, + IHALogReader.HA_LOG_EXT); +// /* +// * Format the name of the log file. +// * +// * Note: The commit counter in the file name should be zero filled to 20 +// * digits so we have the files in lexical order in the file system (for +// * convenience). +// */ +// final String logFile; +// { +// +// final StringBuilder sb = new StringBuilder(); +// +// final Formatter f = new Formatter(sb); +// +// f.format("%020d" + IHALogReader.HA_LOG_EXT, commitCounter); +// f.flush(); +// f.close(); +// +// logFile = sb.toString(); +// +// } +// +// return logFile; - final StringBuilder sb = new StringBuilder(); - - final Formatter f = new Formatter(sb); - - f.format("%020d" + IHALogReader.HA_LOG_EXT, commitCounter); - f.flush(); - f.close(); - - logFile = sb.toString(); - - } - - return logFile; - } public String toString() { @@ -267,9 +275,12 @@ final long commitCounter = rootBlock.getCommitCounter(); - final String logFile = getHALogFileName(commitCounter + 1); +// final String logFile = getHALogFileName(commitCounter + 1); +// +// final File log = new File(m_haLogDir, logFile); + final File log = getHALogFileName(m_haLogDir, commitCounter + 1); - final File log = new File(m_haLogDir, logFile); +// final File log = new File(m_haLogDir, logFile); // Must delete file if it exists. 
if (log.exists() && !log.delete()) { @@ -678,8 +689,8 @@ public IHALogReader getReader(final long commitCounter) throws FileNotFoundException, IOException { - final File logFile = new File(m_haLogDir, - HALogWriter.getHALogFileName(commitCounter)); + final File logFile = //new File(m_haLogDir, + HALogWriter.getHALogFileName(m_haLogDir, commitCounter); final Lock lock = m_stateLock.readLock(); lock.lock(); Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/halog/IHALogReader.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/halog/IHALogReader.java 2013-05-07 11:25:20 UTC (rev 7112) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/halog/IHALogReader.java 2013-05-07 15:09:24 UTC (rev 7113) @@ -55,7 +55,7 @@ static public final FileFilter HALOG_FILTER = new FileFilter() { @Override - public boolean accept(File f) { + public boolean accept(final File f) { if (f.isDirectory()) { Added: branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractCommitTimeIndex.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractCommitTimeIndex.java (rev 0) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractCommitTimeIndex.java 2013-05-07 15:09:24 UTC (rev 7113) @@ -0,0 +1,533 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +package com.bigdata.journal; + +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReadWriteLock; +import java.util.concurrent.locks.ReentrantReadWriteLock.ReadLock; + +import com.bigdata.btree.BTree; +import com.bigdata.btree.DelegateBTree; +import com.bigdata.btree.ILinearList; +import com.bigdata.btree.IRangeQuery; +import com.bigdata.btree.ITuple; +import com.bigdata.btree.ITupleIterator; +import com.bigdata.btree.Tuple; +import com.bigdata.btree.UnisolatedReadWriteIndex; + +/** + * Abstract {@link BTree} mapping <em>commitTime</em> (long integers) to + * {@link ICommitTimeEntry} objects. + * <p> + * This class is thread-safe for concurrent readers and writers. + */ +public class AbstractCommitTimeIndex<T extends ICommitTimeEntry> extends + DelegateBTree implements ILinearList { + + /** + * The underlying index. Access to this is NOT thread safe unless you take + * the appropriate lock on the {@link #readWriteLock}. + */ + private final BTree btree; + + /** + * The {@link ReadWriteLock} used by the {@link UnisolatedReadWriteIndex} to + * make operations on the underlying {@link #btree} thread-safe. + */ + private final ReadWriteLock readWriteLock; + + @SuppressWarnings("unchecked") + private Tuple<T> getLookupTuple() { + + return btree.getLookupTuple(); + + } + +// /** +// * Instance used to encode the timestamp into the key. 
+// */ +// final private IKeyBuilder keyBuilder = new KeyBuilder(Bytes.SIZEOF_LONG); + + protected AbstractCommitTimeIndex(final BTree ndx) { + + // Wrap B+Tree for read/write thread safety. + super(new UnisolatedReadWriteIndex(ndx)); + + this.btree = ndx; + +// this.delegate = new UnisolatedReadWriteIndex(ndx); + + // Save reference to lock for extended synchronization patterns. + this.readWriteLock = UnisolatedReadWriteIndex.getReadWriteLock(ndx); + + } + + /** + * Encodes the commit time into a key. + * + * @param commitTime + * The commit time. + * + * @return The corresponding key. + */ + public byte[] getKey(final long commitTime) { + + return getIndexMetadata().getKeyBuilder().reset().append(commitTime) + .getKey(); + + } + + /** + * Returns (but does not take) the {@link ReadLock}. + */ + public Lock readLock() { + + return readWriteLock.readLock(); + + } + + /** + * Returns (but does not take) the {@link WriteLock}. + */ + public Lock writeLock() { + + return readWriteLock.writeLock(); + + } + + public long getEntryCount() { + + return super.rangeCount(); + + } + + @SuppressWarnings("unchecked") + public Tuple<T> valueAt(final long index, final Tuple<T> t) { + final Lock lock = readLock(); + lock.lock(); + try { + return btree.valueAt(index, t); + } finally { + lock.unlock(); + } + } + + /** + * Return the {@link IRootBlock} identifying the journal having the largest + * commitTime that is less than or equal to the given timestamp. This is + * used primarily to locate the commit record that will serve as the ground + * state for a transaction having <i>timestamp</i> as its start time. In + * this context the LTE search identifies the most recent commit state that + * not later than the start time of the transaction. + * + * @param timestamp + * The given timestamp. + * + * @return The description of the relevant journal resource -or- + * <code>null</code> iff there are no journals in the index that + * satisify the probe. + * + * @throws IllegalArgumentException + * if <i>timestamp</i> is less than ZERO (0L). + */ + public T find(final long timestamp) { + + if (timestamp < 0L) + throw new IllegalArgumentException(); + + final Lock lock = readLock(); + + lock.lock(); + + try { + + // find (first less than or equal to). + final long index = findIndexOf(timestamp); + + if (index == -1) { + + // No match. + return null; + + } + + return valueAtIndex(index); + + } finally { + + lock.unlock(); + + } + + } + + /** + * Retrieve the entry from the index. + */ + private T valueAtIndex(final long index) { + + return (T) valueAt(index, getLookupTuple()).getObject(); + +// final byte[] val = super.valueAt(index); +// +// assert val != null : "Entry has null value: index=" + index; +// +// final IRootBlockView entry = new RootBlockView(false/* rootBlock0 */, +// ByteBuffer.wrap(val), ChecksumUtility.getCHK()); +// +// return entry; + + } + + /** + * Return the first entry whose <em>commitTime</em> is strictly greater than + * the timestamp. + * + * @param timestamp + * The timestamp. A value of ZERO (0) may be used to find the + * first entry. + * + * @return The root block of that entry -or- <code>null</code> if there is + * no entry whose timestamp is strictly greater than + * <i>timestamp</i>. + * + * @throws IllegalArgumentException + * if <i>timestamp</i> is less than ZERO (0L). + */ + public T findNext(final long timestamp) { + + if (timestamp < 0L) + throw new IllegalArgumentException(); + + final Lock lock = readLock(); + + lock.lock(); + + try { + + // find first strictly greater than. 
+ final long index = findIndexOf(timestamp) + 1; + + if (index == rangeCount()) { + + // No match. + + return null; + + } + + return valueAtIndex(index); + + } finally { + + lock.unlock(); + + } + + } + + /** + * Find the index of the entry associated with the largest commitTime that + * is less than or equal to the given timestamp. + * + * @param commitTime + * The timestamp. + * + * @return The index of the entry associated with the largest commitTime + * that is less than or equal to the given timestamp -or- + * <code>-1</code> iff the index is empty. + * + * @throws IllegalArgumentException + * if <i>timestamp</i> is less than ZERO (0L). + */ + public long findIndexOf(final long commitTime) { + + if (commitTime < 0L) + throw new IllegalArgumentException(); + + /* + * Note: Since this is the sole index access, we don't need to take the + * lock to coordinate a consistent view of the index in this method. + */ + long pos = indexOf(getKey(commitTime)); + + if (pos < 0) { + + /* + * the key lies between the entries in the index, or possible before + * the first entry in the index. [pos] represents the insert + * position. we convert it to an entry index and subtract one to get + * the index of the first commit record less than the given + * timestamp. + */ + + pos = -(pos+1); + + if (pos == 0) { + + // No entry is less than or equal to this timestamp. + return -1; + + } + + pos--; + + return pos; + + } else { + + /* + * exact hit on an entry. + */ + + return pos; + + } + + } + + /** + * Add an entry under the commitTime associated with the entry. + * + * @param entry + * The entry + * + * @exception IllegalArgumentException + * if <i>commitTime</i> is <code>0L</code>. + * @exception IllegalArgumentException + * if <i>rootBLock</i> is <code>null</code>. + * @exception IllegalArgumentException + * if there is already an entry registered under for the + * given timestamp. + */ + public void add(final T entry) { + + if (entry == null) + throw new IllegalArgumentException(); + + final long commitTime = entry.getCommitTime(); + + if (commitTime == 0L) + throw new IllegalArgumentException(); + + final Lock lock = writeLock(); + + lock.lock(); + + try { + + final byte[] key = getKey(commitTime); + + if (super.contains(key)) { + + throw new IllegalArgumentException("entry exists: timestamp=" + + commitTime); + + } + + // add a serialized entry to the persistent index. + super.insert(key, entry); + + } finally { + + lock.unlock(); + + } + + } + + /** + * Find and return the oldest entry (if any). + * + * @return That entry -or- <code>null</code> if there are no entries. + */ + public T getOldestEntry() { + + final Lock lock = readLock(); + + lock.lock(); + + try { + + if (rangeCount() == 0L) { + + // Empty index. + return null; + + } + + // Lookup first tuple in index. + final ITuple<T> t = valueAt(0L, getLookupTuple()); + + final T r = t.getObject(); + + return r; + + } finally { + + lock.unlock(); + + } + + } + + /** + * Find the the most recent entry (if any). + * + * @return That entry -or- <code>null</code> if there are no entries. + */ + public T getNewestEntry() { + + final Lock lock = readLock(); + + lock.lock(); + + try { + + final long entryCount = getEntryCount(); + + if (entryCount == 0L) + return null; + + return valueAt(entryCount - 1, getLookupTuple()).getObject(); + + } finally { + + lock.unlock(); + + } + + } + + /** + * Find the oldest entry whose commit counter is LTE the specified commit + * counter. + * + * @return The entry -or- <code>null</code> if there is no such entry. 
+ * + * @throws IllegalArgumentException + * if <code>commitCounter LT ZERO (0)</code> + */ + public T findByCommitCounter(final long commitCounter) { + + if (commitCounter < 0L) + throw new IllegalArgumentException(); + + final Lock lock = readLock(); + + lock.lock(); + + try { + + // Reverse scan. + @SuppressWarnings("unchecked") + final ITupleIterator<T> itr = rangeIterator( + null/* fromKey */, null/* toKey */, 0/* capacity */, + IRangeQuery.DEFAULT | IRangeQuery.REVERSE/* flags */, null/* filter */); + + while (itr.hasNext()) { + + final ITuple<T> t = itr.next(); + + final T r = t.getObject(); + + final IRootBlockView rb = r.getRootBlock(); + + if (rb.getCommitCounter() <= commitCounter) { + + // First entry LTE that commit counter. + return r; + + } + + } + + return null; + + } finally { + + lock.unlock(); + + } + + } + + /** + * Return the entry that is associated with the specified ordinal index + * (origin ZERO) counting backwards from the most recent entry (0) towards + * the earliest entry (nentries-1). + * <p> + * Note: The effective index is given by <code>(entryCount-1)-index</code>. + * If the effective index is LT ZERO (0) then there is no such entry and + * this method will return <code>null</code>. + * + * @param index + * The index. + * + * @return The entry -or- <code>null</code> if there is no such entry. + * + * @throws IllegalArgumentException + * if <code>index LT ZERO (0)</code> + */ + public T getEntryByReverseIndex(final int index) { + + if (index < 0) + throw new IllegalArgumentException(); + + final Lock lock = readLock(); + + lock.lock(); + + try { + + final long entryCount = rangeCount(); + + if (entryCount > Integer.MAX_VALUE) + throw new AssertionError(); + + final int effectiveIndex = ((int) entryCount - 1) - index; + + if (effectiveIndex < 0) { + + // No such entry. + return null; + + } + + final ITuple<T> t = valueAt(effectiveIndex, + getLookupTuple()); + + final T r = t.getObject(); + + return r; + + } finally { + + lock.unlock(); + + } + + } + +} Added: branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/ICommitTimeEntry.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/ICommitTimeEntry.java (rev 0) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/ICommitTimeEntry.java 2013-05-07 15:09:24 UTC (rev 7113) @@ -0,0 +1,244 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +package com.bigdata.journal; + + +/** + * Interface for access to the snapshot metadata. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + */ +public interface ICommitTimeEntry { + + /** + * Return the bytes on the disk for the snapshot file. + */ + public long sizeOnDisk(); + + /** + * The commit counter associated with the index entry. 
+ */ + public long getCommitCounter(); + + /** + * The commit time associated with the index entry. + */ + public long getCommitTime(); + + /** + * Return the {@link IRootBlockView} of the snapshot. + */ + public IRootBlockView getRootBlock(); + +//public static class SnapshotRecord implements ISnapshotRecord, +// Externalizable { +// +// private static final int VERSION0 = 0x0; +// +// private static final int currentVersion = VERSION0; +// +// /** +// * Note: This is NOT {@link Serializable}. +// */ +// private IRootBlockView rootBlock; +// +// private long sizeOnDisk; +// +// /** +// * De-serialization constructor. +// */ +// public SnapshotRecord() { +// } +// +// public SnapshotRecord(final IRootBlockView rootBlock, +// final long sizeOnDisk) { +// +// if (rootBlock == null) +// throw new IllegalArgumentException(); +// +// if (sizeOnDisk < 0L) +// throw new IllegalArgumentException(); +// +// this.rootBlock = rootBlock; +// +// this.sizeOnDisk = sizeOnDisk; +// +// } +// +// @Override +// public long sizeOnDisk() { +// return sizeOnDisk; +// } +// +// @Override +// public IRootBlockView getRootBlock() { +// return rootBlock; +// } +// +// @Override +// public boolean equals(final Object o) { +// if (this == o) +// return true; +// if (!(o instanceof ISnapshotRecord)) +// return false; +// final ISnapshotRecord t = (ISnapshotRecord) o; +// if (sizeOnDisk() != t.sizeOnDisk()) +// return false; +// if (!getRootBlock().equals(t.getRootBlock())) +// return false; +// return true; +// } +// +// @Override +// public int hashCode() { +// return getRootBlock().hashCode(); +// } +// +// @Override +// public void writeExternal(final ObjectOutput out) throws IOException { +// +// out.writeInt(currentVersion); +// +// final byte[] a = BytesUtil.getBytes(rootBlock.asReadOnlyBuffer()); +// +// final int sizeOfRootBlock = a.length; +// +// out.writeInt(sizeOfRootBlock); +// +// out.write(a, 0, sizeOfRootBlock); +// +// out.writeLong(sizeOnDisk); +// +// } +// +// @Override +// public void readExternal(final ObjectInput in) throws IOException, +// ClassNotFoundException { +// +// final int version = in.readInt(); +// +// switch (version) { +// case VERSION0: +// break; +// default: +// throw new IOException("Unknown version: " + version); +// } +// +// final int sizeOfRootBlock = in.readInt(); +// +// final byte[] a = new byte[sizeOfRootBlock]; +// +// in.readFully(a, 0, sizeOfRootBlock); +// +// rootBlock = new RootBlockView(false/* rootBlock0 */, +// ByteBuffer.wrap(a), ChecksumUtility.getCHK()); +// +// sizeOnDisk = in.readLong(); +// +// } +// +//} // SnapshotRecord +// +///** +// * Encapsulates key and value formation. +// * +// * @author <a href="mailto:tho...@us...">Bryan Thompson</a> +// */ +//static protected class TupleSerializer extends +// DefaultTupleSerializer<Long, ISnapshotRecord> { +// +// /** +// * +// */ +// private static final long serialVersionUID = -2851852959439807542L; +// +// /** +// * De-serialization ctor. +// */ +// public TupleSerializer() { +// +// super(); +// +// } +// +// /** +// * Ctor when creating a new instance. +// * +// * @param keyBuilderFactory +// */ +// public TupleSerializer(final IKeyBuilderFactory keyBuilderFactory) { +// +// super(keyBuilderFactory); +// +// } +// +// /** +// * Decodes the key as a commit time. 
+// */ +// @Override +// @SuppressWarnings("rawtypes") +// public Long deserializeKey(final ITuple tuple) { +// +// return KeyBuilder +// .decodeLong(tuple.getKeyBuffer().array(), 0/* offset */); +// +// } +// +// /** +// * The initial version (no additional persistent state). +// */ +// private final static transient byte VERSION0 = 0; +// +// /** +// * The current version. +// */ +// private final static transient byte VERSION = VERSION0; +// +// public void readExternal(final ObjectInput in) throws IOException, +// ClassNotFoundException { +// +// super.readExternal(in); +// +// final byte version = in.readByte(); +// +// switch (version) { +// case VERSION0: +// break; +// default: +// throw new UnsupportedOperationException("Unknown version: " +// + version); +// } +// +// } +// +// public void writeExternal(final ObjectOutput out) throws IOException { +// +// super.writeExternal(out); +// +// out.writeByte(VERSION); +// +// } + +} + Modified: branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/DefaultRestorePolicy.java =================================================================== --- branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/DefaultRestorePolicy.java 2013-05-07 11:25:20 UTC (rev 7112) +++ branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/DefaultRestorePolicy.java 2013-05-07 15:09:24 UTC (rev 7113) @@ -279,7 +279,7 @@ * returns its commit counter. If there is no such snapshot, then this * returns ZERO (0). */ - private long getEarliestRestorableCommitCounterByHALogs( + private long getEarliestRestorableCommitCounterByCommitPoints( final HAJournal jnl, final long commitCounterOnJournal) { // The commit point that is [minRestorePoints] old. @@ -329,7 +329,7 @@ final long commitCounterRetainedBySnapshotCount = getEarliestRestorableCommitCounterBySnapshots( jnl, commitCounterOnJournal); - final long commitCounterRetainedByHALogCount = getEarliestRestorableCommitCounterByHALogs( + final long commitCounterRetainedByHALogCount = getEarliestRestorableCommitCounterByCommitPoints( jnl, commitCounterOnJournal); /* Modified: branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournal.java =================================================================== --- branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournal.java 2013-05-07 11:25:20 UTC (rev 7112) +++ branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournal.java 2013-05-07 15:09:24 UTC (rev 7113) @@ -540,19 +540,8 @@ super.deleteResources(); - recursiveDelete(getHALogDir(), new FileFilter() { - - @Override - public boolean accept(File f) { + recursiveDelete(getHALogDir(), IHALogReader.HALOG_FILTER); - if (f.isDirectory()) - return true; - - return f.getName().endsWith(IHALogReader.HA_LOG_EXT); - } - - }); - recursiveDelete(getSnapshotManager().getSnapshotDir(), SnapshotManager.SNAPSHOT_FILTER); @@ -662,8 +651,8 @@ // The commit counter of the desired closing root block. 
final long commitCounter = msg.getCommitCounter(); - final File logFile = new File(getHALogDir(), - HALogWriter.getHALogFileName(commitCounter)); + final File logFile = HALogWriter.getHALogFileName( + getHALogDir(), commitCounter); if (!logFile.exists()) { Modified: branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournalServer.java =================================================================== --- branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournalServer.java 2013-05-07 11:25:20 UTC (rev 7112) +++ branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournalServer.java 2013-05-07 15:09:24 UTC (rev 7113) @@ -3342,155 +3342,6 @@ } -// /** -// * Delete snapshots that are no longer required. -// * <p> -// * Note: If ZERO (0) is passed into this method, then no snapshots will -// * be deleted. This is because the first possible commit counter is ONE -// * (1). -// * -// * @param earliestRestorableCommitPoint -// * The earliest commit point that we need to be able to -// * restore from local backups. -// * -// * @return The commitCounter of the earliest retained snapshot. -// */ -// private long deleteSnapshots(final long token, -// final long earliestRestorableCommitPoint) { -// /* -// * List the snapshot files for this service. -// */ -// final File[] files; -// // #of snapshot files found. Set during scan. -// final AtomicLong nfound = new AtomicLong(); -// // Set to the commit counter of the earliest retained snapshot. -// final AtomicLong earliestRetainedSnapshotCommitCounter = new AtomicLong(Long.MAX_VALUE); -// final SnapshotManager snapshotManager = journal -// .getSnapshotManager(); -// { -// -// final File snapshotDir = snapshotManager.getSnapshotDir(); -// -// files = snapshotDir.listFiles(new FilenameFilter() { -// -// /** -// * Return <code>true</code> iff the file is an snapshot file -// * that should be deleted. -// * -// * @param name -// * The name of that file (encodes the -// * commitCounter). -// */ -// @Override -// public boolean accept(final File dir, final String name) { -// -// if (!name.endsWith(SnapshotManager.SNAPSHOT_EXT)) { -// // Not an snapshot file. -// return false; -// } -// -// // Closing commitCounter for snapshot file. -// final long commitCounter = SnapshotManager -// .parseCommitCounterFile(name); -// -// // Count all snapshot files. -// nfound.incrementAndGet(); -// -// // true iff we will delete this snapshot. -// final boolean deleteFile = commitCounter < earliestRestorableCommitPoint; -// -// if (haLog.isInfoEnabled()) -// log.info("snapshotFile=" -// + name// -// + ", deleteFile=" -// + deleteFile// -// + ", commitCounter=" -// + commitCounter// -// + ", earliestRestoreableCommitPoint=" -// + earliestRestorableCommitPoint); -// -// if (!deleteFile -// && commitCounter < earliestRetainedSnapshotCommitCounter -// .get()) { -// -// /* -// * Update the earliest retained snapshot. -// */ -// -// earliestRetainedSnapshotCommitCounter -// .set(commitCounter); -// -// } -// -// return deleteFile; -// -// } -// }); -// -// } -// -// int ndeleted = 0; -// long totalBytes = 0L; -// -// /* -// * If people specify NoSnapshotPolicy then backup is in their hands. -// * HALogs will not be retained beyond a fully met commit unless -// * there is a snapshot against which they can be applied.. -// */ -// -//// if (files.length == 0) { -//// -//// /* -//// * Note: If there are no snapshots then we MUST retain ALL HALog -//// * files. 
-//// */ -//// earliestRetainedSnapshotCommitCounter.set(0L); -//// -//// } else { -// -// for (File file : files) { -// -// // #of bytes in that file. -// final long len = file.length(); -// -// if (!getQuorum().isQuorumFullyMet(token)) { -// /* -// * Halt operation. -// * -// * Note: This is not an error, but we can not remove -// * snapshots or HALogs if this invariant is violated. -// */ -// break; -// } -// -// if (!snapshotManager.removeSnapshot(file)) { -// -// haLog.warn("COULD NOT DELETE FILE: " + file); -// -// continue; -// -// } -// -// ndeleted++; -// -// totalBytes += len; -// -// } -// -//// } -// -// if (haLog.isInfoEnabled()) -// haLog.info("PURGED SNAPSHOTS: nfound=" + nfound + ", ndeleted=" -// + ndeleted + ", totalBytes=" + totalBytes -// + ", earliestRestorableCommitPoint=" -// + earliestRestorableCommitPoint -// + ", earliestRetainedSnapshotCommitCounter=" -// + earliestRetainedSnapshotCommitCounter.get()); -// -// return earliestRetainedSnapshotCommitCounter.get(); -// -// } - /** * Delete HALogs that are no longer required. * Added: branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HALogIndex.java =================================================================== --- branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HALogIndex.java (rev 0) +++ branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HALogIndex.java 2013-05-07 15:09:24 UTC (rev 7113) @@ -0,0 +1,323 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +package com.bigdata.journal.jini.ha; + +import java.io.Externalizable; +import java.io.IOException; +import java.io.ObjectInput; +import java.io.ObjectOutput; +import java.io.Serializable; +import java.nio.ByteBuffer; +import java.util.UUID; + +import com.bigdata.btree.BTree; +import com.bigdata.btree.BytesUtil; +import com.bigdata.btree.DefaultTupleSerializer; +import com.bigdata.btree.ITuple; +import com.bigdata.btree.IndexMetadata; +import com.bigdata.btree.keys.ASCIIKeyBuilderFactory; +import com.bigdata.btree.keys.IKeyBuilderFactory; +import com.bigdata.btree.keys.KeyBuilder; +import com.bigdata.journal.AbstractCommitTimeIndex; +import com.bigdata.journal.ICommitTimeEntry; +import com.bigdata.journal.IRootBlockView; +import com.bigdata.journal.RootBlockView; +import com.bigdata.journal.jini.ha.HALogIndex.IHALogRecord; +import com.bigdata.rawstore.Bytes; +import com.bigdata.util.ChecksumUtility; + +/** + * {@link BTree} mapping <em>commitTime</em> (long integers) to + * {@link IHALogRecord} records for each closed HALog file. HALog files are + * added to this index when their closing {@link IRootBlockView} is applied. + * Thus, the live HALog is not entered into this index until it the closing + * {@link IRootBlockView} has been applied. 
+ * <p> + * This object is thread-safe for concurrent readers and writers. + * <p> + * Note: This is used as a transient data structure that is populated from the + * file system by the {@link HAJournalServer}. + */ +public class HALogIndex extends AbstractCommitTimeIndex<IHALogRecord> { + + /** + * Create a transient instance. + * + * @return The new instance. + */ + static public HALogIndex createTransient() { + + final IndexMetadata metadata = new IndexMetadata(UUID.randomUUID()); + + metadata.setTupleSerializer(new TupleSerializer( + new ASCIIKeyBuilderFactory(Bytes.SIZEOF_LONG))); + + final BTree ndx = BTree.createTransient(/*store, */metadata); + + return new HALogIndex(ndx); + + } + + private HALogIndex(final BTree ndx) { + + super(ndx); + + } + + /** + * Interface for access to the HALog metadata. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + */ + public static interface IHALogRecord extends ICommitTimeEntry { + +// /** +// * Return the bytes on the disk for the HALog file. +// */ +// public long sizeOnDisk(); + + /** + * Return the closing {@link IRootBlockView} of the HALog file. + */ + @Override + public IRootBlockView getRootBlock(); + + } + + public static class HALogRecord implements IHALogRecord, + Externalizable { + + private static final int VERSION0 = 0x0; + + private static final int currentVersion = VERSION0; + + /** + * Note: This is NOT {@link Serializable}. + */ + private IRootBlockView rootBlock; + + private long sizeOnDisk; + + /** + * De-serialization constructor. + */ + public HALogRecord() { + } + + public HALogRecord(final IRootBlockView rootBlock, + final long sizeOnDisk) { + + if (rootBlock == null) + throw new IllegalArgumentException(); + + if (sizeOnDisk < 0L) + throw new IllegalArgumentException(); + + this.rootBlock = rootBlock; + + this.sizeOnDisk = sizeOnDisk; + + } + + @Override + public long sizeOnDisk() { + return sizeOnDisk; + } + + @Override + public IRootBlockView getRootBlock() { + return rootBlock; + } + + @Override + public boolean equals(final Object o) { + if (this == o) + return true; + if (!(o instanceof IHALogRecord)) + return false; + final IHALogRecord t = (IHALogRecord) o; + if (sizeOnDisk() != t.sizeOnDisk()) + return false; + if (!getRootBlock().equals(t.getRootBlock())) + return false; + return true; + } + + @Override + public int hashCode() { + return getRootBlock().hashCode(); + } + + @Override + public void writeExternal(final ObjectOutput out) throws IOException { + + out.writeInt(currentVersion); + + final byte[] a = BytesUtil.getBytes(rootBlock.asReadOnlyBuffer()); + + final int sizeOfRootBlock = a.length; + + out.writeInt(sizeOfRootBlock); + + out.write(a, 0, sizeOfRootBlock); + + out.writeLong(sizeOnDisk); + + } + + @Override + public void readExternal(final ObjectInput in) throws IOException, + ClassNotFoundException { + + final int version = in.readInt(); + + switch (version) { + case VERSION0: + break; + default: + throw new IOException("Unknown version: " + version); + } + + final int sizeOfRootBlock = in.readInt(); + + final byte[] a = new byte[sizeOfRootBlock]; + + in.readFully(a, 0, sizeOfRootBlock); + + rootBlock = new RootBlockView(false/* rootBlock0 */, + ByteBuffer.wrap(a), ChecksumUtility.getCHK()); + + sizeOnDisk = in.readLong(); + + } + + @Override + public long getCommitCounter() { + return getRootBlock().getCommitCounter(); + } + + @Override + public long getCommitTime() { + return getRootBlock().getLastCommitTime(); + } + + } // HALogRecord + + /** + * Encapsulates key and value 
formation. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + */ + static protected class TupleSerializer extends + DefaultTupleSerializer<Long, IHALogRecord> { + + /** + * + */ + private static final long serialVersionUID = -2851852959439807542L; + + /** + * De-serialization ctor. + */ + public TupleSerializer() { + + super(); + + } + + /** + * Ctor when creating a new instance. + * + * @param keyBuilderFactory + */ + public TupleSerializer(final IKeyBuilderFactory keyBuilderFactory) { + + super(keyBuilderFactory); + + } + + /** + * Decodes the key as a commit time. + */ + @Override + @SuppressWarnings("rawtypes") + public Long deserializeKey(final ITuple tuple) { + + return KeyBuilder + .decodeLong(tuple.getKeyBuffer().array(), 0/* offset */); + + } + +// /** +// * De-serializes an object from the {@link ITuple#getValue() value} stored +// * in the tuple (ignores the key stored in the tuple). +// */ +// public IHALogRecord deserialize(final ITuple tuple) { +// +// if (tuple == null) +// throw new IllegalArgumentException(); +// +// return (IRootBlockView) new RootBlockView(false/* rootBlock0 */, +// ByteBuffer.wrap(tuple.getValue()), ChecksumUtility.getCHK()); +// +// } + + /** + * The initial version (no additional persistent state). + */ + private final static transient byte VERSION0 = 0; + + /** + * The current version. + */ + private final static transient byte VERSION = VERSION0; + + public void readExternal(final ObjectInput in) throws IOException, + ClassNotFoundException { + + super.readExternal(in); + + final byte version = in.readByte(); + + switch (version) { + case VERSION0: + break; + default: + throw new UnsupportedOperationException("Unknown version: " + + version); + } + + } + + public void writeExternal(final ObjectOutput out) throws IOException { + + super.writeExternal(out); + + out.writeByte(VERSION); + + } + + } + +} Modified: branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HARestore.java =================================================================== --- branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HARestore.java 2013-05-07 11:25:20 UTC (rev 7112) +++ branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HARestore.java 2013-05-07 15:09:24 UTC (rev 7113) @@ -127,8 +127,7 @@ } - final File logFile = new File(haLogDir, - HALogWriter.getHALogFileName(cc)); + final File logFile = HALogWriter.getHALogFileName(haLogDir, cc); if (!logFile.exists()) { Modified: branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/SnapshotIndex.java =================================================================== --- branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/SnapshotIndex.java 2013-05-07 11:25:20 UTC (rev 7112) +++ branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/SnapshotIndex.java 2013-05-07 15:09:24 UTC (rev 7113) @@ -30,25 +30,20 @@ import java.io.Serializable; import java.nio.ByteBuffer; import java.util.UUID; -import java.util.concurrent.locks.Lock; -import java.util.concurrent.locks.ReadWriteLock; import com.bigdata.btree.BTree; import com.bigdata.btree.BytesUtil; import com.bigdata.btree.DefaultTupleSerializer; -import com.bigdata.btree.DelegateIndex; -import com.bigdata.btree.ILinearList; -import com.bigdata.btree.IRangeQuery; import com.bigdata.btree.ITuple; -import com.bigdata.btree.ITupleIterator; import com.bigdata.btree.IndexMetadata; -import com.bigdata.btree.Tuple; -import com.bigdata.btree.UnisolatedReadWriteIndex; import 
com.bigdata.btree.keys.ASCIIKeyBuilderFactory; import com.bigdata.btree.keys.IKeyBuilderFactory; import com.bigdata.btree.keys.KeyBuilder; +import com.bigdata.journal.AbstractCommitTimeIndex; +import com.bigdata.journal.ICommitTimeEntry; import com.bigdata.journal.IRootBlockView; import com.bigdata.journal.RootBlockView; +import com.bigdata.journal.jini.ha.SnapshotIndex.ISnapshotRecord; import com.bigdata.rawstore.Bytes; import com.bigdata.util.ChecksumUtility; @@ -61,31 +56,13 @@ * Note: This is used as a transient data structure that is populated from the * file system by the {@link HAJournalServer}. */ -public class SnapshotIndex extends DelegateIndex implements ILinearList { - - /** - * The underlying index. Access to this is NOT thread safe unless you take - * the appropriate lock on the {@link #readWriteLock}. - */ - private final BTree btree; - - /** - * The {@link ReadWriteLock} used by the {@link UnisolatedReadWriteIndex} to - * make operations on the underlying {@link #btree} thread-safe. - */ - private final ReadWriteLock readWriteLock; +public class SnapshotIndex extends AbstractCommitTimeIndex<ISnapshotRecord> { - @SuppressWarnings("unchecked") - private Tuple<ISnapshotRecord> getLookupTuple() { - - return btree.getLookupTuple(); - - } - // /** -// * Instance used to encode the timestamp into the key. +// * The underlying index. Access to this is NOT thread safe unless you take +// * the appropriate lock on the {@link #readWriteLock}. // */ -// final private IKeyBuilder keyBuilder = new KeyBuilder(Bytes.SIZEOF_LONG); +// private final BTree btree; /** * Create a transient instance. @@ -110,15 +87,15 @@ private SnapshotIndex(final BTree ndx) { // Wrap B+Tree for read/write thread safety. - super(new UnisolatedReadWriteIndex(ndx)); + super(ndx); - this.btree = ndx; +// this.btree = ndx; +// +//// this.delegate = new UnisolatedReadWriteIndex(ndx); +// +// // Save reference to lock for extended synchronization patterns. +// this.readWriteLock = Unisola... [truncated message content] |
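Both SnapshotIndex and HALogIndex key their entries by a 64-bit commit time (or commit counter) using a fixed-width key: the TupleSerializer above is built on an ASCIIKeyBuilderFactory(Bytes.SIZEOF_LONG) and decodes the key with KeyBuilder.decodeLong(). The point of a fixed-width, order-preserving encoding is that the unsigned lexicographic order of the keys in the B+Tree then equals the numeric order of the commit times. The sketch below illustrates only that principle; it is not the bigdata KeyBuilder implementation, and the class and method names are invented for the example.

{{{
import java.util.Arrays;

/**
 * Standalone sketch (not the bigdata KeyBuilder): encode a signed long commit
 * time as 8 big-endian bytes with the sign bit flipped so that the unsigned
 * lexicographic order of the keys equals the numeric order of the values.
 */
public class CommitTimeKeySketch {

    /** Encode a signed long as an order-preserving 8-byte key. */
    static byte[] encode(final long commitTime) {
        // Flipping the sign bit makes negative values sort before positive ones.
        final long v = commitTime ^ 0x8000000000000000L;
        final byte[] key = new byte[8];
        for (int i = 0; i < 8; i++)
            key[i] = (byte) (v >>> (56 - 8 * i));
        return key;
    }

    /** Decode the 8-byte key back into the original signed long. */
    static long decode(final byte[] key) {
        long v = 0L;
        for (int i = 0; i < 8; i++)
            v = (v << 8) | (key[i] & 0xffL);
        return v ^ 0x8000000000000000L;
    }

    /** Unsigned lexicographic comparison (what the B+Tree does with keys). */
    static int compareUnsigned(final byte[] a, final byte[] b) {
        for (int i = 0; i < Math.min(a.length, b.length); i++) {
            final int cmp = (a[i] & 0xff) - (b[i] & 0xff);
            if (cmp != 0)
                return cmp;
        }
        return a.length - b.length;
    }

    public static void main(final String[] args) {
        final long t1 = System.currentTimeMillis();
        final long t2 = t1 + 1000;
        // Key order agrees with numeric order and the encoding round-trips.
        System.out.println(compareUnsigned(encode(t1), encode(t2)) < 0); // true
        System.out.println(decode(encode(t1)) == t1); // true
        System.out.println(Arrays.toString(encode(t1)));
    }
}
}}}

With keys that sort this way, operations such as "find the most recent commit point not greater than a given timestamp" reduce to ordinary key-range probes on the index.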
From: <tho...@us...> - 2013-05-08 15:53:31
|
Revision: 7117
http://bigdata.svn.sourceforge.net/bigdata/?rev=7117&view=rev
Author: thompsonbry
Date: 2013-05-08 15:53:19 +0000 (Wed, 08 May 2013)

Log Message:
-----------
I have modified the HAJournalServer to use a recursive directory structure for the HALogs and the snapshot files (this avoids problems with latency and limits for directories with a large number of files) and to maintain an in-memory index over the HALogs and an in-memory index over the snapshot files. All operations now use the in-memory indices. I have also refactored the HALog interfaces to consolidate everything in the HALogNexus. This will make it easier to integrate and test the althalog package.

NOTE: This commit is NOT compatible with the historical locations of the HALog files and snapshot files. Since we are in pre-release, no attempt has been made to provide migration of those files. If you need to migrate the files, then move them into subdirectories named as follows:

{{{
(HALog|snapshot)/000/000/000/000/000/000/000000000000000000001
}}}

where the last component is a 21-digit number (the old files used 20 digits). The components leading up to the last component will all be zeros unless you have more than 1000 log files. See CommitCounterUtility for how to generate these file names (a migration sketch appears below).

Change list:
{{{
- done. Clean up the HALog directory initialization and centralize it.
- done. Clean up the haLogLock initialization, centralize it, and confirm the rationale with the HALogManager (appropriately mutexed).
- done. Verify that all code paths that delete an HALog or snapshot file cause the file to be removed from the appropriate index.
- done. HAJournalServer.deleteHALogs(long token, long earliestRetainedSnapshotCommitCounter): rewrite. Must handle recursion in the directory structure. This is invoked after a snapshot and at each commit. Must be low latency.
- done. HASnapshotManager.getHALogFileBytesSinceCommitCounter(long): must use an index scan with a running sum (moved to HALogNexus).
- done. HAStatusUtil: counts the #of snapshots and the bytes in those snapshots. The CommitTimeIndex maps the timestamp of the commit point to the IRootBlockView. The IRootBlockView does not include the size on the disk since the snapshot is a compressed file. If we want to rapidly report the size on the disk as well, then we need to add that to the tuple, e.g., a {rootBlock, sizeOnDisk} record structure. That is simple enough.
- done. Change to a recursive directory structure for snapshots and HALog files.
}}}

The HA test suite is 100% green. The last step is to verify that the latency at the commit is gone in an HA3 cluster configuration using the BSBM EXPLORE+UPDATE benchmark.
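As an illustration of the migration step described in the NOTE above, here is a hypothetical one-shot helper (it is not part of this commit, and the class and method names are invented). It moves old-style files, named with a 20-digit commit counter directly in the HALog or snapshot directory, into the new recursive layout: a 21-digit basename under six levels of 3-digit subdirectories, matching the scheme used by CommitCounterUtility. The file extension is a parameter because the extensions are not spelled out in this message, and error handling is deliberately minimal.

{{{
import java.io.File;
import java.io.IOException;
import java.util.Formatter;

/**
 * Hypothetical migration helper (not part of this commit): moves files named
 * with the old flat 20-digit commit counter layout into the new recursive
 * 21-digit layout described in the log message.
 */
public class MigrateCommitCounterFiles {

    /** The new recursive location for a commit counter (21 digits, 3-digit subdirs). */
    static File newLocation(final File dir, final long commitCounter, final String ext) {
        final StringBuilder sb = new StringBuilder();
        final Formatter f = new Formatter(sb);
        f.format("%021d", commitCounter);
        f.close();
        final String basename = sb.toString();
        File t = dir;
        // The first 18 digits become six levels of 3-digit directories.
        for (int i = 0; i < 21 - 3; i += 3)
            t = new File(t, basename.substring(i, i + 3));
        return new File(t, basename + ext);
    }

    /** Move every old-style file (20-digit basename plus extension) into the new layout. */
    static void migrate(final File dir, final String ext) throws IOException {
        final File[] files = dir.listFiles();
        if (files == null)
            return;
        for (File src : files) {
            final String name = src.getName();
            if (!src.isFile() || !name.endsWith(ext) || name.length() != 20 + ext.length())
                continue; // not an old-style commit counter file.
            final long commitCounter = Long.parseLong(name.substring(0, 20));
            final File dst = newLocation(dir, commitCounter, ext);
            final File parentDir = dst.getParentFile();
            if (!parentDir.exists() && !parentDir.mkdirs())
                throw new IOException("Could not create directory: " + parentDir);
            if (!src.renameTo(dst))
                throw new IOException("Could not move " + src + " to " + dst);
        }
    }
}
}}}

For example, an old file whose basename is 00000000000000000007 would end up as 000/000/000/000/000/000/000000000000000000007 (plus the extension) under the same top-level directory.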
See https://sourceforge.net/apps/trac/bigdata/ticket/670 (Accumulating HALog files cause latency for HA commit) Modified Paths: -------------- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/QuorumService.java branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/QuorumServiceBase.java branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/althalog/HALogFile.java branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/halog/HALogWriter.java branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractCommitTimeIndex.java branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractJournal.java branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/RootBlockUtility.java branches/READ_CACHE/bigdata/src/test/com/bigdata/ha/halog/TestHALogWriter.java branches/READ_CACHE/bigdata/src/test/com/bigdata/journal/TestAll.java branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournal.java branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournalServer.java branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/SnapshotManager.java branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/AbstractHA3BackupTestCase.java branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/AbstractHA3JournalServerTestCase.java branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/AbstractHAJournalServerTestCase.java branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA3JournalServer.java branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA3SnapshotPolicy.java branches/READ_CACHE/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/HAStatusServletUtil.java Added Paths: ----------- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/halog/IHALogWriter.java branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/CommitCounterUtility.java branches/READ_CACHE/bigdata/src/test/com/bigdata/journal/TestCommitCounterUtility.java branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HALogNexus.java Removed Paths: ------------- branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/CommitCounterUtility.java Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/QuorumService.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/QuorumService.java 2013-05-07 19:11:27 UTC (rev 7116) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/QuorumService.java 2013-05-08 15:53:19 UTC (rev 7117) @@ -80,10 +80,10 @@ */ File getServiceDir(); - /** - * Return the directory in which we are logging the write blocks. - */ - File getHALogDir(); +// /** +// * Return the directory in which we are logging the write blocks. +// */ +// File getHALogDir(); /** * Return the best guess at the process identifier for this process. 
Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/QuorumServiceBase.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/QuorumServiceBase.java 2013-05-07 19:11:27 UTC (rev 7116) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/QuorumServiceBase.java 2013-05-08 15:53:19 UTC (rev 7117) @@ -27,7 +27,6 @@ package com.bigdata.ha; -import java.io.File; import java.io.IOException; import java.nio.ByteBuffer; import java.util.Set; @@ -356,12 +355,12 @@ } - @Override - final public File getHALogDir() { - - return getLocalService().getHALogDir(); - - } +// @Override +// final public File getHALogDir() { +// +// return getLocalService().getHALogDir(); +// +// } @Override public long getPrepareTimeout() { Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/althalog/HALogFile.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/althalog/HALogFile.java 2013-05-07 19:11:27 UTC (rev 7116) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/althalog/HALogFile.java 2013-05-08 15:53:19 UTC (rev 7117) @@ -47,11 +47,11 @@ import com.bigdata.io.IBufferAccess; import com.bigdata.io.IReopenChannel; import com.bigdata.io.SerializerUtil; +import com.bigdata.journal.CommitCounterUtility; import com.bigdata.journal.IRootBlockView; import com.bigdata.journal.RootBlockUtility; import com.bigdata.journal.RootBlockView; import com.bigdata.journal.StoreTypeEnum; -import com.bigdata.journal.jini.ha.CommitCounterUtility; import com.bigdata.rawstore.Bytes; import com.bigdata.util.ChecksumError; import com.bigdata.util.ChecksumUtility; @@ -179,7 +179,15 @@ throw new IllegalStateException("File already exists: " + m_haLogFile.getAbsolutePath()); - m_raf = new RandomAccessFile(m_haLogFile, "rw"); + final File parentDir = m_haLogFile.getParentFile(); + + // Make sure the parent directory(ies) exist. + if (!parentDir.exists()) + if (!parentDir.mkdirs()) + throw new IOException("Could not create directory: " + + parentDir); + + m_raf = new RandomAccessFile(m_haLogFile, "rw"); m_channel = m_raf.getChannel(); m_storeType = rbv.getStoreType(); Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/halog/HALogWriter.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/halog/HALogWriter.java 2013-05-07 19:11:27 UTC (rev 7116) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/halog/HALogWriter.java 2013-05-08 15:53:19 UTC (rev 7117) @@ -40,11 +40,11 @@ import com.bigdata.io.FileChannelUtility; import com.bigdata.io.IReopenChannel; import com.bigdata.io.SerializerUtil; +import com.bigdata.journal.CommitCounterUtility; import com.bigdata.journal.IRootBlockView; import com.bigdata.journal.RootBlockUtility; import com.bigdata.journal.RootBlockView; import com.bigdata.journal.StoreTypeEnum; -import com.bigdata.journal.jini.ha.CommitCounterUtility; import com.bigdata.rawstore.Bytes; /** @@ -58,7 +58,7 @@ * * @author Martyn Cutcher */ -public class HALogWriter { +public class HALogWriter implements IHALogWriter { /** * Logger for HA events. @@ -97,12 +97,12 @@ /** * Magic value for HA Log (the root blocks have their own magic value). */ - static final int MAGIC = 0x83d9b735; + static public final int MAGIC = 0x83d9b735; /** * HA log version number (version 1). */ - static final int VERSION1 = 0x1; + static public final int VERSION1 = 0x1; /** HA log directory. 
*/ private final File m_haLogDir; @@ -152,6 +152,10 @@ } + /** + * @throws IllegalStateException + * if the HALog is not open. + */ private void assertOpen() { if (m_state == null) @@ -159,8 +163,10 @@ } - public boolean isOpen() { + public boolean isHALogOpen() { + return m_state != null && !m_state.isCommitted(); + } /** @@ -178,12 +184,14 @@ } /** - * Return the HA Log file associated with the commit counter. + * Return the HALog file associated with the commit counter. * * @param dir * The HALog directory. * @param commitCounter - * The commit counter. + * The closing commit counter (the HALog file is named for the + * commit counter that will be associated with the closing root + * block). * * @return The HALog {@link File}. */ @@ -294,6 +302,14 @@ } + final File parentDir = log.getParentFile(); + + // Make sure the parent directory(ies) exist. + if (!parentDir.exists()) + if (!parentDir.mkdirs()) + throw new IOException("Could not create directory: " + + parentDir); + final Lock lock = m_stateLock.writeLock(); lock.lock(); try { @@ -347,17 +363,16 @@ } }; - /** - * Write the final root block on the HA log and close the file. This "seals" - * the file, which now represents the entire write set associated with the - * commit point in the given root block. - * - * @param rootBlock - * The final root block for the write set. - * @throws FileNotFoundException - * @throws IOException - */ - public void closeLog(final IRootBlockView rootBlock) + /** + * Write the final root block on the HA log and close the file. This "seals" + * the file, which now represents the entire write set associated with the + * commit point in the given root block. + * + * @param rootBlock + * The final root block for the write set. + * @throws IOException + */ + public void closeHALog(final IRootBlockView rootBlock) throws FileNotFoundException, IOException { final Lock lock = m_stateLock.writeLock(); @@ -443,11 +458,18 @@ } /** - * - * @param msg - * @param data + * Write the message and the data on the live HALog. + * + * @param msg + * The message. + * @param data + * The data. + * @throws IllegalStateException + * if the message is not appropriate for the state of the log. + * @throws IOException + * if we can not write on the log. */ - public void write(final IHAWriteMessage msg, final ByteBuffer data) + public void writeOnHALog(final IHAWriteMessage msg, final ByteBuffer data) throws IOException, IllegalStateException { final Lock lock = m_stateLock.readLock(); @@ -628,9 +650,9 @@ } /** - * Disable the current log file if one is open. + * Disable (and remove) the current log file if one is open. */ - public void disable() throws IOException { + public void disableHALog() throws IOException { if (haLog.isInfoEnabled()) haLog.info(""); @@ -917,4 +939,5 @@ } } + } Added: branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/halog/IHALogWriter.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/halog/IHALogWriter.java (rev 0) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/halog/IHALogWriter.java 2013-05-08 15:53:19 UTC (rev 7117) @@ -0,0 +1,88 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2012. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package com.bigdata.ha.halog; + +import java.io.IOException; +import java.nio.ByteBuffer; + +import com.bigdata.ha.msg.IHAWriteMessage; +import com.bigdata.journal.IRootBlockView; + +/** + * A constrained interface providing access to limited operations on the + * live HALog. + * + * @author Martyn Cutcher + * @author Bryan Thompson + */ +public interface IHALogWriter { + + /** + * Return <code>true</code> iff there is an HALog file that is + * currently open for writing. + */ + public boolean isHALogOpen(); + + /** + * Return the commit counter that is expected for the writes that will be + * logged (the same commit counter that is on the opening root block). + */ + public long getCommitCounter(); + + /** + * Return the sequence number that is expected for the next write. + */ + public long getSequence(); + + /** + * Write the message and the data on the live HALog. + * + * @param msg + * The message. + * @param data + * The data. + * @throws IllegalStateException + * if the message is not appropriate for the state of the log. + * @throws IOException + * if we can not write on the log. + */ + public void writeOnHALog(IHAWriteMessage msg, ByteBuffer data) throws IOException; + + /** + * Write the final root block on the HA log and close the file. This "seals" + * the file, which now represents the entire write set associated with the + * commit point in the given root block. + * + * @param rootBlock + * The final root block for the write set. + * @throws IOException + */ + public void closeHALog(IRootBlockView rootBlock) throws IOException; + + /** + * Disable (and remove) the current log file if one is open. + */ + public void disableHALog() throws IOException; + +} \ No newline at end of file Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractCommitTimeIndex.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractCommitTimeIndex.java 2013-05-07 19:11:27 UTC (rev 7116) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractCommitTimeIndex.java 2013-05-08 15:53:19 UTC (rev 7117) @@ -428,6 +428,12 @@ * * @throws IllegalArgumentException * if <code>commitCounter LT ZERO (0)</code> + * + * TODO It is possible to improve the performance for this for + * large indices using a binary search. Each time we probe the + * index we discover a specific commit counter value. If the + * value is LT the target, we need to search above that probe. + * If GT the target, we need to search below that problem. 
*/ public T findByCommitCounter(final long commitCounter) { @@ -530,4 +536,22 @@ } + public void removeAll() { + + final Lock lock = writeLock(); + + lock.lock(); + + try { + + btree.removeAll(); + + } finally { + + lock.unlock(); + + } + + } + } Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractJournal.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractJournal.java 2013-05-07 19:11:27 UTC (rev 7116) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractJournal.java 2013-05-08 15:53:19 UTC (rev 7117) @@ -138,7 +138,7 @@ import com.bigdata.io.IDataRecordAccess; import com.bigdata.io.SerializerUtil; import com.bigdata.journal.Name2Addr.Entry; -import com.bigdata.journal.jini.ha.HAJournal; +import com.bigdata.journal.jini.ha.HAJournalServer; import com.bigdata.mdi.IResourceMetadata; import com.bigdata.mdi.JournalMetadata; import com.bigdata.quorum.AsynchronousQuorumCloseException; @@ -1713,24 +1713,24 @@ } - /** - * The HA log directory. - * - * @see HAJournal.Options#HA_LOG_DIR - * - * @throws UnsupportedOperationException - * always. - */ - public File getHALogDir() { - - throw new UnsupportedOperationException(); +// /** +// * The HA log directory. +// * +// * @see HAJournal.Options#HA_LOG_DIR +// * +// * @throws UnsupportedOperationException +// * always. +// */ +// public File getHALogDir() { +// +// throw new UnsupportedOperationException(); +// +// } - } - /** * The HA timeout in milliseconds for a 2-phase prepare. * - * @see HAJournal.Options#HA_PREPARE_TIMEOUT + * @see HAJournalServer.ConfigurationOptions#HA_PREPARE_TIMEOUT * * @throws UnsupportedOperationException * always. Copied: branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/CommitCounterUtility.java (from rev 7079, branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/CommitCounterUtility.java) =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/CommitCounterUtility.java (rev 0) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/CommitCounterUtility.java 2013-05-08 15:53:19 UTC (rev 7117) @@ -0,0 +1,237 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +package com.bigdata.journal; + +import java.io.File; +import java.io.FileFilter; +import java.io.IOException; +import java.util.Formatter; + +import org.apache.log4j.Logger; + +/** + * Utility class for operations on files that are named using a commit counter. 
+ * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + */ +public class CommitCounterUtility { + + private static final Logger log = Logger + .getLogger(CommitCounterUtility.class); + + /** + * Return the name of the {@link File} associated with the commitCounter. + * + * @param dir + * The directory spanning all such files. + * @param commitCounter + * The commit counter for the current root block on the journal. + * @param ext + * The filename extension. + * + * @return The name of the corresponding snapshot file. + */ + public static File getCommitCounterFile(final File dir, + final long commitCounter, final String ext) { + + /* + * Format the name of the file. + * + * Note: The commit counter in the file name should be zero filled to 20 + * digits so we have the files in lexical order in the file system (for + * convenience). [I have changed this to 21 digits since that can be + * broken up into groups of three per below.] + * + * Note: The files are placed into a recursive directory structure with + * 1000 files per directory. This is done by taking the lexical form of + * the file name and then partitioning it into groups of THREE (3) + * digits. + */ + final String basename; + { + + final StringBuilder sb = new StringBuilder(); + + final Formatter f = new Formatter(sb); + + f.format("%021d", commitCounter); + f.flush(); + f.close(); + + basename = sb.toString(); + + } + + /* + * Now figure out the recursive directory name. + */ + File t = dir; + + if (true) { + + for (int i = 0; i < (21 - 3); i += 3) { + + t = new File(t, basename.substring(i, i + 3)); + + } + + } + + final File file = new File(t, basename + ext); + + return file; + + } + + /** + * Parse out the commitCounter from the file name. + * + * @param name + * The file name + * @param ext + * The expected file extension. + * + * @return The commit counter from the file name. + * + * @throws IllegalArgumentException + * if either argument is <code>null</code> + * @throws NumberFormatException + * if the file name can not be interpreted as a commit counter. + */ + public static long parseCommitCounterFile(final String name, + final String ext) throws NumberFormatException { + + if (name == null) + throw new IllegalArgumentException(); + + if (ext == null) + throw new IllegalArgumentException(); + + // Strip off the filename extension. + final int len = name.length() - ext.length(); + + final String fileBaseName = name.substring(0, len); + + // Closing commitCounter for snapshot file. + final long commitCounter = Long.parseLong(fileBaseName); + + return commitCounter; + + } + + /** + * Return the basename of the file (strip off the extension). + * + * @param name + * The file name. + * @param ext + * The extension. + * + * @return The base name of the file without the extension. + */ + public static String getBaseName(final String name, final String ext) { + + final String basename = name.substring(0, name.length() - ext.length()); + + return basename; + + } + + /** + * Recursively removes any files and subdirectories and then removes the + * file (or directory) itself. Only files recognized by + * {@link #getFileFilter()} will be deleted. + * <p> + * Note: A dedicated version of this method exists here to thrown an + * {@link IOException} if we can not delete a file. This is deliberate. It + * is thrown to prevent a REBUILD from proceeding unless we can clear out + * the old snapshot and HALog files. 
+ * + * @param errorIfDeleteFails + * When <code>true</code> and {@link IOException} is thrown if a + * file matching the filter or an empty directory matching the + * filter can not be removed. When <code>false</code>, that event + * is logged @ WARN instead. + * @param f + * A file or directory. + * @param fileFilter + * A filter matching the files and directories to be visited and + * removed. If directories are matched, then they will be removed + * iff they are empty. A depth first visitation is used, so the + * files and sub-directories will be cleared before we attempt to + * remove the parent directory. + * @throws IOException + * if any file or non-empty directory can not be deleted (iff + * <i>errorIfDeleteFails</i> is <code>true</code>). + */ + public static void recursiveDelete(final boolean errorIfDeleteFails, + final File f, final FileFilter fileFilter) throws IOException { + + if (f.isDirectory()) { + + final File[] children = f.listFiles(fileFilter); + + for (int i = 0; i < children.length; i++) { + + recursiveDelete(errorIfDeleteFails, children[i], fileFilter); + + } + + } + + if (!f.exists()) + return; + + if (log.isInfoEnabled()) + log.info("Removing: " + f); + + final boolean deleted = f.delete(); + + if (!deleted) { + + if (f.isDirectory() && f.list().length != 0) { + + // Ignore non-empty directory. + return; + + } + + final String msg = "Could not remove file: " + f; + + if (errorIfDeleteFails) { + + // Complete if we can not delete a file. + throw new IOException(msg); + + } else { + + log.warn(msg); + + } + + } + + } + +} Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/RootBlockUtility.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/RootBlockUtility.java 2013-05-07 19:11:27 UTC (rev 7116) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/RootBlockUtility.java 2013-05-08 15:53:19 UTC (rev 7117) @@ -129,6 +129,13 @@ this.rootBlock = chooseRootBlock(rootBlock0, rootBlock1, ignoreBadRootBlock, alternateRootBlock); } + + public RootBlockUtility(final IRootBlockView rb0, final IRootBlockView rb1) { + this.rootBlock0 = rb0; + this.rootBlock1 = rb1; + this.rootBlock = chooseRootBlock(rootBlock0, rootBlock1, + false/* ignoreBadRootBlock */, false/* alternateRootBlock */); + } /** * Return the chosen root block. 
The root block having the greater Modified: branches/READ_CACHE/bigdata/src/test/com/bigdata/ha/halog/TestHALogWriter.java =================================================================== --- branches/READ_CACHE/bigdata/src/test/com/bigdata/ha/halog/TestHALogWriter.java 2013-05-07 19:11:27 UTC (rev 7116) +++ branches/READ_CACHE/bigdata/src/test/com/bigdata/ha/halog/TestHALogWriter.java 2013-05-08 15:53:19 UTC (rev 7117) @@ -121,9 +121,9 @@ .checksum(data), rbv.getStoreType(), rbv.getQuorumToken(), 1000, 0); - writer.write(msg, data); + writer.writeOnHALog(msg, data); - writer.closeLog(closeRBV(rbv)); + writer.closeHALog(closeRBV(rbv)); // for sanity, let's run through the standard reader try { @@ -162,7 +162,7 @@ .checksum(data), rbv.getStoreType(), rbv.getQuorumToken(), 1000, 0); - writer.write(msg, data); + writer.writeOnHALog(msg, data); final IHALogReader reader = writer.getReader(); @@ -174,7 +174,7 @@ assertTrue(rmsg.getSize() == msg.getSize()); // commit the log file - writer.closeLog(closeRBV(rbv)); + writer.closeHALog(closeRBV(rbv)); // the writer should have closed the file, so the reader should return // immediately to report no more buffers @@ -221,17 +221,17 @@ .checksum(data), rbv.getStoreType(), rbv.getQuorumToken(), 1000, 0); - writer.write(msg, data); + writer.writeOnHALog(msg, data); if (((i+1) % (1 + r.nextInt(count/3))) == 0) { System.out.println("Cycling HALog after " + sequence + " records"); rbv = closeRBV(rbv); - writer.closeLog(rbv); + writer.closeHALog(rbv); sequence = 0; writer.createLog(rbv); } } rbv = closeRBV(rbv); - writer.closeLog(rbv); + writer.closeHALog(rbv); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { Modified: branches/READ_CACHE/bigdata/src/test/com/bigdata/journal/TestAll.java =================================================================== --- branches/READ_CACHE/bigdata/src/test/com/bigdata/journal/TestAll.java 2013-05-07 19:11:27 UTC (rev 7116) +++ branches/READ_CACHE/bigdata/src/test/com/bigdata/journal/TestAll.java 2013-05-08 15:53:19 UTC (rev 7117) @@ -72,7 +72,10 @@ // tests of the index used to access historical commit records suite.addTestSuite( TestCommitRecordIndex.class ); - + + // test suites for file names based on commit counters. + suite.addTestSuite( TestCommitCounterUtility.class ); + /* * Test a scalable temporary store (uses the transient and disk-only * buffer modes). Added: branches/READ_CACHE/bigdata/src/test/com/bigdata/journal/TestCommitCounterUtility.java =================================================================== --- branches/READ_CACHE/bigdata/src/test/com/bigdata/journal/TestCommitCounterUtility.java (rev 0) +++ branches/READ_CACHE/bigdata/src/test/com/bigdata/journal/TestCommitCounterUtility.java 2013-05-08 15:53:19 UTC (rev 7117) @@ -0,0 +1,66 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on May 8th, 2013 + */ +package com.bigdata.journal; + +import java.io.File; + +import junit.framework.TestCase2; + +/** + * Test suite for the {@link CommitCounterUtility}. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + */ +public class TestCommitCounterUtility extends TestCase2 { + + public TestCommitCounterUtility() { + } + + public TestCommitCounterUtility(String name) { + super(name); + } + + public void test01() { + + final File dir = new File("/tmp"); + + final String ext = ".tmp"; + + final File f = new File( + "/tmp/000/000/000/000/000/000/000000000000000000001.tmp"); + + assertEquals(f, CommitCounterUtility.getCommitCounterFile(dir, 1L, ext)); + + assertEquals(1L, + CommitCounterUtility.parseCommitCounterFile(f.getName(), ext)); + + assertEquals("000000000000000000001", + CommitCounterUtility.getBaseName(f.getName(), ext)); + + } + +} Deleted: branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/CommitCounterUtility.java =================================================================== --- branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/CommitCounterUtility.java 2013-05-07 19:11:27 UTC (rev 7116) +++ branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/CommitCounterUtility.java 2013-05-08 15:53:19 UTC (rev 7117) @@ -1,198 +0,0 @@ -/** - -Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. - -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ -package com.bigdata.journal.jini.ha; - -import java.io.File; -import java.io.FileFilter; -import java.io.IOException; -import java.util.Formatter; - -import org.apache.log4j.Logger; - -/** - * Utility class for operations on files that are named using a commit counter. - * - * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - */ -public class CommitCounterUtility { - - private static final Logger log = Logger - .getLogger(CommitCounterUtility.class); - - /** - * Return the name of the {@link File} associated with the commitCounter. - * - * @param dir - * The directory spanning all such files. - * @param commitCounter - * The commit counter for the current root block on the journal. - * @param ext - * The filename extension. - * @return The name of the corresponding snapshot file. - */ - public static File getCommitCounterFile(final File dir, - final long commitCounter, final String ext) { - - /* - * Format the name of the file. - * - * Note: The commit counter in the file name should be zero filled to 20 - * digits so we have the files in lexical order in the file system (for - * convenience). 
- */ - final String file; - { - - final StringBuilder sb = new StringBuilder(); - - final Formatter f = new Formatter(sb); - - f.format("%020d" + ext, commitCounter); - f.flush(); - f.close(); - - file = sb.toString(); - - } - - return new File(dir, file); - - } - - /** - * Parse out the commitCounter from the file name. - * - * @param name - * The file name - * @param ext - * The expected file extension. - * - * @return The commit counter from the file name. - * - * @throws IllegalArgumentException - * if either argument is <code>null</code> - * @throws NumberFormatException - * if the file name can not be interpreted as a commit counter. - */ - public static long parseCommitCounterFile(final String name, - final String ext) throws NumberFormatException { - - if (name == null) - throw new IllegalArgumentException(); - - if (ext == null) - throw new IllegalArgumentException(); - - // Strip off the filename extension. - final int len = name.length() - ext.length(); - - final String fileBaseName = name.substring(0, len); - - // Closing commitCounter for snapshot file. - final long commitCounter = Long.parseLong(fileBaseName); - - return commitCounter; - - } - - /** - * Return the basename of the file (strip off the extension). - * - * @param name - * The file name. - * @param ext - * The extension. - * - * @return The base name of the file without the extension. - */ - public static String getBaseName(final String name, final String ext) { - - final String basename = name.substring(0, name.length() - ext.length()); - - return basename; - - } - - /** - * Recursively removes any files and subdirectories and then removes the - * file (or directory) itself. Only files recognized by - * {@link #getFileFilter()} will be deleted. - * <p> - * Note: A dedicated version of this method exists here to thrown an - * {@link IOException} if we can not delete a file. This is deliberate. It - * is thrown to prevent a REBUILD from proceeding unless we can clear out - * the old snapshot and HALog files. - * - * @param errorIfDeleteFails - * When <code>true</code> and {@link IOException} is thrown if a - * file matching the filter or an empty directory matching the - * filter can not be removed. When <code>false</code>, that event - * is logged @ WARN instead. - * @param f - * A file or directory. - * @param fileFilter - * A filter matching the files and directories to be visited and - * removed. If directories are matched, then they will be removed - * iff they are empty. A depth first visitation is used, so the - * files and sub-directories will be cleared before we attempt to - * remove the parent directory. - * @throws IOException - * if any file or non-empty directory can not be deleted (iff - * <i>errorIfDeleteFails</i> is <code>true</code>). - */ - public static void recursiveDelete(final boolean errorIfDeleteFails, - final File f, final FileFilter fileFilter) throws IOException { - - if (f.isDirectory()) { - - final File[] children = f.listFiles(fileFilter); - - for (int i = 0; i < children.length; i++) { - - recursiveDelete(errorIfDeleteFails, children[i], fileFilter); - - } - - } - - if (log.isInfoEnabled()) - log.info("Removing: " + f); - - if (f.exists() && !f.delete()) { - if (f.isDirectory() && f.list().length != 0) { - // Ignore non-empty directory. - return; - } - final String msg = "Could not remove file: " + f; - if (errorIfDeleteFails) { - // Complete if we can not delete a file. 
- throw new IOException(msg); - } else { - log.warn(msg); - } - } - - } - -} Modified: branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournal.java =================================================================== --- branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournal.java 2013-05-07 19:11:27 UTC (rev 7116) +++ branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournal.java 2013-05-08 15:53:19 UTC (rev 7117) @@ -47,7 +47,6 @@ import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.locks.Lock; -import java.util.concurrent.locks.ReentrantLock; import net.jini.config.Configuration; import net.jini.config.ConfigurationException; @@ -62,7 +61,6 @@ import com.bigdata.concurrent.FutureTaskMon; import com.bigdata.ha.HAGlue; import com.bigdata.ha.QuorumService; -import com.bigdata.ha.QuorumServiceBase; import com.bigdata.ha.RunState; import com.bigdata.ha.halog.HALogReader; import com.bigdata.ha.halog.HALogWriter; @@ -86,12 +84,12 @@ import com.bigdata.ha.msg.IHASnapshotDigestResponse; import com.bigdata.ha.msg.IHASnapshotRequest; import com.bigdata.ha.msg.IHASnapshotResponse; -import com.bigdata.ha.msg.IHASyncRequest; import com.bigdata.ha.msg.IHAWriteMessage; import com.bigdata.io.DirectBufferPool; import com.bigdata.io.IBufferAccess; import com.bigdata.io.writecache.WriteCache; import com.bigdata.journal.BufferMode; +import com.bigdata.journal.CommitCounterUtility; import com.bigdata.journal.FileMetadata; import com.bigdata.journal.IHABufferStrategy; import com.bigdata.journal.IRootBlockView; @@ -189,39 +187,39 @@ */ private final long haPrepareTimeout; - /** - * @see HAJournalServer.ConfigurationOptions#HA_LOG_DIR - */ - private final File haLogDir; - - /** - * Write ahead log for replicated writes used to resynchronize services that - * are not in the met quorum. - * - * @see HAJournalServer.ConfigurationOptions#HA_LOG_DIR - * @see HALogWriter - */ - private final HALogWriter haLogWriter; - - /** - * Lock to guard the HALogWriter. - */// FIXME logLock: Refactor visibility and initialization in HAQuorumService. - final Lock logLock = new ReentrantLock(); - - /** - * The most recently observed *live* {@link IHAWriteMessage}. - * <p> - * Note: The {@link HALogWriter} will log live messages IFF they are - * consistent with the state of the {@link HAJournalServer} when they are - * received. In contrast, this field notices each *live* message that is - * replicated along the HA pipline. - * <p> - * Note: package private - exposed to {@link HAJournalServer}. - * - * @see QuorumServiceBase#handleReplicatedWrite(IHASyncRequest, - * IHAWriteMessage, ByteBuffer) - */ - volatile IHAWriteMessage lastLiveHAWriteMessage = null; +// /** +// * @see HAJournalServer.ConfigurationOptions#HA_LOG_DIR +// */ +// private final File haLogDir; +// +// /** +// * Write ahead log for replicated writes used to resynchronize services that +// * are not in the met quorum. +// * +// * @see HAJournalServer.ConfigurationOptions#HA_LOG_DIR +// * @see HALogWriter +// */ +// private final HALogWriter haLogWriter; +// +// /** +// * Lock to guard the HALogWriter. +// */ +// final Lock logLock = new ReentrantLock(); +// +// /** +// * The most recently observed *live* {@link IHAWriteMessage}. +// * <p> +// * Note: The {@link HALogWriter} will log live messages IFF they are +// * consistent with the state of the {@link HAJournalServer} when they are +// * received. 
In contrast, this field notices each *live* message that is +// * replicated along the HA pipline. +// * <p> +// * Note: package private - exposed to {@link HAJournalServer}. +// * +// * @see QuorumServiceBase#handleReplicatedWrite(IHASyncRequest, +// * IHAWriteMessage, ByteBuffer) +// */ +// volatile IHAWriteMessage lastLiveHAWriteMessage = null; /** * Manager for journal snapshots. @@ -229,13 +227,18 @@ private final SnapshotManager snapshotManager; /** - * The {@link HALogWriter} for this {@link HAJournal} and never + * Manager for HALog files. + */ + private final HALogNexus haLogNexus; + + /** + * The manager for HALog files for this {@link HAJournal} and never * <code>null</code>. */ - HALogWriter getHALogWriter() { - - return haLogWriter; + public HALogNexus getHALogNexus() { + return haLogNexus; + } /** @@ -332,36 +335,10 @@ } - // Note: This is the effective service directory. - final File serviceDir = server.getServiceDir(); - - { - - // Note: Default is relative to the serviceDir. - haLogDir = (File) config - .getEntry( - HAJournalServer.ConfigurationOptions.COMPONENT, - HAJournalServer.ConfigurationOptions.HA_LOG_DIR, - File.class,// - new File( - serviceDir, - HAJournalServer.ConfigurationOptions.DEFAULT_HA_LOG_DIR)// - ); - - if (!haLogDir.exists()) { - - // Create the directory. - if (!haLogDir.mkdirs()) - throw new IOException("Could not create directory: " - + haLogDir); - - } - - // Set up the HA log writer. - haLogWriter = new HALogWriter(haLogDir); - - } - + // HALog manager. + haLogNexus = new HALogNexus(server, this, config); + + // Snapshot manager. snapshotManager = new SnapshotManager(server, this, config); } @@ -499,13 +476,13 @@ } - @Override - public final File getHALogDir() { +// @Override +// public final File getHALogDir() { +// +// return haLogNexus.getHALogDir(); +// +// } - return haLogDir; - - } - public SnapshotManager getSnapshotManager() { return snapshotManager; @@ -521,7 +498,7 @@ protected void _close() { try { - haLogWriter.disable(); + haLogNexus.getHALogWriter().disableHALog(); } catch (IOException e) { haLog.error(e, e); } @@ -540,7 +517,8 @@ super.deleteResources(); - recursiveDelete(getHALogDir(), IHALogReader.HALOG_FILTER); + recursiveDelete(getHALogNexus().getHALogDir(), + IHALogReader.HALOG_FILTER); recursiveDelete(getSnapshotManager().getSnapshotDir(), SnapshotManager.SNAPSHOT_FILTER); @@ -560,8 +538,8 @@ private void recursiveDelete(final File f, final FileFilter fileFilter) { try { - CommitCounterUtility.recursiveDelete(false/* errorIfDeleteFails */, f, - fileFilter); + CommitCounterUtility.recursiveDelete(false/* errorIfDeleteFails */, + f, fileFilter); } catch (IOException e) { /* * Note: IOException is not thrown here since @@ -618,7 +596,7 @@ protected void doLocalAbort() { // Clear the last live message out. - this.lastLiveHAWriteMessage = null; + haLogNexus.lastLiveHAWriteMessage = null; super.doLocalAbort(); @@ -645,14 +623,15 @@ * for the HALog file atomic with respect to other operations on the * HALog. This lock is shared by the HAQuorumService. */ + final Lock logLock = getHALogNexus().getLogLock(); logLock.lock(); try { // The commit counter of the desired closing root block. final long commitCounter = msg.getCommitCounter(); - final File logFile = HALogWriter.getHALogFileName( - getHALogDir(), commitCounter); + final File logFile = getHALogNexus() + .getHALogFile(commitCounter); if (!logFile.exists()) { @@ -694,7 +673,7 @@ * file needs to be an atomic decision and thus MUST be made by the * HALogManager. 
*/ - final IHALogReader r = getHALogWriter().getReader(commitCounter); + final IHALogReader r = getHALogNexus().getReader(commitCounter); // Task sends an HALog file along the pipeline. final FutureTask<Void> ft = new FutureTaskMon<Void>( @@ -1013,7 +992,7 @@ * file needs to be an atomic decision and thus MUST be made by the * HALogManager. */ - final IHALogReader r = getHALogWriter().getReader(commitCounter); + final IHALogReader r = getHALogNexus().getReader(commitCounter); final MessageDigest digest = MessageDigest.getInstance("MD5"); @@ -1377,17 +1356,20 @@ * <p> * Note: This uses [port := 0], which means a random port is assigned. * <p> - * Note: The VM WILL NOT be kept alive by the exported proxy (keepAlive is - * <code>false</code>). + * Note: The VM WILL NOT be kept alive by the exported proxy (keepAlive + * is <code>false</code>). * * @param enableDGC * if distributed garbage collection should be used for the * object to be exported. * * @return The {@link Exporter}. + * + * TODO This should be based on the {@link Configuration} object + * (the EXPORTER field). See AbstractServer. */ protected Exporter getExporter(final boolean enableDGC) { - // TODO This should be based on the Configuration object (EXPORTER field). See AbstractServer. + return new BasicJeriExporter(TcpServerEndpoint .getInstance(0/* port */), invocationLayerFactory, enableDGC, false/* keepAlive */); Modified: branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournalServer.java =================================================================== --- branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournalServer.java 2013-05-07 19:11:27 UTC (rev 7116) +++ branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournalServer.java 2013-05-08 15:53:19 UTC (rev 7117) @@ -24,9 +24,7 @@ package com.bigdata.journal.jini.ha; import java.io.File; -import java.io.FileFilter; import java.io.FileNotFoundException; -import java.io.FilenameFilter; import java.io.IOException; import java.lang.reflect.Constructor; import java.lang.reflect.InvocationTargetException; @@ -44,7 +42,6 @@ import java.util.concurrent.FutureTask; import java.util.concurrent.RejectedExecutionException; import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicReference; import java.util.concurrent.locks.Lock; @@ -67,6 +64,7 @@ import com.bigdata.ha.QuorumServiceBase; import com.bigdata.ha.halog.HALogWriter; import com.bigdata.ha.halog.IHALogReader; +import com.bigdata.ha.halog.IHALogWriter; import com.bigdata.ha.msg.HALogRequest; import com.bigdata.ha.msg.HALogRootBlocksRequest; import com.bigdata.ha.msg.HARebuildRequest; @@ -113,6 +111,10 @@ * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/530"> Journal * HA </a> + * + * TODO It would be nice if we could roll an {@link HAJournalServer} back + * (or forward) to a specific commit point while it was running (but + * naturally not while it was joined with a met quorum). */ public class HAJournalServer extends AbstractServer { @@ -263,26 +265,6 @@ ISnapshotPolicy DEFAULT_SNAPSHOT_POLICY = new DefaultSnapshotPolicy(); -// /** -// * <strong>TEST SUITE OPTION ONLY!</strong> -// * <p> -// * By default, a snapshot will be taken the first time the quorum meets -// * for each service. 
This provide the initial restore point, which -// * corresponds to an empty {@link HAJournal} with the correct root -// * blocks for the quorum. -// * <p> -// * This option MAY be used to suppress this behavior. This is used by -// * the test suite to avoid the creation of the initial snapshot. In -// * combination with an {@link DefaultRestorePolicy} which specifies -// * <code>minRestorePoints:=0</code>, this has the effect that we do not -// * hold onto HALog files (other than the current HALog file) until a -// * snapshot has been taken. The test suites control when snapshots are -// * taken and are thus able to test a variety of backup scenarios. -// */ -// String SNAPSHOT_ON_FIRST_MEET = "snapshotOnFirstMeet"; -// -// boolean DEFAULT_SNAPSHOT_ON_FIRST_MEET = true; - /** * The policy identifies the first commit point whose backups MUST NOT * be released. The policy may be based on the age of the commit point, @@ -1263,7 +1245,7 @@ super(logicalServiceZPath, serviceId, remoteServiceImpl, store); this.journal = store; - this.logLock = store.logLock; + this.logLock = store.getHALogNexus().getLogLock(); this.server = server; } @@ -1339,10 +1321,10 @@ */ logLock.lock(); try { - final HALogWriter logWriter = journal.getHALogWriter(); - if (!logWriter.isOpen()) { - logWriter.disable(); - logWriter.createLog(journal.getRootBlockView()); + if (!journal.getHALogNexus().isHALogOpen()) { + journal.getHALogNexus().disableHALog(); + journal.getHALogNexus().createHALog( + journal.getRootBlockView()); } } finally { logLock.unlock(); @@ -1375,7 +1357,7 @@ journal.setQuorumToken(Quorum.NO_QUORUM); try { - journal.getHALogWriter().disable(); + journal.getHALogNexus().disableHALog(); } catch (IOException e) { haLog.error(e, e); } @@ -1414,7 +1396,7 @@ */ journal.setQuorumToken(getQuorum().token()); try { - journal.getHALogWriter().disable(); + journal.getHALogNexus().disableHALog(); } catch (IOException e) { haLog.error(e, e); } @@ -1508,13 +1490,13 @@ // server.haGlueService.bounceZookeeperConnection(); logLock.lock(); try { - if (journal.getHALogWriter().isOpen()) { + if (journal.getHALogNexus().isHALogOpen()) { /* * Note: Closing the HALog is necessary for us to be * able to re-enter SeekConsensus without violating a * pre-condition for that run state. */ - journal.getHALogWriter().disable(); + journal.getHALogNexus().disableHALog(); } } finally { logLock.unlock(); @@ -1558,7 +1540,7 @@ throw new IllegalStateException("Vote already cast."); } - if (journal.getHALogWriter().isOpen()) + if (journal.getHALogNexus().isHALogOpen()) throw new IllegalStateException("HALogWriter is open."); } @@ -1820,7 +1802,7 @@ try { - final IHALogReader r = journal.getHALogWriter() + final IHALogReader r = journal.getHALogNexus() .getReader(commitCounter + 1); if (r.isEmpty()) { @@ -2330,9 +2312,8 @@ // TODO Replace with pipelineSetup()? logLock.lock(); try { - final HALogWriter logWriter = journal.getHALogWriter(); - logWriter.disable(); - logWriter.createLog(openRootBlock); + journal.getHALogNexus().disableHALog(); + journal.getHALogNexus().createHALog(openRootBlock); } finally { logLock.unlock(); } @@ -2393,8 +2374,7 @@ // Close out the current HALog writer. 
logLock.lock(); try { - final HALogWriter logWriter = journal.getHALogWriter(); - logWriter.closeLog(closeRootBlock); + journal.getHALogNexus().closeHALog(closeRootBlock); } finally { logLock.unlock(); } @@ -2569,7 +2549,7 @@ try { - final IHAWriteMessage lastLiveMsg = journal.lastLiveHAWriteMessage; + final IHAWriteMessage lastLiveMsg = journal.getHALogNexus().lastLiveHAWriteMessage; if (lastLiveMsg != null && lastLiveMsg.getCommitCounter() >= currentWriteSetStateOnLeader @@ -2588,7 +2568,7 @@ } - final HALogWriter logWriter = journal.getHALogWriter(); + final IHALogWriter logWriter = journal.getHALogNexus(); if (haLog.isDebugEnabled()) haLog.debug("HALog.commitCounter=" @@ -2668,9 +2648,7 @@ try { - final HALogWriter logWriter = journal.getHALogWriter(); - - if (!logWriter.isOpen()) { + if (!journal.getHALogNexus().isHALogOpen()) { /* * Open the HALogWriter for our current root blocks. @@ -2681,7 +2659,7 @@ * leader to send us a prior commit point in RESYNC. */ - journal.getHALogWriter().createLog( + journal.getHALogNexus().createHALog( journal.getRootBlockView()); } @@ -2734,7 +2712,7 @@ if (req == null) { // Save off reference to most recent *live* message. - journal.lastLiveHAWriteMessage = msg; + journal.getHALogNexus().lastLiveHAWriteMessage = msg; } @@ -2773,9 +2751,9 @@ } - final HALogWriter logWriter = journal.getHALogWriter(); + final IHALogWriter logWriter = journal.getHALogNexus(); - assert logWriter.isOpen(); + assert logWriter.isHALogOpen(); if (msg.getCommitCounter() == logWriter.getCommitCounter() && msg.getSequence() == (logWriter.getSequence() - 1)) { @@ -2910,7 +2888,7 @@ try { - final HALogWriter logWriter = journal.getHALogWriter(); + final IHALogWriter logWriter = journal.getHALogNexus(); if (req == null) { @@ -2992,7 +2970,7 @@ private void resyncTransitionToMetQuorum(final IHAWriteMessage msg, final ByteBuffer data) throws IOException, InterruptedException { - final HALogWriter logWriter = journal.getHALogWriter(); + final IHALogWriter logWriter = journal.getHALogNexus(); final IRootBlockView rootBlock = journal.getRootBlockView(); @@ -3124,10 +3102,9 @@ private void acceptHAWriteMessage(final IHAWriteMessage msg, final ByteBuffer data) throws IOException, InterruptedException { - final HALogWriter logWriter = journal.getHALogWriter(); + if (msg.getCommitCounter() != journal.getHALogNexus() + .getCommitCounter()) { ... [truncated message content] |
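The change-list item for getHALogFileBytesSinceCommitCounter(long) above asks for an index scan with a running sum against the in-memory HALog index instead of walking the file system. The sketch below is a simplified stand-in: it uses a plain NavigableMap rather than the actual AbstractCommitTimeIndex-backed index, and it assumes that "since" means HALog files whose closing commit counter is strictly greater than the argument. In the real index the per-file size would come from the IHALogRecord.sizeOnDisk() values shown in the earlier message.

{{{
import java.util.NavigableMap;
import java.util.TreeMap;

/**
 * Simplified stand-in (not the HALogNexus implementation): an in-memory map
 * from the closing commit counter of each HALog file to its size on disk,
 * used to answer "how many HALog bytes have accumulated since commit counter
 * C" with an index scan and a running sum rather than a directory walk.
 */
public class HALogBytesSketch {

    /** Closing commit counter to bytes on disk for that HALog file. */
    private final NavigableMap<Long, Long> index = new TreeMap<Long, Long>();

    /** Record an HALog file when it is sealed (or discovered on startup). */
    public void add(final long closingCommitCounter, final long sizeOnDisk) {
        index.put(closingCommitCounter, sizeOnDisk);
    }

    /** Remove an HALog file when it is purged. */
    public void remove(final long closingCommitCounter) {
        index.remove(closingCommitCounter);
    }

    /** Sum of the sizes of the HALog files with a closing commit counter GT the argument. */
    public long getHALogFileBytesSinceCommitCounter(final long sinceCommitCounter) {
        long sum = 0L;
        for (long sizeOnDisk : index.tailMap(sinceCommitCounter, false/* inclusive */).values())
            sum += sizeOnDisk;
        return sum;
    }
}
}}}

Because the sizes are carried by the index entries, the running sum never touches the HALog files themselves, which is what keeps this query off the commit latency path.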
From: <tho...@us...> - 2013-05-10 18:14:30
|
Revision: 7122 http://bigdata.svn.sourceforge.net/bigdata/?rev=7122&view=rev Author: thompsonbry Date: 2013-05-10 18:14:18 +0000 (Fri, 10 May 2013) Log Message: ----------- Refactoring to address the DGC memory leak associated with the release time consensus protocol. - AbstractServer: javadoc only. - HATXSGlue: Modified gatherMinimumVisibleCommitTime() to be a sync RMI method (confirmation of the outcome is done by prepare2Phase). This addresses the DGC issue by NOT exporting a Future with DGC enabled. - AbstractJournal: support for the new pattern (without DGC). - HAJournal.getProxy() : javadoc only (no semantics changes). - Journal: Extensive changes to messageFollowers() to support the non-DGC reliant pattern. - AbstractHATransactionService: Added factory pattern for the GatherTask. This facilitates the layered abstraction with part implemented by AbstractJournal and part implemented by Journal. - QuorumCommitImpl: Some optimizations. isDone() is an RMI for the ClientFuture. Code was modified to track more cases when the Future was known to be done to reduce RMIs (specifically, if an ExecutionException was thrown). Modified Paths: -------------- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/HATXSGlue.java branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/QuorumCommitImpl.java branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractJournal.java branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/Journal.java branches/READ_CACHE/bigdata/src/java/com/bigdata/service/AbstractHATransactionService.java branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/AbstractServer.java branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournal.java Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/HATXSGlue.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/HATXSGlue.java 2013-05-09 13:54:49 UTC (rev 7121) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/HATXSGlue.java 2013-05-10 18:14:18 UTC (rev 7122) @@ -20,10 +20,11 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ + */ package com.bigdata.ha; import java.io.IOException; +import java.rmi.Remote; import java.util.concurrent.BrokenBarrierException; import java.util.concurrent.Future; @@ -40,11 +41,11 @@ * @see <a * href="https://docs.google.com/document/d/14FO2yJFv_7uc5N0tvYboU-H6XbLEFpvu-G8RhAzvxrk/edit?pli=1#" * > HA TXS Design Document </a> - * + * * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/623" > HA TXS * / TXS Bottleneck </a> */ -public interface HATXSGlue { +public interface HATXSGlue extends Remote { /** * Message used to request information about the earliest commit point that @@ -56,13 +57,29 @@ * point pinned by either a transaction or the minReleaseAge of their * {@link ITransactionService} using * {@link #notifyEarliestCommitTime(IHANotifyReleaseTimeResponse)}. + * <p> + * The message is a sync RMI call. The follower will clear an outcome and + * execute a task which runs asynchronously and messages back to the leader + * with its {@link IHANotifyReleaseTimeResponse}. The leader will report + * back the consensus release time. The outcome of these on the follower is + * not directly reported back to the leader, e.g., through a remote + * {@link Future} because this causes a DGC thread leak on the follower. 
See + * the ticket below. Instead, the follower notes the outcome of the gather + * operation and will vote "NO" in + * {@link HACommitGlue#prepare2Phase(IHA2PhasePrepareMessage)} unless it + * completes its side of the release time consensus protocol without error + * (that is, the otherwise unmonitored outcome of the asynchronous task for + * {@link #gatherMinimumVisibleCommitTime(IHAGatherReleaseTimeRequest)}). * * @param req * The request from the leader. * * @see #notifyEarliestCommitTime(IHANotifyReleaseTimeResponse) + * + * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/673" > + * Native thread leak in HAJournalServer process </a> */ - Future<Void> gatherMinimumVisibleCommitTime(IHAGatherReleaseTimeRequest req) + void gatherMinimumVisibleCommitTime(IHAGatherReleaseTimeRequest req) throws IOException; /** Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/QuorumCommitImpl.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/QuorumCommitImpl.java 2013-05-09 13:54:49 UTC (rev 7121) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/QuorumCommitImpl.java 2013-05-10 18:14:18 UTC (rev 7122) @@ -315,6 +315,14 @@ * non-joined service has been violated. */ log.error(ex, ex); + done = true; // Note: ExecutionException indicates isDone(). + } catch (RuntimeException ex) { + /* + * Note: ClientFuture.get() can throw a RuntimeException + * if there is a problem with the RMI call. In this case + * we do not know whether the Future is done. + */ + log.error(ex, ex); } finally { if (!done) { // Cancel the request on the remote service (RMI). @@ -446,6 +454,15 @@ } catch (ExecutionException ex) { log.error(ex, ex); causes.add(ex); + done = true; // Note: ExecutionException indicates isDone(). + } catch (RuntimeException ex) { + /* + * Note: ClientFuture.get() can throw a RuntimeException + * if there is a problem with the RMI call. In this case + * we do not know whether the Future is done. + */ + log.error(ex, ex); + causes.add(ex); } finally { if (!done) { // Cancel the request on the remote service (RMI). @@ -468,6 +485,8 @@ // Cancel remote futures. cancelRemoteFutures(remoteFutures); // Throw exception back to the leader. + if (causes.size() == 1) + throw new RuntimeException(causes.get(0)); throw new RuntimeException("remote errors: nfailures=" + causes.size(), new ExecutionExceptions(causes)); } @@ -557,6 +576,15 @@ } catch (ExecutionException ex) { log.error(ex, ex); causes.add(ex); + done = true; // Note: ExecutionException indicates isDone(). + } catch (RuntimeException ex) { + /* + * Note: ClientFuture.get() can throw a RuntimeException + * if there is a problem with the RMI call. In this case + * we do not know whether the Future is done. + */ + log.error(ex, ex); + causes.add(ex); } finally { if (!done) { // Cancel the request on the remote service (RMI). @@ -579,6 +607,8 @@ // Cancel remote futures. cancelRemoteFutures(remoteFutures); // Throw exception back to the leader. 
+ if (causes.size() == 1) + throw new RuntimeException(causes.get(0)); throw new RuntimeException("remote errors: nfailures=" + causes.size(), new ExecutionExceptions(causes)); } Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractJournal.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractJournal.java 2013-05-09 13:54:49 UTC (rev 7121) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractJournal.java 2013-05-10 18:14:18 UTC (rev 7122) @@ -51,6 +51,7 @@ import java.util.UUID; import java.util.concurrent.BrokenBarrierException; import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Future; import java.util.concurrent.FutureTask; @@ -2468,7 +2469,7 @@ assert commitTime > 0L; - int ncommitters = 0; +// int ncommitters = 0; final long[] rootAddrs = new long[_committers.length]; @@ -2483,7 +2484,7 @@ rootAddrs[i] = addr; - ncommitters++; +// ncommitters++; } @@ -2568,6 +2569,9 @@ if (log.isInfoEnabled()) log.info("start"); + // Clear + gatherFuture.set(null/* newValue */); + if (_bufferStrategy == null) { // Nothing to do. @@ -5690,6 +5694,19 @@ private final Quorum<HAGlue,QuorumService<HAGlue>> quorum; /** + * Used to pin the {@link Future} of the gather operation on the client + * to prevent it from being finalized while the leader is still running + * its side of the consensus protocol to update the release time for the + * replication cluster. + * + * @see #gatherMinimumVisibleCommitTime(IHAGatherReleaseTimeRequest) + * + * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/673" > + * Native thread leak in HAJournalServer process </a> + */ + private final AtomicReference<Future<Void>> gatherFuture = new AtomicReference<Future<Void>>(); + + /** * The {@link Quorum} for this service -or- <code>null</code> if the service * is not running with a quorum. */ @@ -5995,7 +6012,7 @@ // Save off a reference to the prepare request. prepareRequest.set(prepareMessage); - + // Clear vote (assume NO unless proven otherwise). vote.set(false); @@ -6024,8 +6041,8 @@ * met quorum. */ - ft = new FutureTaskMon<Boolean>(new Prepare2PhaseTask( - prepareMessage) { + ft = new FutureTaskMon<Boolean>(new Prepare2PhaseTask(isJoined, + isLeader, prepareMessage) { }); } @@ -6094,13 +6111,21 @@ */ private class Prepare2PhaseTask implements Callable<Boolean> { + private final boolean isJoined; + private final boolean isLeader; private final IHA2PhasePrepareMessage prepareMessage; - public Prepare2PhaseTask(final IHA2PhasePrepareMessage prepareMessage) { + public Prepare2PhaseTask(final boolean isJoined, + final boolean isLeader, + final IHA2PhasePrepareMessage prepareMessage) { if (prepareMessage == null) throw new IllegalArgumentException(); + + this.isJoined = isJoined; + this.isLeader = isLeader; + this.prepareMessage = prepareMessage; } @@ -6158,6 +6183,53 @@ quorum.assertQuorum(prepareToken); /* + * if(follower) {...} + */ + if (isJoined && !isLeader) { + + /** + * This is a follower. + * + * Validate the release time consensus protocol was + * completed successfully on the follower. 
+ * + * @see <a + * href="https://sourceforge.net/apps/trac/bigdata/ticket/673" + * > Native thread leak in HAJournalServer process </a> + */ + + final Future<Void> oldFuture = gatherFuture + .getAndSet(null/* newValue */); + + if (oldFuture == null) { + + throw new IllegalStateException( + "Follower did not execute consensus protocol"); + } + + if (!oldFuture.isDone()) { + // Ensure cancelled. + oldFuture.cancel(true/* mayInterruptIfRunning */); + } + + try { + oldFuture.get(); + // Gather was successful - fall through. + } catch (InterruptedException e) { + // Note: Future isDone(). Caller will not block. + throw new AssertionError(); + } catch (ExecutionException e) { + /* + * Gather failed on the follower. + */ + haLog.error("Gather failed on follower: serviceId=" + + getServiceId() + " : " + e, e); + return vote.get(); + } + + } + + /* * Call to ensure strategy does everything required for itself * before final root block commit. At a minimum it must flush * its write cache to the backing file (issue the writes). @@ -6670,30 +6742,78 @@ } /* - * ITransactionService. + * HATXSGlue. * * Note: API is mostly implemented by Journal/HAJournal. + */ + +// /** +// * Clear the {@link #gatherFuture} and return <code>true</code> iff the +// * {@link Future} was available, was already done, and the computation +// * did not result in an error. Othewise return <code>false</code>. +// * <p> +// * Note: This is invoked from +// * {@link #prepare2Phase(IHA2PhasePrepareMessage)} to determine whether +// * the gather operation on the follower completed normally. It is also +// * invoked from {@link AbstractJournal#doLocalAbort()} and from +// * {@link #gatherMinimumVisibleCommitTime(IHAGatherReleaseTimeRequest)} +// * to ensure that the outcome from a previous gather is cleared before a +// * new one is attempted. +// * +// * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/673" > +// * Native thread leak in HAJournalServer process </a> +// */ +// private boolean clearGatherOutcome() { +// final Future<Void> oldFuture = gatherFuture +// .getAndSet(null/* newValue */); +// if (oldFuture != null) { +// if(!oldFuture.isDone()) { +// // Ensure cancelled. +// oldFuture.cancel(true/*mayInterruptIfRunning*/); +// } +// try { +// oldFuture.get(); +// // Gather was successful. +// return true; +// } catch (InterruptedException e) { +// // Note: Future isDone(). Caller will not block. +// throw new AssertionError(); +// } catch (ExecutionException e) { +// haLog.error("Gather failed on follower: serviceId=" +// + getServiceId() + " : " + e, e); +// return false; +// } +// } +// // Outcome was not available. +// return false; +// } + + /** + * {@inheritDoc} * - * Note: We should either not expose the ITransactionService or we - * should delegate the rest of this API. I am leaning toward NOT - * exposing the ITransactionService interface since (a) it does not - * appear to be necessary to start transactions on a specific service; - * and (b) if we do, then we really need to track remote transactions - * (by the remote Service UUID) and cancel them if the remote service - * leaves the met quorum. All we really need to expose here is the - * HATXSGlue interface and that DOES NOT need to extend the - * ITransactionService interface. 
+ * @see <a + * href="https://sourceforge.net/apps/trac/bigdata/ticket/673" + * > Native thread leak in HAJournalServer process </a> */ - @Override - public Future<Void> gatherMinimumVisibleCommitTime( + public void gatherMinimumVisibleCommitTime( final IHAGatherReleaseTimeRequest req) throws IOException { - final Future<Void> ft = ((HATXSGlue) AbstractJournal.this - .getLocalTransactionManager().getTransactionService()) - .gatherMinimumVisibleCommitTime(req); + // Clear the old outcome. + gatherFuture.set(null); - return getProxy(ft, true/* asynchFuture */); + final Callable<Void> task = ((AbstractHATransactionService) AbstractJournal.this + .getLocalTransactionManager() + .getTransactionService()) + .newGatherMinimumVisibleCommitTimeTask(req); + + final FutureTask<Void> ft = new FutureTask<Void>(task); + + gatherFuture.set(ft); + // Fire and forget. The Future is checked by prepare2Phase. + getExecutorService().execute(ft); + + return; } @@ -6708,19 +6828,6 @@ } -// @Override -// public Future<Void> getTXSCriticalSectionLockOnLeader( -// final IHATXSLockRequest req) throws IOException { -// -// final Future<Void> f = ((HATXSGlue) AbstractJournal.this -// .getLocalTransactionManager().getTransactionService()) -// .getTXSCriticalSectionLockOnLeader(req); -// -// // Note: MUST be an asynchronous Future!!! -// return getProxy(f, true/* asynchronousFuture */); -// -// } - /* * IService */ @@ -6732,6 +6839,7 @@ } + @SuppressWarnings("rawtypes") @Override public Class getServiceIface() throws IOException { Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/Journal.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/Journal.java 2013-05-09 13:54:49 UTC (rev 7121) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/Journal.java 2013-05-10 18:14:18 UTC (rev 7122) @@ -27,7 +27,6 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.util.Collection; -import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Properties; @@ -36,16 +35,15 @@ import java.util.concurrent.Callable; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.CyclicBarrier; -import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; -import java.util.concurrent.FutureTask; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.ScheduledFuture; import java.util.concurrent.Semaphore; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicReference; import java.util.concurrent.locks.ReentrantLock; @@ -98,7 +96,6 @@ import com.bigdata.sparse.SparseRowStore; import com.bigdata.util.ClocksNotSynchronizedException; import com.bigdata.util.concurrent.DaemonThreadFactory; -import com.bigdata.util.concurrent.ExecutionExceptions; import com.bigdata.util.concurrent.LatchedExecutor; import com.bigdata.util.concurrent.ShutdownHelper; import com.bigdata.util.concurrent.ThreadPoolExecutorBaseStatisticsTask; @@ -338,10 +335,9 @@ * atomic consensus on the new <i>releaseTime</i> for the services * joined with a met quorum. * <p> - * Note: The {@link #barrier} - * provides visibilty for the fields that are modified by {@link #run()} - * so we do not need additional locks or atomics for synchronizing these - * state updates. 
+ * Note: The {@link #barrier} provides visibilty for the fields that are + * modified by {@link #run()} so we do not need additional locks or + * atomics for synchronizing these state updates. */ final private CyclicBarrier barrier; @@ -392,33 +388,33 @@ } - /** - * Cancel the requests on the remote services (RMI). This is a best effort - * implementation. Any RMI related errors are trapped and ignored in order - * to be robust to failures in RMI when we try to cancel the futures. - */ - private <F extends Future<T>, T> void cancelRemoteFutures( - final List<F> remoteFutures) { +// /** +// * Cancel the requests on the remote services (RMI). This is a best effort +// * implementation. Any RMI related errors are trapped and ignored in order +// * to be robust to failures in RMI when we try to cancel the futures. +// */ +// private <F extends Future<T>, T> void cancelRemoteFutures( +// final F[] remoteFutures) { +// +// if (log.isInfoEnabled()) +// log.info(""); +// +// for (F rf : remoteFutures) { +// +// try { +// +// rf.cancel(true/* mayInterruptIfRunning */); +// +// } catch (Throwable t) { +// +// // ignored (to be robust). +// +// } +// +// } +// +// } - if (log.isInfoEnabled()) - log.info(""); - - for (F rf : remoteFutures) { - - try { - - rf.cancel(true/* mayInterruptIfRunning */); - - } catch (Throwable t) { - - // ignored (to be robust). - - } - - } - - } - public BarrierState() { token = getQuorum().token(); @@ -437,11 +433,11 @@ // Note: Local method call. timestampOnLeader = leadersValue.getTimestamp(); - /* - * Only the followers will countDown() at the barrier. The leader - * will await() until the barrier breaks. - */ - final int nparties = joinedServiceIds.length - 1; +// /* +// * Only the followers will countDown() at the barrier. The leader +// * will await() until the barrier breaks. +// */ + final int nparties = joinedServiceIds.length;// - 1; barrier = new CyclicBarrier(nparties, this); @@ -506,13 +502,38 @@ * NECESSARY. At a mimimum, we must not fail if all joined services on * entry to this method respond without failing (that is, succeed if no * services fail during this protocol) - this is implemented. + * + * @throws InterruptedException + * @throws BrokenBarrierException + * @throws TimeoutException + * + * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/673" > + * Native thread leak in HAJournalServer process </a> */ - private void messageFollowers(final long token) throws IOException { + private void messageFollowers(final long token) throws IOException, + InterruptedException, BrokenBarrierException, TimeoutException { getQuorum().assertLeader(token); - // Future for gather task for each follower. - final List<Future<Void>> remoteFutures = new LinkedList<Future<Void>>(); +// /* +// * Future for gather task for each follower. +// * +// * Note: These are asynchronous remote Futures. They must not escape +// * the local scope and must be cancelled regardless of the outcome. +// * +// * Note: DGC for these remote causes a native thread leak on the +// * followers. To avoid that, I am attempting to rely on proxies for +// * remote futures that do not use DGC. In this case, I believe that +// * it will work since the Future is in scope on the follower (it is +// * the future for the GatherTask running on the follower) and thus +// * we should not need DGC to keep the follower from finalizing the +// * remote futures on which this method is relying. 
+// * +// * @see https://sourceforge.net/apps/trac/bigdata/ticket/673 +// */ +// @SuppressWarnings("unchecked") +// final Future<Void>[] remoteFutures = new Future[joinedServiceIds.length]; +// final boolean[] remoteDone = new boolean[joinedServiceIds.length]; try { @@ -528,41 +549,98 @@ * Runnable which will execute this message on the remote * service. */ + + // Resolve joined service. final HATXSGlue service = getService(serviceId); - final Future<Void> rf = service.gatherMinimumVisibleCommitTime(msg); + + // Message joined service (can throw NPE if service is gone). + service.gatherMinimumVisibleCommitTime(msg); - // add to list of futures we will check. - remoteFutures.add(rf); +// // add to list of futures we will check. +// remoteFutures[i] = rf; } - /* - * Check the futures for the other services in the quorum. - */ - final List<Throwable> causes = new LinkedList<Throwable>(); - for (Future<Void> rf : remoteFutures) { - boolean success = false; - try { - rf.get(); - success = true; - } catch (InterruptedException ex) { - log.error(ex, ex); - causes.add(ex); - } catch (ExecutionException ex) { - log.error(ex, ex); - causes.add(ex); - } finally { - if (!success) { - // Cancel the request on the remote service (RMI). - try { - rf.cancel(true/* mayInterruptIfRunning */); - } catch (Throwable t) { - // ignored. - } - } - } +// /* +// * Check the futures for the other services in the quorum. +// */ +// final List<Throwable> causes = new LinkedList<Throwable>(); +// for (int i = 1; i < remoteFutures.length; i++) { +// final Future<Void> rf = remoteFutures[i]; +// boolean success = false; +// try { +// rf.get(); +// success = true; +// remoteDone[i] = true; +// } catch (InterruptedException ex) { +// log.error(ex, ex); +// causes.add(ex); +// } catch (ExecutionException ex) { +// log.error(ex, ex); +// causes.add(ex); +// remoteDone[i] = true; +// } catch (RuntimeException ex) { +// /* +// * Note: ClientFuture.get() can throw a RuntimeException +// * if there is a problem with the RMI call. In this case +// * we do not know whether the Future is done. +// */ +// log.error(ex, ex); +// causes.add(ex); +// } finally { +// if (!success) { +// // Cancel the request on the remote service (RMI). +// try { +// rf.cancel(true/* mayInterruptIfRunning */); +// } catch (Throwable t) { +// // ignored. +// } +// remoteDone[i] = true; +// } +// } +// } + + try { // FIXME HA TXS : Configuration option for timeout (lift into caller, config @ HAJournal(Server) similar to other timeout. Could be total timeout across 2-phase commit protocol). + barrier.await(20, TimeUnit.SECONDS); + // fall through. + } catch (TimeoutException e) { + throw e; + } catch (InterruptedException e) { + throw e; + } catch (BrokenBarrierException e) { + throw e; } - + +// /* +// * If there were any errors, then throw an exception listing them. +// */ +// if (!causes.isEmpty()) { +// // Note: Cancelled below. +//// // Cancel remote futures. +//// cancelRemoteFutures(remoteFutures); +// // Throw exception back to the leader. +// if (causes.size() == 1) +// throw new RuntimeException(causes.get(0)); +// throw new RuntimeException("remote errors: nfailures=" +// + causes.size(), new ExecutionExceptions(causes)); +// } + + } finally { +// /* +// * Regardless of outcome or errors above, ensure that all remote +// * futures are cancelled. +// */ +// for (int i = 0; i < remoteFutures.length; i++) { +// final Future<Void> rf = remoteFutures[i]; +// if (!remoteDone[i]) { +// // Cancel the request on the remote service (RMI). 
+// try { +// rf.cancel(true/* mayInterruptIfRunning */); +// } catch (Throwable t) { +// // ignored. +// } +// } +// } if (!barrier.isBroken()) { /* * If there were any followers that did not message the @@ -579,38 +657,18 @@ * with the consensus protocol unless all services * "vote yes". Thus, a single node failure during the * release time consensus protocol will cause the commit to - * fail. + * fail. [Actually, we could use getNumberWaiting(). If it + * is a bare majority, then we could force the barrier to + * meet break (either with reset or with running an await() + * in other threads) and take the barrier break action + * ourselves. E.g., in the thread that calls + * barrier.reset()]. */ barrier.reset(); } - /* - * If there were any errors, then throw an exception listing them. - */ - if (!causes.isEmpty()) { - // Cancel remote futures. - cancelRemoteFutures(remoteFutures); - // Throw exception back to the leader. - throw new RuntimeException("remote errors: nfailures=" - + causes.size(), new ExecutionExceptions(causes)); - } + }// finally - } finally { - /* - * Ensure that all futures are cancelled. - */ - for (Future<Void> rf : remoteFutures) { - if (!rf.isDone()) { - // Cancel the request on the remote service (RMI). - try { - rf.cancel(true/* mayInterruptIfRunning */); - } catch (Throwable t) { - // ignored. - } - } - } - } - } } @@ -746,10 +804,12 @@ * node, etc. * * @throws IOException + * @throws BrokenBarrierException */ // Note: Executed on the leader. @Override - public void updateReleaseTimeConsensus() throws IOException { + public void updateReleaseTimeConsensus() throws IOException, + InterruptedException, TimeoutException, BrokenBarrierException { final long token = getQuorum().token(); @@ -958,26 +1018,33 @@ } /** + * Return the {@link GatherTask} that will be executed by the follower. + */ + @Override + public Callable<Void> newGatherMinimumVisibleCommitTimeTask( + final IHAGatherReleaseTimeRequest req) { + + return new GatherTask(req); + + } + + /** * {@inheritDoc} * <p> - * "Gather" runs on the follower. + * Note: This method is implemented by {@link AbstractJournal.BasicHA} + * which calls through to + * {@link #newGatherMinimumVisibleCommitTimeTask(IHAGatherReleaseTimeRequest)} + * + * @throws UnsupportedOperationException */ @Override - public Future<Void> gatherMinimumVisibleCommitTime( + public void gatherMinimumVisibleCommitTime( final IHAGatherReleaseTimeRequest req) throws IOException { - final FutureTask<Void> ft = new FutureTask<Void>( - new GatherTask(req)); - - getExecutorService().submit(ft); - - /* - * Note: This MUST be an ASYNC Future. - */ - return ft; - + throw new UnsupportedOperationException(); + } - + /** * "Gather" task runs on the followers. 
* <p> Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/service/AbstractHATransactionService.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/service/AbstractHATransactionService.java 2013-05-09 13:54:49 UTC (rev 7121) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/service/AbstractHATransactionService.java 2013-05-10 18:14:18 UTC (rev 7122) @@ -29,9 +29,11 @@ import java.io.IOException; import java.util.Properties; +import java.util.concurrent.Callable; import java.util.concurrent.TimeoutException; import com.bigdata.ha.HATXSGlue; +import com.bigdata.ha.msg.IHAGatherReleaseTimeRequest; import com.bigdata.journal.ITransactionService; /** @@ -47,13 +49,27 @@ super(properties); } + + /** + * Factory for the Gather task on the follower. + * + * @param req + * The request. + * + * @return The task to run on the follower. + * + * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/673" > + * Native thread leak in HAJournalServer process </a> + */ + abstract public Callable<Void> newGatherMinimumVisibleCommitTimeTask( + final IHAGatherReleaseTimeRequest req); /** * Coordinate the update of the <i>releaseTime</i> on each service that is * joined with the met quorum. */ abstract public void updateReleaseTimeConsensus() throws IOException, - TimeoutException, InterruptedException; + TimeoutException, InterruptedException, Exception; /** * Used to make a serviceJoin() MUTEX with the consensus protocol. Modified: branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/AbstractServer.java =================================================================== --- branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/AbstractServer.java 2013-05-09 13:54:49 UTC (rev 7121) +++ branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/AbstractServer.java 2013-05-10 18:14:18 UTC (rev 7122) @@ -289,6 +289,8 @@ * used to export more than one object at a time! Therefore the * {@link Configuration} entry for the <code>exporter</code> only effects * how <em>this</em> server exports its service. + * + * @see ConfigurationOptions#EXPORTER */ private Exporter exporter; Modified: branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournal.java =================================================================== --- branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournal.java 2013-05-09 13:54:49 UTC (rev 7121) +++ branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournal.java 2013-05-10 18:14:18 UTC (rev 7122) @@ -1409,7 +1409,7 @@ return new ThickFuture<E>(future); } - /* + /** * Setup the Exporter for the Future. * * Note: Distributed garbage collection is enabled since the proxied @@ -1417,8 +1417,26 @@ * can get() the result. Distributed garbage collection handles this * for us and automatically unexports the proxied iterator once it * is no longer strongly referenced by the client. + * + * Note: DGC is observed to leak native threads and should not be + * used for any common operations. 
+ * + * @see <a + * href="https://sourceforge.net/apps/trac/bigdata/ticket/433" + * > Cluster leaks threads under read-only index operations + * </a> + * @see <a + * href="https://sourceforge.net/apps/trac/bigdata/ticket/437" + * > Thread-local cache combined with unbounded thread pools + * causes effective memory leak </a> + * @see <a + * href="https://sourceforge.net/apps/trac/bigdata/ticket/673" + * >Native thread leak in HAJournalServer process</a> */ - final Exporter exporter = getExporter(true/* enableDGC */); + + final boolean enableDGC = true; + + final Exporter exporter = getExporter(enableDGC); // wrap the future in a proxyable object. final RemoteFuture<E> impl = new RemoteFutureImpl<E>(future); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
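Stripped of the bigdata-specific plumbing, the follower-side pattern introduced in r7122 comes down to the sketch below: the gather request is a synchronous void RMI (so no remote Future and no DGC), the outcome is pinned in a local AtomicReference, and the follower votes NO at PREPARE unless that local Future completed normally. The class name and simplified signatures here are illustrative only, not the actual HATXSGlue/AbstractJournal API.

import java.util.concurrent.Callable;
import java.util.concurrent.CancellationException;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.FutureTask;
import java.util.concurrent.atomic.AtomicReference;

/**
 * Follower-side sketch of the DGC-free gather pattern.
 */
public class FollowerGatherSketch {

    private final ExecutorService executor = Executors.newSingleThreadExecutor();

    /** Pins the local Future of the gather task between the gather and PREPARE. */
    private final AtomicReference<Future<Void>> gatherFuture =
            new AtomicReference<Future<Void>>();

    /** Invoked by the leader over RMI; returns immediately ("fire and forget"). */
    public void gatherMinimumVisibleCommitTime() {
        gatherFuture.set(null); // clear any stale outcome from a previous commit
        final FutureTask<Void> ft = new FutureTask<Void>(new Callable<Void>() {
            public Void call() throws Exception {
                // ... compute the earliest pinned commit time and message the
                // leader back (elided) ...
                return null;
            }
        });
        gatherFuture.set(ft);
        executor.execute(ft); // outcome is checked later, in prepare2Phase()
    }

    /** Invoked by the leader during the 2-phase PREPARE; returns this service's vote. */
    public boolean prepare2Phase() {
        final Future<Void> ft = gatherFuture.getAndSet(null);
        if (ft == null)
            return false; // the consensus protocol never ran: vote NO.
        if (!ft.isDone())
            ft.cancel(true/* mayInterruptIfRunning */);
        try {
            ft.get();
            return true; // gather succeeded: vote YES.
        } catch (CancellationException e) {
            return false; // gather had not finished in time: vote NO.
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            return false;
        } catch (ExecutionException e) {
            return false; // gather failed on this follower: vote NO.
        }
    }
}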
From: <tho...@us...> - 2013-05-13 15:21:28
Revision: 7125 http://bigdata.svn.sourceforge.net/bigdata/?rev=7125&view=rev Author: thompsonbry Date: 2013-05-13 15:21:20 +0000 (Mon, 13 May 2013) Log Message: ----------- HAJournal/AbstractJournal: Added validation of the root block and the last live write message. HAJournalServer/Journal/AbstractJournal: Added explicit configuration of the release time consensus protocol timeout. default is currently 10s (same as for the prepare-2phase timeout). Renamed the methods on the IHANotifyReleaseTimeRequest message to indicate that they are "pinned" commit times and commit counters rather than the current commit time and commit counter. This was done to reduce confusion. Modified Paths: -------------- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/msg/HANotifyReleaseTimeRequest.java branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/msg/IHANotifyReleaseTimeRequest.java branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractJournal.java branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/Journal.java branches/READ_CACHE/bigdata/src/java/com/bigdata/service/AbstractHATransactionService.java branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournal.java branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournalServer.java Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/msg/HANotifyReleaseTimeRequest.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/msg/HANotifyReleaseTimeRequest.java 2013-05-13 15:13:37 UTC (rev 7124) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/msg/HANotifyReleaseTimeRequest.java 2013-05-13 15:21:20 UTC (rev 7125) @@ -33,39 +33,40 @@ private static final long serialVersionUID = 1L; private final UUID serviceUUID; - private final long commitTime; - private final long commitCounter; + private final long pinnedCommitTime; + private final long pinnedCommitCounter; private final long timestamp; public HANotifyReleaseTimeRequest(final UUID serviceUUID, - final long commitTime, final long commitCounter, + final long pinnedCommitTime, final long pinnedCommitCounter, final long timestamp) { this.serviceUUID = serviceUUID; - this.commitTime = commitTime; - this.commitCounter = commitCounter; + this.pinnedCommitTime = pinnedCommitTime; + this.pinnedCommitCounter = pinnedCommitCounter; this.timestamp = timestamp; } @Override public String toString() { return super.toString() + "{serviceUUID=" + serviceUUID - + ",commitTime=" + commitTime + ",commitCounter=" - + commitCounter + ",timestamp=" + timestamp + "}"; + + ",pinnedCommitTime=" + pinnedCommitTime + + ",pinnedCommitCounter=" + pinnedCommitCounter + ",timestamp=" + + timestamp + "}"; } - + @Override public UUID getServiceUUID() { return serviceUUID; } @Override - public long getCommitTime() { - return commitTime; + public long getPinnedCommitTime() { + return pinnedCommitTime; } @Override - public long getCommitCounter() { - return commitCounter; + public long getPinnedCommitCounter() { + return pinnedCommitCounter; } @Override Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/msg/IHANotifyReleaseTimeRequest.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/msg/IHANotifyReleaseTimeRequest.java 2013-05-13 15:13:37 UTC (rev 7124) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/msg/IHANotifyReleaseTimeRequest.java 2013-05-13 15:21:20 UTC (rev 7125) @@ -44,12 +44,12 @@ /** * The 
earliest pinned commit time on the follower. */ - public long getCommitTime(); + public long getPinnedCommitTime(); /** * The earliest pinned commit counter on the follower. */ - public long getCommitCounter(); + public long getPinnedCommitCounter(); // /** // * The readsOnCommitTime of the earliest active transaction on the follower. Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractJournal.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractJournal.java 2013-05-13 15:13:37 UTC (rev 7124) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractJournal.java 2013-05-13 15:21:20 UTC (rev 7125) @@ -139,7 +139,6 @@ import com.bigdata.io.IDataRecordAccess; import com.bigdata.io.SerializerUtil; import com.bigdata.journal.Name2Addr.Entry; -import com.bigdata.journal.jini.ha.HAJournalServer; import com.bigdata.mdi.IResourceMetadata; import com.bigdata.mdi.JournalMetadata; import com.bigdata.quorum.AsynchronousQuorumCloseException; @@ -1731,8 +1730,6 @@ /** * The HA timeout in milliseconds for a 2-phase prepare. * - * @see HAJournalServer.ConfigurationOptions#HA_PREPARE_TIMEOUT - * * @throws UnsupportedOperationException * always. */ @@ -1742,7 +1739,19 @@ } - /** + /** + * The HA timeout in milliseconds for the release time consensus protocol. + * + * @throws UnsupportedOperationException + * always. + */ + public long getHAReleaseTimeConsensusTimeout() { + + throw new UnsupportedOperationException(); + + } + + /** * Core implementation of immediate shutdown handles event reporting. */ protected void _close() { @@ -2923,8 +2932,10 @@ ((AbstractHATransactionService) getLocalTransactionManager() .getTransactionService()) - .updateReleaseTimeConsensus(); - + .updateReleaseTimeConsensus( + getHAReleaseTimeConsensusTimeout(), + TimeUnit.MILLISECONDS); + } catch (Exception ex) { // Wrap and rethrow. @@ -6140,48 +6151,9 @@ if (rootBlock == null) throw new IllegalStateException(); - if (!rootBlock.getUUID().equals( - AbstractJournal.this._rootBlock.getUUID())) { + // Validate the new root block against the current root block. + validateNewRootBlock(isJoined, isLeader, AbstractJournal.this._rootBlock, rootBlock); - /* - * The root block has a different UUID. We can not accept this - * condition. - */ - - throw new IllegalStateException(); - - } - - if (rootBlock.getLastCommitTime() <= AbstractJournal.this._rootBlock - .getLastCommitTime()) { - - /* - * The root block has a commit time that is LTE the most recent - * commit on this Journal. We can not accept this condition. - */ - - throw new IllegalStateException(); - - } - - if (rootBlock.getCommitCounter() <= AbstractJournal.this._rootBlock - .getCommitCounter()) { - - /* - * The root block has a commit counter that is LTE the most - * recent commit counter on this Journal. We can not accept this - * condition. - */ - - throw new IllegalStateException(); - - } - - // the quorum token from the leader is in the root block. - final long prepareToken = rootBlock.getQuorumToken(); - - quorum.assertQuorum(prepareToken); - /* * if(follower) {...} */ @@ -6266,6 +6238,93 @@ } + /** + * Validate the new root block against the current root block. 
This + * method checks a variety of invariants: + * <ul> + * <li>The UUID of the store must be the same.</li> + * <li>The commitTime must be strictly increasing.</li> + * <li>The commitCounter must increase by ONE (1).</li> + * <li></li> + * </ul> + * + * @param isJoined + * iff this service was joined at the atomic decision point + * in the 2-phase commit protocol. + * @param isLeader + * iff this service is the leader for this commit. + * @param oldRB + * the old (aka current) root block. + * @param newRB + * the new (aka proposed) root block. + */ + protected void validateNewRootBlock(final boolean isJoined, + final boolean isLeader, final IRootBlockView oldRB, + final IRootBlockView newRB) { + + if (oldRB == null) + throw new IllegalStateException(); + + if (newRB == null) + throw new IllegalStateException(); + + // Validate UUID of store is consistent. + if (!newRB.getUUID().equals(oldRB.getUUID())) { + + /* + * The root block has a different UUID. We can not accept this + * condition. + */ + + throw new IllegalStateException("Store UUID: old=" + + oldRB.getUUID() + " != new=" + newRB.getUUID()); + + } + + // Validate commit time is strictly increasing. + if (newRB.getLastCommitTime() <= oldRB.getLastCommitTime()) { + + /* + * The root block has a commit time that is LTE the most recent + * commit on this Journal. We can not accept this condition. + */ + + throw new IllegalStateException("lastCommitTime: old=" + + oldRB.getLastCommitTime() + " > new=" + + newRB.getLastCommitTime()); + + } + + // Validate the new commit counter. + { + + final long newcc = newRB.getCommitCounter(); + + final long oldcc = oldRB.getCommitCounter(); + + if (newcc != (oldcc + 1)) { + + /* + * The new root block MUST have a commit counter that is ONE + * more than the current commit counter on this Journal. We + * can not accept any other value for the commit counter. + */ + + throw new IllegalStateException("commitCounter: ( old=" + + oldcc + " + 1 ) != new=" + newcc); + + } + + // The quorum token from the leader is in the root block. + final long prepareToken = newRB.getQuorumToken(); + + // Verify that the same quorum is still met. + quorum.assertQuorum(prepareToken); + + } + + } + @Override public Future<Void> commit2Phase( final IHA2PhaseCommitMessage commitMessage) { @@ -6361,7 +6420,13 @@ * Only the services that are joined go through the * commit protocol. */ - + if (localService == null) { + /* + * The quorum has been terminated. We can't go + * through the 2-phase commit. + */ + throw new IllegalStateException(); + } AbstractJournal.this.doLocalCommit(localService, rootBlock); Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/Journal.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/Journal.java 2013-05-13 15:13:37 UTC (rev 7124) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/Journal.java 2013-05-13 15:21:20 UTC (rev 7125) @@ -464,8 +464,8 @@ if (log.isTraceEnabled()) log.trace("follower: " + response); - if (minimumResponse.getCommitCounter() > response - .getCommitCounter()) { + if (minimumResponse.getPinnedCommitCounter() > response + .getPinnedCommitCounter()) { minimumResponse = response; @@ -481,8 +481,8 @@ // Restate the consensus as an appropriate message object. 
consensus = new HANotifyReleaseTimeResponse( - minimumResponse.getCommitTime(), - minimumResponse.getCommitCounter()); + minimumResponse.getPinnedCommitTime(), + minimumResponse.getPinnedCommitCounter()); if (log.isTraceEnabled()) log.trace("consensus: " + consensus); @@ -510,8 +510,9 @@ * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/673" > * Native thread leak in HAJournalServer process </a> */ - private void messageFollowers(final long token) throws IOException, - InterruptedException, BrokenBarrierException, TimeoutException { + private void messageFollowers(final long token, final long timeout, + final TimeUnit units) throws IOException, InterruptedException, + BrokenBarrierException, TimeoutException { getQuorum().assertLeader(token); @@ -600,8 +601,8 @@ // } // } - try { // FIXME HA TXS : Configuration option for timeout (lift into caller, config @ HAJournal(Server) similar to other timeout. Could be total timeout across 2-phase commit protocol). - barrier.await(20, TimeUnit.SECONDS); + try { + barrier.await(timeout, units); // fall through. } catch (TimeoutException e) { throw e; @@ -808,8 +809,9 @@ */ // Note: Executed on the leader. @Override - public void updateReleaseTimeConsensus() throws IOException, - InterruptedException, TimeoutException, BrokenBarrierException { + public void updateReleaseTimeConsensus(final long timeout, + final TimeUnit units) throws IOException, InterruptedException, + TimeoutException, BrokenBarrierException { final long token = getQuorum().token(); @@ -833,7 +835,7 @@ /* * Message the followers and block until the barrier breaks. */ - barrierState.messageFollowers(token); + barrierState.messageFollowers(token,timeout,units); } finally { Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/service/AbstractHATransactionService.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/service/AbstractHATransactionService.java 2013-05-13 15:13:37 UTC (rev 7124) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/service/AbstractHATransactionService.java 2013-05-13 15:21:20 UTC (rev 7125) @@ -30,6 +30,7 @@ import java.io.IOException; import java.util.Properties; import java.util.concurrent.Callable; +import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import com.bigdata.ha.HATXSGlue; @@ -67,9 +68,15 @@ /** * Coordinate the update of the <i>releaseTime</i> on each service that is * joined with the met quorum. + * + * @param timeout + * The timeout for the release time consensus protocol. + * @param units + * The units for that timeout. */ - abstract public void updateReleaseTimeConsensus() throws IOException, - TimeoutException, InterruptedException, Exception; + abstract public void updateReleaseTimeConsensus(final long timeout, + final TimeUnit units) throws IOException, TimeoutException, + InterruptedException, Exception; /** * Used to make a serviceJoin() MUTEX with the consensus protocol. 
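The timeout that updateReleaseTimeConsensus() and messageFollowers() now accept bounds a CyclicBarrier await on the leader. The sketch below shows just that barrier shape and is illustrative only; the real BarrierState also collects each follower's IHANotifyReleaseTimeResponse and computes the consensus inside the barrier action.

import java.util.concurrent.BrokenBarrierException;
import java.util.concurrent.CyclicBarrier;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicLong;

/** Leader-side sketch of the timed, barrier-based release time consensus. */
public class ReleaseTimeBarrierSketch {

    /** Minimum pinned commit time reported so far (folded in as responses arrive). */
    private final AtomicLong minimumPinnedCommitTime = new AtomicLong(Long.MAX_VALUE);

    public long awaitConsensus(final int nJoined, final long timeout, final TimeUnit unit)
            throws InterruptedException, BrokenBarrierException, TimeoutException {

        // One party per joined service, including the leader. The barrier action
        // runs exactly once, in the thread of the last party to arrive.
        final CyclicBarrier barrier = new CyclicBarrier(nJoined, new Runnable() {
            public void run() {
                // Consensus = minimum over the collected responses (already
                // folded into minimumPinnedCommitTime in this sketch).
            }
        });

        // (elided) message each joined follower with a synchronous RMI; when a
        // follower later reports back, the thread delivering that report on the
        // leader also calls barrier.await().

        // The leader blocks here. If some follower never reports, the timed
        // await throws TimeoutException and breaks the barrier, which releases
        // any parties already waiting with a BrokenBarrierException.
        barrier.await(timeout, unit);

        return minimumPinnedCommitTime.get();
    }
}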
Modified: branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournal.java =================================================================== --- branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournal.java 2013-05-13 15:13:37 UTC (rev 7124) +++ branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournal.java 2013-05-13 15:21:20 UTC (rev 7125) @@ -183,10 +183,15 @@ private final InetSocketAddress writePipelineAddr; /** - * @see Options#HA_PREPARE_TIMEOUT + * @see HAJournalServer.ConfigurationOptions#HA_PREPARE_TIMEOUT */ private final long haPrepareTimeout; + /** + * @see HAJournalServer.ConfigurationOptions#HA_RELEASE_TIME_CONSENSUS_TIMEOUT + */ + private final long haReleaseTimeConsensusTimeout; + // /** // * @see HAJournalServer.ConfigurationOptions#HA_LOG_DIR // */ @@ -335,6 +340,25 @@ } + { + haReleaseTimeConsensusTimeout = (Long) config + .getEntry( + HAJournalServer.ConfigurationOptions.COMPONENT, + HAJournalServer.ConfigurationOptions.HA_RELEASE_TIME_CONSENSUS_TIMEOUT, + Long.TYPE, + HAJournalServer.ConfigurationOptions.DEFAULT_HA_RELEASE_TIME_CONSENSUS_TIMEOUT); + + if (haReleaseTimeConsensusTimeout < HAJournalServer.ConfigurationOptions.MIN_HA_RELEASE_TIME_CONSENSUS_TIMEOUT) { + throw new ConfigurationException( + HAJournalServer.ConfigurationOptions.HA_RELEASE_TIME_CONSENSUS_TIMEOUT + + "=" + + haReleaseTimeConsensusTimeout + + " : must be GTE " + + HAJournalServer.ConfigurationOptions.MIN_HA_RELEASE_TIME_CONSENSUS_TIMEOUT); + } + + } + // HALog manager. haLogNexus = new HALogNexus(server, this, config); @@ -469,6 +493,11 @@ } + /** + * {@inheritDoc} + * + * @see HAJournalServer.ConfigurationOptions#HA_PREPARE_TIMEOUT + */ @Override public final long getHAPrepareTimeout() { @@ -476,6 +505,18 @@ } + /** + * {@inheritDoc} + * + * @see HAJournalServer.ConfigurationOptions#HA_RELEASE_TIME_CONSENSUS_TIMEOUT + */ + @Override + public final long getHAReleaseTimeConsensusTimeout() { + + return haReleaseTimeConsensusTimeout; + + } + // @Override // public final File getHALogDir() { // @@ -615,6 +656,95 @@ } @Override + protected void validateNewRootBlock(final boolean isJoined, + final boolean isLeader, final IRootBlockView oldRB, + final IRootBlockView newRB) { + + super.validateNewRootBlock(isJoined, isLeader, oldRB, newRB); + + if (isJoined && !isLeader) { + + /* + * Verify that the [lastLiveHAWriteMessage] is consisent with + * the proposed new root block. + * + * Note: The [lastLiveHAWriteMessage] is only tracked on the + * followers. Hence we do not use this code path for the leader. + */ + + final IHAWriteMessage msg = getHALogNexus().lastLiveHAWriteMessage; + + if (msg == null) { + + /* + * We should not go through a 2-phase commit without a write + * set. If there is a write set, then the + * lastLiveHAWriteMessage will not be null. + * + * Note: One possible explanation of this exception would be + * a concurrent local abort. That could discard the + * lastLiveHAWriteMessage. + */ + + throw new IllegalStateException("Commit without write set?"); + + } + + if (!msg.getUUID().equals(newRB.getUUID())) { + + /* + * The root block has a different UUID. We can not accept + * this condition. + */ + + throw new IllegalStateException("Store UUID: lastLiveMsg=" + + msg.getUUID() + " != newRB=" + newRB.getUUID()); + + } + + // Validate the new commit counter. + if ((msg.getCommitCounter() + 1) != newRB.getCommitCounter()) { + + /* + * Each message is tagged with the commitCounter for the + * last commit point on the disk. 
The new root block must + * have a commit counter is that PLUS ONE when compared to + * the last live message. + */ + + throw new IllegalStateException( + "commitCounter: ( lastLiveMsg=" + + msg.getCommitCounter() + + " + 1 ) != newRB=" + + newRB.getCommitCounter()); + + } + + // Validate the write cache block sequence. + if ((msg.getSequence() + 1) != newRB.getBlockSequence()) { + + /* + * This checks two conditions: + * + * 1. The new root block must reflect each live + * HAWriteMessage received. + * + * 2. The service must not PREPARE until all expected + * HAWriteMessages have been received. + */ + + throw new IllegalStateException( + "blockSequence: lastLiveMsg=" + msg.getSequence() + + " + 1 != newRB=" + + newRB.getBlockSequence()); + + } + + } + + } + + @Override public IHALogRootBlocksResponse getHALogRootBlocksForWriteSet( final IHALogRootBlocksRequest msg) throws IOException { @@ -1578,22 +1708,27 @@ final StringBuilder innerRunStateStr = new StringBuilder(); if (innerRunState != null) { innerRunStateStr.append(innerRunState.name()); - switch (innerRunState) { - case Resync: - innerRunStateStr.append(" @ " - + journal.getRootBlockView().getCommitCounter()); - break; - case Operator: { - final String msg = server.getOperatorAlert(); - innerRunStateStr.append("msg=" + msg); - break; - } - default: - break; - } +// switch (innerRunState) { +// case Resync: +// innerRunStateStr.append(" @ " +// + journal.getRootBlockView().getCommitCounter()); +// break; +// case Operator: { +// final String msg = server.getOperatorAlert(); +// innerRunStateStr.append("msg=" + msg); +// break; +// } +// default: +// break; +// } } else { innerRunStateStr.append("N/A"); } + innerRunStateStr.append(" @ " + + journal.getRootBlockView().getCommitCounter()); + final String msg = server.getOperatorAlert(); + if (msg != null) + innerRunStateStr.append(", msg=[" + msg + "]"); return "{server=" + server.getRunState() + ", quorumService=" + innerRunStateStr + "}"; Modified: branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournalServer.java =================================================================== --- branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournalServer.java 2013-05-13 15:13:37 UTC (rev 7124) +++ branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournalServer.java 2013-05-13 15:21:20 UTC (rev 7125) @@ -179,6 +179,24 @@ long MIN_HA_PREPARE_TIMEOUT = 100; // milliseconds. /** + * The timeout in milliseconds that the leader will await the followers + * during the release time consensus protocol. + * <p> + * Note: The timeout must be set with a realistic expectation concerning + * the possibility of garbage collection. A long GC pause could + * otherwise cause the 2-phase commit to fail. With this in mind, a + * reasonable timeout is on the order of 10 seconds. + * + * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/623" > + * HA TXS / TXS Bottleneck </a> + */ + String HA_RELEASE_TIME_CONSENSUS_TIMEOUT = "haReleaseTimeConsensusTimeout"; + + long DEFAULT_HA_RELEASE_TIME_CONSENSUS_TIMEOUT = 10000; // milliseconds. + + long MIN_HA_RELEASE_TIME_CONSENSUS_TIMEOUT = 100; // milliseconds. + + /** * The property whose value is the name of the directory in which write * ahead log files will be created to support resynchronization services * trying to join an HA quorum (default {@value #DEFAULT_HA_LOG_DIR}). 
From: <tho...@us...> - 2013-05-15 14:32:32
Revision: 7127 http://bigdata.svn.sourceforge.net/bigdata/?rev=7127&view=rev Author: thompsonbry Date: 2013-05-15 14:32:20 +0000 (Wed, 15 May 2013) Log Message: ----------- {{{ 1. Added a unit test for the 2-phase commit where the 1st follower votes "NO". To do this I subclassed HAJournal and HAGlueService. We plan to add more such tests to investigate different failure modes and how the HA cluster performs under those failure modes. 2. Refactored QuorumCommit and QuorumCommitImpl. These are the local implementation that the leader uses to coordinate a distributed 2-phase commit. The arguments were encapsulated in helper objects. This refactoring also includes a semantics change. The QuorumCommitImpl now reports for each service whether it voted YES or NO for the prepare. The commit2Phase() method now only messages the services that voted YES. (I am looking at how to do the same thing for abort2Phase(), but that method is also invoked from abort() and not just in commitNow() so we do not always have the necessary joined[] available, though one could be created for this purpose). 3. Modified the default timeouts for PREPARE and the release time consensus protocol to Long.MAX_VALUE. This is being done in an attempt to track down some odd behaviors on the HA3 cluster for BSBM UPDATE (on leader) + EXPLORE (on follower). If an RMI can not succeed then it will eventually fail anyway. 4. ClientFuture was modified to cache locally when it knows that the remote Future is done. This helps to reduce RMI calls during code snips such as if(!ft.isDone()) ft.cancel(true); 5. Added the current root block to the data reported by /status for HA. This makes it possible to inspect the root blocks on each service to verify that they are consistent. 6. Refactored how AbstractHA3JournalServerTestCase discloses the serviceDir for each HAJournalServer process. I have also stubbed in some code that we might use to verify that the .lock file has been released in awaitServiceGone(). }}} HA CI is green locally with this change set. 
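Item 4 above (the ClientFuture change) amounts to remembering, on the client side, when the remote Future is known to be done, so that follow-ups such as if (!ft.isDone()) ft.cancel(true); do not trigger extra RMI round trips. The sketch below illustrates the idea only; it is not the actual com.bigdata.service.proxy.ClientFuture code.

import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

/** Sketch of a Future proxy that caches the "done" state to avoid extra RMI calls. */
public class DoneCachingFuture<T> implements Future<T> {

    private final Future<T> remote; // e.g. an RMI proxy for the remote Future
    private volatile boolean knownDone = false;

    public DoneCachingFuture(final Future<T> remote) {
        this.remote = remote;
    }

    public T get() throws InterruptedException, ExecutionException {
        try {
            final T v = remote.get();
            knownDone = true;
            return v;
        } catch (ExecutionException e) {
            knownDone = true; // an ExecutionException implies the Future is done.
            throw e;
        }
    }

    public T get(final long timeout, final TimeUnit unit)
            throws InterruptedException, ExecutionException, TimeoutException {
        try {
            final T v = remote.get(timeout, unit);
            knownDone = true;
            return v;
        } catch (ExecutionException e) {
            knownDone = true;
            throw e;
        }
    }

    public boolean isDone() {
        if (knownDone)
            return true; // skip the RMI when the outcome is already known.
        if (remote.isDone())
            knownDone = true;
        return knownDone;
    }

    public boolean cancel(final boolean mayInterruptIfRunning) {
        if (knownDone)
            return false; // nothing to cancel; avoid the remote call.
        return remote.cancel(mayInterruptIfRunning);
    }

    public boolean isCancelled() {
        return remote.isCancelled();
    }
}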
See https://sourceforge.net/apps/trac/bigdata/ticket/530 (Journal HA) Modified Paths: -------------- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/QuorumCommit.java branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/QuorumCommitImpl.java branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/QuorumPipelineImpl.java branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/QuorumServiceBase.java branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractJournal.java branches/READ_CACHE/bigdata/src/java/com/bigdata/service/proxy/ClientFuture.java branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournal.java branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournalServer.java branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/AbstractHA3JournalServerTestCase.java branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/HAJournalTest.java branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestAll.java branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA3JournalServer.java branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHAJournalServerOverride.java branches/READ_CACHE/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/HAStatusServletUtil.java Added Paths: ----------- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/CommitRequest.java branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/PrepareRequest.java branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/PrepareResponse.java Added: branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/CommitRequest.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/CommitRequest.java (rev 0) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/CommitRequest.java 2013-05-15 14:32:20 UTC (rev 7127) @@ -0,0 +1,84 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Jun 13, 2010 + */ +package com.bigdata.ha; + +/** + * Commit request for a 2-phase commit as coodinated by the leader (local + * object). + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + */ +public class CommitRequest { + + private final PrepareRequest prepareRequest; + private final PrepareResponse prepareResponse; + + /** + * The request used for the PREPARE. + */ + public PrepareRequest getPrepareRequest() { + return prepareRequest; + } + + /** + * The response for the PREPARE. This indicates which services voted to + * commit and which did not. + */ + public PrepareResponse getPrepareResponse() { + return prepareResponse; + } + + /** + * + * @param prepareRequest + * The request used for the PREPARE. + * @param prepareResponse + * The response for the PREPARE. 
This indicates which services + * voted to commit and which did not. + */ + public CommitRequest(final PrepareRequest prepareRequest, + final PrepareResponse prepareResponse) { + + if (prepareRequest == null) + throw new IllegalArgumentException(); + + if (prepareResponse == null) + throw new IllegalArgumentException(); + + this.prepareRequest = prepareRequest; + this.prepareResponse = prepareResponse; + } + + @Override + public String toString() { + + return super.toString() + "{req=" + prepareRequest + ", resp=" + + prepareResponse + "}"; + + } + +} Added: branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/PrepareRequest.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/PrepareRequest.java (rev 0) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/PrepareRequest.java 2013-05-15 14:32:20 UTC (rev 7127) @@ -0,0 +1,114 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Jun 13, 2010 + */ +package com.bigdata.ha; + +import java.util.Arrays; +import java.util.Set; +import java.util.UUID; +import java.util.concurrent.TimeUnit; + +import com.bigdata.journal.IRootBlockView; + +/** + * A 2-phase request as coordinated by the leader (local object). + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + */ +public class PrepareRequest { + + private final UUID[] joinedServiceIds; + private final Set<UUID> nonJoinedPipelineServiceIds; + private final IRootBlockView rootBlock; + private final long timeout; + private final TimeUnit unit; + + public UUID[] getJoinedServiceIds() { + return joinedServiceIds; + } + + public Set<UUID> getNonJoinedPipelineServiceIds() { + return nonJoinedPipelineServiceIds; + } + + public IRootBlockView getRootBlock() { + return rootBlock; + } + + public long getTimeout() { + return timeout; + } + + public TimeUnit getUnit() { + return unit; + } + + /** + * + * @param joinedServiceIds + * The services joined with the met quorum, in their join order. + * @param nonJoinedPipelineServiceIds + * The non-joined services in the write pipeline (in any order). + * @param isRootBlock0 + * if this is rootBlock0. + * @param rootBlock + * The new root block. + * @param timeout + * How long to wait for the other services to prepare. + * @param unit + * The unit for the timeout. 
+ */ + public PrepareRequest( + final UUID[] joinedServiceIds, // + final Set<UUID> nonJoinedPipelineServiceIds,// + final IRootBlockView rootBlock, final long timeout, + final TimeUnit unit) { + + if (rootBlock == null) + throw new IllegalArgumentException(); + + if (unit == null) + throw new IllegalArgumentException(); + + this.joinedServiceIds = joinedServiceIds; + this.nonJoinedPipelineServiceIds = nonJoinedPipelineServiceIds; + this.rootBlock = rootBlock; + this.timeout = timeout; + this.unit = unit; + } + + @Override + public String toString() { + return super.toString() + "{isRootBlock0=" + rootBlock.isRootBlock0() + + ", rootBlock=" + rootBlock + ", #joined=" + + joinedServiceIds.length + ", #nonJoined=" + + nonJoinedPipelineServiceIds.size() + ", joinedServices=" + + Arrays.toString(joinedServiceIds) + ", nonJoined=" + + nonJoinedPipelineServiceIds + ", timeout=" + timeout + + ", unit=" + unit + "}"; + } + +} Added: branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/PrepareResponse.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/PrepareResponse.java (rev 0) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/PrepareResponse.java 2013-05-15 14:32:20 UTC (rev 7127) @@ -0,0 +1,107 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Jun 13, 2010 + */ +package com.bigdata.ha; + +import cern.colt.Arrays; + + +/** + * The 2-phase prepare outcome as coordinated by the leader. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + */ +public class PrepareResponse { + + private final int k; + private final int nyes; + private final boolean willCommit; + private final boolean[] votes; + + /** + * The replication factor for the quorum. + */ + public int replicationFactor() { + return k; + } + + /** + * Return the #of services that voted "YES". + */ + public int getYesCount() { + return nyes; + } + + /** + * Return <code>true</code> iff the transaction will commit based on the + * responses to the prepare requests. + */ + public boolean willCommit() { + return willCommit; + } + + public boolean getVote(final int index) { + + return votes[index]; + + } + + /** + * + * @param k + * The replication factor for the quorum. + * @param nyes + * The #of YES votes. + * @param willCommit + * <code>true</code> iff the transaction will commit based on the + * responses to the prepare requests. 
+ */ + public PrepareResponse(final int k, final int nyes, + final boolean willCommit, final boolean[] votes) { + + if (k < 1) + throw new IllegalArgumentException(); + + if (nyes > k) + throw new IllegalArgumentException(); + + this.k = k; + this.nyes = nyes; + this.willCommit = willCommit; + this.votes = votes; + + } + + @Override + public String toString() { + + return super.toString() + "{k=" + k + ", nyes=" + nyes + + ", willCommit=" + willCommit + ", votes=" + + Arrays.toString(votes) + "}"; + + } + +} Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/QuorumCommit.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/QuorumCommit.java 2013-05-15 11:55:06 UTC (rev 7126) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/QuorumCommit.java 2013-05-15 14:32:20 UTC (rev 7127) @@ -28,9 +28,6 @@ package com.bigdata.ha; import java.io.IOException; -import java.util.Set; -import java.util.UUID; -import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; @@ -58,27 +55,11 @@ * root block for use with the next {@link #commit2Phase(long, long) commit} * message. * - * @param joinedServiceIds - * The services joined with the met quorum, in their join order. - * @param nonJoinedPipelineServiceIds - * The non-joined services in the write pipeline (in any order). - * @param isRootBlock0 - * if this is rootBlock0. - * @param rootBlock - * The new root block. - * @param timeout - * How long to wait for the other services to prepare. - * @param unit - * The unit for the timeout. - * - * @return A {@link Future} which evaluates to a yes/no vote on whether the - * service is prepared to commit. + * @return The outcome of the distributed PREPARE request, indicating + * whether each service is prepared to commit. */ - int prepare2Phase(final UUID[] joinedServiceIds, // - final Set<UUID> nonJoinedPipelineServiceIds,// - final IRootBlockView rootBlock, final long timeout, - final TimeUnit unit) throws InterruptedException, TimeoutException, - IOException; + PrepareResponse prepare2Phase(PrepareRequest req) + throws InterruptedException, TimeoutException, IOException; /** * Used by the leader to send a message to each joined service in the quorum @@ -87,20 +68,8 @@ * The commit MAY NOT go forward unless both the current quorum token and * the lastCommitTime on this message agree with the quorum token and * lastCommitTime in the root block from the last "prepare" message. - * - * @param joinedServiceIds - * The services joined with the met quorum, in their join order. - * @param nonJoinedPipelineServiceIds - * The non-joined services in the write pipeline (in any order). - * @param token - * The quorum token used in the prepare message. - * @param commitTime - * The commit time that assigned to the new commit point. 
*/ - void commit2Phase( - final UUID[] joinedServiceIds, // - final Set<UUID> nonJoinedPipelineServiceIds, long token, - long commitTime) throws IOException, InterruptedException; + void commit2Phase(CommitRequest req) throws IOException, InterruptedException; /** * Used by the leader to send a message to each service joined with the Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/QuorumCommitImpl.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/QuorumCommitImpl.java 2013-05-15 11:55:06 UTC (rev 7126) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/QuorumCommitImpl.java 2013-05-15 14:32:20 UTC (rev 7127) @@ -25,11 +25,10 @@ import java.io.IOException; import java.util.ArrayList; -import java.util.Arrays; import java.util.LinkedList; import java.util.List; -import java.util.Set; import java.util.UUID; +import java.util.concurrent.CancellationException; import java.util.concurrent.ExecutionException; import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; @@ -139,30 +138,23 @@ * from the prepare message. This metadata is used to decide how the service * will handle the prepare, commit, and abort messages. */ - public int prepare2Phase(// - final UUID[] joinedServiceIds, // - final Set<UUID> nonJoinedPipelineServiceIds,// - final IRootBlockView rootBlock,// - final long timeout, final TimeUnit unit// - ) - throws InterruptedException, TimeoutException, IOException { + public PrepareResponse prepare2Phase(final PrepareRequest req) + throws InterruptedException, IOException { - if (rootBlock == null) - throw new IllegalArgumentException(); + if (log.isInfoEnabled()) + log.info("req=" + req); - if (unit == null) - throw new IllegalArgumentException(); + final IRootBlockView rootBlock = req.getRootBlock(); + + final UUID[] joinedServiceIds = req.getJoinedServiceIds(); - final boolean isRootBlock0 = rootBlock.isRootBlock0(); - - if (log.isInfoEnabled()) - log.info("isRootBlock0=" + isRootBlock0 + ", rootBlock=" - + rootBlock + ", #joined=" + joinedServiceIds.length - + ", #nonJoined=" + nonJoinedPipelineServiceIds.size() - + ", joinedServices=" + Arrays.toString(joinedServiceIds) - + ", nonJoined=" + nonJoinedPipelineServiceIds - + ", timeout=" + timeout + ", unit=" + unit); - +// final Set<UUID> nonJoinedPipelineServiceIds = req +// .getNonJoinedPipelineServiceIds(); + + final long timeout = req.getTimeout(); + + final TimeUnit unit = req.getUnit(); + /* * The token of the quorum for which the leader issued this prepare * message. @@ -187,12 +179,12 @@ // #of remote followers (joined services, excluding the leader). final int nfollowers = (joinedServiceIds.length - 1); - // #of non-joined services in the pipeline. - final int nNonJoinedPipelineServices = nonJoinedPipelineServiceIds - .size(); +// // #of non-joined services in the pipeline. +// final int nNonJoinedPipelineServices = nonJoinedPipelineServiceIds +// .size(); // #of remote services (followers plus others in the pipeline). - final int remoteServiceCount = nfollowers + nNonJoinedPipelineServices; + final int remoteServiceCount = nfollowers;// + nNonJoinedPipelineServices; // Random access list of futures. final ArrayList<Future<Boolean>> remoteFutures = new ArrayList<Future<Boolean>>( @@ -238,30 +230,30 @@ } - // Next, message the pipeline services NOT met with the quorum. - { +// // Next, message the pipeline services NOT met with the quorum. +// { +// +// // message for non-joined services. 
+// final IHA2PhasePrepareMessage msg = new HA2PhasePrepareMessage( +// false/* isJoinedService */, rootBlock, timeout, unit); +// +// for (UUID serviceId : nonJoinedPipelineServiceIds) { +// +// /* +// * Runnable which will execute this message on the +// * remote service. +// */ +// final Future<Boolean> rf = getService(serviceId) +// .prepare2Phase(msg); +// +// // add to list of futures we will check. +// remoteFutures.set(i, rf); +// +// i++; +// +// } +// } - // message for non-joined services. - final IHA2PhasePrepareMessage msg = new HA2PhasePrepareMessage( - false/* isJoinedService */, rootBlock, timeout, unit); - - for (UUID serviceId : nonJoinedPipelineServiceIds) { - - /* - * Runnable which will execute this message on the - * remote service. - */ - final Future<Boolean> rf = getService(serviceId) - .prepare2Phase(msg); - - // add to list of futures we will check. - remoteFutures.set(i, rf); - - i++; - - } - } - /* * Finally, run the operation on the leader using local method * call (non-RMI) in the caller's thread to avoid deadlock. @@ -292,6 +284,7 @@ */ int nyes = 0; assert remoteFutures.size() == remoteServiceCount + 1; + final boolean[] votes = new boolean[remoteServiceCount + 1]; for (int i = 0; i <= remoteServiceCount; i++) { final Future<Boolean> rf = remoteFutures.get(i); if (rf == null) @@ -301,6 +294,7 @@ remaining = nanos - (begin - System.nanoTime()); final boolean vote = rf .get(remaining, TimeUnit.NANOSECONDS); + votes[i] = vote; if (i < joinedServiceIds.length) { // Only the leader and the followers get a vote. nyes += vote ? 1 : 0; @@ -311,21 +305,27 @@ } } done = true; + } catch (CancellationException ex) { + // This Future was cancelled. + log.error(ex, ex); + done = true; // CancellationException indicates isDone(). + } catch (TimeoutException ex) { + // Timeout on this Future. + log.error(ex, ex); + done = false; } catch (ExecutionException ex) { /* - * TODO prepare2Phase() is throwing exceptions if - * preconditions are violated. Unless if is a joined - * service, it probably should just vote "no" instead. We do - * not need to log @ ERROR when a precondition for a - * non-joined service has been violated. + * Note: prepare2Phase() is throwing exceptions if + * preconditions are violated. These thrown exceptions are + * interpreted as a "NO" vote. */ log.error(ex, ex); - done = true; // Note: ExecutionException indicates isDone(). + done = true; // ExecutionException indicates isDone(). } catch (RuntimeException ex) { /* - * Note: ClientFuture.get() can throw a RuntimeException - * if there is a problem with the RMI call. In this case - * we do not know whether the Future is done. + * Note: ClientFuture.get() can throw a RuntimeException if + * there is a problem with the RMI call. In this case we do + * not know whether the Future is done. */ log.error(ex, ex); } finally { @@ -340,15 +340,26 @@ } } + // The quorum replication factor. final int k = getQuorum().replicationFactor(); - if (!getQuorum().isQuorum(nyes)) { + /* + * Note: The leader MUST vote YES in order for the commit to + * continue. In addition, we need a majority of the joined services. + * In practice, for an HA3 configuration, this means that up to one + * follower could fail and the commit would still go through. + * However, if the leader fails then the commit will fail as well. 
+ */ + final boolean willCommit = votes[0] && getQuorum().isQuorum(nyes); - log.error("prepare rejected: nyes=" + nyes + " out of " + k); + if (!willCommit) { + log.error("prepare rejected: leader=" + votes[0] + ", nyes=" + + nyes + " out of " + k); + } - return nyes; + return new PrepareResponse(k, nyes, willCommit, votes); } finally { /* @@ -370,24 +381,45 @@ } - public void commit2Phase(final UUID[] joinedServiceIds, // - final Set<UUID> nonJoinedPipelineServiceIds,// - final long token, final long commitTime) throws IOException, + public void commit2Phase(final CommitRequest req) throws IOException, InterruptedException { if (log.isInfoEnabled()) - log.info("token=" + token + ", commitTime=" + commitTime - + ", #joined=" + joinedServiceIds.length + ", #nonJoined=" - + nonJoinedPipelineServiceIds.size() + ", joinedServices=" - + Arrays.toString(joinedServiceIds) + ", nonJoined=" - + nonJoinedPipelineServiceIds); + log.info("req=" + req); /* * To minimize latency, we first submit the futures for the other * services and then do f.run() on the leader. This will allow the other * services to commit concurrently with the leader's IO. + * + * Note: Only services that voted "YES" will get a commit2Phase message. + * + * Note: Do NOT message the services that voted NO. [At one point the + * code was modified to message each joined and non-joined service. That + * change was introduced to support services that join during the + * 2-phase commit. However, we have since resolved the service join by + * relying on the service blocking the pipeline writes in + * handleReplicatedWrite(). Since we can reliably know that there will + * not be a concurrent commit, we can atomically join an existing quorum + * and we do not need to make the 2-phase commit protocol visible to the + * non-joined services. Thus we do not need to push the 2-phase commit + * protocol to a service that is not joined with the met quorum at the + * atomic decision point concerning such things in commitNow().] */ + + final PrepareRequest preq = req.getPrepareRequest(); + final UUID[] joinedServiceIds = preq.getJoinedServiceIds(); + +// final Set<UUID> nonJoinedPipelineServiceIds = preq +// .getNonJoinedPipelineServiceIds(); + + final long token = preq.getRootBlock().getQuorumToken(); + + final long commitTime = preq.getRootBlock().getLastCommitTime(); + + final PrepareResponse presp = req.getPrepareResponse(); + member.assertLeader(token); final List<Future<Void>> remoteFutures = new LinkedList<Future<Void>>(); @@ -401,6 +433,13 @@ final UUID serviceId = joinedServiceIds[i]; + if (!presp.getVote(i)) { + + // Skip services that did not vote YES in PREPARE. + continue; + + } + /* * Runnable which will execute this message on the remote * service. @@ -413,27 +452,27 @@ } - if (!nonJoinedPipelineServiceIds.isEmpty()) { +// if (!nonJoinedPipelineServiceIds.isEmpty()) { +// +// final IHA2PhaseCommitMessage msgNonJoinedService = new HA2PhaseCommitMessage( +// false/* isJoinedService */, commitTime); +// +// for (UUID serviceId : nonJoinedPipelineServiceIds) { +// +// /* +// * Runnable which will execute this message on the remote +// * service. +// */ +// final Future<Void> rf = getService(serviceId).commit2Phase( +// msgNonJoinedService); +// +// // add to list of futures we will check. 
+// remoteFutures.add(rf); +// +// } +// +// } - final IHA2PhaseCommitMessage msgNonJoinedService = new HA2PhaseCommitMessage( - false/* isJoinedService */, commitTime); - - for (UUID serviceId : nonJoinedPipelineServiceIds) { - - /* - * Runnable which will execute this message on the remote - * service. - */ - final Future<Void> rf = getService(serviceId).commit2Phase( - msgNonJoinedService); - - // add to list of futures we will check. - remoteFutures.add(rf); - - } - - } - { /* * Run the operation on the leader using local method call in @@ -455,11 +494,18 @@ for (Future<Void> rf : remoteFutures) { boolean done = false; try { - rf.get(); + rf.get(); // TODO Timeout to await followers in commit2Phase(). done = true; - } catch (InterruptedException ex) { +// } catch (TimeoutException ex) { +// // Timeout on this Future. +// log.error(ex, ex); +// causes.add(ex); +// done = false; + } catch (CancellationException ex) { + // Future was cancelled. log.error(ex, ex); causes.add(ex); + done = true; // Future is done since cancelled. } catch (ExecutionException ex) { log.error(ex, ex); causes.add(ex); @@ -486,9 +532,6 @@ /* * If there were any errors, then throw an exception listing them. - * - * FIXME But only throw the exception if the errors were for a joined - * service. Otherwise just log. */ if (!causes.isEmpty()) { // Cancel remote futures. @@ -518,6 +561,11 @@ } + /** + * FIXME Only issue abort to services that voted YES in prepare? [We have + * that information in commitNow(), but we do not have the atomic set of + * joined services in AbstractJournal.abort())]. + */ public void abort2Phase(final long token) throws IOException, InterruptedException { Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/QuorumPipelineImpl.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/QuorumPipelineImpl.java 2013-05-15 11:55:06 UTC (rev 7126) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/QuorumPipelineImpl.java 2013-05-15 14:32:20 UTC (rev 7127) @@ -1,1442 +1,1557 @@ -/** - -Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. - -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ -package com.bigdata.ha; - -import java.io.Externalizable; -import java.io.IOException; -import java.io.ObjectInput; -import java.io.ObjectOutput; -import java.net.InetSocketAddress; -import java.nio.ByteBuffer; -import java.util.UUID; -import java.util.concurrent.Callable; -import java.util.concurrent.CancellationException; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.Future; -import java.util.concurrent.FutureTask; -import java.util.concurrent.RunnableFuture; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.TimeoutException; -import java.util.concurrent.atomic.AtomicReference; -import java.util.concurrent.locks.Lock; -import java.util.concurrent.locks.ReentrantLock; - -import org.apache.log4j.Logger; - -import com.bigdata.ha.msg.HAWriteMessageBase; -import com.bigdata.ha.msg.IHALogRequest; -import com.bigdata.ha.msg.IHAMessage; -import com.bigdata.ha.msg.IHASyncRequest; -import com.bigdata.ha.msg.IHAWriteMessage; -import com.bigdata.ha.pipeline.HAReceiveService; -import com.bigdata.ha.pipeline.HAReceiveService.IHAReceiveCallback; -import com.bigdata.ha.pipeline.HASendService; -import com.bigdata.io.DirectBufferPool; -import com.bigdata.io.IBufferAccess; -import com.bigdata.io.writecache.WriteCache; -import com.bigdata.quorum.Quorum; -import com.bigdata.quorum.QuorumException; -import com.bigdata.quorum.QuorumMember; -import com.bigdata.quorum.QuorumStateChangeListener; -import com.bigdata.quorum.QuorumStateChangeListenerBase; -import com.bigdata.util.InnerCause; - -/** - * {@link QuorumPipeline} implementation. - * <p> - * The {@link QuorumMember} must pass along the "pipeline" messages, including: - * <ul> - * <li>{@link QuorumMember#pipelineAdd()}</li> - * <li>{@link QuorumMember#pipelineRemove()}</li> - * <li>{@link QuorumMember#pipelineChange(UUID, UUID)}</li> - * </ul> - * When a quorum is met, the <i>leader</i> is always first in the write pipeline - * since it is the node which receives writes from clients. When a service joins - * the write pipeline, it always does so at the end of the chain. Services may - * enter the write pipeline before joining a quorum in order to synchronize with - * the quorum. If a service in the middle of the chain leaves the pipeline, then - * the upstream node will reconfigure and retransmit the current cache block to - * its new downstream node. This prevent nodes which are "bouncing" during - * synchronization from causing write sets to be discarded. However, if the - * leader leaves the write pipeline, then the quorum is broken and the write set - * will be discarded. - * <p> - * Since the write pipeline is used to synchronize services trying to join the - * quorum as well as the replicate writes for services joined with the quorum, - * {@link HAReceiveService} may be live for a met quorum even though the - * {@link QuorumMember} on whose behalf this class is acting is not joined with - * the met quorum. - * - * <h3>Pipeline maintenance</h3> - * - * There are three broad categories which have to be handled: (1) leader leaves; - * (2) pipeline leader election; and (3) follower leaves. A leader leave causes - * the quorum to break, which will cause service leaves and pipeline leaves for - * all joined services. 
However, services must add themselves to the pipeline - * before they join the quorum and the pipeline will be reorganized if necessary - * when the quorum leader is elected. This will result in a - * {@link #pipelineElectedLeader()} event. A follower leave only causes the - * follower to leave the pipeline and results in a - * {@link #pipelineChange(UUID, UUID)} event. - * <p> - * There are two cases for a follower leave: (A) when the follower did not did - * not have a downstream node; and (B) when there is downstream node. For (B), - * the upstream node from the left follower should reconfigure for the new - * downstream node and retransmit the current cache block and the event should - * be otherwise unnoticed. - * <p> - * Handling a follower join requires us to synchronize the follower first which - * requires some more infrastructure and should be done as part of the HA - * synchronization test suite. - * <p> - * What follows is an example of how events will arrive for a quorum of three - * services: A, B, and C. - * - * <pre> - * A.getActor().pipelineAdd() => A.pipelineAdd() - * B.getActor().pipelineAdd() => B.pipelineAdd(); A.pipelineChange(null,B); - * C.getActor().pipelineAdd() => C.pipelineAdd(); B.pipelineChange(null,C); - * </pre> - * - * At this point the pipeline order is <code>[A,B,C]</code>. Notice that the - * {@link HASendService} for A is not established until the - * <code>A.pipelineChange(null,B)</code> sets B as the new downstream service - * for A. Likewise, B will not relay to C until it handles the - * <code>B.pipelineChange(null,C)</code> event. - * - * <p> - * - * Given the pipeline order <code>[A,B,C]</code>, if B were to leave, then the - * events would be: - * - * <pre> - * B.getActor().pipelineRemove() => B.pipelineRemove(); A.pipelineChange(B,C); - * </pre> - * - * and when this class handles the <code>A.pipelineChange(B,C)</code> event, it - * must update the {@link HAReceiveService} such that it now relays data to C. - * - * <p> - * - * On the other hand, given the pipeline order <code>[A,B,C]</code>, if C were - * to leave the events would be: - * - * <pre> - * C.getActor().pipelineRemove() => C.pipelineRemove(); B.pipelineChange(C,null); - * </pre> - * - * and when this class handles the <code>B.pipelineChange(C,null)</code> event, - * it must update the C's {@link HAReceiveService} such that it continues to - * receive data, but no longer relays data to a downstream service. - * - * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id$ - * @param <S> - */ -abstract public class QuorumPipelineImpl<S extends HAPipelineGlue> extends - QuorumStateChangeListenerBase implements QuorumPipeline<S>, - QuorumStateChangeListener { - - static private transient final Logger log = Logger - .getLogger(QuorumPipelineImpl.class); - - /** - * The timeouts for a sleep before the next retry. These timeouts are - * designed to allow some asynchronous processes to reconnect the - * {@link HASendService} and the {@link HAReceiveService}s in write pipeline - * such that a retransmit can succeed after a service has left the pipeline. - * Depending on the nature of the error (i.e., a transient network problem - * versus a pipeline reorganization), this can involve a number of zookeeper - * events. Hence the sleep latency is backed off through this array of - * values. - * - * TODO We do not want to induce too much latency here. 
It would be nice if - * we automatically retried after each relevant quorum event that might cure - * the problem as well as after a timeout. This would require a Condition - * that we await with a timeout and signaling the Condition if there are any - * relevant events (probably once we handle them locally). - */ - static protected final int RETRY_SLEEP[] = new int[] { 100, 200, 200, 500, 500, 1000 }; - - /** - * The {@link QuorumMember}. - */ - protected final QuorumMember<S> member; - - /** - * The service {@link UUID} for the {@link QuorumMember}. - */ - protected final UUID serviceId; - - /** - * Lock managing the various mutable aspects of the pipeline state. - */ - private final ReentrantLock lock = new ReentrantLock(); - - /** send service (iff this is the leader). */ - private HASendService sendService; - - /** - * The receive service (iff this is a follower in a met quorum). - */ - private HAReceiveService<HAMessageWrapper> receiveService; - - /** - * The buffer used to relay the data. This is only allocated for a - * follower. - */ - private IBufferAccess receiveBuffer; - - /** - * Cached metadata about the downstream service. - */ - private final AtomicReference<PipelineState<S>> pipelineStateRef = new AtomicReference<PipelineState<S>>(); - - public QuorumPipelineImpl(final QuorumMember<S> member) { - - if (member == null) - throw new IllegalArgumentException(); - - this.member = member; - - this.serviceId = member.getServiceId(); - - } - - /** - * Extended to invoke {@link #tearDown()} in order to guarantee the eventual - * release of the {@link #receiveBuffer} and the shutdown of the - * {@link #sendService} or {@link #receiveService}. - */ - @Override - protected void finalize() throws Throwable { - - tearDown(); - - super.finalize(); - - } - - /** - * Return the index at which the given serviceId appears in the array of - * serviceIds. - * - * @param serviceId - * The {@link UUID} of some quorum member. - * @param a - * An array of service {@link UUID}s. - * - * @return The index of the service in the array -or- <code>-1</code> if the - * service does not appear in the array. - */ - private int getIndex(final UUID serviceId, final UUID[] a) { - - if (serviceId == null) - throw new IllegalArgumentException(); - - for (int i = 0; i < a.length; i++) { - - if (serviceId.equals(a[i])) { - - return i; - - } - } - - return -1; - - } - - /** - * Return the NIO buffer used to receive payloads written on the HA write - * pipeline. - * - * @return The buffer -or- <code>null</code> if the pipeline has been torn - * down or if this is the leader. - */ - private ByteBuffer getReceiveBuffer() { - - if (!lock.isHeldByCurrentThread()) { - - // The caller MUST be holding the lock. - throw new IllegalMonitorStateException(); - - } - - // trinary pattern is safe while thread has lock. - return receiveBuffer == null ? null : receiveBuffer.buffer(); - - } - - /** - * Return the {@link HAReceiveService} used to receive payloads written on - * the HA write pipeline. - * - * @return The buffer -or- <code>null</code> if the pipeline has been torn - * down or if this is the leader. - */ - private HAReceiveService<HAMessageWrapper> getHAReceiveService() { - - if (!lock.isHeldByCurrentThread()) { - - // The caller MUST be holding the lock. - throw new IllegalMonitorStateException(); - - } - - return receiveService; - - } - - /** - * Return the {@link HASendService} used to write payloads on the HA write - * pipeline. 
- * - * @return The {@link HASendService} -or- <code>null</code> if the pipeline - * has been torn down. - */ - private HASendService getHASendService() { - - if (!lock.isHeldByCurrentThread()) { - - // The caller MUST be holding the lock. - throw new IllegalMonitorStateException(); - - } - - return sendService; - - } - - /* - * QuorumStateChangeListener - */ - -// /** -// * Extended to setup this service as a leader ({@link #setUpLeader()}), -// * or a relay ({@link #setUpReceiveAndRelay()}. -// */ -// @Override -// public void quorumMeet(final long token, final UUID leaderId) { -// super.quorumMeet(token, leaderId); -// lock.lock(); -// try { -// this.token = token; -// if(leaderId.equals(serviceId)) { -// setUpLeader(); -// } else if(member.isPipelineMember()) { -// setUpReceiveAndRelay(); -// } -// } finally { -// lock.unlock(); -// } -// } - -// @Override -// public void quorumBreak() { -// super.quorumBreak(); -// lock.lock(); -// try { -// tearDown(); -// } finally { -// lock.unlock(); -// } -// } - - /** - * Sets up the {@link HASendService} or the {@link HAReceiveService} as - * appropriate depending on whether or not this service is the first in the - * pipeline order. - */ - public void pipelineAdd() { - if (log.isInfoEnabled()) - log.info(""); - super.pipelineAdd(); - lock.lock(); - try { - // The current pipeline order. - final UUID[] pipelineOrder = member.getQuorum().getPipeline(); - // The index of this service in the pipeline order. - final int index = getIndex(serviceId, pipelineOrder); - if (index == 0) { - setUpSendService(); - } else - if (index > 0) { - setUpReceiveService(); - } - } finally { - lock.unlock(); - } - } - - public void pipelineElectedLeader() { - if (log.isInfoEnabled()) - log.info(""); - super.pipelineElectedLeader(); - lock.lock(); - try { - tearDown(); - setUpSendService(); - } finally { - lock.unlock(); - } - } - - /** - * Tears down the {@link HASendService} or {@link HAReceiveService} - * associated with this service. - */ - @Override - public void pipelineRemove() { - if (log.isInfoEnabled()) - log.info(""); - super.pipelineRemove(); - lock.lock(); - try { - tearDown(); - } finally { - lock.unlock(); - } - } - - /** - * Changes the target of the {@link HASendService} for the leader (or the - * {@link HAReceiveService} for a follower) to send (or relay) write cache - * blocks to the specified service. - */ - public void pipelineChange(final UUID oldDownStreamId, - final UUID newDownStreamId) { - super.pipelineChange(oldDownStreamId, newDownStreamId); - lock.lock(); - try { - // The address of the next service in the pipeline. - final InetSocketAddress addrNext = newDownStreamId == null ? null - : getAddrNext(newDownStreamId); - if (log.isInfoEnabled()) - log.info("oldDownStreamId=" + oldDownStreamId - + ",newDownStreamId=" + newDownStreamId + ", addrNext=" - + addrNext + ", sendService=" + sendService - + ", receiveService=" + receiveService); - if (sendService != null) { - /* - * Terminate the existing connection (we were the first service - * in the pipeline). - */ - sendService.terminate(); - if (addrNext != null) { - if (log.isDebugEnabled()) - log.debug("sendService.start(): addrNext=" + addrNext); - sendService.start(addrNext); - } - } else if (receiveService != null) { - /* - * Reconfigure the receive service to change how it is relaying - * (we were relaying, so the receiveService was running but not - * the sendService). 
- */ - if (log.isDebugEnabled()) - log.debug("receiveService.changeDownStream(): addrNext=" - + addrNext); - receiveService.changeDownStream(addrNext); - } - // populate and/or clear the cache. - cachePipelineState(newDownStreamId); - if (log.isDebugEnabled()) - log.debug("pipelineChange - done."); - } finally { - lock.unlock(); - } - } - - @Override - public void pipelineUpstreamChange() { - super.pipelineUpstreamChange(); - lock.lock(); - try { - if (receiveService != null) { - /* - * Make sure that the receiveService closes out its client - * connection with the old upstream service. - */ - if (log.isInfoEnabled()) - log.info("receiveService=" + receiveService); - receiveService.changeUpStream(); - } - } finally { - lock.unlock(); - } - } - - /** - * Request the {@link InetSocketAddress} of the write pipeline for a service - * (RMI). - * - * @param downStreamId - * The service. - * - * @return It's {@link InetSocketAddress} - */ - private InetSocketAddress getAddrNext(final UUID downStreamId) { - - if (downStreamId == null) - return null; - - final S service = member.getService(downStreamId); - - try { - - final InetSocketAddress addrNext = service.getWritePipelineAddr(); - - return addrNext; - - } catch (IOException e) { - - throw new RuntimeException(e); - - } - - } - - /** - * Tear down any state associated with the {@link QuorumPipelineImpl}. This - * implementation tears down the send/receive service and releases the - * receive buffer. - */ - private void tearDown() { - if (log.isInfoEnabled()) - log.info(""); - lock.lock(); - try { - /* - * Leader tear down. - */ - { - if (sendService != null) { - sendService.terminate(); - sendService = null; - } - } - /* - * Follower tear down. - */ - { - if (receiveService != null) { - receiveService.terminate(); - try { - receiveService.awaitShutdown(); - } catch (InterruptedException e) { - throw new RuntimeException(e); - } finally { - receiveService = null; - } - } - if (receiveBuffer != null) { - try { - /* - * Release the buffer back to the pool. - */ - receiveBuffer.release(); - } catch (InterruptedException e) { - throw new RuntimeException(e); - } finally { - receiveBuffer = null; - } - } - } - // clear cache. - pipelineStateRef.set(null); - } finally { - lock.unlock(); - } - } - - /** - * Populate or clear the {@link #pipelineState} cache. - * <p> - * Note: The only times we need to populate the {@link #pipelineState} are - * in response to a {@link #pipelineChange(UUID, UUID)} event or in response - * to message a {@link #pipelineElectedLeader()} event. - * - * @param downStreamId - * The downstream service {@link UUID}. - */ - private void cachePipelineState(final UUID downStreamId) { - - if (downStreamId == null) { - - pipelineStateRef.set(null); - - return; - - } - - final S nextService = member.getService(downStreamId); - - final PipelineState<S> pipelineState = new PipelineState<S>(); - - try { - - pipelineState.addr = nextService.getWritePipelineAddr(); - - } catch (IOException e) { - - throw new RuntimeException(e); - - } - - pipelineState.service = nextService; - - this.pipelineStateRef.set(pipelineState); - - } - - /** - * Setup the send service. - */ - private void setUpSendService() { - if (log.isInfoEnabled()) - log.info(""); - lock.lock(); - try { - // Allocate the send service. - sendService = new HASendService(); - /* - * The service downstream from this service. - * - * Note: The downstream service in the pipeline is not available - * when the first service adds itself to the pipeline. 
In those - * cases the pipelineChange() event is used to update the - * HASendService to send to the downstream service. - * - * Note: When we handle a pipelineLeaderElected() message the - * downstream service MAY already be available, which is why we - * handle downstreamId != null conditionally. - */ - final UUID downstreamId = member.getDownstreamServiceId(); - if (downstreamId != null) { - // The address of the next service in the pipeline. - final InetSocketAddress addrNext = member.getService( - downstreamId).getWritePipelineAddr(); - // Start the send service. - sendService.start(addrNext); - } - // populate and/or clear the cache. - cachePipelineState(downstreamId); - } catch (Throwable t) { - try { - tearDown(); - } catch (Throwable t2) { - log.error(t2, t2); - } - throw new RuntimeException(t); - } finally { - lock.unlock(); - } - } - - /** - * Glue class wraps the {@link IHAWriteMessage} and the - * {@link IHALogRequest} message and exposes the requires {@link IHAMessage} - * interface to the {@link HAReceiveService}. This class is never persisted. - * It just let's us handshake with the {@link HAReceiveService} and get back - * out the original {@link IHAWriteMessage} as well as the optional - * {@link IHALogRequest} message. - * - * @author <a href="mailto:tho...@us...">Bryan - * Thompson</a> - */ - private static class HAMessageWrapper extends HAWriteMessageBase { - - private static final long serialVersionUID = 1L; - - final IHASyncRequest req; - final IHAWriteMessage msg; - - public HAMessageWrapper(final IHASyncRequest req, - final IHAWriteMessage msg) { - - // Use size and checksum from real IHAWriteMessage. - super(msg.getSize(),msg.getChk()); - - this.req = req; // MAY be null; - this.msg = msg; - - } - - } - - /** - * Setup the service to receive pipeline writes and to relay them (if there - * is a downstream service). - */ - private void setUpReceiveService() { - lock.lock(); - try { - // The downstream service UUID. - final UUID downstreamId = member.getDownstreamServiceId(); - // Acquire buffer from the pool to receive data. - try { - receiveBuffer = DirectBufferPool.INSTANCE.acquire(); - } catch (InterruptedException e) { - throw new RuntimeException(e); - } - // The address of this service. - final InetSocketAddress addrSelf = member.getService() - .getWritePipelineAddr(); - // Address of the downstream service (if any). - final InetSocketAddress addrNext = downstreamId == null ? null - : member.getService(downstreamId).getWritePipelineAddr(); - // Setup the receive service. - receiveService = new HAReceiveService<HAMessageWrapper>(addrSelf, - addrNext, new IHAReceiveCallback<HAMessageWrapper>() { - public void callback(final HAMessageWrapper msg, - final ByteBuffer data) throws Exception { - // delegate handling of write cache blocks. - handleReplicatedWrite(msg.req, msg.msg, data); - } - }); - // Start the receive service - will not return until service is - // running - receiveService.start(); - } catch (Throwable t) { - /* - * Always tear down if there was a setup problem to avoid leaking - * threads or a native ByteBuffer. - */ - try { - tearDown(); - } catch (Throwable t2) { - log.error(t2, t2); - } finally { - log.error(t, t); - } - throw new RuntimeException(t); - } finally { - lock.unlock(); - } - } - - /* - * This is the leader, so send() the buffer. 
- */ - @Override - public Future<Void> replicate(final IHASyncRequest req, - final IHAWriteMessage msg, final ByteBuffer b) throws IOException { - - final RunnableFuture<Void> ft; - - lock.lock(); - try { - - ft = new FutureTask<Void>(new RobustReplicateTask(req, msg, b)); - - } finally { - - lock.unlock(); - - } - - // Submit Future for execution (outside of the lock). - member.getExecutor().execute(ft); - - // Return Future. Caller must wait on the Future. - return ft; - - } - - /** - * Task robustly replicates an {@link IHAWriteMessage} and the associated - * payload. - */ - private class RobustReplicateTask implements Callable<Void> { - - /** - * An historical message is indicated when the {@link IHASyncRequest} is - * non-<code>null</code>. - */ - private final IHASyncRequest req; - - /** - * The {@link IHAWriteMessage}. - */ - private final IHAWriteMessage msg; - - /** - * The associated payload. - */ - private final ByteBuffer b; - - /** - * The token for the leader. The service that initiates the replication - * of a message MUST be the leader for this token. - * <p> - * The token is either taken from the {@link IHAWriteMessage} (if this - * is a live write) or from the current {@link Quorum#token()}. - * <p> - * Either way, we verify that this service is (and remains) the leader - * for that token throughout the {@link Robus... [truncated message content] |
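
Taken together, the diffs above replace the long prepare2Phase()/commit2Phase() argument lists with value objects: prepare2Phase() now accepts a PrepareRequest and returns a PrepareResponse carrying the per-service votes (the leader's vote is at index 0), and commit2Phase() accepts a CommitRequest that pairs the request with its response. The sketch below shows how a leader might drive that revised API. It is illustrative only, not the actual commitNow() code; the CommitRequest constructor, the import paths, and the omission of generic type parameters are assumptions made for the example.

{{{
import java.io.IOException;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

import com.bigdata.ha.CommitRequest;
import com.bigdata.ha.PrepareRequest;
import com.bigdata.ha.PrepareResponse;
import com.bigdata.ha.QuorumCommit;
import com.bigdata.journal.IRootBlockView;

/**
 * Illustrative leader-side driver for the revised 2-phase commit API.
 * Assumes a CommitRequest(PrepareRequest, PrepareResponse) constructor,
 * which is not shown in the diff above.
 */
class TwoPhaseCommitSketch {

    void commitOnce(final QuorumCommit quorumCommit,
            final UUID[] joinedServiceIds,
            final Set<UUID> nonJoinedPipelineServiceIds,
            final IRootBlockView rootBlock, final long timeout,
            final TimeUnit unit) throws IOException, InterruptedException,
            TimeoutException {

        // Bundle the PREPARE parameters into a single request object.
        final PrepareRequest preq = new PrepareRequest(joinedServiceIds,
                nonJoinedPipelineServiceIds, rootBlock, timeout, unit);

        // PREPARE: the leader now receives the full vote tally.
        final PrepareResponse presp = quorumCommit.prepare2Phase(preq);

        if (presp.willCommit()) {

            // COMMIT: only services that voted YES are messaged.
            quorumCommit.commit2Phase(new CommitRequest(preq, presp));

        } else {

            // Leader voted NO, or too few YES votes: abort the write set.
            quorumCommit.abort2Phase(rootBlock.getQuorumToken());

        }

    }

}
}}}

The design point is that the PREPARE outcome now travels as a single value object, so the same PrepareResponse that decides willCommit() also lets commit2Phase() skip the services that voted NO, as the presp.getVote(i) check in QuorumCommitImpl above does.
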
From: <tho...@us...> - 2013-05-15 20:39:39
Revision: 7130 http://bigdata.svn.sourceforge.net/bigdata/?rev=7130&view=rev Author: thompsonbry Date: 2013-05-15 20:39:30 +0000 (Wed, 15 May 2013) Log Message: ----------- I performed a code review searching for all instances in the project of {{{ System.nanoTime() }}} Problems were identified in - QuorumCommitImpl.prepare2Phase(); - ServicesManagerStartupTask - JiniCoreServicesConfiguration - JiniServiceConfiguration - ZookeeperServerConfiguration. - ProcessHelper.exitValue() - WORMStrategy.writeOnChannel() (this is only ever invoked with Long.MAX_VALUE, but the code was still wrong). The RWStore version of writeOnChannel() is actually ignoring the timeout (I've added a TODO, but per above the timeout is Long.MAX_VALUE). - AsynchronousOverflowTask.runTasksInSingleThread() - JiniFederation.awaitJiniRegistrars() - Haltable - Latch.await() - ZLockImpl.lock() - ZLockImpl.awaitZLockNanos() - ZookeeperAccessor.awaitZookeeperConnected() - AbstractTransactionService.findUnusedTimestamp() I've added unit tests for Latch and Haltable that verify the bug and the fix. The errors in ZLockImpl could explain some odd behaviors in CI for the JiniFederation test suite. @see https://sourceforge.net/apps/trac/bigdata/ticket/676 (Bad patterns for timeout computations) Modified Paths: -------------- branches/READ_CACHE/bigdata/src/java/com/bigdata/io/writecache/WriteCache.java branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/WORMStrategy.java branches/READ_CACHE/bigdata/src/java/com/bigdata/resources/AsynchronousOverflowTask.java branches/READ_CACHE/bigdata/src/java/com/bigdata/service/AbstractTransactionService.java branches/READ_CACHE/bigdata/src/java/com/bigdata/util/concurrent/Haltable.java branches/READ_CACHE/bigdata/src/java/com/bigdata/util/concurrent/Latch.java branches/READ_CACHE/bigdata/src/test/com/bigdata/util/concurrent/TestAll.java branches/READ_CACHE/bigdata/src/test/com/bigdata/util/concurrent/TestHaltable.java branches/READ_CACHE/bigdata/src/test/com/bigdata/util/concurrent/TestLatch.java branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/jini/start/ServicesManagerStartupTask.java branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/jini/start/config/JiniCoreServicesConfiguration.java branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/jini/start/config/JiniServiceConfiguration.java branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/jini/start/config/ZookeeperServerConfiguration.java branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/jini/start/process/ProcessHelper.java branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/service/jini/JiniFederation.java branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/zookeeper/ZLockImpl.java branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/zookeeper/ZooKeeperAccessor.java branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/AbstractHA3JournalServerTestCase.java Added Paths: ----------- branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestCyclicBarrier.java Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/io/writecache/WriteCache.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/io/writecache/WriteCache.java 2013-05-15 18:35:21 UTC (rev 7129) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/io/writecache/WriteCache.java 2013-05-15 20:39:30 UTC (rev 7130) @@ -1737,6 +1737,8 @@ /** * Called by WriteCacheService to process a direct write for large * blocks and also to flush data from dirty caches. 
+ * + * TODO The [nanos] parameter is ignored. */ protected boolean writeOnChannel(final ByteBuffer data, final long firstOffsetIgnored, Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/WORMStrategy.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/WORMStrategy.java 2013-05-15 18:35:21 UTC (rev 7129) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/WORMStrategy.java 2013-05-15 20:39:30 UTC (rev 7130) @@ -1066,7 +1066,7 @@ try { - remaining -= (System.nanoTime() - begin); + remaining = nanos - (System.nanoTime() - begin); final int dpos = data.position(); final int nbytes = data.remaining(); Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/resources/AsynchronousOverflowTask.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/resources/AsynchronousOverflowTask.java 2013-05-15 18:35:21 UTC (rev 7129) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/resources/AsynchronousOverflowTask.java 2013-05-15 20:39:30 UTC (rev 7130) @@ -3145,14 +3145,16 @@ final long begin = System.nanoTime(); // remaining nanoseconds in which to execute overflow tasks. - long nanos = TimeUnit.MILLISECONDS + final long nanos = TimeUnit.MILLISECONDS .toNanos(resourceManager.overflowTimeout); + long remaining = nanos; + final Iterator<AbstractTask> titr = tasks.iterator(); int ndone = 0; - while (titr.hasNext() && nanos > 0) { + while (titr.hasNext() && remaining > 0) { final boolean shouldOverflow = resourceManager .isOverflowEnabled() @@ -3182,10 +3184,10 @@ final Future<? extends Object> f = resourceManager .getConcurrencyManager().submit(task); - getFutureForTask(f, task, nanos, TimeUnit.NANOSECONDS); + getFutureForTask(f, task, remaining, TimeUnit.NANOSECONDS); - nanos -= (System.nanoTime() - begin); - + remaining = nanos - (System.nanoTime() - begin); + ndone++; } Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/service/AbstractTransactionService.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/service/AbstractTransactionService.java 2013-05-15 18:35:21 UTC (rev 7129) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/service/AbstractTransactionService.java 2013-05-15 20:39:30 UTC (rev 7130) @@ -1861,11 +1861,11 @@ final long nextCommitTime, final long timeout, final TimeUnit unit) throws InterruptedException, TimeoutException { - long nanos = unit.toNanos(timeout); - final long begin = System.nanoTime(); + final long nanos = unit.toNanos(timeout); + long remaining = nanos; - while (nanos >= 0) { + while (remaining >= 0) { for (long t = commitTime; t < nextCommitTime; t++) { @@ -1908,15 +1908,15 @@ * Note: throws InterruptedException */ - nanos -= (System.nanoTime() - begin); + remaining = nanos - (System.nanoTime() - begin); - if (!txDeactivate.await(nanos, TimeUnit.NANOSECONDS)) { + if (!txDeactivate.await(remaining, TimeUnit.NANOSECONDS)) { throw new TimeoutException(); } - nanos -= (System.nanoTime() - begin); + remaining = nanos - (System.nanoTime() - begin); } Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/util/concurrent/Haltable.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/util/concurrent/Haltable.java 2013-05-15 18:35:21 UTC (rev 7129) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/util/concurrent/Haltable.java 2013-05-15 
20:39:30 UTC (rev 7130) @@ -258,14 +258,15 @@ final public V get(final long timeout, final TimeUnit unit) throws InterruptedException, ExecutionException, TimeoutException { final long begin = System.nanoTime(); - long nanos = unit.toNanos(timeout); - if (lock.tryLock(nanos, TimeUnit.NANOSECONDS)) { + final long nanos = unit.toNanos(timeout); + long remaining = nanos; + if (lock.tryLock(remaining, TimeUnit.NANOSECONDS)) { try { // subtract out the elapsed time - nanos -= (System.nanoTime() - begin); + remaining = nanos - (System.nanoTime() - begin); while (!halt) { - if (nanos > 0) - nanos = halted.awaitNanos(nanos); + if (remaining > 0) + remaining = halted.awaitNanos(remaining); else throw new TimeoutException(); } Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/util/concurrent/Latch.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/util/concurrent/Latch.java 2013-05-15 18:35:21 UTC (rev 7129) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/util/concurrent/Latch.java 2013-05-15 20:39:30 UTC (rev 7130) @@ -339,47 +339,29 @@ public boolean await(final long timeout, final TimeUnit unit) throws InterruptedException { - long nanos = unit.toNanos(timeout); final long start = System.nanoTime(); + final long nanos = unit.toNanos(timeout); + long remaining = nanos; - if (lock.tryLock(nanos, TimeUnit.NANOSECONDS)) { + if (lock.tryLock(remaining, TimeUnit.NANOSECONDS)) { try { // subtract out the lock waiting time. - nanos -= (System.nanoTime() - start); + remaining = nanos - (System.nanoTime() - start); long c; while ((c = counter.get()) != 0) { if (c < 0) throw new IllegalStateException(toString()); - if (nanos > 0) - nanos = cond.awaitNanos(nanos); + if (remaining > 0) + remaining = cond.awaitNanos(remaining); else return false; } return true; -// if (counter.get() == 0) { -// -// if (log.isInfoEnabled()) -// log.info("Done waiting (true)."); -// -// // don't wait. -// return true; -// -// } -// -// if (cond.await(nanos, TimeUnit.NANOSECONDS)) { -// -// if (log.isInfoEnabled()) -// log.info("Done waiting (true)"); -// -// return true; -// -// } - } finally { lock.unlock(); @@ -387,9 +369,6 @@ } } -// if (log.isInfoEnabled()) -// log.info("Timeout (false) : counter" + counter); - // Timeout. 
return false; Modified: branches/READ_CACHE/bigdata/src/test/com/bigdata/util/concurrent/TestAll.java =================================================================== --- branches/READ_CACHE/bigdata/src/test/com/bigdata/util/concurrent/TestAll.java 2013-05-15 18:35:21 UTC (rev 7129) +++ branches/READ_CACHE/bigdata/src/test/com/bigdata/util/concurrent/TestAll.java 2013-05-15 20:39:30 UTC (rev 7130) @@ -27,10 +27,6 @@ package com.bigdata.util.concurrent; - -import com.bigdata.jsr166.LinkedBlockingDequeTest; -import com.bigdata.jsr166.LinkedBlockingQueueTest; - import junit.framework.Test; import junit.framework.TestCase; import junit.framework.TestSuite; Modified: branches/READ_CACHE/bigdata/src/test/com/bigdata/util/concurrent/TestHaltable.java =================================================================== --- branches/READ_CACHE/bigdata/src/test/com/bigdata/util/concurrent/TestHaltable.java 2013-05-15 18:35:21 UTC (rev 7129) +++ branches/READ_CACHE/bigdata/src/test/com/bigdata/util/concurrent/TestHaltable.java 2013-05-15 20:39:30 UTC (rev 7130) @@ -29,6 +29,8 @@ import java.util.concurrent.CancellationException; import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; import junit.framework.TestCase2; @@ -120,4 +122,45 @@ } -} + public void test_get_timeout() throws InterruptedException, ExecutionException { + + final Haltable<Long> f = new Haltable<Long>(); + + assertFalse(f.isDone()); + assertFalse(f.isCancelled()); + assertNull(f.getCause()); + + final Long result = Long.valueOf(12); + + { + final long timeout = TimeUnit.SECONDS.toNanos(1L); + final long begin = System.nanoTime(); + try { + f.get(timeout, TimeUnit.NANOSECONDS); + fail("Expecting: " + TimeoutException.class); + } catch (TimeoutException e) { + // ignore + } + final long elapsed = System.nanoTime() - begin; + if (elapsed < timeout || (elapsed > (2 * timeout))) { + fail("elapsed=" + elapsed + ", timeout=" + timeout); + } + } + + // set the result. + f.halt(result); + + assertTrue(result == f.get()); + + assertTrue(f.isDone()); + assertFalse(f.isCancelled()); + assertNull(f.getCause()); + assertNull(f.getAsThrownCause()); + + assertFalse(f.cancel(true/*mayInterruptIfRunning*/)); + + assertTrue(result == f.get()); + + } + +} \ No newline at end of file Modified: branches/READ_CACHE/bigdata/src/test/com/bigdata/util/concurrent/TestLatch.java =================================================================== --- branches/READ_CACHE/bigdata/src/test/com/bigdata/util/concurrent/TestLatch.java 2013-05-15 18:35:21 UTC (rev 7129) +++ branches/READ_CACHE/bigdata/src/test/com/bigdata/util/concurrent/TestLatch.java 2013-05-15 20:39:30 UTC (rev 7130) @@ -34,7 +34,6 @@ import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; -import java.util.concurrent.TimeoutException; import junit.framework.TestCase2; @@ -111,8 +110,8 @@ latch.inc(); - if(!latch.await(100, TimeUnit.MILLISECONDS)) - throw new TimeoutException(); + if (!latch.await(100, TimeUnit.MILLISECONDS)) + fail("Expecting latch to decrement to zero."); return null; @@ -132,7 +131,8 @@ latch.dec(); - future.get(); + // Verify normal return. + assertNull(future.get()); } finally { @@ -213,5 +213,48 @@ assertEquals(0, latch.addAndGet(-1)); } + + /** + * Test of {@link Latch#await(long, TimeUnit)}. 
+ * @throws InterruptedException + */ + public void test5() throws InterruptedException { + + final Latch latch = new Latch(); + + assertEquals(latch.get(), 0); + + assertEquals(latch.inc(), 1); + + assertEquals(latch.get(), 1); + + { + final long timeout = TimeUnit.SECONDS.toNanos(1L); + final long begin = System.nanoTime(); + // await latch to decrement to zero. + assertFalse(latch.await(timeout, TimeUnit.NANOSECONDS)); + final long elapsed = System.nanoTime() - begin; + if (elapsed < timeout || (elapsed > (2 * timeout))) { + fail("elapsed=" + elapsed + ", timeout=" + timeout); + } + } + + assertEquals(latch.get(), 1); + + assertEquals(latch.dec(), 0); + + assertTrue(latch.await(1, TimeUnit.SECONDS)); + + try { + latch.dec(); + fail("Expecting: " + IllegalStateException.class); + } catch (IllegalStateException ex) { + if (log.isInfoEnabled()) + log.info("Ignoring expected error: " + ex); + } + + assertEquals(latch.get(), 0); + + } } Modified: branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/jini/start/ServicesManagerStartupTask.java =================================================================== --- branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/jini/start/ServicesManagerStartupTask.java 2013-05-15 18:35:21 UTC (rev 7129) +++ branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/jini/start/ServicesManagerStartupTask.java 2013-05-15 20:39:30 UTC (rev 7130) @@ -169,10 +169,11 @@ final long begin = System.nanoTime(); - long nanos = selfConfig.zookeeperDiscoveryTimeoutNanos; + final long nanos = selfConfig.zookeeperDiscoveryTimeoutNanos; + long remaining = nanos; // await zookeeper connection. - if (!fed.getZookeeperAccessor().awaitZookeeperConnected(nanos, + if (!fed.getZookeeperAccessor().awaitZookeeperConnected(remaining, TimeUnit.NANOSECONDS)) { throw new Exception( @@ -180,7 +181,7 @@ } - nanos -= (System.nanoTime() - begin); + remaining = nanos - (System.nanoTime() - begin); /* * @todo Should have its own timeout value (using zk's). Or just get @@ -188,7 +189,7 @@ * discovered and you can kill it if there is a problem. */ // await jini registrar(s) - if (!fed.awaitJiniRegistrars(nanos, TimeUnit.NANOSECONDS)) { + if (!fed.awaitJiniRegistrars(remaining, TimeUnit.NANOSECONDS)) { throw new Exception( "No jini registrars: startup sequence aborted."); Modified: branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/jini/start/config/JiniCoreServicesConfiguration.java =================================================================== --- branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/jini/start/config/JiniCoreServicesConfiguration.java 2013-05-15 18:35:21 UTC (rev 7129) +++ branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/jini/start/config/JiniCoreServicesConfiguration.java 2013-05-15 20:39:30 UTC (rev 7130) @@ -49,7 +49,6 @@ import com.bigdata.jini.start.process.JiniCoreServicesProcessHelper; import com.bigdata.service.jini.JiniClientConfig; import com.bigdata.service.jini.util.LookupStarter; -import com.bigdata.service.jini.util.LookupStarter; import com.sun.jini.start.NonActivatableServiceDescriptor; import com.sun.jini.start.ServiceStarter; @@ -308,7 +307,7 @@ final long begin = System.nanoTime(); - long nanos = unit.toNanos(timeout); + final long nanos = unit.toNanos(timeout); /* * The #of registrars that we can locate on this host within a @@ -332,7 +331,7 @@ final LookupLocator locators[] = lst.toArray(new LookupLocator[0]); // adjust for elapsed time. 
- nanos -= (System.nanoTime() - begin); + final long remaining = nanos - (System.nanoTime() - begin); /* * Look for at least one registrar on the local host using the @@ -341,13 +340,10 @@ final ServiceRegistrar[] registrars = getServiceRegistrars(1/* maxCount */, clientConfig.groups, /* clientConfig. */locators, - nanos, TimeUnit.NANOSECONDS); + remaining, TimeUnit.NANOSECONDS); // elapsed time (ns). final long elapsed = (System.nanoTime() - begin); - - // adjust for elapsed time. - nanos -= elapsed; if (log.isInfoEnabled()) log @@ -389,10 +385,6 @@ long timeout, final TimeUnit unit) throws InterruptedException, IOException { - final long begin = System.nanoTime(); - - timeout = unit.toNanos(timeout); - final Object signal = new Object(); final LookupDiscoveryManager discovery = new LookupDiscoveryManager(groups, @@ -428,20 +420,29 @@ try { - long elapsed; - // demand some results. ServiceRegistrar[] registrars = new ServiceRegistrar[0]; - while ((timeout -= (elapsed = (System.nanoTime() - begin))) > 0 - && registrars.length < maxCount) { + final long begin = System.nanoTime(); + final long nanos = unit.toNanos(timeout); + + long remaining = nanos; + + while (remaining > 0 && registrars.length < maxCount) { + + remaining = nanos - (System.nanoTime() - begin); + synchronized (signal) { try { - signal.wait(TimeUnit.NANOSECONDS.toMillis(timeout)); + + signal.wait(TimeUnit.NANOSECONDS.toMillis(remaining)); + } catch(InterruptedException ex) { + // fall through + } if(log.isDebugEnabled()) @@ -453,9 +454,14 @@ } - if (log.isInfoEnabled()) + if (log.isInfoEnabled()) { + + final long elapsed = System.nanoTime() - begin; + log.info("Found " + registrars.length + " registrars in " + TimeUnit.NANOSECONDS.toMillis(elapsed) + "ms."); + + } return registrars; Modified: branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/jini/start/config/JiniServiceConfiguration.java =================================================================== --- branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/jini/start/config/JiniServiceConfiguration.java 2013-05-15 18:35:21 UTC (rev 7129) +++ branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/jini/start/config/JiniServiceConfiguration.java 2013-05-15 20:39:30 UTC (rev 7130) @@ -763,21 +763,23 @@ final long begin = System.nanoTime(); - long nanos = unit.toNanos(timeout); + final long nanos = unit.toNanos(timeout); + + long remaining = nanos; // wait for the service to be discovered final ServiceItem serviceItem = awaitServiceDiscoveryOrDeath( - processHelper, nanos, TimeUnit.NANOSECONDS); + processHelper, remaining, TimeUnit.NANOSECONDS); // proxy will be used for destroy(). processHelper.setServiceItem(serviceItem); // subtract out the time we already waited. 
- nanos -= (System.nanoTime() - begin); + remaining = nanos - (System.nanoTime() - begin); // wait for the ephemeral znode for the service to be created awaitZNodeCreatedOrDeath(serviceItem, processHelper, - nanos, TimeUnit.NANOSECONDS); + remaining, TimeUnit.NANOSECONDS); } Modified: branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/jini/start/config/ZookeeperServerConfiguration.java =================================================================== --- branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/jini/start/config/ZookeeperServerConfiguration.java 2013-05-15 18:35:21 UTC (rev 7129) +++ branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/jini/start/config/ZookeeperServerConfiguration.java 2013-05-15 20:39:30 UTC (rev 7130) @@ -697,7 +697,7 @@ final long begin = System.nanoTime(); - long nanos = unit.toNanos(timeout); + final long nanos = unit.toNanos(timeout); try { @@ -721,9 +721,6 @@ } - // adjust for time remaining. - nanos = (System.nanoTime() - begin); - /* * Verify that an instance is up and running by connecting to the * client port on the local host. @@ -740,12 +737,11 @@ ZooHelper.ruok(thisInetAddr, clientPort); - // adjust for time remaining. - nanos = (System.nanoTime() - begin); + final long elapsed = (System.nanoTime() - begin); log.warn("Started zookeeper: elapsed=" - + TimeUnit.NANOSECONDS.toMillis(nanos)); - + + TimeUnit.NANOSECONDS.toMillis(elapsed)); + } /** Modified: branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/jini/start/process/ProcessHelper.java =================================================================== --- branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/jini/start/process/ProcessHelper.java 2013-05-15 18:35:21 UTC (rev 7129) +++ branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/jini/start/process/ProcessHelper.java 2013-05-15 20:39:30 UTC (rev 7130) @@ -165,12 +165,12 @@ * if the caller's thread was interrupted awaiting the exit * value. */ - public int exitValue(long timeout, final TimeUnit unit) + public int exitValue(final long timeout, final TimeUnit unit) throws TimeoutException, InterruptedException { final long begin = System.nanoTime(); - timeout = unit.toNanos(timeout); + final long nanos = unit.toNanos(timeout); lock.lock(); @@ -192,14 +192,12 @@ } - final long elapsed = System.nanoTime() - begin; - - timeout -= elapsed; - - if (timeout <= 0) + final long remaining = nanos - (System.nanoTime() - begin); + + if (remaining <= 0) throw new TimeoutException(); - dead.await(timeout, TimeUnit.NANOSECONDS); + dead.await(remaining, TimeUnit.NANOSECONDS); } Modified: branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/service/jini/JiniFederation.java =================================================================== --- branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/service/jini/JiniFederation.java 2013-05-15 18:35:21 UTC (rev 7129) +++ branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/service/jini/JiniFederation.java 2013-05-15 20:39:30 UTC (rev 7130) @@ -468,19 +468,20 @@ final long begin = System.nanoTime(); // nanoseconds remaining. - long nanos = unit.toNanos(timeout); + final long nanos = unit.toNanos(timeout); + long remaining = nanos; ServiceRegistrar[] registrars = null; while (((registrars = lookupDiscoveryManager.getRegistrars()).length == 0) - && (nanos -= (System.nanoTime() - begin)) > 0) { + && ((remaining = nanos - (System.nanoTime() - begin)) > 0)) { discoveryEventLock.lockInterruptibly(); try { // await another discovery event, but not more than the time // remaining. 
- discoveryEvent.awaitNanos(nanos); + discoveryEvent.awaitNanos(remaining); } finally { @@ -488,6 +489,8 @@ } + remaining = nanos - (System.nanoTime() - begin); + } if (registrars.length == 0) { Modified: branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/zookeeper/ZLockImpl.java =================================================================== --- branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/zookeeper/ZLockImpl.java 2013-05-15 18:35:21 UTC (rev 7129) +++ branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/zookeeper/ZLockImpl.java 2013-05-15 20:39:30 UTC (rev 7130) @@ -567,20 +567,21 @@ * {@link ZooKeeper} client, but it can be handled at the * application layer in a number of ways. */ - protected boolean awaitZLockNanos(long nanos) + protected boolean awaitZLockNanos(final long nanos) throws InterruptedException, SessionExpiredException { final long begin = System.nanoTime(); + long remaining = nanos; lock.lockInterruptibly(); try { - while ((nanos -= (System.nanoTime() - begin)) > 0 + while ((remaining = nanos - (System.nanoTime() - begin)) > 0 && !zlockGranted && !cancelled) { if (log.isDebugEnabled()) log.debug("remaining=" - + TimeUnit.NANOSECONDS.toMillis(nanos) + "ms"); + + TimeUnit.NANOSECONDS.toMillis(remaining) + "ms"); try { @@ -634,8 +635,10 @@ * is granted to the caller. */ - zlock.awaitNanos((nanos -= (System.nanoTime() - begin))); + remaining = nanos - (System.nanoTime() - begin); + zlock.awaitNanos(remaining); + } // while if (cancelled) { @@ -645,10 +648,10 @@ } if (log.isDebugEnabled()) - log.debug("nanos remaining=" + nanos); + log.debug("nanos remaining=" + remaining); // lock granted iff nanos remaining is GT zero. - return nanos > 0; + return remaining > 0; } finally { @@ -965,7 +968,8 @@ final long begin = System.nanoTime(); - long nanos = unit.toNanos(timeout); + final long nanos = unit.toNanos(timeout); + long remaining = nanos; /* * Ensure that the lock node exists. @@ -996,14 +1000,14 @@ this.watcher = new ZLockWatcher(zchild); - nanos -= (System.nanoTime() - begin); + remaining = nanos - (System.nanoTime() - begin); try { /* Note: The state reported here is incomplete since [priorZChild] is not set until we test things in awaitZLockNanos(). */ - if(log.isInfoEnabled()) - log.info("Will await zlock: "+this); + if (log.isInfoEnabled()) + log.info("Will await zlock: " + this); - if(!watcher.awaitZLockNanos(nanos)) { + if(!watcher.awaitZLockNanos(remaining)) { // timeout (lock not granted). throw new TimeoutException(); Modified: branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/zookeeper/ZooKeeperAccessor.java =================================================================== --- branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/zookeeper/ZooKeeperAccessor.java 2013-05-15 18:35:21 UTC (rev 7129) +++ branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/zookeeper/ZooKeeperAccessor.java 2013-05-15 20:39:30 UTC (rev 7130) @@ -446,11 +446,12 @@ final long begin = System.nanoTime(); // nanoseconds remaining. - long nanos = unit.toNanos(timeout); + final long nanos = unit.toNanos(timeout); + long remaining = nanos; ZooKeeper.States state = null; - while ((nanos -= (System.nanoTime() - begin)) > 0) { + while ((remaining = nanos - (System.nanoTime() - begin)) > 0) { switch (state = getZookeeper().getState()) { @@ -469,7 +470,8 @@ // wait a bit, but not more than the time remaining. 
lock.lockInterruptibly(); try { - event.awaitNanos(nanos); + remaining = nanos - (System.nanoTime() - begin); + event.awaitNanos(remaining); } finally { lock.unlock(); } Modified: branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/AbstractHA3JournalServerTestCase.java =================================================================== --- branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/AbstractHA3JournalServerTestCase.java 2013-05-15 18:35:21 UTC (rev 7129) +++ branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/AbstractHA3JournalServerTestCase.java 2013-05-15 20:39:30 UTC (rev 7130) @@ -31,9 +31,7 @@ import java.io.InputStream; import java.io.OutputStream; import java.io.PrintWriter; -import java.io.RandomAccessFile; import java.io.StringWriter; -import java.nio.channels.FileLock; import java.rmi.Remote; import java.util.Arrays; import java.util.LinkedList; @@ -1731,9 +1729,9 @@ final long timeout, final TimeUnit unit) throws Exception, TimeoutException, InterruptedException { - final long begin = System.nanoTime(); +// final long begin = System.nanoTime(); - long nanos = unit.toNanos(timeout); + final long nanos = unit.toNanos(timeout); // wait for the service to be discovered serviceItem = awaitServiceDiscoveryOrDeath(processHelper, @@ -1742,9 +1740,9 @@ // // proxy will be used for destroy(). // processHelper.setServiceItem(serviceItem); - // subtract out the time we already waited. - nanos -= (System.nanoTime() - begin); - +// // subtract out the time we already waited. +// final long remaining = nanos - (System.nanoTime() - begin); +// // // TODO (restore) wait for the ephemeral znode for the service to be created // awaitZNodeCreatedOrDeath(serviceItem, processHelper, nanos, // TimeUnit.NANOSECONDS); Added: branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestCyclicBarrier.java =================================================================== --- branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestCyclicBarrier.java (rev 0) +++ branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestCyclicBarrier.java 2013-05-15 20:39:30 UTC (rev 7130) @@ -0,0 +1,54 @@ +package com.bigdata.journal.jini.ha; + +import java.util.concurrent.BrokenBarrierException; +import java.util.concurrent.CyclicBarrier; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; + +import junit.framework.TestCase; + +/** + * This test demonstrates that {@link CyclicBarrier} does not adhere to its + * documentation for {@link CyclicBarrier#await(long, TimeUnit)}. This means + * that we can not use this variant in the release time consensus protocol since + * we must also watch for service leaves, etc. + * <p> + * Note: We can still use the {@link CyclicBarrier#await()} as long as we + * <em>interrupt</em> one of the threads that is blocked in + * {@link CyclicBarrier#await()}. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + */ +public class TestCyclicBarrier extends TestCase { + + public TestCyclicBarrier() { + + } + + public TestCyclicBarrier(String name) { + super(name); + } + + public void test_cyclicBarrier_awaitTimeout() throws InterruptedException, + BrokenBarrierException, TimeoutException { + + final CyclicBarrier b = new CyclicBarrier(2); + + assertFalse(b.isBroken()); + + try { + + b.await(1000, TimeUnit.MILLISECONDS); + + fail("Barrier should not be broken"); + + } catch (TimeoutException ex) { + + // The barrier should not be broken. 
+ assertFalse("barrier broke with timeout.", b.isBroken()); + + } + + } + +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2013-05-16 14:27:07
|
Revision: 7133 http://bigdata.svn.sourceforge.net/bigdata/?rev=7133&view=rev Author: thompsonbry Date: 2013-05-16 14:26:59 +0000 (Thu, 16 May 2013) Log Message: ----------- Added the ability to demand a local service REBUILD (disaster recover). This is available on HAGlue#rebuildFromLeader(). There is also an (unpublished, unsupported) ability to do this using /status?rebuild. @see https://sourceforge.net/apps/trac/bigdata/ticket/530 (Journal HA) Modified Paths: -------------- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/HAGlue.java branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractJournal.java branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournal.java branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournalServer.java branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/HAJournalTest.java branches/READ_CACHE/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/HAStatusServletUtil.java Added Paths: ----------- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/msg/HARemoteRebuildRequest.java branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/msg/IHARemoteRebuildRequest.java Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/HAGlue.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/HAGlue.java 2013-05-16 12:35:45 UTC (rev 7132) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/HAGlue.java 2013-05-16 14:26:59 UTC (rev 7133) @@ -37,6 +37,7 @@ import com.bigdata.ha.msg.IHAGlobalWriteLockRequest; import com.bigdata.ha.msg.IHALogDigestRequest; import com.bigdata.ha.msg.IHALogDigestResponse; +import com.bigdata.ha.msg.IHARemoteRebuildRequest; import com.bigdata.ha.msg.IHARootBlockRequest; import com.bigdata.ha.msg.IHARootBlockResponse; import com.bigdata.ha.msg.IHASnapshotDigestRequest; @@ -45,6 +46,7 @@ import com.bigdata.ha.msg.IHASnapshotResponse; import com.bigdata.journal.AbstractJournal; import com.bigdata.journal.Journal; +import com.bigdata.journal.jini.ha.HAJournalServer; import com.bigdata.quorum.AsynchronousQuorumCloseException; import com.bigdata.quorum.QuorumException; import com.bigdata.service.IService; @@ -232,4 +234,31 @@ Future<IHASnapshotResponse> takeSnapshot(IHASnapshotRequest req) throws IOException; + /** + * Disaster recovery (REBUILD) of the local database instance from the + * leader of a met quorum. + * + * There are several preconditions: + * <ul> + * + * <li>The quorum must be met and there must be an + * {@link HAStatusEnum#Ready} leader.</li> + * + * <li>This service must be {@link HAStatusEnum#NotReady}.</li> + * + * <li>This service MUST NOT be at the same commit point as the leader (if + * it is, then the service could meet in a data race with the met quorum and + * we do not permit RESTORE if the service is joined with the met quorum).</li> + * + * <li>The {@link HAJournalServer} must not be running a RESTORE (we don't + * want it to accidentally interrupt a RESTORE that is in progress).</li> + * + * </ul> + * + * @return The (asynchronous) {@link Future} of the REBUILD operation -or- + * <code>null</code> if any of the pre-conditions were violated. 
+ */ + Future<Void> rebuildFromLeader(IHARemoteRebuildRequest req) + throws IOException; + } Added: branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/msg/HARemoteRebuildRequest.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/msg/HARemoteRebuildRequest.java (rev 0) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/msg/HARemoteRebuildRequest.java 2013-05-16 14:26:59 UTC (rev 7133) @@ -0,0 +1,39 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +package com.bigdata.ha.msg; + +import java.io.Serializable; + +public class HARemoteRebuildRequest implements IHARemoteRebuildRequest, + Serializable { + + private static final long serialVersionUID = 1L; + + /** + */ + public HARemoteRebuildRequest() { + + } + +} Added: branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/msg/IHARemoteRebuildRequest.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/msg/IHARemoteRebuildRequest.java (rev 0) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/msg/IHARemoteRebuildRequest.java 2013-05-16 14:26:59 UTC (rev 7133) @@ -0,0 +1,40 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +package com.bigdata.ha.msg; + +import com.bigdata.ha.HAGlue; + +/** + * Message requesting disaster recovery of the service from the quorum + * leader. This message is sent to the service that must be recovered. + * The service will then message the leader to send its backing state. 
+ * + * @see HAGlue#rebuildFromLeader() + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + */ +public interface IHARemoteRebuildRequest extends IHAMessage { + + +} Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractJournal.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractJournal.java 2013-05-16 12:35:45 UTC (rev 7132) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractJournal.java 2013-05-16 14:26:59 UTC (rev 7133) @@ -126,6 +126,7 @@ import com.bigdata.ha.msg.IHAReadRequest; import com.bigdata.ha.msg.IHAReadResponse; import com.bigdata.ha.msg.IHARebuildRequest; +import com.bigdata.ha.msg.IHARemoteRebuildRequest; import com.bigdata.ha.msg.IHARootBlockRequest; import com.bigdata.ha.msg.IHARootBlockResponse; import com.bigdata.ha.msg.IHASendStoreResponse; @@ -5477,7 +5478,7 @@ * * @see HAGlue#getHAStatus() */ - final protected HAStatusEnum getHAStatus() { + final public HAStatusEnum getHAStatus() { if (quorum == null) { @@ -5936,6 +5937,14 @@ } + @Override + public Future<Void> rebuildFromLeader(IHARemoteRebuildRequest req) + throws IOException { + + throw new UnsupportedOperationException(); + + } + /** * Return a proxy object for a {@link Future} suitable for use in an RMI * environment (the default implementation returns its argument). Modified: branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournal.java =================================================================== --- branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournal.java 2013-05-16 12:35:45 UTC (rev 7132) +++ branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournal.java 2013-05-16 14:26:59 UTC (rev 7133) @@ -68,6 +68,7 @@ import com.bigdata.ha.msg.HADigestResponse; import com.bigdata.ha.msg.HALogDigestResponse; import com.bigdata.ha.msg.HALogRootBlocksResponse; +import com.bigdata.ha.msg.HARemoteRebuildRequest; import com.bigdata.ha.msg.HASendStoreResponse; import com.bigdata.ha.msg.HASnapshotDigestResponse; import com.bigdata.ha.msg.IHADigestRequest; @@ -79,6 +80,7 @@ import com.bigdata.ha.msg.IHALogRootBlocksRequest; import com.bigdata.ha.msg.IHALogRootBlocksResponse; import com.bigdata.ha.msg.IHARebuildRequest; +import com.bigdata.ha.msg.IHARemoteRebuildRequest; import com.bigdata.ha.msg.IHASendStoreResponse; import com.bigdata.ha.msg.IHASnapshotDigestRequest; import com.bigdata.ha.msg.IHASnapshotDigestResponse; @@ -1472,6 +1474,59 @@ } + @Override + public Future<Void> rebuildFromLeader(final IHARemoteRebuildRequest req) + throws IOException { + + final HAQuorumService<HAGlue, HAJournal> localService = getQuorumService(); + + final RunStateEnum innerRunState = (localService == null ? null + : localService.getRunStateEnum()); + + if(innerRunState == null) + return null; + + switch (innerRunState) { + case Error: + case SeekConsensus: + case Operator: { + + if (localService == null) + return null; + + final Future<Void> f = localService + .rebuildFromLeader(new HARemoteRebuildRequest()); + + if (f == null) + return null; + + haLog.warn("Started REBUILD"); + + return getProxy(f, true/* async */); + + } + case Rebuild: + // Already running rebuild. + return null; + case Restore: + // Running restore. Can not do rebuild. + return null; + case Resync: + // Running resync. Can not do rebuild. + return null; + case RunMet: + // RunMet. Can not do rebuild. + return null; + case Shutdown: + // Shutting down. 
Can not do rebuild. + return null; + default: + // Unknown run state. + throw new AssertionError("innerRunState=" + innerRunState); + } + + } + /** * Note: The invocation layer factory is reused for each exported proxy (but * the exporter itself is paired 1:1 with the exported proxy). Modified: branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournalServer.java =================================================================== --- branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournalServer.java 2013-05-16 12:35:45 UTC (rev 7132) +++ branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournalServer.java 2013-05-16 14:26:59 UTC (rev 7133) @@ -60,6 +60,7 @@ import com.bigdata.concurrent.FutureTaskMon; import com.bigdata.ha.HAGlue; import com.bigdata.ha.HAPipelineGlue; +import com.bigdata.ha.HAStatusEnum; import com.bigdata.ha.QuorumService; import com.bigdata.ha.QuorumServiceBase; import com.bigdata.ha.halog.HALogWriter; @@ -73,6 +74,7 @@ import com.bigdata.ha.msg.IHALogRequest; import com.bigdata.ha.msg.IHALogRootBlocksResponse; import com.bigdata.ha.msg.IHARebuildRequest; +import com.bigdata.ha.msg.IHARemoteRebuildRequest; import com.bigdata.ha.msg.IHASendStoreResponse; import com.bigdata.ha.msg.IHASnapshotResponse; import com.bigdata.ha.msg.IHASyncRequest; @@ -1149,12 +1151,68 @@ } /** + * Enter RESTORE. + * + * @return + * @throws IOException + * + * @see HAGlue#rebuildFromLeader(IHARemoteRebuildRequest) + */ + public Future<Void> rebuildFromLeader(final IHARemoteRebuildRequest req) + throws IOException { + + final Quorum<HAGlue, QuorumService<HAGlue>> quorum = getQuorum(); + + final QuorumService<HAGlue> localService = quorum.getClient(); + + if (localService == null) + return null; + + final long token = quorum.token(); + + if (journal.getHAStatus() != HAStatusEnum.NotReady) + return null; + + final UUID leaderId = quorum.getLeaderId(); + + if (leaderId == null) + return null; + + final HAGlue leader = localService.getService(leaderId); + + if (leader.getHAStatus() != HAStatusEnum.Leader) { + + return null; + + } + + final IRootBlockView leaderRB = leader.getRootBlock( + new HARootBlockRequest(null/* storeUUID */)).getRootBlock(); + + final IRootBlockView localRB = journal.getRootBlockView(); + + if (leaderRB.getCommitCounter() == localRB.getCommitCounter()) { + + // At the same commit point. + return null; + + } + + // Re-verify. + if (journal.getHAStatus() != HAStatusEnum.NotReady) + return null; + + return enterRunState(new RebuildTask(token)); + + } + + /** * Change the run state. * * @param runStateTask * The task for the new run state. 
*/ - private void enterRunState(final RunStateCallable<Void> runStateTask) { + private Future<Void> enterRunState(final RunStateCallable<Void> runStateTask) { if (runStateTask == null) throw new IllegalArgumentException(); @@ -1181,6 +1239,8 @@ // haLog.info("Entering runState=" // + runStateTask.getClass().getSimpleName()); + return ft; + } finally { if (oldFuture != null) { Modified: branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/HAJournalTest.java =================================================================== --- branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/HAJournalTest.java 2013-05-16 12:35:45 UTC (rev 7132) +++ branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/HAJournalTest.java 2013-05-16 14:26:59 UTC (rev 7133) @@ -70,6 +70,7 @@ import com.bigdata.ha.msg.IHAReadRequest; import com.bigdata.ha.msg.IHAReadResponse; import com.bigdata.ha.msg.IHARebuildRequest; +import com.bigdata.ha.msg.IHARemoteRebuildRequest; import com.bigdata.ha.msg.IHARootBlockRequest; import com.bigdata.ha.msg.IHARootBlockResponse; import com.bigdata.ha.msg.IHASendStoreResponse; @@ -673,6 +674,16 @@ } @Override + public Future<Void> rebuildFromLeader(IHARemoteRebuildRequest req) throws IOException { + + checkMethod("restoreFromLeader", + new Class[] { IHARemoteRebuildRequest.class }); + + return super.rebuildFromLeader(req); + + } + + @Override public Future<Void> globalWriteLock(IHAGlobalWriteLockRequest req) throws IOException, TimeoutException, InterruptedException { Modified: branches/READ_CACHE/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/HAStatusServletUtil.java =================================================================== --- branches/READ_CACHE/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/HAStatusServletUtil.java 2013-05-16 12:35:45 UTC (rev 7132) +++ branches/READ_CACHE/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/HAStatusServletUtil.java 2013-05-16 14:26:59 UTC (rev 7133) @@ -43,7 +43,9 @@ import com.bigdata.ha.HAStatusEnum; import com.bigdata.ha.QuorumService; import com.bigdata.ha.halog.IHALogReader; +import com.bigdata.ha.msg.HARemoteRebuildRequest; import com.bigdata.ha.msg.HASnapshotRequest; +import com.bigdata.ha.msg.IHARemoteRebuildRequest; import com.bigdata.journal.IIndexManager; import com.bigdata.journal.IRootBlockView; import com.bigdata.journal.RootBlockView; @@ -70,6 +72,17 @@ */ public class HAStatusServletUtil { + /** + * Disaster recover of this service from the leader (REBUILD). + * + * @see HAGlue#rebuildFromLeader(IHARemoteRebuildRequest) + * + * TODO Move this declaration to {@link StatusServlet} once we are done + * reconciling between the 1.2.x maintenance branch and the READ_CACHE + * branch. + */ + static final String REBUILD = "rebuild"; + final private IIndexManager indexManager; public HAStatusServletUtil(final IIndexManager indexManager) { @@ -117,6 +130,7 @@ final int njoined = quorum.getJoined().length; + // Note: This is the *local* HAGlueService. final QuorumService<HAGlue> quorumService = quorum.getClient(); final boolean digests = req.getParameter(StatusServlet.DIGESTS) != null; @@ -421,7 +435,7 @@ } /* - * If requested, conditional start a snapshot. + * If requested, conditionally start a snapshot. */ { final String val = req.getParameter(StatusServlet.SNAPSHOT); @@ -446,12 +460,12 @@ // ignore. 
} - ((HAJournal) journal).getSnapshotManager().takeSnapshot( + journal.getSnapshotManager().takeSnapshot( new HASnapshotRequest(percentLogSize)); } - } + } /* * Report if a snapshot is currently running. @@ -468,6 +482,36 @@ } + /** + * If requested, conditionally REBUILD the service from the leader + * (disaster recover). + * + * FIXME This should only be triggered by a POST (it is modestly safe + * since a REBUILD can not be triggered if the service is joined, at the + * same commit point as the leader, or already running, but it is not so + * safe that you should be able to use a GET to demand a REBUILD). + */ + { + + final String val = req.getParameter(HAStatusServletUtil.REBUILD); + + if (val != null) { + + // Local HAGlue interface for this service (not proxy). + final HAGlue haGlue = quorumService.getService(); + + // Request RESTORE. + if (haGlue.rebuildFromLeader(new HARemoteRebuildRequest()) != null) { + + current.node("h2", + "Running Disaster Recovery for this service (REBUILD)."); + + } + + } + + } + + /* + * Display the NSS port, host, and leader/follower/not-joined * status for each service in the quorum.
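For reference, the sketch below shows how a caller (for example a unit test or an operator tool) might invoke the new RMI method against an HAGlue proxy. Obtaining the proxy and managing the quorum are outside the scope of the sketch; other than HAGlue, HARemoteRebuildRequest and rebuildFromLeader(), the class and method names are illustrative only.

{{{
import java.util.concurrent.Future;

import com.bigdata.ha.HAGlue;
import com.bigdata.ha.msg.HARemoteRebuildRequest;

public class RebuildFromLeaderExample {

    /**
     * Request disaster recovery (REBUILD) of the given service from the
     * leader of the met quorum. The service decides whether the request is
     * legal: a null Future means one of the preconditions was violated
     * (the service is joined / not NotReady, it is already at the leader's
     * commit point, or it is already running a REBUILD, RESYNC or RESTORE).
     */
    public static void requestRebuild(final HAGlue service) throws Exception {

        final Future<Void> ft = service
                .rebuildFromLeader(new HARemoteRebuildRequest());

        if (ft == null) {

            // Preconditions not met; nothing was started.
            return;

        }

        // The REBUILD runs asynchronously; block here until it completes.
        ft.get();

    }

}
}}}

The same action can be requested through the (unpublished, unsupported) /status?rebuild servlet parameter shown above; per the FIXME in HAStatusServletUtil, that hook should eventually be restricted to POST requests.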
From: <tho...@us...> - 2013-05-16 20:25:08
|
Revision: 7139 http://bigdata.svn.sourceforge.net/bigdata/?rev=7139&view=rev Author: thompsonbry Date: 2013-05-16 20:24:56 +0000 (Thu, 16 May 2013) Log Message: ----------- Merging changes from the development branch into the HA branch (READ_CACHE). @r7138. See [1] for details on the previous merge. {{{ merge https://bigdata.svn.sourceforge.net/svnroot/bigdata/branches/BIGDATA_RELEASE_1_2_0 /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE --- Merging r7077 through r7138 into /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE U /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/pom.xml U /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/build.properties U /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/bigdata/src/java/com/bigdata/search/FullTextIndex.java U /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/bigdata/src/java/com/bigdata/bop/solutions/JVMDistinctBindingSetsOp.java U /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/bigdata/src/java/com/bigdata/bop/ModifiableBOpBase.java A /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/bigdata/src/java/com/bigdata/bop/join/IDistinctFilter.java U /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/bigdata/src/java/com/bigdata/bop/join/HTreeHashIndexOp.java U /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/bigdata/src/java/com/bigdata/bop/join/JVMHashJoinUtility.java A /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/bigdata/src/java/com/bigdata/bop/join/JVMDistinctFilter.java U /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/bigdata/src/java/com/bigdata/bop/join/HashIndexOp.java U /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/bigdata/src/java/com/bigdata/bop/join/SolutionSetHashJoinOp.java U /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/bigdata/src/java/com/bigdata/bop/join/HTreeHashJoinUtility.java U /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/bigdata/src/java/com/bigdata/bop/join/HashJoinAnnotations.java U /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/bigdata/src/java/com/bigdata/bop/join/JVMHashIndexOp.java A /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/bigdata/src/java/com/bigdata/bop/join/JVMHashIndex.java U /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/bigdata/src/java/com/bigdata/bop/join/JVMSolutionSetHashJoinOp.java U /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/bigdata/src/java/com/bigdata/journal/Name2Addr.java U /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/bigdata/src/java/com/bigdata/journal/Tx.java U /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractTask.java U /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/.classpath A /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/bigdata-sails/src/test/com/bigdata/rdf/sail/TestTicket669.java U /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailWithSids.java U /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailWithoutSids.java A /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/bigdata-sails/src/test/com/bigdata/rdf/sail/TestTicket647.java U /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailWithQuads.java --- Merging r7077 through r7138 into /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp U /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/DeleteServlet.java U 
/Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/InsertServlet.java U /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/UpdateServlet.java U /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/BigdataRDFServlet.java U /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/client/RemoteRepository.java --- Merging r7077 through r7138 into /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE U /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/bigdata-sails/src/java/com/bigdata/rdf/sail/remote/BigdataSailRemoteRepositoryConnection.java A /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/bigdata-rdf/lib/nxparser-1.2.3.jar D /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/bigdata-rdf/lib/nxparser-1.2.2.jar U /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/optimizers/TestASTSubGroupJoinVarOptimizer.java U /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/TestNegation.java U /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/store/AbstractTripleStore.java U /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/store/BD.java U /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPORelation.java A /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/BasicIntQueryHint.java U /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/AbstractQueryHint.java A /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/BasicDoubleQueryHint.java A /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/BasicBooleanQueryHint.java A /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/AbstractStringQueryHint.java U /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/CutoffLimitHint.java A /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/BasicLongQueryHint.java A /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/BasicStringQueryHint.java U /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/GraphPatternGroup.java A /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/SliceServiceFactory.java A /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AbstractServiceFactory.java A /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/SampleServiceFactory.java U /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUtility.java U /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/service/ServiceRegistry.java U /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/FunctionNode.java U 
/Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/AbstractJoinGroupOptimizer.java U /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/ASTRangeOptimizer.java U /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/ASTSparql11SubqueryOptimizer.java U /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/DefaultOptimizerList.java U /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/ASTComplexOptionalOptimizer.java U /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/ASTSubGroupJoinVarOptimizer.java U /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/ASTCardinalityOptimizer.java A /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/ASTFlattenJoinGroupsOptimizer.java U /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/GroupNodeBase.java U /Users/bryan/Documents/workspace/BIGDATA_READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/StatementPatternNode.java Merge complete. ===== File Statistics: ===== Deleted: 1 Added: 16 Updated: 46 }}} No conflicts. [1] https://sourceforge.net/apps/trac/bigdata/ticket/530#comment:111 Revision Links: -------------- http://bigdata.svn.sourceforge.net/bigdata/?rev=7138&view=rev http://bigdata.svn.sourceforge.net/bigdata/?rev=7077&view=rev http://bigdata.svn.sourceforge.net/bigdata/?rev=7138&view=rev http://bigdata.svn.sourceforge.net/bigdata/?rev=7077&view=rev http://bigdata.svn.sourceforge.net/bigdata/?rev=7138&view=rev http://bigdata.svn.sourceforge.net/bigdata/?rev=7077&view=rev http://bigdata.svn.sourceforge.net/bigdata/?rev=7138&view=rev Modified Paths: -------------- branches/READ_CACHE/.classpath branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/ModifiableBOpBase.java branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/join/HTreeHashIndexOp.java branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/join/HTreeHashJoinUtility.java branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/join/HashIndexOp.java branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/join/HashJoinAnnotations.java branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/join/JVMHashIndexOp.java branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/join/JVMHashJoinUtility.java branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/join/JVMSolutionSetHashJoinOp.java branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/join/SolutionSetHashJoinOp.java branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/solutions/JVMDistinctBindingSetsOp.java branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractTask.java branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/Name2Addr.java branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/Tx.java branches/READ_CACHE/bigdata/src/java/com/bigdata/search/FullTextIndex.java branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/FunctionNode.java branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/GraphPatternGroup.java branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/GroupNodeBase.java 
branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/StatementPatternNode.java branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUtility.java branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/AbstractQueryHint.java branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/CutoffLimitHint.java branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/ASTCardinalityOptimizer.java branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/ASTComplexOptionalOptimizer.java branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/ASTRangeOptimizer.java branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/ASTSparql11SubqueryOptimizer.java branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/ASTSubGroupJoinVarOptimizer.java branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/AbstractJoinGroupOptimizer.java branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/DefaultOptimizerList.java branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/service/ServiceRegistry.java branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPORelation.java branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/store/AbstractTripleStore.java branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/store/BD.java branches/READ_CACHE/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/TestNegation.java branches/READ_CACHE/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/optimizers/TestASTSubGroupJoinVarOptimizer.java branches/READ_CACHE/bigdata-sails/src/java/com/bigdata/rdf/sail/remote/BigdataSailRemoteRepositoryConnection.java branches/READ_CACHE/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/BigdataRDFServlet.java branches/READ_CACHE/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/DeleteServlet.java branches/READ_CACHE/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/InsertServlet.java branches/READ_CACHE/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/UpdateServlet.java branches/READ_CACHE/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/client/RemoteRepository.java branches/READ_CACHE/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailWithQuads.java branches/READ_CACHE/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailWithSids.java branches/READ_CACHE/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailWithoutSids.java branches/READ_CACHE/build.properties branches/READ_CACHE/pom.xml Added Paths: ----------- branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/join/IDistinctFilter.java branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/join/JVMDistinctFilter.java branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/join/JVMHashIndex.java branches/READ_CACHE/bigdata-rdf/lib/nxparser-1.2.3.jar branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AbstractServiceFactory.java branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/SampleServiceFactory.java branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/SliceServiceFactory.java branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/AbstractStringQueryHint.java branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/BasicBooleanQueryHint.java branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/BasicDoubleQueryHint.java 
branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/BasicIntQueryHint.java branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/BasicLongQueryHint.java branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/BasicStringQueryHint.java branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/ASTFlattenJoinGroupsOptimizer.java branches/READ_CACHE/bigdata-sails/src/test/com/bigdata/rdf/sail/TestTicket647.java branches/READ_CACHE/bigdata-sails/src/test/com/bigdata/rdf/sail/TestTicket669.java Removed Paths: ------------- branches/READ_CACHE/bigdata-rdf/lib/nxparser-1.2.2.jar Property Changed: ---------------- branches/READ_CACHE/ branches/READ_CACHE/bigdata/lib/jetty/ branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/aggregate/ branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/joinGraph/ branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/util/ branches/READ_CACHE/bigdata/src/java/com/bigdata/htree/raba/ branches/READ_CACHE/bigdata/src/java/com/bigdata/jsr166/ branches/READ_CACHE/bigdata/src/test/com/bigdata/bop/joinGraph/ branches/READ_CACHE/bigdata/src/test/com/bigdata/bop/util/ branches/READ_CACHE/bigdata/src/test/com/bigdata/jsr166/ branches/READ_CACHE/bigdata/src/test/com/bigdata/util/httpd/ branches/READ_CACHE/bigdata-compatibility/ branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/attr/ branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/disco/ branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/util/config/ branches/READ_CACHE/bigdata-perf/ branches/READ_CACHE/bigdata-perf/btc/ branches/READ_CACHE/bigdata-perf/btc/src/resources/ branches/READ_CACHE/bigdata-perf/lubm/ branches/READ_CACHE/bigdata-perf/uniprot/ branches/READ_CACHE/bigdata-perf/uniprot/src/ branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/ branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/changesets/ branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/error/ branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/internal/ branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/relation/ branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/util/ branches/READ_CACHE/bigdata-rdf/src/samples/ branches/READ_CACHE/bigdata-rdf/src/test/com/bigdata/bop/rdf/aggregate/ branches/READ_CACHE/bigdata-rdf/src/test/com/bigdata/rdf/internal/ branches/READ_CACHE/bigdata-rdf/src/test/com/bigdata/rdf/relation/ branches/READ_CACHE/bigdata-sails/src/java/com/bigdata/rdf/sail/bench/ branches/READ_CACHE/bigdata-sails/src/java/com/bigdata/rdf/sail/changesets/ branches/READ_CACHE/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/ branches/READ_CACHE/bigdata-sails/src/test/com/bigdata/rdf/sail/bench/ branches/READ_CACHE/bigdata-sails/src/test/com/bigdata/rdf/sail/webapp/ branches/READ_CACHE/dsi-utils/ branches/READ_CACHE/dsi-utils/LEGAL/ branches/READ_CACHE/dsi-utils/lib/ branches/READ_CACHE/dsi-utils/src/ branches/READ_CACHE/dsi-utils/src/java/ branches/READ_CACHE/dsi-utils/src/java/it/ branches/READ_CACHE/dsi-utils/src/java/it/unimi/ branches/READ_CACHE/dsi-utils/src/test/ branches/READ_CACHE/dsi-utils/src/test/it/unimi/ branches/READ_CACHE/dsi-utils/src/test/it/unimi/dsi/ branches/READ_CACHE/lgpl-utils/src/java/it/unimi/dsi/fastutil/bytes/custom/ branches/READ_CACHE/lgpl-utils/src/test/it/unimi/dsi/fastutil/bytes/custom/ branches/READ_CACHE/osgi/ branches/READ_CACHE/src/resources/bin/config/ Property changes on: branches/READ_CACHE ___________________________________________________________________ Modified: 
svn:mergeinfo - /branches/BIGDATA_OPENRDF_2_6_9_UPDATE:6769-6785 /branches/BIGDATA_RELEASE_1_2_0:6766-7077 /branches/BTREE_BUFFER_BRANCH:2004-2045 /branches/DEV_BRANCH_27_OCT_2009:2270-2546,2548-2782 /branches/INT64_BRANCH:4486-4522 /branches/JOURNAL_HA_BRANCH:2596-4066 /branches/LARGE_LITERALS_REFACTOR:4175-4387 /branches/LEXICON_REFACTOR_BRANCH:2633-3304 /branches/QUADS_QUERY_BRANCH:4525-4531,4550-4584,4586-4609,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH:4814-4836 /branches/bugfix-btm:2594-3237 /branches/dev-btm:2574-2730 /branches/fko:3150-3194 /trunk:3392-3437,3656-4061 + /branches/BIGDATA_OPENRDF_2_6_9_UPDATE:6769-6785 /branches/BIGDATA_RELEASE_1_2_0:6766-7138 /branches/BTREE_BUFFER_BRANCH:2004-2045 /branches/DEV_BRANCH_27_OCT_2009:2270-2546,2548-2782 /branches/INT64_BRANCH:4486-4522 /branches/JOURNAL_HA_BRANCH:2596-4066 /branches/LARGE_LITERALS_REFACTOR:4175-4387 /branches/LEXICON_REFACTOR_BRANCH:2633-3304 /branches/QUADS_QUERY_BRANCH:4525-4531,4550-4584,4586-4609,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH:4814-4836 /branches/bugfix-btm:2594-3237 /branches/dev-btm:2574-2730 /branches/fko:3150-3194 /trunk:3392-3437,3656-4061 Modified: branches/READ_CACHE/.classpath =================================================================== --- branches/READ_CACHE/.classpath 2013-05-16 20:16:19 UTC (rev 7138) +++ branches/READ_CACHE/.classpath 2013-05-16 20:24:56 UTC (rev 7139) @@ -32,7 +32,6 @@ <classpathentry exported="true" kind="lib" path="bigdata/lib/dsi-utils-1.0.6-020610.jar"/> <classpathentry exported="true" kind="lib" path="bigdata/lib/lgpl-utils-1.0.6-020610.jar"/> <classpathentry exported="true" kind="lib" path="bigdata-jini/lib/apache/zookeeper-3.3.3.jar"/> - <classpathentry exported="true" kind="lib" path="bigdata-rdf/lib/nxparser-1.2.2.jar"/> <classpathentry exported="true" kind="lib" path="bigdata/lib/jetty/jetty-continuation-7.2.2.v20101205.jar"/> <classpathentry exported="true" kind="lib" path="bigdata/lib/jetty/jetty-http-7.2.2.v20101205.jar"/> <classpathentry exported="true" kind="lib" path="bigdata/lib/jetty/jetty-io-7.2.2.v20101205.jar"/> @@ -84,5 +83,6 @@ <classpathentry kind="lib" path="bigdata-rdf/lib/sesame-rio-testsuite-2.6.10.jar"/> <classpathentry kind="lib" path="bigdata-sails/lib/sesame-sparql-testsuite-2.6.10.jar"/> <classpathentry kind="lib" path="bigdata-sails/lib/sesame-store-testsuite-2.6.10.jar"/> + <classpathentry kind="lib" path="bigdata-rdf/lib/nxparser-1.2.3.jar"/> <classpathentry kind="output" path="bin"/> </classpath> Property changes on: branches/READ_CACHE/bigdata/lib/jetty ___________________________________________________________________ Modified: svn:mergeinfo - /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata/lib/jetty:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata/lib/jetty:6766-7077 /branches/INT64_BRANCH/bigdata/lib/jetty:4486-4522 /branches/QUADS_QUERY_BRANCH/bigdata/lib/jetty:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG/bigdata/lib/jetty:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata/lib/jetty:4814-4836 + /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata/lib/jetty:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata/lib/jetty:6766-7138 
/branches/INT64_BRANCH/bigdata/lib/jetty:4486-4522 /branches/QUADS_QUERY_BRANCH/bigdata/lib/jetty:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG/bigdata/lib/jetty:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata/lib/jetty:4814-4836 Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/ModifiableBOpBase.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/ModifiableBOpBase.java 2013-05-16 20:16:19 UTC (rev 7138) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/ModifiableBOpBase.java 2013-05-16 20:24:56 UTC (rev 7139) @@ -164,6 +164,16 @@ return args.get(index); } + + /** + * Return the index of the bop in the args. Returns -1 if bop is not + * present in the args. + */ + public int indexOf(final BOp bop) { + + return args.indexOf(bop); + + } /** * Invoked automatically any time a mutation operation occurs. The default @@ -254,7 +264,7 @@ * Add a new argument (core mutation method) at the specified index. * * @param index - * The index of the child expression to be replaced. + * The index at which the child expression is to be inserted. * @param newArg * The argument. * Property changes on: branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/aggregate ___________________________________________________________________ Modified: svn:mergeinfo - /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata/src/java/com/bigdata/bop/aggregate:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/aggregate:6766-7077 /branches/INT64_BRANCH/bigdata/src/java/com/bigdata/bop/aggregate:4486-4522 /branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregate:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG/bigdata/src/java/com/bigdata/bop/aggregate:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata/src/java/com/bigdata/bop/aggregate:4814-4836 + /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata/src/java/com/bigdata/bop/aggregate:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/aggregate:6766-7138 /branches/INT64_BRANCH/bigdata/src/java/com/bigdata/bop/aggregate:4486-4522 /branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregate:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG/bigdata/src/java/com/bigdata/bop/aggregate:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata/src/java/com/bigdata/bop/aggregate:4814-4836 Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/join/HTreeHashIndexOp.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/join/HTreeHashIndexOp.java 2013-05-16 20:16:19 UTC (rev 7138) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/join/HTreeHashIndexOp.java 2013-05-16 20:24:56 UTC (rev 7139) @@ -53,7 +53,7 @@ HTreeHashJoinAnnotations { } - + /** * Deep copy constructor. 
*/ Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/join/HTreeHashJoinUtility.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/join/HTreeHashJoinUtility.java 2013-05-16 20:16:19 UTC (rev 7138) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/join/HTreeHashJoinUtility.java 2013-05-16 20:24:56 UTC (rev 7139) @@ -40,6 +40,7 @@ import com.bigdata.bop.BOpContext; import com.bigdata.bop.Constant; import com.bigdata.bop.HTreeAnnotations; +import com.bigdata.bop.HashMapAnnotations; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IConstant; import com.bigdata.bop.IConstraint; @@ -202,6 +203,14 @@ private final AtomicBoolean open = new AtomicBoolean(true); /** + * The operator whose annotations are used to initialize this object. + * <p> + * Note: This was added to support the DISTINCT FILTER in + * {@link #outputSolutions(IBuffer)}. + */ + private final PipelineOp op; + + /** * This basically controls the vectoring of the hash join. * * TODO parameter from operator annotations. Note that 10k tends to put too @@ -259,6 +268,16 @@ private final IVariable<?>[] selectVars; /** + * The variables to be projected into a join group. When non- + * <code>null</code> variables that are NOT in this array are NOT flowed + * into the join group. + * + * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/668" > + * JoinGroup optimizations </a> + */ + private final IVariable<?>[] projectedInVars; + + /** * The join constraints (optional). */ private final IConstraint[] constraints; @@ -344,6 +363,8 @@ if (askVar != null) sb.append(",askVar=" + askVar); sb.append(",joinVars=" + Arrays.toString(joinVars)); + if (projectedInVars != null) + sb.append(",projectedInVars=" + Arrays.toString(projectedInVars)); if (selectVars != null) sb.append(",selectVars=" + Arrays.toString(selectVars)); if (constraints != null) @@ -497,7 +518,7 @@ if(joinType == null) throw new IllegalArgumentException(); -// this.op = op; + this.op = op; this.joinType = joinType; this.optional = joinType == JoinTypeEnum.Optional; this.filter = joinType == JoinTypeEnum.Filter; @@ -516,6 +537,12 @@ .getProperty(JoinAnnotations.SELECT); /* + * The variables that are projected IN to the join group. + */ + this.projectedInVars = (IVariable<?>[]) op + .getProperty(HashJoinAnnotations.PROJECT_IN_VARS); + + /* * This wraps an efficient raw store interface around a child memory * manager created from the IMemoryManager which will back the named * solution set. @@ -1524,82 +1551,179 @@ } + /** + * DISTINCT solutions filter for + * {@link HTreeHashJoinUtility#outputSolutions(IBuffer)} + * + * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/668" > + * JoinGroup optimizations </a> + */ + private class HTreeDistinctFilter implements IDistinctFilter { + + /** + * The variables used to impose a distinct constraint. 
+ */ + private final IVariable<?>[] vars; + + private final HTreeHashJoinUtility state; + + public HTreeDistinctFilter(final IVariable<?>[] vars, final PipelineOp op) { + + this.vars = vars; + + this.state = new HTreeHashJoinUtility( + ((MemStore) store).getMemoryManager(), op, + JoinTypeEnum.Filter); + + } + + @Override + public IVariable<?>[] getProjectedVars() { + + return vars; + + } + + @Override + public IBindingSet accept(final IBindingSet bset) { + // FIXME Auto-generated method stub + throw new UnsupportedOperationException(); + } + + @Override + public long filterSolutions(ICloseableIterator<IBindingSet[]> itr, + BOpStats stats, IBuffer<IBindingSet> sink) { + // FIXME Auto-generated method stub + throw new UnsupportedOperationException(); + } + + @Override + public void release() { + + state.release(); + + } + + } + @Override public void outputSolutions(final IBuffer<IBindingSet> out) { try { -// if (false) { -// -// /* -// * Striterator pattern. -// */ -// -// final ICloseableIterator<IBindingSet> itr = indexScan(); -// -// try { -// -// while(itr.hasNext()) { -// -// IBindingSet bset = itr.next(); -// -// if (selectVars != null) { -// -// // Drop variables which are not projected. -// bset = bset.copy(selectVars); -// -// } -// out.add(bset); -// -// } -// -// } finally { -// -// itr.close(); -// -// } -// -// -// } else { + /* + * FIXME Set this to enable "DISTINCT" on the solutions flowing into the + * join group. + * + * Note: This should be set by the HashIndexOp (or passed in through the + * interface). + * + * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/668" > + * JoinGroup optimizations </a> + */ + final boolean distinct = false; + + /* + * FIXME Replace with an HTreeDistinctFilter and integrate to NOT + * flow duplicate solutions into the sub-group. The HTree + * filterSolutions() method needs to be vectored to be efficient. + * Therefore, this outputSolutions() method needs to be rewritten to + * be vectored as well. It is efficient in reading the solutions + * from the HTree, and the solutions are in the "natural" order of + * the HTree for the join vars. This order SHOULD be pretty + * efficient for the DISTINCT solutions set as well, but note that + * joinVars:=projectedInVars. To maximize the corrleation, both the + * joinVars[] and the projectedInVars[] should be sorted so the + * variables in the solutions will be correllated and any variables + * that are NOT in the projectedInVars should appear towards the end + * of the joinVars where they will cause the least perturbation in + * this scan + filter. + */ + final IDistinctFilter distinctFilter; + + if (distinct && projectedInVars != null && projectedInVars.length > 0) { /* - * Simple iterator pattern. + * Note: We are single threaded here so we can use a lower + * concurrencyLevel value. + * + * Note: If necessary, this could be replaced with JVMHashIndex so + * we get the #of occurrences of each distinct combination of + * bindings that is projected into the sub-group/-query. 
*/ + final int concurrencyLevel = 1;//ConcurrentHashMapAnnotations.DEFAULT_CONCURRENCY_LEVEL; + + distinctFilter = new JVMDistinctFilter(projectedInVars, // + op.getProperty(HashMapAnnotations.INITIAL_CAPACITY, + HashMapAnnotations.DEFAULT_INITIAL_CAPACITY),// + op.getProperty(HashMapAnnotations.LOAD_FACTOR, + HashMapAnnotations.DEFAULT_LOAD_FACTOR),// + concurrencyLevel + ); - final HTree rightSolutions = getRightSolutions(); + } else { + + distinctFilter = null; + + } + + final HTree rightSolutions = getRightSolutions(); - if (log.isInfoEnabled()) { - log.info("rightSolutions: #nnodes=" - + rightSolutions.getNodeCount() + ",#leaves=" - + rightSolutions.getLeafCount() + ",#entries=" - + rightSolutions.getEntryCount()); - } + if (log.isInfoEnabled()) { + log.info("rightSolutions: #nnodes=" + + rightSolutions.getNodeCount() + ",#leaves=" + + rightSolutions.getLeafCount() + ",#entries=" + + rightSolutions.getEntryCount()); + } - // source. - final ITupleIterator<?> solutionsIterator = rightSolutions - .rangeIterator(); + // source. + final ITupleIterator<?> solutionsIterator = rightSolutions + .rangeIterator(); - while (solutionsIterator.hasNext()) { + while (solutionsIterator.hasNext()) { - final ITuple<?> t = solutionsIterator.next(); + final ITuple<?> t = solutionsIterator.next(); - IBindingSet bset = decodeSolution(t); + IBindingSet bset = decodeSolution(t); - if (selectVars != null) { + if (distinctFilter != null) { - // Drop variables which are not projected. - bset = bset.copy(selectVars); + /* + * Note: The DISTINCT filter is based on the variables + * that are projected INTO the child join group. + * However, those are NOT always the same as the + * variables that are projected OUT of the child join + * group, so we need to + */ + if ((bset = distinctFilter.accept(bset)) == null) { + + // Drop duplicate solutions. + continue; + } - encoder.resolveCachedValues(bset); + } else if (selectVars != null) { - out.add(bset); + /* + * FIXME We should be using projectedInVars here since + * outputSolutions() is used to stream solutions into + * the child join group (at least for some kinds of + * joins, but there might be exceptions for joining with + * a named solution set). + */ + // Drop variables which are not projected. 
+ bset = bset.copy(selectVars); + } -// } + encoder.resolveCachedValues(bset); + out.add(bset); + + } + } catch (Throwable t) { throw launderThrowable(t); Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/join/HashIndexOp.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/join/HashIndexOp.java 2013-05-16 20:16:19 UTC (rev 7138) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/join/HashIndexOp.java 2013-05-16 20:24:56 UTC (rev 7139) @@ -114,8 +114,7 @@ * @param args * @param annotations */ - public HashIndexOp(final BOp[] args, - final Map<String, Object> annotations) { + public HashIndexOp(final BOp[] args, final Map<String, Object> annotations) { super(args, annotations); Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/join/HashJoinAnnotations.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/join/HashJoinAnnotations.java 2013-05-16 20:16:19 UTC (rev 7138) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/join/HashJoinAnnotations.java 2013-05-16 20:24:56 UTC (rev 7139) @@ -84,4 +84,13 @@ */ long DEFAULT_NO_JOIN_VARS_LIMIT = Long.MAX_VALUE; + + /** + * The {@link IVariable[]} specifying what variables need to flow into + * the right operator of the hash join (i.e. what visible variables inside + * the right operator have appeared previously in the query and may be + * bound). + */ + String PROJECT_IN_VARS = HashJoinAnnotations.class.getName() + ".projectInVars"; + } Copied: branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/join/IDistinctFilter.java (from rev 7138, branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/join/IDistinctFilter.java) =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/join/IDistinctFilter.java (rev 0) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/join/IDistinctFilter.java 2013-05-16 20:24:56 UTC (rev 7139) @@ -0,0 +1,81 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Apr 29, 2013 + */ +package com.bigdata.bop.join; + +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IVariable; +import com.bigdata.bop.engine.BOpStats; +import com.bigdata.relation.accesspath.IBuffer; +import com.bigdata.striterator.ICloseableIterator; + +/** + * A "DISTINCT" filter for {@link IBindingSet}s. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + */ +public interface IDistinctFilter { + + /** + * The variables that are being projected out of the DISTINCT filter. The + * solutions will be DISTINCT on this combination of variables. Bindings on + * other variables will be dropped. 
+ */ + IVariable<?>[] getProjectedVars(); + + /** + * If the bindings are distinct for the configured variables then return a + * new {@link IBindingSet} consisting of only the selected variables. + * + * @param bset + * The binding set to be filtered. + * + * @return A new {@link IBindingSet} containing only the distinct as bound + * values -or- <code>null</code> if the binding set duplicates a + * solution which was already accepted. + */ + IBindingSet accept(final IBindingSet bset); + + /** + * Vectored DISTINCT. + * + * @param itr + * The source solutions. + * @param stats + * Statistics object to be updated. + * @param sink + * The sink onto which the DISTINCT solutions will be written. + * @return The #of DISTINCT solutions. + */ + long filterSolutions(final ICloseableIterator<IBindingSet[]> itr, + final BOpStats stats, final IBuffer<IBindingSet> sink); + + /** + * Discard the map backing this filter. + */ + void release(); + +} \ No newline at end of file Copied: branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/join/JVMDistinctFilter.java (from rev 7138, branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/join/JVMDistinctFilter.java) =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/join/JVMDistinctFilter.java (rev 0) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/join/JVMDistinctFilter.java 2013-05-16 20:24:56 UTC (rev 7139) @@ -0,0 +1,269 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Apr 26, 2013 + */ +package com.bigdata.bop.join; + +import java.util.Arrays; +import java.util.concurrent.ConcurrentHashMap; + +import org.apache.log4j.Logger; + +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IConstant; +import com.bigdata.bop.IVariable; +import com.bigdata.bop.bindingSet.ListBindingSet; +import com.bigdata.bop.engine.BOpStats; +import com.bigdata.relation.accesspath.IBuffer; +import com.bigdata.striterator.ICloseableIterator; + +/** + * Utility class for imposing a DISTINCT filter on {@link IBindingSet}. This + * class is thread-safe. It is based on a {@link ConcurrentHashMap}. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + */ +public class JVMDistinctFilter implements IDistinctFilter { + + private static final Logger log = Logger.getLogger(JVMDistinctFilter.class); + + /** + * Wrapper used for the as bound solutions in the {@link ConcurrentHashMap}. 
+ */ + private static class Solution { + + private final int hash; + + private final IConstant<?>[] vals; + + public Solution(final IConstant<?>[] vals) { + this.vals = vals; + this.hash = java.util.Arrays.hashCode(vals); + } + + public int hashCode() { + return hash; + } + + public boolean equals(final Object o) { + if (this == o) + return true; + if (!(o instanceof Solution)) { + return false; + } + final Solution t = (Solution) o; + if (vals.length != t.vals.length) + return false; + for (int i = 0; i < vals.length; i++) { + // @todo verify that this allows for nulls with a unit test. + if (vals[i] == t.vals[i]) + continue; + if (vals[i] == null) + return false; + if (!vals[i].equals(t.vals[i])) + return false; + } + return true; + } + } + + /** + * The variables used to impose a distinct constraint. + */ + private final IVariable<?>[] vars; + + /** + * A concurrent map whose keys are the bindings on the specified variables + * (the keys and the values are the same since the map implementation does + * not allow <code>null</code> values). + * <p> + * Note: The map is shared state and can not be discarded or cleared until + * the last invocation!!! + */ + private final ConcurrentHashMap<Solution, Solution> map; + + /** + * + * @param vars + * The set of variables on which the DISTINCT filter will be + * imposed. Only these variables will be present in the + * "accepted" solutions. Any variable bindings not specified in + * this array will be dropped. + * @param initialCapacity + * @param loadFactor + * @param concurrencyLevel + */ + public JVMDistinctFilter(final IVariable<?>[] vars, + final int initialCapacity, final float loadFactor, + final int concurrencyLevel) { + + if (vars == null) + throw new IllegalArgumentException(); + + if (vars.length == 0) + throw new IllegalArgumentException(); + + this.vars = vars; + + this.map = new ConcurrentHashMap<Solution, Solution>(initialCapacity, + loadFactor, concurrencyLevel); + + } + + /* (non-Javadoc) + * @see com.bigdata.bop.join.IDistinctFilter#clear() + */ + @Override + public void release() { + + map.clear(); + + } + + @Override + public IVariable<?>[] getProjectedVars() { + + return vars; + + } + + /** + * If the bindings are distinct for the configured variables then return + * those bindings. + * + * @param bset + * The binding set to be filtered. + * + * @return The distinct as bound values -or- <code>null</code> if the + * binding set duplicates a solution which was already accepted. + */ + private IConstant<?>[] _accept(final IBindingSet bset) { + + final IConstant<?>[] r = new IConstant<?>[vars.length]; + + for (int i = 0; i < vars.length; i++) { + + /* + * Note: This allows null's. + * + * @todo write a unit test when some variables are not bound. + */ + r[i] = bset.get(vars[i]); + + } + + final Solution s = new Solution(r); + + if (log.isTraceEnabled()) + log.trace("considering: " + Arrays.toString(r)); + + final boolean distinct = map.putIfAbsent(s, s) == null; + + if (distinct && log.isDebugEnabled()) + log.debug("accepted: " + Arrays.toString(r)); + + return distinct ? r : null; + + } + + /* (non-Javadoc) + * @see com.bigdata.bop.join.IDistinctFilter#accept(com.bigdata.bop.IBindingSet) + */ + @Override + public IBindingSet accept(final IBindingSet bset) { + + final IConstant<?>[] vals = _accept(bset); + + if (vals == null) { + + /* + * This is a duplicate solution. + */ + + return null; + + } + + /* + * This is a distinct solution. 
Copy only the variables used to select + * distinct solutions into a new binding set and add that to the set of + * [accepted] binding sets which will be emitted by this operator. + */ + + final ListBindingSet tmp = new ListBindingSet(); + + for (int i = 0; i < vars.length; i++) { + + if (vals[i] != null) + tmp.set(vars[i], vals[i]); + + } + + return tmp; + + } + + @Override + public long filterSolutions(final ICloseableIterator<IBindingSet[]> itr, + final BOpStats stats, final IBuffer<IBindingSet> sink) { + + long n = 0L; + + while (itr.hasNext()) { + + final IBindingSet[] a = itr.next(); + + stats.chunksIn.increment(); + stats.unitsIn.add(a.length); + + for (IBindingSet bset : a) { + + /* + * Test to see if this solution is distinct from those already + * seen. + */ + if ((bset = accept(bset)) == null) { + + // Drop duplicate solution. + continue; + } + + /* + * This is a distinct solution. + */ + + sink.add(bset); + + n++; + + } + + } // next chunk. + + return n; + + } + +} Copied: branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/join/JVMHashIndex.java (from rev 7138, branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/join/JVMHashIndex.java) =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/join/JVMHashIndex.java (rev 0) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/join/JVMHashIndex.java 2013-05-16 20:24:56 UTC (rev 7139) @@ -0,0 +1,694 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Apr 29, 2013 + */ +package com.bigdata.bop.join; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; + +import org.apache.log4j.Logger; + +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IConstant; +import com.bigdata.bop.IVariable; +import com.bigdata.bop.solutions.JVMDistinctBindingSetsOp; +import com.bigdata.counters.CAT; + +/** + * A hash index for {@link IBindingSet}s that supports duplicate solutions and + * hit counts. The hit counts are used to detect {@link IBindingSet}s that do + * not join for OPTIONAL, MINUS, and related kinds of "negation" joins. + * <p> + * Note: The {@link JVMDistinctBindingSetsOp} does not use this class right now + * because it enjoys better concurrency than the {@link JVMHashIndex}. Also see + * {@link JVMDistinctFilter}, which is the backing implementation for the + * {@link JVMDistinctBindingSetsOp}. 
+ * + * @see JVMDistinctFilter + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + */ +public class JVMHashIndex { + + private static final Logger log = Logger.getLogger(JVMHashIndex.class); + + /** + * Note: If joinVars is an empty array, then the solutions will all hash to + * ONE (1). + */ + private static final int ONE = 1; + +// /** +// * Return the hash code which will be used as the key given the ordered +// * as-bound values for the join variables. +// * +// * @param joinVars +// * The join variables. +// * @param bset +// * The bindings whose as-bound hash code for the join variables +// * will be computed. +// * @param ignoreUnboundVariables +// * If a variable without a binding should be silently ignored. +// * +// * @return The hash code. +// * +// * @throws JoinVariableNotBoundException +// * if there is no binding for a join variable. +// */ +// private static int hashCode(final IVariable<?>[] joinVars, +// final IBindingSet bset, final boolean ignoreUnboundVariables) +// throws JoinVariableNotBoundException { +// +// int h = ONE; +// +// for (IVariable<?> v : joinVars) { +// +// final IConstant<?> c = bset.get(v); +// +// if (c == null) { +// +// if (ignoreUnboundVariables) +// continue; +// +// // Reject any solution which does not have a binding for a join +// // variable. +// +// throw new JoinVariableNotBoundException(v.getName()); +// +// } +// +// h = 31 * h + c.hashCode(); +// +// } +// +// if (log.isTraceEnabled()) +// log.trace("hashCode=" + h + ", joinVars=" +// + Arrays.toString(joinVars) + " : " + bset); +// +// return h; +// +// } + + /** + * Return an array of constants corresponding to the as-bound values of the + * join variables for the given solution. + * + * @param bset + * The solution. + * + * @return The as-bound values for the {@link #keyVars} for that solution + * -or- <code>null</code> if one or more join variables is not bound + * by the solution and {@link #indexSolutionsHavingUnboundJoinVars} + * is <code>false</code>. + * + * @see #keyVars + * @see #indexSolutionsHavingUnboundJoinVars + */ + private Key makeKey(//final IVariable<?>[] keyVars, + final IBindingSet bset +// final boolean indexSolutionsHavingUnboundJoinVars + ) { + + final IConstant<?>[] vals = new IConstant<?>[keyVars.length]; + + for (int i = 0; i < keyVars.length; i++) { + + final IVariable<?> v = keyVars[i]; + + vals[i] = bset.get(v); + + } + + int h = ONE; + + for (IVariable<?> v : keyVars) { + + final IConstant<?> c = bset.get(v); + + if (c == null) { + + if (!indexSolutionsHavingUnboundJoinVars) { + + /* + * Drop solution having an unbound join variable. + */ + + if (log.isDebugEnabled()) + log.debug("Join variable is not bound: var=" + v + + ", solution=" + bset); + + return null; + + } + + continue; + + } + + h = 31 * h + c.hashCode(); + + } + + if (log.isTraceEnabled()) + log.trace("hashCode=" + h + ", joinVars=" + + Arrays.toString(keyVars) + " : " + bset); + + return new Key(h, vals); + + } + + /** + * Wrapper for the keys in the hash table. This is necessary for the hash + * table to compare the keys as equal and also provides efficiencies in the + * hash code and equals() methods. 
+ */ + public static class Key { + + private final int hash; + + private final IConstant<?>[] vals; + + private Key(final int hashCode, final IConstant<?>[] vals) { + this.vals = vals; + this.hash = hashCode; + } + + public int hashCode() { + return hash; + } + + public boolean equals(final Object o) { + if (this == o) + return true; + if (!(o instanceof Key)) { + return false; + } + final Key t = (Key) o; + if (vals.length != t.vals.length) + return false; + for (int i = 0; i < vals.length; i++) { + if (vals[i] == t.vals[i]) + continue; + if (vals[i] == null) + return false; + if (!vals[i].equals(t.vals[i])) + return false; + } + return true; + } + } + + /** + * An solution and a hit counter as stored in the {@link JVMHashIndex}. + */ + public static class SolutionHit { + + /** + * The input solution. + */ + final public IBindingSet solution; + + /** + * The #of hits on that solution. This may be used to detect solutions + * that did not join. E.g., by scanning and reporting out all solutions + * where {@link #nhits} is ZERO (0L). + */ + public final CAT nhits = new CAT(); + + private SolutionHit(final IBindingSet solution) { + + if (solution == null) + throw new IllegalArgumentException(); + + this.solution = solution; + + } + + public String toString() { + + return getClass().getName() + "{nhits=" + nhits + ",solution=" + + solution + "}"; + + } + + } // class SolutionHit + + /** + * A group of solutions having the same as-bound values for the join vars. + * Each solution is paired with a hit counter so we can support OPTIONAL + * semantics for the join. + */ + public static class Bucket implements Iterable<SolutionHit>, + Comparable<Bucket> { + + /** The hash code for this collision bucket. */ + private final int hashCode; + + /** + * A set of solutions (and their hit counters) which have the same + * as-bound values for the join variables. + */ + private final List<SolutionHit> solutions = new LinkedList<SolutionHit>(); + + public String toString() { + return super.toString() + + // + "{hashCode=" + hashCode + ",#solutions=" + solutions.size() + + "}"; + } + + public Bucket(final int hashCode, final IBindingSet solution) { + + this.hashCode = hashCode; + + add(solution); + + } + + public void add(final IBindingSet solution) { + + if (solution == null) + throw new IllegalArgumentException(); + + solutions.add(new SolutionHit(solution)); + + } + + /** + * Add the solution to the bucket iff the solutions is not already + * present in the bucket. + * <p> + * Note: There is already a hash index in place on the join variables + * when we are doing a DISTINCT filter. Further, only the "join" + * variables are "selected" and participate in a DISTINCT filter. + * Therefore, if we have a hash collision such that two solutions would + * be directed into the same {@link Bucket} then we can not improve + * matters but must simply scan the solutions in the bucket to decide + * whether the new solution duplicates a solution which is already + * present. + * + * @param solution + * The solution. + * + * @return <code>true</code> iff the bucket was modified by this + * operation. + */ + public boolean addDistinct(final IBindingSet solution) { + + if (solutions.isEmpty()) { + + // First solution. + solutions.add(new SolutionHit(solution)); + + return true; + + } + + final Iterator<SolutionHit> itr = solutions.iterator(); + + while (itr.hasNext()) { + + ... [truncated message content] |
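For readers tracing the JVMDistinctFilter/IDistinctFilter changes in the (truncated) diff above, the core pattern is a thread-safe DISTINCT check: wrap the as-bound values of the projected variables in a key object with a precomputed hash code and rely on ConcurrentHashMap.putIfAbsent() to detect duplicates. The following is a minimal standalone sketch of that pattern only; the class and method names are illustrative and are not part of the bigdata API.

{{{
import java.util.Arrays;
import java.util.concurrent.ConcurrentHashMap;

// Sketch of the DISTINCT-filter pattern described above.
// "DistinctKeySketch" and "accept" are illustrative names, not bigdata classes.
final class DistinctKeySketch {

    // Key wrapper: precomputes the hash over the as-bound values (nulls allowed).
    private static final class Key {
        private final int hash;
        private final Object[] vals;
        Key(final Object[] vals) {
            this.vals = vals;
            this.hash = Arrays.hashCode(vals);
        }
        @Override public int hashCode() { return hash; }
        @Override public boolean equals(final Object o) {
            return o instanceof Key && Arrays.equals(vals, ((Key) o).vals);
        }
    }

    // Keys and values are the same object since ConcurrentHashMap disallows null values.
    private final ConcurrentHashMap<Key, Key> seen = new ConcurrentHashMap<Key, Key>();

    // Returns true iff this combination of projected values has not been seen before.
    boolean accept(final Object[] projectedValues) {
        final Key k = new Key(projectedValues);
        return seen.putIfAbsent(k, k) == null;
    }
}
}}}

The actual JVMDistinctFilter in the diff additionally drops bindings for non-projected variables by copying only the configured variables into a new binding set when a solution is accepted.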
From: <tho...@us...> - 2013-05-17 18:06:55
|
Revision: 7142 http://bigdata.svn.sourceforge.net/bigdata/?rev=7142&view=rev Author: thompsonbry Date: 2013-05-17 18:05:59 +0000 (Fri, 17 May 2013) Log Message: -----------
This commit addresses several different issues.

Added doRejectedCommit() into BasicHA and override in HAJournal. This provides the necessary hook to force the HAJournalServer into an Error state if the service fails a commit (specifically, if it votes NO in prepare2Phase).

Significant cleanup around the access methods to the CommitRecordIndex in AbstractJournal. In particular, AbstractJournal.getCommitRecordIndex() was being used in several unsafe patterns (esp., getRootBlocks(startTime), getRootBlock(startTime)). In addition, DumpJournal was using this method; since DumpJournal can now be run with concurrent operations against the journal, that usage was unsafe. The only publicly exposed method now returns a read-only committed view of the CommitRecordIndex. StoreManager was modified to support the new version of getCommitRecordIndex() that accepts a readOnly flag.

{{{
CommitRecordIndex getCommitRecordIndex(long addr, boolean readOnly)
}}}

JournalTransactionService.findNextCommitTime() was modified to use AbstractJournal.getCommitRecordStrictlyGreaterThan(). That method provides appropriate synchronization for the CommitRecordIndex.

Modified commit2Phase() to NOT purge the HALogs unless prepare2Phase voted YES for all services. This addresses a problem where one service could vote NO but the logs would be purged before the service left the quorum. The logs are now correctly retained if a service votes NO for prepare. The didAllServicesPrepare flag was added to the IHA2PhaseCommitMessage. This was done in response to problems identified in TestHA3JournalServerOverrides.

HAJournalServer.ErrorTask() was modified to force a service leave. This was also done in response to problems identified in TestHA3JournalServerOverrides.

QuorumCommitImpl.prepare2Phase() now interprets an error attempting to send the PREPARE message to a server as a "NO" vote (as opposed to only errors in processing the PREPARE message on the remote server).

New tests were added to TestHAJournalServerOverride that examine failure modes where (a) a service votes "NO" for PREPARE; and (b) a service throws an exception out of prepare2Phase().

----

I have observed occasional failures in the following stress test. See [1]. This looks like a failure to resolve the view of the database through the DefaultResourceLocator, thus passing a [null] view into BigdataSailConnection.attach(). It fails every so many runs through that stress test. I have some question about whether this is a new failure or if it has been failing periodically.
{{{
TestConcurrentKBCreate.test_concurrentKBCreateAndDiscoveryStressTestOnDistinctJournals()
}}}

The change in this commit to the DefaultResourceLocator is unlikely to be responsible since only read-only tx code paths were affected, but the test failure goes through a read-committed request:

{{{
conn = getQueryConnection(indexManager, namespace, ITx.READ_COMMITTED);
}}}

The failure trace is:

{{{
java.util.concurrent.ExecutionException: java.lang.RuntimeException: java.lang.RuntimeException: java.lang.IllegalArgumentException
	at java.util.concurrent.FutureTask$Sync.innerGet(FutureTask.java:252)
	at java.util.concurrent.FutureTask.get(FutureTask.java:111)
	at com.bigdata.rdf.sail.TestConcurrentKBCreate.doConcurrentCreateAndDiscoveryTest(TestConcurrentKBCreate.java:312)
	at com.bigdata.rdf.sail.TestConcurrentKBCreate.test_concurrentKBCreateAndDiscovery(TestConcurrentKBCreate.java:172)
	at com.bigdata.rdf.sail.TestConcurrentKBCreate.test_concurrentKBCreateAndDiscoveryStressTestOnDistinctJournals(TestConcurrentKBCreate.java:227)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:601)
	at junit.framework.TestCase.runTest(TestCase.java:154)
	at junit.framework.TestCase.runBare(TestCase.java:127)
	at junit.framework.TestResult$1.protect(TestResult.java:106)
	at junit.framework.TestResult.runProtected(TestResult.java:124)
	at junit.framework.TestResult.run(TestResult.java:109)
	at junit.framework.TestCase.run(TestCase.java:118)
	at org.eclipse.jdt.internal.junit.runner.junit3.JUnit3TestReference.run(JUnit3TestReference.java:130)
	at org.eclipse.jdt.internal.junit.runner.TestExecution.run(TestExecution.java:38)
	at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.runTests(RemoteTestRunner.java:467)
	at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.runTests(RemoteTestRunner.java:683)
	at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.run(RemoteTestRunner.java:390)
	at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.main(RemoteTestRunner.java:197)
Caused by: java.lang.RuntimeException: java.lang.RuntimeException: java.lang.IllegalArgumentException
	at com.bigdata.rdf.sail.TestConcurrentKBCreate$DiscoveryTask.call(TestConcurrentKBCreate.java:373)
	at com.bigdata.rdf.sail.TestConcurrentKBCreate$DiscoveryTask.call(TestConcurrentKBCreate.java:1)
	at java.util.concurrent.FutureTask$Sync.innerRun(FutureTask.java:334)
	at java.util.concurrent.FutureTask.run(FutureTask.java:166)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1110)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:603)
	at java.lang.Thread.run(Thread.java:722)
Caused by: java.lang.RuntimeException: java.lang.IllegalArgumentException
	at com.bigdata.rdf.sail.BigdataSail$BigdataSailReadOnlyConnection.newTx(BigdataSail.java:4119)
	at com.bigdata.rdf.sail.BigdataSail$BigdataSailReadOnlyConnection.<init>(BigdataSail.java:3986)
	at com.bigdata.rdf.sail.BigdataSail._getReadOnlyConnection(BigdataSail.java:1424)
	at com.bigdata.rdf.sail.BigdataSail.getReadOnlyConnection(BigdataSail.java:1399)
	at com.bigdata.rdf.sail.BigdataSailRepository.getReadOnlyConnection(BigdataSailRepository.java:82)
	at com.bigdata.rdf.sail.TestConcurrentKBCreate.getQueryConnection(TestConcurrentKBCreate.java:434)
	at
com.bigdata.rdf.sail.TestConcurrentKBCreate.access$0(TestConcurrentKBCreate.java:405) at com.bigdata.rdf.sail.TestConcurrentKBCreate$DiscoveryTask.call(TestConcurrentKBCreate.java:358) ... 6 more Caused by: java.lang.IllegalArgumentException at com.bigdata.rdf.sail.BigdataSail$BigdataSailConnection.attach(BigdataSail.java:1896) at com.bigdata.rdf.sail.BigdataSail$BigdataSailReadOnlyConnection.newTx(BigdataSail.java:4109) ... 13 more }}} [1] https://sourceforge.net/apps/trac/bigdata/ticket/617 (Concurrent KB create fails with "No axioms defined?) The TestRWJournal, journal.TestAll, TestBigdataSailWithQuads, and HA test suites are all 100% green. I did observe one zombie for the HA test suite. However, this was likely related to a previous run in which a bug (never committed) had caused all of that test suite to fail. See https://sourceforge.net/apps/trac/bigdata/ticket/530 (Journal HA) Modified Paths: -------------- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/QuorumCommitImpl.java branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/msg/HA2PhaseCommitMessage.java branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/msg/IHA2PhaseCommitMessage.java branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractJournal.java branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractTask.java branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/DumpJournal.java branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/IAtomicStore.java branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/Journal.java branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/JournalDelegate.java branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/JournalTransactionService.java branches/READ_CACHE/bigdata/src/java/com/bigdata/resources/StoreManager.java branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournal.java branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournalServer.java branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHAJournalServerOverride.java Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/QuorumCommitImpl.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/QuorumCommitImpl.java 2013-05-17 17:40:53 UTC (rev 7141) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/QuorumCommitImpl.java 2013-05-17 18:05:59 UTC (rev 7142) @@ -31,6 +31,7 @@ import java.util.concurrent.CancellationException; import java.util.concurrent.ExecutionException; import java.util.concurrent.Future; +import java.util.concurrent.FutureTask; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; @@ -219,9 +220,30 @@ /* * Runnable which will execute this message on the * remote service. + * + * FIXME Because async futures cause DGC native thread + * leaks this is no longer running the prepare + * asynchronously on the followers. Change the code + * here, and in commit2Phase and abort2Phase to use + * multiple threads to run the tasks on the followers. */ - final Future<Boolean> rf = getService(serviceId) - .prepare2Phase(msgForJoinedService); + + final HACommitGlue service = getService(serviceId); + + Future<Boolean> rf = null; + try { + // RMI. + rf = service.prepare2Phase(msgForJoinedService); + } catch (final Throwable t) { + // If anything goes wrong, wrap up exception as Future. 
+ final FutureTask<Boolean> ft = new FutureTask<Boolean>(new Runnable() { + public void run() { + throw new RuntimeException(t); + } + }, Boolean.FALSE); + rf = ft; + ft.run(); // evaluate future. + } // add to list of futures we will check. remoteFutures.set(i, rf); @@ -317,7 +339,9 @@ /* * Note: prepare2Phase() is throwing exceptions if * preconditions are violated. These thrown exceptions are - * interpreted as a "NO" vote. + * interpreted as a "NO" vote. An exception can also appear + * here if there is an RMI failure or even a failure on this + * service when attempting to perform the RMI. */ log.error(ex, ex); done = true; // ExecutionException indicates isDone(). @@ -419,6 +443,11 @@ final long commitTime = preq.getRootBlock().getLastCommitTime(); final PrepareResponse presp = req.getPrepareResponse(); + + // true iff we have a full complement of services that vote YES for this + // commit. + final boolean didAllServicesPrepare = presp.getYesCount() == presp + .replicationFactor(); member.assertLeader(token); @@ -427,7 +456,7 @@ try { final IHA2PhaseCommitMessage msgJoinedService = new HA2PhaseCommitMessage( - true/* isJoinedService */, commitTime); + true/* isJoinedService */, commitTime, didAllServicesPrepare); for (int i = 1; i < joinedServiceIds.length; i++) { Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/msg/HA2PhaseCommitMessage.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/msg/HA2PhaseCommitMessage.java 2013-05-17 17:40:53 UTC (rev 7141) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/msg/HA2PhaseCommitMessage.java 2013-05-17 18:05:59 UTC (rev 7142) @@ -32,12 +32,14 @@ private final boolean isJoinedService; private final long commitTime; + private final boolean didAllServicesPrepare; public HA2PhaseCommitMessage(final boolean isJoinedService, - final long commitTime) { + final long commitTime, final boolean didAllServicesPrepare) { this.isJoinedService = isJoinedService; this.commitTime = commitTime; + this.didAllServicesPrepare = didAllServicesPrepare; } @@ -51,4 +53,18 @@ return commitTime; } + @Override + public boolean didAllServicesPrepare() { + return didAllServicesPrepare; + } + + @Override + public String toString() { + + return super.toString() + "{isJoinedService=" + isJoinedService + + ",commitTime=" + commitTime + ",didAllServicesPrepare=" + + didAllServicesPrepare + "}"; + + } + } Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/msg/IHA2PhaseCommitMessage.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/msg/IHA2PhaseCommitMessage.java 2013-05-17 17:40:53 UTC (rev 7141) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/msg/IHA2PhaseCommitMessage.java 2013-05-17 18:05:59 UTC (rev 7142) @@ -54,4 +54,11 @@ */ long getCommitTime(); + /** + * Return <code>true</code> iff all services voted "YES" for PREPARE. When + * <code>false</code>, not all services will participate in this commit (but + * the commit will still be performed). 
+ */ + boolean didAllServicesPrepare(); + } Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractJournal.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractJournal.java 2013-05-17 17:40:53 UTC (rev 7141) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractJournal.java 2013-05-17 18:05:59 UTC (rev 7142) @@ -81,7 +81,6 @@ import com.bigdata.btree.ITuple; import com.bigdata.btree.ITupleIterator; import com.bigdata.btree.IndexMetadata; -import com.bigdata.btree.ReadOnlyIndex; import com.bigdata.btree.keys.ICUVersionRecord; import com.bigdata.btree.view.FusedView; import com.bigdata.cache.ConcurrentWeakValueCache; @@ -395,26 +394,34 @@ */ private final ReentrantReadWriteLock _fieldReadWriteLock = new ReentrantReadWriteLock(false/* fair */); - /** - * Used to cache the most recent {@link ICommitRecord} -- discarded on - * {@link #abort()}; set by {@link #commitNow(long)}. - */ + /** + * Used to cache the most recent {@link ICommitRecord} -- discarded on + * {@link #abort()}; set by {@link #commitNow(long)}. + * <p> + * Note: This is set in the constructor and modified by {@link #_abort()} + * but (once set by the constructor) it is never <code>null</code> until the + * store is closed. + */ private volatile ICommitRecord _commitRecord; - /** - * BTree mapping commit timestamps to the address of the corresponding - * {@link ICommitRecord}. The keys are timestamps (long integers). The - * values are the address of the {@link ICommitRecord} with that commit - * timestamp. - * <p> - * Note: The {@link CommitRecordIndex} object is NOT systematically - * protected by <code>synchronized</code> within this class. Therefore it is - * NOT safe for use by outside classes and CAN NOT be made safe simply by - * synchronizing their access on the {@link CommitRecordIndex} object - * itself. This is mainly for historical reasons and it may be possible to - * systematically protect access to this index within a synchronized block - * and then expose it to other classes. - */ + /** + * BTree mapping commit timestamps to the address of the corresponding + * {@link ICommitRecord}. The keys are timestamps (long integers). The + * values are the address of the {@link ICommitRecord} with that commit + * timestamp. + * <p> + * Note: The {@link CommitRecordIndex} object is NOT systematically + * protected by <code>synchronized</code> within this class. Therefore it is + * NOT safe for use by outside classes and CAN NOT be made safe simply by + * synchronizing their access on the {@link CommitRecordIndex} object + * itself. This is mainly for historical reasons and it may be possible to + * systematically protect access to this index within a synchronized block + * and then expose it to other classes. + * <p> + * Note: This is set in the constructor and modified by {@link #_abort()} + * but (once set by the constructor) it is never <code>null</code> until the + * store is closed. + */ private volatile CommitRecordIndex _commitRecordIndex; /** @@ -688,74 +695,92 @@ } - /** - * Return the root block view associated with the commitRecord for the - * provided commit time. This requires accessing the next commit record - * since the previous root block is stored with each record. - * - * @param commitTime - * A commit time. - * - * @return The root block view -or- <code>null</code> if there is no commit - * record for that commitTime. 
- * - */ - public IRootBlockView getRootBlock(final long commitTime) { +// /** +// * Return the root block view associated with the commitRecord for the +// * provided commit time. This requires accessing the next commit record +// * since the previous root block is stored with each record. +// * +// * @param commitTime +// * A commit time. +// * +// * @return The root block view -or- <code>null</code> if there is no commit +// * record for that commitTime. +// */ +// @Deprecated // This method is unused and lacks a unit test. +// IRootBlockView getRootBlock(final long commitTime) { +// +// /* +// * Note: getCommitRecordStrictlyGreaterThan() uses appropriate +// * synchronization for the CommitRecordIndex. +// */ +// final ICommitRecord commitRecord = getCommitRecordStrictlyGreaterThan(commitTime); +// +// if (commitRecord == null) { +// +// return null; +// +// } +// +// final long rootBlockAddr = commitRecord.getRootAddr(PREV_ROOTBLOCK); +// +// if (rootBlockAddr == 0) { +// +// return null; +// +// } else { +// +// final ByteBuffer bb = read(rootBlockAddr); +// +// return new RootBlockView(true /* rb0 - WTH */, bb, checker); +// +// } +// +// } +// +// /** +// * +// * @param startTime from which to begin iteration +// * +// * @return an iterator over the committed root blocks +// */ +// @Deprecated +// /* +// * This is UNUSED AND NOT SAFE (used only in test suite by +// * StressTestConcurrentTx, which I have commented out) and not safe (because +// * it lacks the necessary locks to access the CommitRecordIndex). The code +// * is also wrong since it visits GT the commitTime when it should visit GTE +// * the commitTime. +// */ +// Iterator<IRootBlockView> getRootBlocks(final long startTime) { +// return new Iterator<IRootBlockView>() { +// ICommitRecord commitRecord = getCommitRecordIndex().findNext(startTime); +// +// public boolean hasNext() { +// return commitRecord != null; +// } +// +// public IRootBlockView next() { +// final long rootBlockAddr = commitRecord.getRootAddr(PREV_ROOTBLOCK); +// +// commitRecord = getCommitRecordIndex().findNext(commitRecord.getTimestamp()); +// +// if (rootBlockAddr == 0) { +// return null; +// } else { +// ByteBuffer bb = read(rootBlockAddr); +// +// return new RootBlockView(true /* rb0 - WTH */, bb, checker); +// } +// } +// +// public void remove() { +// throw new UnsupportedOperationException(); +// } +// +// }; +// } - final ICommitRecord commitRecord = getCommitRecordIndex().findNext(commitTime); - - if (commitRecord == null) { - return null; - } - - final long rootBlockAddr = commitRecord.getRootAddr(PREV_ROOTBLOCK); - - if (rootBlockAddr == 0) { - return null; - } else { - ByteBuffer bb = read(rootBlockAddr); - - return new RootBlockView(true /* rb0 - WTH */, bb, checker); - } - - } - /** - * - * @param startTime from which to begin iteration - * - * @return an iterator over the committed root blocks - */ - public Iterator<IRootBlockView> getRootBlocks(final long startTime) { - return new Iterator<IRootBlockView>() { - ICommitRecord commitRecord = getCommitRecordIndex().findNext(startTime); - - public boolean hasNext() { - return commitRecord != null; - } - - public IRootBlockView next() { - final long rootBlockAddr = commitRecord.getRootAddr(PREV_ROOTBLOCK); - - commitRecord = getCommitRecordIndex().findNext(commitRecord.getTimestamp()); - - if (rootBlockAddr == 0) { - return null; - } else { - ByteBuffer bb = read(rootBlockAddr); - - return new RootBlockView(true /* rb0 - WTH */, bb, checker); - } - } - - public void remove() { - throw new 
UnsupportedOperationException(); - } - - }; - } - - /** * True iff the journal was opened in a read-only mode. */ private final boolean readOnly; @@ -2924,7 +2949,25 @@ // The services joined with the met quorum, in their join order. joinedServiceIds = quorum.getJoined(); + + // The UUID for this service. + final UUID serviceId = quorum.getClient().getServiceId(); + if (joinedServiceIds.length == 0 + || !joinedServiceIds[0].equals(serviceId)) { + + /* + * Sanity check. Verify that the first service in the join + * order is *this* service. This is a precondition for the + * service to be the leader. + */ + + throw new RuntimeException("Not leader: serviceId=" + + serviceId + ", joinedServiceIds=" + + Arrays.toString(joinedServiceIds)); + + } + // The services in the write pipeline (in any order). nonJoinedPipelineServiceIds = new LinkedHashSet<UUID>( Arrays.asList(getQuorum().getPipeline())); @@ -2935,6 +2978,7 @@ nonJoinedPipelineServiceIds.remove(joinedServiceId); } + try { /** * CRITICAL SECTION. We need obtain a distributed consensus @@ -3924,7 +3968,7 @@ * * @return The read-only view of the {@link CommitRecordIndex}. */ - public IIndex getReadOnlyCommitRecordIndex() { + public CommitRecordIndex getReadOnlyCommitRecordIndex() { final ReadLock lock = _fieldReadWriteLock.readLock(); @@ -3933,12 +3977,13 @@ try { assertOpen(); - - final CommitRecordIndex commitRecordIndex = getCommitRecordIndex(_rootBlock - .getCommitRecordIndexAddr()); - return new ReadOnlyIndex(commitRecordIndex); + final CommitRecordIndex commitRecordIndex = getCommitRecordIndex( + _rootBlock.getCommitRecordIndexAddr(), true/* readOnly */); +// return new ReadOnlyIndex(commitRecordIndex); + return commitRecordIndex; + } finally { lock.unlock(); @@ -3946,48 +3991,40 @@ } } - - /** - * Return the current state of the index that resolves timestamps to - * {@link ICommitRecord}s. - * <p> - * Note: The returned object is NOT safe for concurrent operations and is - * NOT systematically protected by the use of synchronized blocks within - * this class. - * - * @return The {@link CommitRecordIndex}. - * - * @todo If you need access to this object in an outside class consider - * using {@link #getRootBlockView()}, - * {@link IRootBlockView#getCommitRecordIndexAddr()}, and - * {@link #getCommitRecord(long)} to obtain a distinct instance - * suitable for read-only access. - */ - protected CommitRecordIndex getCommitRecordIndex() { - final ReadLock lock = _fieldReadWriteLock.readLock(); +// /** +// * I have removed this method since the returned {@link CommitRecordIndex} +// * was being used without appropriate synchronization. There is a +// * {@link #getReadOnlyCommitRecordIndex()} which may be used in place of +// * this method. 
+// */ +// protected CommitRecordIndex getCommitRecordIndex() { +// +// final ReadLock lock = _fieldReadWriteLock.readLock(); +// +// lock.lock(); +// +// try { +// +// assertOpen(); +// +// final long commitRecordIndexAddr = _rootBlock.getCommitRecordIndexAddr(); +// +// final CommitRecordIndex commitRecordIndex = getCommitRecordIndex(addr); +// +// if (commitRecordIndex == null) +// throw new AssertionError(); +// +// return commitRecordIndex; +// +// } finally { +// +// lock.unlock(); +// +// } +// +// } - lock.lock(); - - try { - - assertOpen(); - - final CommitRecordIndex commitRecordIndex = _commitRecordIndex; - - if (commitRecordIndex == null) - throw new AssertionError(); - - return commitRecordIndex; - - } finally { - - lock.unlock(); - - } - - } - /** * Read and return the {@link CommitRecordIndex} from the current root * block. @@ -4007,7 +4044,8 @@ if (log.isDebugEnabled()) log.debug("Loading from addr=" + addr); - return getCommitRecordIndex(addr); + // Load the live index from the disk. + return getCommitRecordIndex(addr, false/* readOnly */); } catch (RuntimeException ex) { @@ -4023,30 +4061,31 @@ } /** - * Create or re-load the index that resolves timestamps to - * {@link ICommitRecord}s. - * <p> - * Note: The returned object is NOT cached. When addr is non-{@link #NULL}, - * each invocation will return a distinct {@link CommitRecordIndex} object. - * This behavior is partly for historical reasons but it does serve to - * protect the live {@link CommitRecordIndex} from outside access. This is - * important since access to the live {@link CommitRecordIndex} is NOT - * systematically protected by <code>synchronized</code> within this class. + * Create or load and return the index that resolves timestamps to + * {@link ICommitRecord}s. This method is capable of returning either the + * live {@link CommitRecordIndex} or a read-only view of any committed + * version of that index. * + * <strong>CAUTION: DO NOT EXPOSE THE LIVE COMMIT RECORD INDEX OUTSIDE OF + * THIS CLASS. IT IS NOT POSSIBLE TO HAVE CORRECT SYNCHRONIZATION ON THAT + * INDEX IN ANOTHER CLASS.</code> + * * @param addr * The root address of the index -or- 0L if the index has not - * been created yet. + * been created yet. When addr is non-{@link #NULL}, each + * invocation will return a distinct {@link CommitRecordIndex} + * object. * + * @param readOnly + * When <code>false</code> the returned is NOT cached. + * * @return The {@link CommitRecordIndex} for that address or a new index if - * 0L was specified as the address. + * <code>0L</code> was specified as the address. * * @see #_commitRecordIndex - * - * TODO We could modify getCommitRecordIndex() to accept - * readOnly:boolean and let it load/create a read-only view rather than - * wrapping it with a ReadOnlyIndex. */ - protected CommitRecordIndex getCommitRecordIndex(final long addr) { + protected CommitRecordIndex getCommitRecordIndex(final long addr, + final boolean readOnly) { if (log.isInfoEnabled()) log.info("addr=" + toString(addr)); @@ -4067,7 +4106,7 @@ * Note: if the journal is not the quorum leader then it is * effectively read-only. */ - if (isReadOnly()) { + if (isReadOnly() || readOnly) { ndx = CommitRecordIndex.createTransient(); @@ -4079,12 +4118,26 @@ } else { - /* - * Reload the mutable btree from its root address. - */ + if (readOnly) { - ndx = (CommitRecordIndex) BTree.load(this, addr, false/* readOnly */); + /* + * Read only view of the most CommitRecordIndex having + * that checkpointAddr. 
+ */ + + ndx = (CommitRecordIndex) getIndexWithCheckpointAddr(addr); + + } else { + + /* + * Reload the mutable btree from its root address. + * + * Note: For this code path we DO NOT cache the index view. + */ + ndx = (CommitRecordIndex) BTree.load(this, addr, false/* readOnly */); + + } } assert ndx != null; @@ -6118,6 +6171,8 @@ // Vote NO. vote.set(false); + doRejectedCommit(); + return vote.get(); } @@ -6125,6 +6180,16 @@ } // class VoteNoTask /** + * Method must be extended by subclass to coordinate the rejected + * commit. + */ + protected void doRejectedCommit() { + + doLocalAbort(); + + } + + /** * Task prepares for a 2-phase commit (syncs to the disk) and votes YES * iff if is able to prepare successfully. * @@ -6156,6 +6221,8 @@ */ public Boolean call() throws Exception { + try { + final IRootBlockView rootBlock = prepareMessage.getRootBlock(); if (haLog.isInfoEnabled()) @@ -6253,7 +6320,17 @@ vote.set(true); return vote.get(); + + } finally { + if(!vote.get()) { + + doRejectedCommit(); + + } + + } + } } @@ -6515,14 +6592,27 @@ + rootBlock); } - /* - * The HA log files are purged on each node any time the - * quorum is fully met and goes through a commit point. - * Leaving only the current open log file. - */ + if (commitMessage.didAllServicesPrepare()) { - localService.purgeHALogs(rootBlock.getQuorumToken()); + /* + * The HALog files are conditionally purged (depending + * on the IRestorePolicy) on each node any time the + * quorum is fully met and goes through a commit point. + * The current HALog always remains open. + * + * Note: This decision needs to be made in awareness of + * whether all services voted to PREPARE. Otherwise we + * can hit a problem where some service did not vote to + * prepare, but the other services did, and we wind up + * purging the HALogs even though one of the services + * did not go through the commit2Phase(). This issue is + * fixed by the didAllServicesPrepare() flag. 
+ */ + localService.purgeHALogs(rootBlock.getQuorumToken()); + + } + } catch (Throwable t) { try { Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractTask.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractTask.java 2013-05-17 17:40:53 UTC (rev 7141) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractTask.java 2013-05-17 18:05:59 UTC (rev 7142) @@ -2746,14 +2746,14 @@ return delegate.toString(addr); } - public IRootBlockView getRootBlock(final long commitTime) { - return delegate.getRootBlock(commitTime); - } +// public IRootBlockView getRootBlock(final long commitTime) { +// return delegate.getRootBlock(commitTime); +// } +// +// public Iterator<IRootBlockView> getRootBlocks(final long startTime) { +// return delegate.getRootBlocks(startTime); +// } - public Iterator<IRootBlockView> getRootBlocks(final long startTime) { - return delegate.getRootBlocks(startTime); - } - /* * IAllocationContext * @@ -3257,14 +3257,14 @@ return delegate.toString(addr); } - public IRootBlockView getRootBlock(long commitTime) { - return delegate.getRootBlock(commitTime); - } +// public IRootBlockView getRootBlock(long commitTime) { +// return delegate.getRootBlock(commitTime); +// } +// +// public Iterator<IRootBlockView> getRootBlocks(long startTime) { +// return delegate.getRootBlocks(startTime); +// } - public Iterator<IRootBlockView> getRootBlocks(long startTime) { - return delegate.getRootBlocks(startTime); - } - public ScheduledFuture<?> addScheduledTask(Runnable task, long initialDelay, long delay, TimeUnit unit) { return delegate.addScheduledTask(task, initialDelay, delay, unit); Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/DumpJournal.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/DumpJournal.java 2013-05-17 17:40:53 UTC (rev 7141) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/DumpJournal.java 2013-05-17 18:05:59 UTC (rev 7142) @@ -526,10 +526,14 @@ } } - - final CommitRecordIndex commitRecordIndex = journal - .getCommitRecordIndex(); + /* + * Note: A read-only view is used since the Journal could be exposed to + * concurrent operations through the NSS. + */ + final CommitRecordIndex commitRecordIndex = journal + .getReadOnlyCommitRecordIndex(); + out.println("There are " + commitRecordIndex.getEntryCount() + " commit points."); Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/IAtomicStore.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/IAtomicStore.java 2013-05-17 17:40:53 UTC (rev 7141) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/IAtomicStore.java 2013-05-17 18:05:59 UTC (rev 7142) @@ -27,8 +27,6 @@ package com.bigdata.journal; -import java.util.Iterator; - import com.bigdata.rawstore.IRawStore; /** @@ -122,24 +120,31 @@ */ public ICommitRecord getCommitRecord(long timestamp); - /** - * Return the root block view associated with the commitRecord for the - * provided commit time. This requires accessing the next commit record - * since it is the previous root block that is referenced from each record. - * - * @param commitTime - * A commit time. - * - * @return The root block view -or- <code>null</code> if there is no commit - * record for that commitTime. 
- */ - public IRootBlockView getRootBlock(final long commitTime); - - /** - * - * @param startTime from which to begin iteration - * - * @return an iterator over the committed root blocks - */ - public Iterator<IRootBlockView> getRootBlocks(final long startTime); + /* + * These methods have been removed from the public interface. They were only + * used by the test suite. Further, there were problems with the + * implementations. + */ + +// /** +// * Return the root block view associated with the commitRecord for the +// * provided commit time. This requires accessing the next commit record +// * since it is the previous root block that is referenced from each record. +// * +// * @param commitTime +// * A commit time. +// * +// * @return The root block view -or- <code>null</code> if there is no commit +// * record for that commitTime. +// */ +// public IRootBlockView getRootBlock(final long commitTime); +// +// /** +// * +// * @param startTime from which to begin iteration +// * +// * @return an iterator over the committed root blocks +// */ +// public Iterator<IRootBlockView> getRootBlocks(final long startTime); + } Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/Journal.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/Journal.java 2013-05-17 17:40:53 UTC (rev 7141) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/Journal.java 2013-05-17 18:05:59 UTC (rev 7142) @@ -520,6 +520,7 @@ final long nanos = timeoutNanos; long remaining = nanos; + // Verify that this service is still the leader. getQuorum().assertLeader(token); // /* @@ -2320,7 +2321,7 @@ // final Tx tx = (Tx) (isReadWriteTx ? getConcurrencyManager() // .getTransactionManager().getTx(timestamp) : null); - final Tx tx = (Tx) getConcurrencyManager().getTransactionManager() + final Tx tx = (Tx) /*getConcurrencyManager().*/getTransactionManager() .getTx(timestamp); if (isReadWriteTx) { Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/JournalDelegate.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/JournalDelegate.java 2013-05-17 17:40:53 UTC (rev 7141) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/JournalDelegate.java 2013-05-17 18:05:59 UTC (rev 7142) @@ -40,7 +40,6 @@ import com.bigdata.btree.IndexMetadata; import com.bigdata.counters.CounterSet; import com.bigdata.mdi.IResourceMetadata; -import com.bigdata.rawstore.IAllocationContext; import com.bigdata.rawstore.IPSOutputStream; import com.bigdata.relation.locator.IResourceLocator; import com.bigdata.sparse.SparseRowStore; @@ -86,17 +85,17 @@ return delegate.getRootAddr(index); } - public IRootBlockView getRootBlock(long commitTime) { - return delegate.getRootBlock(commitTime); - } - public IRootBlockView getRootBlockView() { return delegate.getRootBlockView(); } - public Iterator<IRootBlockView> getRootBlocks(long startTime) { - return delegate.getRootBlocks(startTime); - } +// public IRootBlockView getRootBlock(long commitTime) { +// return delegate.getRootBlock(commitTime); +// } +// +// public Iterator<IRootBlockView> getRootBlocks(long startTime) { +// return delegate.getRootBlocks(startTime); +// } public void setCommitter(int index, ICommitter committer) { delegate.setCommitter(index, committer); Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/JournalTransactionService.java 
=================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/JournalTransactionService.java 2013-05-17 17:40:53 UTC (rev 7141) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/JournalTransactionService.java 2013-05-17 18:05:59 UTC (rev 7142) @@ -127,10 +127,19 @@ } protected long findNextCommitTime(final long commitTime) { - - final ICommitRecord commitRecord = journal.getCommitRecordIndex() - .findNext(commitTime); - + + /* + * Note: The following code did not obtain the appropriate lock to + * access the CommitRecordIndex. It was replaced by the + * getCommitRecordStrictlyGreaterThan() call, which does take the + * necessary lock and does the same thing. + */ +// final ICommitRecord commitRecord = journal.getCommitRecordIndex() +// .findNext(commitTime); + + final ICommitRecord commitRecord = journal + .getCommitRecordStrictlyGreaterThan(commitTime); + if(commitRecord == null) { return -1L; Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/resources/StoreManager.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/resources/StoreManager.java 2013-05-17 17:40:53 UTC (rev 7141) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/resources/StoreManager.java 2013-05-17 18:05:59 UTC (rev 7142) @@ -2556,10 +2556,11 @@ * for the live journal. */ @Override - public CommitRecordIndex getCommitRecordIndex(final long addr) { + public CommitRecordIndex getCommitRecordIndex(final long addr, + final boolean readOnly) { + + return super.getCommitRecordIndex(addr, readOnly); - return super.getCommitRecordIndex(addr); - } @Override @@ -4438,7 +4439,7 @@ */ final CommitRecordIndex commitRecordIndex = journal .getCommitRecordIndex(journal.getRootBlockView() - .getCommitRecordIndexAddr()); + .getCommitRecordIndexAddr(),true/*readOnly*/); /* * A per-journal hash set of the [checkpointAddr] for the Modified: branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournal.java =================================================================== --- branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournal.java 2013-05-17 17:40:53 UTC (rev 7141) +++ branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournal.java 2013-05-17 18:05:59 UTC (rev 7142) @@ -447,7 +447,7 @@ super.setQuorumToken(newValue); - if (newValue == Quorum.NO_QUORUM) { + if (getHAReady() == Quorum.NO_QUORUM) { /* * If there is a running snapshot, then cancel it since the quorum @@ -1500,25 +1500,22 @@ if (f == null) return null; - haLog.warn("Started REBUILD"); + haLog.warn("Started REBUILD: runState=" + innerRunState); return getProxy(f, true/* async */); } case Rebuild: // Already running rebuild. - return null; case Restore: // Running restore. Can not do rebuild. - return null; case Resync: // Running resync. Can not do rebuild. - return null; case RunMet: // RunMet. Can not do rebuild. - return null; case Shutdown: // Shutting down. Can not do rebuild. + haLog.warn("Can not REBUILD: runState=" + innerRunState); return null; default: // Unknown run state. @@ -1731,6 +1728,21 @@ } /** + * {@inheritDoc} + * <p> + * Extended to kick the {@link HAJournalServer} into an error state. It + * will recover from that error state by re-entering seek consensus. 
+ */ + @Override + protected void doRejectedCommit() { + + super.doRejectedCommit(); + + getQuorumService().enterErrorState(); + + } + + /** * Return this quorum member, appropriately cast. * * @return The quorum member -or- <code>null</code> if the quorum is not Modified: branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournalServer.java =================================================================== --- branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournalServer.java 2013-05-17 17:40:53 UTC (rev 7141) +++ branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournalServer.java 2013-05-17 18:05:59 UTC (rev 7142) @@ -1579,6 +1579,33 @@ } finally { logLock.unlock(); } + + // Force a service leave. + getQuorum().getActor().serviceLeave(); + +// /* +// * Set token. Journal will notice that it is no longer +// * "HA Ready" +// * +// * Note: AbstractJournal.setQuorumToken() will detect case where +// * it transitions from a met quorum through a service leave and +// * will clear its haReady token and update its haStatus field +// * appropriately. +// * +// * FIXME There may be a data race here. The quorum.token() might +// * be be cleared by the time we call +// * setQuorumToken(quorum.token()) so we may have to explicitly +// * "clear" the journal token by passing in NO_QUORUM. +// */ +// journal.setQuorumToken(Quorum.NO_QUORUM); +// +// try { +// journal.getHALogNexus().disableHALog(); +// } catch (IOException e) { +// haLog.error(e, e); +// } + + // Seek consensus. enterRunState(new SeekConsensusTask()); return null; Modified: branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHAJournalServerOverride.java =================================================================== --- branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHAJournalServerOverride.java 2013-05-17 17:40:53 UTC (rev 7141) +++ branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHAJournalServerOverride.java 2013-05-17 18:05:59 UTC (rev 7142) @@ -161,18 +161,17 @@ * {@link HACommitGlue#prepare2Phase(IHA2PhasePrepareMessage)} to throw an * exeption. A simple transaction is performed. We verify that the * transaction completes successfully, that the quorum token is unchanged, - * and that [A,C] both participated in the commit. + * and that [A,C] both participated in the commit. We also verify that B is + * moved to the end of the pipeline (by doing a serviceLeave and then + * re-entering the pipeline) and that it resyncs with the met quorum and + * finally re-joins with the met quorum. The quorum should not break across + * this test. * - * TODO Spot check the {@link HAStatusEnum} on all services after the B - * fails the prepare2Phase request. + * FIXME Variant where the GATHER failed. * - * FIXME Extend test and its variant to verify that B RESYNCs and joins the - * met quourum. (B should do a serviceLeave() then SeekConsensus, RESYNC, - * and serviceJoin()). - * - * FIXME Variant where B votes "NO" rather than throwing an exception. + * FIXME Variant where the commit2Phase fails. */ - public void testStartABC_prepare2Phase_B_throws_exception() + public void testStartABC_prepare2Phase_B_votes_NO() throws Exception { // Enforce the join order. @@ -187,12 +186,73 @@ // Setup B to vote "NO" on the next PREPARE request. ((HAGlueTest) startup.serverB).voteNo(); -// // Setup B to fail the next PREPARE request. 
-// ((HAGlueTest) startup.serverB) -// .failNext("prepare2Phase", -// new Class[] { IHA2PhasePrepareMessage.class }, -// 0/* nwait */, 1/* nfail */); + // Simple transaction. + simpleTransaction(); + + // Verify quorum is unchanged. + assertEquals(token, quorum.token()); + + // Should be two commit points on {A,C]. + awaitCommitCounter(2L, startup.serverA, startup.serverC); + + /* + * B should go into an ERROR state and then into SeekConsensus and from + * there to RESYNC and finally back to RunMet. We can not reliably + * observe the intervening states. So what we really need to do is watch + * for B to move to the end of the pipeline and catch up to the same + * commit point. + */ + /* + * The pipeline should be reordered. B will do a service leave, then + * enter seek consensus, and then re-enter the pipeline. + */ + awaitPipeline(new HAGlue[] { startup.serverA, startup.serverC, + startup.serverB }); + + /* + * There should be two commit points on {A,C,B} (note that this assert + * does not pay attention to the pipeline order). + */ + awaitCommitCounter(2L, startup.serverA, startup.serverC, + startup.serverB); + + // B should be a follower again. + awaitHAStatus(startup.serverB, HAStatusEnum.Follower); + + // quorum token is unchanged. + assertEquals(token, quorum.token()); + + } + + /** + * Three services are started in [A,B,C] order. B is setup for + * {@link HACommitGlue#prepare2Phase(IHA2PhasePrepareMessage)} to vote "NO". + * A simple transaction is performed. We verify that the transaction + * completes successfully, that the quorum token is unchanged, and that + * [A,C] both participated in the commit. We also verify that B is moved to + * the end of the pipeline (by doing a serviceLeave and then re-entering the + * pipeline) and that it resyncs with the met quorum and finally re-joins + * with the met quorum. The quorum should not break across this test. + */ + public void testStartABC_prepare2Phase_B_throws_exception() + throws Exception { + + // Enforce the join order. + final ABC startup = new ABC(true /*sequential*/); + + final long token = awaitFullyMetQuorum(); + + // Should be one commit point. + awaitCommitCounter(1L, startup.serverA, startup.serverB, + startup.serverC); + + // Setup B to fail the next PREPARE request. + ((HAGlueTest) startup.serverB) + .failNext("prepare2Phase", + new Class[] { IHA2PhasePrepareMessage.class }, + 0/* nwait */, 1/* nfail */); + // Simple transaction. simpleTransaction(); @@ -202,6 +262,63 @@ // Should be two commit points on {A,C]. awaitCommitCounter(2L, startup.serverA, startup.serverC); + /* + * FIXME Unlike the test above, if there is a problem making the RMI + * call, then B will not go through its doRejectedCommit() handler and + * will not enter the ERROR state directly. We need to have B notice + * that it is no longer at the same commit point, e.g., by observing a + * LIVE write cache message with an unexpected value for the + * commitCounter (especially, GT its current expected value). That is + * the indication that B needs to enter an error state. Until then it + * does not know that there was an attempt to PREPARE since it did not + * get the prepare2Phase() message. + * + * - Modify HAJournalServer to enter the error state if we observe a + * live write cache block for a commitCounter != the expected + * commitCounter. + * + * - Modify commit2Phase() to accept the #of services that are + * participating in the commit. 
If it is not a full quorum, then we can + * not purge the HA logs in commit2Phase() regardless of what the quorum + * state looks like. + * + * - Modify this test to do another transaction. B can not notice the + * problem until there is another write cache flushed through the + * pipeline. + * + * - Modify this test to await B to move to the end of the pipeline, + * resync, and rejoin. + */ + + // Should be two commit points on {A,C}. + awaitCommitCounter(2L, startup.serverA, startup.serverC); + + // Should be ONE commit points on {B}. + awaitCommitCounter(1L, startup.serverB); + + // A commit is necessary for B to notice that it did not prepare. + simpleTransaction(); + + /* + * The pipeline should be reordered. B will do a service leave, then + * enter seek consensus, and then re-enter the pipeline. + */ + awaitPipeline(new HAGlue[] { startup.serverA, startup.serverC, + startup.serverB }); + + /* + * There should be three commit points on {A,C,B} (note that this assert + * does not pay attention to the pipeline order). + */ + awaitCommitCounter(3L, startup.serverA, startup.serverC, + startup.serverB); + + // B should be a follower again. + awaitHAStatus(startup.serverB, HAStatusEnum.Follower); + + // quorum token is unchanged. + assertEquals(token, quorum.token()); + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
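
For reference, the FIXME in the TestHAJournalServerOverride change above proposes that a follower which never received the prepare2Phase() RMI should detect the missed commit by watching the live write cache messages: a block whose commitCounter differs from the expected value (especially one greater than it) is the signal to enter the error state and resync. A minimal, self-contained sketch of that check follows; LiveWriteCacheGuard, acceptLiveWriteCacheBlock and the Runnable error-state hook are hypothetical names used only for illustration, not the actual HAJournalServer API.

    /**
     * Illustrative sketch only: a hypothetical stand-in for the pipeline-side
     * check proposed in the FIXME above. Not part of the committed code.
     */
    public class LiveWriteCacheGuard {

        /** Commit counter expected for the next live write cache block. */
        private final long expectedCommitCounter;

        /** Hook standing in for the service's transition into the ERROR state. */
        private final Runnable enterErrorState;

        public LiveWriteCacheGuard(final long expectedCommitCounter,
                final Runnable enterErrorState) {
            this.expectedCommitCounter = expectedCommitCounter;
            this.enterErrorState = enterErrorState;
        }

        /**
         * Check a live write cache message before applying it. A commitCounter
         * other than the expected one (especially GT the expected value) means
         * this service missed a commit, e.g., because it never received the
         * prepare2Phase() message, so it must enter the error state and resync.
         *
         * @return true iff the block may be applied.
         */
        public boolean acceptLiveWriteCacheBlock(final long messageCommitCounter) {
            if (messageCommitCounter != expectedCommitCounter) {
                enterErrorState.run(); // service leave, seek consensus, resync
                return false;
            }
            return true;
        }

        public static void main(final String[] args) {
            final LiveWriteCacheGuard guard = new LiveWriteCacheGuard(1L,
                    new Runnable() {
                        public void run() {
                            System.out.println("enterErrorState(): seek consensus and resync");
                        }
                    });
            System.out.println(guard.acceptLiveWriteCacheBlock(1L)); // true: at the expected commit point
            System.out.println(guard.acceptLiveWriteCacheBlock(2L)); // false: missed a commit, enter error state
        }
    }

Under that assumption the second test above needs only the extra simpleTransaction() it already performs: the next live write cache block carries the larger commitCounter, B trips the check, does a service leave, resyncs, and re-joins at the end of the pipeline, which is exactly what the awaitPipeline/awaitCommitCounter assertions then verify.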
From: <tho...@us...> - 2013-05-18 11:52:27
|
Revision: 7144 http://bigdata.svn.sourceforge.net/bigdata/?rev=7144&view=rev Author: thompsonbry Date: 2013-05-18 11:52:12 +0000 (Sat, 18 May 2013) Log Message: ----------- Merging from dev branch to the READ_CACHE branch. @see https://sourceforge.net/apps/trac/bigdata/ticket/530 (Journal HA) Modified Paths: -------------- branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/service/ServiceRegistry.java Added Paths: ----------- branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/ValuesServiceFactory.java Property Changed: ---------------- branches/READ_CACHE/ branches/READ_CACHE/bigdata/lib/jetty/ branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/aggregate/ branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/joinGraph/ branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/util/ branches/READ_CACHE/bigdata/src/java/com/bigdata/htree/raba/ branches/READ_CACHE/bigdata/src/java/com/bigdata/jsr166/ branches/READ_CACHE/bigdata/src/test/com/bigdata/bop/joinGraph/ branches/READ_CACHE/bigdata/src/test/com/bigdata/bop/util/ branches/READ_CACHE/bigdata/src/test/com/bigdata/jsr166/ branches/READ_CACHE/bigdata/src/test/com/bigdata/util/httpd/ branches/READ_CACHE/bigdata-compatibility/ branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/attr/ branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/disco/ branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/util/config/ branches/READ_CACHE/bigdata-perf/ branches/READ_CACHE/bigdata-perf/btc/ branches/READ_CACHE/bigdata-perf/btc/src/resources/ branches/READ_CACHE/bigdata-perf/lubm/ branches/READ_CACHE/bigdata-perf/uniprot/ branches/READ_CACHE/bigdata-perf/uniprot/src/ branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/ branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/changesets/ branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/error/ branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/internal/ branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/relation/ branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/util/ branches/READ_CACHE/bigdata-rdf/src/samples/ branches/READ_CACHE/bigdata-rdf/src/test/com/bigdata/bop/rdf/aggregate/ branches/READ_CACHE/bigdata-rdf/src/test/com/bigdata/rdf/internal/ branches/READ_CACHE/bigdata-rdf/src/test/com/bigdata/rdf/relation/ branches/READ_CACHE/bigdata-sails/src/java/com/bigdata/rdf/sail/bench/ branches/READ_CACHE/bigdata-sails/src/java/com/bigdata/rdf/sail/changesets/ branches/READ_CACHE/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/ branches/READ_CACHE/bigdata-sails/src/test/com/bigdata/rdf/sail/bench/ branches/READ_CACHE/bigdata-sails/src/test/com/bigdata/rdf/sail/webapp/ branches/READ_CACHE/dsi-utils/ branches/READ_CACHE/dsi-utils/LEGAL/ branches/READ_CACHE/dsi-utils/lib/ branches/READ_CACHE/dsi-utils/src/ branches/READ_CACHE/dsi-utils/src/java/ branches/READ_CACHE/dsi-utils/src/java/it/ branches/READ_CACHE/dsi-utils/src/java/it/unimi/ branches/READ_CACHE/dsi-utils/src/test/ branches/READ_CACHE/dsi-utils/src/test/it/unimi/ branches/READ_CACHE/dsi-utils/src/test/it/unimi/dsi/ branches/READ_CACHE/lgpl-utils/src/java/it/unimi/dsi/fastutil/bytes/custom/ branches/READ_CACHE/lgpl-utils/src/test/it/unimi/dsi/fastutil/bytes/custom/ branches/READ_CACHE/osgi/ branches/READ_CACHE/src/resources/bin/config/ Property changes on: branches/READ_CACHE ___________________________________________________________________ Modified: svn:mergeinfo - /branches/BIGDATA_OPENRDF_2_6_9_UPDATE:6769-6785 /branches/BIGDATA_RELEASE_1_2_0:6766-7138 
/branches/BTREE_BUFFER_BRANCH:2004-2045 /branches/DEV_BRANCH_27_OCT_2009:2270-2546,2548-2782 /branches/INT64_BRANCH:4486-4522 /branches/JOURNAL_HA_BRANCH:2596-4066 /branches/LARGE_LITERALS_REFACTOR:4175-4387 /branches/LEXICON_REFACTOR_BRANCH:2633-3304 /branches/QUADS_QUERY_BRANCH:4525-4531,4550-4584,4586-4609,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH:4814-4836 /branches/bugfix-btm:2594-3237 /branches/dev-btm:2574-2730 /branches/fko:3150-3194 /trunk:3392-3437,3656-4061 + /branches/BIGDATA_OPENRDF_2_6_9_UPDATE:6769-6785 /branches/BIGDATA_RELEASE_1_2_0:6766-7143 /branches/BTREE_BUFFER_BRANCH:2004-2045 /branches/DEV_BRANCH_27_OCT_2009:2270-2546,2548-2782 /branches/INT64_BRANCH:4486-4522 /branches/JOURNAL_HA_BRANCH:2596-4066 /branches/LARGE_LITERALS_REFACTOR:4175-4387 /branches/LEXICON_REFACTOR_BRANCH:2633-3304 /branches/QUADS_QUERY_BRANCH:4525-4531,4550-4584,4586-4609,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH:4814-4836 /branches/bugfix-btm:2594-3237 /branches/dev-btm:2574-2730 /branches/fko:3150-3194 /trunk:3392-3437,3656-4061 Property changes on: branches/READ_CACHE/bigdata/lib/jetty ___________________________________________________________________ Modified: svn:mergeinfo - /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata/lib/jetty:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata/lib/jetty:6766-7138 /branches/INT64_BRANCH/bigdata/lib/jetty:4486-4522 /branches/QUADS_QUERY_BRANCH/bigdata/lib/jetty:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG/bigdata/lib/jetty:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata/lib/jetty:4814-4836 + /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata/lib/jetty:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata/lib/jetty:6766-7143 /branches/INT64_BRANCH/bigdata/lib/jetty:4486-4522 /branches/QUADS_QUERY_BRANCH/bigdata/lib/jetty:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG/bigdata/lib/jetty:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata/lib/jetty:4814-4836 Property changes on: branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/aggregate ___________________________________________________________________ Modified: svn:mergeinfo - /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata/src/java/com/bigdata/bop/aggregate:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/aggregate:6766-7138 /branches/INT64_BRANCH/bigdata/src/java/com/bigdata/bop/aggregate:4486-4522 /branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregate:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG/bigdata/src/java/com/bigdata/bop/aggregate:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata/src/java/com/bigdata/bop/aggregate:4814-4836 + /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata/src/java/com/bigdata/bop/aggregate:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/aggregate:6766-7143 /branches/INT64_BRANCH/bigdata/src/java/com/bigdata/bop/aggregate:4486-4522 
/branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregate:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG/bigdata/src/java/com/bigdata/bop/aggregate:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata/src/java/com/bigdata/bop/aggregate:4814-4836 Property changes on: branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/joinGraph ___________________________________________________________________ Modified: svn:mergeinfo - /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata/src/java/com/bigdata/bop/joinGraph:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/joinGraph:6766-7138 /branches/INT64_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph:4486-4522 /branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG/bigdata/src/java/com/bigdata/bop/joinGraph:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph:4814-4836 + /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata/src/java/com/bigdata/bop/joinGraph:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/joinGraph:6766-7143 /branches/INT64_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph:4486-4522 /branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG/bigdata/src/java/com/bigdata/bop/joinGraph:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph:4814-4836 Property changes on: branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/util ___________________________________________________________________ Modified: svn:mergeinfo - /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata/src/java/com/bigdata/bop/util:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/util:6766-7138 /branches/INT64_BRANCH/bigdata/src/java/com/bigdata/bop/util:4486-4522 /branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/util:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG/bigdata/src/java/com/bigdata/bop/util:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata/src/java/com/bigdata/bop/util:4814-4836 + /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata/src/java/com/bigdata/bop/util:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/util:6766-7143 /branches/INT64_BRANCH/bigdata/src/java/com/bigdata/bop/util:4486-4522 /branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/util:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG/bigdata/src/java/com/bigdata/bop/util:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata/src/java/com/bigdata/bop/util:4814-4836 Property changes on: branches/READ_CACHE/bigdata/src/java/com/bigdata/htree/raba ___________________________________________________________________ Modified: svn:mergeinfo - /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata/src/java/com/bigdata/htree/raba:6769-6785 
/branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/htree/raba:6766-7138 /branches/INT64_BRANCH/bigdata/src/java/com/bigdata/htree/raba:4486-4522 /branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/raba:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG/bigdata/src/java/com/bigdata/htree/raba:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata/src/java/com/bigdata/htree/raba:4814-4836 + /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata/src/java/com/bigdata/htree/raba:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/htree/raba:6766-7143 /branches/INT64_BRANCH/bigdata/src/java/com/bigdata/htree/raba:4486-4522 /branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/raba:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG/bigdata/src/java/com/bigdata/htree/raba:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata/src/java/com/bigdata/htree/raba:4814-4836 Property changes on: branches/READ_CACHE/bigdata/src/java/com/bigdata/jsr166 ___________________________________________________________________ Modified: svn:mergeinfo - /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata/src/java/com/bigdata/jsr166:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/jsr166:6766-7138 /branches/INT64_BRANCH/bigdata/src/java/com/bigdata/jsr166:4486-4522 /branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/jsr166:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG/bigdata/src/java/com/bigdata/jsr166:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata/src/java/com/bigdata/jsr166:4814-4836 + /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata/src/java/com/bigdata/jsr166:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/jsr166:6766-7143 /branches/INT64_BRANCH/bigdata/src/java/com/bigdata/jsr166:4486-4522 /branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/jsr166:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG/bigdata/src/java/com/bigdata/jsr166:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata/src/java/com/bigdata/jsr166:4814-4836 Property changes on: branches/READ_CACHE/bigdata/src/test/com/bigdata/bop/joinGraph ___________________________________________________________________ Modified: svn:mergeinfo - /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata/src/test/com/bigdata/bop/joinGraph:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/bop/joinGraph:6766-7138 /branches/INT64_BRANCH/bigdata/src/test/com/bigdata/bop/joinGraph:4486-4522 /branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/joinGraph:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG/bigdata/src/test/com/bigdata/bop/joinGraph:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata/src/test/com/bigdata/bop/joinGraph:4814-4836 + /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata/src/test/com/bigdata/bop/joinGraph:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/bop/joinGraph:6766-7143 
/branches/INT64_BRANCH/bigdata/src/test/com/bigdata/bop/joinGraph:4486-4522 /branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/joinGraph:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG/bigdata/src/test/com/bigdata/bop/joinGraph:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata/src/test/com/bigdata/bop/joinGraph:4814-4836 Property changes on: branches/READ_CACHE/bigdata/src/test/com/bigdata/bop/util ___________________________________________________________________ Modified: svn:mergeinfo - /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata/src/test/com/bigdata/bop/util:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/bop/util:6766-7138 /branches/INT64_BRANCH/bigdata/src/test/com/bigdata/bop/util:4486-4522 /branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/util:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG/bigdata/src/test/com/bigdata/bop/util:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata/src/test/com/bigdata/bop/util:4814-4836 + /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata/src/test/com/bigdata/bop/util:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/bop/util:6766-7143 /branches/INT64_BRANCH/bigdata/src/test/com/bigdata/bop/util:4486-4522 /branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/util:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG/bigdata/src/test/com/bigdata/bop/util:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata/src/test/com/bigdata/bop/util:4814-4836 Property changes on: branches/READ_CACHE/bigdata/src/test/com/bigdata/jsr166 ___________________________________________________________________ Modified: svn:mergeinfo - /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata/src/test/com/bigdata/jsr166:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/jsr166:6766-7138 /branches/INT64_BRANCH/bigdata/src/test/com/bigdata/jsr166:4486-4522 /branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/jsr166:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG/bigdata/src/test/com/bigdata/jsr166:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata/src/test/com/bigdata/jsr166:4814-4836 + /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata/src/test/com/bigdata/jsr166:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/jsr166:6766-7143 /branches/INT64_BRANCH/bigdata/src/test/com/bigdata/jsr166:4486-4522 /branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/jsr166:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG/bigdata/src/test/com/bigdata/jsr166:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata/src/test/com/bigdata/jsr166:4814-4836 Property changes on: branches/READ_CACHE/bigdata/src/test/com/bigdata/util/httpd ___________________________________________________________________ Modified: svn:mergeinfo - /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata/src/test/com/bigdata/util/httpd:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/util/httpd:6766-7138 
/branches/INT64_BRANCH/bigdata/src/test/com/bigdata/util/httpd:4486-4522 /branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/util/httpd:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG/bigdata/src/test/com/bigdata/util/httpd:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata/src/test/com/bigdata/util/httpd:4814-4836 + /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata/src/test/com/bigdata/util/httpd:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/util/httpd:6766-7143 /branches/INT64_BRANCH/bigdata/src/test/com/bigdata/util/httpd:4486-4522 /branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/util/httpd:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG/bigdata/src/test/com/bigdata/util/httpd:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata/src/test/com/bigdata/util/httpd:4814-4836 Property changes on: branches/READ_CACHE/bigdata-compatibility ___________________________________________________________________ Modified: svn:mergeinfo - /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata-compatibility:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata-compatibility:6766-7138 /branches/INT64_BRANCH/bigdata-compatibility:4486-4522 /branches/LARGE_LITERALS_REFACTOR/bigdata-compatibility:4175-4387 /branches/QUADS_QUERY_BRANCH/bigdata-compatibility:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG/bigdata-compatibility:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata-compatibility:4814-4836 + /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata-compatibility:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata-compatibility:6766-7143 /branches/INT64_BRANCH/bigdata-compatibility:4486-4522 /branches/LARGE_LITERALS_REFACTOR/bigdata-compatibility:4175-4387 /branches/QUADS_QUERY_BRANCH/bigdata-compatibility:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG/bigdata-compatibility:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata-compatibility:4814-4836 Property changes on: branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/attr ___________________________________________________________________ Modified: svn:mergeinfo - /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata-jini/src/java/com/bigdata/attr:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata-jini/src/java/com/bigdata/attr:6766-7138 /branches/BTREE_BUFFER_BRANCH/bigdata-jini/src/java/com/bigdata/attr:2004-2045 /branches/DEV_BRANCH_27_OCT_2009/bigdata-jini/src/java/com/bigdata/attr:2270-2546,2548-2782 /branches/INT64_BRANCH/bigdata-jini/src/java/com/bigdata/attr:4486-4522 /branches/JOURNAL_HA_BRANCH/bigdata-jini/src/java/com/bigdata/attr:2596-4066 /branches/LARGE_LITERALS_REFACTOR/bigdata-jini/src/java/com/bigdata/attr:4175-4387 /branches/LEXICON_REFACTOR_BRANCH/bigdata-jini/src/java/com/bigdata/attr:2633-3304 /branches/QUADS_QUERY_BRANCH/bigdata-jini/src/java/com/bigdata/attr:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG/bigdata-jini/src/java/com/bigdata/attr:5896-5935 
/branches/TIDS_PLUS_BLOBS_BRANCH/bigdata-jini/src/java/com/bigdata/attr:4814-4836 /branches/bugfix-btm/bigdata-jini/src/java/com/bigdata/attr:2594-3237 /branches/dev-btm/bigdata-jini/src/java/com/bigdata/attr:2574-2730 /branches/fko/bigdata-jini/src/java/com/bigdata/attr:3150-3194 /trunk/bigdata-jini/src/java/com/bigdata/attr:2981-3282,3368-3437,3656-4061 + /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata-jini/src/java/com/bigdata/attr:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata-jini/src/java/com/bigdata/attr:6766-7143 /branches/BTREE_BUFFER_BRANCH/bigdata-jini/src/java/com/bigdata/attr:2004-2045 /branches/DEV_BRANCH_27_OCT_2009/bigdata-jini/src/java/com/bigdata/attr:2270-2546,2548-2782 /branches/INT64_BRANCH/bigdata-jini/src/java/com/bigdata/attr:4486-4522 /branches/JOURNAL_HA_BRANCH/bigdata-jini/src/java/com/bigdata/attr:2596-4066 /branches/LARGE_LITERALS_REFACTOR/bigdata-jini/src/java/com/bigdata/attr:4175-4387 /branches/LEXICON_REFACTOR_BRANCH/bigdata-jini/src/java/com/bigdata/attr:2633-3304 /branches/QUADS_QUERY_BRANCH/bigdata-jini/src/java/com/bigdata/attr:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG/bigdata-jini/src/java/com/bigdata/attr:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata-jini/src/java/com/bigdata/attr:4814-4836 /branches/bugfix-btm/bigdata-jini/src/java/com/bigdata/attr:2594-3237 /branches/dev-btm/bigdata-jini/src/java/com/bigdata/attr:2574-2730 /branches/fko/bigdata-jini/src/java/com/bigdata/attr:3150-3194 /trunk/bigdata-jini/src/java/com/bigdata/attr:2981-3282,3368-3437,3656-4061 Property changes on: branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/disco ___________________________________________________________________ Modified: svn:mergeinfo - /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata-jini/src/java/com/bigdata/disco:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata-jini/src/java/com/bigdata/disco:6766-7138 /branches/BTREE_BUFFER_BRANCH/bigdata-jini/src/java/com/bigdata/disco:2004-2045 /branches/DEV_BRANCH_27_OCT_2009/bigdata-jini/src/java/com/bigdata/disco:2270-2546,2548-2782 /branches/INT64_BRANCH/bigdata-jini/src/java/com/bigdata/disco:4486-4522 /branches/JOURNAL_HA_BRANCH/bigdata-jini/src/java/com/bigdata/disco:2596-4066 /branches/LARGE_LITERALS_REFACTOR/bigdata-jini/src/java/com/bigdata/disco:4175-4387 /branches/LEXICON_REFACTOR_BRANCH/bigdata-jini/src/java/com/bigdata/disco:2633-3304 /branches/QUADS_QUERY_BRANCH/bigdata-jini/src/java/com/bigdata/disco:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG/bigdata-jini/src/java/com/bigdata/disco:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata-jini/src/java/com/bigdata/disco:4814-4836 /branches/bugfix-btm/bigdata-jini/src/java/com/bigdata/disco:2594-3237 /branches/dev-btm/bigdata-jini/src/java/com/bigdata/disco:2574-2730 /branches/fko/bigdata-jini/src/java/com/bigdata/disco:3150-3194 /trunk/bigdata-jini/src/java/com/bigdata/disco:2981-3282,3368-3437,3656-4061 + /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata-jini/src/java/com/bigdata/disco:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata-jini/src/java/com/bigdata/disco:6766-7143 /branches/BTREE_BUFFER_BRANCH/bigdata-jini/src/java/com/bigdata/disco:2004-2045 /branches/DEV_BRANCH_27_OCT_2009/bigdata-jini/src/java/com/bigdata/disco:2270-2546,2548-2782 
/branches/INT64_BRANCH/bigdata-jini/src/java/com/bigdata/disco:4486-4522 /branches/JOURNAL_HA_BRANCH/bigdata-jini/src/java/com/bigdata/disco:2596-4066 /branches/LARGE_LITERALS_REFACTOR/bigdata-jini/src/java/com/bigdata/disco:4175-4387 /branches/LEXICON_REFACTOR_BRANCH/bigdata-jini/src/java/com/bigdata/disco:2633-3304 /branches/QUADS_QUERY_BRANCH/bigdata-jini/src/java/com/bigdata/disco:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG/bigdata-jini/src/java/com/bigdata/disco:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata-jini/src/java/com/bigdata/disco:4814-4836 /branches/bugfix-btm/bigdata-jini/src/java/com/bigdata/disco:2594-3237 /branches/dev-btm/bigdata-jini/src/java/com/bigdata/disco:2574-2730 /branches/fko/bigdata-jini/src/java/com/bigdata/disco:3150-3194 /trunk/bigdata-jini/src/java/com/bigdata/disco:2981-3282,3368-3437,3656-4061 Property changes on: branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/util/config ___________________________________________________________________ Modified: svn:mergeinfo - /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata-jini/src/java/com/bigdata/util/config:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata-jini/src/java/com/bigdata/util/config:6766-7138 /branches/BTREE_BUFFER_BRANCH/bigdata-jini/src/java/com/bigdata/util/config:2004-2045 /branches/DEV_BRANCH_27_OCT_2009/bigdata-jini/src/java/com/bigdata/util/config:2270-2546,2548-2782 /branches/INT64_BRANCH/bigdata-jini/src/java/com/bigdata/util/config:4486-4522 /branches/JOURNAL_HA_BRANCH/bigdata-jini/src/java/com/bigdata/util/config:2596-4066 /branches/LARGE_LITERALS_REFACTOR/bigdata-jini/src/java/com/bigdata/util/config:4175-4387 /branches/LEXICON_REFACTOR_BRANCH/bigdata-jini/src/java/com/bigdata/util/config:2633-3304 /branches/QUADS_QUERY_BRANCH/bigdata-jini/src/java/com/bigdata/util/config:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG/bigdata-jini/src/java/com/bigdata/util/config:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata-jini/src/java/com/bigdata/util/config:4814-4836 /branches/bugfix-btm/bigdata-jini/src/java/com/bigdata/util/config:2594-3237 /branches/dev-btm/bigdata-jini/src/java/com/bigdata/util/config:2574-2730 /branches/fko/bigdata-jini/src/java/com/bigdata/util/config:3150-3194 /trunk/bigdata-jini/src/java/com/bigdata/util/config:2981-3282,3368-3437,3656-4061 + /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata-jini/src/java/com/bigdata/util/config:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata-jini/src/java/com/bigdata/util/config:6766-7143 /branches/BTREE_BUFFER_BRANCH/bigdata-jini/src/java/com/bigdata/util/config:2004-2045 /branches/DEV_BRANCH_27_OCT_2009/bigdata-jini/src/java/com/bigdata/util/config:2270-2546,2548-2782 /branches/INT64_BRANCH/bigdata-jini/src/java/com/bigdata/util/config:4486-4522 /branches/JOURNAL_HA_BRANCH/bigdata-jini/src/java/com/bigdata/util/config:2596-4066 /branches/LARGE_LITERALS_REFACTOR/bigdata-jini/src/java/com/bigdata/util/config:4175-4387 /branches/LEXICON_REFACTOR_BRANCH/bigdata-jini/src/java/com/bigdata/util/config:2633-3304 /branches/QUADS_QUERY_BRANCH/bigdata-jini/src/java/com/bigdata/util/config:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 
/branches/RWSTORE_1_1_0_DEBUG/bigdata-jini/src/java/com/bigdata/util/config:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata-jini/src/java/com/bigdata/util/config:4814-4836 /branches/bugfix-btm/bigdata-jini/src/java/com/bigdata/util/config:2594-3237 /branches/dev-btm/bigdata-jini/src/java/com/bigdata/util/config:2574-2730 /branches/fko/bigdata-jini/src/java/com/bigdata/util/config:3150-3194 /trunk/bigdata-jini/src/java/com/bigdata/util/config:2981-3282,3368-3437,3656-4061 Property changes on: branches/READ_CACHE/bigdata-perf ___________________________________________________________________ Modified: svn:mergeinfo - /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata-perf:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata-perf:6766-7138 /branches/BTREE_BUFFER_BRANCH/bigdata-perf:2004-2045 /branches/DEV_BRANCH_27_OCT_2009/bigdata-perf:2270-2546,2548-2782 /branches/INT64_BRANCH/bigdata-perf:4486-4522 /branches/JOURNAL_HA_BRANCH/bigdata-perf:2596-4066 /branches/LARGE_LITERALS_REFACTOR/bigdata-perf:4175-4387 /branches/LEXICON_REFACTOR_BRANCH/bigdata-perf:2633-3304 /branches/QUADS_QUERY_BRANCH/bigdata-perf:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG/bigdata-perf:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata-perf:4814-4836 /branches/bugfix-btm/bigdata-perf:2594-3237 /branches/dev-btm/bigdata-perf:2574-2730 /branches/fko/bigdata-perf:3150-3194 /trunk/bigdata-perf:2981-3043,3368-3437,3656-4061 + /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata-perf:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata-perf:6766-7143 /branches/BTREE_BUFFER_BRANCH/bigdata-perf:2004-2045 /branches/DEV_BRANCH_27_OCT_2009/bigdata-perf:2270-2546,2548-2782 /branches/INT64_BRANCH/bigdata-perf:4486-4522 /branches/JOURNAL_HA_BRANCH/bigdata-perf:2596-4066 /branches/LARGE_LITERALS_REFACTOR/bigdata-perf:4175-4387 /branches/LEXICON_REFACTOR_BRANCH/bigdata-perf:2633-3304 /branches/QUADS_QUERY_BRANCH/bigdata-perf:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG/bigdata-perf:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata-perf:4814-4836 /branches/bugfix-btm/bigdata-perf:2594-3237 /branches/dev-btm/bigdata-perf:2574-2730 /branches/fko/bigdata-perf:3150-3194 /trunk/bigdata-perf:2981-3043,3368-3437,3656-4061 Property changes on: branches/READ_CACHE/bigdata-perf/btc ___________________________________________________________________ Modified: svn:mergeinfo - /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata-perf/btc:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata-perf/btc:6766-7138 /branches/INT64_BRANCH/bigdata-perf/btc:4486-4522 /branches/JOURNAL_HA_BRANCH/bigdata-perf/btc:2596-4066 /branches/LARGE_LITERALS_REFACTOR/bigdata-perf/btc:4175-4387 /branches/QUADS_QUERY_BRANCH/bigdata-perf/btc:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG/bigdata-perf/btc:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata-perf/btc:4814-4836 /trunk/bigdata-perf/btc:2981-3043,3368-3437,3656-4061 + /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata-perf/btc:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata-perf/btc:6766-7143 /branches/INT64_BRANCH/bigdata-perf/btc:4486-4522 /branches/JOURNAL_HA_BRANCH/bigdata-perf/btc:2596-4066 /branches/LARGE_LITERALS_REFACTOR/bigdata-perf/btc:4175-4387 
/branches/QUADS_QUERY_BRANCH/bigdata-perf/btc:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG/bigdata-perf/btc:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata-perf/btc:4814-4836 /trunk/bigdata-perf/btc:2981-3043,3368-3437,3656-4061 Property changes on: branches/READ_CACHE/bigdata-perf/btc/src/resources ___________________________________________________________________ Modified: svn:mergeinfo - /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata-perf/btc/src/resources:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata-perf/btc/src/resources:6766-7138 /branches/INT64_BRANCH/bigdata-perf/btc/src/resources:4486-4522 /branches/JOURNAL_HA_BRANCH/bigdata-perf/btc/src/resources:2596-4066 /branches/LARGE_LITERALS_REFACTOR/bigdata-perf/btc/src/resources:4175-4387 /branches/QUADS_QUERY_BRANCH/bigdata-perf/btc/src/resources:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG/bigdata-perf/btc/src/resources:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata-perf/btc/src/resources:4814-4836 + /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata-perf/btc/src/resources:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata-perf/btc/src/resources:6766-7143 /branches/INT64_BRANCH/bigdata-perf/btc/src/resources:4486-4522 /branches/JOURNAL_HA_BRANCH/bigdata-perf/btc/src/resources:2596-4066 /branches/LARGE_LITERALS_REFACTOR/bigdata-perf/btc/src/resources:4175-4387 /branches/QUADS_QUERY_BRANCH/bigdata-perf/btc/src/resources:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG/bigdata-perf/btc/src/resources:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata-perf/btc/src/resources:4814-4836 Property changes on: branches/READ_CACHE/bigdata-perf/lubm ___________________________________________________________________ Modified: svn:mergeinfo - /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata-perf/lubm:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata-perf/lubm:6766-7138 /branches/INT64_BRANCH/bigdata-perf/lubm:4486-4522 /branches/LARGE_LITERALS_REFACTOR/bigdata-perf/lubm:4175-4387 /branches/QUADS_QUERY_BRANCH/bigdata-perf/lubm:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG/bigdata-perf/lubm:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata-perf/lubm:4814-4836 + /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata-perf/lubm:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata-perf/lubm:6766-7143 /branches/INT64_BRANCH/bigdata-perf/lubm:4486-4522 /branches/LARGE_LITERALS_REFACTOR/bigdata-perf/lubm:4175-4387 /branches/QUADS_QUERY_BRANCH/bigdata-perf/lubm:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG/bigdata-perf/lubm:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata-perf/lubm:4814-4836 Property changes on: branches/READ_CACHE/bigdata-perf/uniprot ___________________________________________________________________ Modified: svn:mergeinfo - /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata-perf/uniprot:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata-perf/uniprot:6766-7138 /branches/INT64_BRANCH/bigdata-perf/uniprot:4486-4522 
/branches/JOURNAL_HA_BRANCH/bigdata-perf/uniprot:2596-4066 /branches/LARGE_LITERALS_REFACTOR/bigdata-perf/uniprot:4175-4387 /branches/QUADS_QUERY_BRANCH/bigdata-perf/uniprot:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG/bigdata-perf/uniprot:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata-perf/uniprot:4814-4836 /trunk/bigdata-perf/uniprot:2981-3043,3368-3437,3656-4061 + /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata-perf/uniprot:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata-perf/uniprot:6766-7143 /branches/INT64_BRANCH/bigdata-perf/uniprot:4486-4522 /branches/JOURNAL_HA_BRANCH/bigdata-perf/uniprot:2596-4066 /branches/LARGE_LITERALS_REFACTOR/bigdata-perf/uniprot:4175-4387 /branches/QUADS_QUERY_BRANCH/bigdata-perf/uniprot:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG/bigdata-perf/uniprot:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata-perf/uniprot:4814-4836 /trunk/bigdata-perf/uniprot:2981-3043,3368-3437,3656-4061 Property changes on: branches/READ_CACHE/bigdata-perf/uniprot/src ___________________________________________________________________ Modified: svn:mergeinfo - /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata-perf/uniprot/src:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata-perf/uniprot/src:6766-7138 /branches/INT64_BRANCH/bigdata-perf/uniprot/src:4486-4522 /branches/JOURNAL_HA_BRANCH/bigdata-perf/uniprot/src:2596-4066 /branches/LARGE_LITERALS_REFACTOR/bigdata-perf/uniprot/src:4175-4387 /branches/QUADS_QUERY_BRANCH/bigdata-perf/uniprot/src:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG/bigdata-perf/uniprot/src:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata-perf/uniprot/src:4814-4836 + /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata-perf/uniprot/src:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata-perf/uniprot/src:6766-7143 /branches/INT64_BRANCH/bigdata-perf/uniprot/src:4486-4522 /branches/JOURNAL_HA_BRANCH/bigdata-perf/uniprot/src:2596-4066 /branches/LARGE_LITERALS_REFACTOR/bigdata-perf/uniprot/src:4175-4387 /branches/QUADS_QUERY_BRANCH/bigdata-perf/uniprot/src:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG/bigdata-perf/uniprot/src:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata-perf/uniprot/src:4814-4836 Property changes on: branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate ___________________________________________________________________ Modified: svn:mergeinfo - /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate:6766-7138 /branches/INT64_BRANCH/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate:4486-4522 /branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate:4814-4836 + 
/branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate:6766-7143 /branches/INT64_BRANCH/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate:4486-4522 /branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate:4814-4836 Property changes on: branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/changesets ___________________________________________________________________ Modified: svn:mergeinfo - /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata-rdf/src/java/com/bigdata/rdf/changesets:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/changesets:6766-7138 /branches/INT64_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/changesets:4486-4522 /branches/LARGE_LITERALS_REFACTOR/bigdata-rdf/src/java/com/bigdata/rdf/changesets:4175-4387 /branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/changesets:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG/bigdata-rdf/src/java/com/bigdata/rdf/changesets:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/changesets:4814-4836 + /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata-rdf/src/java/com/bigdata/rdf/changesets:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/changesets:6766-7143 /branches/INT64_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/changesets:4486-4522 /branches/LARGE_LITERALS_REFACTOR/bigdata-rdf/src/java/com/bigdata/rdf/changesets:4175-4387 /branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/changesets:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG/bigdata-rdf/src/java/com/bigdata/rdf/changesets:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/changesets:4814-4836 Property changes on: branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/error ___________________________________________________________________ Modified: svn:mergeinfo - /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata-rdf/src/java/com/bigdata/rdf/error:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/error:6766-7138 /branches/INT64_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/error:4486-4522 /branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/error:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG/bigdata-rdf/src/java/com/bigdata/rdf/error:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/error:4814-4836 + /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata-rdf/src/java/com/bigdata/rdf/error:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/error:6766-7143 /branches/INT64_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/error:4486-4522 
/branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/error:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG/bigdata-rdf/src/java/com/bigdata/rdf/error:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/error:4814-4836 Property changes on: branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/internal ___________________________________________________________________ Modified: svn:mergeinfo - /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata-rdf/src/java/com/bigdata/rdf/internal:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal:6766-7138 /branches/INT64_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal:4486-4522 /branches/LARGE_LITERALS_REFACTOR/bigdata-rdf/src/java/com/bigdata/rdf/internal:4175-4387 /branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal:4525-4531,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796 /branches/RWSTORE_1_1_0_DEBUG/bigdata-rdf/src/java/com/bigdata/rdf/internal:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal:4814-4836 + /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata-rdf/src/java/com/bigdata/rdf/internal:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal:6766-7143 /branches/INT64_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal:4486-4522 /branches/LARGE_LITERALS_REFACTOR/bigdata-rdf/src/java/com/bigdata/rdf/internal:4175-4387 /branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal:4525-4531,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796 /branches/RWSTORE_1_1_0_DEBUG/bigdata-rdf/src/java/com/bigdata/rdf/internal:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal:4814-4836 Property changes on: branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/relation ___________________________________________________________________ Modified: svn:mergeinfo - /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata-rdf/src/java/com/bigdata/rdf/relation:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/relation:6766-7138 /branches/INT64_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/relation:4486-4522 /branches/LARGE_LITERALS_REFACTOR/bigdata-rdf/src/java/com/bigdata/rdf/relation:4175-4387 /branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/relation:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG/bigdata-rdf/src/java/com/bigdata/rdf/relation:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/relation:4814-4836 + /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata-rdf/src/java/com/bigdata/rdf/relation:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/relation:6766-7143 /branches/INT64_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/relation:4486-4522 /branches/LARGE_LITERALS_REFACTOR/bigdata-rdf/src/java/com/bigdata/rdf/relation:4175-4387 /branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/relation:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 
/branches/RWSTORE_1_1_0_DEBUG/bigdata-rdf/src/java/com/bigdata/rdf/relation:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/relation:4814-4836 Copied: branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/ValuesServiceFactory.java (from rev 7143, branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/ValuesServiceFactory.java) =================================================================== --- branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/ValuesServiceFactory.java (rev 0) +++ branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/ValuesServiceFactory.java 2013-05-18 11:52:12 UTC (rev 7144) @@ -0,0 +1,288 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2011. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Sep 9, 2011 + */ + +package com.bigdata.rdf.sparql.ast.eval; + +import java.io.Serializable; +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +import org.apache.log4j.Logger; +import org.openrdf.model.Resource; +import org.openrdf.model.URI; +import org.openrdf.model.Value; +import org.openrdf.model.impl.URIImpl; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.BOpContextBase; +import com.bigdata.bop.Constant; +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IConstant; +import com.bigdata.bop.IPredicate; +import com.bigdata.bop.IVariable; +import com.bigdata.bop.NV; +import com.bigdata.bop.ap.SampleIndex; +import com.bigdata.bop.bindingSet.EmptyBindingSet; +import com.bigdata.bop.bindingSet.ListBindingSet; +import com.bigdata.bop.fed.QueryEngineFactory; +import com.bigdata.rdf.internal.IV; +import com.bigdata.rdf.sparql.ast.GroupNodeBase; +import com.bigdata.rdf.sparql.ast.IGroupMemberNode; +import com.bigdata.rdf.sparql.ast.TermNode; +import com.bigdata.rdf.sparql.ast.eval.SampleServiceFactory.SampleParams; +import com.bigdata.rdf.sparql.ast.service.BigdataNativeServiceOptions; +import com.bigdata.rdf.sparql.ast.service.BigdataServiceCall; +import com.bigdata.rdf.sparql.ast.service.IServiceOptions; +import com.bigdata.rdf.sparql.ast.service.ServiceCallCreateParams; +import com.bigdata.rdf.sparql.ast.service.ServiceFactory; +import com.bigdata.rdf.sparql.ast.service.ServiceNode; +import com.bigdata.rdf.spo.ISPO; +import com.bigdata.rdf.store.AbstractTripleStore; +import com.bigdata.rdf.store.BD; +import com.bigdata.relation.accesspath.EmptyCloseableIterator; +import com.bigdata.relation.accesspath.ThickCloseableIterator; +import com.bigdata.striterator.ICloseableIterator; + +/** + * A factory for a service that simulates the VALUES syntax in SPARQL: + * + * service bd:values { + * # service params + * bd:serviceParam bd:values.var ?var . + * bd:serviceParam bd:values.val "val1" . 
+ * bd:serviceParam bd:values.val "val2" . + * ... + * } + */ +public class ValuesServiceFactory extends AbstractServiceFactory + implements ServiceFactory { + + private static final Logger log = Logger + .getLogger(ValuesServiceFactory.class); + + /** + * The URI service key. + */ + public static final URI SERVICE_KEY = new URIImpl(BD.NAMESPACE+"values"); + + /** + * The service params for this service. + */ + public static interface ValuesParams { + + /** + * The limit on the sample. + */ + URI VAR = new URIImpl(SERVICE_KEY.stringValue() + ".var"); + + /** + * The seed on the sample. + */ + URI VAL = new URIImpl(SERVICE_KEY.stringValue() + ".val"); + + } + + + /* + * Note: This could extend the base class to allow for search service + * configuration options. + */ + private final BigdataNativeServiceOptions serviceOptions; + + public ValuesServiceFactory() { + + serviceOptions = new BigdataNativeServiceOptions(); + serviceOptions.setRunFirst(true); + + } + + @Override + public BigdataNativeServiceOptions getServiceOptions() { + + return serviceOptions; + + } + + public BigdataServiceCall create( + final ServiceCallCreateParams params, + final ServiceParams serviceParams) { + + final AbstractTripleStore store = params.getTripleStore(); + + final ServiceNode serviceNode = params.getServiceNode(); + + /* + * Validate the predicates for a given service call. + */ + verifyGraphPattern(store, serviceNode.getGraphPattern(), serviceParams); + + /* + * Create and return the ServiceCall object which will execute this + * query. + */ + return new ValuesCall(store, getServiceOptions(), serviceParams); + + } + + /** + * Verify that there is only a single statement pattern node and that the + * service parameters are valid. + */ + private void verifyGraphPattern( + final AbstractTripleStore database, + final GroupNodeBase<IGroupMemberNode> group, + final ServiceParams serviceParams) { + + final Iterator<Map.Entry<URI, List<TermNode>>> it = serviceParams.iterator(); + + if (!serviceParams.contains(ValuesParams.VAR)) { + + throw new RuntimeException("must provide a variable for: " + ValuesParams.VAR); + + } + + if (!serviceParams.contains(ValuesParams.VAL)) { + + throw new RuntimeException("must provide at least one value for: " + ValuesParams.VAL); + + } + + while (it.hasNext()) { + + final URI param = it.next().getKey(); + + if (ValuesParams.VAR.equals(param)) { + + final List<TermNode> vars = serviceParams.get(param); + + if (vars == null || vars.size() != 1 || vars.get(0).isConstant()) { + throw new RuntimeException("must provide exactly one variable for: " + param); + } + + } else if (ValuesParams.VAL.equals(param)) { + + final List<TermNode> vals = serviceParams.get(param); + + if (vals == null || vals.size() == 0) { + throw new RuntimeException("must provide at least one value for: " + param); + } + + for (TermNode val : vals) { + if (val.isVariable()) { + throw new RuntimeException("must provide constant values for: " + param); + } + } + + } else { + + throw new RuntimeException("unrecognized param: " + param); + + } + + } + + } + + /** + * + * Note: This has the {@link AbstractTripleStore} reference attached. This + * is not a {@link Serializable} object. It MUST run on the query + * controller. 
+ */ + private static class ValuesCall implements BigdataServiceCall { + + private final AbstractTripleStore db; + private final IServiceOptions serviceOptions; + private final ServiceParams serviceParams; + + public ValuesCall( + final AbstractTripleStore db, + final IServiceOptions serviceOptions, + final ServiceParams serviceParams) { + + if(db == null) + throw new IllegalArgumentException(); + + if(serviceOptions == null) + throw new IllegalArgumentException(); + + if(serviceParams == null) + throw new IllegalArgumentException(); + + this.db = db; + this.serviceOptions = serviceOptions; + this.serviceParams = serviceParams; + + } + + /** + * Run a sample index op over the access path. + */ + @Override + public ICloseableIterator<IBindingSet> call( + final IBindingSet[] bc) { + + if (log.isInfoEnabled()) { + log.info(bc.length); + log.info(Arrays.toString(bc)); + log.info(serviceParams); + } + + if (bc != null && bc.length > 0 && !bc[0].equals(EmptyBindingSet.INSTANCE)) { + throw new RuntimeException("cannot run with incoming bindings"); + } + + final IVariable<IV> var = serviceParams.getAsVar(ValuesParams.VAR); + + final List<TermNode> vals = serviceParams.get(ValuesParams.VAL); + + final IBindingSet[] bSets = new IBindingSet[vals.size()]; + + for (int i = 0; i < bSets.length; i++) { + + bSets[i] = new ListBindingSet(); + + bSets[i].set(var, (IConstant<IV>) vals.get(i).getValueExpression()); + + } + + return new ThickCloseableIterator<IBindingSet>(bSets, bSets.length); + + } + + @Override + public IServiceOptions getServiceOptions() { + + return serviceOptions; + + } + + } + +} Modified: branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/service/ServiceRegistry.java =================================================================== --- branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/service/ServiceRegistry.java 2013-05-17 21:10:52 UTC (rev 7143) +++ branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/service/ServiceRegistry.java 2013-05-18 11:52:12 UTC (rev 7144) @@ -10,13 +10,13 @@ import org.openrdf.model.URI; import org.openrdf.model.impl.URIImpl; -import com.bigdata.bop.ap.SampleIndex; import com.bigdata.rdf.sparql.ast.QueryHints; import com.bigdata.rdf.sparql.ast.cache.DescribeServiceFactory; import com.bigdata.rdf.sparql.ast.eval.SampleServiceFactory; import com.bigdata.rdf.sparql.ast.eval.SearchInSearchServiceFactory; import com.bigdata.rdf.sparql.ast.eval.SearchServiceFactory; import com.bigdata.rdf.sparql.ast.eval.SliceServiceFactory; +import com.bigdata.rdf.sparql.ast.eval.ValuesServiceFactory; import com.bigdata.rdf.sparql.ast.service.history.HistoryServiceFactory; import com.bigdata.rdf.store.AbstractTripleStore; import com.bigdata.rdf.store.BD; @@ -89,6 +89,9 @@ // Add the slice index service. add(SliceServiceFactory.SERVICE_KEY, new SliceServiceFactory()); + // Add the values service. 
+ add(ValuesServiceFactory.SERVICE_KEY, new ValuesServiceFactory()); + if (QueryHints.DEFAULT_DESCRIBE_CACHE) { add(new URIImpl(BD.NAMESPACE + "describe"), Property changes on: branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/util ___________________________________________________________________ Modified: svn:mergeinfo - /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata-rdf/src/java/com/bigdata/rdf/util:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/util:6766-7138 /branches/BTREE_BUFFER_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/util:2004-2045 /branches/DEV_BRANCH_27_OCT_2009/bigdata-rdf/src/java/com/bigdata/rdf/util:2270-2546,2548-2782 /branches/INT64_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/util:4486-4522 /branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/util:2596-4066 /branches/LARGE_LITERALS_REFACTOR/bigdata-rdf/src/java/com/bigdata/rdf/util:4175-4387 /branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/util:2633-3304 /branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/util:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG/bigdata-rdf/src/java/com/bigdata/rdf/util:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/util:4814-4836 /branches/bugfix-btm/bigdata-rdf/src/java/com/bigdata/rdf/util:2594-3237 /branches/dev-btm/bigdata-rdf/src/java/com/bigdata/rdf/util:2574-2730 /branches/fko/bigdata-rdf/src/java/com/bigdata/rdf/util:3150-3194 /trunk/bigdata-rdf/src/java/com/bigdata/rdf/util:2981-3110,3368-3437,3656-4061 + /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata-rdf/src/java/com/bigdata/rdf/util:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/util:6766-7143 /branches/BTREE_BUFFER_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/util:2004-2045 /branches/DEV_BRANCH_27_OCT_2009/bigdata-rdf/src/java/com/bigdata/rdf/util:2270-2546,2548-2782 /branches/INT64_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/util:4486-4522 /branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/util:2596-4066 /branches/LARGE_LITERALS_REFACTOR/bigdata-rdf/src/java/com/bigdata/rdf/util:4175-4387 /branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/util:2633-3304 /branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/util:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG/bigdata-rdf/src/java/com/bigdata/rdf/util:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/util:4814-4836 /branches/bugfix-btm/bigdata-rdf/src/java/com/bigdata/rdf/util:2594-3237 /branches/dev-btm/bigdata-rdf/src/java/com/bigdata/rdf/util:2574-2730 /branches/fko/bigdata-rdf/src/java/com/bigdata/rdf/util:3150-3194 /trunk/bigdata-rdf/src/java/com/bigdata/rdf/util:2981-3110,3368-3437,3656-4061 Property changes on: branches/READ_CACHE/bigdata-rdf/src/samples ___________________________________________________________________ Modified: svn:mergeinfo - /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata-rdf/src/samples:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/samples:6766-7138 /branches/INT64_BRANCH/bigdata-rdf/src/samples:4486-4522 /branches/LARGE_LITERALS_REFACTOR/bigdata-rdf/src/samples:4175-4387 
/branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/samples:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/... [truncated message content] |
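For reference, the following is a minimal sketch of how a client might exercise the new values service once ValuesServiceFactory has been registered with the ServiceRegistry as above. Only the bd:values service key and the bd:values.var / bd:values.val parameters come from this commit; the bd: prefix expansion, the example predicate, and the literal values are illustrative assumptions, and the query is evaluated through the standard openrdf RepositoryConnection API.

    import org.openrdf.query.QueryLanguage;
    import org.openrdf.query.TupleQueryResult;
    import org.openrdf.repository.RepositoryConnection;

    public class ValuesServiceExample {

        /**
         * Bind ?color to each constant supplied via bd:values.val and join those
         * bindings against the rest of the group. The bd: prefix is assumed to
         * resolve to BD.NAMESPACE; the hasColor predicate is purely illustrative.
         */
        static TupleQueryResult bindColors(final RepositoryConnection conn)
                throws Exception {

            final String query =
                  "PREFIX bd: <http://www.bigdata.com/rdf#>\n" // assumed BD.NAMESPACE value
                + "SELECT ?s ?color WHERE {\n"
                + "  SERVICE bd:values {\n"
                + "    bd:serviceParam bd:values.var ?color .\n"  // exactly one variable
                + "    bd:serviceParam bd:values.val \"red\" .\n" // one or more constants
                + "    bd:serviceParam bd:values.val \"blue\" .\n"
                + "  }\n"
                + "  ?s <http://example.org/hasColor> ?color .\n"
                + "}";

            return conn.prepareTupleQuery(QueryLanguage.SPARQL, query).evaluate();

        }

    }

Because the factory sets runFirst(true), the service produces its bindings before the rest of the group is evaluated, which is what lets it stand in for the SPARQL VALUES construct.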
From: <tho...@us...> - 2013-05-18 13:07:13
|
Revision: 7145 http://bigdata.svn.sourceforge.net/bigdata/?rev=7145&view=rev Author: thompsonbry Date: 2013-05-18 13:07:04 +0000 (Sat, 18 May 2013) Log Message: ----------- Bug fix for the consensus releaseTime protocol. It was hitting a clock skew error and then failing to message back to the leader with an appropriate response such that the barrier would break and the commit could either proceed or be rejected. @see https://sourceforge.net/apps/trac/bigdata/ticket/677 (HA deadlock under UPDATE + QUERY) Modified Paths: -------------- branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractJournal.java branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/Journal.java branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournal.java branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/HAJournalTest.java branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA3JournalServer.java branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHAJournalServerOverride.java Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractJournal.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractJournal.java 2013-05-18 11:52:12 UTC (rev 7144) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractJournal.java 2013-05-18 13:07:04 UTC (rev 7145) @@ -7010,12 +7010,15 @@ public void gatherMinimumVisibleCommitTime( final IHAGatherReleaseTimeRequest req) throws IOException { - if (haLog.isInfoEnabled()) haLog.info("req=" + req); + if (haLog.isInfoEnabled()) + haLog.info("req=" + req); // Clear the old outcome. Reference SHOULD be null. Ensure not running. final Future<Void> oldFuture = gatherFuture.getAndSet(null); - if(oldFuture!=null&&!oldFuture.isDone()) oldFuture.cancel(true/*mayInterruptIfRunning*/); + if (oldFuture != null && !oldFuture.isDone()) + oldFuture.cancel(true/* mayInterruptIfRunning */); + final Callable<Void> task = ((AbstractHATransactionService) AbstractJournal.this .getLocalTransactionManager() .getTransactionService()) @@ -7038,12 +7041,48 @@ final IHANotifyReleaseTimeRequest req) throws IOException, InterruptedException, BrokenBarrierException { + /* + * Note: Pass through [req] without checks. We need to get this + * message to the CyclicBarrier regardless of whether it is + * well-formed or valid. + */ + return ((HATXSGlue) AbstractJournal.this .getLocalTransactionManager().getTransactionService()) .notifyEarliestCommitTime(req); } + + /** + * This exposes the clock used to assign transaction identifiers and + * commit times. It is being exposed to support certain kinds of + * overrides for unit tests. + * <p> + * Note: This method is NOT exposed to RMI. However, it can still be + * overridden by the unit tests. + * + * @return The next timestamp from that clock. + */ + public long nextTimestamp() { + try { + + return AbstractJournal.this.getLocalTransactionManager() + .getTransactionService().nextTimestamp(); + + } catch (IOException ex) { + + /* + * Note: This is a local method call. IOException will not be + * thrown. 
+ */ + + throw new RuntimeException(ex); + + } + + } + /* * IService */ Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/Journal.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/Journal.java 2013-05-18 11:52:12 UTC (rev 7144) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/Journal.java 2013-05-18 13:07:04 UTC (rev 7145) @@ -362,7 +362,17 @@ /** * The message from each of those followers providing their local - * earliest visible commit point. + * earliest visible commit point. + * <p> + * Note: The {@link ConcurrentHashMap} does NOT allow <code>null</code> + * values. Further the {@link IHANotifyReleaseTimeRequest} specifies the + * serviceId of the follower. Therefore, a follower whose + * {@link GatherTask} fails MUST provide a "mock" + * {@link IHANotifyReleaseTimeRequest} that it will use to wait at the + * {@link CyclicBarrier}. + * + * @see InnerJournalTransactionService#notifyEarliestCommitTime(IHANotifyReleaseTimeRequest) + * @see GatherTask */ final private Map<UUID, IHANotifyReleaseTimeRequest> responses = new ConcurrentHashMap<UUID, IHANotifyReleaseTimeRequest>(); @@ -1347,12 +1357,26 @@ this.req = req; } - + + /** + * Note: This needs to be robust to most kinds of errors. However, + * if the quorum breaks (leader leaves) of if a follower leaves that + * was joined with the met quorum as of the atomic decision point in + * commitNow(), then that change will be detected by the leader and + * it will break the {@link CyclicBarrier}. + */ public Void call() throws Exception { if (log.isInfoEnabled()) log.info("Running gather on follower"); + /* + * These variables are set in the try {} below. If we can + * discover the leader, then we will eventually respond either + * in the try{} or in the finally{}. + */ + long now = 0L; + UUID serviceId = null; HAGlue leader = null; boolean didNotifyLeader = false; @@ -1361,27 +1385,59 @@ try { - // This timestamp is used to help detect clock skew. - final long now = nextTimestamp(); - - // Verify event on leader occurs before event on follower. - assertBefore(req.getTimestampOnLeader(), now); - final long token = req.token(); + /* + * we do not need to handle the case where the token is + * invalid. The leader will reset() the CylicBarrier for + * this case. + */ getQuorum().assertQuorum(token); + /* + * If the quorumService is null because this service is + * shutting down then the leader will notice the + * serviceLeave() and reset() the CyclicBarrier. + */ final QuorumService<HAGlue> quorumService = getQuorum() .getClient(); + // The serviceId for this service. + serviceId = quorumService.getServiceId(); + + /* + * This timestamp is used to help detect clock skew. + * + * Note: This deliberately uses the (non-remote) + * nextTimestamp() method on BasicHA. This is being done so + * we can write a unit test of the GatherTask that imposes + * clock skew by overridding the next value to be returned + * by that method. + */ + now = ((BasicHA)quorumService.getService()).nextTimestamp(); + + /* + * If the token is invalid, making it impossible for us to + * discover and message the leader, then then leader will + * reset() the CyclicBarrier. + */ + leader = quorumService.getLeader(token); + + /* + * Note: At this point we have everything we need to form up + * our response. If we hit an assertion, we will still + * respond in the finally {} block below. + */ + + /* Verify event on leader occurs before event on follower. 
+ */ + assertBefore(req.getTimestampOnLeader(), now); + if (!quorumService.isFollower(token)) throw new QuorumException(); - leader = quorumService.getLeader(token); + final IHANotifyReleaseTimeRequest req2 = newHANotifyReleaseTimeRequest(serviceId); - final IHANotifyReleaseTimeRequest req2 = newHANotifyReleaseTimeRequest(quorumService - .getServiceId()); - /* * RMI to leader. * @@ -1464,12 +1520,16 @@ if (!didNotifyLeader && leader != null) { /* - * Send a [null] to the leader so it does not block - * forever waiting for our response. + * Send mock response to the leader so it does not block + * forever waiting for our response. The mock response MUST + * include our correct serviceId. */ try { - leader.notifyEarliestCommitTime(null/* resp */); + final IHANotifyReleaseTimeRequest resp = new HANotifyReleaseTimeRequest( + serviceId, 0L/* pinnedCommitTime */, + 1L/* pinnedCommitCounter */, now/* timestamp */); + leader.notifyEarliestCommitTime(resp); } catch (Throwable t2) { log.error(t2, t2); } @@ -1503,18 +1563,33 @@ * request as this barrier instance. That will let us detect a service * that responds late (after a transient disconnect) when the leader has * moved on to another commit. See BarrierState#token for more on this. - * [Note that [req] can be [null if the follower was unable to produce a - * valid response.] + * [Note that [req] can not safely be [null] since the follower must + * self-report its serviceId.] */ @Override public IHANotifyReleaseTimeResponse notifyEarliestCommitTime( final IHANotifyReleaseTimeRequest req) throws IOException, InterruptedException, BrokenBarrierException { + /* + * Note: Do NOT error check [req] until we are in the try{} / + * finally {} below that will do the CyclicBarrier.await(). + */ + final BarrierState barrierState = barrierRef.get(); - if (barrierState == null) + if (barrierState == null) { + + /* + * If the BarrierState reference has been cleared then it is not + * possible for us to count down at the barrier for this message + * (since the CyclicBarrier is gone). Otherwise, we will await() + * at the CyclicBarrier regardless of the message. + */ + throw new IllegalStateException(); + + } try { @@ -1534,6 +1609,9 @@ * Note: We want to await() on the barrier even if there is an * error in the try{} block. This is necessary to decrement the * barrier count down to zero. + * + * TODO If there is an error, we could reset() the barrier + * instead. */ // follower blocks on Thread on the leader here. @@ -1545,9 +1623,10 @@ final IHANotifyReleaseTimeResponse resp = barrierState.consensus; if (resp == null) { - - throw new RuntimeException("No consensus"); - + /* + * Log error, but return anyway. + */ + haLog.error("No consensus"); } return resp; Modified: branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournal.java =================================================================== --- branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournal.java 2013-05-18 11:52:12 UTC (rev 7144) +++ branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournal.java 2013-05-18 13:07:04 UTC (rev 7145) @@ -1795,6 +1795,16 @@ + journal.getRootBlockView().getCommitCounter()); innerRunStateStr.append(", haReady=" + getHAReady()); innerRunStateStr.append(", haStatus=" + getHAStatus()); + innerRunStateStr + .append(", serviceId=" + quorumService == null ? "N/A" + : quorumService.getServiceId()); + /* + * TODO This is not a TXS timestamp. That would be more useful but I + * want to avoid taking the TXS lock. 
[It looks like the TXS does + * not need that synchronized keyword on nextTimestamp(). Try + * removing it and then using it here.] + */ + innerRunStateStr.append(", now=" + System.currentTimeMillis()); final String msg = server.getOperatorAlert(); if (msg != null) innerRunStateStr.append(", msg=[" + msg + "]"); Modified: branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/HAJournalTest.java =================================================================== --- branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/HAJournalTest.java 2013-05-18 11:52:12 UTC (rev 7144) +++ branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/HAJournalTest.java 2013-05-18 13:07:04 UTC (rev 7145) @@ -42,6 +42,7 @@ import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; import net.jini.config.Configuration; import net.jini.config.ConfigurationException; @@ -185,6 +186,22 @@ */ public void voteNo() throws IOException; + /** + * Set the next value to be reported by {@link BasicHA#nextTimestamp()}. + * <p> + * Note: Only a few specific methods call against + * {@link BasicHA#nextTimestamp()}. They do so precisely because they + * were written to allow us to override the clock in the test suite + * using this method. + * + * @param nextTimestamp + * when <code>-1L</code> the behavior will revert to the + * default. Any other value will be the next value reported + * by {@link BasicHA#nextTimestamp()}, after which the + * behavior will revert to the default. + */ + public void setNextTimestamp(long nextTimestamp) throws IOException; + } /** @@ -314,6 +331,8 @@ */ private final AtomicBoolean voteNo = new AtomicBoolean(false); + private final AtomicLong nextTimestamp = new AtomicLong(-1L); + private HAGlueTestImpl(final UUID serviceId) { super(serviceId); @@ -374,7 +393,14 @@ public void voteNo() throws IOException { voteNo.set(true); } + + @Override + public void setNextTimestamp(long nextTimestamp) throws IOException { + this.nextTimestamp.set(nextTimestamp); + + } + /** * Conditionally fail the method if (a) it is registered in the * {@link #failSet} and (b) it is due to fail on this invocation. @@ -720,7 +746,28 @@ return super.notifyEarliestCommitTime(req); } + + /** + * {@inheritDoc} + * <p> + * Note: This is NOT an RMI method, but we want to be able to override + * it anyway to test the releaseTime consensus protocol. + */ + @Override + public long nextTimestamp() { + final long t = nextTimestamp.getAndSet(-1L); + + if (t == -1L) { + + return super.nextTimestamp(); + + } + + return t; + + } + /* * HACommitGlue */ Modified: branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA3JournalServer.java =================================================================== --- branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA3JournalServer.java 2013-05-18 11:52:12 UTC (rev 7144) +++ branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA3JournalServer.java 2013-05-18 13:07:04 UTC (rev 7145) @@ -550,6 +550,24 @@ } /** + * This stress test was written after seeing rare failures in + * testStartAB_C_MultiTransactionResync_5tx_then_200ms_delay. + * + * Currently it offers a reliable failure. 
+ * + * @throws Exception + */ + public void _testStressStartAB_C_MultiTransactionResync() + throws Exception { + + for (int i = 0; i < 50; i++) { + doStartAB_C_MultiTransactionResync(200, 5); + destroyAll(); + } + + } + + /** * Test where C starts after <i>initialTransactions</i> on A+B. A series of * transactions are issued with the specified delay. * Modified: branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHAJournalServerOverride.java =================================================================== --- branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHAJournalServerOverride.java 2013-05-18 11:52:12 UTC (rev 7144) +++ branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHAJournalServerOverride.java 2013-05-18 13:07:04 UTC (rev 7145) @@ -27,6 +27,7 @@ package com.bigdata.journal.jini.ha; import java.util.UUID; +import java.util.concurrent.BrokenBarrierException; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; @@ -36,6 +37,7 @@ import com.bigdata.ha.HAGlue; import com.bigdata.ha.HAStatusEnum; import com.bigdata.ha.msg.IHA2PhasePrepareMessage; +import com.bigdata.ha.msg.IHANotifyReleaseTimeRequest; import com.bigdata.journal.jini.ha.HAJournalTest.HAGlueTest; import com.bigdata.journal.jini.ha.HAJournalTest.SpuriousTestException; import com.bigdata.util.InnerCause; @@ -263,7 +265,7 @@ awaitCommitCounter(2L, startup.serverA, startup.serverC); /* - * FIXME Unlike the test above, if there is a problem making the RMI + * Note: Unlike the test above, if there is a problem making the RMI * call, then B will not go through its doRejectedCommit() handler and * will not enter the ERROR state directly. We need to have B notice * that it is no longer at the same commit point, e.g., by observing a @@ -273,20 +275,20 @@ * does not know that there was an attempt to PREPARE since it did not * get the prepare2Phase() message. * - * - Modify HAJournalServer to enter the error state if we observe a + * - Modified HAJournalServer to enter the error state if we observe a * live write cache block for a commitCounter != the expected * commitCounter. * - * - Modify commit2Phase() to accept the #of services that are + * - Modified commit2Phase() to accept the #of services that are * participating in the commit. If it is not a full quorum, then we can * not purge the HA logs in commit2Phase() regardless of what the quorum * state looks like. * - * - Modify this test to do another transaction. B can not notice the + * - Modified this test to do another transaction. B can not notice the * problem until there is another write cache flushed through the * pipeline. * - * - Modify this test to await B to move to the end of the pipeline, + * - Modified this test to await B to move to the end of the pipeline, * resync, and rejoin. */ @@ -320,5 +322,122 @@ assertEquals(token, quorum.token()); } - + + /** + * This test forces clock skew on one of the followers causing it to + * encounter an error in its GatherTask. This models the problem that was + * causing a deadlock in an HA3 cluster with BSBM UPDATE running on the + * leader (EXPLORE was running on the follower, but analysis of the root + * cause shows that this was not required to trip the deadlock). The + * deadlock was caused by clock skew resulting in an exception and either + * {@link IHANotifyReleaseTimeRequest} message that was <code>null</code> + * and thus could not be processed or a failure to send that message back to + * the leader. 
+ * + * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/677" > HA + * deadlock under UPDATE + QUERY </a> + * + * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/673" > DGC + * in release time consensus protocol causes native thread leak in + * HAJournalServer at each commit </a> + */ + public void testStartABC_releaseTimeConsensusProtocol_clockSkew() + throws Exception { + + // Enforce the join order. + final ABC startup = new ABC(true /*sequential*/); + + final long token = awaitFullyMetQuorum(); + + // Should be one commit point. + awaitCommitCounter(1L, startup.serverA, startup.serverB, + startup.serverC); + + /* + * Setup B with a significant clock skew to force an error during the + * GatherTask. + */ + ((HAGlueTest) startup.serverB).setNextTimestamp(10L); + + try { + + // Simple transaction. + simpleTransaction(); + + } catch (Throwable t) { + /* + * TODO This test is currently failing because the consensus + * releaseTime protocol will fail if one of the joined services + * reports an error. The protocol should be robust to an error and + * move forward if a consensus can be formed. If a consensus can not + * be formed (due to some curable error), then any queries running + * on that service should break (force a service leave). Else, if + * the remaining services were to advance the release time since + * otherwise the service could not get through another releaseTime + * consensus protocol exchange successfully if it is reading on a + * commit point that has been released by the other services. + */ + if (!InnerCause.isInnerCause(t, BrokenBarrierException.class)) { + /* + * Wrong inner cause. + */ + fail("Expecting " + BrokenBarrierException.class, t); + } + + } + + // Should be one commit point. + awaitCommitCounter(1L, startup.serverA, startup.serverB, + startup.serverC); + + final long token1 = awaitFullyMetQuorum(); + + /* + * Should have formed a new quorum (each service should have done a + * rejected commit, forced a service leave, and then cured that error + * through seek consensus). + */ + assertEquals(token + 1, token1); + +// // Verify quorum is unchanged. +// assertEquals(token, quorum.token()); +// +// // Should be two commit points on {A,C]. +// awaitCommitCounter(2L, startup.serverA, startup.serverC); +// +// // Should be ONE commit points on {B}. +// awaitCommitCounter(1L, startup.serverB); +// +// /* +// * We use a simple transaction to force B to notice that it missed a +// * commit. B will notice that it did not join in the 2-phase commit when +// * the next live write cache block flows through the pipeline and it is +// * associated with a commitCounter that is GT the commitCounter which B +// * is expecting. That will force B into an Error state. From the Error +// * state, it will then resync and re-join the met quourm. +// */ +// simpleTransaction(); +// +// /* +// * The pipeline should be reordered. B will do a service leave, then +// * enter seek consensus, and then re-enter the pipeline. +// */ +// awaitPipeline(new HAGlue[] { startup.serverA, startup.serverC, +// startup.serverB }); +// +// /* +// * There should be three commit points on {A,C,B} (note that this assert +// * does not pay attention to the pipeline order). +// */ +// awaitCommitCounter(3L, startup.serverA, startup.serverC, +// startup.serverB); +// +// // B should be a follower again. +// awaitHAStatus(startup.serverB, HAStatusEnum.Follower); +// +// // quorum token is unchanged. 
+// assertEquals(token, quorum.token()); + + } + } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
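The fix above hinges on a simple invariant of java.util.concurrent.CyclicBarrier: the barrier only breaks when every party arrives, so a follower whose GatherTask fails must still deliver a well-formed (non-null) response carrying its own serviceId. The sketch below is not the bigdata code; it is a minimal, self-contained illustration of that invariant using hypothetical names.

    import java.util.UUID;
    import java.util.concurrent.BrokenBarrierException;
    import java.util.concurrent.ConcurrentHashMap;
    import java.util.concurrent.CyclicBarrier;

    /**
     * Minimal sketch of the releaseTime gather invariant: every follower must
     * respond, with a mock value if necessary, or the leader's barrier never
     * breaks and the commit hangs.
     */
    public class GatherBarrierSketch {

        private final ConcurrentHashMap<UUID, Long> responses =
                new ConcurrentHashMap<UUID, Long>();

        private final CyclicBarrier barrier;

        GatherBarrierSketch(final int nFollowers) {
            // Parties = followers + the leader; the barrier action runs once in
            // the last thread to arrive and would compute the consensus from
            // the collected responses.
            barrier = new CyclicBarrier(nFollowers + 1, new Runnable() {
                public void run() {
                    // Take the minimum over responses.values() here.
                }
            });
        }

        /** Invoked (via RMI in the real system) once per follower. */
        void notifyEarliestCommitTime(final UUID serviceId, final long pinnedCommitTime)
                throws InterruptedException, BrokenBarrierException {
            // ConcurrentHashMap rejects null values, which is why a failed
            // GatherTask must still send a mock (but non-null) response.
            responses.put(serviceId, Long.valueOf(pinnedCommitTime));
            barrier.await(); // count down regardless of the response's content.
        }

    }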
From: <tho...@us...> - 2013-05-20 17:13:30
|
Revision: 7150 http://bigdata.svn.sourceforge.net/bigdata/?rev=7150&view=rev Author: thompsonbry Date: 2013-05-20 17:13:24 +0000 (Mon, 20 May 2013) Log Message: ----------- Modified QuorumMember to expose an ExecutorService rather than an Executor so we can use invokeAll(). Modified Paths: -------------- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/QuorumServiceBase.java branches/READ_CACHE/bigdata/src/java/com/bigdata/quorum/QuorumMember.java branches/READ_CACHE/bigdata/src/test/com/bigdata/io/writecache/TestWORMWriteCacheService.java branches/READ_CACHE/bigdata/src/test/com/bigdata/quorum/MockQuorumFixture.java branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/quorum/zk/MockQuorumMember.java Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/QuorumServiceBase.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/QuorumServiceBase.java 2013-05-20 17:11:14 UTC (rev 7149) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/QuorumServiceBase.java 2013-05-20 17:13:24 UTC (rev 7150) @@ -30,7 +30,7 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.util.UUID; -import java.util.concurrent.Executor; +import java.util.concurrent.ExecutorService; import java.util.concurrent.Future; import java.util.concurrent.TimeoutException; @@ -184,7 +184,7 @@ } @Override - public Executor getExecutor() { + public ExecutorService getExecutor() { return getLocalService().getExecutorService(); Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/quorum/QuorumMember.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/quorum/QuorumMember.java 2013-05-20 17:11:14 UTC (rev 7149) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/quorum/QuorumMember.java 2013-05-20 17:13:24 UTC (rev 7150) @@ -29,7 +29,7 @@ import java.rmi.Remote; import java.util.UUID; -import java.util.concurrent.Executor; +import java.util.concurrent.ExecutorService; import com.bigdata.ha.HACommitGlue; import com.bigdata.ha.QuorumCommit; @@ -80,10 +80,10 @@ S getService(); /** - * An {@link Executor} which may be used by the {@link QuorumMember} to run - * various asynchronous tasks. + * An {@link ExecutorService} which may be used by the {@link QuorumMember} + * to run various asynchronous tasks. */ - Executor getExecutor(); + ExecutorService getExecutor(); /** * Return the actor for this {@link QuorumMember}. 
Modified: branches/READ_CACHE/bigdata/src/test/com/bigdata/io/writecache/TestWORMWriteCacheService.java =================================================================== --- branches/READ_CACHE/bigdata/src/test/com/bigdata/io/writecache/TestWORMWriteCacheService.java 2013-05-20 17:11:14 UTC (rev 7149) +++ branches/READ_CACHE/bigdata/src/test/com/bigdata/io/writecache/TestWORMWriteCacheService.java 2013-05-20 17:13:24 UTC (rev 7150) @@ -36,7 +36,7 @@ import java.text.NumberFormat; import java.util.Random; import java.util.UUID; -import java.util.concurrent.Executor; +import java.util.concurrent.ExecutorService; import java.util.concurrent.Future; import java.util.concurrent.FutureTask; import java.util.concurrent.RunnableFuture; @@ -393,7 +393,7 @@ } @Override - public Executor getExecutor() { + public ExecutorService getExecutor() { return fixture.getExecutor(); Modified: branches/READ_CACHE/bigdata/src/test/com/bigdata/quorum/MockQuorumFixture.java =================================================================== --- branches/READ_CACHE/bigdata/src/test/com/bigdata/quorum/MockQuorumFixture.java 2013-05-20 17:11:14 UTC (rev 7149) +++ branches/READ_CACHE/bigdata/src/test/com/bigdata/quorum/MockQuorumFixture.java 2013-05-20 17:13:24 UTC (rev 7150) @@ -60,7 +60,6 @@ import com.bigdata.ha.msg.IHALogRootBlocksRequest; import com.bigdata.ha.msg.IHALogRootBlocksResponse; import com.bigdata.ha.msg.IHARebuildRequest; -import com.bigdata.ha.msg.IHARootBlockResponse; import com.bigdata.ha.msg.IHASendStoreResponse; import com.bigdata.ha.msg.IHASyncRequest; import com.bigdata.ha.msg.IHAWriteMessage; @@ -195,11 +194,11 @@ private final ConcurrentHashMap<UUID, QuorumMember<?>> known = new ConcurrentHashMap<UUID, QuorumMember<?>>(); /** - * An {@link Executor} which can be used by the unit tests. + * An {@link ExecutorService} which can be used by the unit tests. * * @see QuorumMember#getExecutor() */ - public Executor getExecutor() { + public ExecutorService getExecutor() { return executorService; } @@ -1208,10 +1207,12 @@ } } - public Executor getExecutor() { + @Override + public ExecutorService getExecutor() { return executorService; } + @Override public S getService() { return service; } Modified: branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/quorum/zk/MockQuorumMember.java =================================================================== --- branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/quorum/zk/MockQuorumMember.java 2013-05-20 17:11:14 UTC (rev 7149) +++ branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/quorum/zk/MockQuorumMember.java 2013-05-20 17:13:24 UTC (rev 7150) @@ -95,7 +95,7 @@ } } - public Executor getExecutor() { + public ExecutorService getExecutor() { return executorService; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
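For context, the practical difference is that java.util.concurrent.Executor only offers execute(Runnable), while ExecutorService adds invokeAll(), which submits a batch of Callables and blocks until all of them have completed. Below is a hedged sketch of the kind of fan-out this enables; the task list and method name are hypothetical.

    import java.util.List;
    import java.util.concurrent.Callable;
    import java.util.concurrent.ExecutionException;
    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Future;

    public class InvokeAllSketch {

        /** Run one Callable per remote service and wait for all of them. */
        static void messageAll(final ExecutorService executor,
                final List<Callable<Void>> rmiTasks)
                throws InterruptedException, ExecutionException {

            // invokeAll() blocks until every task is done (or this thread is
            // interrupted), returning one Future per submitted task.
            final List<Future<Void>> futures = executor.invokeAll(rmiTasks);

            for (Future<Void> f : futures) {
                f.get(); // rethrows the cause if the corresponding task failed.
            }

        }

    }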
From: <tho...@us...> - 2013-05-20 17:32:30
|
Revision: 7151 http://bigdata.svn.sourceforge.net/bigdata/?rev=7151&view=rev Author: thompsonbry Date: 2013-05-20 17:32:22 +0000 (Mon, 20 May 2013) Log Message: ----------- AbstractJournal: Added logic to make AbstractJournal.commit() spin for up to 5000ms if required to obtain a commitTime that moves forward with respect to the then current lastCommitTime. This addresses a potential source of problems with clock skew in HA and could permit us to allow more skew to be present among the nodes of an HA cluster (though I have not yet relaxed that constraint in Journal.assertBefore()). AbstractJournal: added getRootBlockWithLock() variant. HAJournalServer.awaitJournalToken(): modified to use getRootBlockWithLock() to ensure atomicity of the state changes that awaitJournalToken() is monitoring. Journal: More cleanup on the consensus release time protocol. Changes improve reporting of the root cause of an exception in the release time consensus protocol and shift reporting of clock synchronization problems entirely to the leader (based on the timestamps obtain on the leader and the followers). @see https://sourceforge.net/apps/trac/bigdata/ticket/530 (Journal HA) Modified Paths: -------------- branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractJournal.java branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/Journal.java branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournalServer.java Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractJournal.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractJournal.java 2013-05-20 17:13:24 UTC (rev 7150) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractJournal.java 2013-05-20 17:32:22 UTC (rev 7151) @@ -168,6 +168,7 @@ import com.bigdata.service.IBigdataFederation; import com.bigdata.stream.Stream; import com.bigdata.util.ChecksumUtility; +import com.bigdata.util.ClocksNotSynchronizedException; import com.bigdata.util.NT; /** @@ -2452,7 +2453,44 @@ // } } + + /** + * Variant of {@link #getRootBlockView()} that takes the internal lock in + * order to provide an appropriate synchronization barrier when installing + * new root blocks onto an empty journal in HA. + * + * @see #installRootBlocks(IRootBlockView, IRootBlockView) + */ + final public IRootBlockView getRootBlockViewWithLock() { + final ReadLock lock = _fieldReadWriteLock.readLock(); + + lock.lock(); + + try { + + if (_rootBlock == null) { + + /* + * This can happen before the journal file has been created. + * Once it has been created the root block will always be + * non-null when viewed while holding the lock. + */ + + throw new IllegalStateException(); + + } + + return _rootBlock; + + } finally { + + lock.unlock(); + + } + + } + final public long getLastCommitTime() { // final ReadLock lock = _fieldReadWriteLock.readLock(); @@ -2805,17 +2843,75 @@ } + /** + * Get timestamp that will be assigned to this commit point. + * <P> + * Note: This will spin until commit time advances over + * <code>lastCommitTime</code>, but not for more than N milliseconds. This + * will allow us to ensure that time moves forward when the leader fails + * over to another node with modest clock skew. If there is a large clock + * skew, the operator intervention will be required. + * <p> + * Note: This also makes sense for a non-HA deployment since we still want + * time to move forward at each commit point. 
+ * + * TODO This also makes sense when the Journal is opened since we often + * issue queries against historical commit points on the journal based on + * the clock. [Unit test for this in standalone and HA modes?] + */ + private long nextCommitTimestamp() { + final IRootBlockView rootBlock = _rootBlock; + final long lastCommitTime = rootBlock.getLastCommitTime(); + if (lastCommitTime < 0) + throw new RuntimeException( + "Last commit time is invalid in rootBlock: " + rootBlock); + final long commitTime; + { + final ILocalTransactionManager transactionManager = getLocalTransactionManager(); + + boolean warned = false; + while (true) { + final long t = transactionManager.nextTimestamp(); + if (t > lastCommitTime) { + /* + * We have a distinct timestamp. Time is moving forward. + */ + commitTime = t; + break; + } + /* + * Time is going backwards. Figure out by how much. + * + * Note: delta is in ms. + */ + final long delta = Math.abs(t - lastCommitTime); + if (delta > 5000/* ms */) + throw new ClocksNotSynchronizedException("Clocks off by " + + delta + " ms: lastCommitTime=" + lastCommitTime + + ", but localTimestamp=" + t); + if (!warned) { + log.warn("Clocks off by " + delta + " ms: lastCommitTime=" + + lastCommitTime + ", but localTimestamp=" + t); + warned = true; + } + try { + // Wait for the delta to expire. + Thread.sleep(delta/* ms */); + } catch (InterruptedException ex) { + // Propagate interrupt. + Thread.currentThread().interrupt(); + } + } + } + return commitTime; + } + @Override public long commit() { - final ILocalTransactionManager transactionManager = getLocalTransactionManager(); + // The timestamp to be assigned to this commit point. + final long commitTime = nextCommitTimestamp(); - /* - * Get timestamp that will be assigned to this commit (RMI if the - * journal is part of a distributed federation). - */ - final long commitTime = transactionManager.nextTimestamp(); - // do the commit. final IRootBlockView lastRootBlock = _rootBlock; final long commitTime2; @@ -2845,7 +2941,7 @@ * e.g., due to a service outage. */ - transactionManager.notifyCommit(commitTime); + getLocalTransactionManager().notifyCommit(commitTime); return commitTime; Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/Journal.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/Journal.java 2013-05-20 17:13:24 UTC (rev 7150) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/Journal.java 2013-05-20 17:32:22 UTC (rev 7151) @@ -27,6 +27,7 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.util.Collection; +import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Properties; @@ -359,6 +360,13 @@ * This is the earliest visible commit point on the leader. */ final private IHANotifyReleaseTimeRequest leadersValue; + + /** + * Exception is set by {@link #run()} if there is a problem when the + * barrier breaks. The exception is then thrown out to the thread on + * the leader that is running commitNow(), forcing the commit to fail. + */ + volatile Throwable cause = null; /** * The message from each of those followers providing their local @@ -462,12 +470,20 @@ @Override public void run() { + try { + if (log.isInfoEnabled()) log.info("leader: " + leadersValue); // This is the timestamp from the BarrierState ctor. final long timeLeader = leadersValue.getTimestamp(); + // This is the timestamp for right now. 
+ final long timeNow = newConsensusProtocolTimestamp(); + + // The local clock must be moving forward. + assertBefore(timeLeader, timeNow); + // Start with the leader's value (from ctor). minimumResponse = leadersValue; @@ -485,10 +501,17 @@ /* * Verify that the timestamp from the ctor is BEFORE the - * timestamp assigned by the follower for its response. + * timestamp assigned by the follower in the GatherTask. */ assertBefore(timeLeader, response.getTimestamp()); + /* + * Verify that the timestamp from the GatherTask on the follower + * is before the timestamp obtained at the top of this run() + * method. + */ + assertBefore(response.getTimestamp(), timeNow); + } // Restate the consensus as an appropriate message object. @@ -499,6 +522,13 @@ if (log.isInfoEnabled()) log.info("consensus: " + consensus); + } catch(Throwable t) { + + // Set the cause. + cause = t; + + } + } /** @@ -553,26 +583,46 @@ // final Future<Void>[] remoteFutures = new Future[joinedServiceIds.length]; // final boolean[] remoteDone = new boolean[joinedServiceIds.length]; + final List<Future<Void>> futures = new LinkedList<Future<Void>>(); + try { final IHAGatherReleaseTimeRequest msg = new HAGatherReleaseTimeRequest( token, timestampOnLeader); - + // Do not send message to self (leader is at index 0). for (int i = 1; i < joinedServiceIds.length; i++) { final UUID serviceId = joinedServiceIds[i]; /* - * Runnable which will execute this message on the remote - * service. + * Message each follower. + * + * Note: The invoked RMI method submits the GatherTask that + * executes on the follower and returns. It does not block + * waiting for the outcome of the task on the follower. + * Instead, we wait until the barrier breaks. A thread will + * monitor the quorum state and break the barrier if the + * quorum breaks or if a joined service leaves during the + * consensus protocol. + * + * Note: This uses multiple threads to issue the requests in + * parallel against the followers in order to minimize the + * latency of the protocol. */ - - // Resolve joined service. - final HATXSGlue service = getService(serviceId); - - // Message joined service (can throw NPE if service is gone). - service.gatherMinimumVisibleCommitTime(msg); + // Note: throws RejectedExecutionException if shutdown. + futures.add(getExecutorService().submit( + new Callable<Void>() { + public Void call() throws Exception { + // Resolve joined service. + final HATXSGlue service = getService(serviceId); + // Message remote service. + // Note: NPE if [service] is gone. + service.gatherMinimumVisibleCommitTime(msg); + // Done. + return null; + } + })); // // add to list of futures we will check. // remoteFutures[i] = rf; @@ -741,21 +791,17 @@ // } } finally { -// /* -// * Regardless of outcome or errors above, ensure that all remote -// * futures are cancelled. -// */ -// for (int i = 0; i < remoteFutures.length; i++) { -// final Future<Void> rf = remoteFutures[i]; -// if (!remoteDone[i]) { -// // Cancel the request on the remote service (RMI). -// try { -// rf.cancel(true/* mayInterruptIfRunning */); -// } catch (Throwable t) { -// // ignored. -// } -// } -// } + /* + * Cancel local futures for RMI messages to followers. + * + * Note: Regardless of outcome or errors above, ensure that the + * futures used to initiate the GatherTask on the followers are + * cancelled. These are local Futures that do RMIs. The RMIs + * should not block when the execute on the follower. 
+ */ + for (Future<Void> f : futures) { + f.cancel(true/* mayInterruptIfRunning */); + } if (!barrier.isBroken()) { /* * If there were any followers that did not message the @@ -773,10 +819,8 @@ * "vote yes". Thus, a single node failure during the * release time consensus protocol will cause the commit to * fail. [Actually, we could use getNumberWaiting(). If it - * is a bare majority, then we could force the barrier to - * meet break (either with reset or with running an await() - * in other threads) and take the barrier break action - * ourselves. E.g., in the thread that calls + * is a bare majority, then we could take the barrier break + * action ourselves. E.g., in the thread that calls * barrier.reset()]. */ barrier.reset(); @@ -933,11 +977,14 @@ * Assert that t1 LT t2. * * @param t1 + * A timestamp from one service. * @param t2 + * A timestamp from another service. * * @throws ClocksNotSynchronizedException */ - private void assertBefore(final long t1, final long t2) { + private void assertBefore(final long t1, final long t2) + throws ClocksNotSynchronizedException { if (t1 < t2) return; @@ -945,8 +992,20 @@ throw new ClocksNotSynchronizedException(); } - + /** + * Note: This deliberately uses the (non-remote) method + * {@link BasicHA#nextTimestamp()}. This is done so we can write a unit test + * of the {@link GatherTask} that imposes clock skew by overridding the next + * value to be returned by that method. + */ + private long newConsensusProtocolTimestamp() { + + return ((BasicHA) getQuorum().getClient().getService()).nextTimestamp(); + + } + + /** * {@inheritDoc} * <p> * Extends the {@link JournalTransactionService} to provide protection for @@ -1112,6 +1171,17 @@ } + if (barrierState.cause != null) { + + /* + * If an exception was recorded, re-throw it in the thread + * that invoked commitNow(). + */ + + throw new RuntimeException(barrierState.cause); + + } + /** * Update the release time on the leader. * @@ -1286,7 +1356,8 @@ final long commitTime = commitRecord == null ? 0 : commitRecord.getTimestamp(); - final long now = getLocalTransactionManager().nextTimestamp(); +// final long now = getLocalTransactionManager().nextTimestamp(); + final long now = newConsensusProtocolTimestamp(); final IHANotifyReleaseTimeRequest req = new HANotifyReleaseTimeRequest( serviceId, commitTime, commitCounter, now); @@ -1407,14 +1478,8 @@ /* * This timestamp is used to help detect clock skew. - * - * Note: This deliberately uses the (non-remote) - * nextTimestamp() method on BasicHA. This is being done so - * we can write a unit test of the GatherTask that imposes - * clock skew by overridding the next value to be returned - * by that method. */ - now = ((BasicHA)quorumService.getService()).nextTimestamp(); + now = newConsensusProtocolTimestamp(); /* * If the token is invalid, making it impossible for us to @@ -1428,10 +1493,16 @@ * our response. If we hit an assertion, we will still * respond in the finally {} block below. */ - - /* Verify event on leader occurs before event on follower. + + /* + * Note: This assert has been moved to the leader when it + * analyzes the messages from the followers. This allows us + * to report out the nature of the exception on the leader + * and thence back to the client. */ - assertBefore(req.getTimestampOnLeader(), now); +// /* Verify event on leader occurs before event on follower. 
+// */ +// assertBefore(req.getTimestampOnLeader(), now); if (!quorumService.isFollower(token)) throw new QuorumException(); @@ -1601,17 +1672,29 @@ // Make a note of the message from this follower. barrierState.responses.put(followerId, req); + } catch(RuntimeException e) { + + /* + * Note: The try {} block can throw RuntimeException but not + * Exception. If anything is thrown, then reset the barrier and + * rethrow the exception. + */ + + haLog.error(e, e); + + // Reset the barrier (barrier will break). + barrierState.barrier.reset(); + + // Rethrow the exception. + throw new RuntimeException(e); + } finally { /* * Block until barrier breaks. * - * Note: We want to await() on the barrier even if there is an - * error in the try{} block. This is necessary to decrement the - * barrier count down to zero. - * - * TODO If there is an error, we could reset() the barrier - * instead. + * Note: The barrier will break immediately if it was reset in + * the catch{} block above. */ // follower blocks on Thread on the leader here. @@ -1619,6 +1702,23 @@ } + /* + * Check for an error in the consensus protocol. + */ + final Throwable t = barrierState.cause; + + if (t != null) { + + /* + * Log error. + */ + haLog.error(t, t); + + // rethrow cause. + throw new RuntimeException(t); + + } + // Return the consensus. final IHANotifyReleaseTimeResponse resp = barrierState.consensus; @@ -1630,7 +1730,7 @@ } return resp; - + } /** Modified: branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournalServer.java =================================================================== --- branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournalServer.java 2013-05-20 17:13:24 UTC (rev 7150) +++ branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournalServer.java 2013-05-20 17:32:22 UTC (rev 7151) @@ -1566,6 +1566,10 @@ * yet. */ // server.haGlueService.bounceZookeeperConnection(); + /* + * Note: Try moving to doRejectedCommit() so this will be + * synchronous. + */ logLock.lock(); try { if (journal.getHALogNexus().isHALogOpen()) { @@ -3525,7 +3529,8 @@ continue; } if (isFollower(token)) {// if (awaitRootBlocks) { - final IRootBlockView rbSelf = journal.getRootBlockView(); + // Check root block, using lock for synchronization barrier. + final IRootBlockView rbSelf = journal.getRootBlockViewWithLock(); if (rbSelf.getCommitCounter() == 0L) { /* * Only wait if this is an empty Journal. This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
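The spin logic added to AbstractJournal.commit() can be summarized by the standalone sketch below. It is a simplification with stand-in names: the real code draws timestamps from the local transaction service rather than System.currentTimeMillis() and throws com.bigdata.util.ClocksNotSynchronizedException rather than IllegalStateException; only the 5000ms bound mirrors the patch.

    public class CommitTimestampSketch {

        private static final long MAX_SKEW_MS = 5000; // mirrors the patch.

        /**
         * Spin until the clock advances past the last commit time so that commit
         * times always move forward, but fail if the skew is too large to wait out.
         */
        static long nextCommitTimestamp(final long lastCommitTime)
                throws InterruptedException {

            boolean warned = false;

            while (true) {

                final long t = System.currentTimeMillis(); // stand-in clock.

                if (t > lastCommitTime) {
                    return t; // time is moving forward; assign this commit time.
                }

                final long delta = lastCommitTime - t;

                if (delta > MAX_SKEW_MS) {
                    // Large skew requires operator intervention.
                    throw new IllegalStateException("Clocks off by " + delta + " ms");
                }

                if (!warned) {
                    System.err.println("Clock skew of " + delta + " ms; waiting it out");
                    warned = true;
                }

                Thread.sleep(Math.max(1L, delta)); // wait out the small skew, retry.

            }

        }

    }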
From: <tho...@us...> - 2013-05-21 20:00:53
|
Revision: 7154 http://bigdata.svn.sourceforge.net/bigdata/?rev=7154&view=rev Author: thompsonbry Date: 2013-05-21 20:00:46 +0000 (Tue, 21 May 2013) Log Message: ----------- Added isLive() to the IHALog interface. Modified HAJournal.sendHALogForWriteSet() to use an asyncnonous future only when the live log is being sent along the pipeline. This will minimize DGC thread leaks per [1]. [1] https://sourceforge.net/apps/trac/bigdata/ticket/678 (DGC Thread Leak: sendHALogForWriteSet()) Modified Paths: -------------- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/halog/HALogReader.java branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/halog/HALogWriter.java branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/halog/IHALogReader.java branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournal.java Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/halog/HALogReader.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/halog/HALogReader.java 2013-05-21 16:08:18 UTC (rev 7153) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/halog/HALogReader.java 2013-05-21 20:00:46 UTC (rev 7154) @@ -173,6 +173,7 @@ } }; + @Override public void close() { if (m_channel.isOpen()) { @@ -189,6 +190,14 @@ } + @Override + public boolean isLive() { + + return false; + + } + + @Override public boolean isEmpty() { return m_openRootBlock.getCommitCounter() == m_closeRootBlock @@ -219,6 +228,7 @@ } + @Override public boolean hasMoreBuffers() throws IOException { assertOpen(); @@ -260,6 +270,7 @@ } + @Override public IHAWriteMessage processNextBuffer(final ByteBuffer clientBuffer) throws IOException { Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/halog/HALogWriter.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/halog/HALogWriter.java 2013-05-21 16:08:18 UTC (rev 7153) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/halog/HALogWriter.java 2013-05-21 20:00:46 UTC (rev 7154) @@ -857,6 +857,13 @@ m_state.m_accessors++; } + @Override + public boolean isLive() { + + return true; + + } + @Override public IRootBlockView getOpeningRootBlock() throws IOException { Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/halog/IHALogReader.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/halog/IHALogReader.java 2013-05-21 16:08:18 UTC (rev 7153) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/halog/IHALogReader.java 2013-05-21 20:00:46 UTC (rev 7154) @@ -126,4 +126,9 @@ void computeDigest(MessageDigest digest) throws DigestException, IOException; + /** + * Return <code>true</code> iff this is was the live HALog at the moment + * when it was opened. + */ + boolean isLive(); } Modified: branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournal.java =================================================================== --- branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournal.java 2013-05-21 16:08:18 UTC (rev 7153) +++ branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournal.java 2013-05-21 20:00:46 UTC (rev 7154) @@ -807,6 +807,8 @@ */ final IHALogReader r = getHALogNexus().getReader(commitCounter); + final boolean isLive = r.isLive(); + // Task sends an HALog file along the pipeline. 
final FutureTask<Void> ft = new FutureTaskMon<Void>( new SendHALogTask(req, r)); @@ -814,8 +816,36 @@ // Run task. getExecutorService().submit(ft); - // Return *ASYNCHRONOUS* proxy (interruptable). - return getProxy(ft, true/* asynch */); + /** + * Return Future. + * + * FIXME DGC: This leaks a thread every time we return an + * asynchronous proxy, but we need the ability to interrupt the + * transfer of an HALog file. + * + * Look at HAJournalServer and how it manages the transition to a + * joined service in RESYNC and identify a different mechanism for + * interrupting the transfer of the HALog. + * + * Consider using a well known exception thrown back long the write + * pipeline to indicate that a receiver is done recieving data for + * some HALog (or backing store) or sending a message which + * explicitly cancels a transfer using an identifier for that + * transfer. If this is done synchronously while in + * handleReplicatedWrite then we will get the same decidability as + * using an asyncrhonous future, but without the thread leak + * problem. + * + * This issue is most pressing for sendHALogForWriteSet() since we + * can synchronous many 1000s of HALog files when resynchronizing a + * service. However, the same DGC thread leak exists for several + * other methods as specified on the ticket below. + * + * @see <a + * href="https://sourceforge.net/apps/trac/bigdata/ticket/678" + * > DGC Thread Leak: sendHALogForWriteSet() </a> + */ + return getProxy(ft, isLive/* asynch */); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
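To see how the new isLive() flag is meant to be consumed, here is a hedged sketch of a reader-side loop written against only the methods visible in these diffs (isLive(), hasMoreBuffers(), processNextBuffer(), close()). The buffer size and the replication step are placeholders, and the try/finally around close() anticipates the "too many open files" fix in the next revision (r7155) below.

    import java.io.IOException;
    import java.nio.ByteBuffer;

    import com.bigdata.ha.halog.IHALogReader;
    import com.bigdata.ha.msg.IHAWriteMessage;

    public class HALogDrainSketch {

        static void drain(final IHALogReader r) throws IOException {

            // New in r7154: true iff the reader was opened against the live HALog,
            // the one case where the transfer must remain interruptible (hence the
            // asynchronous proxy decision in HAJournal.sendHALogForWriteSet above).
            final boolean interruptible = r.isLive();

            final ByteBuffer buf = ByteBuffer.allocate(1024 * 1024); // placeholder size.

            try {

                while (r.hasMoreBuffers()) {

                    final IHAWriteMessage msg = r.processNextBuffer(buf);

                    // Replicate [msg] plus the buffer contents down the pipeline here.

                }

            } finally {

                // Always release the reader, even on error.
                r.close();

            }

        }

    }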
From: <tho...@us...> - 2013-05-22 19:26:54
|
Revision: 7155 http://bigdata.svn.sourceforge.net/bigdata/?rev=7155&view=rev Author: thompsonbry Date: 2013-05-22 19:26:41 +0000 (Wed, 22 May 2013) Log Message: ----------- HAJournal: sendHALogForWriteSet() was not closing the HALog. This lead to a "too many open files" exception when trying to resync a follower for a delta involving a lot of commit points. HAGlue: removed globalWriteLock() method. It is difficult to reconcile this with the concurrent unisolated writers task that we are taking up next and the global write lock is no longer required for backups. The test suite for this method was also removed. HALogNexus: sort the files in a directory before loading them into the index (files are not lexically sorted by default on some OS platforms and sorting facilitates index writes and makes the restart process more intelligible since we scan the files in commit order). HALogFile (althalog package). removed unused method. HALogReader, HALogWriter, IHALogReader, and test suite for same: found and cured several synchronization errors, cleaned up the test cases, and expanded test coverage. There is one test that is not yet written and hence fails with a "write me" message. See https://sourceforge.net/apps/trac/bigdata/ticket/678 (DGC Thread Leak: sendHALogForWriteSet()) Modified Paths: -------------- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/HAGlue.java branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/althalog/HALogFile.java branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/halog/HALogReader.java branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/halog/HALogWriter.java branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/halog/IHALogReader.java branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/msg/HAGlobalWriteLockRequest.java branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/msg/IHAGlobalWriteLockRequest.java branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractJournal.java branches/READ_CACHE/bigdata/src/test/com/bigdata/ha/halog/TestHALogWriter.java branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournal.java branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournalServer.java branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HALogNexus.java branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/HAJournalTest.java branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestAll.java branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHAJournalServerOverride.java Removed Paths: ------------- branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHAJournalServerGlobalWriteLock.java Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/HAGlue.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/HAGlue.java 2013-05-21 20:00:46 UTC (rev 7154) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/HAGlue.java 2013-05-22 19:26:41 UTC (rev 7155) @@ -34,7 +34,6 @@ import com.bigdata.ha.msg.IHADigestRequest; import com.bigdata.ha.msg.IHADigestResponse; -import com.bigdata.ha.msg.IHAGlobalWriteLockRequest; import com.bigdata.ha.msg.IHALogDigestRequest; import com.bigdata.ha.msg.IHALogDigestResponse; import com.bigdata.ha.msg.IHARemoteRebuildRequest; @@ -45,7 +44,6 @@ import com.bigdata.ha.msg.IHASnapshotRequest; import com.bigdata.ha.msg.IHASnapshotResponse; import com.bigdata.journal.AbstractJournal; -import com.bigdata.journal.Journal; import 
com.bigdata.journal.jini.ha.HAJournalServer; import com.bigdata.quorum.AsynchronousQuorumCloseException; import com.bigdata.quorum.QuorumException; @@ -184,39 +182,42 @@ IHASnapshotDigestResponse computeHASnapshotDigest(IHASnapshotDigestRequest req) throws IOException, NoSuchAlgorithmException, DigestException; - /** - * Obtain a global write lock on the leader. The lock only blocks writers. - * Readers may continue to execute without delay. - * <p> - * You can not obtain a coherent backup of the {@link Journal} while there - * are concurrent write operations. This method may be used to coordinate - * full backups of the {@link Journal} by suspending low level writes on the - * backing file. - * <p> - * This method will block until the lock is held, the lock request is - * interrupted, or the lock request timeout expires. - * - * @param req - * The request. - * - * @return A {@link Future} for the lock. The lock may be released by - * canceling the {@link Future}. The lock is acquired before this - * method returns and is held while the {@link Future} is running. - * If the {@link Future#isDone()} then the lock is no longer held. - * - * @throws IOException - * if there is an RMI problem. - * @throws TimeoutException - * if a timeout expires while awaiting the global lock. - * @throws InterruptedException - * if interrupted while awaiting the lock. - * - * @deprecated This is no longer necessary to support backups since we can - * now take snapshots without suspending writers. - */ - @Deprecated - Future<Void> globalWriteLock(IHAGlobalWriteLockRequest req) - throws IOException, TimeoutException, InterruptedException; +// /** +// * Obtain a global write lock on the leader. The lock only blocks writers. +// * Readers may continue to execute without delay. +// * <p> +// * You can not obtain a coherent backup of the {@link Journal} while there +// * are concurrent write operations. This method may be used to coordinate +// * full backups of the {@link Journal} by suspending low level writes on the +// * backing file. +// * <p> +// * This method will block until the lock is held, the lock request is +// * interrupted, or the lock request timeout expires. +// * +// * @param req +// * The request. +// * +// * @return A {@link Future} for the lock. The lock may be released by +// * canceling the {@link Future}. The lock is acquired before this +// * method returns and is held while the {@link Future} is running. +// * If the {@link Future#isDone()} then the lock is no longer held. +// * +// * @throws IOException +// * if there is an RMI problem. +// * @throws TimeoutException +// * if a timeout expires while awaiting the global lock. +// * @throws InterruptedException +// * if interrupted while awaiting the lock. +// * +// * @deprecated This is no longer necessary to support backups since we can +// * now take snapshots without suspending writers. +// * @see https://sourceforge.net/apps/trac/bigdata/ticket/566 ( +// * Concurrent unisolated operations against multiple KBs on the +// * same Journal) +// */ +// @Deprecated +// Future<Void> globalWriteLock(IHAGlobalWriteLockRequest req) +// throws IOException, TimeoutException, InterruptedException; /** * Request that the service take a snapshot. 
If there is already a snapshot Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/althalog/HALogFile.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/althalog/HALogFile.java 2013-05-21 20:00:46 UTC (rev 7154) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/althalog/HALogFile.java 2013-05-22 19:26:41 UTC (rev 7155) @@ -745,16 +745,16 @@ } - /** - * The {@link IRootBlockView} for the committed state BEFORE the write - * set contained in the HA log file. - */ - public HALogFile getHALogFile() { +// /** +// * The {@link IRootBlockView} for the committed state BEFORE the write +// * set contained in the HA log file. +// */ +// public HALogFile getHALogFile() { +// +// return HALogFile.this; +// +// } - return HALogFile.this; - - } - public boolean hasMoreBuffers() throws IOException { assertOpen(); Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/halog/HALogReader.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/halog/HALogReader.java 2013-05-21 20:00:46 UTC (rev 7154) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/halog/HALogReader.java 2013-05-22 19:26:41 UTC (rev 7155) @@ -33,6 +33,7 @@ import java.nio.channels.FileChannel; import java.security.DigestException; import java.security.MessageDigest; +import java.util.Arrays; import org.apache.log4j.Logger; @@ -176,20 +177,29 @@ @Override public void close() { - if (m_channel.isOpen()) { + if (isOpen()) { try { - m_raf.close(); + + m_raf.close(); + } catch (IOException e) { - log - .error("Problem closing file: file=" + m_file + " : " - + e, e); - } + + log.error("Problem closing file: file=" + m_file + " : " + e, e); + + } } } + @Override + public boolean isOpen() { + + return m_channel.isOpen(); + + } + @Override public boolean isLive() { @@ -438,6 +448,17 @@ } }); + /* + * Sort into lexical order to force visitation in lexical order. + * + * Note: This should work under any OS. Files will be either directory + * names (3 digits) or filenames (21 digits plus the file extension). + * Thus the comparison centers numerically on the digits that encode + * either part of a commit counter (subdirectory) or an entire commit + * counter (HALog file). + */ + Arrays.sort(files); + for (File file : files) { if (file.isDirectory()) { Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/halog/HALogWriter.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/halog/HALogWriter.java 2013-05-21 20:00:46 UTC (rev 7154) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/halog/HALogWriter.java 2013-05-22 19:26:41 UTC (rev 7155) @@ -31,6 +31,7 @@ import java.nio.channels.FileChannel; import java.security.DigestException; import java.security.MessageDigest; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantReadWriteLock; @@ -126,9 +127,6 @@ /** current write point on the channel. */ private long m_position = headerSize0; - /** number of open readers **/ - private int m_readers = 0; - /** * Return the commit counter that is expected for the writes that will be * logged (the same commit counter that is on the opening root block). 
@@ -349,15 +347,26 @@ @Override public FileChannel reopenChannel() throws IOException { - final Lock lock = m_stateLock.readLock(); - lock.lock(); - try { - if (m_state == null || m_state.m_channel == null) - throw new IOException("Closed"); + final Lock lock = m_stateLock.readLock(); + + lock.lock(); + + try { + + if (m_state == null || m_state.m_channel == null + || !m_state.m_channel.isOpen()) { + + throw new IOException("Closed"); + + } + return m_state.m_channel; + } finally { - lock.unlock(); + + lock.unlock(); + } } @@ -666,29 +675,6 @@ } /** - * FIXME This method is only used by the unit tests. They need to modified - * to use {@link #getReader(long)} instead. - * - * @deprecated Use {@link #getReader(long)}. That code can make an atomic - * decision about whether the current HALog is being request or - * a historical HALog. It is not possible for the caller to make - * this decision from the outside. - */ - public IHALogReader getReader() { - - final Lock lock = m_stateLock.readLock(); - lock.lock(); - try { - if (m_state == null) - return null; - - return new OpenHALogReader(m_state); - } finally { - lock.unlock(); - } - } - - /** * Return the {@link IHALogReader} for the specified commit counter. If the * request identifies the HALog that is currently being written, then an * {@link IHALogReader} will be returned that will "see" newly written @@ -756,15 +742,28 @@ * The FileState class encapsulates the file objects shared by the Writer * and Readers. */ - static class FileState { - final StoreTypeEnum m_storeType; - final File m_haLogFile; - final FileChannel m_channel; - final RandomAccessFile m_raf; - int m_records = 0; - boolean m_committed = false; + private static class FileState { + private final StoreTypeEnum m_storeType; + private final File m_haLogFile; + private final FileChannel m_channel; + private final RandomAccessFile m_raf; + /* + * Note: Mutable fields are guarded by synchronized(this) for the + * FileState object. + */ + /** + * The #of messages written onto the live HALog file. + */ + private long m_records = 0; + /** + * <code>false</code> until the live HALog file has been committed (by + * writing the closing root block). + */ + private boolean m_committed = false; + /** number of open writers (at most one) plus readers **/ + private int m_accessors; - final IReopenChannel<FileChannel> reopener = new IReopenChannel<FileChannel>() { + private final IReopenChannel<FileChannel> reopener = new IReopenChannel<FileChannel>() { @Override public FileChannel reopenChannel() throws IOException { @@ -777,20 +776,24 @@ } }; - int m_accessors = 0; - - FileState(final File file, StoreTypeEnum storeType) - throws FileNotFoundException { + private FileState(final File file, final StoreTypeEnum storeType) + throws FileNotFoundException { + m_haLogFile = file; m_storeType = storeType; m_raf = new RandomAccessFile(m_haLogFile, "rw"); m_channel = m_raf.getChannel(); m_accessors = 1; // the writer is a reader also + } public void close() throws IOException { - if (--m_accessors == 0) - m_channel.close(); + synchronized (this) { + if (--m_accessors == 0) + m_channel.close(); + // wake up anyone waiting. 
+ this.notifyAll(); + } } public void addRecord() { @@ -800,7 +803,7 @@ } } - public int recordCount() { + public long recordCount() { synchronized (this) { return m_records; } @@ -825,36 +828,69 @@ } } - /** - * - * @param record - * - the next sequence required - */ + /** + * + * @param record + * - the next sequence required + */ + /* + * TODO We should support wait up to a timeout here to make the API more + * pleasant. + */ public void waitOnStateChange(final int record) { - synchronized (this) { - if (m_records >= record) { - return; + + synchronized (this) { + + if (m_records >= record) { + + return; + } try { - wait(); + + wait(); + } catch (InterruptedException e) { - // okay; + + // Propagate the interrupt. + Thread.currentThread().interrupt(); + + return; + } + } } - } + } // class FileState static class OpenHALogReader implements IHALogReader { + private final FileState m_state; + private int m_record = 0; + private long m_position = headerSize0; // initial position + + /** <code>true</code> iff this reader is open. */ + private final AtomicBoolean open = new AtomicBoolean(true); - OpenHALogReader(final FileState state) { - m_state = state; - m_state.m_accessors++; + OpenHALogReader(final FileState state) { + + if (state == null) + throw new IllegalArgumentException(); + + m_state = state; + + // Note: Must be synchronized for visibility and atomicity! + synchronized (m_state) { + + m_state.m_accessors++; + + } + } @Override @@ -892,7 +928,11 @@ @Override public boolean hasMoreBuffers() throws IOException { - if (m_state.isCommitted() && m_state.recordCount() <= m_record) + + if (!isOpen()) + return false; + + if (m_state.isCommitted() && m_state.recordCount() <= m_record) return false; if (m_state.recordCount() > m_record) @@ -900,12 +940,22 @@ m_state.waitOnStateChange(m_record + 1); - return hasMoreBuffers(); + return hasMoreBuffers(); // tail recursion. + } @Override + public boolean isOpen() { + + return open.get(); + + } + + @Override public boolean isEmpty() { - return m_state.isEmpty(); + + return m_state.isEmpty(); + } @Override @@ -931,20 +981,28 @@ } @Override - public void close() throws IOException { - if (m_state != null) { - m_state.close(); - } + public void close() throws IOException { + + // Note: this pattern prevents a double-close of a reader. + if (open.compareAndSet(true/* expected */, false/* newValue */)) { + + /* + * Close an open reader. + */ + m_state.close(); + + } + } @Override - public void computeDigest(MessageDigest digest) throws DigestException, - IOException { + public void computeDigest(final MessageDigest digest) + throws DigestException, IOException { HALogReader.computeDigest(m_state.reopener, digest); - + } - } + } // class OpenHAReader } Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/halog/IHALogReader.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/halog/IHALogReader.java 2013-05-21 20:00:46 UTC (rev 7154) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/halog/IHALogReader.java 2013-05-22 19:26:41 UTC (rev 7155) @@ -70,20 +70,32 @@ }; /** - * Closes the Reader. - * - * @throws IOException - */ + * Closes the reader iff it is open. + * <p> + * Note: Closing a reader does not have a side-effect on any open reader or + * writer for the same file. Specifically, if two readers are open for the + * same file and one is closed, then other will remain open. 
Likewise, if a + * reader is open for the live HALog file, closing the writer will not close + * the reader and closing the reader will not close the writer. + */ void close() throws IOException; /** * Return <code>true</code> if the root blocks in the log file have the same * commit counter. Such log files are logically empty regardless of their * length. + * + * FIXME The code disagress and will report [false] if the live log has the + * same root blocks but has not yet been closed. */ boolean isEmpty(); /** + * Return <code>true</code> iff the reader is open. + */ + boolean isOpen(); + + /** * The {@link IRootBlockView} for the committed state BEFORE the write set * contained in the HA log file. */ @@ -95,9 +107,18 @@ */ IRootBlockView getClosingRootBlock() throws IOException; - /** - * Checks whether we have reached the end of the file. - */ + /** + * Checks whether we have reached the end of the file (blocking). + * <p> + * Note: This method will block if this is the live HALog. This allows a + * process to block until the next message is made available on the live + * HALog by the writer. + * <p> + * Note: This method is non-blocking if this is not the live HALog since the + * decision can be made deterministically by inspecting the #of messages + * available (in the closing root block) and the #of messages consumed by + * the reader. + */ boolean hasMoreBuffers() throws IOException; /** @@ -128,7 +149,8 @@ /** * Return <code>true</code> iff this is was the live HALog at the moment - * when it was opened. + * when it was opened (if true, then this flag will remain <code>true</code> + * even if the writer moves onto another HALog file). */ boolean isLive(); } Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/msg/HAGlobalWriteLockRequest.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/msg/HAGlobalWriteLockRequest.java 2013-05-21 20:00:46 UTC (rev 7154) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/msg/HAGlobalWriteLockRequest.java 2013-05-22 19:26:41 UTC (rev 7155) @@ -26,6 +26,13 @@ import java.io.Serializable; import java.util.concurrent.TimeUnit; +/** + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @see https://sourceforge.net/apps/trac/bigdata/ticket/566 ( Concurrent + * unisolated operations against multiple KBs on the same Journal) + */ +@Deprecated public class HAGlobalWriteLockRequest implements IHAGlobalWriteLockRequest, Serializable { Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/msg/IHAGlobalWriteLockRequest.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/msg/IHAGlobalWriteLockRequest.java 2013-05-21 20:00:46 UTC (rev 7154) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/msg/IHAGlobalWriteLockRequest.java 2013-05-22 19:26:41 UTC (rev 7155) @@ -29,7 +29,10 @@ * Message requesting a global write lock. 
* * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @see https://sourceforge.net/apps/trac/bigdata/ticket/566 ( Concurrent + * unisolated operations against multiple KBs on the same Journal) */ +@Deprecated public interface IHAGlobalWriteLockRequest extends IHAMessage { /** Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractJournal.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractJournal.java 2013-05-21 20:00:46 UTC (rev 7154) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractJournal.java 2013-05-22 19:26:41 UTC (rev 7155) @@ -6142,14 +6142,14 @@ } - @Override - public Future<Void> globalWriteLock(final IHAGlobalWriteLockRequest req) - throws IOException, TimeoutException, InterruptedException { +// @Override +// public Future<Void> globalWriteLock(final IHAGlobalWriteLockRequest req) +// throws IOException, TimeoutException, InterruptedException { +// +// throw new UnsupportedOperationException(); +// +// } - throw new UnsupportedOperationException(); - - } - @Override public Future<IHASnapshotResponse> takeSnapshot( final IHASnapshotRequest req) throws IOException { Modified: branches/READ_CACHE/bigdata/src/test/com/bigdata/ha/halog/TestHALogWriter.java =================================================================== --- branches/READ_CACHE/bigdata/src/test/com/bigdata/ha/halog/TestHALogWriter.java 2013-05-21 20:00:46 UTC (rev 7154) +++ branches/READ_CACHE/bigdata/src/test/com/bigdata/ha/halog/TestHALogWriter.java 2013-05-22 19:26:41 UTC (rev 7155) @@ -29,18 +29,116 @@ import java.nio.ByteBuffer; import java.util.Random; import java.util.UUID; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import junit.framework.TestCase2; + import com.bigdata.ha.msg.HAWriteMessage; +import com.bigdata.ha.msg.IHAMessage; import com.bigdata.ha.msg.IHAWriteMessage; +import com.bigdata.io.DirectBufferPool; +import com.bigdata.journal.CommitCounterUtility; import com.bigdata.journal.IRootBlockView; import com.bigdata.journal.RootBlockView; import com.bigdata.journal.StoreTypeEnum; +import com.bigdata.rawstore.Bytes; import com.bigdata.util.ChecksumUtility; +import com.bigdata.util.concurrent.DaemonThreadFactory; -import junit.framework.TestCase; +/** + * Test suite for {@link HALogWriter} and {@link HALogReader}. + * + * @author <a href="mailto:mar...@us...">Martyn Cutcher</a> + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + */ +public class TestHALogWriter extends TestCase2 { -public class TestHALogWriter extends TestCase { + private Random r; + private File logdir; + private ExecutorService executorService; + @Override + protected void setUp() throws Exception { + + super.setUp(); + + // create temporary file for the test. + logdir = File.createTempFile(getClass().getSimpleName(), "halogdir"); + + // delete temp file. will recreate as a directory. + if (!logdir.delete()) + throw new IOException("Could not remove: file=" + logdir); + + // re-create as a directory. 
+ if (!logdir.mkdirs()) + throw new IOException("Could not create: dir=" + logdir); + + r = new Random(); + + executorService = Executors.newCachedThreadPool(DaemonThreadFactory + .defaultThreadFactory()); + + } + + @Override + protected void tearDown() throws Exception { + + super.tearDown(); + + r = null; + + if (logdir != null && logdir.exists()) { + + recursiveDelete(logdir); + + } + + if (executorService != null) { + + executorService.shutdownNow(); + + executorService = null; + + } + + } + + /** + * Recursively removes any files and subdirectories and then removes the + * file (or directory) itself. + * + * @param f + * A file or directory. + */ + private void recursiveDelete(final File f) { + + if (f.isDirectory()) { + + final File[] children = f.listFiles(); + + for (int i = 0; i < children.length; i++) { + + recursiveDelete(children[i]); + + } + + } + + if (log.isInfoEnabled()) + log.info("Removing: " + f); + + if (f.exists() && !f.delete()) { + + log.warn("Could not remove: " + f); + + } + + } + /* * Need to mock up some valid rootblocks * @@ -53,7 +151,7 @@ * storeTypeEnum, // VERSION1 final long createTime, final long closeTime, * final int version, final ChecksumUtility checker) */ - private IRootBlockView openRBV(final StoreTypeEnum st) { + private static IRootBlockView openRBV(final StoreTypeEnum st) { return new RootBlockView( // true /* rb0 */, 0, 0, 0 /* commitTime */, 0, @@ -84,235 +182,508 @@ RootBlockView.currentVersion, ChecksumUtility.getCHK()); } - final static Random r = new Random(); + private ByteBuffer randomData(final int sze) { - static ByteBuffer randomData(final int sze) { - byte[] buf = new byte[sze]; - r.nextBytes(buf); + final byte[] buf = new byte[sze]; + + r.nextBytes(buf); return ByteBuffer.wrap(buf, 0, sze); + } - /** - * Simple writelog test, open file, write data and commit. - */ - public void testSimpleRWWriter() throws FileNotFoundException, IOException { - // establish halogdir - File logdir = new File("/tmp/halogdir"); - logdir.mkdirs(); + /** + * Simple writelog test, open file, write data and commit. 
+ */ + public void testSimpleRWWriter() throws FileNotFoundException, IOException, + InterruptedException { - final ChecksumUtility checker = ChecksumUtility.getCHK(); + final HALogWriter writer = new HALogWriter(logdir); - final HALogWriter writer = new HALogWriter(logdir); - final IRootBlockView rbv = openRBV(StoreTypeEnum.RW); + try { - assertTrue(rbv.getStoreType() == StoreTypeEnum.RW); + final IRootBlockView rbv = openRBV(StoreTypeEnum.RW); - writer.createLog(rbv); + assertEquals(StoreTypeEnum.RW, rbv.getStoreType()); - int sequence = 0; + writer.createLog(rbv); - final ByteBuffer data = randomData(2000); + int sequence = 0; - final UUID storeUUID = UUID.randomUUID(); - - IHAWriteMessage msg = new HAWriteMessage(storeUUID, rbv.getCommitCounter(), rbv - .getFirstCommitTime(), sequence, data.limit(), checker - .checksum(data), rbv.getStoreType(), rbv.getQuorumToken(), - 1000, 0); + final ByteBuffer data = randomData(2000); - writer.writeOnHALog(msg, data); + final UUID storeUUID = UUID.randomUUID(); - writer.closeHALog(closeRBV(rbv)); + final IHAWriteMessage msg = new HAWriteMessage(storeUUID, + rbv.getCommitCounter(), rbv.getFirstCommitTime(), sequence, + data.limit()/* size */, ChecksumUtility.getCHK().checksum( + data), rbv.getStoreType(), rbv.getQuorumToken(), + 1000/* fileExtent */, 0/* firstOffset */); - // for sanity, let's run through the standard reader - try { - HALogReader.main(new String[] { "/tmp/halogdir" }); - } catch (InterruptedException e) { - // NOP - } + writer.writeOnHALog(msg, data); + + writer.closeHALog(closeRBV(rbv)); + + } finally { + + writer.disableHALog(); + + } + + // Read all files in the test directory. + HALogReader.main(new String[] { logdir.toString() }); + } /** * Simple WriteReader, no concurrency, confirms non-delayed responses. */ public void testSimpleRWWriterReader() throws FileNotFoundException, - IOException { - // establish halogdir - File logdir = new File("/tmp/halogdir"); - logdir.mkdirs(); + IOException, InterruptedException { - final ChecksumUtility checker = ChecksumUtility.getCHK(); - final HALogWriter writer = new HALogWriter(logdir); - final IRootBlockView rbv = openRBV(StoreTypeEnum.RW); + + try { - assertTrue(rbv.getStoreType() == StoreTypeEnum.RW); + // The opening root block. + final IRootBlockView openRB = openRBV(StoreTypeEnum.RW); - writer.createLog(rbv); + assertEquals(StoreTypeEnum.RW, openRB.getStoreType()); - int sequence = 0; + { + // should not be able to open the reader yet. + try { + writer.getReader(openRB.getCommitCounter() + 1); + } catch (FileNotFoundException ex) { + // Ignore expected exception. + if (log.isInfoEnabled()) + log.info("Ignoring expected exception: " + ex); + } - final ByteBuffer data = randomData(2000); + } - final UUID storeUUID = UUID.randomUUID(); + // writer is not open. + assertFalse(writer.isHALogOpen()); - final IHAWriteMessage msg = new HAWriteMessage(storeUUID, rbv.getCommitCounter(), rbv - .getFirstCommitTime(), sequence, data.limit(), checker - .checksum(data), rbv.getStoreType(), rbv.getQuorumToken(), - 1000, 0); + // create HALog file. + writer.createLog(openRB); - writer.writeOnHALog(msg, data); + { - final IHALogReader reader = writer.getReader(); + // writer is open. + assertTrue(writer.isHALogOpen()); + + // should be able to open the reader for that log now. + final IHALogReader reader = writer.getReader(openRB + .getCommitCounter() + 1); - assertTrue(reader.hasMoreBuffers()); + // This is the "live" HALog. 
+ assertTrue(reader.isLive()); - ByteBuffer rbuf = ByteBuffer.allocate(1 * 1024 * 1024); // 1 mb - IHAWriteMessage rmsg = reader.processNextBuffer(rbuf); + // The reader is open. + assertTrue(reader.isOpen()); + + // The HALog is logically empty. +// assertTrue(reader.isEmpty()); + + /* + * Note: Don't do this here. The method will block for the live + * HALog until the file is closed (sealed with the closing root + * block) or destroyed. + */ +// assertTrue(reader.hasMoreBuffers()); - assertTrue(rmsg.getSize() == msg.getSize()); + // close the reader. should not close the writer. + reader.close(); - // commit the log file - writer.closeHALog(closeRBV(rbv)); + // the reader is closed. + assertFalse(reader.isOpen()); + + // once closed, this method should return immediately. + assertFalse(reader.hasMoreBuffers()); + + // the writer is still open. + assertTrue(writer.isHALogOpen()); - // the writer should have closed the file, so the reader should return - // immediately to report no more buffers - assertFalse(reader.hasMoreBuffers()); + // double-close the reader. should be ignored. + reader.close(); + + // the writer should *still* be open. + assertTrue(writer.isHALogOpen()); + + } - // for sanity, let's run through the standard reader - try { - HALogReader.main(new String[] { "/tmp/halogdir" }); - } catch (InterruptedException e) { - // NOP + /* + * Verify that we can open two distinct readers on the same live + * HALog and that closing one does not close the other and does not + * close the writer. + */ + { + + final IHALogReader r1 = writer.getReader(openRB.getCommitCounter() + 1); + final IHALogReader r2 = writer.getReader(openRB.getCommitCounter() + 1); + + assertTrue(r1.isOpen()); + assertTrue(r2.isOpen()); + + // close one reader. + r1.close(); + + // one reader is closed, the other is open. + assertFalse(r1.isOpen()); + assertTrue(r2.isOpen()); + + // the writer should *still* be open. + assertTrue(writer.isHALogOpen()); + + // close the other reader. + r2.close(); + + // Verify both are now closed. + assertFalse(r2.isOpen()); + assertFalse(r2.isOpen()); + + // the writer should *still* be open. + assertTrue(writer.isHALogOpen()); + + } + + int sequence = 0; + + final ByteBuffer data = randomData(2000); + + final UUID storeUUID = UUID.randomUUID(); + + final IHAWriteMessage msg = new HAWriteMessage(storeUUID, + openRB.getCommitCounter(), openRB.getFirstCommitTime(), sequence, + data.limit()/* size */, ChecksumUtility.getCHK().checksum( + data), openRB.getStoreType(), openRB.getQuorumToken(), + 1000/* fileExtent */, 0/* firstOffset */); + + // write a message on the HALog. + writer.writeOnHALog(msg, data); + + // should be able to open the reader for that log now. + final IHALogReader reader = writer + .getReader(openRB.getCommitCounter() + 1); + + assertTrue(reader.hasMoreBuffers()); + + { + + // Allocate heap byte buffer for the reader. + final ByteBuffer rbuf = ByteBuffer + .allocate(DirectBufferPool.INSTANCE.getBufferCapacity()); + + final IHAWriteMessage rmsg = reader.processNextBuffer(rbuf); + + assertEquals(rmsg.getSize(), msg.getSize()); + + } + + // commit the log file (write the closing root block). + writer.closeHALog(closeRBV(openRB)); + + /* + * The writer should have closed the file, so the reader should + * return immediately to report no more buffers. + */ + assertFalse(reader.hasMoreBuffers()); + + } finally { + + writer.disableHALog(); + } + + // Read all HALog files in the test directory. 
+ HALogReader.main(new String[] { logdir.toString() }); + } /** * SimpleWriter writes a number of log files with a set of messages in each */ - static class SimpleWriter implements Runnable { - final ByteBuffer data = randomData(2000); + private class SimpleWriter implements Callable<Void> { - int sequence = 0; + private IRootBlockView openRB; + private final HALogWriter writer; + private final int count; - private IRootBlockView rbv; - private HALogWriter writer; - private ChecksumUtility checker; - private int count; + /** + * + * @param openRB + * The opening root block. + * @param writer + * The {@link HALogWriter}. + * @param count + * The HALog files to write. Each will have a random #of + * records. + */ + SimpleWriter(final IRootBlockView openRB, final HALogWriter writer, + final int count) { - SimpleWriter(IRootBlockView rbv, HALogWriter writer, ChecksumUtility checker, int count) { - this.rbv = rbv; + this.openRB = openRB; this.writer = writer; - this.checker = checker; this.count = count; + } @Override - public void run() { - final UUID storeUUID = UUID.randomUUID(); - try { - for (int i = 0; i < count; i++) { - // add delay to write thread to test read thread waiting for data - Thread.sleep(10); - final IHAWriteMessage msg = new HAWriteMessage(storeUUID, rbv - .getCommitCounter(), rbv.getLastCommitTime(), - sequence++, data.limit(), checker - .checksum(data), rbv.getStoreType(), - rbv.getQuorumToken(), 1000, 0); + public Void call() throws Exception { - writer.writeOnHALog(msg, data); - if (((i+1) % (1 + r.nextInt(count/3))) == 0) { - System.out.println("Cycling HALog after " + sequence + " records"); - rbv = closeRBV(rbv); - writer.closeHALog(rbv); - sequence = 0; - writer.createLog(rbv); - } - } - rbv = closeRBV(rbv); - writer.closeHALog(rbv); - } catch (FileNotFoundException e) { - e.printStackTrace(); - } catch (IOException e) { - e.printStackTrace(); - } catch (InterruptedException e) { - e.printStackTrace(); - } - } - + final UUID storeUUID = UUID.randomUUID(); + final long fileExtent = 1000; // NB: ignored for test. + final long firstOffset = 0; // NB: ignored for test. + + // Note: Thread Local! Can not be passed in by the caller. + final ChecksumUtility checker = ChecksumUtility.getCHK(); + + for (int i = 0; i < count; i++) { + + // Min of 1 message. Max of r.nextInt(). + final long nmessages = r.nextInt(100) + 1; + + for (long sequence = 0; sequence < nmessages; sequence++) { + + // add delay to write thread to test reader waiting + Thread.sleep(10); + + // Use random data of random length. + final int size = r.nextInt(4 * Bytes.kilobyte32) + 1; + + final ByteBuffer data = randomData(size); + + final IHAWriteMessage msg = new HAWriteMessage(storeUUID, + openRB.getCommitCounter(), + openRB.getLastCommitTime(), sequence, size, + checker.checksum(data), openRB.getStoreType(), + openRB.getQuorumToken(), fileExtent, firstOffset); + + writer.writeOnHALog(msg, data); + + } + + if (log.isInfoEnabled()) + log.info("Cycling HALog after " + nmessages + " records"); + + // close log. + writer.closeHALog(openRB = closeRBV(openRB)); + + // open new log. + writer.createLog(openRB); + + } // next HALog file. + + // Close the last HALog. + writer.closeHALog(openRB = closeRBV(openRB)); + + // Done. + return null; + + } + + } // class SimpleWriter. + + /** + * Reader consumes an HALog file. The file must exist before you start + * running the {@link ReaderTask}. 
+ * + * @author <a href="mailto:tho...@us...">Bryan + * Thompson</a> + */ + private static class ReaderTask implements Callable<Long> { + + private final long commitCounter; + private final HALogWriter writer; + private final Future<Void> wf; + + /** + * + * @param commitCounter + * The commit counter that identifies the closing commit + * point for the HALog file to be read. + * @param writer + * The {@link HALogWriter}. + * @param wf + * The {@link Future} for the {@link HALogWriter}. This is + * used to monitor for an error in the writer so the reader + * does not block the test from completing (or failing). + */ + public ReaderTask(final long commitCounter, final HALogWriter writer, + final Future<Void> wf) { + + this.commitCounter = commitCounter; + + this.writer = writer; + + this.wf = wf; + + } + + /** Test future if done. Throws exception if writer fails. */ + private void checkWriterFuture() throws InterruptedException, + ExecutionException { + + if (wf.isDone()) { + + wf.get(); + + } + + } + + /** + * + * @return The #of {@link IHAMessage}s read from the file. + */ + @Override + public Long call() throws Exception { + + // Allocate a heap ByteBuffer + final ByteBuffer rbuf = ByteBuffer + .allocate(DirectBufferPool.INSTANCE.getBufferCapacity()); + + // Note: Throws FileNotFoundException if does not exist. + final IHALogReader reader = writer.getReader(commitCounter); + + assertNotNull(reader); + + long nread = 0L; + + try { + + while (reader.hasMoreBuffers()) { + + checkWriterFuture(); + + final IHAWriteMessage rmsg = reader.processNextBuffer(rbuf); + + if (log.isDebugEnabled()) + log.debug("Read message: " + rmsg.getSequence() + + ", size: " + rmsg.getSize()); + + assertEquals(nread, rmsg.getSequence()); + + nread++; + + checkWriterFuture(); + + } + + return nread; + + } finally { + + /* + * Note: This should not throw an IOException. + * + * Note: It it does throw an IOException, then it can also be + * masking an error in the try{} above. Diagnose both if you get + * anything thrown out of here. + */ + + reader.close(); + + } + + } + } + /** * While a writer thread writes a number of HALogs, readers are opened * to process them. + * + * @throws Exception */ - public void testConcurrentRWWriterReader() throws FileNotFoundException, - IOException { - // establish halogdir - File logdir = new File("/tmp/halogdir"); - logdir.mkdirs(); + public void testConcurrentRWWriterReader() throws Exception { - final ChecksumUtility checker = ChecksumUtility.getCHK(); - final HALogWriter writer = new HALogWriter(logdir); - final IRootBlockView rbv = openRBV(StoreTypeEnum.RW); + + final IRootBlockView rbv = openRBV(StoreTypeEnum.RW); - assertTrue(rbv.getStoreType() == StoreTypeEnum.RW); + assertEquals(StoreTypeEnum.RW, rbv.getStoreType()); writer.createLog(rbv); -// final ByteBuffer data = randomData(2000); + // The #of HALog files to write. If GT 1000, then more than one + // subdirectory worth of files will be written. + final int nfiles = 100 + r.nextInt(1000); - Thread wthread = new Thread(new SimpleWriter(rbv, writer, checker, 500)); + // Start the writer. + final Future<Void> wf = executorService.submit(new SimpleWriter(rbv, + writer, nfiles)); - Runnable rreader = new Runnable() { + try { - ByteBuffer rbuf = ByteBuffer.allocate(1 * 1024 * 1024); // 1 mb + /* + * Now keep on opening readers for "current file" while writer + * continues. + * + * Note: The writer will write multiple files. 
For each file that it + * writes, we run the reader until it is done, then we open a new + * reader on the next HALog file. + */ + for (long commitCounter = 1L; commitCounter <= nfiles; commitCounter++) { - @Override - public void run() { - final IHALogReader reader = writer.getReader(); - if (reader == null) { - return; - } - - try { - while (reader.hasMoreBuffers()) { - final IHAWriteMessage rmsg = reader - .processNextBuffer(rbuf); + /* + * Note: We need to spin here in case the reader tries to open + * the HALog for reading before the writer has created the HALog + * for that commit point. This can be done by monitoring the + * writer or the file system. + */ + final File file = CommitCounterUtility.getCommitCounterFile( + logdir, commitCounter, IHALogReader.HA_LOG_EXT); -// System.out.println("Read message: " + rmsg.getSequence() -// + ", size: " + rmsg.getSize()); - } - } catch (IOException e) { - e.printStackTrace(); - } - } + while (!file.exists()) { - }; - - // start the writer first - wthread.start(); - - // now keep on opening readers for "current file" while writer continues - while (wthread.isAlive()) { - Thread rthread = new Thread(rreader); - rthread.start(); - while (rthread.isAlive()) { - try { - Thread.sleep(10); - } catch (InterruptedException e) { - break; - } - } - } + if (wf.isDone()) { + // Check writer for errors. + wf.get(); + } - // for sanity, let's run through the standard reader - try { - HALogReader.main(new String[] { "/tmp/halogdir" }); - } catch (InterruptedException e) { - // NOP - } + if (log.isInfoEnabled()) + log.info("Blocked waiting on writer: commitCounter=" + + commitCounter + ", file=" + file); + + // Wait for the file. + Thread.sleep(100/* ms */); + + } + + /* + * Open and read the next HALog file, blocking until all data + * has been read from that file. + */ + new ReaderTask(commitCounter, writer, wf).call(); + + } + + // Check writer for errors. There should not be any. + wf.get(); + + } finally { + + wf.cancel(true/* mayInterruptIfRunning */); + + } + + // for sanity, let's run through the standard reader + HALogReader.main(new String[] { logdir.toString() }); + } + /** + * Unit test verifies that each open of an {@link IHALogReader} is distinct + * and the an {@link IHALogReader#close()} will not close the backing + * channel for a different reader instance that is reading from the same + * HALog file. This version of the test is for a historical (non-live) HALog + * file. The case for the live HALog file is tested by + * {@link #testSimpleRWWriterReader()}. 
+ */ + public void test_doubleOpen_close_historicalHALog() { + fail("write test"); + } + } Modified: branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournal.java =================================================================== --- branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournal.java 2013-05-21 20:00:46 UTC (rev 7154) +++ branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournal.java 2013-05-22 19:26:41 UTC (rev 7155) @@ -39,13 +39,9 @@ import java.util.Properties; import java.util.UUID; import java.util.concurrent.Callable; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.Executor; import java.util.concurrent.Future; import java.util.concurrent.FutureTask; import java.util.concurrent.RejectedExecutionException; -import java.util.concurrent.TimeoutException; -import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.locks.Lock; import net.jini.config.Configuration; @@ -73,7 +69,6 @@ import com.bigdata.ha.msg.HASnapshotDigestResponse; import com.bigdata.ha.msg.IHADigestRequest; import com.bigdata.ha.msg.IHADigestResponse; -import com.bigdata.ha.msg.IHAGlobalWriteLockRequest; import com.bigdata.ha.msg.IHALogDigestRequest; import com.bigdata.ha.msg.IHALogDigestResponse; import com.bigdata.ha.msg.IHALogRequest; @@ -97,11 +92,9 @@ import com.bigdata.journal.IRootBlockView; import com.bigdata.journal.ITx; import com.bigdata.journal.Journal; -import com.bigdata.journal.WriteExecutorService; import com.bigdata.journal.jini.ha.HAJournalServer.HAQuorumService; import com.bigdata.journal.jini.ha.HAJournalServer.NSSConfigurationOptions; import com.bigdata.journal.jini.ha.HAJournalServer.RunStateEnum; -import com.bigdata.quorum.AsynchronousQuorumCloseException; import com.bigdata.quorum.Quorum; import com.bigdata.service.AbstractTransactionService; import com.bigdata.service.jini.JiniClient; @@ -801,20 +794,62 @@ final long commitCounter = req.getCommitCounter(); /* + * Open the HALog file. If it exists, then we will run a task to + * send it along the pipeline. + * * Note: The choice of the "live" versus a historical "closed" log * file needs to be an atomic decision and thus MUST be made by the * HALogManager. + * + * Note: Once opened, the HALog file must be closed. Once we submit + * the task for execution, the SendHALogTask() is responsible for + * closing the HALog file. If we do not get that far, then the file + * is closed by this code block. + * + * Note: This can leak an open file handle in the case where the + * ExecutorService is shutdown before the task runs, but that only + * occurs on Journal shutdown. */ - final IHALogReader r = getHALogNexus().getReader(commitCounter); + final boolean isLive; + final FutureTask<Void> ft; + { - final boolean isLive = r.isLive(); - - // Task sends an HALog file along the pipeline. - final FutureTask<Void> ft = new FutureTaskMon<Void>( - new SendHALogTask(req, r)); + IHALogReader r = null; + + try { - // Run task. - getExecutorService().submit(ft); + // Note: open file handle - must be closed eventually. + r = getHALogNexus().getReader(commitCounter); + + isLive = r.isLive(); + + // Task sends an HALog file along the pipeline. + ft = new FutureTaskMon<Void>(new SendHALogTask(req, r)); + + // Run task. + getExecutorService().submit(ft); + + // Clear reference. File handle will be closed by task. 
+ r = null; + + } finally { + + if (r != null) { + + try { + + r.close(); + + } catch (Throwable t) { + + log.error(t, t); + } + + } + + } + + } /** * Return Future. @@ -868,6 +903,7 @@ public Void call() throws Exception { + try { final IBufferAccess buf = DirectBufferPool.INSTANCE.acquire(); long nsent = 0; @@ -922,7 +958,8 @@ // ft.cancel(true/* mayInterruptIfRunning */); } - } + } // while(hasMoreBuffers()) + success = true; return null; @@ -931,15 +968,24 @@ buf.release(); + r.close(); + if (haLog.isDebugEnabled()) haLog.debug("req=" + req + ", nsent=" + nsent + ", success=" + success); } + } finally { + + // Close the open log file. + r.close(); + } - } + } // call() + + } // class SendHALogTask /* * REBUILD: Take a read lock and send everything from the backing file, @@ -1191,308 +1237,308 @@ } - /** - * {@inheritDoc} - * - * TODO This method relies on the unisolated semaphore. That provides a - * sufficient guarantee for updates that original through the NSS since - * all such updates will eventually require the unisolated connection to - * execute. However, if we support multiple concurrent unisolated - * connections distinct KBs per the ticket below, then we will need to - * have a different global write lock - perhaps via the - * {@link WriteExecutorService}. - * - * @deprecated This method is no longer necessary to support backups - * since we can now take snapshots without suspending - * writers. - * - * @see https://sourceforge.net/apps/trac/bigdata/ticket/566 ( - * Concurrent unisolated operations against multiple KBs on the - * same Journal) - */ - @Override - public Future<Void> globalWriteLock(final IHAGlobalWriteLockRequest req) - throws IOException, InterruptedException, TimeoutException { - - if (req == null) - throw new IllegalArgumentException(); - - /* - * This token will be -1L if there is no met quorum. This method may - * only execute while there is a met quorum and this service is the - * leader for that met quorum. - * - * Note: This method must stop waiting for the global lock if this - * service is no longer the leader (quorum break). - * - * Note: This method must stop holding the global lock if this - * service is no longer the leader (quorum break). - */ - final long token = getQuorum().token(); - - // Verify that the quorum is met and that this is the leader. - getQuorum().assertLeader(token); - - // Set true IFF we acquire the global write lock. - final AtomicBoolean didAcquire = new AtomicBoolean(false); - - // Task to acquire the lock - final FutureTask<Void> acquireLockTaskFuture = new FutureTask<Void>( - new AcquireGlobalLockTask(didAcquire)); - - // Task used to interrupt task acquiring the lock if quorum breaks. - final FutureTask<Void> interruptLockTaskFuture = new FutureTask<Void>( - new InterruptAcquireLockTask(token, acquireLockTaskFuture, - req)); - - // Task to release the lock. - final FutureTask<Void> releaseLockTaskFuture = new FutureTask<Void>( - new ReleaseGlobalLockTask(token, req)); - - // Service to run those tasks. - final Executor executor = getExecutorService(); - - // Set true iff we will run with the global lock. - boolean willRunWithLock = false; - try { - - /* - * Submit task to interrupt the task that is attempting to - * acquire the lock if the quorum breaks. This prevents us - * waiting for the global long beyond a quorum break. - */ - executor.execute(interruptLockTaskFuture); - - /* - * Submit task to acquire the lock. 
- */ - executor.execute(acquireLockTaskFuture); - - /* - * Wait for the global lock (blocks up to the timeout). - */ - acquireLockTaskFuture.get(req.getLockWaitTimeout(), - req.getLockWaitUnits()); - - // We will run with the global lock. - willRunWithLock = true; - - } catch (RejectedExecutionException ex) { - - /* - * Note: This will throw a RejectedExecutionException if the - * executor has been shutdown. That unchecked exception will be - * thrown back to the client. Since the lock has not been - * acquired if that exception is thrown, we do not need to do - * anything else here. - */ - - haLog.warn(ex); - - throw ex; - - } catch (ExecutionException e) { - - haLog.error(e, e); - - throw new RuntimeException(e); - - } finally { - - /* - * Make sure these tasks are cancelled. - */ - - interruptLockTaskFuture.cancel(true/* mayInterruptIfRunning */); - - acquireLockTaskFuture.cancel(true/* mayInterruptIfRunning */); - - /* - * Release the global lock if we acquired it but will not run - * with that lock held (e.g., due to some error). - */ - - if (!willRunWithLock && didAcquire.get()) { - - HAJournal.this.releaseUnisolatedConnection(); - - log.wa... [truncated message content] |
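The hand-off/close discipline described in the log message above (open the HALog reader, transfer ownership to the task that will consume it, close it in the task, and close it locally only if the hand-off never happens, with close() itself made idempotent) can be sketched in isolation. The sketch below is illustrative only: the names HandOffSketch, SketchReader and sendSketch are invented for the example and are not the bigdata API; the real logic lives in HAJournal.sendHALogForWriteSet() and HALogWriter.OpenHALogReader as shown in the diff above.

import java.io.Closeable;
import java.io.IOException;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.atomic.AtomicBoolean;

public class HandOffSketch {

    /** A reader whose close() is idempotent (mirrors the AtomicBoolean guard in the diff). */
    static class SketchReader implements Closeable {

        private final AtomicBoolean open = new AtomicBoolean(true);

        @Override
        public void close() throws IOException {
            // compareAndSet() ensures a double-close does not release shared state twice.
            if (open.compareAndSet(true/* expected */, false/* newValue */)) {
                // Release the underlying channel / decrement the shared accessor count here.
            }
        }
    }

    /**
     * Open a reader and hand it to a task for asynchronous processing. The task
     * closes the reader when it is done; if the hand-off never happens (e.g. the
     * submit fails), the finally block closes it instead, so the handle cannot leak.
     */
    static Future<Void> sendSketch(final ExecutorService exec) throws IOException {

        SketchReader r = new SketchReader(); // open handle - must be closed eventually.

        try {

            final SketchReader handedOff = r;

            final Future<Void> ft = exec.submit(new Callable<Void>() {
                @Override
                public Void call() throws Exception {
                    try {
                        // ... stream the payload along the pipeline ...
                        return null;
                    } finally {
                        handedOff.close(); // the task owns the handle once submitted.
                    }
                }
            });

            r = null; // ownership transferred; the finally block below becomes a no-op.

            return ft;

        } finally {

            if (r != null)
                r.close(); // submission failed; do not leak the handle.

        }

    }

    public static void main(final String[] args) throws Exception {
        final ExecutorService exec = Executors.newSingleThreadExecutor();
        try {
            sendSketch(exec).get(); // wait for the task so the example terminates cleanly.
        } finally {
            exec.shutdown();
        }
    }

}

In the actual change the readers additionally share a reference-counted FileState (the writer plus each open reader counts as an accessor), so the backing channel is only closed when the last accessor goes away; the sketch above only shows the hand-off and the idempotent close.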
From: <tho...@us...> - 2013-05-24 17:23:30
|
Revision: 7161 http://bigdata.svn.sourceforge.net/bigdata/?rev=7161&view=rev
Author: thompsonbry
Date: 2013-05-24 17:23:19 +0000 (Fri, 24 May 2013)
Log Message:
-----------
Write cache payload compression prior to replication, and compacted HALog files. Changes are in progress to also provide payload storage for the WORM in the HALog files.

Added a CompressorRegistry for configurable block compression schemes. (An illustrative sketch of the key-based compressor lookup follows at the end of this message.)

WCS (WriteCacheService) compaction is still observed to fail for testStartAB_C_LiveResync and is disabled in the WCS constructor.

Compression, WriteCache, WORM, RWJournal, HA, and SPARQL test suites are green locally.

See https://sourceforge.net/apps/trac/bigdata/ticket/652 (Compress write cache blocks for replication and in HALogs)

Modified Paths:
--------------
branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/halog/HALogWriter.java
branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/halog/IHALogReader.java
branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/msg/HAWriteMessage.java
branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/msg/IHAWriteMessage.java
branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/pipeline/HAReceiveService.java
branches/READ_CACHE/bigdata/src/java/com/bigdata/io/compression/IRecordCompressor.java
branches/READ_CACHE/bigdata/src/java/com/bigdata/io/compression/NOPRecordCompressor.java
branches/READ_CACHE/bigdata/src/java/com/bigdata/io/compression/RecordCompressor.java
branches/READ_CACHE/bigdata/src/java/com/bigdata/io/writecache/WriteCache.java
branches/READ_CACHE/bigdata/src/java/com/bigdata/io/writecache/WriteCacheService.java
branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractBufferStrategy.java
branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractJournal.java
branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/DiskBackedBufferStrategy.java
branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/IBufferStrategy.java
branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/IHABufferStrategy.java
branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/Options.java
branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/RWStrategy.java
branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/WORMStrategy.java
branches/READ_CACHE/bigdata/src/java/com/bigdata/rwstore/FixedAllocator.java
branches/READ_CACHE/bigdata/src/java/com/bigdata/rwstore/RWStore.java
branches/READ_CACHE/bigdata/src/java/com/bigdata/rwstore/RWWriteCacheService.java
branches/READ_CACHE/bigdata/src/java/com/bigdata/rwstore/sector/MemStrategy.java
branches/READ_CACHE/bigdata/src/test/com/bigdata/io/compression/TestAll.java
branches/READ_CACHE/bigdata/src/test/com/bigdata/rwstore/TestRWJournal.java
branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/AbstractServer.java
branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournal.java
branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/AbstractHAJournalServerTestCase.java
branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA3JournalServer.java

Added Paths:
-----------
branches/READ_CACHE/bigdata/src/java/com/bigdata/io/compression/CompressorRegistry.java
branches/READ_CACHE/bigdata/src/java/com/bigdata/io/compression/GZipCompressor.java
branches/READ_CACHE/bigdata/src/test/com/bigdata/io/compression/TestCompressorRegistry.java
branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA3WORMJournalServer.java

Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/halog/HALogWriter.java 
=================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/halog/HALogWriter.java 2013-05-23 19:25:57 UTC (rev 7160) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/halog/HALogWriter.java 2013-05-24 17:23:19 UTC (rev 7161) @@ -347,26 +347,19 @@ @Override public FileChannel reopenChannel() throws IOException { - - final Lock lock = m_stateLock.readLock(); - - lock.lock(); - - try { - + final Lock lock = m_stateLock.readLock(); + lock.lock(); + try { if (m_state == null || m_state.m_channel == null || !m_state.m_channel.isOpen()) { throw new IOException("Closed"); - + } - + return m_state.m_channel; - } finally { - - lock.unlock(); - + lock.unlock(); } } @@ -527,6 +520,7 @@ } switch (m_rootBlock.getStoreType()) { + case WORM: case RW: { /* * Write the WriteCache block on the channel. @@ -538,12 +532,7 @@ FileChannelUtility.writeAll(reopener, data.duplicate(), m_position); m_position += nbytes; - } - case WORM: { - /* - * We will use the HA failover read API to recover the block - * from a node in the quorum when we need to replay the HA log. - */ + break; } default: @@ -776,14 +765,12 @@ }; private FileState(final File file, final StoreTypeEnum storeType) - throws FileNotFoundException { - + throws FileNotFoundException { m_haLogFile = file; m_storeType = storeType; m_raf = new RandomAccessFile(m_haLogFile, "rw"); m_channel = m_raf.getChannel(); m_accessors = 1; // the writer is a reader also - } /** @@ -828,11 +815,11 @@ m_raf.close(); } } finally { - // wake up anyone waiting. - this.notifyAll(); - } - } + // wake up anyone waiting. + this.notifyAll(); + } } + } public void addRecord() { synchronized (this) { @@ -879,72 +866,66 @@ } } - /** - * - * @param record - * - the next sequence required - */ + /** + * + * @param record + * - the next sequence required + */ /* * TODO We should support wait up to a timeout here to make the API more * pleasant. */ public void waitOnStateChange(final long record) { - synchronized (this) { - + synchronized (this) { // Condition variable. while (m_records < record && !m_committed) { if (!isOpen()) { - // Provable nothing left to read. - return; + return; + } - } + try { + wait(); + } catch (InterruptedException e) { + + // Propagate the interrupt. + Thread.currentThread().interrupt(); + + return; + + } - try { + } - wait(); + } - } catch (InterruptedException e) { - - // Propagate the interrupt. - Thread.currentThread().interrupt(); - - return; - - } - - } - - } - } } // class FileState static class OpenHALogReader implements IHALogReader { - private final FileState m_state; private long m_record = 0L; private long m_position = headerSize0; // initial position - + /** <code>true</code> iff this reader is open. */ private final AtomicBoolean open = new AtomicBoolean(true); - OpenHALogReader(final FileState state) { + OpenHALogReader(final FileState state) { if (state == null) throw new IllegalArgumentException(); - m_state = state; + m_state = state; // Note: Must be synchronized for visibility and atomicity! 
synchronized (m_state) { - m_state.m_accessors++; + m_state.m_accessors++; } @@ -988,7 +969,7 @@ if (!isOpen()) return false; - + synchronized (m_state) { /* @@ -998,13 +979,13 @@ if (!m_state.isOpen()) return false; - if (m_state.isCommitted() && m_state.recordCount() <= m_record) - return false; + if (m_state.isCommitted() && m_state.recordCount() <= m_record) + return false; - if (m_state.recordCount() > m_record) - return true; + if (m_state.recordCount() > m_record) + return true; - m_state.waitOnStateChange(m_record + 1); + m_state.waitOnStateChange(m_record + 1); } @@ -1018,33 +999,27 @@ return open.get(); } - + @Override public boolean isEmpty() { - - return m_state.isEmpty(); - + return m_state.isEmpty(); } - @Override + @Override public IHAWriteMessage processNextBuffer(final ByteBuffer clientBuffer) - throws IOException { + throws IOException { final IHAWriteMessage msg; synchronized (m_state) { + final long savePosition = m_state.m_channel.position(); + m_state.m_channel.position(m_position); - final long savePosition = m_state.m_channel.position(); - - m_state.m_channel.position(m_position); - msg = HALogReader.processNextBuffer(m_state.m_raf, m_state.reopener, m_state.m_storeType, clientBuffer); m_position = m_state.m_channel.position(); - m_state.m_channel.position(savePosition); - } m_record++; @@ -1053,7 +1028,7 @@ } @Override - public void close() throws IOException { + public void close() throws IOException { // Note: this pattern prevents a double-close of a reader. if (open.compareAndSet(true/* expected */, false/* newValue */)) { @@ -1082,20 +1057,18 @@ } - m_state.close(); - - } - - } - + m_state.close(); + } } + } + @Override public void computeDigest(final MessageDigest digest) throws DigestException, IOException { HALogReader.computeDigest(m_state.reopener, digest); - + } } // class OpenHAReader Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/halog/IHALogReader.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/halog/IHALogReader.java 2013-05-23 19:25:57 UTC (rev 7160) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/halog/IHALogReader.java 2013-05-24 17:23:19 UTC (rev 7161) @@ -77,7 +77,7 @@ * same file and one is closed, then other will remain open. Likewise, if a * reader is open for the live HALog file, closing the writer will not close * the reader and closing the reader will not close the writer. - */ + */ void close() throws IOException; /** @@ -107,7 +107,7 @@ */ IRootBlockView getClosingRootBlock() throws IOException; - /** + /** * Checks whether we have reached the end of the file (blocking). * <p> * Note: This method will block if this is the live HALog. This allows a @@ -127,7 +127,7 @@ * * @throws IOException * if there is an error reading from the backing file. 
- */ + */ boolean hasMoreBuffers() throws IOException; /** Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/msg/HAWriteMessage.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/msg/HAWriteMessage.java 2013-05-23 19:25:57 UTC (rev 7160) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/msg/HAWriteMessage.java 2013-05-24 17:23:19 UTC (rev 7161) @@ -30,8 +30,13 @@ import java.io.IOException; import java.io.ObjectInput; import java.io.ObjectOutput; +import java.nio.ByteBuffer; import java.util.UUID; +import org.apache.log4j.Logger; + +import com.bigdata.io.compression.CompressorRegistry; +import com.bigdata.io.compression.IRecordCompressor; import com.bigdata.journal.StoreTypeEnum; /** @@ -44,6 +49,8 @@ public class HAWriteMessage extends HAWriteMessageBase implements IHAWriteMessage { + protected static final Logger log = Logger.getLogger(HAWriteMessage.class); + /** * */ @@ -61,8 +68,11 @@ /** The write sequence since last commit beginning at zero */ private long sequence; - /** The type of backing store (RW or WORM). */ + /** The type of backing store (RW or WORM). */ private StoreTypeEnum storeType; + + /** Indicates if data is compressed (if included in file). */ + private String compressorKey; /** The quorum token for which this message is valid. */ private long quorumToken; @@ -102,6 +112,8 @@ return sequence; } + + /* (non-Javadoc) * @see com.bigdata.journal.ha.IHAWriteMessage#getStoreType() */ @@ -134,8 +146,13 @@ return firstOffset; } - public String toString() { + @Override + public String getCompressorKey() { + return compressorKey; + } + public String toString() { + return getClass().getName() // + "{size=" + getSize() // + ",chksum=" + getChk() // @@ -144,6 +161,7 @@ + ",commitTime=" + lastCommitTime // + ",sequence=" + sequence // + ",storeType=" + getStoreType() // + + ",compressorKey=" + getCompressorKey() // + ",quorumToken=" + getQuorumToken()// + ",fileExtent=" + getFileExtent() // + ",firstOffset=" + getFirstOffset() // @@ -186,6 +204,8 @@ * The length of the backing file on the disk. * @param firstOffset * The file offset at which the data will be written (WORM only). + * + * @deprecated by the version that accepts the compressor key. */ public HAWriteMessage(final UUID uuid, final long commitCounter, final long commitTime, final long sequence, final int sze, @@ -193,6 +213,52 @@ final long quorumToken, final long fileExtent, final long firstOffset) { + this(uuid, commitCounter, commitTime, sequence, sze, chk, storeType, + quorumToken, fileExtent, firstOffset, null/* compressorKey */); + + } + + /** + * @param uuid + * The {@link UUID} associated with the backing store on the + * leader. This can be used to decide whether the message is for + * a given store, or (conversly) whether the receiver has already + * setup its root blocks based on the leader (and hence has the + * correct {@link UUID} for its local store). + * @param commitCounter + * The commit counter for the current root block for the write + * set which is being replicated by this message. + * @param commitTime + * The commit time for the current root block for the write set + * which is being replicated by this message. + * @param sequence + * The write cache block sequence number. This is reset to ZERO + * (0) for the first replicated write cache block in each write + * set. + * @param sze + * The #of bytes in the payload. + * @param chk + * The checksum of the payload. 
+ * @param storeType + * The type of backing store (RW or WORM). + * @param quorumToken + * The quorum token for which this message is valid. + * @param fileExtent + * The length of the backing file on the disk. + * @param firstOffset + * The file offset at which the data will be written (WORM only). + * @param compressorKey + * The key under which an {@link IRecordCompressor} has been + * registered against the {@link CompressorRegistry} -or- + * <code>null</code> for no compression. + */ + public HAWriteMessage(final UUID uuid, final long commitCounter, + final long commitTime, final long sequence, final int sze, + final int chk, final StoreTypeEnum storeType, + final long quorumToken, final long fileExtent, + final long firstOffset, + final String compressorKey) { + super(sze, chk); if (uuid == null) @@ -217,6 +283,8 @@ this.firstOffset = firstOffset; + this.compressorKey = compressorKey; + } /** @@ -235,10 +303,31 @@ private static final byte VERSION1 = 0x1; /** + * Supports optional data compression for the payload (backwards compatible + * default for {@link #VERSION1} is no compression). + */ + private static final byte VERSION2 = 0x2; + + /** * The current version. */ - private static final byte currentVersion = VERSION1; + private static final byte currentVersion = VERSION2; // VERSION2; + /** + * Determine whether message data is compressed + */ + private static boolean compressData = true; // default + + /** + * Static method to indicate whether the message will reference + * compressed data. + * + * @return + */ + public static boolean isDataCompressed() { + return compressData; + } + @Override public boolean equals(final Object obj) { @@ -268,11 +357,17 @@ ClassNotFoundException { super.readExternal(in); - final byte version = in.readByte(); + + final byte version = in.readByte(); switch (version) { case VERSION0: uuid = null; // Note: not available. break; + case VERSION2: { + final boolean isNull = in.readBoolean(); + compressorKey = isNull ? null : in.readUTF(); + // fall through. + } case VERSION1: uuid = new UUID(// in.readLong(), // MSB @@ -295,6 +390,11 @@ super.writeExternal(out); if (currentVersion >= VERSION1 && uuid != null) { out.write(currentVersion); + if (currentVersion >= VERSION2) { + out.writeBoolean(compressorKey == null); + if (compressorKey != null) + out.writeUTF(compressorKey); + } out.writeLong(uuid.getMostSignificantBits()); out.writeLong(uuid.getLeastSignificantBits()); } else { @@ -310,4 +410,61 @@ out.writeLong(firstOffset); } +// // Versions of compress/expand with Deflator using RecordCompressor +// static IRecordCompressor compressor = CompressorRegistry.fetch(CompressorRegistry.DEFLATE_BEST_SPEED); +// static String compressorKey = CompressorRegistry.DEFLATE_BEST_SPEED; + +// /** +// * This configuration method has a dual role since if the Deflater is configured +// * with NO_COMPRESSION, the message indicates directly that the buffer is not compressed +// * avoiding the double buffering of the Deflater class. +// * +// * Note that the strategy is only applicable for the compression, the expansion is +// * determined by the source data. 
+// */ +// public static void setCompression(final String strategy) { +// compressorKey = strategy; +// compressor = CompressorRegistry.fetch(strategy); +// } +// +// public ByteBuffer compress(final ByteBuffer buffer) { +// +// final IRecordCompressor compressor = CompressorRegistry.getInstance() +// .get(compressionMethod); +// +// if (compressor == null) +// throw new UnsupportedOperationException("Unknown compressor: " +// + compressionMethod); +// +// return compressor.compress(buffer); +// } + + public ByteBuffer expand(final ByteBuffer buffer) { + + final String compressorKey = getCompressorKey(); + + if (compressorKey == null) { + + /* + * No compression. + */ + + return buffer; + + } + + final IRecordCompressor compressor = CompressorRegistry.getInstance() + .get(compressorKey); + + if (compressor == null) + throw new UnsupportedOperationException("Unknown compressor: " + + compressorKey); + + return compressor.decompress(buffer); + + } + + // public static IRecordCompressor getCompressor() { + // return compressor; + // } } Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/msg/IHAWriteMessage.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/msg/IHAWriteMessage.java 2013-05-23 19:25:57 UTC (rev 7160) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/msg/IHAWriteMessage.java 2013-05-24 17:23:19 UTC (rev 7161) @@ -23,8 +23,10 @@ */ package com.bigdata.ha.msg; +import java.nio.ByteBuffer; import java.util.UUID; +import com.bigdata.io.compression.IRecordCompressor; import com.bigdata.journal.StoreTypeEnum; /** @@ -51,6 +53,17 @@ */ long getSequence(); + /** + * Applies associated {@link IRecordCompressor} (if any) to decompress the + * data + */ + ByteBuffer expand(ByteBuffer bin); + + /** + * Return the associated {@link IRecordCompressor} key (if any). + */ + String getCompressorKey(); + /** The type of backing store (RW or WORM). */ StoreTypeEnum getStoreType(); Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/pipeline/HAReceiveService.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/pipeline/HAReceiveService.java 2013-05-23 19:25:57 UTC (rev 7160) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/pipeline/HAReceiveService.java 2013-05-24 17:23:19 UTC (rev 7161) @@ -1076,7 +1076,7 @@ + ", number of reads: " + reads + ", buffer: " + localBuffer); - if (message.getChk() != (int) chk.getValue()) { + if (message.getChk() != (int) chk.getValue()) { throw new ChecksumError("msg=" + message.toString() + ", actual=" + chk.getValue()); } Added: branches/READ_CACHE/bigdata/src/java/com/bigdata/io/compression/CompressorRegistry.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/io/compression/CompressorRegistry.java (rev 0) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/io/compression/CompressorRegistry.java 2013-05-24 17:23:19 UTC (rev 7161) @@ -0,0 +1,120 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2013. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +package com.bigdata.io.compression; + +import java.util.concurrent.ConcurrentHashMap; +import java.util.zip.Deflater; + +/** + * Registration pattern for {@link IRecordCompressor} implementations. + * + * @author Martyn Cutcher + */ +public class CompressorRegistry { + + /** + * Key for {@link Deflater} compression with BEST SPEED. + * + * @see RecordCompressor + */ + final public static String DEFLATE_BEST_SPEED = "DBS"; + + /** + * Key for {@link Deflater} compression with BEST COMPRESSION. + * + * @see RecordCompressor + */ + final public static String DEFLATE_BEST_COMPRESSION = "DBC"; + + /** + * Key for GZIP compression. + * + * @see GZipCompressor + */ + final public static String GZIP = "GZIP"; + + /** + * Key for no compression. + * <p> + * Note: <code>null</code> is more efficient than the + * {@link NOPRecordCompressor} since it avoids all copy for all + * {@link IRecordCompressor} methods. + * + * @see NOPRecordCompressor + */ + final public static String NOP = "NOP"; + + private static CompressorRegistry DEFAULT = new CompressorRegistry(); + + static public CompressorRegistry getInstance() { + + return DEFAULT; + + } + + final private ConcurrentHashMap<String, IRecordCompressor> compressors = new ConcurrentHashMap<String, IRecordCompressor>(); + + private CompressorRegistry() { + add(DEFLATE_BEST_SPEED, new RecordCompressor(Deflater.BEST_SPEED)); + add(DEFLATE_BEST_COMPRESSION, new RecordCompressor(Deflater.BEST_COMPRESSION)); + add(GZIP, new GZipCompressor()); + add(NOP, new NOPRecordCompressor()); + } + + /** + * Global hook to allow customized compression strategies + * + * @param key + * @param compressor + */ + public void add(final String key, final IRecordCompressor compressor) { + + if (compressors.putIfAbsent(key, compressor) != null) { + + throw new UnsupportedOperationException("Already declared: " + key); + + } + + } + + /** + * Return the {@link IRecordCompressor} registered under that key (if any). + * + * @param key + * The key (optional - may be <code>null</code>). + * @return The {@link IRecordCompressor} -or- <code>null</code> if the key + * is <code>null</code> or if there is nothing registered under that + * key. + */ + public IRecordCompressor get(final String key) { + + if (key == null) + return null; + + return compressors.get(key); + + } + +} Added: branches/READ_CACHE/bigdata/src/java/com/bigdata/io/compression/GZipCompressor.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/io/compression/GZipCompressor.java (rev 0) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/io/compression/GZipCompressor.java 2013-05-24 17:23:19 UTC (rev 7161) @@ -0,0 +1,148 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2013. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +package com.bigdata.io.compression; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.ByteBuffer; +import java.util.zip.GZIPInputStream; +import java.util.zip.GZIPOutputStream; + +import com.bigdata.io.ByteBufferInputStream; +import com.bigdata.io.ByteBufferOutputStream; + +public class GZipCompressor implements IRecordCompressor { + + @Override + public void compress(final ByteBuffer bin, final ByteBuffer out) { + + compress(bin, new ByteBufferOutputStream(out)); + + } + + @Override + public ByteBuffer compress(final ByteBuffer bin) { + + final ByteArrayOutputStream os = new ByteArrayOutputStream(); + + compress(bin, os); + + return ByteBuffer.wrap(os.toByteArray()); + } + + @Override + public void compress(final ByteBuffer bin, final OutputStream os) { + try { + final GZIPOutputStream gzout = new GZIPOutputStream(os); + final DataOutputStream dout = new DataOutputStream(gzout); + + // First write the length of the expanded data + dout.writeInt(bin.limit()); + if (bin.hasArray()) { + dout.write(bin.array()); + } else { + final byte[] tbuf = new byte[bin.limit()]; + bin.get(tbuf); + dout.write(tbuf); + } + dout.flush(); + gzout.flush(); + + dout.close(); + gzout.close(); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Override + public void compress(final byte[] bytes, final OutputStream os) { + compress(bytes, 0, bytes.length, os); + } + + @Override + public void compress(final byte[] bytes, final int off, final int len, + final OutputStream os) { + try { + final GZIPOutputStream gzout = new GZIPOutputStream(os); + final DataOutputStream dout = new DataOutputStream(gzout); + + // First write the length of the expanded data + dout.writeInt(len); + dout.write(bytes, off, len); + + dout.flush(); + gzout.flush(); + + dout.close(); + gzout.close(); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Override + public ByteBuffer decompress(final ByteBuffer bin) { + final InputStream instr; + if (bin.hasArray()) { + instr = new ByteArrayInputStream(bin.array()); + } else { + instr = new ByteBufferInputStream(bin); + } + + return decompress(instr); + } + + @Override + public ByteBuffer decompress(final byte[] bin) { + return decompress(new ByteArrayInputStream(bin)); + } + + public ByteBuffer decompress(final InputStream instr) { + try { + final GZIPInputStream gzin = new GZIPInputStream(instr); + final DataInputStream din = new DataInputStream(gzin); + + final int length = din.readInt(); + final byte[] xbuf = new byte[length]; + for (int cursor = 0; cursor < length;) { + final int rdlen = din.read(xbuf, cursor, (length - cursor)); + + cursor += rdlen; + + } + + return ByteBuffer.wrap(xbuf); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + +} Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/io/compression/IRecordCompressor.java 
=================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/io/compression/IRecordCompressor.java 2013-05-23 19:25:57 UTC (rev 7160) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/io/compression/IRecordCompressor.java 2013-05-24 17:23:19 UTC (rev 7161) @@ -40,6 +40,32 @@ public interface IRecordCompressor { /** + * Compresses data onto the provided ByteBuffer. + * + * @param bin + * The data. The data from the position to the limit will be + * compressed. The position will be advanced to the limit as a + * side effect. + * + * @param out + * The ByteBuffer into which the compressed data is written + */ + void compress(final ByteBuffer bin, final ByteBuffer out); + + /** + * Compresses data onto the provided ByteBuffer. + * + * @param bin + * The data. The data from the position to the limit will be + * compressed. The position will be advanced to the limit as a + * side effect. + * + * @return + * The ByteBuffer into which the compressed data is written + */ + ByteBuffer compress(final ByteBuffer bin); + + /** * Compresses data onto the output stream. * * @param bin Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/io/compression/NOPRecordCompressor.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/io/compression/NOPRecordCompressor.java 2013-05-23 19:25:57 UTC (rev 7160) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/io/compression/NOPRecordCompressor.java 2013-05-24 17:23:19 UTC (rev 7161) @@ -57,6 +57,14 @@ } + public void compress(ByteBuffer bin, ByteBuffer out) { + out.put(bin); + } + + public ByteBuffer compress(ByteBuffer bin) { + return bin; + } + /** * Writes the buffer on the output stream. */ Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/io/compression/RecordCompressor.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/io/compression/RecordCompressor.java 2013-05-23 19:25:57 UTC (rev 7160) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/io/compression/RecordCompressor.java 2013-05-24 17:23:19 UTC (rev 7161) @@ -28,6 +28,7 @@ package com.bigdata.io.compression; import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; import java.io.Externalizable; import java.io.IOException; import java.io.ObjectInput; @@ -41,6 +42,7 @@ import com.bigdata.btree.IndexSegment; import com.bigdata.io.ByteBufferInputStream; +import com.bigdata.io.ByteBufferOutputStream; /** * Bulk data (de-)compressor used for leaves in {@link IndexSegment}s. 
The @@ -114,8 +116,21 @@ } - public void compress(final ByteBuffer bin, final OutputStream os) { + public void compress(ByteBuffer bin, ByteBuffer out) { + compress(bin, new ByteBufferOutputStream(out)); + } + + public ByteBuffer compress(ByteBuffer bin) { + final ByteArrayOutputStream out = new ByteArrayOutputStream(); + + compress(bin, out); + + return ByteBuffer.wrap(out.toByteArray()); + } + + public void compress(final ByteBuffer bin, final OutputStream os) { + if (bin.hasArray() && bin.position() == 0 && bin.limit() == bin.capacity()) { Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/io/writecache/WriteCache.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/io/writecache/WriteCache.java 2013-05-23 19:25:57 UTC (rev 7160) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/io/writecache/WriteCache.java 2013-05-24 17:23:19 UTC (rev 7161) @@ -61,6 +61,8 @@ import com.bigdata.io.FileChannelUtility; import com.bigdata.io.IBufferAccess; import com.bigdata.io.IReopenChannel; +import com.bigdata.io.compression.CompressorRegistry; +import com.bigdata.io.compression.IRecordCompressor; import com.bigdata.journal.AbstractBufferStrategy; import com.bigdata.journal.StoreTypeEnum; import com.bigdata.journal.WORMStrategy; @@ -772,6 +774,16 @@ // package private : exposed to WriteTask.call(). int getWholeBufferChecksum(final ByteBuffer checksumBuffer) { + final ByteBuffer src = peek().duplicate(); + // flip(limit=pos;pos=0) + src.flip(); + + return getWholeBufferChecksum(checksumBuffer, src); + + } + + int getWholeBufferChecksum(final ByteBuffer checksumBuffer, final ByteBuffer src) { + if (checker == null) throw new UnsupportedOperationException(); @@ -785,17 +797,13 @@ * checksum. */ - final ByteBuffer b = peek().duplicate(); - // flip(limit=pos;pos=0) - b.flip(); - - assert checksumBuffer.capacity() == b.capacity() : "b.capacity=" - + b.capacity() + ", checksumBuffer.capacity=" + assert checksumBuffer.capacity() == src.capacity() : "b.capacity=" + + src.capacity() + ", checksumBuffer.capacity=" + checksumBuffer.capacity(); checksumBuffer.limit(checksumBuffer.capacity()); checksumBuffer.position(0); - checksumBuffer.put(b); + checksumBuffer.put(src); checksumBuffer.flip(); checker.reset(); @@ -1526,30 +1534,132 @@ } +// /** +// * Return the RMI message object that will accompany the payload from the +// * {@link WriteCache} when it is replicated along the write pipeline. +// * +// * @return cache A {@link WriteCache} to be replicated. +// */ +// final IHAWriteMessage newHAWriteMessage(// +// final UUID storeUUID, +// final long quorumToken, +// final long lastCommitCounter,// +// final long lastCommitTime,// +// final long sequence, +// final ByteBuffer tmp +// ) { +// +// return new HAWriteMessage(// +// storeUUID,// +// lastCommitCounter,// +// lastCommitTime,// +// sequence, // +// bytesWritten(), getWholeBufferChecksum(tmp), +// prefixWrites ? StoreTypeEnum.RW : StoreTypeEnum.WORM, +// quorumToken, fileExtent.get(), firstOffset.get()); +// +// } + /** - * Return the RMI message object that will accompany the payload from the - * {@link WriteCache} when it is replicated along the write pipeline. - * - * @return cache A {@link WriteCache} to be replicated. + * Used to retrieve the {@link HAWriteMessage} AND the associated + * {@link ByteBuffer}. + * <p> + * This allows the {@link WriteCache} to compress the data and create the + * correct {@link HAWriteMessage}. 
*/ - final IHAWriteMessage newHAWriteMessage(// - final UUID storeUUID, - final long quorumToken, + static public class HAPackage { + + /** + * The message as it will be sent. + */ + private final IHAWriteMessage m_msg; + /** + * The data as it will be sent, with compression already applied if + * compression will be used. + */ + private final ByteBuffer m_data; + + /** + * + * @param msg + * The message as it will be sent. + * @param data + * The data as it will be sent, with compression already + * applied if compression will be used. + */ + HAPackage(final IHAWriteMessage msg, final ByteBuffer data) { + m_msg = msg; + m_data = data; + } + + public IHAWriteMessage getMessage() { + return m_msg; + } + + public ByteBuffer getData() { + return m_data; + } + } + + /** + * Return the optional key for the {@link CompressorRegistry} which + * identifies the {@link IRecordCompressor} to be applied. + */ + protected String getCompressorKey() { + + // Default is NO compression. + return null; + + } + + /** + * Return the RMI message object plus the payload (the payload has been + * optionally compressed, depending on the configuration). + */ + final HAPackage newHAPackage(// + final UUID storeUUID,// + final long quorumToken,// final long lastCommitCounter,// final long lastCommitTime,// - final long sequence, - final ByteBuffer tmp + final long sequence,// + final ByteBuffer checksumBuffer ) { + + final ByteBuffer b = peek().duplicate(); + b.flip(); - return new HAWriteMessage(// + final ByteBuffer send; + + final String compressorKey = getCompressorKey(); + + final IRecordCompressor compressor = CompressorRegistry.getInstance() + .get(compressorKey); + + if (compressor != null) { + + // Compress current buffer + send = compressor.compress(b); + + } else { + + send = b; + + } + + // log.warn("Message, position: " + send.position() + ", limit: " + send.limit()); + + final HAWriteMessage msg = new HAWriteMessage(// storeUUID,// lastCommitCounter,// lastCommitTime,// sequence, // - bytesWritten(), getWholeBufferChecksum(tmp), + send.limit(), getWholeBufferChecksum(checksumBuffer, send.duplicate()), prefixWrites ? StoreTypeEnum.RW : StoreTypeEnum.WORM, - quorumToken, fileExtent.get(), firstOffset.get()); - + quorumToken, fileExtent.get(), firstOffset.get(), + compressorKey); + + return new HAPackage(msg, send); + } /** @@ -1829,6 +1939,8 @@ recordMap.clear(); final int limit = buf.limit(); // end position. int pos = buf.position(); // start position + + // log.trace("position: " + pos + ", limit: " + limit); while (pos < limit) { buf.position(pos); // 8 bytes (negative iff record is deleted) @@ -1839,7 +1951,10 @@ assert recordLength != 0; // 4 bytes final int latchedAddr = buf.getInt(); -// if (sze == 0 /* old style deleted */) { + + // log.trace("Record fileOffset: " + fileOffset + ", length: " + recordLength + ", latchedAddr: " + latchedAddr); + + // if (sze == 0 /* old style deleted */) { // /* // * Should only happen if a previous write was already made // * to the buffer but the allocation has since been freed. 
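For orientation, here is a minimal sketch of the sender-side flow that newHAPackage() implements above. Only the registry lookup, the conditional compression, and the message construction mirror the committed code; the wrapper class, the cacheBuffer argument and the metadata parameters (storeUUID, quorumToken, commit counter/time, sequence, checksum, file extent, first offset) are placeholders for state the real method takes from the WriteCache and the quorum.

import java.nio.ByteBuffer;
import java.util.UUID;

import com.bigdata.ha.msg.HAWriteMessage;
import com.bigdata.ha.msg.IHAWriteMessage;
import com.bigdata.io.compression.CompressorRegistry;
import com.bigdata.io.compression.IRecordCompressor;
import com.bigdata.journal.StoreTypeEnum;

public class HAPackageSketch {

    static IHAWriteMessage packForReplication(final ByteBuffer cacheBuffer,
            final String compressorKey, final UUID storeUUID,
            final long quorumToken, final long lastCommitCounter,
            final long lastCommitTime, final long sequence, final int chk,
            final long fileExtent, final long firstOffset) {

        // Duplicate and flip so [position..limit) is the payload to be shipped.
        final ByteBuffer b = cacheBuffer.duplicate();
        b.flip();

        // A null key, or a key with nothing registered against it, means "send as-is".
        final IRecordCompressor compressor = CompressorRegistry.getInstance()
                .get(compressorKey);

        final ByteBuffer send = (compressor != null) ? compressor.compress(b) : b;

        // The size describes the wire payload (the possibly compressed buffer) and
        // the compressorKey travels with the message so the receiver can expand it.
        // [chk] stands in for getWholeBufferChecksum(checksumBuffer, send.duplicate()).
        return new HAWriteMessage(storeUUID, lastCommitCounter, lastCommitTime,
                sequence, send.limit(), chk, StoreTypeEnum.RW, quorumToken,
                fileExtent, firstOffset, compressorKey);
    }
}

Because the checksum in the message is computed over send.duplicate() rather than the raw buffer, the pipeline can verify the bytes exactly as they arrive, before any expansion.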
Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/io/writecache/WriteCacheService.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/io/writecache/WriteCacheService.java 2013-05-23 19:25:57 UTC (rev 7160) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/io/writecache/WriteCacheService.java 2013-05-24 17:23:19 UTC (rev 7161) @@ -62,7 +62,6 @@ import com.bigdata.counters.CounterSet; import com.bigdata.ha.HAPipelineGlue; import com.bigdata.ha.QuorumPipeline; -import com.bigdata.ha.msg.IHAWriteMessage; import com.bigdata.io.DirectBufferPool; import com.bigdata.io.IBufferAccess; import com.bigdata.io.IReopenChannel; @@ -544,7 +543,18 @@ this.useChecksum = useChecksum; - this.compactionEnabled = false;//canCompact() && compactionThreshold < 100; + /** + * FIXME WCS compaction fails! + * + * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/674" > + * WCS write cache compaction causes errors in RWS postHACommit() + * </a> + */ + this.compactionEnabled = false; //canCompact() && compactionThreshold < 100; + + if (log.isInfoEnabled()) + log.info("Compaction Enabled: " + compactionEnabled + + " @ threshold=" + compactionThreshold); // this.opener = opener; @@ -1376,11 +1386,6 @@ * non-final follower will receiveAndReplicate the write cache * buffer. The last follower will receive the buffer. */ - // duplicate the write cache's buffer. - final ByteBuffer b = cache.peek().duplicate(); - // flip(limit=pos;pos=0) - b.flip(); - assert b.remaining() > 0 : "Empty cache: " + cache; // send to 1st follower. @SuppressWarnings("unchecked") @@ -1389,7 +1394,7 @@ assert quorumMember != null : "Not quorum member?"; - final IHAWriteMessage msg = cache.newHAWriteMessage(// + final WriteCache.HAPackage pkg = cache.newHAPackage(// quorumMember.getStoreUUID(),// quorumToken,// quorumMember.getLastCommitCounter(),// @@ -1398,6 +1403,8 @@ checksumBuffer ); + assert pkg.getData().remaining() > 0 : "Empty cache: " + cache; + /* * Start the remote asynchronous IO before the local synchronous * IO. @@ -1413,11 +1420,11 @@ * then clean up the documentation here (see the commented * out version of this line below). */ - quorumMember.logWriteCacheBlock(msg, b.duplicate()); + quorumMember.logWriteCacheBlock(pkg.getMessage(), pkg.getData().duplicate()); // ASYNC MSG RMI + NIO XFER. - remoteWriteFuture = quorumMember.replicate(null/* req */, msg, - b.duplicate()); + remoteWriteFuture = quorumMember.replicate(null/* req */, pkg.getMessage(), + pkg.getData().duplicate()); counters.get().nsend++; @@ -1468,7 +1475,7 @@ } } // writeCacheBlock() - + } // class WriteTask /** @@ -3835,6 +3842,23 @@ } /** + * Debug method to verify that the {@link WriteCacheService} has flushed all + * {@link WriteCache} buffers. + * + * @return whether there are no outstanding writes buffered + */ + public boolean isFlushed() { + + final boolean clear = + dirtyList.size() == 0 + && compactingCacheRef.get() == null + && (current.get() == null || current.get().isEmpty()); + + return clear; + + } + + /** * An array of writeCache actions is maintained that can be used * to provide a breadcrumb of how that address has been written, saved, * freed or removed. 
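The WriteTask change above ships that possibly compressed payload down the pipeline. The counterpart on a receiving service is IHAWriteMessage.expand(), added earlier in this revision; a hedged sketch (types as in the previous sketch), where msg and received stand for the replicated message and its payload exactly as it arrived:

static ByteBuffer recoverPayload(final IHAWriteMessage msg, final ByteBuffer received) {
    // expand() returns the buffer unchanged when the message carries no
    // compressorKey and throws UnsupportedOperationException if the key is not
    // registered with the CompressorRegistry.
    return msg.expand(received);
}

Since the message checksum covers the wire payload, HAReceiveService verifies it before any expansion takes place.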
Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractBufferStrategy.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractBufferStrategy.java 2013-05-23 19:25:57 UTC (rev 7160) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractBufferStrategy.java 2013-05-24 17:23:19 UTC (rev 7161) @@ -676,4 +676,15 @@ return false; } +// +// /** +// * {@inheritDoc} +// * <p> +// * Note: By default there is no WriteCache to buffer any writes +// * +// * @return <code>true</code> unless overridden. +// */ +// public boolean isFlushed() { +// return true; +// } } Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractJournal.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractJournal.java 2013-05-23 19:25:57 UTC (rev 7160) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractJournal.java 2013-05-24 17:23:19 UTC (rev 7161) @@ -3274,12 +3274,31 @@ final long nextOffset = _bufferStrategy.getNextOffset(); final long blockSequence; + if (_bufferStrategy instanceof IHABufferStrategy) { + // always available for HA. blockSequence = ((IHABufferStrategy) _bufferStrategy) .getBlockSequence(); + + if (!((IHABufferStrategy) _bufferStrategy) + .getWriteCacheService().isFlushed()) { + + /** + * @see <a + * href="https://sourceforge.net/apps/trac/bigdata/ticket/674" + * > WCS write cache compaction causes errors in RWS + * postHACommit() </a> + */ + + throw new AssertionError(); + + } + } else { + blockSequence = old.getBlockSequence(); + } /* Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/DiskBackedBufferStrategy.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/DiskBackedBufferStrategy.java 2013-05-23 19:25:57 UTC (rev 7160) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/DiskBackedBufferStrategy.java 2013-05-24 17:23:19 UTC (rev 7161) @@ -433,5 +433,6 @@ public void setNextOffset(long lastOffset) { // void for default DiskBackedBufferStrategy } - + + } Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/IBufferStrategy.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/IBufferStrategy.java 2013-05-23 19:25:57 UTC (rev 7160) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/IBufferStrategy.java 2013-05-24 17:23:19 UTC (rev 7161) @@ -276,4 +276,9 @@ */ public boolean useChecksums(); +// /** +// * Determines whether there are outstanding writes to the underlying store +// */ +// public boolean isFlushed(); + } Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/IHABufferStrategy.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/IHABufferStrategy.java 2013-05-23 19:25:57 UTC (rev 7160) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/IHABufferStrategy.java 2013-05-24 17:23:19 UTC (rev 7161) @@ -275,4 +275,9 @@ Quorum<HAGlue, QuorumService<HAGlue>> quorum, long token) throws IOException, QuorumException; + /** + * Return the {@link WriteCacheService} (mainly for debugging). 
+ */ + WriteCacheService getWriteCacheService(); + } Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/Options.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/Options.java 2013-05-23 19:25:57 UTC (rev 7160) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/Options.java 2013-05-24 17:23:19 UTC (rev 7161) @@ -36,6 +36,8 @@ import com.bigdata.cache.HardReferenceQueue; import com.bigdata.io.DirectBufferPool; import com.bigdata.io.FileLockUtility; +import com.bigdata.io.compression.CompressorRegistry; +import com.bigdata.io.compression.IRecordCompressor; import com.bigdata.io.writecache.WriteCache; import com.bigdata.io.writecache.WriteCache.ReadCache; import com.bigdata.io.writecache.WriteCacheService; @@ -332,6 +334,21 @@ // String WRITE_CACHE_CAPACITY = AbstractJournal.class.getName()+".writeCacheCapacity"; /** + * Optional {@link IRecordCompressor} strategy for the + * {@link WriteCacheService} in support of compressed payloads for + * replicated messages and compressed HALogs (default + * {@value #DEFAULT_HALOG_COMPRESSOR}). The value is a <code>key</code> + * declared to the {@link CompressorRegistry}. + * + * @see CompressorRegistry + * + * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/652" > + * Compress write cache blocks for replication and in HALogs </a> + */ + String HALOG_COMPRESSOR = "HALogCompressor"; + String DEFAULT_HALOG_COMPRESSOR = null;//FIXME Change default: CompressorRegistry.DEFLATE_BEST_SPEED; + + /** * The initial extent of the journal (bytes). When the journal is backed by * a file, this is the initial length of that file. The initial user extent * is typically slightly smaller as the head of the file contains some Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/RWStrategy.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/RWStrategy.java 2013-05-23 19:25:57 UTC (rev 7160) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/RWStrategy.java 2013-05-24 17:23:19 UTC (rev 7161) @@ -47,6 +47,7 @@ import com.bigdata.ha.msg.IHARebuildRequest; import com.bigdata.ha.msg.IHAWriteMessage; import com.bigdata.io.IBufferAccess; +import com.bigdata.io.writecache.WriteCacheService; import com.bigdata.mdi.IResourceMetadata; import com.bigdata.quorum.Quorum; import com.bigdata.quorum.QuorumException; @@ -894,6 +895,15 @@ m_store.postHACommit(rootBlock); } + @Override + public WriteCacheService getWriteCacheService() { + return m_store.getWriteCacheService(); + } + +// @Override +// public boolean isFlushed() { +// return m_store.isFlushed(); +// } // private int m_rebuildSequence = -1; // // @Override Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/WORMStrategy.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/WORMStrategy.java 2013-05-23 19:25:57 UTC (rev 7160) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/WORMStrategy.java 2013-05-24 17:23:19 UTC (rev 7161) @@ -63,6 +63,8 @@ import com.bigdata.io.FileChannelUtility; import com.bigdata.io.IBufferAccess; import com.bigdata.io.IReopenChannel; +import com.bigdata.io.compression.CompressorRegistry; +import com.bigdata.io.compression.IRecordCompressor; import com.bigdata.io.writecache.IBackingReader; import com.bigdata.io.writecache.WriteCache; import 
com.bigdata.io.writecache.WriteCacheCounters; @@ -217,6 +219,11 @@ */ private volatile WORMWriteCacheService writeCacheService; + @Override + public WORMWriteCacheService getWriteCacheService() { + return writeCacheService; + } + /** * <code>true</code> iff the backing store has record level checksums. */ @@ -252,6 +259,14 @@ private final int hotCacheSize; /** + * The key for the {@link CompressorRegistry} which identifies the + * {@link IRecordCompressor} to be applied (optional). + * + * @see com.bigdata.journal.Options#HALOG_COMPRESSOR + */ + private final String compressorKey; + + /** * <code>true</code> if the backing store will be used in an HA * {@link Quorum} (this is passed through to the {@link WriteCache} objects * which use this flag to conditionally track the checksum of the entire @@ -951,6 +966,10 @@ com.bigdata.journal.Options.HOT_CACHE_SIZE, com.bigdata.journal.Options.DEFAULT_HOT_CACHE_SIZE)); + this.compressorKey = fileMetadata.getProperty( + com.bigdata.journal.Options.HALOG_COMPRESSOR, + com.bigdata.journal.Options.DEFAULT_HALOG_COMPRESSOR); + isHighlyAvailable = quorum != null && quorum.isHighlyAvailable(); final boolean useWriteCacheService = fileMetadata.writeCacheEnabled @@ -1002,7 +1021,8 @@ public WriteCacheImpl newWriteCache(final IBufferAccess buf, final boolean useChecksum, final boolean bufferHasData, final IReopenChannel<? extends Channel> opener, - final long fileExtent) throws InterruptedException { + final long fileExtent) + throws InterruptedException { return new WriteCacheImpl(0/* baseOffset */, buf, useChecksum, bufferHasData, (IReopenChannel<FileChannel>) opener, @@ -1034,6 +1054,13 @@ } + @Override + protected String getCompressorKey() { + + return compressorKey; + + } + /** * {@inheritDoc} * <p> @@ -2480,12 +2507,18 @@ public void writeRawBuffer(final IHAWriteMessage msg, final IBufferAccess b) throws IOException, InterruptedException { + // FIXME Must EXPAND() iff message is compressed. + /* * Wrap up the data from the message as a WriteCache object. This will * build up a RecordMap containing the allocations to be made, and * including a ZERO (0) data length if any offset winds up being deleted * (released). - */ + * + * Note: We do not need to pass in the compressorKey here. It is ignored + * by WriteCache.flush(). We have expanded the payload above. Now we are + * just flushing the write cache onto the disk. 
+ */ final WriteCacheImpl writeCache = writeCacheService.newWriteCache(b, useChecksums, true/* bufferHasData */, opener, msg.getFileExtent()); Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/rwstore/FixedAllocator.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/rwstore/FixedAllocator.java 2013-05-23 19:25:57 UTC (rev 7160) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/rwstore/FixedAllocator.java 2013-05-24 17:23:19 UTC (rev 7161) @@ -345,7 +345,7 @@ // assert block.releaseSession(m_store.m_writeCache) == 0; // clear out writes - FIXME is releaseSession okay - block.releaseCommitWrites(m_store.m_writeCacheService); + block.releaseCommitWrites(m_store.getWriteCacheService()); // Moved to postCommit() // block.m_transients = block.m_live.clone(); Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/rwstore/RWStore.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/rwstore/RWStore.java 2013-05-23 19:25:57 UTC (rev 7160) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/rwstore/RWStore.java 2013-05-24 17:23:19 UTC (rev 7161) @@ -50,6 +50,7 @@ import java.util.UUID; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReference; import java.util.concurrent.locks.Lock; @@ -81,6 +82,8 @@ import com.bigdata.io.FileChannelUtility; import com.bigdata.io.IBufferAccess; import com.bigdata.io.IReopenChannel; +import com.bigdata.io.compression.CompressorRegistry; +import com.bigdata.io.compression.IRecordCompressor; import com.bigdata.io.writecache.BufferedWrite; import com.bigdata.io.writecache.IBackingReader; import com.bigdata.io.writecache.IBufferedWriter; @@ -493,13 +496,36 @@ private final int m_hotCacheSize; /** + * The key for the {@link CompressorRegistry} which identifies the + * {@link IRecordCompressor} to be applied (optional). + * + * @see com.bigdata.journal.Options#HALOG_COMPRESSOR + */ + private final String m_compressorKey; + + /** * Note: This is not final because we replace the {@link WriteCacheService} * during {@link #reset(long)} in order to propagate the then current quorum * token to the {@link WriteCacheService}. */ - RWWriteCacheService m_writeCacheService; + private RWWriteCacheService m_writeCacheService; /** + * Return the then current {@link WriteCacheService} object. + * + * @see IHABufferStrategy#getWriteCacheService() + */ + public RWWriteCacheService getWriteCacheService() { + m_allocationReadLock.lock(); + try { + return m_writeCacheService; + } finally { + m_allocationReadLock.unlock(); + } + + } + + /** * The actual allocation sizes as read from the store. * * @see #DEFAULT_ALLOCATION_SIZES @@ -650,11 +676,14 @@ private ConcurrentHashMap<Integer, Long> m_lockAddresses = null; class WriteCacheImpl extends WriteCache.FileChannelScatteredWriteCache { + + final private String compressorKey; + public WriteCacheImpl(final IBufferAcces... [truncated message content] |
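Taken together, the CompressorRegistry and the new Options.HALOG_COMPRESSOR property introduced above select the IRecordCompressor, if any, applied to replicated write cache blocks and HALogs. A small usage sketch under those assumptions; the property value, the sample data and the commented-out custom registration are illustrative only, and the default shipped by this revision remains null (no compression):

import java.nio.ByteBuffer;
import java.util.Properties;

import com.bigdata.io.compression.CompressorRegistry;
import com.bigdata.io.compression.IRecordCompressor;
import com.bigdata.journal.Options;

public class HALogCompressionConfigSketch {

    public static void main(final String[] args) {

        // Select GZIP for replicated write cache blocks / HALogs when opening a
        // journal with these properties.
        final Properties properties = new Properties();
        properties.setProperty(Options.HALOG_COMPRESSOR, CompressorRegistry.GZIP);

        // Round-trip a record through the registered compressor.
        final IRecordCompressor gzip = CompressorRegistry.getInstance().get(
                CompressorRegistry.GZIP);

        final ByteBuffer original = ByteBuffer.wrap("hello HALog".getBytes());
        final ByteBuffer compressed = gzip.compress(original.duplicate());
        final ByteBuffer expanded = gzip.decompress(compressed);

        // A custom strategy could be registered under a new key; add() rejects a
        // key that is already declared. ("MYKEY" and MyCompressor are hypothetical.)
        // CompressorRegistry.getInstance().add("MYKEY", new MyCompressor());

        System.out.println("original=" + original.limit() + " bytes, compressed="
                + compressed.limit() + " bytes, expanded=" + expanded.limit());
    }
}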
From: <tho...@us...> - 2013-05-28 12:16:11
Revision: 7162 http://bigdata.svn.sourceforge.net/bigdata/?rev=7162&view=rev Author: thompsonbry Date: 2013-05-28 12:15:58 +0000 (Tue, 28 May 2013) Log Message: ----------- I tracked this down to the getHALogRootBlocksForWriteSet() method rather than the sendHALogForWriteSet() method. I have reviewed all code paths that open HALog files. I have commented out the finalize() method on HALogReader since it should not be required. Some cleanup on the HALog test suite with respect to the guaranteed close() of HALog files. Added public method on HALogNexus to open an HALog file using a File which makes an atomic decision regarding whether or not this is the live HALog file. Identified and closed a open file descriptor leak in the HAJournalServer RESTORE doRun() method. @see https://sourceforge.net/apps/trac/bigdata/ticket/678 (DGC Thread and Open File Leaks: sendHALogForWriteSet()) Modified Paths: -------------- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/halog/HALogReader.java branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/halog/IHALogWriter.java branches/READ_CACHE/bigdata/src/test/com/bigdata/ha/halog/TestHALogWriter.java branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournal.java branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournalServer.java branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HALogNexus.java branches/READ_CACHE/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/HAStatusServletUtil.java Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/halog/HALogReader.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/halog/HALogReader.java 2013-05-24 17:23:19 UTC (rev 7161) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/halog/HALogReader.java 2013-05-28 12:15:58 UTC (rev 7162) @@ -76,6 +76,18 @@ private final int magic; private final int version; + /** + * <strong>CAUTION: This constructor should not be used in circumstances in + * which the {@link HALogWriter} is active since this constructor can not + * differentiate atomically between the live HALog and a historical HALog + * and will always provide a read-only view, even if the live HALog file is + * opened.</strong> + * + * @param file + * The HALog file. + * + * @throws IOException + */ public HALogReader(final File file) throws IOException { m_file = file; @@ -157,25 +169,25 @@ } - /** - * {@inheritDoc} - * - * TODO This was added to address a file handle leak. However, I am quite - * dubious that this will fix the problem. While GC may be necessary to - * finalize {@link HALogReader} instances during a RESYNC, we have already - * invoked {@link #close()} on those instances in the SendHALogTask(). It - * may be better to remove this since finalize() methods add overhead to - * GC. - * - * @see <a - * href="https://sourceforge.net/apps/trac/bigdata/ticket/678#comment:4" - * > DGC Thread Leak: sendHALogForWriteSet() </a> - */ - @Override - protected void finalize() throws Throwable { - close(); - super.finalize(); - } +// /** +// * {@inheritDoc} +// * +// * TODO This was added to address a file handle leak. However, I am quite +// * dubious that this will fix the problem. While GC may be necessary to +// * finalize {@link HALogReader} instances during a RESYNC, we have already +// * invoked {@link #close()} on those instances in the SendHALogTask(). It +// * may be better to remove this since finalize() methods add overhead to +// * GC. 
+// * +// * @see <a +// * href="https://sourceforge.net/apps/trac/bigdata/ticket/678#comment:4" +// * > DGC Thread Leak: sendHALogForWriteSet() </a> +// */ +// @Override +// protected void finalize() throws Throwable { +// close(); +// super.finalize(); +// } /** * Hook for Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/halog/IHALogWriter.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/halog/IHALogWriter.java 2013-05-24 17:23:19 UTC (rev 7161) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/halog/IHALogWriter.java 2013-05-28 12:15:58 UTC (rev 7162) @@ -47,6 +47,13 @@ /** * Return the commit counter that is expected for the writes that will be * logged (the same commit counter that is on the opening root block). + * <p> + * Note: Once the HALog is sealed, the closing root block will have a + * commitCounter that is equal to <code>getCommitCounter() + 1</code>. + * <p> + * Note: The HALog filename contains the closing commit counter - that is, + * the HALog file is named for the commit counter associated with the + * closing root block for a given write set. */ public long getCommitCounter(); Modified: branches/READ_CACHE/bigdata/src/test/com/bigdata/ha/halog/TestHALogWriter.java =================================================================== --- branches/READ_CACHE/bigdata/src/test/com/bigdata/ha/halog/TestHALogWriter.java 2013-05-24 17:23:19 UTC (rev 7161) +++ branches/READ_CACHE/bigdata/src/test/com/bigdata/ha/halog/TestHALogWriter.java 2013-05-28 12:15:58 UTC (rev 7162) @@ -279,40 +279,55 @@ final IHALogReader reader = writer.getReader(openRB .getCommitCounter() + 1); - // This is the "live" HALog. - assertTrue(reader.isLive()); + try { - // The reader is open. - assertTrue(reader.isOpen()); - - // The HALog is logically empty. -// assertTrue(reader.isEmpty()); - - /* - * Note: Don't do this here. The method will block for the live - * HALog until the file is closed (sealed with the closing root - * block) or destroyed. - */ -// assertTrue(reader.hasMoreBuffers()); + // This is the "live" HALog. + assertTrue(reader.isLive()); - // close the reader. should not close the writer. - reader.close(); + // The reader is open. + assertTrue(reader.isOpen()); - // the reader is closed. - assertFalse(reader.isOpen()); - - // once closed, this method should return immediately. - assertFalse(reader.hasMoreBuffers()); - - // the writer is still open. - assertTrue(writer.isHALogOpen()); + // The HALog is logically empty. + // assertTrue(reader.isEmpty()); - // double-close the reader. should be ignored. - reader.close(); + /* + * Note: Don't do this here. The method will block for the + * live HALog until the file is closed (sealed with the + * closing root block) or destroyed. + */ + // assertTrue(reader.hasMoreBuffers()); + + // close the reader. should not close the writer. + reader.close(); + + // the reader is closed. + assertFalse(reader.isOpen()); + + // once closed, this method should return immediately. + assertFalse(reader.hasMoreBuffers()); + + // the writer is still open. + assertTrue(writer.isHALogOpen()); + + // double-close the reader. should be ignored. + reader.close(); + + // the reader is closed. + assertFalse(reader.isOpen()); + + // the writer should *still* be open. + assertTrue(writer.isHALogOpen()); + + } finally { + + if(reader.isOpen()) { + + reader.close(); + + } + + } - // the writer should *still* be open. 
- assertTrue(writer.isHALogOpen()); - } /* @@ -549,12 +564,12 @@ // Note: Throws FileNotFoundException if does not exist. final IHALogReader reader = writer.getReader(commitCounter); - assertNotNull(reader); - - long nread = 0L; - try { + assertNotNull(reader); + + long nread = 0L; + while (reader.hasMoreBuffers()) { checkWriterFuture(); @@ -860,6 +875,8 @@ */ public void test_doubleOpen_close_historicalHALog() throws Exception { + IHALogReader r1 = null, r2 = null; + final HALogWriter writer = new HALogWriter(logdir); try { @@ -901,16 +918,14 @@ * Setup two readers on that HALog file. */ - final IHALogReader r1 = writer.getReader(openRB - .getCommitCounter() + 1); + r1 = writer.getReader(openRB.getCommitCounter() + 1); assertFalse(r1.isLive()); assertTrue(r1.isOpen()); assertFalse(r1.isEmpty()); assertTrue(r1.hasMoreBuffers()); - final IHALogReader r2 = writer.getReader(openRB - .getCommitCounter() + 1); + r2 = writer.getReader(openRB.getCommitCounter() + 1); assertFalse(r2.isLive()); assertTrue(r2.isOpen()); @@ -965,6 +980,16 @@ writer.disableHALog(); + if (r1 != null && r1.isOpen()) { + r1.close(); + r1 = null; + } + + if (r2 != null && r2.isOpen()) { + r2.close(); + r2 = null; + } + } // Read all files in the test directory. @@ -1038,59 +1063,68 @@ final IHALogReader r1 = writer .getReader(openRB.getCommitCounter() + 1); - assertFalse(r1.isLive()); - assertTrue(r1.isOpen()); - assertFalse(r1.isEmpty()); - assertTrue(r1.hasMoreBuffers()); + try { - for (int i = 0; i < MAX_OPEN_FILE_HANDLES; i++) { + assertFalse(r1.isLive()); + assertTrue(r1.isOpen()); + assertFalse(r1.isEmpty()); + assertTrue(r1.hasMoreBuffers()); - final IHALogReader r2 = writer.getReader(openRB - .getCommitCounter() + 1); + for (int i = 0; i < MAX_OPEN_FILE_HANDLES; i++) { - assertFalse(r2.isLive()); - assertTrue(r2.isOpen()); - assertFalse(r2.isEmpty()); - assertTrue(r2.hasMoreBuffers()); + final IHALogReader r2 = writer.getReader(openRB + .getCommitCounter() + 1); - /* - * Now use the 2nd reader to read the data to make sure that the - * IHALogReader is really open and functional. - */ - try { + assertFalse(r2.isLive()); + assertTrue(r2.isOpen()); + assertFalse(r2.isEmpty()); + assertTrue(r2.hasMoreBuffers()); - // Allocate a heap ByteBuffer - final ByteBuffer rbuf = ByteBuffer - .allocate(DirectBufferPool.INSTANCE - .getBufferCapacity()); + /* + * Now use the 2nd reader to read the data to make sure that + * the IHALogReader is really open and functional. + */ + try { - while (r2.hasMoreBuffers()) { + // Allocate a heap ByteBuffer + final ByteBuffer rbuf = ByteBuffer + .allocate(DirectBufferPool.INSTANCE + .getBufferCapacity()); - // read data into reader's buffer. - r2.processNextBuffer(rbuf); + while (r2.hasMoreBuffers()) { + // read data into reader's buffer. + r2.processNextBuffer(rbuf); + + } + + } finally { + + r2.close(); + } - } finally { + assertFalse(r2.isLive()); + assertFalse(r2.isOpen()); + assertFalse(r2.isEmpty()); + assertFalse(r2.hasMoreBuffers()); - r2.close(); - } - assertFalse(r2.isLive()); - assertFalse(r2.isOpen()); - assertFalse(r2.isEmpty()); - assertFalse(r2.hasMoreBuffers()); + // close [r1]. + r1.close(); + assertFalse(r1.isLive()); + assertFalse(r1.isOpen()); + assertFalse(r1.isEmpty()); + assertFalse(r1.hasMoreBuffers()); + } finally { + + if (r1.isOpen()) + r1.close(); + } - - // close [r1]. 
- r1.close(); - assertFalse(r1.isLive()); - assertFalse(r1.isOpen()); - assertFalse(r1.isEmpty()); - assertFalse(r1.hasMoreBuffers()); - + } finally { writer.disableHALog(); Modified: branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournal.java =================================================================== --- branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournal.java 2013-05-24 17:23:19 UTC (rev 7161) +++ branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournal.java 2013-05-28 12:15:58 UTC (rev 7162) @@ -737,7 +737,13 @@ } } - + + /** + * {@inheritDoc} + * + * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/678" > + * DGC Thread Leak: sendHALogForWriteSet() </a> + */ @Override public IHALogRootBlocksResponse getHALogRootBlocksForWriteSet( final IHALogRootBlocksRequest msg) throws IOException { @@ -764,16 +770,24 @@ } - final HALogReader r = new HALogReader(logFile); + final IHALogReader r = getHALogNexus().getReader(logFile); - final HALogRootBlocksResponse resp = new HALogRootBlocksResponse( - r.getOpeningRootBlock(), r.getClosingRootBlock()); + try { - if (haLog.isDebugEnabled()) - haLog.debug("msg=" + msg + ", resp=" + resp); + final HALogRootBlocksResponse resp = new HALogRootBlocksResponse( + r.getOpeningRootBlock(), r.getClosingRootBlock()); - return resp; + if (haLog.isDebugEnabled()) + haLog.debug("msg=" + msg + ", resp=" + resp); + return resp; + + } finally { + + r.close(); + + } + } finally { logLock.unlock(); @@ -820,14 +834,15 @@ // Note: open file handle - must be closed eventually. r = getHALogNexus().getReader(commitCounter); + // true iff is live log at moment reader was opened. isLive = r.isLive(); - - // Task sends an HALog file along the pipeline. + + // Task sends an HALog file along the pipeline. ft = new FutureTaskMon<Void>(new SendHALogTask(req, r)); - // Run task. - getExecutorService().submit(ft); - + // Run task. + getExecutorService().submit(ft); + // Clear reference. File handle will be closed by task. 
r = null; Modified: branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournalServer.java =================================================================== --- branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournalServer.java 2013-05-24 17:23:19 UTC (rev 7161) +++ branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournalServer.java 2013-05-28 12:15:58 UTC (rev 7162) @@ -1909,11 +1909,11 @@ final long commitCounter = journal.getRootBlockView() .getCommitCounter(); + final IHALogReader r = journal.getHALogNexus().getReader( + commitCounter + 1); + try { - final IHALogReader r = journal.getHALogNexus() - .getReader(commitCounter + 1); - if (r.isEmpty()) { /* @@ -1954,6 +1954,10 @@ break; + } finally { + + r.close(); + } } Modified: branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HALogNexus.java =================================================================== --- branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HALogNexus.java 2013-05-24 17:23:19 UTC (rev 7161) +++ branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HALogNexus.java 2013-05-28 12:15:58 UTC (rev 7162) @@ -43,6 +43,7 @@ import com.bigdata.btree.ITuple; import com.bigdata.btree.ITupleIterator; import com.bigdata.ha.QuorumServiceBase; +import com.bigdata.ha.halog.HALogReader; import com.bigdata.ha.halog.HALogWriter; import com.bigdata.ha.halog.IHALogReader; import com.bigdata.ha.halog.IHALogWriter; @@ -831,6 +832,60 @@ } /** + * Return the {@link IHALogReader} for the specified HALog file. If the + * request identifies the HALog that is currently being written, then an + * {@link IHALogReader} will be returned that will "see" newly written + * entries on the HALog. If the request identifies a historical HALog that + * has been closed and which exists, then a reader will be returned for that + * HALog file. Otherwise, an exception is thrown. + * + * @param logFile + * The HALog file. + * + * @return The {@link IHALogReader}. + * + * @throws IllegalArgumentException + * if the argument is <code>null</code>. + * @throws IOException + * if the HALog file does not exist or can not be read. + */ + public IHALogReader getReader(final File logFile) throws IOException { + + if (logFile == null) + throw new IllegalArgumentException(); + + logLock.lock(); + + try { + + if (haLogWriter.getFile().equals(logFile)) { + + /* + * This is the live HALog file. + */ + + // The closing commit counter. + final long cc = haLogWriter.getCommitCounter() + 1; + + return haLogWriter.getReader(cc); + + } + + /* + * This is an historical HALog file. + */ + + return new HALogReader(logFile); + + } finally { + + logLock.unlock(); + + } + + } + + /** * Open an HALog file for the write set starting with the given root block. 
* * @param rootBlock Modified: branches/READ_CACHE/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/HAStatusServletUtil.java =================================================================== --- branches/READ_CACHE/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/HAStatusServletUtil.java 2013-05-24 17:23:19 UTC (rev 7161) +++ branches/READ_CACHE/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/HAStatusServletUtil.java 2013-05-28 12:15:58 UTC (rev 7162) @@ -315,11 +315,11 @@ final long nbytes = rec.sizeOnDisk(); final long closingCommitCounter = rec.getRootBlock() .getCommitCounter(); + String digestStr = null; + final File file = nexus + .getHALogFile(closingCommitCounter); final IHALogReader r = nexus.getHALogWriter() .getReader(closingCommitCounter); - final File file = nexus - .getHALogFile(closingCommitCounter); - String digestStr = null; try { if (digests && !r.isEmpty()) { try { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
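The pattern this revision converges on can be summarized in a short sketch: obtain the reader through HALogNexus, which decides atomically under the logLock whether the requested file is the live HALog or a sealed historical one, and close it in a finally block rather than relying on finalize(). The wrapper class and the journal/commitCounter parameters below are placeholders for context:

import java.io.File;
import java.io.IOException;

import com.bigdata.ha.halog.IHALogReader;
import com.bigdata.journal.IRootBlockView;
import com.bigdata.journal.jini.ha.HAJournal;
import com.bigdata.journal.jini.ha.HALogNexus;

public class HALogReaderSketch {

    static void readWriteSet(final HAJournal journal, final long commitCounter)
            throws IOException {

        final HALogNexus nexus = journal.getHALogNexus();

        // HALog files are named for the closing commit counter of the write set.
        final File logFile = nexus.getHALogFile(commitCounter);

        // getReader(File) decides, under the logLock, whether this is the live
        // HALog (the reader will see new writes) or a sealed historical HALog.
        final IHALogReader r = nexus.getReader(logFile);
        try {
            final IRootBlockView openRB = r.getOpeningRootBlock();
            final IRootBlockView closeRB = r.getClosingRootBlock();
            // ... stream the write set, compute a digest, etc. ...
        } finally {
            // Always close in finally; an unclosed reader is exactly the kind of
            // file descriptor leak this revision fixes.
            r.close();
        }
    }
}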
From: <tho...@us...> - 2013-05-29 14:47:20
Revision: 7170 http://bigdata.svn.sourceforge.net/bigdata/?rev=7170&view=rev Author: thompsonbry Date: 2013-05-29 14:47:12 +0000 (Wed, 29 May 2013) Log Message: ----------- We have identified a concurrency hole in the IRWStrategy.commit() / IRWStrategy.postCommit() logic. The Journal is MRMW (multiple readers, multiple writers). However, the RWStore allocation write lock needs to be held across those two method calls. The same problem exists for the MemStore. A new getCommitLock() method was added to the IRWStrategy and IStore interfaces. The postCommit() methods now assert that the caller is holding the appropriate lock. This forces the caller to ensure that they have acquired the commitLock before invoking IRWStrategy.commit(). Removed WCS.isFlush() since the assumptions implied for the method were not valid when it was invoked and added some asserts for the same criteria into WCS.flush(). Took out code path in HAJournalServer.handleReplicatedWrite() where it was rethrowing the root cause after entering the error state. In fact, we do not want to propagate the root cause back to the leader since that can cause an uncurable error where an update might otherwise complete with a majority of the services if one service enters an error state. WCS compaction is now enabled by default. HALog compression is now enabled by default. See https://sourceforge.net/apps/trac/bigdata/ticket/674 (WCS write cache compaction causes errors in RWS postHACommit()) See https://sourceforge.net/apps/trac/bigdata/ticket/557 (StressTestConcurrentTx may ignore real errors) See https://sourceforge.net/apps/trac/bigdata/ticket/652 (Compress write cache blocks for replication and in HALogs) Modified Paths: -------------- branches/READ_CACHE/bigdata/src/java/com/bigdata/io/writecache/WriteCacheService.java branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractJournal.java branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/Options.java branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/RWStrategy.java branches/READ_CACHE/bigdata/src/java/com/bigdata/rwstore/IRWStrategy.java branches/READ_CACHE/bigdata/src/java/com/bigdata/rwstore/IStore.java branches/READ_CACHE/bigdata/src/java/com/bigdata/rwstore/RWStore.java branches/READ_CACHE/bigdata/src/java/com/bigdata/rwstore/sector/AllocationContext.java branches/READ_CACHE/bigdata/src/java/com/bigdata/rwstore/sector/MemStrategy.java branches/READ_CACHE/bigdata/src/java/com/bigdata/rwstore/sector/MemoryManager.java branches/READ_CACHE/bigdata/src/test/com/bigdata/rwstore/sector/TestMemStore.java branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournalServer.java Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/io/writecache/WriteCacheService.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/io/writecache/WriteCacheService.java 2013-05-29 12:22:08 UTC (rev 7169) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/io/writecache/WriteCacheService.java 2013-05-29 14:47:12 UTC (rev 7170) @@ -550,7 +550,7 @@ * WCS write cache compaction causes errors in RWS postHACommit() * </a> */ - this.compactionEnabled = false; //canCompact() && compactionThreshold < 100; + this.compactionEnabled = canCompact() && compactionThreshold < 100; if (log.isInfoEnabled()) log.info("Compaction Enabled: " + compactionEnabled @@ -2196,10 +2196,14 @@ try { if(!halt) { /* - * Can not check assertion if there is an existing + * Check assertions for clean WCS after flush(). 
+ * + * Note: Can not check assertion if there is an existing * exception. */ + assert dirtyList.size() == 0; assert compactingCacheRef.get() == null; + assert current.get() == null; } } finally { dirtyListLock.unlock(); @@ -3841,22 +3845,22 @@ } } - /** - * Debug method to verify that the {@link WriteCacheService} has flushed all - * {@link WriteCache} buffers. - * - * @return whether there are no outstanding writes buffered - */ - public boolean isFlushed() { - - final boolean clear = - dirtyList.size() == 0 - && compactingCacheRef.get() == null - && (current.get() == null || current.get().isEmpty()); - - return clear; - - } +// /** +// * Debug method to verify that the {@link WriteCacheService} has flushed all +// * {@link WriteCache} buffers. +// * +// * @return whether there are no outstanding writes buffered +// */ +// public boolean isFlushed() { +// +// final boolean clear = +// dirtyList.size() == 0 +// && compactingCacheRef.get() == null +// && (current.get() == null || current.get().isEmpty()); +// +// return clear; +// +// } /** * An array of writeCache actions is maintained that can be used Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractJournal.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractJournal.java 2013-05-29 12:22:08 UTC (rev 7169) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractJournal.java 2013-05-29 14:47:12 UTC (rev 7170) @@ -114,7 +114,6 @@ import com.bigdata.ha.msg.IHADigestRequest; import com.bigdata.ha.msg.IHADigestResponse; import com.bigdata.ha.msg.IHAGatherReleaseTimeRequest; -import com.bigdata.ha.msg.IHAGlobalWriteLockRequest; import com.bigdata.ha.msg.IHALogDigestRequest; import com.bigdata.ha.msg.IHALogDigestResponse; import com.bigdata.ha.msg.IHALogRequest; @@ -3192,7 +3191,9 @@ * retained. */ if (_bufferStrategy instanceof IHistoryManager) { + ((IHistoryManager) _bufferStrategy).checkDeferredFrees(this); + } /* @@ -3244,6 +3245,22 @@ } /* + * Conditionally obtain a lock that will protect the + * commit()/postCommit() protocol. + */ + final long nextOffset; + final Lock commitLock; + if (_bufferStrategy instanceof IRWStrategy) { + commitLock = ((IRWStrategy) _bufferStrategy).getCommitLock(); + } else { + commitLock = null; + } + if (commitLock != null) { + // Take the commit lock. + commitLock.lock(); + } + try { + /* * Call commit on buffer strategy prior to retrieving root block, * required for RWStore since the metaBits allocations are not made * until commit, leading to invalid addresses for recent store @@ -3264,15 +3281,14 @@ * does not create much latency because the WriteCacheService drains * the dirtyList in a seperate thread. */ - _bufferStrategy.commit(); + _bufferStrategy.commit(); + + /* + * The next offset at which user data would be written. + * Calculated, after commit! + */ + nextOffset = _bufferStrategy.getNextOffset(); - /* - * next offset at which user data would be written. - * Calculated, after commit! 
- */ - - final long nextOffset = _bufferStrategy.getNextOffset(); - final long blockSequence; if (_bufferStrategy instanceof IHABufferStrategy) { @@ -3281,20 +3297,6 @@ blockSequence = ((IHABufferStrategy) _bufferStrategy) .getBlockSequence(); - if (!((IHABufferStrategy) _bufferStrategy) - .getWriteCacheService().isFlushed()) { - - /** - * @see <a - * href="https://sourceforge.net/apps/trac/bigdata/ticket/674" - * > WCS write cache compaction causes errors in RWS - * postHACommit() </a> - */ - - throw new AssertionError(); - - } - } else { blockSequence = old.getBlockSequence(); @@ -3381,11 +3383,16 @@ // write the root block on to the backing store. _bufferStrategy.writeRootBlock(newRootBlock, forceOnCommit); - // Now the root blocks are down we can commit any - // transient state - if (_bufferStrategy instanceof IRWStrategy) { - ((IRWStrategy) _bufferStrategy).postCommit(); - } + if (_bufferStrategy instanceof IRWStrategy) { + + /* + * Now the root blocks are down we can commit any transient + * state. + */ + + ((IRWStrategy) _bufferStrategy).postCommit(); + + } // set the new root block. _rootBlock = newRootBlock; @@ -3482,6 +3489,15 @@ throw new RuntimeException(e); } + } // else HA mode + + } finally { + if(commitLock != null) { + /* + * Release the [commitLock] iff one was taken above. + */ + commitLock.unlock(); + } } final long elapsedNanos = System.nanoTime() - beginNanos; @@ -5901,6 +5917,38 @@ try { + /* + * Note: flush() is done by prepare2Phase(). The only conditions + * under which it is not done already is (a) HARestore (when + * localService is null) and (b) during RESTORE or RESYNC for the + * HAJournalServer (when haStatus will be NotReady). + */ + final boolean shouldFlush = localService == null + || (haStatus == null || haStatus == HAStatusEnum.NotReady); + + /* + * Force application data to stable storage _before_ we update the + * root blocks. This option guarantees that the application data is + * stable on the disk before the atomic commit. Some operating + * systems and/or file systems may otherwise choose an ordered write + * with the consequence that the root blocks are laid down on the + * disk before the application data and a hard failure could result + * in the loss of application data addressed by the new root blocks + * (data loss on restart). + * + * Note: We do not force the file metadata to disk. If that is done, + * it will be done by a force() after we write the root block on the + * disk. + * + * Note: [shouldFlush] is probably sufficient. This test uses + * [shouldFlush||true] to err on the side of safety. + */ + if ((shouldFlush || true) && doubleSync) { + + _bufferStrategy.force(false/* metadata */); + + } + // The timestamp for this commit point. final long commitTime = rootBlock.getLastCommitTime(); @@ -5914,11 +5962,18 @@ .isLeader(rootBlock.getQuorumToken()); if (leader) { - // Now the root blocks are down we can commit any - // transient state - if (_bufferStrategy instanceof IRWStrategy) { - ((IRWStrategy) _bufferStrategy).postCommit(); - } + + if (_bufferStrategy instanceof IRWStrategy) { + + /* + * Now the root blocks are down we can commit any transient + * state. 
+ */ + + ((IRWStrategy) _bufferStrategy).postCommit(); + + } + } else { /* Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/Options.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/Options.java 2013-05-29 12:22:08 UTC (rev 7169) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/Options.java 2013-05-29 14:47:12 UTC (rev 7170) @@ -346,7 +346,7 @@ * Compress write cache blocks for replication and in HALogs </a> */ String HALOG_COMPRESSOR = "HALogCompressor"; - String DEFAULT_HALOG_COMPRESSOR = null;//FIXME Change default: CompressorRegistry.DEFLATE_BEST_SPEED; + String DEFAULT_HALOG_COMPRESSOR = CompressorRegistry.DEFLATE_BEST_SPEED; /** * The initial extent of the journal (bytes). When the journal is backed by Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/RWStrategy.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/RWStrategy.java 2013-05-29 12:22:08 UTC (rev 7169) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/RWStrategy.java 2013-05-29 14:47:12 UTC (rev 7170) @@ -34,6 +34,7 @@ import java.security.MessageDigest; import java.util.UUID; import java.util.concurrent.Future; +import java.util.concurrent.locks.Lock; import org.apache.log4j.Logger; @@ -886,6 +887,11 @@ } @Override + public Lock getCommitLock() { + return m_store.getCommitLock(); + } + + @Override public void postCommit() { m_store.postCommit(); } Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/rwstore/IRWStrategy.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/rwstore/IRWStrategy.java 2013-05-29 12:22:08 UTC (rev 7169) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/rwstore/IRWStrategy.java 2013-05-29 14:47:12 UTC (rev 7170) @@ -1,5 +1,7 @@ package com.bigdata.rwstore; +import java.util.concurrent.locks.Lock; + import com.bigdata.journal.IBufferStrategy; import com.bigdata.journal.RWStrategy; import com.bigdata.rawstore.IAllocationManagerStore; @@ -31,10 +33,12 @@ */ public boolean isCommitted(long addr); -// /** -// * Resets allocators from current rootblock -// */ -// void resetFromHARootBlock(IRootBlockView rootBlock); + /** + * Optionally return a {@link Lock} that must be used (when non- + * <code>null</code>) to make the {@link IBufferStrategy#commit()} / + * {@link #postCommit()} strategy atomic. + */ + public Lock getCommitLock(); /** * Called post commit to dispose any transient commit state retained to Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/rwstore/IStore.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/rwstore/IStore.java 2013-05-29 12:22:08 UTC (rev 7169) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/rwstore/IStore.java 2013-05-29 14:47:12 UTC (rev 7170) @@ -25,6 +25,7 @@ package com.bigdata.rwstore; import java.io.File; +import java.util.concurrent.locks.Lock; import com.bigdata.rawstore.IAllocationContext; import com.bigdata.rawstore.IStreamStore; @@ -58,6 +59,13 @@ public void free(long addr, int size); /** + * Optionally return a {@link Lock} that must be used (when non- + * <code>null</code>) to make the {@link #commit()} / {@link #postCommit()} + * strategy atomic. + */ + public Lock getCommitLock(); + + /** * Global commit on the backing store. 
Previously committed data which has * been marked as {@link #free(long, int)} is now available for recycling. * However, recycling can not occur if session protection is active. Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/rwstore/RWStore.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/rwstore/RWStore.java 2013-05-29 12:22:08 UTC (rev 7169) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/rwstore/RWStore.java 2013-05-29 14:47:12 UTC (rev 7170) @@ -3137,19 +3137,32 @@ } /** + * {@inheritDoc} + */ + public Lock getCommitLock() { + + return m_allocationWriteLock; + + } + + /** + * {@inheritDoc} + * <p> * Commits the FixedAllocator bits */ public void postCommit() { - m_allocationWriteLock.lock(); - try { - for (FixedAllocator fa : m_commitList) { - fa.postCommit(); - } + + if (!m_allocationWriteLock.isHeldByCurrentThread()) + throw new IllegalMonitorStateException(); + + for (FixedAllocator fa : m_commitList) { + + fa.postCommit(); - m_commitList.clear(); - } finally { - m_allocationWriteLock.unlock(); } + + m_commitList.clear(); + } public int checkDeferredFrees(final AbstractJournal journal) { Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/rwstore/sector/AllocationContext.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/rwstore/sector/AllocationContext.java 2013-05-29 12:22:08 UTC (rev 7169) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/rwstore/sector/AllocationContext.java 2013-05-29 14:47:12 UTC (rev 7170) @@ -29,6 +29,7 @@ import java.nio.ByteBuffer; import java.util.LinkedHashSet; import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantLock; import org.apache.log4j.Logger; @@ -367,6 +368,11 @@ m_root.commit(); } + @Override + public Lock getCommitLock() { + return m_root.getCommitLock(); + } + @Override public void postCommit() { m_root.postCommit(); Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/rwstore/sector/MemStrategy.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/rwstore/sector/MemStrategy.java 2013-05-29 12:22:08 UTC (rev 7169) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/rwstore/sector/MemStrategy.java 2013-05-29 14:47:12 UTC (rev 7170) @@ -29,6 +29,7 @@ import java.io.RandomAccessFile; import java.nio.ByteBuffer; import java.util.UUID; +import java.util.concurrent.locks.Lock; import com.bigdata.cache.ConcurrentWeakValueCache; import com.bigdata.counters.CounterSet; @@ -159,6 +160,13 @@ } @Override + public Lock getCommitLock() { + + return m_mmgr.getCommitLock(); + + } + + @Override public void postCommit() { m_mmgr.postCommit(); m_dirty = false; Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/rwstore/sector/MemoryManager.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/rwstore/sector/MemoryManager.java 2013-05-29 12:22:08 UTC (rev 7169) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/rwstore/sector/MemoryManager.java 2013-05-29 14:47:12 UTC (rev 7170) @@ -39,6 +39,7 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.locks.Condition; +import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantLock; import 
org.apache.log4j.Logger; @@ -104,6 +105,9 @@ /** * The lock used to serialize all allocation/deallocation requests. This is * shared across all allocation contexts to avoid lock ordering problems. + * + * FIXME This should be a read/write lock as per RWStore. That will provide + * better concurrency. */ final /*private*/ ReentrantLock m_allocationLock = new ReentrantLock(); @@ -1196,16 +1200,22 @@ } @Override + public Lock getCommitLock() { + return m_allocationLock; + } + + @Override public void postCommit() { - m_allocationLock.lock(); - try { - final Iterator<SectorAllocator> sectors = m_sectors.iterator(); - while (sectors.hasNext()) { - sectors.next().commit(); - } - } finally { - m_allocationLock.unlock(); - } + if(!m_allocationLock.isHeldByCurrentThread()) + throw new IllegalMonitorStateException(); +// try { + final Iterator<SectorAllocator> sectors = m_sectors.iterator(); + while (sectors.hasNext()) { + sectors.next().commit(); + } +// } finally { +// m_allocationLock.unlock(); +// } } private ConcurrentWeakValueCache<Long, ICommitter> m_externalCache = null; Modified: branches/READ_CACHE/bigdata/src/test/com/bigdata/rwstore/sector/TestMemStore.java =================================================================== --- branches/READ_CACHE/bigdata/src/test/com/bigdata/rwstore/sector/TestMemStore.java 2013-05-29 12:22:08 UTC (rev 7169) +++ branches/READ_CACHE/bigdata/src/test/com/bigdata/rwstore/sector/TestMemStore.java 2013-05-29 14:47:12 UTC (rev 7170) @@ -111,7 +111,7 @@ * Use a proxy test suite and specify the delegate. */ - ProxyTestSuite suite = new ProxyTestSuite(delegate, + final ProxyTestSuite suite = new ProxyTestSuite(delegate, "MemStore Test Suite"); /* Modified: branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournalServer.java =================================================================== --- branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournalServer.java 2013-05-29 12:22:08 UTC (rev 7169) +++ branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournalServer.java 2013-05-29 14:47:12 UTC (rev 7170) @@ -1144,6 +1144,16 @@ } // RunStateCallable + /** + * Transition to {@link RunStateEnum#Error}. + * <p> + * Note: if the current {@link Thread} is a {@link Thread} executing one + * of the {@link RunStateCallable#doRun()} methods, then it will be + * <strong>interrupted</strong> when entering the new run state. Thus, + * the caller MAY observe an {@link InterruptedException} in their + * thread, but only if they are being run out of + * {@link RunStateCallable}. + */ void enterErrorState() { /* @@ -1178,7 +1188,11 @@ /* * Transition into the error state. + * + * Note: This can cause the current Thread to be interrupted if it + * is the Thread executing one of the RunStateCallable classes. */ + enterRunState(new ErrorTask()); } @@ -3011,11 +3025,34 @@ try { enterErrorState(); } catch (RuntimeException e) { - // log and ignore. - log.error(e, e); + if (InnerCause.isInnerCause(e, + InterruptedException.class)) { + /* + * Propagate the interrupt. + * + * Note: This probably does not occur in this + * context since we are not running in the + * Thread for any doRun() method. + */ + Thread.interrupted(); + } else { + // log and ignore. + log.error(e, e); + } } - // rethrow exception. - throw new RuntimeException(t); + /* + * Note: DO NOT rethrow the exception. This service will + * leave the met quorum. 
If we rethrow the exception, + * the update operation that generated the live + * replicated write will be failed with the rethrown + * exception as the root cause. However, we want the + * update operation to complete successfully as long as + * we can retain a met quorum (and the same leader) for + * the duration of the update operation. + */ +// // rethrow exception. +// throw new RuntimeException(t); + return; } // /*
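
The r7170 log message above describes the intended call pattern on the commit path: conditionally take the commit lock from the buffer strategy, run commit(), write the root block, invoke postCommit(), and release the lock in a finally clause so that the allocator state cannot change between commit() and postCommit(). The minimal Java sketch below restates that protocol outside of AbstractJournal as a reading aid for the diff; the class and helper names (CommitLockProtocolSketch, commitWithPostCommit) are illustrative only, and the root block handling is elided.

{{{
import java.util.concurrent.locks.Lock;

import com.bigdata.journal.IBufferStrategy;
import com.bigdata.rwstore.IRWStrategy;

public class CommitLockProtocolSketch {

    /**
     * Illustrative only: hold the (optional) commit lock across commit()
     * and postCommit() so that no other writer can touch the allocators
     * between the two calls. Mirrors the pattern added to AbstractJournal
     * in r7170.
     */
    static void commitWithPostCommit(final IBufferStrategy bufferStrategy) {

        // Conditionally obtain the lock protecting commit()/postCommit().
        final Lock commitLock = (bufferStrategy instanceof IRWStrategy) //
                ? ((IRWStrategy) bufferStrategy).getCommitLock()
                : null;

        if (commitLock != null) {
            // Take the commit lock before commit().
            commitLock.lock();
        }

        try {

            // Flush buffered writes; RWStore metaBits allocations are made here.
            bufferStrategy.commit();

            // ... the new root block is computed and written here in the
            // real commit path (omitted in this sketch) ...

            if (bufferStrategy instanceof IRWStrategy) {

                // Dispose transient commit state. As of r7170, postCommit()
                // asserts that the caller still holds the commit lock.
                ((IRWStrategy) bufferStrategy).postCommit();

            }

        } finally {

            if (commitLock != null) {
                // Release the commit lock iff one was taken above.
                commitLock.unlock();
            }

        }

    }

}
}}}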
From: <tho...@us...> - 2013-05-30 21:12:37
|
Revision: 7175 http://bigdata.svn.sourceforge.net/bigdata/?rev=7175&view=rev Author: thompsonbry Date: 2013-05-30 21:12:22 +0000 (Thu, 30 May 2013) Log Message: ----------- TestHA3JournalServer.testABC_LargeLoad: Observed this exception once. I suspect WCS compaction. 2nd run fails (quorum not met). Test is green on 3rd retry. In my opinion, this test has been flaky since we enabled WCS compaction. Disabling WCS compaction results in a 3 green test runs in a row. I am reopening the WCS compaction ticket (https://sourceforge.net/apps/trac/bigdata/ticket/674). {{{ com.bigdata.rdf.sail.webapp.client.HttpException: Status Code=500, Status Line=HTTP/1.1 500 Server Error, Response=DROP ALL java.util.concurrent.ExecutionException: java.lang.RuntimeException: com.bigdata.rwstore.PhysicalAddressResolutionException: Address did not resolve to physical address: -57741 at java.util.concurrent.FutureTask$Sync.innerGet(FutureTask.java:252) at java.util.concurrent.FutureTask.get(FutureTask.java:111) at com.bigdata.rdf.sail.webapp.QueryServlet.doUpdate(QueryServlet.java:395) at com.bigdata.rdf.sail.webapp.QueryServlet.doPost(QueryServlet.java:151) at com.bigdata.rdf.sail.webapp.RESTServlet.doPost(RESTServlet.java:201) at javax.servlet.http.HttpServlet.service(HttpServlet.java:727) at javax.servlet.http.HttpServlet.service(HttpServlet.java:820) at org.eclipse.jetty.servlet.ServletHolder.handle(ServletHolder.java:534) at org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:475) at org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:929) at org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:403) at org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:864) at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:117) at org.eclipse.jetty.server.handler.HandlerList.handle(HandlerList.java:47) at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:114) at org.eclipse.jetty.server.Server.handle(Server.java:352) at org.eclipse.jetty.server.HttpConnection.handleRequest(HttpConnection.java:596) at org.eclipse.jetty.server.HttpConnection$RequestHandler.headerComplete(HttpConnection.java:1051) at org.eclipse.jetty.http.HttpParser.parseNext(HttpParser.java:590) at org.eclipse.jetty.http.HttpParser.parseAvailable(HttpParser.java:212) at org.eclipse.jetty.server.HttpConnection.handle(HttpConnection.java:426) at org.eclipse.jetty.io.nio.SelectChannelEndPoint.handle(SelectChannelEndPoint.java:508) at org.eclipse.jetty.io.nio.SelectChannelEndPoint.access$000(SelectChannelEndPoint.java:34) at org.eclipse.jetty.io.nio.SelectChannelEndPoint$1.run(SelectChannelEndPoint.java:40) at org.eclipse.jetty.util.thread.QueuedThreadPool$2.run(QueuedThreadPool.java:451) at java.lang.Thread.run(Thread.java:722) Caused by: java.lang.RuntimeException: com.bigdata.rwstore.PhysicalAddressResolutionException: Address did not resolve to physical address: -57741 at com.bigdata.journal.AbstractJournal.abort(AbstractJournal.java:2667) at com.bigdata.rdf.store.LocalTripleStore.abort(LocalTripleStore.java:96) at com.bigdata.rdf.sail.BigdataSail$BigdataSailConnection.rollback(BigdataSail.java:2921) at org.openrdf.repository.sail.SailRepositoryConnection.rollback(SailRepositoryConnection.java:97) at com.bigdata.rdf.sail.webapp.BigdataRDFContext$AbstractQueryTask.call(BigdataRDFContext.java:1087) at com.bigdata.rdf.sail.webapp.BigdataRDFContext$AbstractQueryTask.call(BigdataRDFContext.java:1) at 
java.util.concurrent.FutureTask$Sync.innerRun(FutureTask.java:334) at java.util.concurrent.FutureTask.run(FutureTask.java:166) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1110) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:603) ... 1 more Caused by: com.bigdata.rwstore.PhysicalAddressResolutionException: Address did not resolve to physical address: -57741 at com.bigdata.rwstore.RWStore.getData(RWStore.java:1853) at com.bigdata.journal.RWStrategy.readFromLocalStore(RWStrategy.java:726) at com.bigdata.journal.RWStrategy.read(RWStrategy.java:153) at com.bigdata.journal.AbstractJournal._getCommitRecord(AbstractJournal.java:3918) at com.bigdata.journal.AbstractJournal._abort(AbstractJournal.java:2780) at com.bigdata.journal.AbstractJournal.doLocalAbort(AbstractJournal.java:5885) at com.bigdata.journal.jini.ha.HAJournal.doLocalAbort(HAJournal.java:636) at com.bigdata.journal.AbstractJournal.abort(AbstractJournal.java:2654) ... 10 more at com.bigdata.rdf.sail.webapp.client.RemoteRepository.checkResponseCode(RemoteRepository.java:1452) at com.bigdata.rdf.sail.webapp.client.RemoteRepository$SparqlUpdate.evaluate(RemoteRepository.java:1096) at com.bigdata.journal.jini.ha.TestHA3JournalServer.testABC_LargeLoad(TestHA3JournalServer.java:387) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:601) at junit.framework.TestCase.runTest(TestCase.java:154) at junit.framework.TestCase.runBare(TestCase.java:127) at junit.framework.TestResult$1.protect(TestResult.java:106) at junit.framework.TestResult.runProtected(TestResult.java:124) at junit.framework.TestResult.run(TestResult.java:109) at junit.framework.TestCase.run(TestCase.java:118) at junit.framework.TestSuite.runTest(TestSuite.java:208) at junit.framework.TestSuite.run(TestSuite.java:203) at junit.framework.TestSuite.runTest(TestSuite.java:208) at junit.framework.TestSuite.run(TestSuite.java:203) at org.eclipse.jdt.internal.junit.runner.junit3.JUnit3TestReference.run(JUnit3TestReference.java:130) at org.eclipse.jdt.internal.junit.runner.TestExecution.run(TestExecution.java:38) at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.runTests(RemoteTestRunner.java:467) at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.runTests(RemoteTestRunner.java:683) at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.run(RemoteTestRunner.java:390) at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.main(RemoteTestRunner.java:197) }}} @see https://sourceforge.net/apps/trac/bigdata/ticket/530#comment (HAJournal) @see https://sourceforge.net/apps/trac/bigdata/ticket/674 (WriteCacheService Compaction causes failures). 
Modified Paths: -------------- branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/BOpContext.java branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/NamedSolutionSetRefUtility.java branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/solutions/SolutionSetStream.java branches/READ_CACHE/bigdata/src/java/com/bigdata/btree/ICheckpointProtocol.java branches/READ_CACHE/bigdata/src/java/com/bigdata/io/writecache/WriteCacheService.java branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractTask.java branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/IBTreeManager.java branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/IResourceManager.java branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/JournalDelegate.java branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/TemporaryStore.java branches/READ_CACHE/bigdata/src/java/com/bigdata/rawstore/IStreamStore.java branches/READ_CACHE/bigdata/src/java/com/bigdata/stream/Stream.java branches/READ_CACHE/bigdata/src/test/com/bigdata/journal/TestDumpJournal.java branches/READ_CACHE/bigdata/src/test/com/bigdata/journal/TestName2Addr.java branches/READ_CACHE/bigdata/src/test/com/bigdata/journal/TestNamedIndices.java branches/READ_CACHE/bigdata/src/test/com/bigdata/journal/TestTemporaryStore.java branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/QueryHints.java branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/StaticAnalysis.java branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/StaticAnalysisBase.java branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/StaticAnalysis_CanJoin.java branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/cache/CacheConnectionImpl.java branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/cache/ICacheConnection.java branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpContext.java branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUpdate.java branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUtility.java branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/ASTEvalHelper.java branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/IEvaluationContext.java branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/service/RemoteServiceOptions.java branches/READ_CACHE/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/TestAll.java branches/READ_CACHE/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/cache/TestAll.java branches/READ_CACHE/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/TestAll.java branches/READ_CACHE/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/TestDescribe.java branches/READ_CACHE/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/TestInclude.java branches/READ_CACHE/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/include_03.rq branches/READ_CACHE/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/update/TestAll.java branches/READ_CACHE/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/QueryServlet.java branches/READ_CACHE/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/RESTServlet.java branches/READ_CACHE/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/client/RemoteRepository.java branches/READ_CACHE/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailWithQuads.java branches/READ_CACHE/bigdata-sails/src/test/com/bigdata/rdf/sail/tck/BigdataSPARQLUpdateTest.java 
branches/READ_CACHE/bigdata-sails/src/test/com/bigdata/rdf/sail/tck/BigdataSPARQLUpdateTest2.java branches/READ_CACHE/bigdata-sails/src/test/com/bigdata/rdf/sail/tck/BigdataSPARQLUpdateTxTest.java branches/READ_CACHE/bigdata-sails/src/test/com/bigdata/rdf/sail/tck/BigdataSPARQLUpdateTxTest2.java Added Paths: ----------- branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/ssets/ branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/ssets/ISolutionSetManager.java branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/ssets/SolutionSetManager.java branches/READ_CACHE/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/include_03a.rq branches/READ_CACHE/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/ssets/ branches/READ_CACHE/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/ssets/TestAll.java branches/READ_CACHE/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/ssets/TestSolutionSetManager.java branches/READ_CACHE/bigdata-sails/src/test/com/bigdata/rdf/sail/tck/BigdataSPARQLUpdateTest2DiskRW.java branches/READ_CACHE/bigdata-sails/src/test/com/bigdata/rdf/sail/tck/BigdataSPARQLUpdateTest2DiskWORM.java Removed Paths: ------------- branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/cache/ISolutionSetCache.java branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/cache/SolutionSetCache.java branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/ssets/ISolutionSetManager.java branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/ssets/SolutionSetManager.java branches/READ_CACHE/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/cache/TestSolutionSetCache.java branches/READ_CACHE/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/ssets/TestAll.java branches/READ_CACHE/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/ssets/TestSolutionSetManager.java Property Changed: ---------------- branches/READ_CACHE/ branches/READ_CACHE/bigdata/lib/jetty/ branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/aggregate/ branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/joinGraph/ branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/util/ branches/READ_CACHE/bigdata/src/java/com/bigdata/htree/raba/ branches/READ_CACHE/bigdata/src/java/com/bigdata/jsr166/ branches/READ_CACHE/bigdata/src/test/com/bigdata/bop/joinGraph/ branches/READ_CACHE/bigdata/src/test/com/bigdata/bop/util/ branches/READ_CACHE/bigdata/src/test/com/bigdata/jsr166/ branches/READ_CACHE/bigdata/src/test/com/bigdata/util/httpd/ branches/READ_CACHE/bigdata-compatibility/ branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/attr/ branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/disco/ branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/util/config/ branches/READ_CACHE/bigdata-perf/ branches/READ_CACHE/bigdata-perf/btc/ branches/READ_CACHE/bigdata-perf/btc/src/resources/ branches/READ_CACHE/bigdata-perf/lubm/ branches/READ_CACHE/bigdata-perf/uniprot/ branches/READ_CACHE/bigdata-perf/uniprot/src/ branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/ branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/changesets/ branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/error/ branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/internal/ branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/relation/ branches/READ_CACHE/bigdata-rdf/src/java/com/bigdata/rdf/util/ branches/READ_CACHE/bigdata-rdf/src/samples/ branches/READ_CACHE/bigdata-rdf/src/test/com/bigdata/bop/rdf/aggregate/ 
branches/READ_CACHE/bigdata-rdf/src/test/com/bigdata/rdf/internal/ branches/READ_CACHE/bigdata-rdf/src/test/com/bigdata/rdf/relation/ branches/READ_CACHE/bigdata-sails/src/java/com/bigdata/rdf/sail/bench/ branches/READ_CACHE/bigdata-sails/src/java/com/bigdata/rdf/sail/changesets/ branches/READ_CACHE/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/ branches/READ_CACHE/bigdata-sails/src/test/com/bigdata/rdf/sail/bench/ branches/READ_CACHE/bigdata-sails/src/test/com/bigdata/rdf/sail/webapp/ branches/READ_CACHE/dsi-utils/ branches/READ_CACHE/dsi-utils/LEGAL/ branches/READ_CACHE/dsi-utils/lib/ branches/READ_CACHE/dsi-utils/src/ branches/READ_CACHE/dsi-utils/src/java/ branches/READ_CACHE/dsi-utils/src/java/it/ branches/READ_CACHE/dsi-utils/src/java/it/unimi/ branches/READ_CACHE/dsi-utils/src/test/ branches/READ_CACHE/dsi-utils/src/test/it/unimi/ branches/READ_CACHE/dsi-utils/src/test/it/unimi/dsi/ branches/READ_CACHE/lgpl-utils/src/java/it/unimi/dsi/fastutil/bytes/custom/ branches/READ_CACHE/lgpl-utils/src/test/it/unimi/dsi/fastutil/bytes/custom/ branches/READ_CACHE/osgi/ branches/READ_CACHE/src/resources/bin/config/ Property changes on: branches/READ_CACHE ___________________________________________________________________ Modified: svn:mergeinfo - /branches/BIGDATA_OPENRDF_2_6_9_UPDATE:6769-6785 /branches/BIGDATA_RELEASE_1_2_0:6766-7143 /branches/BTREE_BUFFER_BRANCH:2004-2045 /branches/DEV_BRANCH_27_OCT_2009:2270-2546,2548-2782 /branches/INT64_BRANCH:4486-4522 /branches/JOURNAL_HA_BRANCH:2596-4066 /branches/LARGE_LITERALS_REFACTOR:4175-4387 /branches/LEXICON_REFACTOR_BRANCH:2633-3304 /branches/QUADS_QUERY_BRANCH:4525-4531,4550-4584,4586-4609,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH:4814-4836 /branches/bugfix-btm:2594-3237 /branches/dev-btm:2574-2730 /branches/fko:3150-3194 /trunk:3392-3437,3656-4061 + /branches/BIGDATA_OPENRDF_2_6_9_UPDATE:6769-6785 /branches/BIGDATA_RELEASE_1_2_0:6766-7173 /branches/BTREE_BUFFER_BRANCH:2004-2045 /branches/DEV_BRANCH_27_OCT_2009:2270-2546,2548-2782 /branches/INT64_BRANCH:4486-4522 /branches/JOURNAL_HA_BRANCH:2596-4066 /branches/LARGE_LITERALS_REFACTOR:4175-4387 /branches/LEXICON_REFACTOR_BRANCH:2633-3304 /branches/QUADS_QUERY_BRANCH:4525-4531,4550-4584,4586-4609,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH:4814-4836 /branches/bugfix-btm:2594-3237 /branches/dev-btm:2574-2730 /branches/fko:3150-3194 /trunk:3392-3437,3656-4061 Property changes on: branches/READ_CACHE/bigdata/lib/jetty ___________________________________________________________________ Modified: svn:mergeinfo - /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata/lib/jetty:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata/lib/jetty:6766-7143 /branches/INT64_BRANCH/bigdata/lib/jetty:4486-4522 /branches/QUADS_QUERY_BRANCH/bigdata/lib/jetty:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG/bigdata/lib/jetty:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata/lib/jetty:4814-4836 + /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata/lib/jetty:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata/lib/jetty:6766-7173 /branches/INT64_BRANCH/bigdata/lib/jetty:4486-4522 
/branches/QUADS_QUERY_BRANCH/bigdata/lib/jetty:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG/bigdata/lib/jetty:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata/lib/jetty:4814-4836 Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/BOpContext.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/BOpContext.java 2013-05-30 20:40:36 UTC (rev 7174) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/BOpContext.java 2013-05-30 21:12:22 UTC (rev 7175) @@ -43,17 +43,13 @@ import com.bigdata.bop.join.BaseJoinStats; import com.bigdata.bop.join.IHashJoinUtility; import com.bigdata.btree.ISimpleIndexAccess; -import com.bigdata.journal.AbstractJournal; -import com.bigdata.journal.IIndexManager; -import com.bigdata.journal.ITx; -import com.bigdata.journal.TimestampUtility; +import com.bigdata.journal.IBTreeManager; import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.internal.impl.bnode.SidIV; import com.bigdata.rdf.model.BigdataBNode; import com.bigdata.rdf.sparql.ast.QueryHints; -import com.bigdata.rdf.sparql.ast.cache.CacheConnectionFactory; -import com.bigdata.rdf.sparql.ast.cache.ICacheConnection; -import com.bigdata.rdf.sparql.ast.cache.ISolutionSetCache; +import com.bigdata.rdf.sparql.ast.ssets.ISolutionSetManager; +import com.bigdata.rdf.sparql.ast.ssets.SolutionSetManager; import com.bigdata.rdf.spo.ISPO; import com.bigdata.rdf.spo.SPO; import com.bigdata.rdf.spo.SPOPredicate; @@ -61,7 +57,6 @@ import com.bigdata.relation.accesspath.IAccessPath; import com.bigdata.relation.accesspath.IBlockingBuffer; import com.bigdata.rwstore.sector.IMemoryManager; -import com.bigdata.service.IBigdataFederation; import com.bigdata.striterator.ChunkedFilter; import com.bigdata.striterator.Chunkerator; import com.bigdata.striterator.CloseableIteratorWrapper; @@ -629,20 +624,20 @@ // Resolve the object which will give us access to the named // solution set. - final ICacheConnection cacheConn = CacheConnectionFactory - .getExistingCacheConnection(getRunningQuery() - .getQueryEngine()); +// final ICacheConnection cacheConn = CacheConnectionFactory +// .getExistingCacheConnection(getRunningQuery() +// .getQueryEngine()); final String namespace = namedSetRef.getNamespace(); final long timestamp = namedSetRef.getTimestamp(); - final ISolutionSetCache sparqlCache = cacheConn == null ? null - : cacheConn.getSparqlCache(namespace, timestamp); - // TODO ClassCastException is possible? 
- final AbstractJournal localIndexManager = (AbstractJournal) getIndexManager(); + final IBTreeManager localIndexManager = (IBTreeManager) getIndexManager(); + final ISolutionSetManager sparqlCache = new SolutionSetManager( + localIndexManager, namespace, timestamp); + return NamedSolutionSetRefUtility.getSolutionSet(// sparqlCache,// localIndexManager,// Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/NamedSolutionSetRefUtility.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/NamedSolutionSetRefUtility.java 2013-05-30 20:40:36 UTC (rev 7174) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/NamedSolutionSetRefUtility.java 2013-05-30 21:12:22 UTC (rev 7175) @@ -35,10 +35,11 @@ import com.bigdata.btree.IIndex; import com.bigdata.btree.ISimpleIndexAccess; import com.bigdata.journal.AbstractJournal; +import com.bigdata.journal.IBTreeManager; import com.bigdata.journal.ITx; import com.bigdata.journal.TimestampUtility; import com.bigdata.rdf.sparql.ast.ISolutionSetStats; -import com.bigdata.rdf.sparql.ast.cache.ISolutionSetCache; +import com.bigdata.rdf.sparql.ast.ssets.ISolutionSetManager; import com.bigdata.rdf.store.AbstractTripleStore; import com.bigdata.striterator.Chunkerator; import com.bigdata.striterator.ICloseableIterator; @@ -144,6 +145,7 @@ } + @SuppressWarnings("rawtypes") final IVariable[] joinVars; { @@ -394,8 +396,8 @@ * the same data. */ public static ISolutionSetStats getSolutionSetStats(// - final ISolutionSetCache sparqlCache,// - final AbstractJournal localIndexManager, // + final ISolutionSetManager sparqlCache,// + final IBTreeManager localIndexManager, // final String namespace,// final long timestamp,// final String localName,// @@ -491,8 +493,8 @@ * {@link IIndex}? */ public static ICloseableIterator<IBindingSet[]> getSolutionSet( - final ISolutionSetCache sparqlCache,// - final AbstractJournal localIndexManager,// + final ISolutionSetManager sparqlCache,// + final IBTreeManager localIndexManager,// final String namespace,// final long timestamp,// final String localName,// @@ -558,6 +560,7 @@ + localName + ", joinVars=" + Arrays.toString(joinVars)); // Iterator visiting the solution set. 
+ @SuppressWarnings("unchecked") final ICloseableIterator<IBindingSet> src = (ICloseableIterator<IBindingSet>) index .scan(); Property changes on: branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/aggregate ___________________________________________________________________ Modified: svn:mergeinfo - /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata/src/java/com/bigdata/bop/aggregate:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/aggregate:6766-7143 /branches/INT64_BRANCH/bigdata/src/java/com/bigdata/bop/aggregate:4486-4522 /branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregate:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG/bigdata/src/java/com/bigdata/bop/aggregate:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata/src/java/com/bigdata/bop/aggregate:4814-4836 + /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata/src/java/com/bigdata/bop/aggregate:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/aggregate:6766-7173 /branches/INT64_BRANCH/bigdata/src/java/com/bigdata/bop/aggregate:4486-4522 /branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregate:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG/bigdata/src/java/com/bigdata/bop/aggregate:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata/src/java/com/bigdata/bop/aggregate:4814-4836 Property changes on: branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/joinGraph ___________________________________________________________________ Modified: svn:mergeinfo - /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata/src/java/com/bigdata/bop/joinGraph:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/joinGraph:6766-7143 /branches/INT64_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph:4486-4522 /branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG/bigdata/src/java/com/bigdata/bop/joinGraph:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph:4814-4836 + /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata/src/java/com/bigdata/bop/joinGraph:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/joinGraph:6766-7173 /branches/INT64_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph:4486-4522 /branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG/bigdata/src/java/com/bigdata/bop/joinGraph:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph:4814-4836 Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/solutions/SolutionSetStream.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/solutions/SolutionSetStream.java 2013-05-30 20:40:36 UTC (rev 7174) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/solutions/SolutionSetStream.java 2013-05-30 21:12:22 UTC (rev 7175) @@ -49,7 +49,6 @@ import com.bigdata.rdf.internal.encoder.SolutionSetStreamDecoder; import 
com.bigdata.rdf.internal.encoder.SolutionSetStreamEncoder; import com.bigdata.rdf.sparql.ast.ISolutionSetStats; -import com.bigdata.rdf.sparql.ast.SolutionSetStats; import com.bigdata.stream.Stream; import com.bigdata.striterator.Chunkerator; import com.bigdata.striterator.ICloseableIterator; @@ -171,7 +170,7 @@ * by {@link Checkpoint#create(IRawStore, IndexMetadata)} since * Stream.create() is being invoked rather than SolutionSetStream.create(). * - * @see https://sourceforge.net/apps/trac/bigdata/ticket/585 (GIST) + * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/585" > GIST </a> */ public static SolutionSetStream create(final IRawStore store, final StreamIndexMetadata metadata) { @@ -202,10 +201,10 @@ } /** - * Return the address of the {@link SolutionSetStats} to be written into the + * Return the address of the {@link ISolutionSetStats} to be written into the * next {@link Checkpoint} record. The caller must have {@link #flush()} the * {@link SolutionSetStream} as a pre-condition (to ensure that the stats - * have been written out). If the {@link SolutionSetStats} are not loaded, + * have been written out). If the {@link ISolutionSetStats} are not loaded, * then the address from the last {@link Checkpoint} record is returned. */ public long getStatsAddr() { Property changes on: branches/READ_CACHE/bigdata/src/java/com/bigdata/bop/util ___________________________________________________________________ Modified: svn:mergeinfo - /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata/src/java/com/bigdata/bop/util:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/util:6766-7143 /branches/INT64_BRANCH/bigdata/src/java/com/bigdata/bop/util:4486-4522 /branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/util:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG/bigdata/src/java/com/bigdata/bop/util:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata/src/java/com/bigdata/bop/util:4814-4836 + /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata/src/java/com/bigdata/bop/util:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/util:6766-7173 /branches/INT64_BRANCH/bigdata/src/java/com/bigdata/bop/util:4486-4522 /branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/util:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG/bigdata/src/java/com/bigdata/bop/util:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata/src/java/com/bigdata/bop/util:4814-4836 Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/btree/ICheckpointProtocol.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/btree/ICheckpointProtocol.java 2013-05-30 20:40:36 UTC (rev 7174) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/btree/ICheckpointProtocol.java 2013-05-30 21:12:22 UTC (rev 7175) @@ -23,6 +23,7 @@ */ package com.bigdata.btree; +import com.bigdata.btree.view.FusedView; import com.bigdata.counters.ICounterSetAccess; import com.bigdata.journal.AbstractJournal; import com.bigdata.journal.AbstractTask; @@ -39,8 +40,13 @@ * TODO Try to lift out an abstract implementation of this interface for * HTree, BTree, and Stream. This will be another step towards GIST * support. 
There are protected methods which are used on those classes - * which should be lifted into the abstract base class. - */ + * which should be lifted into the abstract base class. Also, try to + * reconcile this interface with {@link ILocalBTreeView} implementations + * that do not implement {@link ICheckpointProtocol} ({@link FusedView}, + * {@link ReadCommittedView}). + * + * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/585" > GIST </a> + */ public interface ICheckpointProtocol extends ICommitter, ICounterSetAccess, ISimpleIndexAccess { Property changes on: branches/READ_CACHE/bigdata/src/java/com/bigdata/htree/raba ___________________________________________________________________ Modified: svn:mergeinfo - /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata/src/java/com/bigdata/htree/raba:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/htree/raba:6766-7143 /branches/INT64_BRANCH/bigdata/src/java/com/bigdata/htree/raba:4486-4522 /branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/raba:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG/bigdata/src/java/com/bigdata/htree/raba:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata/src/java/com/bigdata/htree/raba:4814-4836 + /branches/BIGDATA_OPENRDF_2_6_9_UPDATE/bigdata/src/java/com/bigdata/htree/raba:6769-6785 /branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/htree/raba:6766-7173 /branches/INT64_BRANCH/bigdata/src/java/com/bigdata/htree/raba:4486-4522 /branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/raba:4525-4531,4533-4548,4550-4584,4586-4609,4611-4632,4634-4643,4646-4672,4674-4685,4687-4693,4697-4735,4737-4782,4784-4792,4794-4796,4798-4801 /branches/RWSTORE_1_1_0_DEBUG/bigdata/src/java/com/bigdata/htree/raba:5896-5935 /branches/TIDS_PLUS_BLOBS_BRANCH/bigdata/src/java/com/bigdata/htree/raba:4814-4836 Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/io/writecache/WriteCacheService.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/io/writecache/WriteCacheService.java 2013-05-30 20:40:36 UTC (rev 7174) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/io/writecache/WriteCacheService.java 2013-05-30 21:12:22 UTC (rev 7175) @@ -550,7 +550,7 @@ * WCS write cache compaction causes errors in RWS postHACommit() * </a> */ - this.compactionEnabled = canCompact() && compactionThreshold < 100; + this.compactionEnabled = false;//canCompact() && compactionThreshold < 100; if (log.isInfoEnabled()) log.info("Compaction Enabled: " + compactionEnabled Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractTask.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractTask.java 2013-05-30 20:40:36 UTC (rev 7174) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractTask.java 2013-05-30 21:12:22 UTC (rev 7175) @@ -123,16 +123,6 @@ static protected final Logger log = Logger.getLogger(AbstractTask.class); /** - * True iff the {@link #log} level is INFO or less. - */ - final protected boolean INFO = log.isInfoEnabled(); - - /** - * True iff the {@link #log} level is DEBUG or less. - */ - final protected boolean DEBUG = log.isDebugEnabled(); - - /** * Used to protect against re-submission of the same task object. 
*/ private final AtomicBoolean submitted = new AtomicBoolean(false); @@ -463,7 +453,7 @@ if (commitList.put(name, this) != null) { - if (INFO) + if (log.isInfoEnabled()) log.info("Added index to commit list: name=" + name); } @@ -477,7 +467,7 @@ */ private void clearIndexCache() { - if (INFO) + if (log.isInfoEnabled()) log.info("Clearing hard reference cache: " + indexCache.size() + " indices accessed"); @@ -543,6 +533,7 @@ * @todo modify to return <code>null</code> if the index is not * registered? */ + @Override synchronized final public ILocalBTreeView getIndex(final String name) { if (name == null) { @@ -1729,7 +1720,7 @@ MDC.put("timestamp", Long.valueOf(timestamp)); - if(INFO) + if(log.isInfoEnabled()) MDC.put("resources", Arrays.toString(resource)); } @@ -1744,7 +1735,7 @@ MDC.remove("timestamp"); - if(INFO) + if(log.isInfoEnabled()) MDC.remove("resources"); } @@ -1865,7 +1856,7 @@ if (isReadWriteTx) { - if (INFO) + if (log.isInfoEnabled()) log.info("Running read-write tx: timestamp=" + timestamp); // if(tx.isReadOnly()) { @@ -1915,7 +1906,7 @@ clearIndexCache(); - if(INFO) log.info("Reader is done: "+this); + if(log.isInfoEnabled()) log.info("Reader is done: "+this); } @@ -1934,7 +1925,7 @@ } finally { - if(INFO) log.info("done: "+this); + if(log.isInfoEnabled()) log.info("done: "+this); } @@ -1954,7 +1945,7 @@ final Thread t = Thread.currentThread(); - if(INFO) + if(log.isInfoEnabled()) log.info("Unisolated write task: " + this + ", thread=" + t); // // declare resource(s) to lock (exclusive locks are used). @@ -2027,7 +2018,7 @@ // set flag. ran = true; - if (INFO) + if (log.isInfoEnabled()) log.info("Task Ok: class=" + this); /* @@ -2049,7 +2040,7 @@ // Do not re-invoke it afterTask failed above. - if (INFO) + if (log.isInfoEnabled()) log.info("Task failed: class=" + this + " : " + t2); writeService.afterTask(this, t2); @@ -2343,6 +2334,8 @@ class IsolatedActionJournal implements IJournal, IAllocationContext { private final AbstractJournal delegate; + + @SuppressWarnings("rawtypes") private final IResourceLocator resourceLocator; public String toString() { @@ -2376,7 +2369,7 @@ * * @param source */ - @SuppressWarnings("unchecked") + @SuppressWarnings({ "unchecked", "rawtypes" }) public IsolatedActionJournal(final AbstractJournal source) { if (source == null) @@ -2416,6 +2409,7 @@ /** * Delegates to the {@link AbstractTask}. */ + @Override public void dropIndex(final String name) { AbstractTask.this.dropIndex(name); @@ -2426,12 +2420,28 @@ * Note: This is the core implementation for registering an index - it * delegates to the {@link AbstractTask}. */ + @Override public IIndex registerIndex(final String name, final BTree btree) { return AbstractTask.this.registerIndex(name, btree); } + @Override + public ICheckpointProtocol register(final String name, final IndexMetadata metadata) { + + /* + * FIXME GIST : Support registration of index types other than BTree + * (HTree, Stream, etc). + * + * @see https://sourceforge.net/apps/trac/bigdata/ticket/585 (GIST) + */ + + throw new UnsupportedOperationException(); + + } + + @Override public void registerIndex(final IndexMetadata indexMetadata) { // delegate to core impl. @@ -2439,6 +2449,7 @@ } + @Override public IIndex registerIndex(final String name, final IndexMetadata indexMetadata) { @@ -2456,6 +2467,31 @@ /** * Note: access to an unisolated index is governed by the AbstractTask. */ + @Override + public ICheckpointProtocol getUnisolatedIndex(String name) { + try { + + /* + * FIXME GIST. 
This will throw a ClassCastException if the + * returned index is an ILocalBTreeView. + * + * @see https://sourceforge.net/apps/trac/bigdata/ticket/585 (GIST) + */ + + return (ICheckpointProtocol) AbstractTask.this.getIndex(name); + + } catch(NoSuchIndexException ex) { + + // api conformance. + return null; + + } + } + + /** + * Note: access to an unisolated index is governed by the AbstractTask. + */ + @Override public IIndex getIndex(final String name) { try { @@ -2476,16 +2512,50 @@ * declare a lock - such views will always be read-only and support * concurrent readers. */ - public IIndex getIndex(final String name, final long timestamp) { + @Override + public ICheckpointProtocol getIndexLocal(final String name, + final long commitTime) { - if (timestamp == ITx.UNISOLATED) { + if (commitTime == ITx.UNISOLATED) { + + return getUnisolatedIndex(name); + + } + + /* + * The index view is obtained from the resource manager. + */ + + if (resourceManager instanceof IJournal) { + + /* + * This code path supports any type of index (BTree, HTree, + * etc). + */ + + return ((IJournal) resourceManager).getIndexLocal(name, + commitTime); + + } - return getIndex(name); + /** + * FIXME GIST : This code path only supports BTree + * (ILocalBTreeView). An attempt to resolve an HTree or other + * non-BTree based named index data structure will probably result + * in a ClassCastException. + * + * @see <a + * href="https://sourceforge.net/apps/trac/bigdata/ticket/585" + * > GIST </a> + */ + return (ICheckpointProtocol) resourceManager.getIndex(name, commitTime); - } + } - // the index view is obtained from the resource manager. - return resourceManager.getIndex(name, timestamp); + @Override + public IIndex getIndex(final String name, final long timestamp) { + + return (IIndex) getIndexLocal(name, timestamp); } @@ -2495,6 +2565,7 @@ * the name of the backing index as one of the resources for which it * acquired a lock. */ + @Override public SparseRowStore getGlobalRowStore() { // did the task declare the resource name? @@ -2510,6 +2581,7 @@ } + @Override public SparseRowStore getGlobalRowStore(final long timestamp) { if (!TimestampUtility.isReadOnly(timestamp)) { @@ -2547,6 +2619,7 @@ * declared the names of the backing indices as resources for which it * acquired a lock. */ + @Override public BigdataFileSystem getGlobalFileSystem() { // did the task declare the resource name? @@ -2583,6 +2656,7 @@ * and will break semantics when the task is isolated by a transaction * rather than unisolated. */ + @Override public TemporaryStore getTempStore() { return tempStoreFactory.getTempStore(); @@ -2590,24 +2664,28 @@ } private TemporaryStoreFactory tempStoreFactory = new TemporaryStoreFactory(); - public IResourceLocator getResourceLocator() { + @Override + public IResourceLocator<?> getResourceLocator() { return resourceLocator; } + @Override public ILocalTransactionManager getLocalTransactionManager() { return delegate.getLocalTransactionManager(); } + @Override public IResourceLockService getResourceLockService() { return delegate.getResourceLockService(); } + @Override public ExecutorService getExecutorService() { return delegate.getExecutorService(); @@ -2618,34 +2696,42 @@ * Disallowed methods (commit protocol and shutdown protocol). 
*/ + @Override public void abort() { throw new UnsupportedOperationException(); } + @Override public void close() { throw new UnsupportedOperationException(); } + @Override public void destroy() { throw new UnsupportedOperationException(); } + @Override public void deleteResources() { throw new UnsupportedOperationException(); } + @Override public long commit() { throw new UnsupportedOperationException(); } + @Override public void setCommitter(int index, ICommitter committer) { throw new UnsupportedOperationException(); } + @Override public void shutdown() { throw new UnsupportedOperationException(); } + @Override public void shutdownNow() { throw new UnsupportedOperationException(); } @@ -2658,70 +2744,87 @@ // return delegate.getKeyBuilder(); // } + @Override public void force(final boolean metadata) { delegate.force(metadata); } + @Override public int getByteCount(final long addr) { return delegate.getByteCount(addr); } + @Override public ICommitRecord getCommitRecord(final long timestamp) { return delegate.getCommitRecord(timestamp); } + @Override public CounterSet getCounters() { return delegate.getCounters(); } + @Override public File getFile() { return delegate.getFile(); } + @Override public long getOffset(final long addr) { return delegate.getOffset(addr); } + @Override public long getPhysicalAddress(final long addr) { return delegate.getPhysicalAddress(addr); } + @Override public Properties getProperties() { return delegate.getProperties(); } + @Override public UUID getUUID() { return delegate.getUUID(); } + @Override public IResourceMetadata getResourceMetadata() { return delegate.getResourceMetadata(); } + @Override public long getRootAddr(final int index) { return delegate.getRootAddr(index); } + @Override public long getLastCommitTime() { return delegate.getLastCommitTime(); } + @Override public IRootBlockView getRootBlockView() { return delegate.getRootBlockView(); } + @Override public boolean isFullyBuffered() { return delegate.isFullyBuffered(); } + @Override public boolean isOpen() { return delegate.isOpen(); } + @Override public boolean isReadOnly() { return delegate.isReadOnly(); } + @Override public boolean isStable() { return delegate.isStable(); } @@ -2730,22 +2833,27 @@ // delegate.packAddr(out, addr); // } + @Override public ByteBuffer read(final long addr) { return delegate.read(addr); } + @Override public long size() { return delegate.size(); } + @Override public long toAddr(final int nbytes, final long offset) { return delegate.toAddr(nbytes, offset); } + @Override public String toString(final long addr) { return delegate.toString(addr); } + // @Override // public IRootBlockView getRootBlock(final long commitTime) { // return delegate.getRootBlock(commitTime); // } @@ -2762,6 +2870,7 @@ * allocations to be scoped to the AbstractTask. 
*/ + @Override public long write(final ByteBuffer data) { return delegate.write(data, this); } @@ -2782,6 +2891,7 @@ return delegate.getInputStream(addr); } + @Override public void delete(final long addr) { delegate.delete(addr, this); } @@ -2808,19 +2918,23 @@ completeTask(); } + @Override public ScheduledFuture<?> addScheduledTask(final Runnable task, final long initialDelay, final long delay, final TimeUnit unit) { return delegate.addScheduledTask(task, initialDelay, delay, unit); } + @Override public boolean getCollectPlatformStatistics() { return delegate.getCollectPlatformStatistics(); } + @Override public boolean getCollectQueueStatistics() { return delegate.getCollectQueueStatistics(); } + @Override public int getHttpdPort() { return delegate.getHttpdPort(); } @@ -2849,6 +2963,8 @@ private class ReadOnlyJournal implements IJournal { private final IJournal delegate; + + @SuppressWarnings("rawtypes") private final DefaultResourceLocator resourceLocator; public String toString() { @@ -2857,7 +2973,7 @@ } - @SuppressWarnings("unchecked") + @SuppressWarnings({ "unchecked", "rawtypes" }) public ReadOnlyJournal(final AbstractJournal source) { if (source == null) @@ -2885,12 +3001,41 @@ * do). */ + @Override + public IIndex getIndex(final String name, final long timestamp) { + + if (timestamp == ITx.UNISOLATED) + throw new UnsupportedOperationException(); + + if (timestamp == AbstractTask.this.timestamp) { + + // to the AbstractTask + try { + + return AbstractTask.this.getIndex(name); + + } catch(NoSuchIndexException ex) { + + // api conformance. + return null; + + } + + } + + // to the backing journal. + return (IIndex) delegate.getIndexLocal(name, timestamp); + + } + /** * {@inheritDoc} * <p> * Note: Does not allow access to {@link ITx#UNISOLATED} indices. */ - public IIndex getIndex(final String name, final long timestamp) { + @Override + public ICheckpointProtocol getIndexLocal(final String name, + final long commitTime) { if (timestamp == ITx.UNISOLATED) throw new UnsupportedOperationException(); @@ -2900,7 +3045,12 @@ // to the AbstractTask try { - return AbstractTask.this.getIndex(name); + /* + * FIXME GIST : This will throw a ClassCastException if the + * index type is ReadCommittedIndex or FusedView. + */ + return (ICheckpointProtocol) AbstractTask.this + .getIndex(name); } catch(NoSuchIndexException ex) { @@ -2912,7 +3062,7 @@ } // to the backing journal. - return delegate.getIndex(name, timestamp); + return delegate.getIndexLocal(name, timestamp); } @@ -2937,30 +3087,53 @@ * Note: Not supported since this method returns the * {@link ITx#UNISOLATED} index. */ + @Override + public ICheckpointProtocol getUnisolatedIndex(String name) { + + throw new UnsupportedOperationException(); + + } + + /** + * Note: Not supported since this method returns the + * {@link ITx#UNISOLATED} index. 
+ */ + @Override public IIndex getIndex(String name) { throw new UnsupportedOperationException(); } + @Override public void dropIndex(String name) { throw new UnsupportedOperationException(); } + @Override + public ICheckpointProtocol register(String name, IndexMetadata metadata) { + + throw new UnsupportedOperationException(); + + } + + @Override public void registerIndex(IndexMetadata indexMetadata) { throw new UnsupportedOperationException(); } + @Override public IIndex registerIndex(String name, BTree btree) { throw new UnsupportedOperationException(); } + @Override public IIndex registerIndex(String name, IndexMetadata indexMetadata) { throw new UnsupportedOperationException(); @@ -2971,6 +3144,7 @@ * Returns an {@link ITx#READ_COMMITTED} view iff the index exists and * <code>null</code> otherwise. */ + @Override public SparseRowStore getGlobalRowStore() { /* @@ -3000,6 +3174,7 @@ } + @Override public SparseRowStore getGlobalRowStore(final long timestamp) { /* @@ -3036,6 +3211,7 @@ * Returns an {@link ITx#READ_COMMITTED} view iff the file system exists * and <code>null</code> otherwise. */ + @Override public BigdataFileSystem getGlobalFileSystem() { /* @@ -3085,6 +3261,7 @@ * and will break semantics when the task is isolated by a transaction * rather than unisolated. */ + @Override public TemporaryStore getTempStore() { return tempStoreFactory.getTempStore(); @@ -3092,24 +3269,28 @@ } private TemporaryStoreFactory tempStoreFactory = new TemporaryStoreFactory(); - public DefaultResourceLocator getResourceLocator() { + @Override + public DefaultResourceLocator<?> getResourceLocator() { return resourceLocator; } + @Override public ILocalTransactionManager getLocalTransactionManager() { return delegate.getLocalTransactionManager(); } + @Override public IResourceLockService getResourceLockService() { return delegate.getResourceLockService(); } + @Override public ExecutorService getExecutorService() { return delegate.getExecutorService(); @@ -3120,34 +3301,42 @@ * Disallowed methods (commit and shutdown protocols). */ + @Override public void abort() { throw new UnsupportedOperationException(); } + @Override public void close() { throw new UnsupportedOperationException(); } + @Override public void destroy() { throw new UnsupportedOperationException(); } + @Override public long commit() { throw new UnsupportedOperationException(); } + @Override public void deleteResources() { throw new UnsupportedOperationException(); } + @Override public void setCommitter(int index, ICommitter committer) { throw new UnsupportedOperationException(); } + @Override public void shutdown() { throw new UnsupportedOperationException(); } + @Override public void shutdownNow() { throw new UnsupportedOperationException(); } @@ -3156,10 +3345,12 @@ * Disallowed methods (methods that write on the store). */ + @Override public void force(boolean metadata) { throw new UnsupportedOperationException(); } + @Override public long write(ByteBuffer data) { throw new UnsupportedOperationException(); } @@ -3169,6 +3360,7 @@ // throw new UnsupportedOperationException(); // } + @Override public void delete(long addr) { throw new UnsupportedOperationException(); } @@ -3177,86 +3369,107 @@ * Methods that delegate directly to the backing journal. 
*/ + @Override public int getByteCount(long addr) { return delegate.getByteCount(addr); } + @Override public ICommitRecord getCommitRecord(long timestamp) { return delegate.getCommitRecord(timestamp); } + @Override public CounterSet getCounters() { return delegate.getCounters(); } + @Override public File getFile() { return delegate.getFile(); } + @Override public long getOffset(long addr) { return delegate.getOffset(addr); } + @Override public long getPhysicalAddress(final long addr) { return delegate.getPhysicalAddress(addr); } + @Override public Properties getProperties() { return delegate.getProperties(); } + @Override public UUID getUUID() { return delegate.getUUID(); } + @Override public IResourceMetadata getResourceMetadata() { return delegate.getResourceMetadata(); } + @Override public long getRootAddr(int index) { return delegate.getRootAddr(index); } + @Override public long getLastCommitTime() { return delegate.getLastCommitTime(); } + @Override public IRootBlockView getRootBlockView() { return delegate.getRootBlockView(); } + @Override public boolean isFullyBuffered() { return delegate.isFullyBuffered(); } + @Override public boolean isOpen() { return delegate.isOpen(); } + @Override public boolean isReadOnly() { return delegate.isReadOnly(); } + @Override public boolean isStable() { return delegate.isStable(); } + @Override public ByteBuffer read(long addr) { return delegate.read(addr); } + @Override public long size() { return delegate.size(); } + @Override public long toAddr(int nbytes, long offset) { return delegate.toAddr(nbytes, offset); } + @Override public String toString(long addr) { return delegate.toString(addr); } +// @Override // public IRootBlockView getRootBlock(long commitTime) { // return delegate.getRootBlock(commitTime); // } @@ -3265,19 +3478,23 @@ // return delegate.getRootBlocks(startTime); // } + @Override public ScheduledFuture<?> addScheduledTask(Runnable task, long initialDelay, long delay, TimeUnit unit) { return delegate.addScheduledTask(task, initialDelay, delay, unit); } + @Override public boolean getCollectPlatformStatistics() { return delegate.getCollectPlatformStatistics(); } + @Override public boolean getCollectQueueStatistics() { return delegate.getCollectQueueStatistics(); } + @Override public int getHttpdPort() { return delegate.getHttpdPort(); } @@ -3307,71 +3524,87 @@ private IIndexManager delegate; - public DelegateIndexManager(IIndexManager delegate) { + public DelegateIndexManager(final IIndexManager delegate) { this.delegate = delegate; } + @Override public void dropIndex(String name) { delegate.dropIndex(name); } + @Override public ExecutorService getExecutorService() { return delegate.getExecutorService(); } + @Override public BigdataFileSystem getGlobalFileSystem() { return delegate.getGlobalFileSystem(); } + @Override public SparseRowStore getGlobalRowStore() { return delegate.getGlobalRowStore(); } + @Override public SparseRowStore getGlobalRowStore(final long timestamp) { return delegate.getGlobalRowStore(timestamp); } + @Ove... [truncated message content] |
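The bulk of the truncated listing above is mechanical: the ReadOnlyJournal and DelegateIndexManager wrappers gain @Override annotations, read-side methods delegate directly to the backing journal, and the commit and shutdown protocol methods throw UnsupportedOperationException. A minimal sketch of that delegation pattern, with hypothetical Store and Index types standing in for the bigdata interfaces (IJournal, IIndex and friends), not the actual classes:

{{{
// Hypothetical stand-ins for the bigdata interfaces; illustration only.
interface Index {
}

interface Store {
    Index getIndex(String name, long timestamp); // read path
    void registerIndex(String name);             // write path
    long commit();                               // commit protocol
}

// Read-only view: reads forward to the delegate, mutators are disallowed.
final class ReadOnlyStore implements Store {

    private final Store delegate;

    ReadOnlyStore(final Store delegate) {
        if (delegate == null)
            throw new IllegalArgumentException();
        this.delegate = delegate;
    }

    @Override
    public Index getIndex(final String name, final long timestamp) {
        // Read-side methods delegate directly to the backing store.
        return delegate.getIndex(name, timestamp);
    }

    @Override
    public void registerIndex(final String name) {
        // Mutators are disallowed on the read-only view.
        throw new UnsupportedOperationException();
    }

    @Override
    public long commit() {
        // The commit protocol is likewise disallowed.
        throw new UnsupportedOperationException();
    }
}
}}}

The GIST caveats flagged as FIXME in the diff follow the same shape: the read path resolves an index by name and timestamp and then casts the result, which is only safe while every named structure on that code path is a BTree view.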
From: <mar...@us...> - 2013-05-31 15:25:40
Revision: 7180 http://bigdata.svn.sourceforge.net/bigdata/?rev=7180&view=rev Author: martyncutcher Date: 2013-05-31 15:25:32 +0000 (Fri, 31 May 2013) Log Message: ----------- Fixes problems with WORMStrategy use in HA. Specifically addresses live message compression and HALog playback for both compressed and uncompressed content required in Resync. Modified Paths: -------------- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/halog/HALogReader.java branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/pipeline/HAReceiveService.java branches/READ_CACHE/bigdata/src/java/com/bigdata/io/compression/NOPRecordCompressor.java branches/READ_CACHE/bigdata/src/java/com/bigdata/io/compression/RecordCompressor.java branches/READ_CACHE/bigdata/src/java/com/bigdata/io/writecache/WriteCache.java branches/READ_CACHE/bigdata/src/java/com/bigdata/io/writecache/WriteCacheService.java branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractJournal.java branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/WORMStrategy.java branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/AbstractHA3JournalServerTestCase.java branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA3JournalServer.java Property Changed: ---------------- branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/ Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/halog/HALogReader.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/halog/HALogReader.java 2013-05-31 12:58:23 UTC (rev 7179) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/halog/HALogReader.java 2013-05-31 15:25:32 UTC (rev 7180) @@ -342,6 +342,7 @@ } switch (storeType) { + case WORM: case RW: { if (msg.getSize() > clientBuffer.capacity()) { @@ -383,27 +384,7 @@ break; } - case WORM: { - /* - * Note: The WriteCache block needs to be recovered from the - * WORMStrategy by the caller. The clientBuffer, if supplied, - * is ignored and untouched. - * - * It is permissible for the argument to be null. 
- */ - // final int nbytes = msg.getSize(); - // clientBuffer.position(0); - // clientBuffer.limit(nbytes); - // - // final long address = m_addressManager.toAddr(nbytes, msg - // .getFirstOffset()); - // final ByteBuffer src = m_bufferStrategy.read(address); - // - // clientBuffer.put(src); - // } - break; - } default: throw new UnsupportedOperationException(); } Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/pipeline/HAReceiveService.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/pipeline/HAReceiveService.java 2013-05-31 12:58:23 UTC (rev 7179) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/pipeline/HAReceiveService.java 2013-05-31 15:25:32 UTC (rev 7180) @@ -1078,7 +1078,7 @@ if (message.getChk() != (int) chk.getValue()) { throw new ChecksumError("msg=" + message.toString() - + ", actual=" + chk.getValue()); + + ", actual=" + (int) chk.getValue()); } if (callback != null) { Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/io/compression/NOPRecordCompressor.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/io/compression/NOPRecordCompressor.java 2013-05-31 12:58:23 UTC (rev 7179) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/io/compression/NOPRecordCompressor.java 2013-05-31 15:25:32 UTC (rev 7180) @@ -35,6 +35,9 @@ import java.io.OutputStream; import java.nio.ByteBuffer; +import org.apache.log4j.Logger; + + /** * A compressor that copies bytes without compression them. * @@ -42,6 +45,8 @@ * @version $Id$ */ public class NOPRecordCompressor implements IRecordCompressor, Externalizable { + + protected static final Logger log = Logger.getLogger(CompressorRegistry.class); /** * @@ -62,6 +67,10 @@ } public ByteBuffer compress(ByteBuffer bin) { + + if (log.isTraceEnabled()) + log.trace("NOP compression " + bin.limit()); + return bin; } Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/io/compression/RecordCompressor.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/io/compression/RecordCompressor.java 2013-05-31 12:58:23 UTC (rev 7179) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/io/compression/RecordCompressor.java 2013-05-31 15:25:32 UTC (rev 7180) @@ -40,6 +40,8 @@ import java.util.zip.Inflater; import java.util.zip.InflaterInputStream; +import org.apache.log4j.Logger; + import com.bigdata.btree.IndexSegment; import com.bigdata.io.ByteBufferInputStream; import com.bigdata.io.ByteBufferOutputStream; @@ -59,6 +61,8 @@ */ public class RecordCompressor implements Externalizable, IRecordCompressor { + protected static final Logger log = Logger.getLogger(CompressorRegistry.class); + /** * */ @@ -126,6 +130,9 @@ compress(bin, out); + if (log.isTraceEnabled()) + log.trace("Record compression from " + bin.limit() + " to " + out.size()); + return ByteBuffer.wrap(out.toByteArray()); } Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/io/writecache/WriteCache.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/io/writecache/WriteCache.java 2013-05-31 12:58:23 UTC (rev 7179) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/io/writecache/WriteCache.java 2013-05-31 15:25:32 UTC (rev 7180) @@ -53,6 +53,7 @@ import org.apache.log4j.Logger; +import com.bigdata.btree.BytesUtil; import com.bigdata.btree.IndexSegmentBuilder; import 
com.bigdata.counters.CounterSet; import com.bigdata.ha.msg.HAWriteMessage; @@ -772,22 +773,22 @@ * specified to the constructor). */ // package private : exposed to WriteTask.call(). - int getWholeBufferChecksum(final ByteBuffer checksumBuffer) { +// int getWholeBufferChecksum(final ByteBuffer checksumBuffer) { +// +// final ByteBuffer src = peek().duplicate(); +// // flip(limit=pos;pos=0) +// src.flip(); +// +// return getWholeBufferChecksum(checksumBuffer, src, false); +// +// } - final ByteBuffer src = peek().duplicate(); - // flip(limit=pos;pos=0) - src.flip(); + int getWholeBufferChecksum(final ByteBuffer checksumBuffer, final ByteBuffer src, final boolean isCompressed) { - return getWholeBufferChecksum(checksumBuffer, src); - - } - - int getWholeBufferChecksum(final ByteBuffer checksumBuffer, final ByteBuffer src) { - if (checker == null) throw new UnsupportedOperationException(); - if (prefixWrites) { + if (isCompressed || prefixWrites) { /* * Recalculate whole buffer checksum. * @@ -801,7 +802,8 @@ + src.capacity() + ", checksumBuffer.capacity=" + checksumBuffer.capacity(); - checksumBuffer.limit(checksumBuffer.capacity()); + // checksumBuffer.limit(checksumBuffer.capacity()); + checksumBuffer.limit(src.limit()); checksumBuffer.position(0); checksumBuffer.put(src); checksumBuffer.flip(); @@ -1646,18 +1648,21 @@ } - // log.warn("Message, position: " + send.position() + ", limit: " + send.limit()); - + final int chksum = getWholeBufferChecksum(checksumBuffer, send.duplicate(), b != send /*isCompressed*/); final HAWriteMessage msg = new HAWriteMessage(// storeUUID,// lastCommitCounter,// lastCommitTime,// sequence, // - send.limit(), getWholeBufferChecksum(checksumBuffer, send.duplicate()), + send.limit(), chksum, prefixWrites ? StoreTypeEnum.RW : StoreTypeEnum.WORM, quorumToken, fileExtent.get(), firstOffset.get(), compressorKey); - + + if (log.isTraceEnabled()) { + log.trace("Original buffer: " + b.limit() + ", final buffer: " + send.limit() + ", compressorKey: " + compressorKey + ", checksum: " + chksum); + } + return new HAPackage(msg, send); } Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/io/writecache/WriteCacheService.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/io/writecache/WriteCacheService.java 2013-05-31 12:58:23 UTC (rev 7179) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/io/writecache/WriteCacheService.java 2013-05-31 15:25:32 UTC (rev 7180) @@ -546,6 +546,12 @@ /** * FIXME WCS compaction fails! * + * CORRECTION, it is NOT clearly established that WCS compaction fails + * although some failures appear to correlate with it being enabled. + * It may be that with compaction enabled other errors are more likely + * that are not directly associated with the compaction; for example + * as a result of denser data content. 
+ * * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/674" > * WCS write cache compaction causes errors in RWS postHACommit() * </a> Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractJournal.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractJournal.java 2013-05-31 12:58:23 UTC (rev 7179) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractJournal.java 2013-05-31 15:25:32 UTC (rev 7180) @@ -165,6 +165,7 @@ import com.bigdata.service.AbstractHATransactionService; import com.bigdata.service.AbstractTransactionService; import com.bigdata.service.IBigdataFederation; +import com.bigdata.stream.Stream; import com.bigdata.util.ChecksumUtility; import com.bigdata.util.ClocksNotSynchronizedException; import com.bigdata.util.NT; @@ -5215,7 +5216,7 @@ // return (Stream) getUnisolatedIndex(name); // // } - + /** * Return the mutable view of the named persistence capable data structure * (aka the "live" or {@link ITx#UNISOLATED} view). @@ -5966,7 +5967,7 @@ final boolean leader = localService == null ? false : localService .isLeader(rootBlock.getQuorumToken()); - + if (leader) { if (_bufferStrategy instanceof IRWStrategy) { Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/WORMStrategy.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/WORMStrategy.java 2013-05-31 12:58:23 UTC (rev 7179) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/WORMStrategy.java 2013-05-31 15:25:32 UTC (rev 7180) @@ -2507,7 +2507,31 @@ public void writeRawBuffer(final IHAWriteMessage msg, final IBufferAccess b) throws IOException, InterruptedException { - // FIXME Must EXPAND() iff message is compressed. + // expand buffer before writing on the store. + final ByteBuffer xb = msg.expand(b.buffer()); + + if (true || log.isTraceEnabled()) { + log.warn("Buffer, position: " + xb.position() + + ", limit: " + xb.limit()); + } + + final IBufferAccess ba = new IBufferAccess() { + + @Override + public ByteBuffer buffer() { + return xb; + } + + @Override + public void release() throws InterruptedException { + } + + @Override + public void release(long timeout, TimeUnit unit) + throws InterruptedException { + } + }; + /* * Wrap up the data from the message as a WriteCache object. This will @@ -2519,7 +2543,7 @@ * by WriteCache.flush(). We have expanded the payload above. Now we are * just flushing the write cache onto the disk. */ - final WriteCacheImpl writeCache = writeCacheService.newWriteCache(b, + final WriteCacheImpl writeCache = writeCacheService.newWriteCache(ba, useChecksums, true/* bufferHasData */, opener, msg.getFileExtent()); @@ -2546,7 +2570,7 @@ * pos to zero and then write bytes up to the limit. So, we set the * position to the limit before calling flush. 
*/ - final ByteBuffer bb = b.buffer(); + final ByteBuffer bb = ba.buffer(); final int limit = bb.limit(); bb.position(limit); @@ -2574,13 +2598,13 @@ final IHAWriteMessage msg, final IBufferAccess b) throws IOException, InterruptedException { - // read direct from store + // Buffer now contains data directly from log, DO NOT read direct from store final ByteBuffer clientBuffer = b.buffer(); - final int nbytes = msg.getSize(); - clientBuffer.position(0); - clientBuffer.limit(nbytes); - - readRaw(/*nbytes, */msg.getFirstOffset(), clientBuffer); +// final int nbytes = msg.getSize(); +// clientBuffer.position(0); +// clientBuffer.limit(nbytes); +// +// readRaw(/*nbytes, */msg.getFirstOffset(), clientBuffer); assert clientBuffer.remaining() > 0 : "Empty buffer: " + clientBuffer; @@ -2904,13 +2928,16 @@ @Override public void writeRawBuffer(HARebuildRequest req, IHAWriteMessage msg, ByteBuffer transfer) throws IOException { + // expand buffer before writing on the store. + final ByteBuffer xtransfer = msg.expand(transfer); + // if (m_rebuildRequest == null) // throw new IllegalStateException("Store is not in rebuild state"); // // if (m_rebuildSequence != msg.getSequence()) // throw new IllegalStateException("Invalid sequence number for rebuild, expected: " + m_rebuildSequence + ", actual: " + msg.getSequence()); - FileChannelUtility.writeAll(this.opener, transfer, msg.getFirstOffset()); + FileChannelUtility.writeAll(this.opener, xtransfer, msg.getFirstOffset()); // m_rebuildSequence++; } Property changes on: branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha ___________________________________________________________________ Modified: svn:ignore - log4j.properties logging.properties + log4j.properties logging.properties results.txt Modified: branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/AbstractHA3JournalServerTestCase.java =================================================================== --- branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/AbstractHA3JournalServerTestCase.java 2013-05-31 12:58:23 UTC (rev 7179) +++ branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/AbstractHA3JournalServerTestCase.java 2013-05-31 15:25:32 UTC (rev 7180) @@ -460,6 +460,14 @@ } + protected void assertReady(final HAGlue[] members) throws IOException { + for (HAGlue member : members) { + final HAStatusEnum status = member.getHAStatus(); + System.err.println(member.getServiceName() + ": " + status); + assertFalse(HAStatusEnum.NotReady == status); + } + } + /** * Waits for joined in expected order * Modified: branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA3JournalServer.java =================================================================== --- branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA3JournalServer.java 2013-05-31 12:58:23 UTC (rev 7179) +++ branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA3JournalServer.java 2013-05-31 15:25:32 UTC (rev 7180) @@ -849,7 +849,10 @@ // new HAGlue[] { serverA, serverB, serverC }); // Verify binary equality of ALL journals. 
- assertDigestsEquals(new HAGlue[] { serverA, serverB, serverC }); + HAGlue[] services = new HAGlue[] { serverA, serverB, serverC }; + assertReady(services); + // If the services are all ready then they MUST have compatible journals + assertDigestsEquals(services); // Now force further commit when fully met to remove log files simpleTransaction(); @@ -863,10 +866,18 @@ new HAGlue[] { serverA, serverB, serverC }); // And again verify binary equality of ALL journals. - assertDigestsEquals(new HAGlue[] { serverA, serverB, serverC }); + assertDigestsEquals(new HAGlue[] { serverA, serverB, serverC }); log.info("ALL GOOD!"); } + + public void testStressTestStartAB_C_LiveResync() throws Exception { + for (int i = 0; i < 50; i++) { + log.warn("Starting run " + i); + testStartAB_C_LiveResync(); + destroyAll(); + } + } /** * Test Rebuild of late starting C service - simulates scenario where a service is removed from a This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
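Two details in this change are easy to miss: the whole-buffer checksum is now computed over the buffer that is actually sent (the compressed buffer when compression ran, detected by b != send), and the receiver compares it after truncation to int, since Checksum.getValue() returns a long while the HA write message carries an int. A minimal sketch of that invariant, using java.util.zip.Adler32 purely for illustration because the concrete checker class is not part of this diff:

{{{
import java.nio.ByteBuffer;
import java.util.zip.Adler32;
import java.util.zip.Checksum;

final class WholeBufferChecksumSketch {

    /**
     * Checksum the bytes that will actually go over the wire. When the
     * payload was compressed, src is the compressed buffer, so both the
     * message size and the checksum are taken from it rather than from
     * the original uncompressed cache buffer.
     */
    static int checksum(final ByteBuffer src) {

        final ByteBuffer b = src.duplicate(); // leave position/limit alone

        final byte[] tmp = new byte[b.remaining()];

        b.get(tmp);

        final Checksum chk = new Adler32(); // illustration only

        chk.update(tmp, 0, tmp.length);

        // getValue() is a long; the message field is an int, so sender and
        // receiver must both compare the truncated value.
        return (int) chk.getValue();
    }
}
}}}

The HAReceiveService hunk above makes exactly that truncation explicit on the receive side, so a mismatch is reported against the same 32-bit value that was sent.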
From: <tho...@us...> - 2013-06-14 15:42:29
Revision: 7195 http://bigdata.svn.sourceforge.net/bigdata/?rev=7195&view=rev Author: thompsonbry Date: 2013-06-14 15:42:19 +0000 (Fri, 14 Jun 2013) Log Message: ----------- AbstractJournal, HAJournal, HAJournalServer: Added maximum clock skew configuration option. Use of debug flag in HAJournal.getExtendedStatus() and HAStatusServletUtil. Added test suite for clock skew. Modified Paths: -------------- branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractJournal.java branches/READ_CACHE/bigdata/src/test/com/bigdata/journal/TestAll.java branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournal.java branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournalServer.java branches/READ_CACHE/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/HAStatusServletUtil.java Added Paths: ----------- branches/READ_CACHE/bigdata/src/test/com/bigdata/journal/TestClockSkewDetection.java Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractJournal.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractJournal.java 2013-06-14 15:39:25 UTC (rev 7194) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractJournal.java 2013-06-14 15:42:19 UTC (rev 7195) @@ -165,7 +165,6 @@ import com.bigdata.service.AbstractHATransactionService; import com.bigdata.service.AbstractTransactionService; import com.bigdata.service.IBigdataFederation; -import com.bigdata.stream.Stream; import com.bigdata.util.ChecksumUtility; import com.bigdata.util.ClocksNotSynchronizedException; import com.bigdata.util.NT; @@ -1780,6 +1779,8 @@ * A timestamp from the another service. * * @throws ClocksNotSynchronizedException + * + * @see ClocksNotSynchronizedException */ protected void assertBefore(final UUID serviceId1, final UUID serviceId2, final long t1, final long t2) throws ClocksNotSynchronizedException { @@ -1787,15 +1788,9 @@ // Maximum allowed clock skew. final long maxSkew = getMaximumClockSkewMillis(); - final long delta = Math.abs(t1 - t2); + ClocksNotSynchronizedException.assertBefore(serviceId1, serviceId2, t1, + t2, maxSkew); - if (delta < maxSkew) - return; - - throw new ClocksNotSynchronizedException("service1=" + serviceId1 - + ", serviceId2=" + serviceId2 + ", skew=" + delta - + "ms exceeds maximumSkew=" + maxSkew + "ms."); - } /** @@ -1804,17 +1799,14 @@ * are within some acceptable skew of one another. It is also used by * {@link #nextCommitTimestamp()} where it specifies the maximum clock skew * that will be corrected without operator intervention. + * <p> + * Note: This is overridden by the HAJournal. * * @see #assertBefore(UUID, UUID, long, long) - * - * FIXME HA TXS : Configuration Option. Note: This is not just an HA - * issue. We also need to be able to override this in order to write on - * a journal if the local clock is wildly different from the clock on - * the machine where the journal was produced. */ protected long getMaximumClockSkewMillis() { - return 5000; + throw new UnsupportedOperationException(); } Modified: branches/READ_CACHE/bigdata/src/test/com/bigdata/journal/TestAll.java =================================================================== --- branches/READ_CACHE/bigdata/src/test/com/bigdata/journal/TestAll.java 2013-06-14 15:39:25 UTC (rev 7194) +++ branches/READ_CACHE/bigdata/src/test/com/bigdata/journal/TestAll.java 2013-06-14 15:42:19 UTC (rev 7195) @@ -76,6 +76,9 @@ // test suites for file names based on commit counters. 
suite.addTestSuite( TestCommitCounterUtility.class ); + // test suite for ClocksNotSynchronizedException. + suite.addTestSuite( TestClockSkewDetection.class ); + /* * Test a scalable temporary store (uses the transient and disk-only * buffer modes). Added: branches/READ_CACHE/bigdata/src/test/com/bigdata/journal/TestClockSkewDetection.java =================================================================== --- branches/READ_CACHE/bigdata/src/test/com/bigdata/journal/TestClockSkewDetection.java (rev 0) +++ branches/READ_CACHE/bigdata/src/test/com/bigdata/journal/TestClockSkewDetection.java 2013-06-14 15:42:19 UTC (rev 7195) @@ -0,0 +1,225 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +package com.bigdata.journal; + +import java.util.UUID; + +import junit.framework.TestCase2; + +import com.bigdata.util.ClocksNotSynchronizedException; + +/** + * Test suite for {@link ClocksNotSynchronizedException}. The basic pattern of + * events is as follows: + * + * <pre> + * leader : t1 : timestamp before gather() messages are sent to followers. + * follower : t2 : timestamp taken when servicing gather() message and sent to leader with response. + * leader : t3 : timestamp taken on leader when barrier breaks. + * </pre> + * + * Of necessity, these events have a temporal order (t1 BEFORE t2; t2 BEFORE + * t3). However, there can be skew in the clocks such that the clock on the + * leader and the clock on the follower(s) are not synchronized. Some clock skew + * is allowed, but significant clock skew can cause a problem on failover. + * <p> + * The problem arises because the clocks are used to assign timestamps for + * commit points, and we index into the journal using those timestamps for + * historical reads (reading on the database as of some wall clock time). + * <p> + * {@link AbstractJournal#commitNow(long)} does ensure that time moves forward + * relative to the timestamp associated with the last commit point on the + * journal. However, if the skew is large, then this could require waiting for + * minutes, hours, or days before a new commit time could be assigned. + * <p> + * In order to avoid such long latency during failover, an error is reported + * proactively if a large clock skew is detected during the release time + * consensus protocol. + * <p> + * This test suite verifies the logic for detecting clock skew. 
+ * + * @see <a href="http://sourceforge.net/apps/trac/bigdata/ticket/686" > + * Consensus protocol does not detect clock skew correctly </a> + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + */ +public class TestClockSkewDetection extends TestCase2 { + + public TestClockSkewDetection() { + } + + public TestClockSkewDetection(String name) { + super(name); + } + + private UUID serviceId1, serviceId2; + private final static long maxSkew = 50; // ms. + + @Override + protected void setUp() throws Exception { + super.setUp(); + serviceId1 = UUID.randomUUID(); + serviceId2 = UUID.randomUUID(); + } + + @Override + protected void tearDown() throws Exception { + super.tearDown(); + serviceId1 = serviceId2 = null; + } + + /** + * Helper calls through to assertBefore(). + * @param t1 + * @param t2 + */ + private void assertBefore(final long t1, final long t2) { + + ClocksNotSynchronizedException.assertBefore(serviceId1, serviceId2, t1, + t2, maxSkew); + + } + + /** + * Helper fails if assertBefore() succeeds. + * @param t1 + * @param t2 + */ + private void assertNotBefore(final long t1, final long t2) { + + try { + + assertBefore(t1, t2); + + fail("Not expecting t1(" + t1 + ") to be 'before' t2(" + t2 + ")"); + + } catch(ClocksNotSynchronizedException ex) { + + if(log.isInfoEnabled()) + log.info("Ignoring expected exception: "+ex); + + } + + } + + /* + * Tests where [t1 LT t2]. + */ + + /** + * Tests where the delta is LT {@value #maxSkew} and <code>t1 LT t2</code> + */ + public void test01() { + + final long delta = 10; + + assertTrue(delta < maxSkew); + + assertBefore(200 - delta, 200); + + assertBefore(300 - delta, 300); + + } + + /** + * Tests where the delta is EQ {@value #maxSkew} and <code>t1 LT t2</code> + */ + public void test02() { + + final long delta = maxSkew; + + assertBefore(200 - delta, 200); + + assertBefore(300 - delta, 300); + + } + + /** + * Tests where the delta is GT {@value #maxSkew} and <code>t1 LT t2</code> + */ + public void test03() { + + final long delta = 60; + + assertTrue(delta > maxSkew); + + assertBefore(100 - delta, 200); + + assertBefore(200 - delta, 300); + + } + + /* + * Tests where [t1 GTE t2]. + */ + + /** + * Tests where the delta is LT {@value #maxSkew} and <code>t1 GTE t2</code>. + * <p> + * Note: This is a test for a "fuzzy" sense of "before". We explicitly allow + * for some clock skew since it will not cause a significantly latency on + * failover and minor clock skew (on the order of the latency of an RMI) is + * common, even with synchronized clocks. 
+ */ + public void test11() { + + final long delta = 10; + + assertTrue(delta < maxSkew); + + assertBefore(200 + delta, 200); + + assertBefore(300 + delta, 300); + + } + + /** + * Tests where the delta is EQ {@value #maxSkew} and <code>t1 GTE t2</code> + */ + public void test12() { + + final long delta = maxSkew; + + assertBefore(200 + delta, 200); + + assertBefore(300 + delta, 300); + + } + + /** + * Tests where the delta is GT {@value #maxSkew} and <code>t1 GTE t2</code> + */ + public void test13() { + + final long delta = 60; + + assertTrue(delta > maxSkew); + + assertNotBefore(200 + delta, 200); + + assertNotBefore(300 + delta, 300); + + } + +} Modified: branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournal.java =================================================================== --- branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournal.java 2013-06-14 15:39:25 UTC (rev 7194) +++ branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournal.java 2013-06-14 15:42:19 UTC (rev 7195) @@ -57,7 +57,6 @@ import com.bigdata.ha.HAGlue; import com.bigdata.ha.QuorumService; import com.bigdata.ha.RunState; -import com.bigdata.ha.halog.HALogReader; import com.bigdata.ha.halog.HALogWriter; import com.bigdata.ha.halog.IHALogReader; import com.bigdata.ha.msg.HADigestResponse; @@ -186,6 +185,11 @@ */ private final long haReleaseTimeConsensusTimeout; + /** + * @see HAJournalServer.ConfigurationOptions#MAXIMUM_CLOCK_SKEW + */ + private final long maximumClockSkew; + // /** // * @see HAJournalServer.ConfigurationOptions#HA_LOG_DIR // */ @@ -352,6 +356,25 @@ } } + + { + maximumClockSkew = (Long) config + .getEntry( + HAJournalServer.ConfigurationOptions.COMPONENT, + HAJournalServer.ConfigurationOptions.MAXIMUM_CLOCK_SKEW, + Long.TYPE, + HAJournalServer.ConfigurationOptions.DEFAULT_MAXIMUM_CLOCK_SKEW); + + if (maximumClockSkew < HAJournalServer.ConfigurationOptions.MIN_MAXIMUM_CLOCK_SKEW) { + throw new ConfigurationException( + HAJournalServer.ConfigurationOptions.MAXIMUM_CLOCK_SKEW + + "=" + + maximumClockSkew + + " : must be GTE " + + HAJournalServer.ConfigurationOptions.MIN_MAXIMUM_CLOCK_SKEW); + } + + } // HALog manager. haLogNexus = new HALogNexus(server, this, config); @@ -511,6 +534,18 @@ } + /** + * {@inheritDoc} + * + * @see HAJournalServer.ConfigurationOptions#MAXIMUM_CLOCK_SKEW + */ + @Override + public final long getMaximumClockSkewMillis() { + + return maximumClockSkew; + + } + // @Override // public final File getHALogDir() { // @@ -1881,11 +1916,14 @@ } else { innerRunStateStr.append("N/A"); } + final boolean debug = true; innerRunStateStr.append(" @ " + journal.getRootBlockView().getCommitCounter()); - innerRunStateStr.append(", haReady=" + getHAReady()); + if(debug) + innerRunStateStr.append(", haReady=" + getHAReady()); innerRunStateStr.append(", haStatus=" + getHAStatus()); - innerRunStateStr.append(", serviceId=" + if(debug) + innerRunStateStr.append(", serviceId=" + (quorumService == null ? "N/A" : quorumService .getServiceId())); /* @@ -1894,7 +1932,8 @@ * not need that synchronized keyword on nextTimestamp(). Try * removing it and then using it here.] 
*/ - innerRunStateStr.append(", now=" + System.currentTimeMillis()); + if(debug) + innerRunStateStr.append(", now=" + System.currentTimeMillis()); final String msg = server.getOperatorAlert(); if (msg != null) innerRunStateStr.append(", msg=[" + msg + "]"); Modified: branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournalServer.java =================================================================== --- branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournalServer.java 2013-06-14 15:39:25 UTC (rev 7194) +++ branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournalServer.java 2013-06-14 15:42:19 UTC (rev 7195) @@ -100,6 +100,7 @@ import com.bigdata.rwstore.RWStore; import com.bigdata.service.AbstractHATransactionService; import com.bigdata.service.jini.FakeLifeCycle; +import com.bigdata.util.ClocksNotSynchronizedException; import com.bigdata.util.InnerCause; import com.bigdata.util.concurrent.LatchedExecutor; import com.bigdata.util.concurrent.MonitoredFutureTask; @@ -197,8 +198,36 @@ long DEFAULT_HA_PREPARE_TIMEOUT = Long.MAX_VALUE; // milliseconds. long MIN_HA_PREPARE_TIMEOUT = 100; // milliseconds. + + /** + * The maximum allowed clock skew (default + * {@value #DEFAULT_MAXIMUM_CLOCK_SKEW} milliseconds). Clock skew is + * identified during the commit protocol. A timestamp (A) is taken on + * the leader. The leader then messages the followers. The followers + * take timestamps (B) and message the leader. The leader then takes + * another timestamp (C). A {@link ClocksNotSynchronizedException} will + * be thrown if any of the following conditions are violated: + * <ul> + * <li>A is not <i>before</i> B (for each follower's value of B)</li> + * <li>B is not <i>before</i> C (for each follower's value of B)</li> + * </ul> + * This option controls the maximum skew in the clocks and thus how much + * error is allowable in the interpretation of the <i>before</i> + * relation. + * + * @see ClocksNotSynchronizedException + */ + String MAXIMUM_CLOCK_SKEW = "maximumClockSkew"; + long DEFAULT_MAXIMUM_CLOCK_SKEW = 5000; + /** + * The mimimum allowed value for the {@link #MAXIMUM_CLOCK_SKEW} + * configuration option. + */ + long MIN_MAXIMUM_CLOCK_SKEW = 100; + + /** * The property whose value is the name of the directory in which write * ahead log files will be created to support resynchronization services * trying to join an HA quorum (default {@value #DEFAULT_HA_LOG_DIR}). Modified: branches/READ_CACHE/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/HAStatusServletUtil.java =================================================================== --- branches/READ_CACHE/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/HAStatusServletUtil.java 2013-06-14 15:39:25 UTC (rev 7194) +++ branches/READ_CACHE/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/HAStatusServletUtil.java 2013-06-14 15:42:19 UTC (rev 7195) @@ -72,6 +72,8 @@ */ public class HAStatusServletUtil { + private static final boolean debug = true; + /** * Disaster recover of this service from the leader (REBUILD). * @@ -265,7 +267,8 @@ // : "")// ).node("br").close(); // Show the current root block. - current.node("pre", rb.toString()); + if(debug) + current.node("pre", rb.toString()); } } @@ -478,7 +481,8 @@ p.close(); - current.node("pre", quorum.toString()); + if(debug) + current.node("pre", quorum.toString()); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
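The new TestClockSkewDetection cases pin down the intended semantics of the "fuzzy" sense of "before": t1 is accepted whenever it is genuinely earlier than t2, and a later t1 is tolerated only up to the configured skew. A minimal sketch consistent with those tests, using a plain RuntimeException as a stand-in since the body of ClocksNotSynchronizedException.assertBefore() is not shown in this diff:

{{{
import java.util.UUID;

final class ClockSkewSketch {

    /**
     * Accept t1 as "before" t2 if it is actually earlier, or later by no
     * more than maxSkewMillis; otherwise report the skew so that failover
     * does not silently wait for the clocks to catch up.
     */
    static void assertBefore(final UUID serviceId1, final UUID serviceId2,
            final long t1, final long t2, final long maxSkewMillis) {

        if (t1 < t2)
            return; // truly before: any delta is acceptable.

        final long skew = t1 - t2;

        if (skew <= maxSkewMillis)
            return; // within the tolerated skew.

        // Stand-in for ClocksNotSynchronizedException.
        throw new RuntimeException("service1=" + serviceId1 + ", service2="
                + serviceId2 + ", skew=" + skew + "ms exceeds maximumSkew="
                + maxSkewMillis + "ms.");
    }
}
}}}

With maxSkew = 50ms this accepts assertBefore(250, 200) and rejects assertBefore(260, 200), which matches test12 and test13 in the listing above.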
From: <tho...@us...> - 2013-06-30 19:07:09
Revision: 7206 http://bigdata.svn.sourceforge.net/bigdata/?rev=7206&view=rev Author: thompsonbry Date: 2013-06-30 19:06:54 +0000 (Sun, 30 Jun 2013) Log Message: ----------- See [1,2]. Reconciled edits with Martyn, primarily with respect to (a) elision of pipeline change events in QuorumPipelineImpl to resolve a deadlock [2]; and (b) closing a concurrency hole when a service joins with a met quorum after the GATHER and before the PREPARE. {{{ - QuorumPipelineImpl : incorporated changes for event elision. This provides a fix for <a href="http://sourceforge.net/apps/trac/bigdata/ticket/681" > HAJournalServer deadlock: pipelineRemove() and getLeaderId() </a>. I also fixed whitespace formatting for the indentation in elideEvents(). This was some odd mixture of tabs and spaces that was causing indent rendering issues. - BufferedWrite: Ignored add of unused logger. - WriteCacheService : did not accept log @ ERROR messages that were designated as "REMOVE TRACE". - HA messages: modified some IHAMessages that did not override toString() to provide useful representations in log output. - IHAAwaitServiceJoinRequest : javadoc and copyright notice. - IHAWriteMessage : javadoc update for the commit counter semantics. - HAReceiveService: added readFuture reference into the log message for changeDownstream. - HASendService: incorporated tryno information into channel reopen log messages per mgc. - HAGlue : folded in changes to awaitServiceJoin(), specifically it will now return the most recent consensus release time on the leader. This closes a hole when a service joins after the GATHER and before the PREPARE. By using the leader's consensus release time, the service will not permit transactions to start against a commit point that has been recycled by the leader. - AbstractJournal: reconciled. - commitNow() was reusing the nonJoinedServices definition from the GATHER. I added a JoinedAndNonJoinedServices helper class. Distinct instances of this class are now used for the GATHER and for the PREPARE/COMMIT. - doLocalCommit(): I did not pick up the 2nd half of this if/then/else. Why is it there? if ((shouldFlush || true) && doubleSync) { _bufferStrategy.force(false/* metadata */); } else { if (log.isInfoEnabled()) log.info("ALWAYS FORCE TO DISK"); _bufferStrategy.force(false/* metadata */); } - AbstractHATransactionService: changed updateReleaseTime() method into an override of setReleaseTime() which is public (versus protected) in order to expose this to the HAJournalServer. - Journal: - accepted implementation of updateReleaseTime() method from AbstractHATransactionService. - modified runWithBarrierLock() to log around the critical section so we can observe when this section runs and finishes. - HAJournal: Modified to log @ INFO the request and result for an HA digest. - HAJournalServer: reconciled all changes. - ServiceLeaveTask: Yes, it should use quorum.token(), not NO_QUORUM. There is a difference between the quorum token (which just reflects zookeeper) and the journal token (which reflects zookeeper plus whether or not the local journal is HAReady). journal.setQuorumToken(getQuorum().token()); // This is correct. - vs - journal.setQuorumToken(Quorum.NO_QUORUM); // This is wrong. - TestHA3JournalServer: reconciled numerous changes. - Modified code that was using a sleep to (presumably) wait until the current quorum broke to instead just await the next quorum meet. 
// Thread.sleep(50); // wait a while for A to shutdown // // final long token2 = quorum.awaitQuorum(awaitQuorumTimeout * 2, // TimeUnit.MILLISECONDS); // Wait for the next quorum meet on (token+1). final long token2 = awaitNextQuorumMeet(token); - AbstractHA3JournalServerTestCase: converted stderr to log @ INFO. - AbstractHAJournalServerTestCase: accepted log message @ INFO, but made it conditional. Test failures: - The 4 "overrides" tests need to be revisited. They still fail. - testQuorumABC_HAStatusUpdatesWithFailovers: The problem appears to be that [leader] can not be compared with [serverB] and [serverC] using reference testing (==). [leader] has the same data in this case as [serverC] (same UUID, same TcpEndpoint). junit.framework.AssertionFailedError: Did not elect leader consistent with expectations: leader=Proxy[HAGlue,BasicInvocationHandler[BasicObjectEndpoint[b44e72e1-dd5c-4dbc-9640-129bdab11007,TcpEndpoint[192.168.1.135:55983]]]], serverB=Proxy[HAGlue,BasicInvocationHandler[BasicObjectEndpoint[073e0614-26a6-49be-83f4-381ce6338306,TcpEndpoint[192.168.1.135:55965]]]], serverC=Proxy[HAGlue,BasicInvocationHandler[BasicObjectEndpoint[b44e72e1-dd5c-4dbc-9640-129bdab11007,TcpEndpoint[192.168.1.135:55983]]]] at junit.framework.Assert.fail(Assert.java:47) at com.bigdata.journal.jini.ha.TestHA3JournalServer.testQuorumABC_HAStatusUpdatesWithFailovers(TestHA3JournalServer.java:2946) - testStressStartAB_C_MultiTransactionResync_200_5: I have observed a failure of this test (after 33 out of 50 trials). A subsequent run of 50 trials succeeded. Good, but not perfect. junit.framework.AssertionFailedError: Fail after 33 trials : java.util.concurrent.TimeoutException at junit.framework.TestCase2.fail(TestCase2.java:90) at com.bigdata.journal.jini.ha.TestHA3JournalServer.testStressStartAB_C_MultiTransactionResync_200_5(TestHA3JournalServer.java:620) - testStress_RebuildWithPipelineReorganisation: failed on 7th run. 
java.lang.RuntimeException: junit.framework.AssertionFailedError at com.bigdata.io.TestCase3.assertCondition(TestCase3.java:250) at com.bigdata.journal.jini.ha.AbstractHA3JournalServerTestCase.awaitFullyMetQuorum(AbstractHA3JournalServerTestCase.java:1990) at com.bigdata.journal.jini.ha.TestHA3JournalServer.testStartABC_RebuildWithPipelineReorganisation(TestHA3JournalServer.java:1071) at com.bigdata.journal.jini.ha.TestHA3JournalServer._testStress_RebuildWithPipelineReorganisation(TestHA3JournalServer.java:1090) }}} [1] https://sourceforge.net/apps/trac/bigdata/ticket/530 (Journal HA) [2] https://sourceforge.net/apps/trac/bigdata/ticket/681 (HAJournalServer deadlock: pipelineRemove() and getLeaderId()) Modified Paths: -------------- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/HAGlue.java branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/QuorumPipelineImpl.java branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/msg/HADigestRequest.java branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/msg/HADigestResponse.java branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/msg/HALogDigestRequest.java branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/msg/HALogDigestResponse.java branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/msg/HASnapshotDigestRequest.java branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/msg/HASnapshotDigestResponse.java branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/msg/IHAWriteMessage.java branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/pipeline/HAReceiveService.java branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/pipeline/HASendService.java branches/READ_CACHE/bigdata/src/java/com/bigdata/io/writecache/WriteCache.java branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractJournal.java branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/Journal.java branches/READ_CACHE/bigdata/src/java/com/bigdata/quorum/QuorumEvent.java branches/READ_CACHE/bigdata/src/java/com/bigdata/rwstore/RWStore.java branches/READ_CACHE/bigdata/src/java/com/bigdata/service/AbstractHATransactionService.java branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournal.java branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournalServer.java branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/AbstractHA3JournalServerTestCase.java branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/AbstractHAJournalServerTestCase.java branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA3JournalServer.java Added Paths: ----------- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/msg/HAAwaitServiceJoinRequest.java branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/msg/IHAAwaitServiceJoinRequest.java branches/READ_CACHE/bigdata/src/java/com/bigdata/quorum/QCE.java branches/READ_CACHE/bigdata/src/java/com/bigdata/quorum/QuorumStateChangeEvent.java branches/READ_CACHE/bigdata/src/java/com/bigdata/quorum/QuorumStateChangeEventEnum.java Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/HAGlue.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/HAGlue.java 2013-06-29 20:21:35 UTC (rev 7205) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/HAGlue.java 2013-06-30 19:06:54 UTC (rev 7206) @@ -32,10 +32,12 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; +import com.bigdata.ha.msg.IHAAwaitServiceJoinRequest; import com.bigdata.ha.msg.IHADigestRequest; import 
com.bigdata.ha.msg.IHADigestResponse; import com.bigdata.ha.msg.IHALogDigestRequest; import com.bigdata.ha.msg.IHALogDigestResponse; +import com.bigdata.ha.msg.IHANotifyReleaseTimeResponse; import com.bigdata.ha.msg.IHARemoteRebuildRequest; import com.bigdata.ha.msg.IHARootBlockRequest; import com.bigdata.ha.msg.IHARootBlockResponse; @@ -96,6 +98,38 @@ InterruptedException, TimeoutException, QuorumException, AsynchronousQuorumCloseException; + /** + * A follower uses this message to request that the quorum leader await the + * visibility of the zookeeper event in which the service join becomes + * visible to the leader. This is invoked while holding a lock that blocks + * pipeline replication, so the leader can not flush the write replication + * pipeline and enter the commit. The callback to the leader ensures that + * the service join is visible to the leader before the leader makes an + * atomic decision about the set of services that are joined with the met + * quorum for a 2-phase commit. + * + * @param req + * The request. + * + * @return The most recent consensus release time for the quorum leader. + * This information is used to ensure that a service which joins + * after a gather and before a PREPARE will join with the correct + * release time for its local journal and thus will not permit + * transactions to start against commit points which have been + * recycled by the quorum leader. + * + * @throws InterruptedException + * @throws TimeoutException + * if the timeout is exceeded before the service join becomes + * visible to this service. + * + * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/681" > + * HAJournalServer deadlock: pipelineRemove() and getLeaderId()</a> + */ + public IHANotifyReleaseTimeResponse awaitServiceJoin( + IHAAwaitServiceJoinRequest req) throws IOException, + InterruptedException, TimeoutException; + /* * Synchronization. 
* Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/QuorumPipelineImpl.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/QuorumPipelineImpl.java 2013-06-29 20:21:35 UTC (rev 7205) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/QuorumPipelineImpl.java 2013-06-30 19:06:54 UTC (rev 7206) @@ -29,12 +29,15 @@ import java.io.ObjectOutput; import java.net.InetSocketAddress; import java.nio.ByteBuffer; +import java.util.Iterator; import java.util.UUID; +import java.util.concurrent.BlockingQueue; import java.util.concurrent.Callable; import java.util.concurrent.CancellationException; import java.util.concurrent.ExecutionException; import java.util.concurrent.Future; import java.util.concurrent.FutureTask; +import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.RunnableFuture; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; @@ -54,10 +57,12 @@ import com.bigdata.ha.pipeline.HASendService; import com.bigdata.io.DirectBufferPool; import com.bigdata.io.IBufferAccess; -import com.bigdata.io.writecache.WriteCache; +import com.bigdata.quorum.QCE; import com.bigdata.quorum.Quorum; import com.bigdata.quorum.QuorumException; import com.bigdata.quorum.QuorumMember; +import com.bigdata.quorum.QuorumStateChangeEvent; +import com.bigdata.quorum.QuorumStateChangeEventEnum; import com.bigdata.quorum.QuorumStateChangeListener; import com.bigdata.quorum.QuorumStateChangeListenerBase; import com.bigdata.util.InnerCause; @@ -151,11 +156,13 @@ * receive data, but no longer relays data to a downstream service. * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id$ * @param <S> + * + * @see <a href="http://sourceforge.net/apps/trac/bigdata/ticket/681" > + * HAJournalServer deadlock: pipelineRemove() and getLeaderId() </a> */ -abstract public class QuorumPipelineImpl<S extends HAPipelineGlue> extends - QuorumStateChangeListenerBase implements QuorumPipeline<S>, +abstract public class QuorumPipelineImpl<S extends HAPipelineGlue> /*extends + QuorumStateChangeListenerBase */implements QuorumPipeline<S>, QuorumStateChangeListener { static private transient final Logger log = Logger @@ -213,8 +220,691 @@ */ private final AtomicReference<PipelineState<S>> pipelineStateRef = new AtomicReference<PipelineState<S>>(); + /** + * Inner class does the actual work once to handle an event. + */ + private final InnerEventHandler innerEventHandler = new InnerEventHandler(); + + /** + * Core implementation of the handler for the various events. Always run + * while holding the {@link #lock}. + * + * @author <a href="mailto:tho...@us...">Bryan + * Thompson</a> + * + * @see <a href="http://sourceforge.net/apps/trac/bigdata/ticket/681" > + * HAJournalServer deadlock: pipelineRemove() and getLeaderId() </a> + */ + private final class InnerEventHandler extends QuorumStateChangeListenerBase { + + /** + * A queue of events that can only be handled when a write replication + * operation owns the {@link QuorumPipelineImpl#lock}. + * + * @see QuorumPipelineImpl#lock() + * @see #dispatchEvents() + */ + private final BlockingQueue<QuorumStateChangeEvent> queue = new LinkedBlockingQueue<QuorumStateChangeEvent>(); + + protected InnerEventHandler() { + + } + + /** + * Enqueue an event. + * + * @param e + * The event. 
+ */ + private void queue(final QuorumStateChangeEvent e) { + + if (log.isInfoEnabled()) + log.info("Adding StateChange: " + e); + + queue.add(e); + + } + + /** + * Boolean controls whether or not event elision is used. See below. + * + * @see <a href="http://sourceforge.net/apps/trac/bigdata/ticket/681" > + * HAJournalServer deadlock: pipelineRemove() and getLeaderId() + * </a> + */ + static private final boolean s_eventElission = true; + + /** + * Event elission endeavours to ensure that events processed + * represent current state change. + * + * This is best explained with an example from its original usage + * in processing graphic events. Whilst a "button click" is a singular + * event and all button clicks should be processed, a "mouse move" event + * could be elided with the next "mouse move" event. Thus the move events + * (L1 -> L2) and (L2 -> L3) would elide to a single (L1 -> L3). + * + * In HA RMI calls can trigger event processing, whilst other threads monitor + * state changes - such as open sockets. Without elission, monitoring threads + * will observe unnecessary transitional state changes. HOWEVER, there remains + * a problem with this pattern of synchronization. + */ + private void elideEvents() { + + if (!s_eventElission) { + return; + } + + /* + * Check for event elission: check for PIPELINE_UPSTREAM and + * PIPELINE_CHANGE and remove earlier ones check for PIPELINE_ADD + * and PIPELINE_REMOVE pairings. + */ + final Iterator<QuorumStateChangeEvent> events = queue.iterator(); + QuorumStateChangeEvent uce = null; // UPSTREAM CHANGE + QuorumStateChangeEvent dce = null; // DOWNSTREAM CHANGE + QuorumStateChangeEvent add = null; // PIPELINE_ADD + + while (events.hasNext()) { + final QuorumStateChangeEvent tst = events.next(); + if (tst.getEventType() == QuorumStateChangeEventEnum.PIPELINE_UPSTREAM_CHANGE) { + if (uce != null) { + if (log.isDebugEnabled()) + log.debug("Elission removal of: " + uce); + queue.remove(uce); + } + uce = tst; + } else if (tst.getEventType() == QuorumStateChangeEventEnum.PIPELINE_CHANGE) { + if (dce != null) { + // replace 'from' of new state with 'from' of old + tst.getDownstreamOldAndNew()[0] = dce + .getDownstreamOldAndNew()[0]; + + if (log.isDebugEnabled()) + log.debug("Elission removal of: " + dce); + queue.remove(dce); + } + dce = tst; + } else if (tst.getEventType() == QuorumStateChangeEventEnum.PIPELINE_ADD) { + add = tst; + } else if (tst.getEventType() == QuorumStateChangeEventEnum.PIPELINE_REMOVE) { + if (add != null) { + if (log.isDebugEnabled()) { + log.debug("Elission removal of: " + add); + log.debug("Elission removal of: " + tst); + } + queue.remove(add); + queue.remove(tst); + add = null; + } + if (dce != null) { + if (log.isDebugEnabled()) + log.debug("Elission removal of: " + dce); + queue.remove(dce); + dce = null; + } + if (uce != null) { + if (log.isDebugEnabled()) + log.debug("Elission removal of: " + uce); + queue.remove(uce); + uce = null; + } + } + + } + + } // elideEvents() + + /** + * Dispatch any events in the {@link #queue}. + */ + private void dispatchEvents() { + + elideEvents(); + + QuorumStateChangeEvent e; + + // If an event is immediately available, dispatch it now. + while ((e = queue.poll()) != null) { + + if (log.isInfoEnabled()) + log.info("Dispatching: " + e); + + // An event is available. + innerEventHandler.dispatchEvent(e); + + } + + } + + /** + * Dispatch to the InnerEventHandler. + * + * @param e + * The event. + * + * @throws IllegalMonitorStateException + * if the caller does not own the {@link #lock}. 
+ */ + private void dispatchEvent(final QuorumStateChangeEvent e) + throws IllegalMonitorStateException { + + if(!lock.isHeldByCurrentThread()) { + + /* + * The InnerEventHandler should be holding the outer lock. + */ + + throw new IllegalMonitorStateException(); + + } + + if (log.isInfoEnabled()) + log.info(e.toString()); + + switch (e.getEventType()) { + case CONSENSUS: + consensus(e.getLastCommitTimeConsensus()); + break; + case LOST_CONSENSUS: + lostConsensus(); + break; + case MEMBER_ADD: + memberAdd(); + break; + case MEMBER_REMOVE: + memberRemove(); + break; + case PIPELINE_ADD: + pipelineAdd(); + break; + case PIPELINE_CHANGE: { + final UUID[] a = e.getDownstreamOldAndNew(); + pipelineChange(a[0]/* oldDownStreamId */, a[1]/* newDownStreamId */); + break; + } + case PIPELINE_ELECTED_LEADER: + pipelineElectedLeader(); + break; + case PIPELINE_REMOVE: + pipelineRemove(); + break; + case PIPELINE_UPSTREAM_CHANGE: + pipelineUpstreamChange(); + break; + case QUORUM_BREAK: + quorumBreak(); + break; + case QUORUM_MEET: + quorumMeet(e.getToken(), e.getLeaderId()); + break; + case SERVICE_JOIN: + serviceJoin(); + break; + case SERVICE_LEAVE: + serviceLeave(); + break; + default: + throw new UnsupportedOperationException(e.getEventType().toString()); + } + } + +// @Override +// public void serviceLeave() { +// } +// +// @Override +// public void serviceJoin() { +// } +// +// /** +// * Extended to setup this service as a leader ({@link #setUpLeader()}), +// * or a relay ({@link #setUpReceiveAndRelay()}. +// */ +// @Override +// public void quorumMeet(final long token, final UUID leaderId) { +// super.quorumMeet(token, leaderId); +// lock.lock(); +// try { +// this.token = token; +// if(leaderId.equals(serviceId)) { +// setUpLeader(); +// } else if(member.isPipelineMember()) { +// setUpReceiveAndRelay(); +// } +// } finally { +// lock.unlock(); +// } +// } + +// @Override +// public void quorumBreak() { +// super.quorumBreak(); +// lock.lock(); +// try { +// tearDown(); +// } finally { +// lock.unlock(); +// } +// } + + /** + * {@inheritDoc} + * <p> + * This implementation sets up the {@link HASendService} or the + * {@link HAReceiveService} as appropriate depending on whether or not + * this service is the first in the pipeline order. + */ + @Override + public void pipelineAdd() { + if (log.isInfoEnabled()) + log.info(""); + super.pipelineAdd(); + lock.lock(); + try { + // The current pipeline order. + final UUID[] pipelineOrder = member.getQuorum().getPipeline(); + // The index of this service in the pipeline order. + final int index = getIndex(serviceId, pipelineOrder); + if (index == 0) { + setUpSendService(); + } else if (index > 0) { + setUpReceiveService(); + } + } finally { + lock.unlock(); + } + } + + @Override + public void pipelineElectedLeader() { + if (log.isInfoEnabled()) + log.info(""); + super.pipelineElectedLeader(); + lock.lock(); + try { + tearDown(); + setUpSendService(); + } finally { + lock.unlock(); + } + } + + /** + * {@inheritDoc} + * <p> + * This implementation tears down the {@link HASendService} or + * {@link HAReceiveService} associated with this service. 
+ */ + @Override + public void pipelineRemove() { + if (log.isInfoEnabled()) + log.info(""); + super.pipelineRemove(); + lock.lock(); + try { + tearDown(); + } finally { + lock.unlock(); + } + } + + /** + * {@inheritDoc} + * <p> + * This implementation changes the target of the {@link HASendService} + * for the leader (or the {@link HAReceiveService} for a follower) to + * send (or relay) write cache blocks to the specified service. + */ + @Override + public void pipelineChange(final UUID oldDownStreamId, + final UUID newDownStreamId) { + super.pipelineChange(oldDownStreamId, newDownStreamId); + lock.lock(); + try { + // The address of the next service in the pipeline. + final InetSocketAddress addrNext = newDownStreamId == null ? null + : getAddrNext(newDownStreamId); + if (log.isInfoEnabled()) + log.info("oldDownStreamId=" + oldDownStreamId + + ",newDownStreamId=" + newDownStreamId + + ", addrNext=" + addrNext + ", sendService=" + + sendService + ", receiveService=" + + receiveService); + if (sendService != null) { + /* + * Terminate the existing connection (we were the first + * service in the pipeline). + */ + sendService.terminate(); + if (addrNext != null) { + if (log.isDebugEnabled()) + log.debug("sendService.start(): addrNext=" + + addrNext); + sendService.start(addrNext); + } + } else if (receiveService != null) { + /* + * Reconfigure the receive service to change how it is + * relaying (we were relaying, so the receiveService was + * running but not the sendService). + */ + if (log.isDebugEnabled()) + log.debug("receiveService.changeDownStream(): addrNext=" + + addrNext); + receiveService.changeDownStream(addrNext); + } + // populate and/or clear the cache. + cachePipelineState(newDownStreamId); + if (log.isDebugEnabled()) + log.debug("pipelineChange - done."); + } finally { + lock.unlock(); + } + } + + @Override + public void pipelineUpstreamChange() { + super.pipelineUpstreamChange(); + lock.lock(); + try { + if (receiveService != null) { + /* + * Make sure that the receiveService closes out its client + * connection with the old upstream service. + */ + if (log.isInfoEnabled()) + log.info("receiveService=" + receiveService); + receiveService.changeUpStream(); + } + } finally { + lock.unlock(); + } + } + +// @Override +// public void memberRemove() { +// } +// +// @Override +// public void memberAdd() { +// } +// +// @Override +// public void lostConsensus() { +// } +// +// @Override +// public void consensus(long lastCommitTime) { +// } + + /** + * Request the {@link InetSocketAddress} of the write pipeline for a service + * (RMI). + * + * @param downStreamId + * The service. + * + * @return It's {@link InetSocketAddress} + */ + private InetSocketAddress getAddrNext(final UUID downStreamId) { + + if (downStreamId == null) + return null; + + final S service = member.getService(downStreamId); + + try { + + final InetSocketAddress addrNext = service.getWritePipelineAddr(); + + return addrNext; + + } catch (IOException e) { + + throw new RuntimeException(e); + + } + + } + + /** + * Tear down any state associated with the {@link QuorumPipelineImpl}. This + * implementation tears down the send/receive service and releases the + * receive buffer. + */ + private void tearDown() { + if (log.isInfoEnabled()) + log.info(""); + lock.lock(); + try { + /* + * Leader tear down. + */ + { + if (sendService != null) { + sendService.terminate(); + sendService = null; + } + } + /* + * Follower tear down. 
+             */
+            {
+                if (receiveService != null) {
+                    receiveService.terminate();
+                    try {
+                        receiveService.awaitShutdown();
+                    } catch (InterruptedException e) {
+                        throw new RuntimeException(e);
+                    } finally {
+                        receiveService = null;
+                    }
+                }
+                if (receiveBuffer != null) {
+                    try {
+                        /*
+                         * Release the buffer back to the pool.
+                         */
+                        receiveBuffer.release();
+                    } catch (InterruptedException e) {
+                        throw new RuntimeException(e);
+                    } finally {
+                        receiveBuffer = null;
+                    }
+                }
+            }
+            // clear cache.
+            pipelineStateRef.set(null);
+        } finally {
+            lock.unlock();
+        }
+    }
+
+        /**
+         * Populate or clear the {@link #pipelineState} cache.
+         * <p>
+         * Note: The only times we need to populate the {@link #pipelineState}
+         * are in response to a {@link #pipelineChange(UUID, UUID)} event or in
+         * response to a {@link #pipelineElectedLeader()} event.
+         *
+         * @param downStreamId
+         *            The downstream service {@link UUID}.
+         */
+        private void cachePipelineState(final UUID downStreamId) {
+
+            if (downStreamId == null) {
+
+                pipelineStateRef.set(null);
+
+                return;
+
+            }
+
+            final S nextService = member.getService(downStreamId);
+
+            final PipelineState<S> pipelineState = new PipelineState<S>();
+
+            try {
+
+                pipelineState.addr = nextService.getWritePipelineAddr();
+
+            } catch (IOException e) {
+
+                throw new RuntimeException(e);
+
+            }
+
+            pipelineState.service = nextService;
+
+            pipelineStateRef.set(pipelineState);
+
+        }
+
+        /**
+         * Setup the send service.
+         */
+        private void setUpSendService() {
+            if (log.isInfoEnabled())
+                log.info("");
+            lock.lock();
+            try {
+                // Allocate the send service.
+                sendService = new HASendService();
+                /*
+                 * The service downstream from this service.
+                 *
+                 * Note: The downstream service in the pipeline is not available
+                 * when the first service adds itself to the pipeline. In those
+                 * cases the pipelineChange() event is used to update the
+                 * HASendService to send to the downstream service.
+                 *
+                 * Note: When we handle a pipelineLeaderElected() message the
+                 * downstream service MAY already be available, which is why we
+                 * handle downstreamId != null conditionally.
+                 */
+                final UUID downstreamId = member.getDownstreamServiceId();
+                if (downstreamId != null) {
+                    // The address of the next service in the pipeline.
+                    final InetSocketAddress addrNext = member.getService(
+                            downstreamId).getWritePipelineAddr();
+                    // Start the send service.
+                    sendService.start(addrNext);
+                }
+                // populate and/or clear the cache.
+                cachePipelineState(downstreamId);
+            } catch (Throwable t) {
+                try {
+                    tearDown();
+                } catch (Throwable t2) {
+                    log.error(t2, t2);
+                }
+                throw new RuntimeException(t);
+            } finally {
+                lock.unlock();
+            }
+        }
+
+        /**
+         * Setup the service to receive pipeline writes and to relay them (if
+         * there is a downstream service).
+         */
+        private void setUpReceiveService() {
+            lock.lock();
+            try {
+                // The downstream service UUID.
+                final UUID downstreamId = member.getDownstreamServiceId();
+                // Acquire buffer from the pool to receive data.
+                try {
+                    receiveBuffer = DirectBufferPool.INSTANCE.acquire();
+                } catch (InterruptedException e) {
+                    throw new RuntimeException(e);
+                }
+                // The address of this service.
+                final InetSocketAddress addrSelf = member.getService()
+                        .getWritePipelineAddr();
+                // Address of the downstream service (if any).
+                final InetSocketAddress addrNext = downstreamId == null ? null
+                        : member.getService(downstreamId).getWritePipelineAddr();
+                // Setup the receive service.
+ receiveService = new HAReceiveService<HAMessageWrapper>(addrSelf, + addrNext, new IHAReceiveCallback<HAMessageWrapper>() { + public void callback(final HAMessageWrapper msg, + final ByteBuffer data) throws Exception { + // delegate handling of write cache blocks. + handleReplicatedWrite(msg.req, msg.msg, data); + } + }); + // Start the receive service - will not return until service is + // running + receiveService.start(); + } catch (Throwable t) { + /* + * Always tear down if there was a setup problem to avoid leaking + * threads or a native ByteBuffer. + */ + try { + tearDown(); + } catch (Throwable t2) { + log.error(t2, t2); + } finally { + log.error(t, t); + } + throw new RuntimeException(t); + } finally { + lock.unlock(); + } + } + + }; + + /** + * Acquire {@link #lock} and {@link #dispatchEvents()}. + */ + private void lock() { + boolean ok = false; + this.lock.lock(); + try { + innerEventHandler.dispatchEvents();// have lock, dispatch events. + ok = true; // success. + } finally { + if (!ok) { + // release lock if there was a problem. + this.lock.unlock(); + } + } + } + + /** + * Acquire {@link #lock} and {@link #dispatchEvents()}. + */ + private void lockInterruptibly() throws InterruptedException { + boolean ok = false; + lock.lockInterruptibly(); + try { + innerEventHandler.dispatchEvents(); // have lock, dispatch events. + ok = true; // success. + } finally { + if (!ok) { + // release lock if there was a problem. + this.lock.unlock(); + } + } + } + + /** + * {@link #dispatchEvents()} and release {@link #lock}. + */ + private void unlock() { + try { + innerEventHandler.dispatchEvents(); + } finally { + this.lock.unlock(); + } + } + public QuorumPipelineImpl(final QuorumMember<S> member) { - + if (member == null) throw new IllegalArgumentException(); @@ -232,7 +922,7 @@ @Override protected void finalize() throws Throwable { - tearDown(); + innerEventHandler.tearDown(); super.finalize(); @@ -250,7 +940,7 @@ * @return The index of the service in the array -or- <code>-1</code> if the * service does not appear in the array. */ - private int getIndex(final UUID serviceId, final UUID[] a) { + static private int getIndex(final UUID serviceId, final UUID[] a) { if (serviceId == null) throw new IllegalArgumentException(); @@ -331,330 +1021,129 @@ /* * QuorumStateChangeListener + * + * Note: This interface is delegated using a queue. The queue allows + * the processing of the events to be deferred until the appropriate + * lock is held. This prevents contention for the lock and avoids + * lock ordering problems such as described at [1]. + * + * @see InnerEventHandler */ -// /** -// * Extended to setup this service as a leader ({@link #setUpLeader()}), -// * or a relay ({@link #setUpReceiveAndRelay()}. 
-// */ -// @Override -// public void quorumMeet(final long token, final UUID leaderId) { -// super.quorumMeet(token, leaderId); -// lock.lock(); -// try { -// this.token = token; -// if(leaderId.equals(serviceId)) { -// setUpLeader(); -// } else if(member.isPipelineMember()) { -// setUpReceiveAndRelay(); -// } -// } finally { -// lock.unlock(); -// } -// } + @Override + public void pipelineAdd() { -// @Override -// public void quorumBreak() { -// super.quorumBreak(); -// lock.lock(); -// try { -// tearDown(); -// } finally { -// lock.unlock(); -// } -// } + innerEventHandler + .queue(new QCE(QuorumStateChangeEventEnum.PIPELINE_ADD)); - /** - * Sets up the {@link HASendService} or the {@link HAReceiveService} as - * appropriate depending on whether or not this service is the first in the - * pipeline order. - */ - public void pipelineAdd() { - if (log.isInfoEnabled()) - log.info(""); - super.pipelineAdd(); - lock.lock(); - try { - // The current pipeline order. - final UUID[] pipelineOrder = member.getQuorum().getPipeline(); - // The index of this service in the pipeline order. - final int index = getIndex(serviceId, pipelineOrder); - if (index == 0) { - setUpSendService(); - } else - if (index > 0) { - setUpReceiveService(); - } - } finally { - lock.unlock(); - } } + @Override public void pipelineElectedLeader() { - if (log.isInfoEnabled()) - log.info(""); - super.pipelineElectedLeader(); - lock.lock(); - try { - tearDown(); - setUpSendService(); - } finally { - lock.unlock(); - } + + innerEventHandler.queue(new QCE( + QuorumStateChangeEventEnum.PIPELINE_ELECTED_LEADER)); + } - - /** - * Tears down the {@link HASendService} or {@link HAReceiveService} - * associated with this service. - */ + @Override public void pipelineRemove() { - if (log.isInfoEnabled()) - log.info(""); - super.pipelineRemove(); - lock.lock(); - try { - tearDown(); - } finally { - lock.unlock(); - } + + innerEventHandler.queue(new QCE( + QuorumStateChangeEventEnum.PIPELINE_REMOVE)); + } - /** - * Changes the target of the {@link HASendService} for the leader (or the - * {@link HAReceiveService} for a follower) to send (or relay) write cache - * blocks to the specified service. - */ + @Override public void pipelineChange(final UUID oldDownStreamId, final UUID newDownStreamId) { - super.pipelineChange(oldDownStreamId, newDownStreamId); - lock.lock(); - try { - // The address of the next service in the pipeline. - final InetSocketAddress addrNext = newDownStreamId == null ? null - : getAddrNext(newDownStreamId); - if (log.isInfoEnabled()) - log.info("oldDownStreamId=" + oldDownStreamId - + ",newDownStreamId=" + newDownStreamId + ", addrNext=" - + addrNext + ", sendService=" + sendService - + ", receiveService=" + receiveService); - if (sendService != null) { - /* - * Terminate the existing connection (we were the first service - * in the pipeline). - */ - sendService.terminate(); - if (addrNext != null) { - if (log.isDebugEnabled()) - log.debug("sendService.start(): addrNext=" + addrNext); - sendService.start(addrNext); - } - } else if (receiveService != null) { - /* - * Reconfigure the receive service to change how it is relaying - * (we were relaying, so the receiveService was running but not - * the sendService). - */ - if (log.isDebugEnabled()) - log.debug("receiveService.changeDownStream(): addrNext=" - + addrNext); - receiveService.changeDownStream(addrNext); - } - // populate and/or clear the cache. 
- cachePipelineState(newDownStreamId); - if (log.isDebugEnabled()) - log.debug("pipelineChange - done."); - } finally { - lock.unlock(); - } + + innerEventHandler + .queue(new QCE(QuorumStateChangeEventEnum.PIPELINE_CHANGE, + new UUID[] { oldDownStreamId, newDownStreamId }, + null/* lastCommitTimeConsensus */, null/* token */, + null/* leaderId */)); + } @Override public void pipelineUpstreamChange() { - super.pipelineUpstreamChange(); - lock.lock(); - try { - if (receiveService != null) { - /* - * Make sure that the receiveService closes out its client - * connection with the old upstream service. - */ - if (log.isInfoEnabled()) - log.info("receiveService=" + receiveService); - receiveService.changeUpStream(); - } - } finally { - lock.unlock(); - } + + innerEventHandler.queue(new QCE( + QuorumStateChangeEventEnum.PIPELINE_UPSTREAM_CHANGE)); + } - /** - * Request the {@link InetSocketAddress} of the write pipeline for a service - * (RMI). - * - * @param downStreamId - * The service. - * - * @return It's {@link InetSocketAddress} - */ - private InetSocketAddress getAddrNext(final UUID downStreamId) { + @Override + public void memberAdd() { - if (downStreamId == null) - return null; + innerEventHandler.queue(new QCE(QuorumStateChangeEventEnum.MEMBER_ADD)); - final S service = member.getService(downStreamId); + } - try { + @Override + public void memberRemove() { - final InetSocketAddress addrNext = service.getWritePipelineAddr(); + innerEventHandler.queue(new QCE( + QuorumStateChangeEventEnum.MEMBER_REMOVE)); - return addrNext; - - } catch (IOException e) { + } - throw new RuntimeException(e); + @Override + public void consensus(final long lastCommitTime) { - } + innerEventHandler.queue(new QCE(QuorumStateChangeEventEnum.CONSENSUS, + null/* downstreamIds */, + lastCommitTime/* lastCommitTimeConsensus */, null/* token */, + null/* leaderId */)); } - /** - * Tear down any state associated with the {@link QuorumPipelineImpl}. This - * implementation tears down the send/receive service and releases the - * receive buffer. - */ - private void tearDown() { - if (log.isInfoEnabled()) - log.info(""); - lock.lock(); - try { - /* - * Leader tear down. - */ - { - if (sendService != null) { - sendService.terminate(); - sendService = null; - } - } - /* - * Follower tear down. - */ - { - if (receiveService != null) { - receiveService.terminate(); - try { - receiveService.awaitShutdown(); - } catch (InterruptedException e) { - throw new RuntimeException(e); - } finally { - receiveService = null; - } - } - if (receiveBuffer != null) { - try { - /* - * Release the buffer back to the pool. - */ - receiveBuffer.release(); - } catch (InterruptedException e) { - throw new RuntimeException(e); - } finally { - receiveBuffer = null; - } - } - } - // clear cache. - pipelineStateRef.set(null); - } finally { - lock.unlock(); - } + @Override + public void lostConsensus() { + + innerEventHandler.queue(new QCE( + QuorumStateChangeEventEnum.LOST_CONSENSUS)); + } - /** - * Populate or clear the {@link #pipelineState} cache. - * <p> - * Note: The only times we need to populate the {@link #pipelineState} are - * in response to a {@link #pipelineChange(UUID, UUID)} event or in response - * to message a {@link #pipelineElectedLeader()} event. - * - * @param downStreamId - * The downstream service {@link UUID}. 
- */ - private void cachePipelineState(final UUID downStreamId) { - - if (downStreamId == null) { - - pipelineStateRef.set(null); - - return; - - } + @Override + public void serviceJoin() { - final S nextService = member.getService(downStreamId); - - final PipelineState<S> pipelineState = new PipelineState<S>(); - - try { + innerEventHandler + .queue(new QCE(QuorumStateChangeEventEnum.SERVICE_JOIN)); - pipelineState.addr = nextService.getWritePipelineAddr(); - - } catch (IOException e) { - - throw new RuntimeException(e); - - } - - pipelineState.service = nextService; - - this.pipelineStateRef.set(pipelineState); - } + + @Override + public void serviceLeave() { + + innerEventHandler.queue(new QCE( + QuorumStateChangeEventEnum.SERVICE_LEAVE)); + + } + + @Override + public void quorumMeet(final long token, final UUID leaderId) { + + innerEventHandler.queue(new QCE(QuorumStateChangeEventEnum.QUORUM_MEET, + null/* downstreamIds */, null/* lastCommitTimeConsensus */, + token, leaderId)); + + } + + @Override + public void quorumBreak() { + + innerEventHandler + .queue(new QCE(QuorumStateChangeEventEnum.QUORUM_BREAK)); + + } - /** - * Setup the send service. + /* + * End of QuorumStateChangeListener. */ - private void setUpSendService() { - if (log.isInfoEnabled()) - log.info(""); - lock.lock(); - try { - // Allocate the send service. - sendService = new HASendService(); - /* - * The service downstream from this service. - * - * Note: The downstream service in the pipeline is not available - * when the first service adds itself to the pipeline. In those - * cases the pipelineChange() event is used to update the - * HASendService to send to the downstream service. - * - * Note: When we handle a pipelineLeaderElected() message the - * downstream service MAY already be available, which is why we - * handle downstreamId != null conditionally. - */ - final UUID downstreamId = member.getDownstreamServiceId(); - if (downstreamId != null) { - // The address of the next service in the pipeline. - final InetSocketAddress addrNext = member.getService( - downstreamId).getWritePipelineAddr(); - // Start the send service. - sendService.start(addrNext); - } - // populate and/or clear the cache. - cachePipelineState(downstreamId); - } catch (Throwable t) { - try { - tearDown(); - } catch (Throwable t2) { - log.error(t2, t2); - } - throw new RuntimeException(t); - } finally { - lock.unlock(); - } - } - + /** * Glue class wraps the {@link IHAWriteMessage} and the * {@link IHALogRequest} message and exposes the requires {@link IHAMessage} @@ -686,57 +1175,6 @@ } - /** - * Setup the service to receive pipeline writes and to relay them (if there - * is a downstream service). - */ - private void setUpReceiveService() { - lock.lock(); - try { - // The downstream service UUID. - final UUID downstreamId = member.getDownstreamServiceId(); - // Acquire buffer from the pool to receive data. - try { - receiveBuffer = DirectBufferPool.INSTANCE.acquire(); - } catch (InterruptedException e) { - throw new RuntimeException(e); - } - // The address of this service. - final InetSocketAddress addrSelf = member.getService() - .getWritePipelineAddr(); - // Address of the downstream service (if any). - final InetSocketAddress addrNext = downstreamId == null ? null - : member.getService(downstreamId).getWritePipelineAddr(); - // Setup the receive service. 
- receiveService = new HAReceiveService<HAMessageWrapper>(addrSelf, - addrNext, new IHAReceiveCallback<HAMessageWrapper>() { - public void callback(final HAMessageWrapper msg, - final ByteBuffer data) throws Exception { - // delegate handling of write cache blocks. - handleReplicatedWrite(msg.req, msg.msg, data); - } - }); - // Start the receive service - will not return until service is - // running - receiveService.start(); - } catch (Throwable t) { - /* - * Always tear down if there was a setup problem to avoid leaking - * threads or a native ByteBuffer. - */ - try { - tearDown(); - } catch (Throwable t2) { - log.error(t2, t2); - } finally { - log.error(t, t); - } - throw new RuntimeException(t); - } finally { - lock.unlock(); - } - } - /* * This is the leader, so send() the buffer. */ @@ -746,14 +1184,14 @@ final RunnableFuture<Void> ft; - lock.lock(); + lock(); try { ft = new FutureTask<Void>(new RobustReplicateTask(req, msg, b)); } finally { - lock.unlock(); + unlock(); } @@ -1057,7 +1495,7 @@ */ private void innerReplicate(final int retryCount) throws Exception { - lock.lockInterruptibly(); + lockInterruptibly(); try { @@ -1082,7 +1520,7 @@ } finally { - lock.unlock(); + unlock(); } @@ -1158,22 +1596,22 @@ } // class RobustReplicateTask - /** - * The logic needs to support the asynchronous termination of the - * {@link Future} that is responsible for replicating the {@link WriteCache} - * block, which is why the API exposes the means to inform the caller about - * that {@link Future}. - * - * @author <a href="mailto:tho...@us...">Bryan - * Thompson</a> - */ - public interface IRetrySendCallback { - /** - * - * @param remoteFuture - */ - void notifyRemoteFuture(final Future<Void> remoteFuture); - } +// /** +// * The logic needs to support the asynchronous termination of the +// * {@link Future} that is responsible for replicating the {@link WriteCache} +// * block, which is why the API exposes the means to inform the caller about +// * that {@link Future}. +// * +// * @author <a href="mailto:tho...@us...">Bryan +// * Thompson</a> +// */ +// public interface IRetrySendCallback { +// /** +// * +// * @param remoteFuture +// */ +// void notifyRemoteFuture(final Future<Void> remoteFuture); +// } /** * Task to send() a buffer to the follower. @@ -1304,7 +1742,7 @@ final RunnableFuture<Void> ft; - lock.lock(); + lock(); try { @@ -1337,50 +1775,125 @@ if (downstream == null) { /* - * This is the last service in the write pipeline, so just receive - * the buffer. + * This is the last service in the write pipeline, so just + * receive the buffer. * * Note: The receive service is executing this Future locally on - * this host. We do not submit it for execution ourselves. + * this host. However, we still want the receiveData() method to + * run while we are not holding the [lock] so we wrap it up as a + * task and submit it. */ - try { + ft = new FutureTask<Void>(new ReceiveTask<S>(member, token, + req, msg, b, receiveService)); + +// try { +// +// // wrap the messages together. +// final HAMessageWrapper wrappedMsg = new HAMessageWrapper( +// req, msg); +// +// // receive. +// return receiveService.receiveData(wrappedMsg, b); +// +// } catch (InterruptedException e) { +// +// throw new RuntimeException(e); +// +// } - // wrap the messages together. - final HAMessageWrapper wrappedMsg = new HAMessageWrapper( - req, msg); - - // receive. 
- return receiveService.receiveData(wrappedMsg, b); + } else { - } catch (InterruptedException e) { + /* + * A service in the middle of the write pipeline (not the first + * and not the last). + */ - throw new RuntimeException(e); + ft = new FutureTask<Void>(new ReceiveAndReplicateTask<S>( + member, token, req, msg, b, downstream, receiveService)); - } - } - - /* - * A service in the middle of the write pipeline (not the first and - * not the last). - */ - ft = new FutureTask<Void>(new ReceiveAndReplicateTask<S>(member, - token, req, msg, b, downstream, receiveService)); - } finally { - lock.unlock(); + unlock(); } - // execute the FutureTask. + // Execute the FutureTask (w/o the lock). member.getExecutor().execute(ft); return ft; } + + /** + * Task sets up the {@link Future} for the receive on the last follower. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @param <S> + */ + private static class ReceiveTask<S extends HAPipelineGlue> implements + Callable<Void> { + + private final QuorumMember<S> member; + private final long token; + private final IHASyncRequest req; + private final IHAWriteMessage msg; + private final ByteBuffer b; + private final HAReceiveService<HAMessageWrapper> receiveService; + + public ReceiveTask(final QuorumMember<S> member, + final long token, + final IHASyncRequest req, + final IHAWriteMessage msg, fin... [truncated message content] |
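For readers new to the defer-and-elide pattern described in the InnerEventHandler javadoc above, the following is a minimal, self-contained sketch of the idea. It is not part of this commit: the class name DeferAndElideSketch, the two event types, and the method lockAndDispatch() are invented purely for illustration. Listener callbacks only enqueue events; dispatch happens later, while the caller holds the lock, after redundant transitional events have been elided. This mirrors how QuorumPipelineImpl queues QuorumStateChangeEvent instances and dispatches them from lock()/lockInterruptibly()/unlock(), sidestepping the lock ordering problem described in ticket 681.

import java.util.Iterator;
import java.util.Queue;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.locks.ReentrantLock;

public class DeferAndElideSketch {

    /** Hypothetical event types: clicks are singular, moves may be elided. */
    enum EventType { BUTTON_CLICK, MOUSE_MOVE }

    static final class Event {
        final EventType type;
        Event(final EventType type) { this.type = type; }
        @Override
        public String toString() { return type.toString(); }
    }

    private final ReentrantLock lock = new ReentrantLock();

    /** Events are queued by listener callbacks without taking the lock. */
    private final Queue<Event> queue = new ConcurrentLinkedQueue<Event>();

    /** Called from listener callbacks; never blocks on the lock. */
    public void queue(final Event e) {
        queue.add(e);
    }

    /** Acquire the lock, elide stale events, then dispatch what remains. */
    public void lockAndDispatch() {
        lock.lock();
        try {
            elide();
            Event e;
            while ((e = queue.poll()) != null) {
                dispatch(e);
            }
        } finally {
            lock.unlock();
        }
    }

    /** Keep only the most recent MOUSE_MOVE; every BUTTON_CLICK is retained. */
    private void elide() {
        Event lastMove = null;
        final Iterator<Event> it = queue.iterator();
        while (it.hasNext()) {
            final Event e = it.next();
            if (e.type == EventType.MOUSE_MOVE) {
                if (lastMove != null) {
                    // A later move supersedes the earlier one.
                    queue.remove(lastMove);
                }
                lastMove = e;
            }
        }
    }

    /** Stands in for the switch in InnerEventHandler.dispatchEvent(). */
    private void dispatch(final Event e) {
        System.out.println("dispatching " + e);
    }
}

In QuorumPipelineImpl the same shape appears as queue(QuorumStateChangeEvent), elideEvents() and dispatchEvents(), with the elide step collapsing PIPELINE_UPSTREAM_CHANGE, PIPELINE_CHANGE and PIPELINE_ADD/PIPELINE_REMOVE pairs rather than mouse moves.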