From: <tho...@us...> - 2013-11-04 20:33:20
|
Revision: 7509 http://bigdata.svn.sourceforge.net/bigdata/?rev=7509&view=rev Author: thompsonbry Date: 2013-11-04 20:33:12 +0000 (Mon, 04 Nov 2013) Log Message: ----------- Added test coverage for spurious exception throw out of commit2Phase() before the root block is written on the Journal. See #760 (Code review for 2-phase commit protocol). Modified Paths: -------------- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/ha/msg/HA2PhaseCommitMessage.java branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/ha/msg/IHA2PhaseAbortMessage.java branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/ha/msg/IHA2PhaseCommitMessage.java branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/ha/msg/IHA2PhasePrepareMessage.java branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/journal/AbstractJournal.java branches/BIGDATA_RELEASE_1_3_0/bigdata-jini/src/test/com/bigdata/journal/jini/ha/HAJournalTest.java branches/BIGDATA_RELEASE_1_3_0/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA3JournalServerWithHALogs.java branches/BIGDATA_RELEASE_1_3_0/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHAJournalServerOverride.java Added Paths: ----------- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/ha/msg/IHA2PhaseCommitProtocolMessage.java branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/ha/msg/Mock2PhaseCommitProtocolException.java Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/ha/msg/HA2PhaseCommitMessage.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/ha/msg/HA2PhaseCommitMessage.java 2013-11-04 16:42:25 UTC (rev 7508) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/ha/msg/HA2PhaseCommitMessage.java 2013-11-04 20:33:12 UTC (rev 7509) @@ -66,5 +66,15 @@ + didAllServicesPrepare + "}"; } + + @Override + public boolean failCommit_beforeWritingRootBlockOnJournal() { + return false; + } + 
+ @Override + public boolean failCommit_beforeClosingHALog() { + return false; + } } Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/ha/msg/IHA2PhaseAbortMessage.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/ha/msg/IHA2PhaseAbortMessage.java 2013-11-04 16:42:25 UTC (rev 7508) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/ha/msg/IHA2PhaseAbortMessage.java 2013-11-04 20:33:12 UTC (rev 7509) @@ -28,7 +28,7 @@ * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> */ -public interface IHA2PhaseAbortMessage extends IHAMessage { +public interface IHA2PhaseAbortMessage extends IHA2PhaseCommitProtocolMessage { /** * The token for the quorum for which this request was made. Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/ha/msg/IHA2PhaseCommitMessage.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/ha/msg/IHA2PhaseCommitMessage.java 2013-11-04 16:42:25 UTC (rev 7508) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/ha/msg/IHA2PhaseCommitMessage.java 2013-11-04 20:33:12 UTC (rev 7509) @@ -35,7 +35,7 @@ * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> */ -public interface IHA2PhaseCommitMessage extends IHAMessage { +public interface IHA2PhaseCommitMessage extends IHA2PhaseCommitProtocolMessage { /** * <code>true</code> iff the service was recognized as being joined with the @@ -60,5 +60,23 @@ * the commit will still be performed). */ boolean didAllServicesPrepare(); - + + /** + * When <code>true</code> the COMMIT message will fail within the + * commit2Phase implementation. An exception will be thrown immediately + * before the new root block is written onto the journal. + * <p> + * Note: This is for unit tests only. 
+ */ + boolean failCommit_beforeWritingRootBlockOnJournal(); + + /** + * When <code>true</code> the COMMIT message will fail within the + * commit2Phase implementation. An exception will be thrown immediately + * before the closing root block is written onto the HALog file. + * <p> + * Note: This is for unit tests only. + */ + boolean failCommit_beforeClosingHALog(); + } Added: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/ha/msg/IHA2PhaseCommitProtocolMessage.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/ha/msg/IHA2PhaseCommitProtocolMessage.java (rev 0) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/ha/msg/IHA2PhaseCommitProtocolMessage.java 2013-11-04 20:33:12 UTC (rev 7509) @@ -0,0 +1,33 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +package com.bigdata.ha.msg; + +/** + * Message for one of the 2-phase commit protocol operations. 
+ * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + */ +public interface IHA2PhaseCommitProtocolMessage extends IHAMessage { + +} Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/ha/msg/IHA2PhasePrepareMessage.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/ha/msg/IHA2PhasePrepareMessage.java 2013-11-04 16:42:25 UTC (rev 7508) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/ha/msg/IHA2PhasePrepareMessage.java 2013-11-04 20:33:12 UTC (rev 7509) @@ -36,7 +36,7 @@ * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> */ -public interface IHA2PhasePrepareMessage extends IHAMessage { +public interface IHA2PhasePrepareMessage extends IHA2PhaseCommitProtocolMessage { /** * The consensus release time from the GATHER. @@ -91,6 +91,8 @@ /** * When <code>true</code>, always vote note. + * <p> + * Note: This is for unit tests only. */ boolean voteNo(); Added: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/ha/msg/Mock2PhaseCommitProtocolException.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/ha/msg/Mock2PhaseCommitProtocolException.java (rev 0) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/ha/msg/Mock2PhaseCommitProtocolException.java 2013-11-04 20:33:12 UTC (rev 7509) @@ -0,0 +1,51 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +package com.bigdata.ha.msg; + +/** + * Instances of this class are used when one of the + * {@link IHA2PhaseCommitProtocolMessage}s is configured to force a runtime + * exception during the 2-phase commit protocol. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + */ +public class Mock2PhaseCommitProtocolException extends RuntimeException { + + private static final long serialVersionUID = 1L; + + public Mock2PhaseCommitProtocolException() { + super(); + } + + public Mock2PhaseCommitProtocolException(final String msg) { + super(msg); + } + + public Mock2PhaseCommitProtocolException(final RuntimeException cause) { + + super(cause); + + } + +} Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/journal/AbstractJournal.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/journal/AbstractJournal.java 2013-11-04 16:42:25 UTC (rev 7508) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/journal/AbstractJournal.java 2013-11-04 20:33:12 UTC (rev 7509) @@ -140,6 +140,7 @@ import com.bigdata.ha.msg.IHAWriteMessage; import com.bigdata.ha.msg.IHAWriteSetStateRequest; import com.bigdata.ha.msg.IHAWriteSetStateResponse; +import com.bigdata.ha.msg.Mock2PhaseCommitProtocolException; import com.bigdata.htree.HTree; import com.bigdata.io.DirectBufferPool; import com.bigdata.io.IDataRecord; @@ -7040,16 +7041,6 @@ } } // class VoteNoTask - -// /** -// * Method must be extended by subclass to coordinate the rejected -// * commit. 
-// */ -// protected void doRejectedCommit() { -// -// doLocalAbort(); -// -// } /** * Task prepares for a 2-phase commit (syncs to the disk) and votes YES @@ -7337,9 +7328,9 @@ /* * Hook allows the test suite to force a NO vote. */ - - throw new RuntimeException("Force NO vote"); + throw new Mock2PhaseCommitProtocolException("Force NO vote"); + } // Vote YES. @@ -7640,11 +7631,23 @@ // verify that the qourum has not changed. quorum.assertQuorum(rootBlock.getQuorumToken()); + if (commitMessage.failCommit_beforeWritingRootBlockOnJournal()) { + + throw new Mock2PhaseCommitProtocolException(); + + } + /* * Write the root block on the local journal. */ AbstractJournal.this.doLocalCommit(localService, rootBlock); + if (commitMessage.failCommit_beforeClosingHALog()) { + + throw new Mock2PhaseCommitProtocolException(); + + } + /* * Write the root block on the HALog file, closing out that * file. Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata-jini/src/test/com/bigdata/journal/jini/ha/HAJournalTest.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata-jini/src/test/com/bigdata/journal/jini/ha/HAJournalTest.java 2013-11-04 16:42:25 UTC (rev 7508) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata-jini/src/test/com/bigdata/journal/jini/ha/HAJournalTest.java 2013-11-04 20:33:12 UTC (rev 7509) @@ -265,6 +265,16 @@ public void voteNo() throws IOException; /** + * @see IHA2PhaseCommitMessage#failCommit_beforeWritingRootBlockOnJournal() + */ + public void failCommit_beforeWritingRootBlockOnJournal() throws IOException; + + /** + * @see IHA2PhaseCommitMessage#failCommit_beforeClosingHALog() + */ + public void failCommit_beforeClosingHALog() throws IOException; + + /** * Set the next value to be reported by {@link BasicHA#nextTimestamp()}. * <p> * Note: Only a few specific methods call against @@ -278,7 +288,7 @@ * by {@link BasicHA#nextTimestamp()}, after which the * behavior will revert to the default. 
* - * TODO Add a "clearNextTimestamp() method. + * TODO Add a "clearNextTimestamp()" method. */ public void setNextTimestamp(long nextTimestamp) throws IOException; @@ -424,9 +434,29 @@ /** * Flag used to force the service to vote "NO" on the next two-phase * commit. + * + * @see IHA2PhasePrepareMessage#voteNo() */ private final AtomicBoolean voteNo = new AtomicBoolean(false); + /** + * Flag used to force the service to fail rather than laying down the + * new root block in the COMMIT message. + * + * @see IHA2PhaseCommitMessage#failCommit_beforeWritingRootBlockOnJournal() + */ + private final AtomicBoolean failCommit_beforeWritingRootBlockOnJournal = new AtomicBoolean( + false); + + /** + * Flag used to force the service to fail rather than laying down the + * new root block in the COMMIT message. + * + * @see IHA2PhaseCommitMessage#failCommit_beforeClosingHALog() + */ + private final AtomicBoolean failCommit_beforeClosingHALog = new AtomicBoolean( + false); + private final AtomicLong nextTimestamp = new AtomicLong(-1L); private HAGlueTestImpl(final UUID serviceId) { @@ -487,10 +517,26 @@ @Override public void voteNo() throws IOException { + voteNo.set(true); + } @Override + public void failCommit_beforeWritingRootBlockOnJournal() throws IOException { + + failCommit_beforeWritingRootBlockOnJournal.set(true); + + } + + @Override + public void failCommit_beforeClosingHALog() throws IOException { + + failCommit_beforeClosingHALog.set(true); + + } + + @Override public void setNextTimestamp(long nextTimestamp) throws IOException { this.nextTimestamp.set(nextTimestamp); @@ -915,8 +961,17 @@ if (voteNo.compareAndSet(true/* expect */, false/* update */)) { - return super.prepare2Phase(new MyPrepareMessage(prepareMessage)); + return super + .prepare2Phase(new MyPrepareMessage(prepareMessage) { + + private static final long serialVersionUID = 1L; + @Override + public boolean voteNo() { + return true; + } + }); + } else { return super.prepare2Phase(prepareMessage); @@ 
-926,13 +981,42 @@ } @Override - public Future<Void> commit2Phase(IHA2PhaseCommitMessage commitMessage) { + public Future<Void> commit2Phase(final IHA2PhaseCommitMessage commitMessage) { checkMethod("commit2Phase", new Class[] { IHA2PhaseCommitMessage.class }); - return super.commit2Phase(commitMessage); + if (failCommit_beforeWritingRootBlockOnJournal.compareAndSet( + true/* expect */, false/* update */)) { + return super.commit2Phase(new MyCommitMessage(commitMessage) { + + private static final long serialVersionUID = 1L; + + @Override + public boolean failCommit_beforeWritingRootBlockOnJournal() { + return true; + } + }); + } else if (failCommit_beforeClosingHALog.compareAndSet( + true/* expect */, false/* update */)) { + + return super.commit2Phase(new MyCommitMessage(commitMessage) { + + private static final long serialVersionUID = 1L; + + @Override + public boolean failCommit_beforeClosingHALog() { + return true; + } + }); + + } else { + + return super.commit2Phase(commitMessage); + + } + } @Override @@ -950,7 +1034,8 @@ */ @Override - public Future<IHAReadResponse> readFromDisk(IHAReadRequest readMessage) { + public Future<IHAReadResponse> readFromDisk( + final IHAReadRequest readMessage) { checkMethod("readFromDisk", new Class[] { IHAReadResponse.class }); @@ -979,8 +1064,8 @@ } @Override - public Future<Void> receiveAndReplicate(IHASyncRequest req, - IHAWriteMessage msg) throws IOException { + public Future<Void> receiveAndReplicate(final IHASyncRequest req, + final IHAWriteMessage msg) throws IOException { checkMethod("receiveAndReplicate", new Class[] { IHASyncRequest.class, IHAWriteMessage.class }); @@ -1157,7 +1242,7 @@ // // try { // -// // FIXME: hould already be closed, can we check this? +// // Should already be closed, can we check this? // // // Obtain a new connection. // ((ZKQuorumImpl) getQuorum()).getZookeeper(); @@ -1239,6 +1324,11 @@ } // class HAGlueTestImpl + /** + * Delegation pattern allows us to override select methods easily. 
+ * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + */ private static class MyPrepareMessage implements IHA2PhasePrepareMessage { /** @@ -1288,13 +1378,57 @@ } /** - * Force the PREPARE to vote NO. + * {@inheritDoc} + * <p> + * Overridden to force the PREPARE to vote NO. */ @Override public boolean voteNo() { - return true; + return delegate.voteNo(); } } + + /** + * Delegation pattern allows us to override select methods easily. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + */ + private static class MyCommitMessage implements IHA2PhaseCommitMessage { + + private static final long serialVersionUID = 1L; + + private final IHA2PhaseCommitMessage delegate; + + public MyCommitMessage(final IHA2PhaseCommitMessage msg) { + this.delegate = msg; + } + + @Override + public boolean isJoinedService() { + return delegate.isJoinedService(); + } + + @Override + public long getCommitTime() { + return delegate.getCommitTime(); + } + + @Override + public boolean didAllServicesPrepare() { + return delegate.didAllServicesPrepare(); + } + + @Override + public boolean failCommit_beforeWritingRootBlockOnJournal() { + return delegate.failCommit_beforeWritingRootBlockOnJournal(); + } + + @Override + public boolean failCommit_beforeClosingHALog() { + return delegate.failCommit_beforeClosingHALog(); + } + + } } // class HAJournalTest Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA3JournalServerWithHALogs.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA3JournalServerWithHALogs.java 2013-11-04 16:42:25 UTC (rev 7508) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA3JournalServerWithHALogs.java 2013-11-04 20:33:12 UTC (rev 7509) @@ -30,10 +30,14 @@ import net.jini.config.Configuration; +import com.bigdata.ha.HACommitGlue; import com.bigdata.ha.HAGlue; 
+import com.bigdata.ha.HAStatusEnum; import com.bigdata.ha.halog.HALogReader; import com.bigdata.ha.halog.IHALogReader; +import com.bigdata.ha.msg.IHA2PhasePrepareMessage; import com.bigdata.journal.CommitCounterUtility; +import com.bigdata.journal.jini.ha.HAJournalTest.HAGlueTest; /** * Test suite when we are using the {@link DefaultSnapshotPolicy} and @@ -443,4 +447,98 @@ } + /** + * Three services are started in [A,B,C] order. B is setup for + * {@link HACommitGlue#prepare2Phase(IHA2PhasePrepareMessage)} to throw an + * exception inside of the commit2Phase() method rather than at the external + * RMI interface. + * <p> + * A simple transaction is performed. We verify that the transaction + * completes successfully, that the quorum token is unchanged, and that + * [A,C] both participated in the commit. We also verify that B is moved to + * the end of the pipeline (by doing a serviceLeave and then re-entering the + * pipeline) and that it resyncs with the met quorum and finally re-joins + * with the met quorum. The quorum should not break across this test. + * + * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/760" > + * Review commit2Phase semantics when a follower fails </a> + * + * @see TestHAJournalServerOverride#testStartABC_commit2Phase_B_failCommit_beforeWritingRootBlockOnJournal_HALogsPurgedAtCommit() + */ + public void testStartABC_commit2Phase_B_failCommit_beforeWritingRootBlockOnJournal_HALogsNotPurgedAtCommit() + throws Exception { + + // Enforce the join order. + final ABC startup = new ABC(true /*sequential*/); + + //HAJournalTest.dumpThreads(); + + final long token = awaitFullyMetQuorum(); + + // Should be one commit point. + awaitCommitCounter(1L, startup.serverA, startup.serverB, + startup.serverC); + + /* + * Setup B to fail the "COMMIT" message (specifically, it will throw + * back an exception rather than executing the commit). 
+ */ + ((HAGlueTest) startup.serverB) + .failCommit_beforeWritingRootBlockOnJournal(); + + /* + * Simple transaction. + * + * Note: B will fail the commit without laying down the root block and + * will transition into the ERROR state. From there, it will move to + * SeekConsensus and then RESYNC. While in RESYNC it will pick up the + * missing HALog and commit point. Finally, it will transition into + * RunMet. + */ + simpleTransaction(); + + // Verify quorum is unchanged. + assertEquals(token, quorum.token()); + + // Should be two commit points on {A,C}. + awaitCommitCounter(2L, startup.serverA, startup.serverC); + + /* + * Just one commit point on B + * + * TODO This is a data race. It is only transiently true. + */ + awaitCommitCounter(1L, startup.serverB); + + /* + * B is NotReady + * + * TODO This is a data race. It is only transiently true. + */ + awaitHAStatus(startup.serverB, HAStatusEnum.NotReady); + + /* + * The pipeline should be reordered. B will do a service leave, then + * enter seek consensus, and then re-enter the pipeline. + */ + awaitPipeline(new HAGlue[] { startup.serverA, startup.serverC, + startup.serverB }); + + awaitFullyMetQuorum(); + + /* + * There should be two commit points on {A,C,B} (note that this assert + * does not pay attention to the pipeline order). + */ + awaitCommitCounter(2L, startup.serverA, startup.serverC, + startup.serverB); + + // B should be a follower again. + awaitHAStatus(startup.serverB, HAStatusEnum.Follower); + + // quorum token is unchanged. 
+ assertEquals(token, quorum.token()); + + } + } Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHAJournalServerOverride.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHAJournalServerOverride.java 2013-11-04 16:42:25 UTC (rev 7508) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHAJournalServerOverride.java 2013-11-04 20:33:12 UTC (rev 7509) @@ -38,10 +38,10 @@ import com.bigdata.ha.HACommitGlue; import com.bigdata.ha.HAGlue; import com.bigdata.ha.HAStatusEnum; -import com.bigdata.ha.msg.IHA2PhaseCommitMessage; import com.bigdata.ha.msg.IHA2PhasePrepareMessage; import com.bigdata.ha.msg.IHANotifyReleaseTimeRequest; import com.bigdata.journal.AbstractTask; +import com.bigdata.journal.jini.ha.HAJournalServer.RunStateEnum; import com.bigdata.journal.jini.ha.HAJournalTest.HAGlueTest; import com.bigdata.journal.jini.ha.HAJournalTest.SpuriousTestException; import com.bigdata.quorum.zk.ZKQuorumImpl; @@ -174,14 +174,37 @@ * When we add concurrent unisolated writers, the user level transaction * abort will just discard the buffered writes for a specific * {@link AbstractTask}. - * - * @throws Exception */ public void testStartABC_userLevelAbortDoesNotCauseQuorumBreak() throws Exception { - fail("write test"); + final ABC x = new ABC(true/*sequential*/); + + final long token = awaitFullyMetQuorum(); + + // Now run several transactions + final int NTX = 5; + for (int i = 0; i < NTX; i++) + simpleTransaction(); + // wait until the commit point is registered on all services. + awaitCommitCounter(NTX + 1L, new HAGlue[] { x.serverA, x.serverB, + x.serverC }); + + // Verify order. + awaitPipeline(new HAGlue[] { x.serverA, x.serverB, x.serverC }); + awaitJoined(new HAGlue[] { x.serverA, x.serverB, x.serverC }); + + // Run a transaction that forces a 2-phase abort. 
+ ((HAGlueTest) x.serverA).simpleTransaction_abort(); + + // Reverify order. + awaitPipeline(new HAGlue[] { x.serverA, x.serverB, x.serverC }); + awaitJoined(new HAGlue[] { x.serverA, x.serverB, x.serverC }); + + // Verify no failover of the leader. + assertEquals(token, awaitFullyMetQuorum()); + } /** @@ -375,13 +398,14 @@ /** * Three services are started in [A,B,C] order. B is setup for - * {@link HACommitGlue#prepare2Phase(IHA2PhasePrepareMessage)} to vote "NO". - * A simple transaction is performed. We verify that the transaction - * completes successfully, that the quorum token is unchanged, and that - * [A,C] both participated in the commit. We also verify that B is moved to - * the end of the pipeline (by doing a serviceLeave and then re-entering the - * pipeline) and that it resyncs with the met quorum and finally re-joins - * with the met quorum. The quorum should not break across this test. + * {@link HACommitGlue#prepare2Phase(IHA2PhasePrepareMessage)} to throw an + * exception. A simple transaction is performed. We verify that the + * transaction completes successfully, that the quorum token is unchanged, + * and that [A,C] both participated in the commit. We also verify that B is + * moved to the end of the pipeline (by doing a serviceLeave and then + * re-entering the pipeline) and that it resyncs with the met quorum and + * finally re-joins with the met quorum. The quorum should not break across + * this test. */ public void testStartABC_prepare2Phase_B_throws_exception() throws Exception { @@ -472,36 +496,36 @@ /** * Three services are started in [A,B,C] order. B is setup for * {@link HACommitGlue#prepare2Phase(IHA2PhasePrepareMessage)} to throw an - * exeption. A simple transaction is performed. We verify that the - * transaction completes successfully, that the quorum token is unchanged, - * and that [A,C] both participated in the commit. 
We also verify that B is - * moved to the end of the pipeline (by doing a serviceLeave and then - * re-entering the pipeline) and that it resyncs with the met quorum and - * finally re-joins with the met quorum. The quorum should not break across - * this test. - * - * FIXME Variant where the commit2Phase fails. Note: The COMMIT message is - * design to do as little work as possible. In practice, this requires an - * RMI to the followers, each follower must not encounter an error when it - * validates the COMMIT message, and each follower must put down its new - * root block (from the prepare message) and then sync the disk. Finally, - * the RMI response must be returned. + * exception inside of the commit2Phase() method rather than at the external + * RMI interface. * <p> - * Under what conditions can a COMMIT message fail where we can still - * recover? Single node failure? Leader failure? (QuorumCommitImpl currently - * fails the commit if there is a single failure, even though the quourm - * might have a consensus around the new commit point.) + * A simple transaction is performed. We verify that the transaction + * completes successfully, that the quorum token is unchanged, and that + * [A,C] both participated in the commit. We also verify that B is moved to + * the end of the pipeline (by doing a serviceLeave and then re-entering the + * pipeline). For this test, B DOES NOT resync and join. This is because A + * and C go through their commit2Phase() methods for a fully met quorum. + * Because we have explicitly disabled the {@link DefaultRestorePolicy}, + * this allows them to purge their HALogs. This means that B can not resync + * with the met quorum. As a consequence, B transitions to the + * {@link RunStateEnum#Operator} state and remains + * {@link HAStatusEnum#NotReady}. + * <p> + * The quorum should not break across this test. * - * TODO Consider leader failure scenarios in this test suite, not just - * scenarios where B fails. 
We MUST also cover failures of C (the 2nd - * follower). We should also cover scenarios where the quorum is barely met - * and a single failure causes a rejected commit (local decision) or 2-phase - * abort (joined services in joint agreement). + * TODO Consider leader failure scenarios in this test suite (commit2Phase() + * fails on the leader), not just scenarios where B fails. We MUST also + * cover failures of C (the 2nd follower). We should also cover scenarios + * where the quorum is barely met and a single failure causes a rejected + * commit (local decision) or 2-phase abort (joined services in joint + * agreement). * * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/760" > * Review commit2Phase semantics when a follower fails </a> + * + * @see TestHA3JournalServerWithHALogs#testStartABC_commit2Phase_B_failCommit_beforeWritingRootBlockOnJournal_HALogsNotPurgedAtCommit() */ - public void testStartABC_commit2Phase_B_fails() + public void testStartABC_commit2Phase_B_failCommit_beforeWritingRootBlockOnJournal_HALogsPurgedAtCommit() throws Exception { // Enforce the join order. @@ -518,120 +542,69 @@ /* * Setup B to fail the "COMMIT" message (specifically, it will throw * back an exception rather than executing the commit. - * - * FIXME We need to cause B to actually fail the commit such that it - * enters the ERROR state. This is only causing the RMI to be rejected - * so B is not being failed out of the pipeline. Thus, B will remain - * joined with the met quorum (but at the wrong commit point) until we - * send down another replicated write. At that point B will notice that - * it is out of whack and enter the ERROR state. */ ((HAGlueTest) startup.serverB) - .failNext("commit2Phase", - new Class[] { IHA2PhaseCommitMessage.class }, - 0/* nwait */, 1/* nfail */); + .failCommit_beforeWritingRootBlockOnJournal(); - /** - * FIXME We need to resolve the correct behavior when B fails the commit - * after having prepared. 
Two code paths are outlined below. The - * implementation currently does an abort2Phase() when the - * commit2Phase() observe an error for B. That causes the commit point - * to NOT advance. + /* + * Simple transaction. * - * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/760" > - * Review commit2Phase semantics when a follower fails </a> + * Note: B will fail the commit without laying down the root block and + * will transition into the ERROR state. From there, it will move to + * SeekConsensus and then RESYNC. While in RESYNC it will pick up the + * missing HALog and commit point. Finally, it will transition into + * RunMet. */ - - if(true) { + simpleTransaction(); - // Simple transaction. - simpleTransaction(); + // Verify quorum is unchanged. + assertEquals(token, quorum.token()); - // Verify quorum is unchanged. - assertEquals(token, quorum.token()); + // Should be two commit points on {A,C}. + awaitCommitCounter(2L, startup.serverA, startup.serverC); - // Should be two commit points on {A,C}. - awaitCommitCounter(2L, startup.serverA, startup.serverC); + /* + * Just one commit point on B + * + * TODO This is a data race. It is only transiently true. + */ + awaitCommitCounter(1L, startup.serverB); - // Just one commit point on B. - awaitCommitCounter(1L, startup.serverB); + /* + * B is NotReady + * + * TODO This is a data race. It is only transiently true. + */ + awaitHAStatus(startup.serverB, HAStatusEnum.NotReady); - // B is still a follower. - awaitHAStatus(startup.serverB, HAStatusEnum.Follower); - - /* - * B should go into an ERROR state and then into SeekConsensus and - * from there to RESYNC and finally back to RunMet. We can not - * reliably observe the intervening states. So what we really need - * to do is watch for B to move to the end of the pipeline and catch - * up to the same commit point. 
- * - * FIXME This is forcing B into an error state to simulate what - * would happen if B had encountered an error during the 2-phase - * commit above. - */ - ((HAGlueTest)startup.serverB).enterErrorState(); + /* + * The pipeline should be reordered. B will do a service leave, then + * enter seek consensus, and then re-enter the pipeline. + */ + awaitPipeline(new HAGlue[] { startup.serverA, startup.serverC, + startup.serverB }); - /* - * The pipeline should be reordered. B will do a service leave, then - * enter seek consensus, and then re-enter the pipeline. - */ - awaitPipeline(new HAGlue[] { startup.serverA, startup.serverC, - startup.serverB }); + /* + * IF you allow the purge of the HALog files on a fully met commit AND a + * service fails in commit2Phase() for a fully met quorum THEN the other + * services will have purged their HALog files and the service that + * failed in commit2Phase() will be unable to resync and join the met + * quorum. + */ + awaitRunStateEnum(RunStateEnum.Operator, startup.serverB); + awaitHAStatus(startup.serverB, HAStatusEnum.NotReady); - awaitFullyMetQuorum(); - - /* - * There should be two commit points on {A,C,B} (note that this - * assert does not pay attention to the pipeline order). - */ - awaitCommitCounter(2L, startup.serverA, startup.serverC, - startup.serverB); + // There should be two commit points on {A,C}. + awaitCommitCounter(2L, startup.serverA, startup.serverC); - // B should be a follower again. - awaitHAStatus(startup.serverB, HAStatusEnum.Follower); + // Just one commit point on B. + awaitCommitCounter(1L, startup.serverB); - // quorum token is unchanged. - assertEquals(token, quorum.token()); + // quorum token is unchanged. + assertEquals(token, quorum.token()); - } else { - - try { - - // Simple transaction. - simpleTransaction(); - - fail("Expecting failed transaction"); - - } catch (Exception t) { - - if (!t.getMessage().contains( - SpuriousTestException.class.getName())) { - /* - * Wrong inner cause. 
- * - * Note: The stack trace of the local exception does not - * include the remote stack trace. The cause is formatted - * into the HTTP response body. - */ - fail("Expecting " + SpuriousTestException.class, t); - } - - } - - // Verify quorum is unchanged. - assertEquals(token, quorum.token()); - - // Should be ONE commit point on {A,B, C]. - awaitCommitCounter(1L, startup.serverA, startup.serverB, - startup.serverC); - - fail("finish test under these assumptions"); - - } - } - + /** * Unit test for failure to RESYNC having a root cause that the live HALog * file did not exist on the quorum leader after an abort2Phase() call. This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |