|
From: <tho...@us...> - 2013-11-04 20:33:20
|
Revision: 7509
http://bigdata.svn.sourceforge.net/bigdata/?rev=7509&view=rev
Author: thompsonbry
Date: 2013-11-04 20:33:12 +0000 (Mon, 04 Nov 2013)
Log Message:
-----------
Added test coverage for spurious exception throw out of commit2Phase() before the root block is written on the Journal.
See #760 (Code review for 2-phase commit protocol).
Modified Paths:
--------------
branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/ha/msg/HA2PhaseCommitMessage.java
branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/ha/msg/IHA2PhaseAbortMessage.java
branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/ha/msg/IHA2PhaseCommitMessage.java
branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/ha/msg/IHA2PhasePrepareMessage.java
branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/journal/AbstractJournal.java
branches/BIGDATA_RELEASE_1_3_0/bigdata-jini/src/test/com/bigdata/journal/jini/ha/HAJournalTest.java
branches/BIGDATA_RELEASE_1_3_0/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA3JournalServerWithHALogs.java
branches/BIGDATA_RELEASE_1_3_0/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHAJournalServerOverride.java
Added Paths:
-----------
branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/ha/msg/IHA2PhaseCommitProtocolMessage.java
branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/ha/msg/Mock2PhaseCommitProtocolException.java
Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/ha/msg/HA2PhaseCommitMessage.java
===================================================================
--- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/ha/msg/HA2PhaseCommitMessage.java 2013-11-04 16:42:25 UTC (rev 7508)
+++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/ha/msg/HA2PhaseCommitMessage.java 2013-11-04 20:33:12 UTC (rev 7509)
@@ -66,5 +66,15 @@
+ didAllServicesPrepare + "}";
}
+
+ @Override
+ public boolean failCommit_beforeWritingRootBlockOnJournal() {
+ return false;
+ }
+
+ @Override
+ public boolean failCommit_beforeClosingHALog() {
+ return false;
+ }
}
Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/ha/msg/IHA2PhaseAbortMessage.java
===================================================================
--- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/ha/msg/IHA2PhaseAbortMessage.java 2013-11-04 16:42:25 UTC (rev 7508)
+++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/ha/msg/IHA2PhaseAbortMessage.java 2013-11-04 20:33:12 UTC (rev 7509)
@@ -28,7 +28,7 @@
*
* @author <a href="mailto:tho...@us...">Bryan Thompson</a>
*/
-public interface IHA2PhaseAbortMessage extends IHAMessage {
+public interface IHA2PhaseAbortMessage extends IHA2PhaseCommitProtocolMessage {
/**
* The token for the quorum for which this request was made.
Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/ha/msg/IHA2PhaseCommitMessage.java
===================================================================
--- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/ha/msg/IHA2PhaseCommitMessage.java 2013-11-04 16:42:25 UTC (rev 7508)
+++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/ha/msg/IHA2PhaseCommitMessage.java 2013-11-04 20:33:12 UTC (rev 7509)
@@ -35,7 +35,7 @@
*
* @author <a href="mailto:tho...@us...">Bryan Thompson</a>
*/
-public interface IHA2PhaseCommitMessage extends IHAMessage {
+public interface IHA2PhaseCommitMessage extends IHA2PhaseCommitProtocolMessage {
/**
* <code>true</code> iff the service was recognized as being joined with the
@@ -60,5 +60,23 @@
* the commit will still be performed).
*/
boolean didAllServicesPrepare();
-
+
+ /**
+ * When <code>true</code> the COMMIT message will fail within the
+ * commit2Phase implementation. An exception will be thrown immediately
+ * before the new root block is written onto the journal.
+ * <p>
+ * Note: This is for unit tests only.
+ */
+ boolean failCommit_beforeWritingRootBlockOnJournal();
+
+ /**
+ * When <code>true</code> the COMMIT message will fail within the
+ * commit2Phase implementation. An exception will be thrown immediately
+ * before the closing root block is written onto the HALog file.
+ * <p>
+ * Note: This is for unit tests only.
+ */
+ boolean failCommit_beforeClosingHALog();
+
}
Added: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/ha/msg/IHA2PhaseCommitProtocolMessage.java
===================================================================
--- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/ha/msg/IHA2PhaseCommitProtocolMessage.java (rev 0)
+++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/ha/msg/IHA2PhaseCommitProtocolMessage.java 2013-11-04 20:33:12 UTC (rev 7509)
@@ -0,0 +1,33 @@
+/**
+
+Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved.
+
+Contact:
+ SYSTAP, LLC
+ 4501 Tower Road
+ Greensboro, NC 27410
+ lic...@bi...
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+package com.bigdata.ha.msg;
+
+/**
+ * Message for one of the 2-phase commit protocol operations.
+ *
+ * @author <a href="mailto:tho...@us...">Bryan Thompson</a>
+ */
+public interface IHA2PhaseCommitProtocolMessage extends IHAMessage {
+
+}
Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/ha/msg/IHA2PhasePrepareMessage.java
===================================================================
--- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/ha/msg/IHA2PhasePrepareMessage.java 2013-11-04 16:42:25 UTC (rev 7508)
+++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/ha/msg/IHA2PhasePrepareMessage.java 2013-11-04 20:33:12 UTC (rev 7509)
@@ -36,7 +36,7 @@
*
* @author <a href="mailto:tho...@us...">Bryan Thompson</a>
*/
-public interface IHA2PhasePrepareMessage extends IHAMessage {
+public interface IHA2PhasePrepareMessage extends IHA2PhaseCommitProtocolMessage {
/**
* The consensus release time from the GATHER.
@@ -91,6 +91,8 @@
/**
* When <code>true</code>, always vote no.
+ * <p>
+ * Note: This is for unit tests only.
*/
boolean voteNo();
Added: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/ha/msg/Mock2PhaseCommitProtocolException.java
===================================================================
--- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/ha/msg/Mock2PhaseCommitProtocolException.java (rev 0)
+++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/ha/msg/Mock2PhaseCommitProtocolException.java 2013-11-04 20:33:12 UTC (rev 7509)
@@ -0,0 +1,51 @@
+/**
+
+Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved.
+
+Contact:
+ SYSTAP, LLC
+ 4501 Tower Road
+ Greensboro, NC 27410
+ lic...@bi...
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+package com.bigdata.ha.msg;
+
+/**
+ * Instances of this class are used when one of the
+ * {@link IHA2PhaseCommitProtocolMessage}s is configured to force a runtime
+ * exception during the 2-phase commit protocol.
+ *
+ * @author <a href="mailto:tho...@us...">Bryan Thompson</a>
+ */
+public class Mock2PhaseCommitProtocolException extends RuntimeException {
+
+ private static final long serialVersionUID = 1L;
+
+ public Mock2PhaseCommitProtocolException() {
+ super();
+ }
+
+ public Mock2PhaseCommitProtocolException(final String msg) {
+ super(msg);
+ }
+
+ public Mock2PhaseCommitProtocolException(final RuntimeException cause) {
+
+ super(cause);
+
+ }
+
+}
Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/journal/AbstractJournal.java
===================================================================
--- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/journal/AbstractJournal.java 2013-11-04 16:42:25 UTC (rev 7508)
+++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/journal/AbstractJournal.java 2013-11-04 20:33:12 UTC (rev 7509)
@@ -140,6 +140,7 @@
import com.bigdata.ha.msg.IHAWriteMessage;
import com.bigdata.ha.msg.IHAWriteSetStateRequest;
import com.bigdata.ha.msg.IHAWriteSetStateResponse;
+import com.bigdata.ha.msg.Mock2PhaseCommitProtocolException;
import com.bigdata.htree.HTree;
import com.bigdata.io.DirectBufferPool;
import com.bigdata.io.IDataRecord;
@@ -7040,16 +7041,6 @@
}
} // class VoteNoTask
-
-// /**
-// * Method must be extended by subclass to coordinate the rejected
-// * commit.
-// */
-// protected void doRejectedCommit() {
-//
-// doLocalAbort();
-//
-// }
/**
* Task prepares for a 2-phase commit (syncs to the disk) and votes YES
@@ -7337,9 +7328,9 @@
/*
* Hook allows the test suite to force a NO vote.
*/
-
- throw new RuntimeException("Force NO vote");
+ throw new Mock2PhaseCommitProtocolException("Force NO vote");
+
}
// Vote YES.
@@ -7640,11 +7631,23 @@
// verify that the quorum has not changed.
quorum.assertQuorum(rootBlock.getQuorumToken());
+ if (commitMessage.failCommit_beforeWritingRootBlockOnJournal()) {
+
+ throw new Mock2PhaseCommitProtocolException();
+
+ }
+
/*
* Write the root block on the local journal.
*/
AbstractJournal.this.doLocalCommit(localService, rootBlock);
+ if (commitMessage.failCommit_beforeClosingHALog()) {
+
+ throw new Mock2PhaseCommitProtocolException();
+
+ }
+
/*
* Write the root block on the HALog file, closing out that
* file.
Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata-jini/src/test/com/bigdata/journal/jini/ha/HAJournalTest.java
===================================================================
--- branches/BIGDATA_RELEASE_1_3_0/bigdata-jini/src/test/com/bigdata/journal/jini/ha/HAJournalTest.java 2013-11-04 16:42:25 UTC (rev 7508)
+++ branches/BIGDATA_RELEASE_1_3_0/bigdata-jini/src/test/com/bigdata/journal/jini/ha/HAJournalTest.java 2013-11-04 20:33:12 UTC (rev 7509)
@@ -265,6 +265,16 @@
public void voteNo() throws IOException;
/**
+ * @see IHA2PhaseCommitMessage#failCommit_beforeWritingRootBlockOnJournal()
+ */
+ public void failCommit_beforeWritingRootBlockOnJournal() throws IOException;
+
+ /**
+ * @see IHA2PhaseCommitMessage#failCommit_beforeClosingHALog()
+ */
+ public void failCommit_beforeClosingHALog() throws IOException;
+
+ /**
* Set the next value to be reported by {@link BasicHA#nextTimestamp()}.
* <p>
* Note: Only a few specific methods call against
@@ -278,7 +288,7 @@
* by {@link BasicHA#nextTimestamp()}, after which the
* behavior will revert to the default.
*
- * TODO Add a "clearNextTimestamp() method.
+ * TODO Add a "clearNextTimestamp()" method.
*/
public void setNextTimestamp(long nextTimestamp) throws IOException;
@@ -424,9 +434,29 @@
/**
* Flag used to force the service to vote "NO" on the next two-phase
* commit.
+ *
+ * @see IHA2PhasePrepareMessage#voteNo()
*/
private final AtomicBoolean voteNo = new AtomicBoolean(false);
+ /**
+ * Flag used to force the service to fail rather than laying down the
+ * new root block in the COMMIT message.
+ *
+ * @see IHA2PhaseCommitMessage#failCommit_beforeWritingRootBlockOnJournal()
+ */
+ private final AtomicBoolean failCommit_beforeWritingRootBlockOnJournal = new AtomicBoolean(
+ false);
+
+ /**
+ * Flag used to force the service to fail in the COMMIT message after the
+ * new root block is written on the journal but before the HALog is closed.
+ *
+ * @see IHA2PhaseCommitMessage#failCommit_beforeClosingHALog()
+ */
+ private final AtomicBoolean failCommit_beforeClosingHALog = new AtomicBoolean(
+ false);
+
private final AtomicLong nextTimestamp = new AtomicLong(-1L);
private HAGlueTestImpl(final UUID serviceId) {
@@ -487,10 +517,26 @@
@Override
public void voteNo() throws IOException {
+
voteNo.set(true);
+
}
@Override
+ public void failCommit_beforeWritingRootBlockOnJournal() throws IOException {
+
+ failCommit_beforeWritingRootBlockOnJournal.set(true);
+
+ }
+
+ @Override
+ public void failCommit_beforeClosingHALog() throws IOException {
+
+ failCommit_beforeClosingHALog.set(true);
+
+ }
+
+ @Override
public void setNextTimestamp(long nextTimestamp) throws IOException {
this.nextTimestamp.set(nextTimestamp);
@@ -915,8 +961,17 @@
if (voteNo.compareAndSet(true/* expect */, false/* update */)) {
- return super.prepare2Phase(new MyPrepareMessage(prepareMessage));
+ return super
+ .prepare2Phase(new MyPrepareMessage(prepareMessage) {
+
+ private static final long serialVersionUID = 1L;
+ @Override
+ public boolean voteNo() {
+ return true;
+ }
+ });
+
} else {
return super.prepare2Phase(prepareMessage);
@@ -926,13 +981,42 @@
}
@Override
- public Future<Void> commit2Phase(IHA2PhaseCommitMessage commitMessage) {
+ public Future<Void> commit2Phase(final IHA2PhaseCommitMessage commitMessage) {
checkMethod("commit2Phase",
new Class[] { IHA2PhaseCommitMessage.class });
- return super.commit2Phase(commitMessage);
+ if (failCommit_beforeWritingRootBlockOnJournal.compareAndSet(
+ true/* expect */, false/* update */)) {
+ return super.commit2Phase(new MyCommitMessage(commitMessage) {
+
+ private static final long serialVersionUID = 1L;
+
+ @Override
+ public boolean failCommit_beforeWritingRootBlockOnJournal() {
+ return true;
+ }
+ });
+ } else if (failCommit_beforeClosingHALog.compareAndSet(
+ true/* expect */, false/* update */)) {
+
+ return super.commit2Phase(new MyCommitMessage(commitMessage) {
+
+ private static final long serialVersionUID = 1L;
+
+ @Override
+ public boolean failCommit_beforeClosingHALog() {
+ return true;
+ }
+ });
+
+ } else {
+
+ return super.commit2Phase(commitMessage);
+
+ }
+
}
@Override
@@ -950,7 +1034,8 @@
*/
@Override
- public Future<IHAReadResponse> readFromDisk(IHAReadRequest readMessage) {
+ public Future<IHAReadResponse> readFromDisk(
+ final IHAReadRequest readMessage) {
checkMethod("readFromDisk", new Class[] { IHAReadResponse.class });
@@ -979,8 +1064,8 @@
}
@Override
- public Future<Void> receiveAndReplicate(IHASyncRequest req,
- IHAWriteMessage msg) throws IOException {
+ public Future<Void> receiveAndReplicate(final IHASyncRequest req,
+ final IHAWriteMessage msg) throws IOException {
checkMethod("receiveAndReplicate", new Class[] {
IHASyncRequest.class, IHAWriteMessage.class });
@@ -1157,7 +1242,7 @@
//
// try {
//
-// // FIXME: hould already be closed, can we check this?
+// // Should already be closed, can we check this?
//
// // Obtain a new connection.
// ((ZKQuorumImpl) getQuorum()).getZookeeper();
@@ -1239,6 +1324,11 @@
} // class HAGlueTestImpl
+ /**
+ * Delegation pattern allows us to override select methods easily.
+ *
+ * @author <a href="mailto:tho...@us...">Bryan Thompson</a>
+ */
private static class MyPrepareMessage implements IHA2PhasePrepareMessage {
/**
@@ -1288,13 +1378,57 @@
}
/**
- * Force the PREPARE to vote NO.
+ * {@inheritDoc}
+ * <p>
+ * Delegates to the wrapped message. Override to force the PREPARE to vote NO.
*/
@Override
public boolean voteNo() {
- return true;
+ return delegate.voteNo();
}
}
+
+ /**
+ * Delegation pattern allows us to override select methods easily.
+ *
+ * @author <a href="mailto:tho...@us...">Bryan Thompson</a>
+ */
+ private static class MyCommitMessage implements IHA2PhaseCommitMessage {
+
+ private static final long serialVersionUID = 1L;
+
+ private final IHA2PhaseCommitMessage delegate;
+
+ public MyCommitMessage(final IHA2PhaseCommitMessage msg) {
+ this.delegate = msg;
+ }
+
+ @Override
+ public boolean isJoinedService() {
+ return delegate.isJoinedService();
+ }
+
+ @Override
+ public long getCommitTime() {
+ return delegate.getCommitTime();
+ }
+
+ @Override
+ public boolean didAllServicesPrepare() {
+ return delegate.didAllServicesPrepare();
+ }
+
+ @Override
+ public boolean failCommit_beforeWritingRootBlockOnJournal() {
+ return delegate.failCommit_beforeWritingRootBlockOnJournal();
+ }
+
+ @Override
+ public boolean failCommit_beforeClosingHALog() {
+ return delegate.failCommit_beforeClosingHALog();
+ }
+
+ }
} // class HAJournalTest
Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA3JournalServerWithHALogs.java
===================================================================
--- branches/BIGDATA_RELEASE_1_3_0/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA3JournalServerWithHALogs.java 2013-11-04 16:42:25 UTC (rev 7508)
+++ branches/BIGDATA_RELEASE_1_3_0/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA3JournalServerWithHALogs.java 2013-11-04 20:33:12 UTC (rev 7509)
@@ -30,10 +30,14 @@
import net.jini.config.Configuration;
+import com.bigdata.ha.HACommitGlue;
import com.bigdata.ha.HAGlue;
+import com.bigdata.ha.HAStatusEnum;
import com.bigdata.ha.halog.HALogReader;
import com.bigdata.ha.halog.IHALogReader;
+import com.bigdata.ha.msg.IHA2PhasePrepareMessage;
import com.bigdata.journal.CommitCounterUtility;
+import com.bigdata.journal.jini.ha.HAJournalTest.HAGlueTest;
/**
* Test suite when we are using the {@link DefaultSnapshotPolicy} and
@@ -443,4 +447,98 @@
}
+ /**
+ * Three services are started in [A,B,C] order. B is setup for
+ * {@link HACommitGlue#prepare2Phase(IHA2PhasePrepareMessage)} to throw an
+ * exception inside of the commit2Phase() method rather than at the external
+ * RMI interface.
+ * <p>
+ * A simple transaction is performed. We verify that the transaction
+ * completes successfully, that the quorum token is unchanged, and that
+ * [A,C] both participated in the commit. We also verify that B is moved to
+ * the end of the pipeline (by doing a serviceLeave and then re-entering the
+ * pipeline) and that it resyncs with the met quorum and finally re-joins
+ * with the met quorum. The quorum should not break across this test.
+ *
+ * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/760" >
+ * Review commit2Phase semantics when a follower fails </a>
+ *
+ * @see TestHAJournalServerOverride#testStartABC_commit2Phase_B_failCommit_beforeWritingRootBlockOnJournal_HALogsPurgedAtCommit()
+ */
+ public void testStartABC_commit2Phase_B_failCommit_beforeWritingRootBlockOnJournal_HALogsNotPurgedAtCommit()
+ throws Exception {
+
+ // Enforce the join order.
+ final ABC startup = new ABC(true /*sequential*/);
+
+ //HAJournalTest.dumpThreads();
+
+ final long token = awaitFullyMetQuorum();
+
+ // Should be one commit point.
+ awaitCommitCounter(1L, startup.serverA, startup.serverB,
+ startup.serverC);
+
+ /*
+ * Setup B to fail the "COMMIT" message (specifically, it will throw
+ * back an exception rather than executing the commit).
+ */
+ ((HAGlueTest) startup.serverB)
+ .failCommit_beforeWritingRootBlockOnJournal();
+
+ /*
+ * Simple transaction.
+ *
+ * Note: B will fail the commit without laying down the root block and
+ * will transition into the ERROR state. From there, it will move to
+ * SeekConsensus and then RESYNC. While in RESYNC it will pick up the
+ * missing HALog and commit point. Finally, it will transition into
+ * RunMet.
+ */
+ simpleTransaction();
+
+ // Verify quorum is unchanged.
+ assertEquals(token, quorum.token());
+
+ // Should be two commit points on {A,C}.
+ awaitCommitCounter(2L, startup.serverA, startup.serverC);
+
+ /*
+ * Just one commit point on B
+ *
+ * TODO This is a data race. It is only transiently true.
+ */
+ awaitCommitCounter(1L, startup.serverB);
+
+ /*
+ * B is NotReady
+ *
+ * TODO This is a data race. It is only transiently true.
+ */
+ awaitHAStatus(startup.serverB, HAStatusEnum.NotReady);
+
+ /*
+ * The pipeline should be reordered. B will do a service leave, then
+ * enter seek consensus, and then re-enter the pipeline.
+ */
+ awaitPipeline(new HAGlue[] { startup.serverA, startup.serverC,
+ startup.serverB });
+
+ awaitFullyMetQuorum();
+
+ /*
+ * There should be two commit points on {A,C,B} (note that this assert
+ * does not pay attention to the pipeline order).
+ */
+ awaitCommitCounter(2L, startup.serverA, startup.serverC,
+ startup.serverB);
+
+ // B should be a follower again.
+ awaitHAStatus(startup.serverB, HAStatusEnum.Follower);
+
+ // quorum token is unchanged.
+ assertEquals(token, quorum.token());
+
+ }
+
}
Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHAJournalServerOverride.java
===================================================================
--- branches/BIGDATA_RELEASE_1_3_0/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHAJournalServerOverride.java 2013-11-04 16:42:25 UTC (rev 7508)
+++ branches/BIGDATA_RELEASE_1_3_0/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHAJournalServerOverride.java 2013-11-04 20:33:12 UTC (rev 7509)
@@ -38,10 +38,10 @@
import com.bigdata.ha.HACommitGlue;
import com.bigdata.ha.HAGlue;
import com.bigdata.ha.HAStatusEnum;
-import com.bigdata.ha.msg.IHA2PhaseCommitMessage;
import com.bigdata.ha.msg.IHA2PhasePrepareMessage;
import com.bigdata.ha.msg.IHANotifyReleaseTimeRequest;
import com.bigdata.journal.AbstractTask;
+import com.bigdata.journal.jini.ha.HAJournalServer.RunStateEnum;
import com.bigdata.journal.jini.ha.HAJournalTest.HAGlueTest;
import com.bigdata.journal.jini.ha.HAJournalTest.SpuriousTestException;
import com.bigdata.quorum.zk.ZKQuorumImpl;
@@ -174,14 +174,37 @@
* When we add concurrent unisolated writers, the user level transaction
* abort will just discard the buffered writes for a specific
* {@link AbstractTask}.
- *
- * @throws Exception
*/
public void testStartABC_userLevelAbortDoesNotCauseQuorumBreak()
throws Exception {
- fail("write test");
+ final ABC x = new ABC(true/*sequential*/);
+
+ final long token = awaitFullyMetQuorum();
+
+ // Now run several transactions
+ final int NTX = 5;
+ for (int i = 0; i < NTX; i++)
+ simpleTransaction();
+ // wait until the commit point is registered on all services.
+ awaitCommitCounter(NTX + 1L, new HAGlue[] { x.serverA, x.serverB,
+ x.serverC });
+
+ // Verify order.
+ awaitPipeline(new HAGlue[] { x.serverA, x.serverB, x.serverC });
+ awaitJoined(new HAGlue[] { x.serverA, x.serverB, x.serverC });
+
+ // Run a transaction that forces a 2-phase abort.
+ ((HAGlueTest) x.serverA).simpleTransaction_abort();
+
+ // Reverify order.
+ awaitPipeline(new HAGlue[] { x.serverA, x.serverB, x.serverC });
+ awaitJoined(new HAGlue[] { x.serverA, x.serverB, x.serverC });
+
+ // Verify no failover of the leader.
+ assertEquals(token, awaitFullyMetQuorum());
+
}
/**
@@ -375,13 +398,14 @@
/**
* Three services are started in [A,B,C] order. B is setup for
- * {@link HACommitGlue#prepare2Phase(IHA2PhasePrepareMessage)} to vote "NO".
- * A simple transaction is performed. We verify that the transaction
- * completes successfully, that the quorum token is unchanged, and that
- * [A,C] both participated in the commit. We also verify that B is moved to
- * the end of the pipeline (by doing a serviceLeave and then re-entering the
- * pipeline) and that it resyncs with the met quorum and finally re-joins
- * with the met quorum. The quorum should not break across this test.
+ * {@link HACommitGlue#prepare2Phase(IHA2PhasePrepareMessage)} to throw an
+ * exception. A simple transaction is performed. We verify that the
+ * transaction completes successfully, that the quorum token is unchanged,
+ * and that [A,C] both participated in the commit. We also verify that B is
+ * moved to the end of the pipeline (by doing a serviceLeave and then
+ * re-entering the pipeline) and that it resyncs with the met quorum and
+ * finally re-joins with the met quorum. The quorum should not break across
+ * this test.
*/
public void testStartABC_prepare2Phase_B_throws_exception()
throws Exception {
@@ -472,36 +496,36 @@
/**
* Three services are started in [A,B,C] order. B is setup for
* {@link HACommitGlue#prepare2Phase(IHA2PhasePrepareMessage)} to throw an
- * exeption. A simple transaction is performed. We verify that the
- * transaction completes successfully, that the quorum token is unchanged,
- * and that [A,C] both participated in the commit. We also verify that B is
- * moved to the end of the pipeline (by doing a serviceLeave and then
- * re-entering the pipeline) and that it resyncs with the met quorum and
- * finally re-joins with the met quorum. The quorum should not break across
- * this test.
- *
- * FIXME Variant where the commit2Phase fails. Note: The COMMIT message is
- * design to do as little work as possible. In practice, this requires an
- * RMI to the followers, each follower must not encounter an error when it
- * validates the COMMIT message, and each follower must put down its new
- * root block (from the prepare message) and then sync the disk. Finally,
- * the RMI response must be returned.
+ * exception inside of the commit2Phase() method rather than at the external
+ * RMI interface.
* <p>
- * Under what conditions can a COMMIT message fail where we can still
- * recover? Single node failure? Leader failure? (QuorumCommitImpl currently
- * fails the commit if there is a single failure, even though the quourm
- * might have a consensus around the new commit point.)
+ * A simple transaction is performed. We verify that the transaction
+ * completes successfully, that the quorum token is unchanged, and that
+ * [A,C] both participated in the commit. We also verify that B is moved to
+ * the end of the pipeline (by doing a serviceLeave and then re-entering the
+ * pipeline). For this test, B DOES NOT resync and join. This is because A
+ * and C go through their commit2Phase() methods for a fully met quorum.
+ * Because we have explicitly disabled the {@link DefaultRestorePolicy},
+ * this allows them to purge their HALogs. This means that B can not resync
+ * with the met quorum. As a consequence, B transitions to the
+ * {@link RunStateEnum#Operator} state and remains
+ * {@link HAStatusEnum#NotReady}.
+ * <p>
+ * The quorum should not break across this test.
*
- * TODO Consider leader failure scenarios in this test suite, not just
- * scenarios where B fails. We MUST also cover failures of C (the 2nd
- * follower). We should also cover scenarios where the quorum is barely met
- * and a single failure causes a rejected commit (local decision) or 2-phase
- * abort (joined services in joint agreement).
+ * TODO Consider leader failure scenarios in this test suite (commit2Phase()
+ * fails on the leader), not just scenarios where B fails. We MUST also
+ * cover failures of C (the 2nd follower). We should also cover scenarios
+ * where the quorum is barely met and a single failure causes a rejected
+ * commit (local decision) or 2-phase abort (joined services in joint
+ * agreement).
*
* @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/760" >
* Review commit2Phase semantics when a follower fails </a>
+ *
+ * @see TestHA3JournalServerWithHALogs#testStartABC_commit2Phase_B_failCommit_beforeWritingRootBlockOnJournal_HALogsNotPurgedAtCommit()
*/
- public void testStartABC_commit2Phase_B_fails()
+ public void testStartABC_commit2Phase_B_failCommit_beforeWritingRootBlockOnJournal_HALogsPurgedAtCommit()
throws Exception {
// Enforce the join order.
@@ -518,120 +542,69 @@
/*
* Setup B to fail the "COMMIT" message (specifically, it will throw
* back an exception rather than executing the commit).
- *
- * FIXME We need to cause B to actually fail the commit such that it
- * enters the ERROR state. This is only causing the RMI to be rejected
- * so B is not being failed out of the pipeline. Thus, B will remain
- * joined with the met quorum (but at the wrong commit point) until we
- * send down another replicated write. At that point B will notice that
- * it is out of whack and enter the ERROR state.
*/
((HAGlueTest) startup.serverB)
- .failNext("commit2Phase",
- new Class[] { IHA2PhaseCommitMessage.class },
- 0/* nwait */, 1/* nfail */);
+ .failCommit_beforeWritingRootBlockOnJournal();
- /**
- * FIXME We need to resolve the correct behavior when B fails the commit
- * after having prepared. Two code paths are outlined below. The
- * implementation currently does an abort2Phase() when the
- * commit2Phase() observe an error for B. That causes the commit point
- * to NOT advance.
+ /*
+ * Simple transaction.
*
- * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/760" >
- * Review commit2Phase semantics when a follower fails </a>
+ * Note: B will fail the commit without laying down the root block and
+ * will transition into the ERROR state. From there, it will move to
+ * SeekConsensus and then attempt to RESYNC. Because A and C purge their
+ * HALogs at the commit, B can not pick up the missing HALog and will
+ * transition into the Operator state rather than RunMet.
*/
-
- if(true) {
+ simpleTransaction();
- // Simple transaction.
- simpleTransaction();
+ // Verify quorum is unchanged.
+ assertEquals(token, quorum.token());
- // Verify quorum is unchanged.
- assertEquals(token, quorum.token());
+ // Should be two commit points on {A,C}.
+ awaitCommitCounter(2L, startup.serverA, startup.serverC);
- // Should be two commit points on {A,C}.
- awaitCommitCounter(2L, startup.serverA, startup.serverC);
+ /*
+ * Just one commit point on B
+ *
+ * TODO This is a data race. It is only transiently true.
+ */
+ awaitCommitCounter(1L, startup.serverB);
- // Just one commit point on B.
- awaitCommitCounter(1L, startup.serverB);
+ /*
+ * B is NotReady
+ *
+ * TODO This is a data race. It is only transiently true.
+ */
+ awaitHAStatus(startup.serverB, HAStatusEnum.NotReady);
- // B is still a follower.
- awaitHAStatus(startup.serverB, HAStatusEnum.Follower);
-
- /*
- * B should go into an ERROR state and then into SeekConsensus and
- * from there to RESYNC and finally back to RunMet. We can not
- * reliably observe the intervening states. So what we really need
- * to do is watch for B to move to the end of the pipeline and catch
- * up to the same commit point.
- *
- * FIXME This is forcing B into an error state to simulate what
- * would happen if B had encountered an error during the 2-phase
- * commit above.
- */
- ((HAGlueTest)startup.serverB).enterErrorState();
+ /*
+ * The pipeline should be reordered. B will do a service leave, then
+ * enter seek consensus, and then re-enter the pipeline.
+ */
+ awaitPipeline(new HAGlue[] { startup.serverA, startup.serverC,
+ startup.serverB });
- /*
- * The pipeline should be reordered. B will do a service leave, then
- * enter seek consensus, and then re-enter the pipeline.
- */
- awaitPipeline(new HAGlue[] { startup.serverA, startup.serverC,
- startup.serverB });
+ /*
+ * IF you allow the purge of the HALog files on a fully met commit AND a
+ * service fails in commit2Phase() for a fully met quorum THEN the other
+ * services will have purged their HALog files and the service that
+ * failed in commit2Phase() will be unable to resync and join the met
+ * quorum.
+ */
+ awaitRunStateEnum(RunStateEnum.Operator, startup.serverB);
+ awaitHAStatus(startup.serverB, HAStatusEnum.NotReady);
- awaitFullyMetQuorum();
-
- /*
- * There should be two commit points on {A,C,B} (note that this
- * assert does not pay attention to the pipeline order).
- */
- awaitCommitCounter(2L, startup.serverA, startup.serverC,
- startup.serverB);
+ // There should be two commit points on {A,C}.
+ awaitCommitCounter(2L, startup.serverA, startup.serverC);
- // B should be a follower again.
- awaitHAStatus(startup.serverB, HAStatusEnum.Follower);
+ // Just one commit point on B.
+ awaitCommitCounter(1L, startup.serverB);
- // quorum token is unchanged.
- assertEquals(token, quorum.token());
+ // quorum token is unchanged.
+ assertEquals(token, quorum.token());
- } else {
-
- try {
-
- // Simple transaction.
- simpleTransaction();
-
- fail("Expecting failed transaction");
-
- } catch (Exception t) {
-
- if (!t.getMessage().contains(
- SpuriousTestException.class.getName())) {
- /*
- * Wrong inner cause.
- *
- * Note: The stack trace of the local exception does not
- * include the remote stack trace. The cause is formatted
- * into the HTTP response body.
- */
- fail("Expecting " + SpuriousTestException.class, t);
- }
-
- }
-
- // Verify quorum is unchanged.
- assertEquals(token, quorum.token());
-
- // Should be ONE commit point on {A,B, C].
- awaitCommitCounter(1L, startup.serverA, startup.serverB,
- startup.serverC);
-
- fail("finish test under these assumptions");
-
- }
-
}
-
+
/**
* Unit test for failure to RESYNC having a root cause that the live HALog
* file did not exist on the quorum leader after an abort2Phase() call.
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|