From: <mar...@us...> - 2013-09-12 17:04:03
|
Revision: 7400 http://bigdata.svn.sourceforge.net/bigdata/?rev=7400&view=rev Author: martyncutcher Date: 2013-09-12 17:03:57 +0000 (Thu, 12 Sep 2013) Log Message: ----------- Change test teardown to better ensure a clean tidy of the test directories - removing HALog files, snapshots and journals. This will be more important as we extend service kill tests which previously would have resulted in potentially large numbers of files remaining. Modified Paths: -------------- branches/BIGDATA_RELEASE_1_3_0/bigdata-jini/src/test/com/bigdata/journal/jini/ha/AbstractHA3JournalServerTestCase.java branches/BIGDATA_RELEASE_1_3_0/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA3JournalServer.java Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata-jini/src/test/com/bigdata/journal/jini/ha/AbstractHA3JournalServerTestCase.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata-jini/src/test/com/bigdata/journal/jini/ha/AbstractHA3JournalServerTestCase.java 2013-09-12 16:09:56 UTC (rev 7399) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata-jini/src/test/com/bigdata/journal/jini/ha/AbstractHA3JournalServerTestCase.java 2013-09-12 17:03:57 UTC (rev 7400) @@ -406,15 +406,15 @@ leaderListener = null; } - if (serverA != null && !serverA.equals(leader)) { + if (leader == null || !leader.equals(serverA)) { destroyA(); } - if (serverB != null && !serverB.equals(leader)) { + if (leader == null || !leader.equals(serverB)) { destroyB(); } - if (serverC != null && !serverC.equals(leader)) { + if (leader == null || !leader.equals(serverC)) { destroyC(); } @@ -676,8 +676,11 @@ protected void safeDestroy(final HAGlue haGlue, final File serviceDir, final ServiceListener serviceListener) { - if (haGlue == null) + if (haGlue == null) { + tidyServiceDirectory(serviceDir); // ensure empty + return; + } try { @@ -724,11 +727,29 @@ } } + + // try and ensure serviceDir is tidied in any event + // Remove *.jnl, HALog/*, snapshot/* + log.warn("Need to clear directory explicitly: " + serviceDir.getAbsolutePath()); + tidyServiceDirectory(serviceDir); } } - + + private void tidyServiceDirectory(final File serviceDir) { + if (serviceDir == null || !serviceDir.exists()) + return; + + for (File file : serviceDir.listFiles()) { + final String name = file.getName(); + + if (name.endsWith(".jnl") || name.equals("snapshot") || name.equals("HALog")) { + recursiveDelete(file); + } + } + } + /** * Some signals understood by Java. * @@ -827,6 +848,12 @@ serverC = null; serviceListenerC = null; } + + protected void kill(final HAGlue service) throws IOException { + final int pid = ((HAGlueTest) service).getPID(); + + trySignal(SignalEnum.KILL, pid); + } /** * NOTE: This relies on equals() being valid for Proxies which isn't @@ -1582,6 +1609,7 @@ } }); + return haGlue; } catch (Throwable t) { Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA3JournalServer.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA3JournalServer.java 2013-09-12 16:09:56 UTC (rev 7399) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA3JournalServer.java 2013-09-12 17:03:57 UTC (rev 7400) @@ -1426,6 +1426,96 @@ } + public void testQuorumABC_killC() throws Exception { + + // Start 2 services. + final HAGlue serverA = startA(); + final HAGlue serverB = startB(); + + // Wait for a quorum meet. + final long token = quorum.awaitQuorum(awaitQuorumTimeout, + TimeUnit.MILLISECONDS); + +// // Verify KB exists. +// awaitKBExists(serverA); + + /* + * Note: The quorum was not fully met at the last 2-phase commit. + * Instead, 2 services participated in the 2-phase commit and the third + * service resynchronized when it came up and then went through a local + * commit. Therefore, the HALog files should exist on all nodes. + */ + + // Current commit point. + final long lastCommitCounter = 1; + + // Await initial commit point (KB create). + awaitCommitCounter(lastCommitCounter, serverA, serverB); + + /* + * Verify that HALog files were generated and are available for commit + * point ONE (1) on the services joined with the met quorum. + */ + assertHALogDigestsEquals(1L/* firstCommitCounter */, + lastCommitCounter, new HAGlue[] { serverA, serverB }); + + // Verify binary equality of (A,B) journals. + assertDigestsEquals(new HAGlue[] { serverA, serverB }); + + // Start 3rd service. + final HAGlue serverC = startC(); + + // Wait until the quorum is fully met. The token should not change. + assertEquals(token, awaitFullyMetQuorum()); + + // The commit counter has not changed. + assertEquals( + lastCommitCounter, + serverA.getRootBlock( + new HARootBlockRequest(null/* storeUUID */)) + .getRootBlock().getCommitCounter()); + + // HALog files now exist on ALL services. + assertHALogDigestsEquals(1L/* firstCommitCounter */, lastCommitCounter, + new HAGlue[] { serverA, serverB, serverC }); + + // Verify binary equality of ALL journals. + assertDigestsEquals(new HAGlue[] { serverA, serverB, serverC }); + + /* + * Now go through a commit point with a fully met quorum. The HALog + * files should be purged at that commit point. + */ + simpleTransaction(); + + // Current commit point. + final long lastCommitCounter2 = serverA + .getRootBlock(new HARootBlockRequest(null/* storeUUID */)) + .getRootBlock().getCommitCounter(); + + // There are TWO (2) commit points. + assertEquals(2L, lastCommitCounter2); + + // Verify binary equality of ALL journals. + assertDigestsEquals(new HAGlue[] { serverA, serverB, serverC }); + + // Verify no HALog files since fully met quorum @ commit. + assertHALogNotFound(0L/* firstCommitCounter */, lastCommitCounter, + new HAGlue[] { serverA, serverB, serverC }); + + // Now kill C - this will leave some file detritus + kill(serverC); + + // wait around to let the kill play out by waiting for [A, B] pipeline + awaitPipeline(new HAGlue[] {serverA, serverB}); + + // assert quorum remains met + assertTrue(quorum.isQuorumMet()); + + // ...and with original token + assertTrue(token == quorum.token()); + } + /** * Test quorum breaks and reforms when original leader fails * @@ -1463,6 +1553,42 @@ } /** + * Test quorum breaks and reforms when original leader fails + * + * @throws Exception + */ + public void testQuorumBreaksABC_killLeader() throws Exception { + + // Start 3 services in sequence + final ABC startup = new ABC(true/*sequential*/); + + // Wait for a quorum meet. + final long token = awaitMetQuorum(); + +// // Verify KB exists. +// awaitKBExists(startup.serverA); + + // Verify A is the leader. + assertEquals(startup.serverA, quorum.getClient().getLeader(token)); + + // Verify A is fully up. + awaitNSSAndHAReady(startup.serverA); + + // Now kill leader! + kill(startup.serverA); + + // Check that quorum meets around the remaining 2 services. + final long token2 = awaitNextQuorumMeet(token); + + // Verify that we have a new leader for the quorum. + final HAGlue leader = quorum.getClient().getLeader(token2); + + assertTrue(leader.equals(startup.serverB) + || leader.equals(startup.serverC)); + + } + + /** * Having observed stochastic failures, here is a stress test * to try and generate a more deterministic failure. * This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |