From: <mar...@us...> - 2014-04-02 13:14:14
Revision: 8027
http://sourceforge.net/p/bigdata/code/8027
Author: martyncutcher
Date: 2014-04-02 13:14:09 +0000 (Wed, 02 Apr 2014)
Log Message:
-----------
missed files from initial commit
Added Paths:
-----------
branches/BIGDATA_MGC_HA1_HA5/bigdata-jini/src/test/com/bigdata/journal/jini/ha/log4j-template-A.properties
branches/BIGDATA_MGC_HA1_HA5/bigdata-jini/src/test/com/bigdata/journal/jini/ha/log4j-template-B.properties
branches/BIGDATA_MGC_HA1_HA5/bigdata-jini/src/test/com/bigdata/journal/jini/ha/log4j-template-C.properties
Added: branches/BIGDATA_MGC_HA1_HA5/bigdata-jini/src/test/com/bigdata/journal/jini/ha/log4j-template-A.properties
===================================================================
--- branches/BIGDATA_MGC_HA1_HA5/bigdata-jini/src/test/com/bigdata/journal/jini/ha/log4j-template-A.properties (rev 0)
+++ branches/BIGDATA_MGC_HA1_HA5/bigdata-jini/src/test/com/bigdata/journal/jini/ha/log4j-template-A.properties 2014-04-02 13:14:09 UTC (rev 8027)
@@ -0,0 +1,25 @@
+log4j.rootCategory=WARN,haLog
+log4j.logger.com.bigdata.ha=INFO
+log4j.logger.com.bigdata.haLog=INFO
+
+#log4j.logger.com.bigdata.txLog=ALL
+#log4j.logger.com.bigdata.rwstore=INFO
+#log4j.logger.com.bigdata.journal=INFO
+#log4j.logger.com.bigdata.journal.AbstractBufferStrategy=ALL
+log4j.logger.com.bigdata.journal.jini.ha=ALL
+log4j.logger.com.bigdata.journal.jini.ha.HAJournalServer=ALL
+#log4j.logger.com.bigdata.service.jini.lookup=ALL
+#log4j.logger.com.bigdata.quorum=INFO
+log4j.logger.com.bigdata.quorum.zk=INFO
+log4j.logger.com.bigdata.io.writecache=INFO
+#log4j.logger.com.bigdata.zookeeper=INFO
+#log4j.logger.com.bigdata.zookeeper.ZooHelper=ALL
+log4j.logger.com.bigdata.rdf.internal.LexiconConfiguration=FATAL
+
+log4j.appender.haLog=org.apache.log4j.FileAppender
+log4j.appender.haLog.Threshold=ALL
+# Note: path is relative to the directory in which the service starts.
+log4j.appender.haLog.File=halog-A.txt
+log4j.appender.haLog.Append=true
+log4j.appender.haLog.layout=org.apache.log4j.PatternLayout
+log4j.appender.haLog.layout.ConversionPattern=%-5p: %d{HH:mm:ss,SSS} %r %X{hostname} %X{serviceUUID} %X{taskname} %X{timestamp} %X{resources} %t %l: %m%n
\ No newline at end of file
Added: branches/BIGDATA_MGC_HA1_HA5/bigdata-jini/src/test/com/bigdata/journal/jini/ha/log4j-template-B.properties
===================================================================
--- branches/BIGDATA_MGC_HA1_HA5/bigdata-jini/src/test/com/bigdata/journal/jini/ha/log4j-template-B.properties (rev 0)
+++ branches/BIGDATA_MGC_HA1_HA5/bigdata-jini/src/test/com/bigdata/journal/jini/ha/log4j-template-B.properties 2014-04-02 13:14:09 UTC (rev 8027)
@@ -0,0 +1,25 @@
+log4j.rootCategory=WARN,haLog
+log4j.logger.com.bigdata.ha=INFO
+log4j.logger.com.bigdata.haLog=INFO
+
+#log4j.logger.com.bigdata.txLog=ALL
+#log4j.logger.com.bigdata.rwstore=INFO
+#log4j.logger.com.bigdata.journal=INFO
+#log4j.logger.com.bigdata.journal.AbstractBufferStrategy=ALL
+log4j.logger.com.bigdata.journal.jini.ha=ALL
+log4j.logger.com.bigdata.journal.jini.ha.HAJournalServer=ALL
+#log4j.logger.com.bigdata.service.jini.lookup=ALL
+#log4j.logger.com.bigdata.quorum=INFO
+log4j.logger.com.bigdata.quorum.zk=INFO
+#log4j.logger.com.bigdata.io.writecache=INFO
+#log4j.logger.com.bigdata.zookeeper=INFO
+#log4j.logger.com.bigdata.zookeeper.ZooHelper=ALL
+log4j.logger.com.bigdata.rdf.internal.LexiconConfiguration=FATAL
+
+log4j.appender.haLog=org.apache.log4j.FileAppender
+log4j.appender.haLog.Threshold=ALL
+# Note: path is relative to the directory in which the service starts.
+log4j.appender.haLog.File=halog-B.txt
+log4j.appender.haLog.Append=true
+log4j.appender.haLog.layout=org.apache.log4j.PatternLayout
+log4j.appender.haLog.layout.ConversionPattern=%-5p: %d{HH:mm:ss,SSS} %r %X{hostname} %X{serviceUUID} %X{taskname} %X{timestamp} %X{resources} %t %l: %m%n
\ No newline at end of file
Added: branches/BIGDATA_MGC_HA1_HA5/bigdata-jini/src/test/com/bigdata/journal/jini/ha/log4j-template-C.properties
===================================================================
--- branches/BIGDATA_MGC_HA1_HA5/bigdata-jini/src/test/com/bigdata/journal/jini/ha/log4j-template-C.properties (rev 0)
+++ branches/BIGDATA_MGC_HA1_HA5/bigdata-jini/src/test/com/bigdata/journal/jini/ha/log4j-template-C.properties 2014-04-02 13:14:09 UTC (rev 8027)
@@ -0,0 +1,25 @@
+log4j.rootCategory=WARN,haLog
+log4j.logger.com.bigdata.ha=INFO
+log4j.logger.com.bigdata.haLog=INFO
+
+#log4j.logger.com.bigdata.txLog=ALL
+#log4j.logger.com.bigdata.rwstore=INFO
+#log4j.logger.com.bigdata.journal=INFO
+#log4j.logger.com.bigdata.journal.AbstractBufferStrategy=ALL
+log4j.logger.com.bigdata.journal.jini.ha=ALL
+log4j.logger.com.bigdata.journal.jini.ha.HAJournalServer=ALL
+#log4j.logger.com.bigdata.service.jini.lookup=ALL
+#log4j.logger.com.bigdata.quorum=INFO
+log4j.logger.com.bigdata.quorum.zk=INFO
+#log4j.logger.com.bigdata.io.writecache=INFO
+#log4j.logger.com.bigdata.zookeeper=INFO
+#log4j.logger.com.bigdata.zookeeper.ZooHelper=ALL
+log4j.logger.com.bigdata.rdf.internal.LexiconConfiguration=FATAL
+
+log4j.appender.haLog=org.apache.log4j.FileAppender
+log4j.appender.haLog.Threshold=ALL
+# Note: path is relative to the directory in which the service starts.
+log4j.appender.haLog.File=halog-C.txt
+log4j.appender.haLog.Append=true
+log4j.appender.haLog.layout=org.apache.log4j.PatternLayout
+log4j.appender.haLog.layout.ConversionPattern=%-5p: %d{HH:mm:ss,SSS} %r %X{hostname} %X{serviceUUID} %X{taskname} %X{timestamp} %X{resources} %t %l: %m%n
\ No newline at end of file
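Note on the templates above: the three files differ only in the FileAppender output file (halog-A.txt, halog-B.txt, halog-C.txt) and in whether the com.bigdata.io.writecache logger is enabled. The sketch below is a hypothetical illustration of how a test harness might install one of these per-service templates and populate the MDC keys referenced by the ConversionPattern (%X{hostname}, %X{serviceUUID}); the class, method, and directory names are assumptions, not code from this commit.

import java.io.File;
import java.net.InetAddress;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;
import java.util.UUID;

import org.apache.log4j.MDC;
import org.apache.log4j.PropertyConfigurator;

public class ServiceLoggingSetup {

    /**
     * Install a per-service log4j template and set the MDC keys used by the
     * ConversionPattern. (Illustrative only; the real harness may differ.)
     */
    public static void configureFor(final File templateFile, final File serviceDir,
            final UUID serviceUUID) throws Exception {

        // Copy the template (e.g., log4j-template-A.properties) into the service directory.
        final File log4jProperties = new File(serviceDir, "log4j.properties");
        Files.copy(templateFile.toPath(), log4jProperties.toPath(),
                StandardCopyOption.REPLACE_EXISTING);

        // Apply it. Per the Note in the template, the FileAppender path (halog-A.txt, etc.)
        // is resolved relative to the directory in which the service starts.
        PropertyConfigurator.configure(log4jProperties.getPath());

        // Populate the MDC keys referenced by %X{hostname} and %X{serviceUUID}.
        MDC.put("hostname", InetAddress.getLocalHost().getHostName());
        MDC.put("serviceUUID", serviceUUID.toString());
    }
}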
From: <mar...@us...> - 2014-04-09 09:38:56
Revision: 8091
http://sourceforge.net/p/bigdata/code/8091
Author: martyncutcher
Date: 2014-04-09 09:38:52 +0000 (Wed, 09 Apr 2014)
Log Message:
-----------
Add HA5 HALog tests for #722
Modified Paths:
--------------
branches/BIGDATA_MGC_HA1_HA5/bigdata-jini/src/test/com/bigdata/journal/jini/ha/AbstractHA3BackupTestCase.java
branches/BIGDATA_MGC_HA1_HA5/bigdata-jini/src/test/com/bigdata/journal/jini/ha/AbstractHA3JournalServerTestCase.java
branches/BIGDATA_MGC_HA1_HA5/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA5JournalServer.java
Added Paths:
-----------
branches/BIGDATA_MGC_HA1_HA5/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA5JournalServerWithHALogs.java
Property Changed:
----------------
branches/BIGDATA_MGC_HA1_HA5/bigdata-jini/src/test/com/bigdata/journal/jini/ha/
Index: branches/BIGDATA_MGC_HA1_HA5/bigdata-jini/src/test/com/bigdata/journal/jini/ha
===================================================================
--- branches/BIGDATA_MGC_HA1_HA5/bigdata-jini/src/test/com/bigdata/journal/jini/ha 2014-04-09 07:44:27 UTC (rev 8090)
+++ branches/BIGDATA_MGC_HA1_HA5/bigdata-jini/src/test/com/bigdata/journal/jini/ha 2014-04-09 09:38:52 UTC (rev 8091)
Property changes on: branches/BIGDATA_MGC_HA1_HA5/bigdata-jini/src/test/com/bigdata/journal/jini/ha
___________________________________________________________________
Modified: svn:ignore
## -1,3 +1,4 ##
log4j.properties
logging.properties
results.txt
+TestRWStoreAddress.java
Modified: branches/BIGDATA_MGC_HA1_HA5/bigdata-jini/src/test/com/bigdata/journal/jini/ha/AbstractHA3BackupTestCase.java
===================================================================
--- branches/BIGDATA_MGC_HA1_HA5/bigdata-jini/src/test/com/bigdata/journal/jini/ha/AbstractHA3BackupTestCase.java 2014-04-09 07:44:27 UTC (rev 8090)
+++ branches/BIGDATA_MGC_HA1_HA5/bigdata-jini/src/test/com/bigdata/journal/jini/ha/AbstractHA3BackupTestCase.java 2014-04-09 09:38:52 UTC (rev 8091)
@@ -170,17 +170,30 @@
{
final Properties p = new Properties();
+ final File aout = out.getAbsoluteFile();
+ // log.warn(aout.toString() + " modified: " + aout.lastModified());
+
+ p.setProperty(Journal.Options.FILE, aout.toString());
+
+ Journal jnl = new Journal(p);
- p.setProperty(Journal.Options.FILE, out.getAbsoluteFile()
- .toString());
-
- Journal jnl = new Journal(p);
-
try {
// Verify snapshot at the expected commit point.
assertEquals(commitCounterN, jnl.getRootBlockView()
.getCommitCounter());
+// {
+// final MessageDigest digest = MessageDigest
+// .getInstance("MD5");
+//
+// // digest of restored journal.
+// ((IHABufferStrategy) (jnl.getBufferStrategy()))
+// .computeDigest(null/* snapshot */, digest);
+//
+// final byte[] digest2 = digest.digest();
+//
+// System.err.println("Pre-restore: " + BytesUtil.toHexString(digest2));
+// }
// Verify journal can be dumped without error.
dumpJournal(jnl);
@@ -190,6 +203,7 @@
*/
final HARestore rest = new HARestore(jnl, getHALogDirA());
+ // System.err.println("Prior: " + jnl.getRootBlockView().toString());
/*
* Note: We can not test where we stop at the specified
* commit point in this method because the Journal state on
@@ -198,7 +212,21 @@
*/
rest.restore(false/* listCommitPoints */, Long.MAX_VALUE/* haltingCommitCounter */);
- // Verify journal now at the expected commit point.
+ // System.err.println("Post: " + jnl.getRootBlockView().toString());
+ /*
+ * FIXME For some reason, we need to close and reopen the
+ * journal before it can be used. See HARestore.
+ */
+ if (true) {
+ jnl.close();
+
+ // reopen.
+ jnl = new Journal(p);
+ }
+
+ // System.err.println("Post reopen: " + jnl.getRootBlockView().toString());
+
+ // Verify journal now at the expected commit point.
assertEquals(commitCounterM, jnl.getRootBlockView()
.getCommitCounter());
@@ -231,25 +259,17 @@
final String digest2Str = new BigInteger(1, digest2)
.toString(16);
+ System.err.println("Original: " + serverA.getRootBlock(new HARootBlockRequest(null)).getRootBlock().toString());
+ System.err.println("Restored: " + jnl.getRootBlockView().toString());
+
fail("Digests differ after restore and replay: expected="
+ digestAStr + ", actual=" + digest2Str);
-
+
}
}
- /*
- * FIXME For some reason, we need to close and reopen the
- * journal before it can be used. See HARestore.
- */
- if (true) {
- jnl.close();
-
- // reopen.
- jnl = new Journal(p);
- }
-
- // Verify can dump journal after restore.
+ // Verify can dump journal after restore.
dumpJournal(jnl);
} finally {
Modified: branches/BIGDATA_MGC_HA1_HA5/bigdata-jini/src/test/com/bigdata/journal/jini/ha/AbstractHA3JournalServerTestCase.java
===================================================================
--- branches/BIGDATA_MGC_HA1_HA5/bigdata-jini/src/test/com/bigdata/journal/jini/ha/AbstractHA3JournalServerTestCase.java 2014-04-09 07:44:27 UTC (rev 8090)
+++ branches/BIGDATA_MGC_HA1_HA5/bigdata-jini/src/test/com/bigdata/journal/jini/ha/AbstractHA3JournalServerTestCase.java 2014-04-09 09:38:52 UTC (rev 8091)
@@ -227,9 +227,12 @@
/**
* {@link UUID}s for the {@link HAJournalServer}s.
*/
- private UUID serverAId = UUID.randomUUID(), serverBId = UUID.randomUUID(),
- serverCId = UUID.randomUUID();
+ protected UUID serverAId = UUID.randomUUID();
+ private UUID serverBId = UUID.randomUUID();
+
+ private UUID serverCId = UUID.randomUUID();
+
/**
* The HTTP ports at which the services will respond.
*
Modified: branches/BIGDATA_MGC_HA1_HA5/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA5JournalServer.java
===================================================================
--- branches/BIGDATA_MGC_HA1_HA5/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA5JournalServer.java 2014-04-09 07:44:27 UTC (rev 8090)
+++ branches/BIGDATA_MGC_HA1_HA5/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA5JournalServer.java 2014-04-09 09:38:52 UTC (rev 8091)
@@ -740,12 +740,12 @@
serverA, serverB, serverC });
// D and E will have to go through Rebuild before joining
- assertEquals(token2, awaitFullyMetQuorum(10/* ticks */));
+ assertEquals(token2, awaitFullyMetQuorum(20/* ticks */));
// Note: I have seen this timeout. This warrants exploring. BBT.
// // Wait until C is fully ready.
- assertEquals(HAStatusEnum.Follower, awaitNSSAndHAReady(serverD, 2*awaitQuorumTimeout, TimeUnit.MILLISECONDS));
- assertEquals(HAStatusEnum.Follower, awaitNSSAndHAReady(serverE, 2*awaitQuorumTimeout, TimeUnit.MILLISECONDS));
+ assertEquals(HAStatusEnum.Follower, awaitNSSAndHAReady(serverD, 4*awaitQuorumTimeout, TimeUnit.MILLISECONDS));
+ assertEquals(HAStatusEnum.Follower, awaitNSSAndHAReady(serverE, 4*awaitQuorumTimeout, TimeUnit.MILLISECONDS));
// Verify binary equality of ALL journals.
assertDigestsEquals(new HAGlue[] { serverA, serverB, serverC, serverD, serverE});
Added: branches/BIGDATA_MGC_HA1_HA5/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA5JournalServerWithHALogs.java
===================================================================
--- branches/BIGDATA_MGC_HA1_HA5/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA5JournalServerWithHALogs.java (rev 0)
+++ branches/BIGDATA_MGC_HA1_HA5/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA5JournalServerWithHALogs.java 2014-04-09 09:38:52 UTC (rev 8091)
@@ -0,0 +1,674 @@
+package com.bigdata.journal.jini.ha;
+
+import java.io.File;
+import java.util.Calendar;
+
+import com.bigdata.ha.HAGlue;
+import com.bigdata.ha.halog.HALogReader;
+import com.bigdata.ha.halog.IHALogReader;
+import com.bigdata.journal.CommitCounterUtility;
+import com.bigdata.journal.jini.ha.AbstractHA3JournalServerTestCase.ABC;
+
+import net.jini.config.Configuration;
+import junit.framework.TestCase;
+
+public class TestHA5JournalServerWithHALogs extends AbstractHA5JournalServerTestCase {
+
+ /**
+ * We need to set the time at which the {@link DefaultSnapshotPolicy} runs
+ * to some point in the future in order to avoid test failures due to
+ * violated assumptions when the policy self-triggers (based on the
+ * specified run time) during a CI run.
+ * <p>
+ * We do this by adding one hour to [now] and then converting it into the
+ * 'hhmm' format as an integer.
+ *
+ * @return The "never run" time as hhmm.
+ */
+ static protected String getNeverRunSnapshotTime() {
+
+ // Right now.
+ final Calendar c = Calendar.getInstance();
+
+ // Plus an hour.
+ c.add(Calendar.HOUR_OF_DAY, 1);
+
+ // Get the hour.
+ final int hh = c.get(Calendar.HOUR_OF_DAY);
+
+ // And the minutes.
+ final int mm = c.get(Calendar.MINUTE);
+
+ // Format as hhmm.
+ final String neverRun = "" + hh + (mm < 10 ? "0" : "") + mm;
+
+ return neverRun;
+
+ }
+
+ /**
+ * {@inheritDoc}
+ * <p>
+ * Note: This overrides some {@link Configuration} values for the
+ * {@link HAJournalServer} in order to establish conditions suitable for
+ * testing the {@link ISnapshotPolicy} and {@link IRestorePolicy}.
+ */
+ @Override
+ protected String[] getOverrides() {
+
+ /*
+ * We need to set the time at which the DefaultSnapshotPolicy runs to
+ * some point in the Future in order to avoid test failures due to
+ * violated assumptions when the policy runs up self-triggering (based
+ * on the specified run time) during a CI run.
+ */
+ final String neverRun = getNeverRunSnapshotTime();
+
+ return new String[]{
+ "com.bigdata.journal.jini.ha.HAJournalServer.restorePolicy=new com.bigdata.journal.jini.ha.DefaultRestorePolicy()",
+ "com.bigdata.journal.jini.ha.HAJournalServer.snapshotPolicy=new com.bigdata.journal.jini.ha.DefaultSnapshotPolicy("+neverRun+",0)",
+ "com.bigdata.journal.jini.ha.HAJournalServer.replicationFactor=5",
+ };
+
+ }
+
+ public TestHA5JournalServerWithHALogs() {
+ }
+
+ public TestHA5JournalServerWithHALogs(String name) {
+ super(name);
+ }
+
+ /**
+ * This is a unit test for the ability to silently remove a logically empty
+ * HALog file. Five services are started in sequence (A,B,C,D,E). A series of
+ * small commits are applied to the quorum. (C) is then shut down. A
+ * logically empty HALog file should exist on each service for the next
+ * commit point. However, since this might have been removed on C when it
+ * was shut down, we copy the logically empty HALog file from (A) to (C). We
+ * then do one more update. C is then restarted. We verify that C restarts
+ * and that the logically empty HALog file has been replaced by an HALog
+ * file that has the same digest as the HALog file for that commit point on
+ * (A,B).
+ * <p>
+ * Note: We can not reliably observe that the logically empty HALog file was
+ * removed during startup. However, this is not critical. What is critical
+ * is that the logically empty HALog file (a) does not prevent (C) from
+ * starting; (b) is replaced by the correct HALog data from the quorum
+ * leader; and (c) that (C) resynchronizes with the met quorum and joins
+ * causing a fully met quorum.
+ *
+ * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/679" >
+ * HAJournalServer can not restart due to logically empty log files
+ * </a>
+ */
+ public void test_startABCDE_logicallyEmptyLogFileDeletedOnRestartC() throws Exception {
+
+ final ABCDE abc = new ABCDE(true/* sequential */);
+
+ final HAGlue serverA = abc.serverA, serverB = abc.serverB;
+ HAGlue serverC = abc.serverC;
+ HAGlue serverD = abc.serverD;
+ HAGlue serverE = abc.serverE;
+
+ // Verify quorum is FULLY met.
+ awaitFullyMetQuorum();
+
+ // await the KB create commit point to become visible on each service.
+ awaitCommitCounter(1L, new HAGlue[] { serverA, serverB, serverC, serverD, serverE });
+
+ // Verify binary equality of ALL journals.
+ assertDigestsEquals(new HAGlue[] { serverA, serverB, serverC, serverD, serverE });
+
+ // Verify binary equality of ALL HALog files.
+ assertHALogDigestsEquals(1L/* firstCommitCounter */,
+ 1/* lastCommitCounter */, new HAGlue[] { serverA, serverB,
+ serverC, serverD, serverE });
+
+ /*
+ * Do a series of small commits.
+ */
+
+ final int NSMALL = 5;
+
+ for (int i = 1/* createKB */; i <= NSMALL; i++) {
+
+ simpleTransaction();
+
+ }
+
+ final long commitCounter1 = 1 + NSMALL; // AKA (6)
+
+ // await the commit points to become visible.
+ awaitCommitCounter(commitCounter1,
+ new HAGlue[] { serverA, serverB, serverC, serverD, serverE });
+
+ // Verify binary equality of ALL journals.
+ assertDigestsEquals(new HAGlue[] { serverA, serverB, serverC, serverD, serverE });
+
+ // Verify binary equality of ALL HALog files.
+// assertHALogDigestsEquals(1L/* firstCommitCounter */, commitCounter1,
+// new HAGlue[] { serverA, serverB, serverC, serverD, serverE });
+
+ /*
+ * Verify the expected #of HALogs on each service.
+ *
+ * Note: This is (lastCommitCounter+1) since an empty HALog was created
+ * for the next commit point.
+ */
+ awaitLogCount(getHALogDirA(), commitCounter1 + 1);
+ awaitLogCount(getHALogDirB(), commitCounter1 + 1);
+ awaitLogCount(getHALogDirC(), commitCounter1 + 1);
+ awaitLogCount(getHALogDirD(), commitCounter1 + 1);
+ awaitLogCount(getHALogDirE(), commitCounter1 + 1);
+
+ /*
+ * Shutdown C.
+ *
+ * Note: This might cause the empty HALog file on (C) to be deleted.
+ * That is Ok, since we will copy the desired empty HALog from (A) to
+ * (C), thus enforcing the desired test condition.
+ */
+ shutdownC();
+
+ /*
+ * Verify that there is an empty HALog file on (A) for the next
+ * commit point.
+ */
+
+ // The next commit point.
+ final long commitCounter2 = commitCounter1 + 1; // AKA (7)
+
+ // The HALog for that next commit point.
+ final File fileA = CommitCounterUtility.getCommitCounterFile(
+ getHALogDirA(), commitCounter2, IHALogReader.HA_LOG_EXT);
+
+ // Verify HALog file for next commit point on A is logically empty.
+ {
+ assertTrue(fileA.exists());
+ final IHALogReader r = new HALogReader(fileA);
+ assertTrue(r.isEmpty());
+ assertFalse(r.isLive());
+ r.close();
+ assertTrue(fileA.exists());
+ }
+
+ // The name of that HALog file on (C).
+ final File fileC = CommitCounterUtility.getCommitCounterFile(
+ getHALogDirC(), commitCounter2, IHALogReader.HA_LOG_EXT);
+
+ // Copy that empty HALog file to (C).
+ copyFile(fileA, fileC, false/* append */);
+
+ /*
+ * Do another transaction. This will cause the HALog file for that
+ * commit point to be non-empty on A.
+ */
+ simpleTransaction();
+
+ /*
+ * Await the commit points to become visible.
+ *
+ * Note: This is (lastCommitCounter+1) since an empty HALog was created
+ * for the next commit point.
+ */
+ awaitCommitCounter(commitCounter2, new HAGlue[] { serverA, serverB, serverD, serverE });
+
+ // Verify the expected #of HALogs on each service.
+ awaitLogCount(getHALogDirA(), commitCounter2 + 1);
+ awaitLogCount(getHALogDirB(), commitCounter2 + 1);
+ awaitLogCount(getHALogDirD(), commitCounter2 + 1);
+ awaitLogCount(getHALogDirE(), commitCounter2 + 1);
+ awaitLogCount(getHALogDirC(), commitCounter2);
+
+ // Verify HALog file for next commit point on A is NOT empty.
+ {
+ assertTrue(fileA.exists());
+ final IHALogReader r = new HALogReader(fileA);
+ assertFalse(r.isEmpty());
+ assertFalse(r.isLive());
+ r.close();
+ assertTrue(fileA.exists());
+ }
+
+ // Verify HALog file for next commit point on C is logically empty.
+ {
+ assertTrue(fileC.exists());
+ final IHALogReader r = new HALogReader(fileC);
+ assertTrue(r.isEmpty());
+ assertFalse(r.isLive());
+ r.close();
+ assertTrue(fileC.exists());
+ }
+
+ /*
+ * Restart (C). It should start without complaint. The logically empty
+ * HALog file should be replaced by the corresponding file from (A) by
+ * the time the quorum fully meets. At this point all services will have
+ * the same digests for all HALog files.
+ */
+
+ // Restart C.
+ serverC = startC();
+
+ // Wait until the quorum is fully met.
+ awaitFullyMetQuorum();
+
+ // await the commit points to become visible.
+ awaitCommitCounter(commitCounter2,
+ new HAGlue[] { serverA, serverB, serverD, serverE, serverC });
+
+ // Verify binary equality of ALL journals.
+ assertDigestsEquals(new HAGlue[] { serverA, serverB, serverD, serverE, serverC });
+
+ // Verify binary equality of ALL HALog files.
+ assertHALogDigestsEquals(1L/* firstCommitCounter */,
+ commitCounter2 /* lastCommitCounter */, new HAGlue[] { serverA,
+ serverB, serverD, serverE, serverC });
+
+ /*
+ * Verify the expected #of HALogs on each service.
+ *
+ * Note: Each service will have an empty HALog for the next commit
+ * point.
+ */
+ awaitLogCount(getHALogDirA(), commitCounter2+1);
+ awaitLogCount(getHALogDirB(), commitCounter2+1);
+ awaitLogCount(getHALogDirD(), commitCounter2+1);
+ awaitLogCount(getHALogDirE(), commitCounter2+1);
+ awaitLogCount(getHALogDirC(), commitCounter2+1);
+
+ }
+
+
+ /**
+ * This is a unit test for the ability to silently remove a physically empty
+ * HALog file. Five services are started in sequence (A,B,C,D,E). A series of
+ * small commits are applied to the quorum. (C) is then shut down. A
+ * logically empty HALog file should exist on each service for the next
+ * commit point. We now overwrite that file with a physically empty HALog
+ * file (zero length). We then do one more update. C is then restarted. We
+ * verify that C restarts and that the physically empty HALog file has been
+ * replaced by an HALog file that has the same digest as the HALog file for
+ * that commit point on (A,B).
+ * <p>
+ * Note: We can not reliably observe that the physically empty HALog file was
+ * removed during startup. However, this is not critical. What is critical
+ * is that the physically empty HALog file (a) does not prevent (C) from
+ * starting; (b) is replaced by the correct HALog data from the quorum
+ * leader; and (c) that (C) resynchronizes with the met quorum and joins
+ * causing a fully met quorum.
+ *
+ * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/679" >
+ * HAJournalServer can not restart due to logically empty log files
+ * </a>
+ * @see <a href="http://sourceforge.net/apps/trac/bigdata/ticket/775" >
+ * HAJournal start() </a>
+ */
+ public void test_startABCDE_physicallyEmptyLogFileDeletedOnRestartC() throws Exception {
+
+ final ABCDE abc = new ABCDE(true/* sequential */);
+
+ final HAGlue serverA = abc.serverA, serverB = abc.serverB;
+ HAGlue serverC = abc.serverC;
+ HAGlue serverD = abc.serverD;
+ HAGlue serverE = abc.serverE;
+
+ // Verify quorum is FULLY met.
+ awaitFullyMetQuorum();
+
+ // await the KB create commit point to become visible on each service.
+ awaitCommitCounter(1L, new HAGlue[] { serverA, serverB, serverC, serverD, serverE });
+
+ // Verify binary equality of ALL journals.
+ assertDigestsEquals(new HAGlue[] { serverA, serverB, serverC, serverD, serverE });
+
+ // Verify binary equality of ALL HALog files.
+ assertHALogDigestsEquals(1L/* firstCommitCounter */,
+ 1/* lastCommitCounter */, new HAGlue[] { serverA, serverB,
+ serverC, serverD, serverE });
+
+ /*
+ * Do a series of small commits.
+ */
+
+ final int NSMALL = 5;
+
+ for (int i = 1/* createKB */; i <= NSMALL; i++) {
+
+ simpleTransaction();
+
+ }
+
+ final long commitCounter1 = 1 + NSMALL; // AKA (6)
+
+ // await the commit points to become visible.
+ awaitCommitCounter(commitCounter1,
+ new HAGlue[] { serverA, serverB, serverC, serverD, serverE });
+
+ // Verify binary equality of ALL journals.
+ assertDigestsEquals(new HAGlue[] { serverA, serverB, serverC, serverD, serverE });
+
+ // Verify binary equality of ALL HALog files.
+ assertHALogDigestsEquals(1L/* firstCommitCounter */, commitCounter1,
+ new HAGlue[] { serverA, serverB, serverC, serverD, serverE });
+
+ /*
+ * Verify the expected #of HALogs on each service.
+ *
+ * Note: This is (lastCommitCounter+1) since an empty HALog was created
+ * for the next commit point.
+ */
+ awaitLogCount(getHALogDirA(), commitCounter1 + 1);
+ awaitLogCount(getHALogDirB(), commitCounter1 + 1);
+ awaitLogCount(getHALogDirC(), commitCounter1 + 1);
+ awaitLogCount(getHALogDirD(), commitCounter1 + 1);
+ awaitLogCount(getHALogDirE(), commitCounter1 + 1);
+
+ /*
+ * Shutdown C.
+ *
+ * Note: This might cause the empty HALog file on (C) to be deleted.
+ * That is Ok, since we will copy the desired empty HALog from (A) to
+ * (C), thus enforcing the desired test condition.
+ */
+ shutdownC();
+
+ /*
+ * Verify that there is an empty HALog file on (A) for the next
+ * commit point.
+ */
+
+ // The next commit point.
+ final long commitCounter2 = commitCounter1 + 1; // AKA (7)
+
+ // The HALog for that next commit point.
+ final File fileA = CommitCounterUtility.getCommitCounterFile(
+ getHALogDirA(), commitCounter2, IHALogReader.HA_LOG_EXT);
+
+ // Verify HALog file for next commit point on A is logically empty.
+ {
+ assertTrue(fileA.exists());
+ final IHALogReader r = new HALogReader(fileA);
+ assertTrue(r.isEmpty());
+ assertFalse(r.isLive());
+ r.close();
+ assertTrue(fileA.exists());
+ }
+
+ // The name of that HALog file on (C).
+ final File fileC = CommitCounterUtility.getCommitCounterFile(
+ getHALogDirC(), commitCounter2, IHALogReader.HA_LOG_EXT);
+
+// // Copy that empty HALog file to (C).
+// copyFile(fileA, fileC, false/* append */);
+
+ // delete the logically empty file (if it exists).
+ if (fileC.exists() && !fileC.delete())
+ fail("Could not delete: fileC=" + fileC);
+
+ // create the physically empty file.
+ if (!fileC.createNewFile())
+ fail("Could not create: fileC=" + fileC);
+
+ /*
+ * Do another transaction. This will cause the HALog file for that
+ * commit point to be non-empty on A.
+ */
+ simpleTransaction();
+
+ /*
+ * Await the commit points to become visible.
+ *
+ * Note: This is (lastCommitCounter+1) since an empty HALog was created
+ * for the next commit point.
+ */
+ awaitCommitCounter(commitCounter2, new HAGlue[] { serverA, serverB, serverD, serverE });
+
+ // Verify the expected #of HALogs on each service.
+ awaitLogCount(getHALogDirA(), commitCounter2 + 1);
+ awaitLogCount(getHALogDirB(), commitCounter2 + 1);
+ awaitLogCount(getHALogDirD(), commitCounter2 + 1);
+ awaitLogCount(getHALogDirE(), commitCounter2 + 1);
+ awaitLogCount(getHALogDirC(), commitCounter2);
+
+ // Verify HALog file for next commit point on A is NOT empty.
+ {
+ assertTrue(fileA.exists());
+ final IHALogReader r = new HALogReader(fileA);
+ assertFalse(r.isEmpty());
+ assertFalse(r.isLive());
+ r.close();
+ assertTrue(fileA.exists());
+ }
+
+ // Verify HALog file for next commit point on C is physically empty.
+ {
+ assertTrue(fileC.exists());
+ assertEquals(0L, fileC.length());
+ }
+
+ /*
+ * Restart (C). It should start without complaint. The physically empty
+ * HALog file should be replaced by the corresponding file from (A) by
+ * the time the quorum fully meets. At this point all services will have
+ * the same digests for all HALog files.
+ */
+
+ // Restart C.
+ serverC = startC();
+
+ // Wait until the quorum is fully met.
+ awaitFullyMetQuorum();
+
+ // await the commit points to become visible.
+ awaitCommitCounter(commitCounter2,
+ new HAGlue[] { serverA, serverB, serverD, serverE, serverC });
+
+ // Verify binary equality of ALL journals.
+ assertDigestsEquals(new HAGlue[] { serverA, serverB, serverD, serverE, serverC });
+
+ // Verify binary equality of ALL HALog files.
+ assertHALogDigestsEquals(1L/* firstCommitCounter */,
+ commitCounter2 /* lastCommitCounter */, new HAGlue[] { serverA,
+ serverB, serverD, serverE, serverC });
+
+ /*
+ * Verify the expected #of HALogs on each service.
+ *
+ * Note: Each service will have an empty HALog for the next commit
+ * point.
+ */
+ awaitLogCount(getHALogDirA(), commitCounter2+1);
+ awaitLogCount(getHALogDirB(), commitCounter2+1);
+ awaitLogCount(getHALogDirC(), commitCounter2+1);
+ awaitLogCount(getHALogDirD(), commitCounter2+1);
+ awaitLogCount(getHALogDirE(), commitCounter2+1);
+
+ }
+
+ /**
+ * Unit test for a situation in which A, B, C, D, and E start. A quorum meets and the
+ * final service resyncs with the met quorum. The quorum then fully meets.
+ * Once the fully met quorum is stable, E is then restarted. This test
+ * exercises a code path that handles the case where E is current, but is
+ * forced into RESYNC in case there are writes in progress on the leader.
+ * <p>
+ * Note: In this version of the test, the HALog files are NOT purged at each
+ * commit of the fully met quorum.
+ */
+ public void testStartABCDE_restartE() throws Exception {
+
+ final ABCDE x = new ABCDE(true/*sequential*/);
+
+ final long token = awaitFullyMetQuorum();
+
+ // Now run several transactions
+ final int NTX = 5;
+ for (int i = 0; i < NTX; i++)
+ simpleTransaction();
+
+ // wait until the commit point is registered on all services.
+ awaitCommitCounter(NTX + 1L, new HAGlue[] { x.serverA, x.serverB,
+ x.serverC, x.serverD, x.serverE });
+
+ /*
+ * The same number of HALog files should exist on all services.
+ *
+ * Note: the restore policy is setup such that we are NOT purging the HALog
+ * files at each commit of a fully met quorum.
+ */
+ awaitLogCount(getHALogDirA(), NTX + 2L);
+ awaitLogCount(getHALogDirB(), NTX + 2L);
+ awaitLogCount(getHALogDirC(), NTX + 2L);
+ awaitLogCount(getHALogDirD(), NTX + 2L);
+ awaitLogCount(getHALogDirE(), NTX + 2L);
+
+ // shutdown E - final service
+ shutdownE();
+
+ // wait for E to be gone from zookeeper.
+ awaitPipeline(new HAGlue[] { x.serverA, x.serverB, x.serverC, x.serverD });
...
[truncated message content]
From: <tho...@us...> - 2014-04-11 12:13:28
Revision: 8105
http://sourceforge.net/p/bigdata/code/8105
Author: thompsonbry
Date: 2014-04-11 12:13:24 +0000 (Fri, 11 Apr 2014)
Log Message:
-----------
Reconciled Martyn's edits and my own on the HA1/HA5 branch prior to merging in the delta from the main branch.
Modified Paths:
--------------
branches/BIGDATA_MGC_HA1_HA5/bigdata-jini/src/test/com/bigdata/journal/jini/ha/AbstractHA3BackupTestCase.java
branches/BIGDATA_MGC_HA1_HA5/bigdata-jini/src/test/com/bigdata/journal/jini/ha/AbstractHA3JournalServerTestCase.java
branches/BIGDATA_MGC_HA1_HA5/bigdata-jini/src/test/com/bigdata/journal/jini/ha/AbstractHAJournalServerTestCase.java
branches/BIGDATA_MGC_HA1_HA5/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA1SnapshotPolicy.java
Modified: branches/BIGDATA_MGC_HA1_HA5/bigdata-jini/src/test/com/bigdata/journal/jini/ha/AbstractHA3BackupTestCase.java
===================================================================
--- branches/BIGDATA_MGC_HA1_HA5/bigdata-jini/src/test/com/bigdata/journal/jini/ha/AbstractHA3BackupTestCase.java 2014-04-11 11:43:06 UTC (rev 8104)
+++ branches/BIGDATA_MGC_HA1_HA5/bigdata-jini/src/test/com/bigdata/journal/jini/ha/AbstractHA3BackupTestCase.java 2014-04-11 12:13:24 UTC (rev 8105)
@@ -43,6 +43,7 @@
import com.bigdata.ha.msg.HARootBlockRequest;
import com.bigdata.journal.CommitCounterUtility;
import com.bigdata.journal.IHABufferStrategy;
+import com.bigdata.journal.IRootBlockView;
import com.bigdata.journal.Journal;
import com.bigdata.rdf.sail.webapp.client.ConnectOptions;
import com.bigdata.rdf.sail.webapp.client.RemoteRepository;
@@ -146,10 +147,12 @@
* The current commit counter on the server. This is the commit point
* that should be restored.
*/
- final long commitCounterM = serverA
- .getRootBlock(new HARootBlockRequest(null/* storeUUID */))
- .getRootBlock().getCommitCounter();
+
+ final IRootBlockView serverARootBlock = serverA.getRootBlock(
+ new HARootBlockRequest(null/* storeUUID */)).getRootBlock();
+ final long commitCounterM = serverARootBlock.getCommitCounter();
+
final File snapshotFile = SnapshotManager.getSnapshotFile(
getSnapshotDirA(), commitCounterN);
@@ -170,40 +173,26 @@
{
final Properties p = new Properties();
- final File aout = out.getAbsoluteFile();
- // log.warn(aout.toString() + " modified: " + aout.lastModified());
-
- p.setProperty(Journal.Options.FILE, aout.toString());
-
- Journal jnl = new Journal(p);
+ p.setProperty(Journal.Options.FILE, out.getAbsoluteFile()
+ .toString());
+
+ Journal jnl = new Journal(p);
+
try {
// Verify snapshot at the expected commit point.
assertEquals(commitCounterN, jnl.getRootBlockView()
.getCommitCounter());
-// {
-// final MessageDigest digest = MessageDigest
-// .getInstance("MD5");
-//
-// // digest of restored journal.
-// ((IHABufferStrategy) (jnl.getBufferStrategy()))
-// .computeDigest(null/* snapshot */, digest);
-//
-// final byte[] digest2 = digest.digest();
-//
-// System.err.println("Pre-restore: " + BytesUtil.toHexString(digest2));
-// }
// Verify journal can be dumped without error.
dumpJournal(jnl);
-
+
/*
* Now roll that journal forward using the HALog directory.
*/
final HARestore rest = new HARestore(jnl, getHALogDirA());
- // System.err.println("Prior: " + jnl.getRootBlockView().toString());
/*
* Note: We can not test where we stop at the specified
* commit point in this method because the Journal state on
@@ -212,7 +201,6 @@
*/
rest.restore(false/* listCommitPoints */, Long.MAX_VALUE/* haltingCommitCounter */);
- // System.err.println("Post: " + jnl.getRootBlockView().toString());
/*
* FIXME For some reason, we need to close and reopen the
* journal before it can be used. See HARestore.
@@ -224,12 +212,18 @@
jnl = new Journal(p);
}
- // System.err.println("Post reopen: " + jnl.getRootBlockView().toString());
+ // Verify can dump journal after restore.
+ dumpJournal(jnl);
- // Verify journal now at the expected commit point.
+ // Verify journal now at the expected commit point.
assertEquals(commitCounterM, jnl.getRootBlockView()
.getCommitCounter());
+ if (!serverARootBlock.equals(jnl.getRootBlockView())) {
+ fail("Root blocks differ: serverA=" + serverARootBlock
+ + ", restored=" + jnl.getRootBlockView());
+ }
+
/*
* Compute digest of the restored journal. The digest should
* agree with the digest of the Journal on A since we rolled
@@ -242,14 +236,17 @@
new HADigestRequest(null/* storeUUID */))
.getDigest();
- final MessageDigest digest = MessageDigest
- .getInstance("MD5");
+ final byte[] digest2;
+ {
+ final MessageDigest digest = MessageDigest
+ .getInstance("MD5");
- // digest of restored journal.
- ((IHABufferStrategy) (jnl.getBufferStrategy()))
- .computeDigest(null/* snapshot */, digest);
+ // digest of restored journal.
+ ((IHABufferStrategy) (jnl.getBufferStrategy()))
+ .computeDigest(null/* snapshot */, digest);
- final byte[] digest2 = digest.digest();
+ digest2 = digest.digest();
+ }
if (!BytesUtil.bytesEqual(digestA, digest2)) {
@@ -259,19 +256,13 @@
final String digest2Str = new BigInteger(1, digest2)
.toString(16);
- System.err.println("Original: " + serverA.getRootBlock(new HARootBlockRequest(null)).getRootBlock().toString());
- System.err.println("Restored: " + jnl.getRootBlockView().toString());
-
fail("Digests differ after restore and replay: expected="
+ digestAStr + ", actual=" + digest2Str);
-
+
}
}
- // Verify can dump journal after restore.
- dumpJournal(jnl);
-
} finally {
if (jnl != null) {
Modified: branches/BIGDATA_MGC_HA1_HA5/bigdata-jini/src/test/com/bigdata/journal/jini/ha/AbstractHA3JournalServerTestCase.java
===================================================================
--- branches/BIGDATA_MGC_HA1_HA5/bigdata-jini/src/test/com/bigdata/journal/jini/ha/AbstractHA3JournalServerTestCase.java 2014-04-11 11:43:06 UTC (rev 8104)
+++ branches/BIGDATA_MGC_HA1_HA5/bigdata-jini/src/test/com/bigdata/journal/jini/ha/AbstractHA3JournalServerTestCase.java 2014-04-11 12:13:24 UTC (rev 8105)
@@ -227,7 +227,7 @@
/**
* {@link UUID}s for the {@link HAJournalServer}s.
*/
- protected UUID serverAId = UUID.randomUUID();
+ private UUID serverAId = UUID.randomUUID();
private UUID serverBId = UUID.randomUUID();
Modified: branches/BIGDATA_MGC_HA1_HA5/bigdata-jini/src/test/com/bigdata/journal/jini/ha/AbstractHAJournalServerTestCase.java
===================================================================
--- branches/BIGDATA_MGC_HA1_HA5/bigdata-jini/src/test/com/bigdata/journal/jini/ha/AbstractHAJournalServerTestCase.java 2014-04-11 11:43:06 UTC (rev 8104)
+++ branches/BIGDATA_MGC_HA1_HA5/bigdata-jini/src/test/com/bigdata/journal/jini/ha/AbstractHAJournalServerTestCase.java 2014-04-11 12:13:24 UTC (rev 8105)
@@ -970,7 +970,7 @@
* Verify that the digest of the journal is equal to the digest of the
* indicated snapshot on the specified service.
* <p>
- * Note: This can only succeed if the journal is at the specififed commit
+ * Note: This can only succeed if the journal is at the specified commit
* point. If there are concurrent writes on the journal, then its digest
* will no longer be consistent with the snapshot.
*
Modified: branches/BIGDATA_MGC_HA1_HA5/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA1SnapshotPolicy.java
===================================================================
--- branches/BIGDATA_MGC_HA1_HA5/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA1SnapshotPolicy.java 2014-04-11 11:43:06 UTC (rev 8104)
+++ branches/BIGDATA_MGC_HA1_HA5/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA1SnapshotPolicy.java 2014-04-11 12:13:24 UTC (rev 8105)
@@ -17,6 +17,10 @@
import com.bigdata.journal.Journal;
import com.bigdata.rdf.sail.webapp.client.RemoteRepository;
+/**
+ * Test suite for the restore of the HA1 Journal from a snapshot and transaction
+ * logs.
+ */
public class TestHA1SnapshotPolicy extends AbstractHA3BackupTestCase {
public TestHA1SnapshotPolicy() {
@@ -437,8 +441,8 @@
*/
public void testA_snapshot_multipleTx_restore_validate() throws Exception {
- final int N1 = 7; //7; // #of transactions to run before the snapshot.
- final int N2 = 8; //8; // #of transactions to run after the snapshot.
+ final int N1 = 7; // #of transactions to run before the snapshot.
+ final int N2 = 8; // #of transactions to run after the snapshot.
// Start service.
final HAGlue serverA = startA();
@@ -458,13 +462,13 @@
// Now run N transactions.
for (int i = 0; i < N1; i++) {
+
+ simpleTransaction();
- simpleTransaction();
-
}
-
- final long commitCounterN1 = N1 + 1;
+ final long commitCounterN1 = N1 + 1;
+
awaitCommitCounter(commitCounterN1, serverA);
/*
@@ -477,7 +481,7 @@
// Snapshot directory is empty.
assertEquals(1, recursiveCount(getSnapshotDirA(),SnapshotManager.SNAPSHOT_FILTER));
-
+
// request snapshot on A.
final Future<IHASnapshotResponse> ft = serverA
.takeSnapshot(new HASnapshotRequest(0/* percentLogSize */));
@@ -503,6 +507,19 @@
}
+ {
+ // Snapshot directory contains just the expected snapshot
+ assertExpectedSnapshots(getSnapshotDirA(),
+ new long[] { commitCounterN1 });
+
+ /*
+ * Now, get the snapshot that we took above, decompress it, and then
+ * roll it forward and verify it against the current committed
+ * journal.
+ */
+ doRestoreA(serverA, commitCounterN1);
+ }
+
// Now run M transactions.
for (int i = 0; i < N2; i++) {
@@ -514,7 +531,6 @@
awaitCommitCounter(commitCounterN2, serverA);
-
// Snapshot directory contains just the expected snapshot
assertExpectedSnapshots(getSnapshotDirA(), new long[]{commitCounterN1});
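Both revisions of the AbstractHA3BackupTestCase restore check above follow the same digest-comparison pattern: compute an MD5 digest of the restored journal, compare it byte-wise against the leader's digest, and report any mismatch in hex via BigInteger. The following is a self-contained sketch of that pattern only; the file-based digest here stands in for IHABufferStrategy.computeDigest() and serverA.computeDigest(...), which are not reproduced, and all names are illustrative.

import java.io.InputStream;
import java.math.BigInteger;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.security.MessageDigest;
import java.util.Arrays;

public class DigestCheck {

    /** MD5 digest of a file, streamed in chunks. */
    static byte[] md5(final Path file) throws Exception {
        final MessageDigest digest = MessageDigest.getInstance("MD5");
        try (InputStream in = Files.newInputStream(file)) {
            final byte[] buf = new byte[8192];
            int n;
            while ((n = in.read(buf)) != -1) {
                digest.update(buf, 0, n);
            }
        }
        return digest.digest();
    }

    public static void main(final String[] args) throws Exception {
        final byte[] digestA = md5(Paths.get(args[0])); // e.g. journal on the leader.
        final byte[] digest2 = md5(Paths.get(args[1])); // e.g. restored journal.
        if (!Arrays.equals(digestA, digest2)) {
            // Report both digests in hex, as the test does with new BigInteger(1, bytes).toString(16).
            throw new AssertionError("Digests differ after restore and replay: expected="
                    + new BigInteger(1, digestA).toString(16) + ", actual="
                    + new BigInteger(1, digest2).toString(16));
        }
        System.out.println("Digests agree: " + new BigInteger(1, digestA).toString(16));
    }
}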