From: <tho...@us...> - 2013-04-05 18:25:22
|
Revision: 7036 http://bigdata.svn.sourceforge.net/bigdata/?rev=7036&view=rev Author: thompsonbry Date: 2013-04-05 18:25:08 +0000 (Fri, 05 Apr 2013) Log Message: ----------- Fix problems with test suite and code for IRestorePolicy test. @see https://sourceforge.net/apps/trac/bigdata/ticket/645 (HA Backup) Modified Paths: -------------- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/QuorumPipeline.java branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/QuorumServiceBase.java branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractJournal.java branches/READ_CACHE/bigdata/src/test/com/bigdata/io/writecache/TestWORMWriteCacheService.java branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/CommitTimeIndex.java branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/DefaultRestorePolicy.java branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournal.java branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournalServer.java branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/SnapshotManager.java branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/AbstractHAJournalServerTestCase.java branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA3RestorePolicy.java branches/READ_CACHE/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/HAStatusServletUtil.java Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/QuorumPipeline.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/QuorumPipeline.java 2013-04-04 21:34:01 UTC (rev 7035) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/QuorumPipeline.java 2013-04-05 18:25:08 UTC (rev 7036) @@ -182,7 +182,13 @@ * service. * <p> * Note: The open HALog file is NOT removed by this method. + * + * @param token + * The quorum token. The quorum must remain fully met for this + * token across this operation. 
If that invariant is violated + * then the operation will terminate once this violation is + * noticed. */ - void purgeHALogs(); + void purgeHALogs(final long token); } Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/QuorumServiceBase.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/QuorumServiceBase.java 2013-04-04 21:34:01 UTC (rev 7035) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/ha/QuorumServiceBase.java 2013-04-05 18:25:08 UTC (rev 7036) @@ -149,9 +149,9 @@ } @Override - public void purgeHALogs() { + public void purgeHALogs(final long token) { - QuorumServiceBase.this.purgeHALogs(); + QuorumServiceBase.this.purgeHALogs(token); } @@ -273,7 +273,7 @@ * Note: The default implementation is a NOP. */ @Override - public void purgeHALogs() { + public void purgeHALogs(final long token) { // NOP Modified: branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractJournal.java =================================================================== --- branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractJournal.java 2013-04-04 21:34:01 UTC (rev 7035) +++ branches/READ_CACHE/bigdata/src/java/com/bigdata/journal/AbstractJournal.java 2013-04-05 18:25:08 UTC (rev 7036) @@ -5980,18 +5980,14 @@ + rootBlock); } - if (quorum.isQuorumFullyMet(rootBlock.getQuorumToken())) { + /* + * The HA log files are purged on each node any time the + * quorum is fully met and goes through a commit point. + * Leaving only the current open log file. + */ - /* - * The HA log files are purged on each node any time - * the quorum is fully met and goes through a commit - * point. Leaving only the current open log file. 
- */ + localService.purgeHALogs(rootBlock.getQuorumToken()); - localService.purgeHALogs(); - - } - } catch(Throwable t) { haLog.error("ERROR IN 2-PHASE COMMIT: " + t Modified: branches/READ_CACHE/bigdata/src/test/com/bigdata/io/writecache/TestWORMWriteCacheService.java =================================================================== --- branches/READ_CACHE/bigdata/src/test/com/bigdata/io/writecache/TestWORMWriteCacheService.java 2013-04-04 21:34:01 UTC (rev 7035) +++ branches/READ_CACHE/bigdata/src/test/com/bigdata/io/writecache/TestWORMWriteCacheService.java 2013-04-05 18:25:08 UTC (rev 7036) @@ -377,9 +377,9 @@ } @Override - public void purgeHALogs() { + public void purgeHALogs(final long token) { - MyMockQuorumMember.this.purgeHALogs(); + MyMockQuorumMember.this.purgeHALogs(token); } @@ -479,7 +479,7 @@ } @Override - public void purgeHALogs() { + public void purgeHALogs(final long token) { // NOP Modified: branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/CommitTimeIndex.java =================================================================== --- branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/CommitTimeIndex.java 2013-04-04 21:34:01 UTC (rev 7035) +++ branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/CommitTimeIndex.java 2013-04-05 18:25:08 UTC (rev 7036) @@ -168,15 +168,16 @@ } /** - * Find the first journal whose <em>createTime</em> is strictly greater - * than the timestamp. + * Return the {@link IRootBlockView} identifying the first snapshot whose + * <em>commitTime</em> is strictly greater than the timestamp. * * @param timestamp * The timestamp. A value of ZERO (0) may be used to find the - * first journal. + * first snapshot. * - * @return The commit record -or- <code>null</code> if there is no commit - * record whose timestamp is strictly greater than <i>timestamp</i>. 
+ * @return The root block of that snapshot -or- <code>null</code> if there + * is no snapshot whose timestamp is strictly greater than + * <i>timestamp</i>. */ synchronized public IRootBlockView findNext(final long timestamp) { @@ -188,8 +189,8 @@ throw new IllegalArgumentException(); // find first strictly greater than. - final long index = findIndexOf(Math.abs(timestamp)) + 1; - + final long index = findIndexOf(timestamp) + 1; + if (index == nentries) { // No match. Modified: branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/DefaultRestorePolicy.java =================================================================== --- branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/DefaultRestorePolicy.java 2013-04-04 21:34:01 UTC (rev 7035) +++ branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/DefaultRestorePolicy.java 2013-04-05 18:25:08 UTC (rev 7036) @@ -34,17 +34,33 @@ * deciding when snapshots (aka full backups) and HALogs (aka write ahead log * files for each commit point aka incremental backups) may be purged. * <dl> - * <dt>minSnapshotAgeMillis</dt> - * <dd>The minimum age of a snapshot before it may be deleted.</dd> + * <dt>minRestoreAgeMillis</dt> + * <dd>The minimum restore period (in milliseconds). Snapshots and/or HALog + * files will be retained to ensure the ability to restore a commit point this + * far in the past.</dd> * <dt>minSnapshots</dt> * <dd>The minimum number of snapshot files (aka full backups) that must be - * retained.</dd> + * retained (positive integer). + * <p> + * This must be a positive integer. If the value were ZERO a snapshot would be + * purged as soon as it is taken. That would not provide an opportunity to make + * a copy of the snapshot, rendering the snapshot mechanism useless. + * <p> + * If <code>minSnapshots:=1</code> then a snapshot, once taken, will be retained + * until the next snapshot is taken. 
Further, the existence of the snapshot will + * cause HALog files for commit points GT that snapshot to accumulate until the + * next snapshot. This will occur regardless of the value of the other + * parameters. Thus, if you occasionally take snapshots and move them offsite, + * you must REMOVE the snapshot by hand in order to allow the retained HALogs to + * be reclaimed as well. + * <p> + * This concern does not arise if you are taking periodic snapshots.</dd> * <dt>minRestorePoints</dt> * <dd>The minimum number of commit points that must be restorable from backup. - * This explicitly controls the number of HALog files that will be retained. It - * also implicitly controls the number of snapshot files that will be retained - * since an HALog file will pin the newest snapshot whose commit counter is LTE - * to the the closing commit counter on that HALog file.</dd> + * This explicitly controls the number of HALog files that will be retained + * (non-negative). It also implicitly controls the number of snapshot files that + * will be retained since an HALog file will pin the newest snapshot whose + * commit counter is LTE to the closing commit counter on that HALog file.</dd> * </dl> * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> @@ -75,7 +91,7 @@ /** * The minimum age of a snapshot before it may be purged. */ - private final long minSnapshotAgeMillis; + private final long minRestoreAgeMillis; /** * The minimum #of snapshots that must be retained. @@ -91,7 +107,7 @@ public String toString() { return DefaultRestorePolicy.class.getSimpleName()// - + "{minSnapshotAge=" + minSnapshotAgeMillis + "ms"// + + "{minRestoreAge=" + minRestoreAgeMillis + "ms"// + ",minSnapshots=" + minSnapshots // + ",minRestorePoints=" + minRestorePoints // + "}"; @@ -113,13 +129,13 @@ * policy will retain local backups unless all of the criteria are * satisified. 
* - * @param minSnapshotAgeMillis + * @param minRestoreAgeMillis * The minimum age of a snapshot (in milliseconds) before it may * be purged. */ - public DefaultRestorePolicy(final long minSnapshotAgeMillis) { + public DefaultRestorePolicy(final long minRestoreAgeMillis) { - this(minSnapshotAgeMillis, DEFAULT_MIN_SNAPSHOTS, + this(minRestoreAgeMillis, DEFAULT_MIN_SNAPSHOTS, DEFAULT_MIN_RESTORE_POINTS); } @@ -129,37 +145,72 @@ * policy will retain local backups unless all of the criteria are * satisified. * - * @param minSnapshotAgeMillis + * @param minRestoreAgeMillis * The minimum age of a snapshot (in milliseconds) before it may - * be purged. + * be purged (non-negative integer). * @param minSnapshots * The minimum number of snapshots (aka full backups) that must - * be retained locally. + * be retained locally (positive integer). + * <p> + * This must be a positive integer. If the value were ZERO a + * snapshot would be purged as soon as it is taken. That would + * not provide an opportunity to make a copy of the snapshot, + * rendering the snapshot mechanism useless. + * <p> + * If <code>minSnapshots:=1</code> then a snapshot, once taken, + * will be retained until the next snapshot is taken. Further, + * the existence of the snapshot will cause HALog files for + * commit points GT that snapshot to accumulate until the next + * snapshot. This will occur regardless of the value of the other + * parameters. Thus, if you occasionally take snapshots and move + * them offsite, you must REMOVE the snapshot by hand in order to + * allow the retained HALogs to be reclaimed as well. + * <p> + * This concern does not arise if you are taking periodic + * snapshots. * @param minRestorePoints * The minimum number of restore points (aka HALog files) that - * must be retained locally. If an HALog is pinned by this - * parameter, then the oldest snapshot LTE the commit counter of - * that HALog is also pinned, as are all HALog files GTE the - * snapshot and LT the HALog. 
+ * must be retained locally (non-negative integer). If an HALog + * is pinned by this parameter, then the oldest snapshot LTE the + * commit counter of that HALog is also pinned, as are all HALog + * files GTE the snapshot and LT the HALog. */ - public DefaultRestorePolicy(final long minSnapshotAgeMillis, + public DefaultRestorePolicy(final long minRestoreAgeMillis, final int minSnapshots, final int minRestorePoints) { - if (minSnapshotAgeMillis < 0) + if (minRestoreAgeMillis < 0) throw new IllegalArgumentException( - "minSnapshotAgeMillis must be GTE ZERO (0), not " - + minSnapshotAgeMillis); + "minRestoreAgeMillis must be GTE ZERO (0), not " + + minRestoreAgeMillis); - if (minSnapshots < 1) + if (minSnapshots < 1) { + /* + * This must be a positive integer. If the value were ZERO a + * snapshot would be purged as soon as it is taken. That would not + * provide an opportunity to make a copy of the snapshot, rendering + * the snapshot mechanism useless. + * + * If minSnapshots:=1 then a snapshot, once taken, will be retained + * until the next snapshot is taken. Further, the existence of the + * snapshot will cause HALog files for commit points GT that + * snapshot to accumulate until the next snapshot. This will occur + * regardless of the value of the other parameters. Thus, if you + * occasionally take snapshots and move them offsite, you must + * REMOVE the snapshot by hand in order to allow the retained HALogs + * to be reclaimed as well. + * + * This concern does not arise if you are taking periodic snapshots. 
+ */ throw new IllegalArgumentException( "minSnapshots must be GTE ONE (1), not " + minSnapshots); + } if (minRestorePoints < 0) throw new IllegalArgumentException( "minRestorePoints must be GTE ZERO (0), not " + minRestorePoints); - this.minSnapshotAgeMillis = minSnapshotAgeMillis; + this.minRestoreAgeMillis = minRestoreAgeMillis; this.minSnapshots = minSnapshots; @@ -175,19 +226,26 @@ private long getEarliestRestorableCommitCounterByAge(final HAJournal jnl, final long commitCounterOnJournal) { + // The current time. final long now = System.currentTimeMillis(); - final long then = now - minSnapshotAgeMillis; + // A moment [minRestoreAge] milliseconds ago. + final long then = now - minRestoreAgeMillis; + // The root block for the snapshot with a commitTime LTE [then]. final IRootBlockView rootBlock = jnl.getSnapshotManager().find(then); if (rootBlock == null) { - // There are no snapshots. + // There are no matching snapshots. return commitCounterOnJournal; } + if (log.isInfoEnabled()) + log.info("minRestoreAgeMillis=" + minRestoreAgeMillis + ", now=" + + now + ", then=" + then + ", rootBlock=" + rootBlock); + return rootBlock.getCommitCounter(); } @@ -200,14 +258,8 @@ private long getEarliestRestorableCommitCounterBySnapshots( final HAJournal jnl, final long commitCounterOnJournal) { - if (minSnapshots == 0) { - - return commitCounterOnJournal; - - } - final IRootBlockView rootBlock = jnl.getSnapshotManager() - .getSnapshotByReverseIndex(minSnapshots); + .getSnapshotByReverseIndex(minSnapshots - 1); if (rootBlock == null) { @@ -269,30 +321,45 @@ final long commitCounterOnJournal = jnl.getRootBlockView() .getCommitCounter(); - final long commitCounterByAge = getEarliestRestorableCommitCounterByAge( + final long commitCounterRetainedByAge = getEarliestRestorableCommitCounterByAge( jnl, commitCounterOnJournal); - final long commitCounterBySnapshots = getEarliestRestorableCommitCounterBySnapshots( + final long commitCounterRetainedBySnapshotCount = 
getEarliestRestorableCommitCounterBySnapshots( jnl, commitCounterOnJournal); - final long commitCounterByHALogs = getEarliestRestorableCommitCounterByHALogs( + final long commitCounterRetainedByHALogCount = getEarliestRestorableCommitCounterByHALogs( jnl, commitCounterOnJournal); - final long ret = Math.min(commitCounterByAge, - Math.min(commitCounterBySnapshots, commitCounterByHALogs)); + /* + * Take the minimum of those values. This is the commit counter that + * will be retained. + * + * Snapshot files and HALogs GTE this commit counter will not be + * released. + */ + final long commitCounterRetained = Math.min(commitCounterRetainedByAge, + Math.min(commitCounterRetainedBySnapshotCount, + commitCounterRetainedByHALogCount)); if (log.isInfoEnabled()) { - log.info("policy=" + this + ", commitCounterOnJournal=" - + commitCounterOnJournal + ", commitCounterByAge=" - + commitCounterByAge + ", commitCounterBySnapshots=" - + commitCounterBySnapshots + ", commitCounterByHALogs=" - + commitCounterByHALogs - + ", effectiveCommitCounterReported=" + ret); + log.info("policy=" + + this + + // + ", commitCounterOnJournal=" + + commitCounterOnJournal // + + ", commitCounterByAge=" + + commitCounterRetainedByAge // + + ", commitCounterBySnapshots=" + + commitCounterRetainedBySnapshotCount // + + ", commitCounterByHALogs=" + + commitCounterRetainedByHALogCount// + + ", effectiveCommitCounterRetained=" + + commitCounterRetained); } - return ret; + return commitCounterRetained; } Modified: branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournal.java =================================================================== --- branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournal.java 2013-04-04 21:34:01 UTC (rev 7035) +++ branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournal.java 2013-04-05 18:25:08 UTC (rev 7036) @@ -505,7 +505,7 @@ public void deleteResources() { super.deleteResources(); - + 
recursiveDelete(getHALogDir(), new FileFilter() { @Override Modified: branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournalServer.java =================================================================== --- branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournalServer.java 2013-04-04 21:34:01 UTC (rev 7035) +++ branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAJournalServer.java 2013-04-05 18:25:08 UTC (rev 7036) @@ -77,6 +77,7 @@ import com.bigdata.ha.msg.IHALogRootBlocksResponse; import com.bigdata.ha.msg.IHARebuildRequest; import com.bigdata.ha.msg.IHASendStoreResponse; +import com.bigdata.ha.msg.IHASnapshotResponse; import com.bigdata.ha.msg.IHASyncRequest; import com.bigdata.ha.msg.IHAWriteMessage; import com.bigdata.ha.msg.IHAWriteSetStateResponse; @@ -1256,7 +1257,7 @@ * services should then verify that the quorum is fully met * before they actually age out the HALogs and snapshots. */ - purgeHALogs(); + purgeHALogs(token); } @@ -1497,9 +1498,31 @@ * to serve as a restore point. The service MUST be joined with * a met quorum in order to take a snapshot. */ + { - journal.getSnapshotManager().takeInitialSnapshot(); + // Conditionally request initial snapshot. + final Future<IHASnapshotResponse> ft = journal + .getSnapshotManager().takeInitialSnapshot(); + if (ft != null) { + + /* + * Wait for outcome. + * + * Note: Even though we are blocking on the Future, the + * service is live and can receive writes. Once the + * Future is done, we are just going to block anyway in + * blockInterruptably(). + * + * Note: An exception thrown here will cause the service + * to transition into the error state. + */ + ft.get(); + + } + + } + // Block until this run state gets interrupted. blockInterruptably(); @@ -1730,12 +1753,6 @@ { /* - * Get rid of any existing backups. They will not be - * consistent with the rebuild. 
- */ - deleteBackups(); - - /* * The current root block on the leader (We want to get some * immutatable metadata from the leader's root block). */ @@ -1757,17 +1774,34 @@ // Verify that the quorum remains met on this token. getQuorum().assertQuorum(token); - + /* - * Install both root blocks. + * Critical section. * - * Note: This will take us through a local abort. That is - * important. We need to discard any writes that might have - * been buffered before we start the resynchronization of - * the local store. + * Up to now we have not modified anything on the disk. Now + * we are going to destroy the local data (both backups and + * the root blocks of the journal). */ - installRootBlocks(rbu.rootBlock0, rbu.rootBlock1); + { + /* + * Get rid of any existing backups. They will not be + * consistent with the rebuild. + */ + deleteBackups(); + + /* + * Install both root blocks. + * + * Note: This will take us through a local abort. That + * is important. We need to discard any writes that + * might have been buffered before we start the + * resynchronization of the local store. + */ + installRootBlocks(rbu.rootBlock0, rbu.rootBlock1); + + } + // Note: Snapshot requires joined with met quorum. // /* // * Take a snapshot. @@ -2969,12 +3003,22 @@ * Note: The current HALog file is NOT deleted by this method. */ @Override - public void purgeHALogs() { + public void purgeHALogs(final long token) { logLock.lock(); try { + if (!getQuorum().isQuorumFullyMet(token)) { + /* + * Halt operation. + * + * Note: This is not an error, but we can not remove + * snapshots or HALogs if this invariant is violated. + */ + return; + } + // We need to retain the backups for this commit point. final long earliestRestorableCommitPoint = journal .getSnapshotManager().getRestorePolicy() @@ -2989,10 +3033,11 @@ // Delete snapshots, returning commit counter of the oldest // retained snapshot. 
- final long earliestRetainedSnapshotLastCommitCounter = deleteSnapshots(earliestRestorableCommitPoint); + final long earliestRetainedSnapshotLastCommitCounter = deleteSnapshots( + token, earliestRestorableCommitPoint); // Delete HALogs not retained by that snapshot. - deleteHALogs(earliestRetainedSnapshotLastCommitCounter); + deleteHALogs(token, earliestRetainedSnapshotLastCommitCounter); } finally { @@ -3003,33 +3048,6 @@ } /** - * We need to destroy the local backups if we do a REBUILD. Those files - * are no longer guaranteed to be consistent with the history of the - * journal. - */ - private void deleteBackups() { - - logLock.lock(); - - try { - - haLog.warn("Destroying local backups."); - - // Delete all snapshots. - deleteSnapshots(Long.MAX_VALUE); - - // Delete all HALogs (except the current one). - deleteHALogs(Long.MAX_VALUE); - - } finally { - - logLock.unlock(); - - } - - } - - /** * Delete snapshots that are no longer required. * <p> * Note: If ZERO (0) is passed into this method, then no snapshots will @@ -3042,7 +3060,8 @@ * * @return The commitCounter of the earliest retained snapshot. */ - private long deleteSnapshots(final long earliestRestorableCommitPoint) { + private long deleteSnapshots(final long token, + final long earliestRestorableCommitPoint) { /* * List the snapshot files for this service. */ @@ -3076,30 +3095,43 @@ } // Strip off the filename extension. - final String fileBaseName = name.substring(0, - SnapshotManager.SNAPSHOT_EXT.length()); + final int len = name.length() + - SnapshotManager.SNAPSHOT_EXT.length(); + final String fileBaseName = name.substring(0, len); // Closing commitCounter for snapshot file. final long commitCounter = Long.parseLong(fileBaseName); // Count all snapshot files. nfound.incrementAndGet(); - - if (commitCounter >= earliestRestorableCommitPoint) { + + // true iff we will delete this snapshot. 
+ final boolean deleteFile = commitCounter < earliestRestorableCommitPoint; + + if (haLog.isInfoEnabled()) + log.info("snapshotFile=" + + name// + + ", deleteFile=" + + deleteFile// + + ", commitCounter=" + + commitCounter// + + ", earliestRestoreableCommitPoint=" + + earliestRestorableCommitPoint); + + if (!deleteFile + && commitCounter < earliestRetainedSnapshotCommitCounter + .get()) { + /* - * We need to retain this snapshot. + * Update the earliest retained snapshot. */ - if (commitCounter < earliestRetainedSnapshotCommitCounter - .get()) { - // Update the earliest retained snapshot. - earliestRetainedSnapshotCommitCounter - .set(commitCounter); - } - return false; + + earliestRetainedSnapshotCommitCounter + .set(commitCounter); + } - // This snapshot MAY be deleted. - return true; + return deleteFile; } }); @@ -3130,6 +3162,16 @@ // #of bytes in that file. final long len = file.length(); + if (!getQuorum().isQuorumFullyMet(token)) { + /* + * Halt operation. + * + * Note: This is not an error, but we can not remove + * snapshots or HALogs if this invariant is violated. + */ + break; + } + if (!snapshotManager.removeSnapshot(file)) { haLog.warn("COULD NOT DELETE FILE: " + file); @@ -3167,7 +3209,8 @@ * that are GTE this commit counter since they will be * applied to that snapshot. */ - private void deleteHALogs(final long earliestRetainedSnapshotCommitCounter) { + private void deleteHALogs(final long token, + final long earliestRetainedSnapshotCommitCounter) { /* * List the HALog files for this service. */ @@ -3215,22 +3258,29 @@ } // Strip off the filename extension. - final String logFileBaseName = name.substring(0, - IHALogReader.HA_LOG_EXT.length()); + + final int len = name.length() + - IHALogReader.HA_LOG_EXT.length(); + + final String fileBaseName = name.substring(0, len); // Closing commitCounter for HALog file. 
final long logCommitCounter = Long - .parseLong(logFileBaseName); + .parseLong(fileBaseName); - if (logCommitCounter >= earliestRetainedSnapshotCommitCounter) { - /* - * We need to retain this log file. - */ - return false; - } + final boolean deleteFile = logCommitCounter < earliestRetainedSnapshotCommitCounter; + + if (haLog.isInfoEnabled()) + haLog.info("logFile=" + + name// + + ", delete=" + + deleteFile// + + ", logCommitCounter=" + + logCommitCounter// + + ", earliestRestoreableCommitPoint=" + + earliestRetainedSnapshotCommitCounter); - // This HALog file MAY be deleted. - return true; + return deleteFile; } }); @@ -3245,6 +3295,16 @@ // #of bytes in that HALog file. final long len = logFile.length(); + if (!getQuorum().isQuorumFullyMet(token)) { + /* + * Halt operation. + * + * Note: This is not an error, but we can not remove + * snapshots or HALogs if this invariant is violated. + */ + break; + } + if (!logFile.delete()) { haLog.warn("COULD NOT DELETE FILE: " + logFile); @@ -3267,6 +3327,91 @@ } + /** + * We need to destroy the local backups if we do a REBUILD. Those files + * are no longer guaranteed to be consistent with the history of the + * journal. + * <p> + * Note: This exists as a distinct code path because we will destroy + * those backups without regard to the quorum token. The normal code + * path requires a fully met journal in order to delete snapshots and + * HALog files. + * + * @throws IOException + * if a file could not be deleted. + */ + private void deleteBackups() throws IOException { + + logLock.lock(); + + try { + + haLog.warn("Destroying local backups."); + + // Delete all snapshots. 
+ { + + final File snapshotDir = journal.getSnapshotManager() + .getSnapshotDir(); + + final File[] files = snapshotDir + .listFiles(new FilenameFilter() { + @Override + public boolean accept(final File dir, + final String name) { + return name + .endsWith(SnapshotManager.SNAPSHOT_EXT); + } + }); + for (File file : files) { + if (!file.delete()) + throw new IOException("COULD NOT DELETE FILE: " + + file); + } + + } + + // Delete all HALogs (except the current one). + { + + final File currentLogFile = journal.getHALogWriter() + .getFile(); + + final String currentLogFileName = currentLogFile == null ? null + : currentLogFile.getName(); + + final File logDir = journal.getHALogDir(); + + final File[] files = logDir.listFiles(new FilenameFilter() { + @Override + public boolean accept(final File dir, final String name) { + // filter out the current log file + if (currentLogFile != null + && name.equals(currentLogFileName)) { + /* + * This is the current HALog. We never purge it. + */ + return false; + } + return name.endsWith(IHALogReader.HA_LOG_EXT); + } + }); + for (File file : files) { + if (!file.delete()) + throw new IOException("COULD NOT DELETE FILE: " + + file); + } + + } + + } finally { + + logLock.unlock(); + + } + + } + @Override public void installRootBlocks(final IRootBlockView rootBlock0, final IRootBlockView rootBlock1) { Modified: branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/SnapshotManager.java =================================================================== --- branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/SnapshotManager.java 2013-04-04 21:34:01 UTC (rev 7035) +++ branches/READ_CACHE/bigdata-jini/src/java/com/bigdata/journal/jini/ha/SnapshotManager.java 2013-04-05 18:25:08 UTC (rev 7036) @@ -512,8 +512,8 @@ } /** - * Return the {@link IRootBlock} identifying the snapshot having the largest - * lastCommitTime that is less than or equal to the given value. 
+ * Return the {@link IRootBlockView} identifying the snapshot having the + * largest lastCommitTime that is less than or equal to the given value. * * @param timestamp * The given timestamp. @@ -532,6 +532,24 @@ } /** + * Return the {@link IRootBlockView} identifying the first snapshot whose + * <em>commitTime</em> is strictly greater than the timestamp. + * + * @param timestamp + * The timestamp. A value of ZERO (0) may be used to find the + * first snapshot. + * + * @return The root block of that snapshot -or- <code>null</code> if there + * is no snapshot whose timestamp is strictly greater than + * <i>timestamp</i>. + */ + public IRootBlockView findNext(final long timestamp) { + + return snapshotIndex.findNext(timestamp); + + } + + /** * Find the oldest snapshot that is at least <i>minRestorePoints</i> old and * returns its commit counter. * @@ -1148,7 +1166,7 @@ if (localService != null) { - localService.purgeHALogs(); + localService.purgeHALogs(token); } Modified: branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/AbstractHAJournalServerTestCase.java =================================================================== --- branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/AbstractHAJournalServerTestCase.java 2013-04-04 21:34:01 UTC (rev 7035) +++ branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/AbstractHAJournalServerTestCase.java 2013-04-05 18:25:08 UTC (rev 7036) @@ -599,7 +599,11 @@ * @param lastCommitCounter * The last commit point to be verified (inclusive upper bound). * @param services - * The set of services whose HALog files will be tested. + * The set of services whose HALog files will be tested. If there + * is more than one service, then this method will verify that + * the services have the same digests for their HALog files. If + * there is only one service, then this will verify that the + * HALog file exists by computing its digest. 
* * @throws IOException * @throws DigestException @@ -762,17 +766,20 @@ * @param expected * The expected commit point. * @param haGlue - * The remote server interface. + * The remote server interface(s). * * @throws IOException */ - protected void assertCommitCounter(final long expected, final HAGlue haGlue) + protected void assertCommitCounter(final long expected, final HAGlue... haGlue) throws IOException { - assertEquals( - expected, - haGlue.getRootBlock(new HARootBlockRequest(null/* storeUUID */)) - .getRootBlock().getCommitCounter()); + for (HAGlue server : haGlue) { + assertEquals( + expected, + server.getRootBlock( + new HARootBlockRequest(null/* storeUUID */)) + .getRootBlock().getCommitCounter()); + } } Modified: branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA3RestorePolicy.java =================================================================== --- branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA3RestorePolicy.java 2013-04-04 21:34:01 UTC (rev 7035) +++ branches/READ_CACHE/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA3RestorePolicy.java 2013-04-05 18:25:08 UTC (rev 7036) @@ -119,9 +119,15 @@ final int N = 7; // #of transactions to run before the snapshot. final int M = 8; // #of transactions to run after the snapshot. - // Start 2 services. + /* + * Start 3 services. + * + * Note: We need to have three services running in order for the + * quorum to be fully met, which purging snapshots and HALogs requires. + */ + final HAGlue serverA = startA(); final HAGlue serverB = startB(); + final HAGlue serverC = startC(); // Wait for a quorum meet. final long token = quorum.awaitQuorum(awaitQuorumTimeout, @@ -134,9 +140,15 @@ assertEquals(serverA, quorum.getClient().getLeader(token)); // Await initial commit point (KB create). - awaitCommitCounter(1L, serverA, serverB); + awaitCommitCounter(1L, serverA, serverB, serverC); - assertCommitCounter(1L, serverA); + /* + * There should not be any snapshots yet since we are using the + * NoSnapshotPolicy. 
+ */ + assertEquals(0, getSnapshotDirA().list().length); + assertEquals(0, getSnapshotDirB().list().length); + assertEquals(0, getSnapshotDirC().list().length); // Now run N transactions. for (int i = 0; i < N; i++) { @@ -147,11 +159,12 @@ final long commitCounterN = N + 1; - assertCommitCounter(commitCounterN, serverA); + assertCommitCounter(commitCounterN, serverA, serverB, serverC); - // Check HALogs equal on A, B. - assertHALogDigestsEquals(1L/* firstCommitCounter */, commitCounterN, - new HAGlue[] { serverA, serverB }); + // Only the live log is retained on the services. + assertEquals(1, getHALogDirA().list().length); + assertEquals(1, getHALogDirA().list().length); + assertEquals(1, getHALogDirA().list().length); /* * Take a snapshot. @@ -208,8 +221,8 @@ /* * Now run sets of M transactions until we have exceeded releasePolicy's - * minimum age for the existing snapshot. Since there is only one - * snapshot, it SHOULD NOT be removed. + * minimum age for the existing snapshot. However, since there is only + * one snapshot, it SHOULD NOT be removed. */ int nnewtx = 0; { @@ -228,15 +241,26 @@ final long commitCounterM = nnewtx + N + 1; - assertCommitCounter(commitCounterM, serverA); - - // Check HALogs equal on A, B. - assertHALogDigestsEquals(1L/* firstCommitCounter */, commitCounterM, - new HAGlue[] { serverA, serverB }); + assertCommitCounter(commitCounterM, serverA, serverB, serverC); - // Snapshot directory contains just the expected snapshot + /* + * Verify that the snapshot directory contains just the expected + * snapshot. + */ assertEquals(new String[] { snapshotFile0.getName() }, getSnapshotDirA().list()); + + /* + * Check HALogs for existence on A. + * + * Note: We can not compare the digests for equality on the + * different servers because we only took a snapshot on A and + * therefore we have not pinned the HALogs on B or C. 
+ */ + assertHALogDigestsEquals( + commitCounterN + 1/* firstCommitCounter */, + commitCounterM, new HAGlue[] { serverA }); + } // Verify snapshot still exists. @@ -251,10 +275,15 @@ /* * Request another snapshot. * - * Note: We will now have 2 snapshots. The original snapshot is purged - * since it is older than the minimum retention time for a snapshot. + * Note: We will now have 2 snapshots. The original snapshot is NOT + * purged. While it is older than the minimum retention time for a + * snapshot, we do not yet have another snapshot that will allow us to + * recover the commit points GT our oldest snapshot that are within the + * required recovery period. */ final IRootBlockView snapshotRB1; + final File snapshotFile1; + final long lastCommitCounter; { // Request snapshot on A. @@ -275,34 +304,111 @@ snapshotRB1 = ft.get().getRootBlock(); // The name of the new snapshot file. - final File snapshotFile1 = SnapshotManager.getSnapshotFile( - getSnapshotDirA(), snapshotRB1.getCommitCounter()); + snapshotFile1 = SnapshotManager.getSnapshotFile(getSnapshotDirA(), + snapshotRB1.getCommitCounter()); // Verify new snapshot exists. assertTrue(snapshotFile1.exists()); - // Verify old snapshot is gone. - assertFalse(snapshotFile0.exists()); + // Verify old snapshot exists. + assertTrue(snapshotFile0.exists()); - // Verify snapshot directory contains the only the one file. - assertEquals(new String[] { snapshotFile1.getName() }, - getSnapshotDirA().list()); + // Verify snapshot directory contains the necessary files. + assertEquals( + new String[] { snapshotFile0.getName(), + snapshotFile1.getName() }, getSnapshotDirA().list()); - /* - * Verify only the expected HALog files are retained. - */ - // The current commit counter on A. - final long lastCommitCounter = serverA + lastCommitCounter = serverA .getRootBlock(new HARootBlockRequest(null/* storeUUID */)) .getRootBlock().getCommitCounter(); - // Check HALogs equal on A, B. + // Check HALogs found on A. 
assertHALogDigestsEquals( + commitCounterN + 1/* firstCommitCounter */, + lastCommitCounter/* lastCommitCounter */, + new HAGlue[] { serverA }); + + /* + * Verify only the expected HALog files are retained on A (in fact, + * all HALogs will still be retained on B since we are not taking + * any snapshots there). + */ + assertHALogNotFound(1L/* firstCommitCounter */, + commitCounterN - 1/* lastCommitCounter */, + new HAGlue[] { serverA }); + + } + + /* + * Sleep until the most recent snapshot is old enough to satisfy our + * recovery period. + */ + Thread.sleep(restorePolicyMinSnapshotAgeMillis); + + /* + * The older snapshot should still exist since we have not gone through + * a commit. + */ + { + + // Verify new snapshot exists. + assertTrue(snapshotFile1.exists()); + + // Verify old snapshot exists. + assertTrue(snapshotFile0.exists()); + + // Verify snapshot directory contains the necessary files. + assertEquals( + new String[] { snapshotFile0.getName(), + snapshotFile1.getName() }, getSnapshotDirA().list()); + + // Check HALogs found on A. + assertHALogDigestsEquals( + commitCounterN + 1/* firstCommitCounter */, + lastCommitCounter/* lastCommitCounter */, + new HAGlue[] { serverA }); + + } + + // Do a simple transaction. + simpleTransaction(); + + final long lastCommitCounter2 = lastCommitCounter + 1; + + // Verify the current commit counter on A, B, C. + assertCommitCounter(lastCommitCounter2, new HAGlue[] { serverA, + serverB, serverC }); + + /* + * Verify older snapshot and logs LT the newer snapshot are gone. + */ + { + + // Verify new snapshot exists. + assertTrue(snapshotFile1.exists()); + + // Verify old snapshot is gone. + assertFalse(snapshotFile0.exists()); + + // Verify snapshot directory contains the necessary files. + assertEquals(new String[] { snapshotFile1.getName() }, + getSnapshotDirA().list()); + + // Check HALogs found on A. 
+ assertHALogDigestsEquals( snapshotRB1.getCommitCounter()/* firstCommitCounter */, - lastCommitCounter/* lastCommitCounter */, new HAGlue[] { - serverA, serverB }); + lastCommitCounter2/* lastCommitCounter */, + new HAGlue[] { serverA }); + /* + * Verify HALogs were removed from A (again, all HALogs will still + * be on B since we have not taken a snapshot there). + */ + assertHALogNotFound(1L/* firstCommitCounter */, + snapshotRB1.getCommitCounter() - 1/* lastCommitCounter */, + new HAGlue[] { serverA }); + } } Modified: branches/READ_CACHE/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/HAStatusServletUtil.java =================================================================== --- branches/READ_CACHE/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/HAStatusServletUtil.java 2013-04-04 21:34:01 UTC (rev 7035) +++ branches/READ_CACHE/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/HAStatusServletUtil.java 2013-04-05 18:25:08 UTC (rev 7036) @@ -51,8 +51,6 @@ import com.bigdata.journal.IRootBlockView; import com.bigdata.journal.RootBlockView; import com.bigdata.journal.jini.ha.HAJournal; -import com.bigdata.journal.jini.ha.IRestorePolicy; -import com.bigdata.journal.jini.ha.ISnapshotPolicy; import com.bigdata.journal.jini.ha.SnapshotManager; import com.bigdata.quorum.AsynchronousQuorumCloseException; import com.bigdata.quorum.Quorum; @@ -172,15 +170,15 @@ + journal.getSnapshotManager().getRestorePolicy()) .node("br").close(); - if(true) { - /* - * FIXME HABackup: disable this code block. It is for - * debug purposes only. - */ - p.text("Service: getEarliestRestorableCommitPoint()=" - + journal.getSnapshotManager().getRestorePolicy().getEarliestRestorableCommitPoint(journal)) - .node("br").close(); - } +// if(true) { +// /* +// * HABackup: disable this code block. It is for +// * debug purposes only. 
+// */ +// p.text("Service: getEarliestRestorableCommitPoint()=" +// + journal.getSnapshotManager().getRestorePolicy().getEarliestRestorableCommitPoint(journal)) +// .node("br").close(); +// } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |