From: <tho...@us...> - 2014-06-03 12:59:53
Revision: 8440
http://sourceforge.net/p/bigdata/code/8440
Author: thompsonbry
Date: 2014-06-03 12:59:47 +0000 (Tue, 03 Jun 2014)
Log Message:
-----------
javadoc
Modified Paths:
--------------
branches/BIGDATA_RELEASE_1_3_0/bigdata-jini/src/java/com/bigdata/journal/jini/ha/SnapshotManager.java
Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata-jini/src/java/com/bigdata/journal/jini/ha/SnapshotManager.java
===================================================================
--- branches/BIGDATA_RELEASE_1_3_0/bigdata-jini/src/java/com/bigdata/journal/jini/ha/SnapshotManager.java 2014-06-03 00:31:26 UTC (rev 8439)
+++ branches/BIGDATA_RELEASE_1_3_0/bigdata-jini/src/java/com/bigdata/journal/jini/ha/SnapshotManager.java 2014-06-03 12:59:47 UTC (rev 8440)
@@ -328,6 +328,8 @@
snapshotIndex = SnapshotIndex.createTransient();
+ // Note: Caller MUST invoke init() Callable.
+
}
@Override
@@ -340,7 +342,11 @@
/**
* Task that is used to initialize the {@link SnapshotManager}.
*
- * @author <a href="mailto:tho...@us...">Bryan Thompson</a>
+ * @author <a href="mailto:tho...@us...">Bryan
+ * Thompson</a>
+ *
+ * @see <a href="http://trac.bigdata.com/ticket/775" > HAJournal start()
+ * (optimization) </a>
*/
private class InitTask implements Callable<Void> {
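The constructor note above and the InitTask javadoc describe a two-phase startup: the constructor only creates the transient index, and the caller must run the init() Callable before the manager is usable. A minimal sketch of that pattern, assuming a generic init() returning Callable<Void> (the class and method names below are illustrative, not the actual SnapshotManager API):

    import java.util.concurrent.Callable;
    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Executors;
    import java.util.concurrent.Future;

    // Illustrative two-phase startup: the constructor does only cheap,
    // non-blocking setup; the caller MUST submit and await the init()
    // Callable before using the manager (mirrors the note above).
    class SnapshotManagerSketch {

        private volatile boolean initialized = false;

        SnapshotManagerSketch() {
            // cheap setup only (e.g., create the transient index).
        }

        Callable<Void> init() {
            return new Callable<Void>() {
                @Override
                public Void call() throws Exception {
                    // expensive work: scan the snapshot dir, populate the index, etc.
                    initialized = true;
                    return null;
                }
            };
        }

        boolean isInitialized() {
            return initialized;
        }

        public static void main(String[] args) throws Exception {
            final ExecutorService exec = Executors.newSingleThreadExecutor();
            try {
                final SnapshotManagerSketch mgr = new SnapshotManagerSketch();
                final Future<Void> f = exec.submit(mgr.init()); // caller invokes init()
                f.get(); // block until the startup work completes
                System.out.println("initialized=" + mgr.isInitialized());
            } finally {
                exec.shutdown();
            }
        }
    }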
@@ -367,9 +373,6 @@
private void doRunWithLock() throws IOException, InterruptedException,
ExecutionException {
- if (log.isInfoEnabled())
- log.info("Starting cleanup.");
-
/*
* Delete any temporary files that were left lying around in the
* snapshot directory.
@@ -381,6 +384,9 @@
* the times for these different scans so I can get a better sense
* of the latencies involved.
*/
+ if (log.isInfoEnabled())
+ log.info("Starting cleanup.");
+
CommitCounterUtility.recursiveDelete(false/* errorIfDeleteFails */,
getSnapshotDir(), TEMP_FILE_FILTER);
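The relocated log statement above brackets the cleanup scan, and the surrounding TODO asks for timings of the individual scans. A small sketch of one way to capture those latencies with System.nanoTime(), using a simplified stand-in for CommitCounterUtility.recursiveDelete() and the temp-file filter (names and filter logic here are assumptions for illustration):

    import java.io.File;
    import java.io.FileFilter;
    import java.util.concurrent.TimeUnit;

    // Illustrative timing of the startup cleanup scan: bracket the recursive
    // delete with nanoTime() and report the elapsed milliseconds.
    class CleanupTimingSketch {

        // Stand-in for the temp-file filter used by the snapshot manager.
        static final FileFilter TEMP_FILE_FILTER = new FileFilter() {
            @Override
            public boolean accept(final File f) {
                return f.isDirectory() || f.getName().endsWith(".tmp");
            }
        };

        static int recursiveDelete(final File dir, final FileFilter filter) {
            int ndeleted = 0;
            final File[] children = dir.listFiles(filter);
            if (children == null)
                return 0;
            for (File child : children) {
                if (child.isDirectory()) {
                    ndeleted += recursiveDelete(child, filter);
                } else if (child.delete()) {
                    ndeleted++;
                }
            }
            return ndeleted;
        }

        public static void main(String[] args) {
            final File snapshotDir = new File(args.length > 0 ? args[0] : "snapshot");
            final long begin = System.nanoTime();
            final int ndeleted = recursiveDelete(snapshotDir, TEMP_FILE_FILTER);
            final long elapsedMs = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - begin);
            System.out.println("Cleanup: deleted=" + ndeleted + ", elapsed=" + elapsedMs + "ms");
        }
    }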
@@ -413,12 +419,29 @@
}
/**
- * Scans the {@link #snapshotDir} and populates the {@link #snapshotIndex}
- * from the root blocks in snapshot files found in that directory.
+ * Scans the {@link SnapshotManager#getSnapshotDir()} and populates the
+ * {@link SnapshotIndex} from the root blocks in snapshot files found in
+ * that directory.
*
* @throws IOException
* @throws ExecutionException
* @throws InterruptedException
+ *
+ * TODO Follow the code pattern for the HALogNexus and
+ * provide robust error handling for snapshot files. Note
+ * that snapshots are taken locally based on various
+ * criteria (including the size of the delta, the #of
+ * HALogs, etc.). As long as we have all HALogs the services
+ * should be able to make a purely local decisions about
+ * what to do if we have a bad snapshot file. One option is
+ * to force a snapshot when the service starts. That option
+ * is only available of course if the service can join with
+ * the quorum.
+ * <p>
+ * Note: If the service CAN NOT do a point in time recovery
+ * because it lacks a combination of valid HALog files and
+ * snapshots, then a failover to that service will degrade
+ * the availability of the cluster.
*/
private void populateIndexRecursive(final LatchedExecutor executor,
final File f, final FileFilter fileFilter, final int depth)
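The new TODO above suggests following the HALogNexus pattern and handling bad snapshot files without failing startup. A minimal sketch of that idea, under the assumption that a per-file validation step throws IOException for bad root blocks (SnapshotRecord and getSnapshotRecord() below are simplified stand-ins, not the real implementations):

    import java.io.File;
    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.List;

    // Illustrative "robust scan": collect records for good snapshot files
    // and log-and-skip the bad ones rather than aborting server startup.
    class RobustSnapshotScanSketch {

        // Hypothetical record of a snapshot's commit point.
        static class SnapshotRecord {
            final File file;
            SnapshotRecord(final File file) { this.file = file; }
        }

        // Hypothetical validation: throws IOException if the root blocks are bad.
        static SnapshotRecord getSnapshotRecord(final File file) throws IOException {
            if (file.length() == 0)
                throw new IOException("Empty snapshot file: " + file);
            return new SnapshotRecord(file);
        }

        static List<SnapshotRecord> scan(final File snapshotDir) {
            final List<SnapshotRecord> records = new ArrayList<SnapshotRecord>();
            final File[] files = snapshotDir.listFiles();
            if (files == null)
                return records;
            for (File f : files) {
                try {
                    records.add(getSnapshotRecord(f));
                } catch (IOException ex) {
                    // Purely local decision: warn and continue. Commit points
                    // before the first usable snapshot can not be restored.
                    System.err.println("Skipping bad snapshot: " + f + " : " + ex);
                }
            }
            return records;
        }

        public static void main(String[] args) {
            final File dir = new File(args.length > 0 ? args[0] : "snapshot");
            System.out.println("Usable snapshots: " + scan(dir).size());
        }
    }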
@@ -474,6 +497,20 @@
/*
* Await futures, obtaining snapshot records for the current
* leaf directory.
+ *
+ * TODO If the root blocks are bad, then this will throw an
+ * IOException and that will prevent the startup of the
+ * HAJournalServer. However, if we start up the server with
+ * a known bad snapshot *and* the snapshot is the earliest
+ * snapshot, then we can not restore commit points which
+ * depend on that earliest snapshot (we can still restore
+ * commit points that are GTE the first useable snapshot).
+ *
+ * TODO A similar problem exists if any of the HALog files
+ * GTE the earliest snapshot are missing, have bad root
+ * blocks, etc. We will not be able to restore the commit
+ * point associated with that HALog file unless it also
+ * happens to correspond to a snapshot.
*/
final List<SnapshotRecord> records = new ArrayList<SnapshotRecord>(
children.length);
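The block above submits per-file work and then awaits the futures to assemble the records for the current leaf directory. A generic sketch of that submit-then-await pattern using a plain ExecutorService (LatchedExecutor is a bigdata class and is not reproduced here; the per-file task is a trivial stand-in):

    import java.io.File;
    import java.util.ArrayList;
    import java.util.List;
    import java.util.concurrent.Callable;
    import java.util.concurrent.ExecutionException;
    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Executors;
    import java.util.concurrent.Future;

    // Illustrative submit-then-await over the children of a leaf directory.
    class AwaitFuturesSketch {

        static List<Long> sizesOf(final File[] children, final int nthreads)
                throws InterruptedException, ExecutionException {
            final ExecutorService executor = Executors.newFixedThreadPool(nthreads);
            try {
                final List<Future<Long>> futures = new ArrayList<Future<Long>>(children.length);
                for (final File child : children) {
                    futures.add(executor.submit(new Callable<Long>() {
                        @Override
                        public Long call() {
                            return child.length(); // stand-in for per-file work
                        }
                    }));
                }
                // Await the futures, collecting the per-file results in order.
                final List<Long> results = new ArrayList<Long>(children.length);
                for (Future<Long> f : futures) {
                    results.add(f.get());
                }
                return results;
            } finally {
                executor.shutdown();
            }
        }
    }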
@@ -493,6 +530,22 @@
snapshotIndex.add(r);
+ final long nentries = snapshotIndex.getEntryCount();
+
+ if (nentries % 1000 == 0) {
+
+ /*
+ * Provide an indication that the server is doing
+ * work during startup (it would be unusual to have
+ * a lot of snapshot files, but this provides
+ * symmetry with the HALog startup procedure).
+ */
+
+ haLog.warn("Indexed " + nentries
+ + " snapshot files");
+
+ }
+
}
} finally {
@@ -533,7 +586,7 @@
}
- }
+ } // class InitTask
private void ensureSnapshotDirExists() throws IOException {
@@ -642,20 +695,6 @@
* if the file can not be read.
* @throws ChecksumError
* if there is a checksum problem with the root blocks.
- *
- * TODO If the root blocks are bad, then this will throw an
- * IOException and that will prevent the startup of the
- * HAJournalServer. However, if we start up the server with a
- * known bad snapshot *and* the snapshot is the earliest
- * snapshot, then we can not restore commit points which depend
- * on that earliest snapshot (we can still restore commit points
- * that are GTE the first useable snapshot).
- *
- * TODO A similar problem exists if any of the HALog files GTE
- * the earliest snapshot are missing, have bad root blocks, etc.
- * We will not be able to restore the commit point associated
- * with that HALog file unless it also happens to correspond to
- * a snapshot.
*/
private SnapshotRecord getSnapshotRecord(final File file) throws IOException {
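getSnapshotRecord() is documented to throw ChecksumError when the root blocks fail their checksum. A hedged sketch of such a check, assuming a fixed-size root block whose trailing four bytes store an Adler-32 checksum of the preceding bytes (the block size and layout here are assumptions, not the actual RootBlockView format; the sketch throws IOException where the real code throws ChecksumError):

    import java.io.IOException;
    import java.io.RandomAccessFile;
    import java.nio.ByteBuffer;
    import java.util.zip.Adler32;

    // Illustrative root-block checksum check: read a fixed-size block,
    // recompute the checksum of its body, and compare against the stored
    // trailing checksum.
    class RootBlockChecksumSketch {

        static final int ROOT_BLOCK_SIZE = 340; // size assumed for this sketch

        static void validateRootBlock(final RandomAccessFile raf, final long offset)
                throws IOException {
            final byte[] block = new byte[ROOT_BLOCK_SIZE];
            raf.seek(offset);
            raf.readFully(block);
            // Stored checksum is assumed to be the trailing big-endian int.
            final int stored = ByteBuffer.wrap(block).getInt(ROOT_BLOCK_SIZE - 4);
            final Adler32 adler = new Adler32();
            adler.update(block, 0, ROOT_BLOCK_SIZE - 4);
            final int actual = (int) adler.getValue();
            if (stored != actual)
                throw new IOException("Checksum error: stored=" + stored
                        + ", actual=" + actual);
        }
    }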