[Bigdata-commit] SF.net SVN: bigdata:[8578] branches/BIGDATA_RELEASE_1_3_0

SourceForge Headquarters 1320 Columbia Street Suite 310 San Diego, CA 92101 +1 (858) 422-6466

Revision: 8578
          http://sourceforge.net/p/bigdata/code/8578
Author:   thompsonbry
Date:     2014-07-18 15:45:57 +0000 (Fri, 18 Jul 2014)
Log Message:
-----------
Checkpoint on GIST refactor (#585) in support of group commit for the REST API (#566).  

This commit introduces hierarchical locking (vs enumeration of the indices) into the AbstractApiTask and makes some progress toward a refactor of the ITask interface and the AbstractTask implementation to support access at the ICheckpointProtocol layer (supports BTree, HTree and Stream) in addition to the IIndex layer (supports BTree, FusedView, and IsolatedFusedView).

The next step will be to refactor AbstractTask.getIndex() to push down a method for obtaining the ICheckpointProtocol object.  In order to do this, the AbstractTask.indexCache must be relayered.  It is currently specific to the IIndex interface.  In order to provide caching for ICheckpointProtocol objects, it needs to be rewritten to the ICheckpointProtocol layer. However, the code needs to be carefully reviewed to determine whether we also need caching at the IIndex layer or whether caching only at the ICheckpointProtocol layer is sufficient.  This is a question of both correctness and performance.

Modified Paths:
--------------
    branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/journal/AbstractTask.java
    branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/journal/ITask.java
    branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/task/AbstractApiTask.java

Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/journal/AbstractTask.java
===================================================================

--- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/journal/AbstractTask.java	2014-07-18 15:44:45 UTC (rev 8577)
+++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/journal/AbstractTask.java	2014-07-18 15:45:57 UTC (rev 8578)
@@ -56,6 +56,7 @@
 import com.bigdata.bfs.GlobalFileSystemHelper;
 import com.bigdata.btree.AbstractBTree;
 import com.bigdata.btree.BTree;
+import com.bigdata.btree.Checkpoint;
 import com.bigdata.btree.ICheckpointProtocol;
 import com.bigdata.btree.IDirtyListener;
 import com.bigdata.btree.IIndex;
@@ -64,6 +65,7 @@
 import com.bigdata.btree.view.FusedView;
 import com.bigdata.concurrent.NonBlockingLockManager;
 import com.bigdata.counters.CounterSet;
+import com.bigdata.htree.AbstractHTree;
 import com.bigdata.mdi.IResourceMetadata;
 import com.bigdata.rawstore.IAllocationContext;
 import com.bigdata.rawstore.IPSOutputStream;
@@ -114,9 +116,6 @@
  * {@link ConcurrencyManager#submit(AbstractTask)} it.
  * 
  * @author <a href="mailto:tho...@us...">Bryan Thompson</a>
- * 
- * @todo declare generic type for the return as <? extends Object> to be compatible
- * with {@link ConcurrencyManager#submit(AbstractTask)}
  */
 public abstract class AbstractTask<T> implements Callable<T>, ITask<T> {
 
@@ -264,6 +263,7 @@
      * Cache of named indices resolved by this task for its {@link #timestamp}.
      * 
      * @see #getIndex(String name)
+     * @see #getIndexLocal(String)
      */
     final private Map<String,ILocalBTreeView> indexCache;
 
@@ -543,8 +543,7 @@
     }
 
     /**
-     * Return a view of the named index appropriate for the timestamp associated
-     * with this task.
+     * {@inheritDoc}
      * <p>
      * Note: There are two ways in which a task may access an
      * {@link ITx#UNISOLATED} index, but in all cases access to the index is
@@ -553,37 +552,13 @@
      * {@link IJournal#getIndex(String)} on that journal, which is simply
      * delegated to this method. See {@link IsolatedActionJournal}.
      * 
-     * @param name
-     *            The name of the index.
-     * 
-     * @throws NullPointerException
-     *             if <i>name</i> is <code>null</code>.
-     * @throws IllegalStateException
-     *             if <i>name</i> is not a declared resource.
-     * @throws StaleLocatorException
-     *             if <i>name</i> identifies an index partition which has been
-     *             split, joined, or moved.
-     * @throws NoSuchIndexException
-     *             if the named index is not registered as of the timestamp.
-     * 
-     * @return The index.
-     * 
-     * @todo modify to return <code>null</code> if the index is not registered?
-     * 
-     *       FIXME GIST. This will throw a ClassCastException if the returned
-     *       index is an ILocalBTreeView.
-     * 
      * @see http://trac.bigdata.com/ticket/585 (GIST)
      */
     @Override
     synchronized final public ILocalBTreeView getIndex(final String name) {
 
-        if (name == null) {
-
-            // @todo change to IllegalArgumentException for API consistency?
+        if (name == null)
             throw new NullPointerException();
-            
-        }
         
         // validate that this is a declared index.
         assertResource(name);
@@ -636,10 +611,11 @@
              * index from the store, set the [lastCommitTime], and enter it into
              * the unisolated Name2Addr's cache of unisolated indices.
              */
-            BTree btree;
+            ICheckpointProtocol ndx;
             
             // the unisolated name2Addr object.
-            final Name2Addr name2Addr = resourceManager.getLiveJournal()._getName2Addr();
+            final Name2Addr name2Addr = resourceManager.getLiveJournal()
+                    ._getName2Addr();
 
             synchronized (name2Addr) {
 
@@ -680,46 +656,61 @@
                      * But, fetch the btree from the cache to ensure we use the
                      * most recent checkpoint
                      */
-                    btree = null;
+                    ndx = null;
 
-                    final BTree tmpbtree = (BTree) name2Addr.getIndexCache(name);
-                    if (tmpbtree != null)
-                        checkpointAddr = tmpbtree.getCheckpoint().getCheckpointAddr();
+                    final ICheckpointProtocol tmp_ndx = name2Addr
+                            .getIndexCache(name);
+
+                    if (tmp_ndx != null) {
+
+                        checkpointAddr = tmp_ndx.getCheckpoint()
+                                .getCheckpointAddr();
+
+                    }
                     
                 } else {
-                    // recover from unisolated index cache.
-                    btree = (BTree) name2Addr.getIndexCache(name);
+
+                    // Recover from unisolated index cache.
+                    ndx = name2Addr.getIndexCache(name);
+                    
                 }
                 
-                if (btree == null) {
+                if (ndx == null) {
 
-                    final IJournal tmp;
-//                    tmp = resourceManager.getLiveJournal();
-                    tmp = getJournal();// wrap with the IsolatedActionJournal.
+                    // wrap with the IsolatedActionJournal.
+                    final IJournal tmp = getJournal();
+//                  tmp = resourceManager.getLiveJournal();
                     
                     // re-load btree from the store.
-                    btree = BTree.load(//
+                    ndx = Checkpoint.loadFromCheckpoint(//
                             tmp, // backing store.
                             checkpointAddr,//
                             false// readOnly
                             );
 
                     // set the lastCommitTime on the index.
-                    btree.setLastCommitTime(entry.commitTime);
+                    ndx.setLastCommitTime(entry.commitTime);
 
                     // add to the unisolated index cache (must not exist).
-                    name2Addr.putIndexCache(name, btree, false/* replace */);
+                    name2Addr.putIndexCache(name, ndx, false/* replace */);
 
-                    btree.setBTreeCounters(resourceManager
-                            .getIndexCounters(name));
+                    // set performance counters iff the class supports it.
+                    if (ndx instanceof AbstractBTree) {
+                        ((AbstractBTree) ndx).setBTreeCounters(resourceManager
+                                .getIndexCounters(name));
+                    } else if (ndx instanceof AbstractHTree) {
+                        ((AbstractHTree) ndx).setBTreeCounters(resourceManager
+                                .getIndexCounters(name));
+                    }
                     
                 }
 
             }
 
             try {
-             
-                return getUnisolatedIndexView(name, btree);
+
+                // wrap B+Tree as FusedView: FIXME GIST : BTree specific code path.
+                return getUnisolatedIndexView(name, (BTree) ndx);
                 
             } catch (NoSuchStoreException ex) {
                 
@@ -760,10 +751,12 @@
     /**
      * Given the name of an index and a {@link BTree}, obtain the view for all
      * source(s) described by the {@link BTree}s index partition metadata (if
-     * any), inserts that view into the {@link #indexCache}, and return the view.
+     * any), inserts that view into the {@link #indexCache}, and return the
+     * view.
      * <p>
-     * Note: This method is used both when registering a new index ({@link #registerIndex(String, BTree)})
-     * and when reading an index view from the source ({@link #getIndex(String)}).
+     * Note: This method is used both when registering a new index (
+     * {@link #registerIndex(String, BTree)}) and when reading an index view
+     * from the source ({@link #getIndex(String)}).
      * 
      * @param name
      *            The index name.

Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/journal/ITask.java
===================================================================
--- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/journal/ITask.java	2014-07-18 15:44:45 UTC (rev 8577)
+++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/journal/ITask.java	2014-07-18 15:45:57 UTC (rev 8578)
@@ -98,7 +98,8 @@
     String toString();
 
     /**
-     * Return an appropriate view of the named index for the operation.
+     * Return an appropriate view of the named B+Tree that has the appropriate
+     * isolation level for the operation (non-GIST).
      * <p>
      * When the task is isolated by a transaction, then the index will be
      * isolated by the transaction using the appropriate isolation level. If the
@@ -124,23 +125,63 @@
      * 
      * @return An appropriate view of the named index.
      * 
-     * @exception NoSuchIndexException
-     *                if the named index does not exist at the time that the
-     *                operation is executed.
+     * @throws NullPointerException
+     *             if <i>name</i> is <code>null</code>.
+     * @throws IllegalStateException
+     *             if <i>name</i> is not a declared resource.
+     * @throws StaleLocatorException
+     *             if <i>name</i> identifies an index partition which has been
+     *             split, joined, or moved.
+     * @throws NoSuchIndexException
+     *             if the named index is not registered as of the timestamp.
      * 
-     * @exception StaleLocatorException
-     *                if the named index does not exist at the time the
-     *                operation is executed and the {@link IResourceManager} has
-     *                information which indicates that the index partition has
-     *                been split, joined or moved.
+     *             TODO modify to return <code>null</code> if the index is not
+     *             registered?
+     */
+    IIndex getIndex(String name); // non-GIST
+    
+    /**
+     * Return an appropriate view of the named index for the operation (GIST).
+     * <p>
+     * This method MUST be used to access non-B+Tree data structures that do not
+     * (yet) support {@link FusedView} style transaction isolation.
+     * <p>
+     * This method MAY NOT be used to access data structures if the operation is
+     * isolated by a read-write transaction.
+     * <p>
+     * This method DOES NOT understand the ordered views used by scale-out. The
+     * {@link ICheckpointProtocol} interface returned by this method is a
+     * concrete durable GIST data structure with a specific commit record. It is
+     * NOT a {@link FusedView} or similar data structure assembled from an
+     * ordered array of indices. If this method is used for a GIST data
+     * structure it will ONLY return the {@link ICheckpointProtocol} and will
+     * not wrap it with a {@link FusedView}. (This is of practical importance
+     * only for scale-out which uses {@link FusedView}s to support the dynamic
+     * key range partitioning algorithm for the distributed B+Tree data
+     * structure.)
      * 
-     * @exception IllegalStateException
-     *                if the named index is not one of the resources declared to
-     *                the constructor.
+     * @param name
+     *            The index name.
      * 
-     * @see IGISTLocalManager
+     * @return An appropriate view of the named index.
+     * 
+     * @throws NullPointerException
+     *             if <i>name</i> is <code>null</code>.
+     * @throws IllegalStateException
+     *             if <i>name</i> is not a declared resource.
+     * @throws StaleLocatorException
+     *             if <i>name</i> identifies an index partition which has been
+     *             split, joined, or moved.
+     * @throws NoSuchIndexException
+     *             if the named index is not registered as of the timestamp.
+     * @throws UnsupportedOperationException
+     *             if the {@link ITask} is associated with a read-write
+     *             transaction.
+     * 
+     *             TODO modify to return <code>null</code> if the index is not
+     *             registered?
      */
-    IIndex getIndex(String name); // non-GIST
+//    ICheckpointProtocol getIndexLocal(String name); // GIST
 
     /**
      * The object used to track events and times for the task.

Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/task/AbstractApiTask.java
===================================================================
--- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/task/AbstractApiTask.java	2014-07-18 15:44:45 UTC (rev 8577)
+++ branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/task/AbstractApiTask.java	2014-07-18 15:45:57 UTC (rev 8578)
@@ -24,8 +24,6 @@
  */
 package com.bigdata.rdf.task;
 
-import java.util.HashSet;
-import java.util.Set;
 import java.util.concurrent.Future;
 import java.util.concurrent.FutureTask;
 import java.util.concurrent.atomic.AtomicReference;
@@ -314,24 +312,12 @@
              * locks and will have exclusive access to the resources guarded by
              * those locks when they run.
              * 
-             * FIXME GROUP COMMIT: The {@link AbstractTask} was written to
-             * require the exact set of resource lock declarations. However, for
-             * the REST API, we want to operate on all indices associated with a
-             * KB instance. This requires either:
-             * <p>
-             * (a) pre-resolving the names of those indices and passing them all
-             * into the AbstractTask; or
-             * <P>
-             * (b) allowing the caller to only declare the namespace and then to
-             * be granted access to all indices whose names are in that
-             * namespace.
-             * 
-             * (b) is now possible with the fix to the Name2Addr prefix scan.
-             * 
-             * Note: We also need to isolate any named solution sets in the
-             * namespace of the KB. Those will be discovered along with the
-             * indices, but they may require changes to {@link AbstractTask}
-             * for GIST support.
+             * FIXME GROUP COMMIT: The hierarchical locking mechanisms will fail
+             * on durable named solution sets because they use either HTree or
+             * Stream and AbstractTask does not yet support those durable data
+             * structures (it is still being refactored to support the
+             * ICheckpointProtocol rather than the BTree in its Name2Addr
+             * isolation logic).
              */
 
             // Obtain the necessary locks for R/w access to KB indices.
@@ -350,7 +336,8 @@
     }
     
     /**
-     * Acquire the locks for the named indices associated with the specified KB.
+     * Return the set of locks that the task must acquire in order to operate on
+     * the specified namespace.
      * 
      * @param indexManager
      *            The {@link Journal}.
@@ -360,52 +347,41 @@
      * @return The locks for the named indices associated with that KB instance.
      * 
      * @throws DatasetNotFoundException
-     * 
-     *             FIXME GROUP COMMIT : [This should be replaced by the use of
-     *             the namespace and hierarchical locking support in
-     *             AbstractTask.] This could fail to discover a recently create
-     *             KB between the time when the KB is created and when the group
-     *             commit for that create becomes visible. This data race exists
-     *             because we are using [lastCommitTime] rather than the
-     *             UNISOLATED view of the GRS.
-     *             <p>
-     *             Note: This data race MIGHT be closed by the default locator
-     *             cache. If it records the new KB properties when they are
-     *             created, then they should be visible. If they are not
-     *             visible, then we have a data race. (But if it records them
-     *             before the group commit for the KB create, then the actual KB
-     *             indices will not be durable until the that group commit...).
-     *             <p>
-     *             Note: The problem can obviously be resolved by using the
-     *             UNISOLATED index to obtain the KB properties, but that would
-     *             serialize ALL updates. What we need is a suitable caching
-     *             mechanism that (a) ensures that newly create KB instances are
-     *             visible; and (b) has high concurrency for read-only requests
-     *             for the properties for those KB instances.
      */
     private static String[] getLocksForKB(final Journal indexManager,
             final String namespace) throws DatasetNotFoundException {
 
-        final long timestamp = indexManager.getLastCommitTime();
+        /*
+         * Note: There are two possible approaches here. One is to explicitly
+         * enumerate the index names for the triple store. The other is to
+         * specify the namespace of the triple store and use hierarchical
+         * locking.
+         * 
+         * This is now using hierarchical locking, so it just returns the
+         * namespace.
+         */
+        return new String[]{namespace};
+        
+//        final long timestamp = indexManager.getLastCommitTime();
+//
+//        final AbstractTripleStore tripleStore = (AbstractTripleStore) indexManager
+//                .getResourceLocator().locate(namespace, timestamp);
+//
+//        if (tripleStore == null)
+//            throw new DatasetNotFoundException("Not found: namespace="
+//                    + namespace + ", timestamp="
+//                    + TimestampUtility.toString(timestamp));
+//
+//        final Set<String> lockSet = new HashSet<String>();
+//
+//        lockSet.addAll(tripleStore.getSPORelation().getIndexNames());
+//
+//        lockSet.addAll(tripleStore.getLexiconRelation().getIndexNames());
+//
+//        final String[] locks = lockSet.toArray(new String[lockSet.size()]);
+//
+//        return locks;
 
-        final AbstractTripleStore tripleStore = (AbstractTripleStore) indexManager
-                .getResourceLocator().locate(namespace, timestamp);
-
-        if (tripleStore == null)
-            throw new DatasetNotFoundException("Not found: namespace="
-                    + namespace + ", timestamp="
-                    + TimestampUtility.toString(timestamp));
-
-        final Set<String> lockSet = new HashSet<String>();
-
-        lockSet.addAll(tripleStore.getSPORelation().getIndexNames());
-
-        lockSet.addAll(tripleStore.getLexiconRelation().getIndexNames());
-
-        final String[] locks = lockSet.toArray(new String[lockSet.size()]);
-
-        return locks;
-
     }
     
 }

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.





[Bigdata-commit] SF.net SVN: bigdata:[8578] branches/BIGDATA_RELEASE_1_3_0

Fast, scalable, robust graph database platform

[Bigdata-commit] SF.net SVN: bigdata:[8578] branches/BIGDATA_RELEASE_1_3_0