From: Bryan T. <tho...@us...> - 2007-03-08 18:14:55
Update of /cvsroot/cweb/bigdata/src/java/com/bigdata/objndx In directory sc8-pr-cvs4.sourceforge.net:/tmp/cvs-serv15000/src/java/com/bigdata/objndx Modified Files: IndexSegmentBuilder.java IndexSegmentFileStore.java AbstractBTree.java BTreeMetadata.java IndexSegment.java IndexSegmentMetadata.java Added Files: IndexSegmentExtensionMetadata.java Log Message: Updated the UML model and added a ZIP containing an HTML presentation of the model. Working on partitioned index support. Index: IndexSegmentFileStore.java =================================================================== RCS file: /cvsroot/cweb/bigdata/src/java/com/bigdata/objndx/IndexSegmentFileStore.java,v retrieving revision 1.9 retrieving revision 1.10 diff -C2 -d -r1.9 -r1.10 *** IndexSegmentFileStore.java 6 Mar 2007 20:38:05 -0000 1.9 --- IndexSegmentFileStore.java 8 Mar 2007 18:14:05 -0000 1.10 *************** *** 1,9 **** --- 1,15 ---- package com.bigdata.objndx; + import it.unimi.dsi.mg4j.util.BloomFilter; + import java.io.File; import java.io.IOException; import java.io.RandomAccessFile; + import java.lang.reflect.Constructor; import java.nio.ByteBuffer; + import org.apache.log4j.Logger; + + import com.bigdata.io.SerializerUtil; import com.bigdata.rawstore.Addr; import com.bigdata.rawstore.IRawStore; *************** *** 24,27 **** --- 30,39 ---- /** + * Logger. + */ + protected static final Logger log = Logger + .getLogger(IndexSegmentFileStore.class); + + /** * A buffer containing the disk image of the nodes in the index segment. * While some nodes will be held in memory by the hard reference queue *************** *** 47,50 **** --- 59,67 ---- /** + * A read-only view of the extension metadata record for the index segment. + */ + protected final IndexSegmentExtensionMetadata extensionMetadata; + + /** * True iff the store is open. */ *************** *** 59,63 **** * * @todo make it optional to fully buffer the index nodes? ! * @todo make it optional to fully buffer the entire file. */ public IndexSegmentFileStore(File file) { --- 76,82 ---- * * @todo make it optional to fully buffer the index nodes? ! * @todo make it optional to fully buffer the leaves as well as the nodes? ! * ! * @see #load() */ public IndexSegmentFileStore(File file) { *************** *** 86,89 **** --- 105,113 ---- /* + * Read in the extension metadata record. + */ + this.extensionMetadata = readExtensionMetadata(); + + /* * Read the index nodes from the file into a buffer. If there are no * index nodes then we skip this step. Note that we always read in *************** *** 95,98 **** --- 119,126 ---- : null); + /* + * Mark as open so that we can use read(long addr) to read other + * data (the root node/leaf). + */ this.open = true; *************** *** 105,108 **** --- 133,175 ---- } + /** + * Load the {@link IndexSegment} or derived class from the store. The + * {@link IndexSegment} or derived class MUST provide a public constructor + * with the following signature: <code> + * + * <i>className</i>(IndexSegmentFileStore store) + * + * </code> + * + * @param store + * The store. + * + * @return The {@link IndexSegment} or derived class loaded from that store. + * + * @see IndexSegmentExtensionMetadata, which provides a metadata extension + * protocol for the {@link IndexSegment}. 
+ */ + public IndexSegment load() { + + try { + + Class cl = Class.forName(extensionMetadata.className); + + Constructor ctor = cl + .getConstructor(new Class[] { IndexSegmentFileStore.class }); + + IndexSegment seg = (IndexSegment) ctor + .newInstance(new Object[] { this }); + + return seg; + + } catch(Exception ex) { + + throw new RuntimeException(ex); + + } + + } + public boolean isOpen() { *************** *** 284,286 **** --- 351,486 ---- } + /** + * Reads the bloom filter directly from the file. + * + * @return The bloom filter -or- <code>null</code> if the bloom filter was + * not constructed when the {@link IndexSegment} was built. + */ + protected BloomFilter readBloomFilter() throws IOException { + + final long addr = metadata.addrBloom; + + if(addr == 0L) { + + return null; + + } + + log.info("reading bloom filter: "+Addr.toString(addr)); + + final int off = Addr.getOffset(addr); + + final int len = Addr.getByteCount(addr); + + ByteBuffer buf = ByteBuffer.allocate(len); + + buf.limit(len); + + buf.position(0); + + try { + + // read into [dst] - does not modify the channel's position(). + final int nread = raf.getChannel().read(buf, off); + + assert nread == len; + + buf.flip(); // Flip buffer for reading. + + } catch (IOException ex) { + + throw new RuntimeException(ex); + + } + + assert buf.position() == 0; + assert buf.limit() == len; + + // ByteBufferInputStream bais = new ByteBufferInputStream(buf); + // + //// ByteArrayInputStream bais = new ByteArrayInputStream(buf.array()); + // + // ObjectInputStream ois = new ObjectInputStream(bais); + // + // try { + // + // BloomFilter bloomFilter = (BloomFilter) ois.readObject(); + // + // log.info("Read bloom filter: minKeys=" + bloomFilter.size() + // + ", entryCount=" + metadata.nentries + ", bytesOnDisk=" + // + len + ", errorRate=" + metadata.errorRate); + // + // return bloomFilter; + // + // } + // + // catch(Exception ex) { + // + // IOException ex2 = new IOException("Could not read bloom filter: "+ex); + // + // ex2.initCause(ex); + // + // throw ex2; + // + // } + + BloomFilter bloomFilter = (BloomFilter) SerializerUtil.deserialize(buf); + + log.info("Read bloom filter: minKeys=" + bloomFilter.size() + + ", entryCount=" + metadata.nentries + ", bytesOnDisk=" + + len + ", errorRate=" + metadata.errorRate); + + return bloomFilter; + + } + + /** + * Reads the {@link IndexSegmentExtensionMetadata} record directly from the + * file. + */ + protected IndexSegmentExtensionMetadata readExtensionMetadata() throws IOException { + + final long addr = metadata.addrExtensionMetadata; + + assert addr != 0L; + + log.info("reading extension metadata record: "+Addr.toString(addr)); + + final int off = Addr.getOffset(addr); + + final int len = Addr.getByteCount(addr); + + ByteBuffer buf = ByteBuffer.allocate(len); + + buf.limit(len); + + buf.position(0); + + try { + + // read into [dst] - does not modify the channel's position(). + final int nread = raf.getChannel().read(buf, off); + + assert nread == len; + + buf.flip(); // Flip buffer for reading. 
+ + } catch (IOException ex) { + + throw new RuntimeException(ex); + + } + + assert buf.position() == 0; + assert buf.limit() == len; + + IndexSegmentExtensionMetadata extensionMetadata = (IndexSegmentExtensionMetadata) SerializerUtil + .deserialize(buf); + + log.info("Read extension metadata: " + extensionMetadata); + + return extensionMetadata; + + } + } --- NEW FILE: IndexSegmentExtensionMetadata.java --- /** The Notice below must appear in each file of the Source Code of any copy you distribute of the Licensed Product. Contributors to any Modifications may add their own copyright notices to identify their own contributions. License: The contents of this file are subject to the CognitiveWeb Open Source License Version 1.1 (the License). You may not copy or use this file, in either source code or executable form, except in compliance with the License. You may obtain a copy of the License from http://www.CognitiveWeb.org/legal/license/ Software distributed under the License is distributed on an AS IS basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the specific language governing rights and limitations under the License. Copyrights: Portions created by or assigned to CognitiveWeb are Copyright (c) 2003-2003 CognitiveWeb. All Rights Reserved. Contact information for CognitiveWeb is available at http://www.CognitiveWeb.org Portions Copyright (c) 2002-2003 Bryan Thompson. Acknowledgements: Special thanks to the developers of the Jabber Open Source License 1.0 (JOSL), from which this License was derived. This License contains terms that differ from JOSL. Special thanks to the CognitiveWeb Open Source Contributors for their suggestions and support of the Cognitive Web. Modifications: */ /* * Created on Mar 7, 2007 */ package com.bigdata.objndx; import java.io.Serializable; import com.bigdata.io.SerializerUtil; /** * The base class for variable length metadata and extension metadata for an * {@link IndexSegment} as persisted on an {@link IndexSegmentFileStore}. The * {@link IndexSegmentMetadata} class is NOT extensible and is used solely for * fixed length metadata common to all {@link IndexSegment}s, including the * root addresses required to bootstrap the load of an {@link IndexSegment} from * a file. In contrast, this class provides for both required variable length * metadata and arbitrary extension metadata for an {@link IndexSegment}. * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ */ public class IndexSegmentExtensionMetadata implements Serializable { private static final long serialVersionUID = 4846316492768402991L; /** * Either {@link IndexSegment} or a derived class that will be instantiated * when the index segment is loaded using * {@link IndexSegmentFileStore#load()} */ public final String className; /** * The serializer used for the values in the leaves of the index. */ public final IValueSerializer valSer; /** * When non-null, a {@link RecordCompressor} that was used to write the * nodes and leaves of the {@link IndexSegment}. * * @todo modify to use an interface. */ final public RecordCompressor recordCompressor; // /** // * When non-null, a map containing extension metadata. // * // * @see #getMetadata(String name) // */ // final private Map<String, Serializable> metadataMap; // // /** // * Return the metadata object stored under the key. // * // * @param name // * The key. // * // * @return The metadata object or <code>null</code> if there is nothing // * stored under that key.
// */ // public Serializable getMetadata(String name) { // // if(metadataMap==null) return null; // // return metadataMap.get(name); // // } /** * * @param cl * The name of the {@link IndexSegment} class that will be * instantiated when the {@link IndexSegment} is loaded from the * file. * * @param valSer * The object responsible for (de-)serializing the values in the * leaves of the B+-Tree. * * @param recordCompressor * When non-null, a {@link RecordCompressor} that was used to * write the nodes and leaves of the {@link IndexSegment}. */ // * // * @param metadataMap // * An optional serializable map containing application defined // * extension metadata. The map will be serialized with the // * {@link IndexSegmentExtensionMetadata} object as part of the // * {@link IndexSegmentFileStore}. public IndexSegmentExtensionMetadata(Class cl, IValueSerializer valSer, RecordCompressor recordCompressor) { // Map<String, Serializable> metadataMap) { if( cl == null ) throw new IllegalArgumentException(); if( ! IndexSegment.class.isAssignableFrom(cl) ) { throw new IllegalArgumentException("Does not extend: " + IndexSegment.class); } if( valSer == null ) throw new IllegalArgumentException(); this.className = cl.getName(); this.valSer = valSer; this.recordCompressor = recordCompressor; // this.metadataMap = metadataMap; } /** * Read the extension metadata record from the store. * * @param store * the store. * @param addr * the address of the extension metadata record. * * @return the extension metadata record. * * @see IndexSegmentFileStore#load(), which will return an * {@link IndexSegment} that is ready for use. */ public static IndexSegmentExtensionMetadata read(IndexSegmentFileStore store, long addr) { return (IndexSegmentExtensionMetadata) SerializerUtil.deserialize(store.read(addr)); } } Index: IndexSegmentBuilder.java =================================================================== RCS file: /cvsroot/cweb/bigdata/src/java/com/bigdata/objndx/IndexSegmentBuilder.java,v retrieving revision 1.26 retrieving revision 1.27 diff -C2 -d -r1.26 -r1.27 *** IndexSegmentBuilder.java 6 Mar 2007 20:38:05 -0000 1.26 --- IndexSegmentBuilder.java 8 Mar 2007 18:14:05 -0000 1.27 *************** *** 63,66 **** --- 63,69 ---- import org.apache.log4j.Logger; + import com.bigdata.io.SerializerUtil; + import com.bigdata.isolation.UnisolatedIndexSegment; + import com.bigdata.isolation.Value; import com.bigdata.journal.Journal; import com.bigdata.journal.TemporaryRawStore; *************** *** 74,80 **** --- 77,85 ---- * factor. There are two main use cases: * <ol> + * * <li>Evicting a key range of an index into an optimized on-disk index. In * this case, the input is a {@link BTree} that is ideally backed by a fully * buffered {@link IRawStore} so that no random reads are required.</li> + * * <li>Merging index segments. In this case, the input is typically records * emerging from a merge-sort. There are two distinct cases here. In one, we *************** *** 88,91 **** --- 93,97 ---- * history policy is defined, then it must be applied here to cause key-value pairs * whose retention is no longer required by that policy to be dropped.</li> + * * </ol> * *************** *** 109,116 **** --- 115,129 ---- * less efficient on first glance. * + * FIXME support build from a key range rather than just an entire source tree. + * this is required to support partitioned indices. + * + * FIXME support efficient prior/next leaf scans. + * * FIXME use the shortest separator key. 
* * @see IndexSegment * @see IndexSegmentFile + * @see IndexSegmentMetadata + * @see IndexSegmentExtensionMetadata * @see IndexSegmentMerger */ *************** *** 200,203 **** --- 213,222 ---- final BloomFilter bloomFilter; + // /** + // * When non-null, a map containing extension metadata. This is set by the + // * constructor. + // */ + // final Map<String, Serializable> metadataMap; + /** * The offset in the output file of the last leaf written onto that file. *************** *** 263,266 **** --- 282,301 ---- */ final public IndexSegmentPlan plan; + + // /** + // * The address at which each leaf is written on the file. This information + // * is stored in the {@link IndexSegmentExtensionMetadata} and may be used to + // * perform fast forward or reverse leaf scans by first looking up the leaf + // * address based on its ordinal position within the file and then scanning + // * forward or backward through the addresses in this array. The array is + // * buffered when the {@link IndexSegment} is loaded. + // * + // * @todo We need to be able to extend the leaf data structure for this + // * purpose. We could write the addr of the prior leaf on the leaf, but + // * not of the next leaf. For that we need the ordinal position of the leaf. + // * + // * @see ... + // */ + // final long[] leaveAddrs; /** *************** *** 412,424 **** * option should only be enabled if you know that point access * tests are a hotspot for an index. - * * @throws IOException - * - * FIXME support efficient prior/next leaf scans. */ public IndexSegmentBuilder(File outFile, File tmpDir, final int entryCount, IEntryIterator entryIterator, final int m, IValueSerializer valueSerializer, boolean useChecksum, ! RecordCompressor recordCompressor, final double errorRate) throws IOException { --- 447,463 ---- * option should only be enabled if you know that point access * tests are a hotspot for an index. * @throws IOException */ + // * @param metadataMap + // * An optional serializable map containing application defined + // * extension metadata. The map will be serialized with the + // * {@link IndexSegmentExtensionMetadata} object as part of the + // * {@link IndexSegmentFileStore}. public IndexSegmentBuilder(File outFile, File tmpDir, final int entryCount, IEntryIterator entryIterator, final int m, IValueSerializer valueSerializer, boolean useChecksum, ! RecordCompressor recordCompressor, final double errorRate ! // , final Map<String, Serializable> metadataMap ! ) throws IOException { *************** *** 501,504 **** --- 540,545 ---- } + // this.metadataMap = metadataMap; + // Used to serialize the nodes and leaves for the output tree. nodeSer = new NodeSerializer(NOPNodeFactory.INSTANCE, *************** *** 1055,1076 **** ByteBuffer buf = nodeSer.putLeaf(leaf); - // /* - // * Write leaf on the channel. - // */ - // - // FileChannel outChannel = out.getChannel(); - // - // // position on the channel before the write. - // final long offset = outChannel.position(); - // - // if(offset>Integer.MAX_VALUE) { - // - // throw new IOException("Index segment exceeds int32 bytes."); - // - // } - // - // // write on the channel. - // final int nbytes = outChannel.write(buf); - final long addr1 = leafBuffer.write(buf); --- 1096,1099 ---- *************** *** 1199,1210 **** * * <pre> ! * metadata record ! * leaves ! * nodes (may be empty) ! * extension metadata records, including the optional bloom filter. * </pre> * ! * Each of these regions has a variable length and some may be missing ! * entirely. 
* <p> * Once all nodes and leaves have been buffered we are ready to start --- 1222,1241 ---- * * <pre> ! * fixed length metadata record (required) ! * leaves (required) ! * nodes (may be empty) ! * the bloom filter (optional). ! * the extension metadata record (required, but extensible). * </pre> * ! * <p> ! * The index segment metadata is divided into a base ! * {@link IndexSegmentMetadata} record with a fixed format containing only ! * essential data and additional metadata records written at the end of the ! * file including the optional bloom filter and the required ! * {@link IndexSegmentExtensionMetadata} record. The latter is where we ! * write variable length metadata including the _name_ of the index, or ! * additional metadata defined by a specific class of index. ! * * <p> * Once all nodes and leaves have been buffered we are ready to start *************** *** 1225,1246 **** * @throws IOException * - * @todo the metadata record can be divided into a base record with a fixed - * format containing only essential data and an extensible record to - * be written at the end of the file. the latter section is where we - * would write the bloom filters and other variable length metadata - * including the _name_ of the index, or additional metadata defined - * by a specific class of index. - * <p> - * the commit point for the index segment file should be a metadata - * record at the head of the file having identical timestamps at the - * start and end of its data section. since the file format is - * immutable it is ok to have what is essentially only a single root - * block. if the timestamps do not agree then the build was no - * successfully completed. - * <p> - * the checksum of the index segment file should be stored in the - * partitioned index so that it can be validated after being moved - * around, etc. - * * @todo it would be nice to have the same file format and addresses as the * journal. in order to do this we need to (a) write two root blocks --- 1256,1259 ---- *************** *** 1376,1379 **** --- 1389,1438 ---- /* + * Write out the extensible metadata record at the end of the file. + */ + final long addrExtensionMetadata; + { + + /* + * Choose the implementation class based on whether or not the index + * is isolatable. + * + * @todo this test may be too fragile and is not extensible to other + * implementation classes. + * + * FIXME at a minimum, the BTree class should be accessible and + * provided to the extension metadata constructor so that any + * interesting metadata may also be stored in the index segment. + */ + final Class cl = (nodeSer.valueSerializer instanceof Value.Serializer ? UnisolatedIndexSegment.class + : IndexSegment.class); + + /* + * Setup and serialize the extension metadata. + */ + IndexSegmentExtensionMetadata extensionMetadata = new IndexSegmentExtensionMetadata( + cl, nodeSer.valueSerializer, nodeSer.recordCompressor); + // metadataMap); + + final byte[] extensionMetadataBytes = SerializerUtil + .serialize(extensionMetadata); + + assert out.length() < Integer.MAX_VALUE; + + final int offset = (int)out.length(); + + // seek to the end of the file. + out.seek(offset); + + // write the serialized extension metadata. + out.write(extensionMetadataBytes, 0, extensionMetadataBytes.length); + + // note its address. + addrExtensionMetadata = Addr.toLong(extensionMetadataBytes.length, + offset); + + } + + /* * Seek to the start of the file and write out the metadata record. 
*/ *************** *** 1383,1396 **** final long now = System.currentTimeMillis(); - // @todo name of the index segment - drop this field? add uuids? - final String name = "<no name>"; - outChannel.position(0); IndexSegmentMetadata md = new IndexSegmentMetadata(plan.m, ! plan.height, useChecksum, recordCompressor != null, ! plan.nleaves, nnodesWritten, plan.nentries, ! maxNodeOrLeafLength, addrLeaves, addrNodes, addrRoot, ! errorRate, addrBloom, out.length(), now, name); md.write(out); --- 1442,1452 ---- final long now = System.currentTimeMillis(); outChannel.position(0); IndexSegmentMetadata md = new IndexSegmentMetadata(plan.m, ! plan.height, useChecksum, plan.nleaves, nnodesWritten, ! plan.nentries, maxNodeOrLeafLength, addrLeaves, addrNodes, ! addrRoot, addrExtensionMetadata, addrBloom, errorRate, out ! .length(), now); md.write(out); *************** *** 1480,1483 **** --- 1536,1548 ---- // mutable. + + // /** + // * The ordinal position of this leaf in the {@link IndexSegment}. + // */ + // int leafIndex; + + /** + * The values stored in the leaf. + */ final Object[] vals; Index: IndexSegmentMetadata.java =================================================================== RCS file: /cvsroot/cweb/bigdata/src/java/com/bigdata/objndx/IndexSegmentMetadata.java,v retrieving revision 1.10 retrieving revision 1.11 diff -C2 -d -r1.10 -r1.11 *** IndexSegmentMetadata.java 5 Feb 2007 18:17:39 -0000 1.10 --- IndexSegmentMetadata.java 8 Mar 2007 18:14:05 -0000 1.11 *************** *** 4,7 **** --- 4,8 ---- import java.io.RandomAccessFile; import java.util.Date; + import java.util.UUID; import com.bigdata.rawstore.Addr; *************** *** 10,38 **** /** * The metadata record for an {@link IndexSegment}. * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ * ! * @todo consider recording the min/max key or just making it easy to determine ! * that for an {@link IndexSegment}. This has do to with both correct ! * rejection of queries directed to the wrong index segment and managing ! * the metadata for a distributed index. ! * ! * @todo add a uuid for each index segment and a uuid for the index to which the ! * segments belong? examine the format of the uuid. can we use part of it ! * as the unique basis for one up identifiers within a parition? ! * ! * FIXME We need a general mechanism for persisting metadata including the ! * valSer, record compressor, and user-defined objects for indices. These data ! * can go into a series of extensible metadata records located at the end of the ! * file. The bloom filter itself could be an example of such a metadata record. ! * Such metadata should survive conversions from a btree to an index segment, ! * mergers of index segments or btrees, and conversion from an index segment to ! * a btree. ! * ! * FIXME introduce two timestamps in the metadata record. the record is valid ! * iff both timestamps agree and are non-zero. */ public class IndexSegmentMetadata { /** --- 11,66 ---- /** * The metadata record for an {@link IndexSegment}. + * <p> + * The commit point for the index segment file should be a metadata record at + * the head of the file having identical timestamps at the start and end of its + * data section. Since the file format is immutable it is ok to have what is + * essentially only a single root block. If the timestamps do not agree then the + * build was not successfully completed. * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ * ! 
* @todo the checksum of the index segment file should be stored in the ! * partitioned index so that it can be validated after being moved around, ! * etc. it would also be good to checksum the {@link IndexSegmentMetadata} ! * record. */ public class IndexSegmentMetadata { + + static final int SIZEOF_MAGIC = Bytes.SIZEOF_INT; + static final int SIZEOF_VERSION = Bytes.SIZEOF_INT; + static final int SIZEOF_BRANCHING_FACTOR = Bytes.SIZEOF_INT; + static final int SIZEOF_COUNTS = Bytes.SIZEOF_INT; + static final int SIZEOF_NBYTES = Bytes.SIZEOF_INT; + static final int SIZEOF_ADDR = Bytes.SIZEOF_LONG; + static final int SIZEOF_ERROR_RATE = Bytes.SIZEOF_DOUBLE; + static final int SIZEOF_TIMESTAMP = Bytes.SIZEOF_LONG; + + /** + * The #of unused bytes in the metadata record format. Note that the unused + * space occurs between the file size and the final timestamp in the record. + * As the unused bytes are allocated in new versions the value in this field + * MUST be adjusted down from its original value of 256. + */ + static final int SIZEOF_UNUSED = 256; + + /** + * The #of bytes required by the current metadata record format. + */ + static final int SIZE = // + SIZEOF_MAGIC + // + SIZEOF_VERSION + // + Bytes.SIZEOF_LONG + // timestamp0 + Bytes.SIZEOF_UUID + // index segment UUID. + SIZEOF_BRANCHING_FACTOR + // branchingFactor + SIZEOF_COUNTS * 4 + // height, #leaves, #nodes, #entries + SIZEOF_NBYTES + // max record length + Bytes.SIZEOF_BYTE + // useChecksum + SIZEOF_ADDR * 5 + // leaves, nodes, root, ext metadata, bloomFilter + Bytes.SIZEOF_DOUBLE + // errorRate + Bytes.SIZEOF_LONG + // file size + SIZEOF_UNUSED + // available bytes for future versions. + Bytes.SIZEOF_LONG // timestamp1 + ; /** *************** *** 45,49 **** */ static transient final public int VERSION0 = 0x0; ! /** * Branching factor for the index segment. --- 73,82 ---- */ static transient final public int VERSION0 = 0x0; ! ! /** ! * UUID for this {@link IndexSegment}. ! */ ! final public UUID uuid; ! /** * Branching factor for the index segment. *************** *** 58,76 **** /** - * When true, the checksum was computed and stored for the nodes and leaves - * in the file and will be verified on de-serialization. - */ - final public boolean useChecksum; - - /** - * When true, a {@link RecordCompressor} was used to write the nodes and - * leaves of the {@link IndexSegment}. - * - * @todo modify to specify the implementation of a record compressor - * interface. - */ - final public boolean useRecordCompressor; - - /** * The #of leaves serialized in the file. */ --- 91,94 ---- *************** *** 99,102 **** --- 117,126 ---- /** + * When true, the checksum was computed and stored for the nodes and leaves + * in the file and will be verified on de-serialization. + */ + final public boolean useChecksum; + + /** * The {@link Addr address} of the contiguous region containing the * serialized leaves in the file. *************** *** 123,130 **** /** ! * The target error rate for the optional bloom filter and 0.0 iff ! * the bloom filter was not constructed. */ ! final public double errorRate; /** --- 147,153 ---- /** ! * The address of the {@link IndexSegmentExtensionMetadata} record. */ ! final public long addrExtensionMetadata; /** *************** *** 137,140 **** --- 160,169 ---- /** + * The target error rate for the optional bloom filter and 0.0 iff + * the bloom filter was not constructed. + */ + final public double errorRate; + + /** * Length of the file in bytes. 
*/ *************** *** 145,165 **** */ final public long timestamp; - - /** - * @todo Name of the index? or uuid? or drop? - */ - final public String name; - - /** - * The #of bytes in the metadata record. - * - * @todo This is oversized in order to allow some slop for future entries - * and in order to permit the variable length index name to be - * recorded in the index segment file. The size needs to be reviewed - * once the design is crisper. - */ - public static final int SIZE = Bytes.kilobyte32 * 4; - - public static final int MAX_NAME_LENGTH = Bytes.kilobyte32 * 2; /** --- 174,177 ---- *************** *** 192,195 **** --- 204,211 ---- } + + final long timestamp0 = raf.readLong(); + + uuid = new UUID(raf.readLong()/*MSB*/, raf.readLong()/*LSB*/); branchingFactor = raf.readInt(); *************** *** 197,204 **** height = raf.readInt(); - useChecksum = raf.readBoolean(); - - useRecordCompressor = raf.readBoolean(); - nleaves = raf.readInt(); --- 213,216 ---- *************** *** 209,212 **** --- 221,226 ---- maxNodeOrLeafLength = raf.readInt(); + useChecksum = raf.readBoolean(); + addrLeaves = raf.readLong(); *************** *** 215,222 **** addrRoot = raf.readLong(); ! errorRate = raf.readDouble(); ! addrBloom = raf.readLong(); length = raf.readLong(); --- 229,238 ---- addrRoot = raf.readLong(); ! addrExtensionMetadata = raf.readLong(); ! addrBloom = raf.readLong(); + errorRate = raf.readDouble(); + length = raf.readLong(); *************** *** 226,234 **** } ! timestamp = raf.readLong(); ! ! name = raf.readUTF(); ! assert name.length() <= MAX_NAME_LENGTH; } --- 242,256 ---- } ! raf.skipBytes(SIZEOF_UNUSED); ! final long timestamp1 = raf.readLong(); ! ! if(timestamp0 != timestamp1) { ! ! throw new RuntimeException("Timestamps do not agree - file is not useable."); ! ! } ! ! this.timestamp = timestamp0; } *************** *** 241,249 **** */ public IndexSegmentMetadata(int branchingFactor, int height, ! boolean useChecksum, boolean useRecordCompressor, int nleaves, ! int nnodes, int nentries, int maxNodeOrLeafLength, ! long addrLeaves, long addrNodes, long addrRoot, ! double errorRate, long addrBloom, long length, long timestamp, ! String name) { assert branchingFactor >= BTree.MIN_BRANCHING_FACTOR; --- 263,270 ---- */ public IndexSegmentMetadata(int branchingFactor, int height, ! boolean useChecksum, int nleaves, int nnodes, int nentries, ! int maxNodeOrLeafLength, long addrLeaves, long addrNodes, ! long addrRoot, long addrExtensionMetadata, long addrBloom, ! double errorRate, long length, long timestamp) { assert branchingFactor >= BTree.MIN_BRANCHING_FACTOR; *************** *** 286,292 **** assert timestamp != 0L; ! assert name != null; ! ! assert name.length() <= MAX_NAME_LENGTH; this.branchingFactor = branchingFactor; --- 307,311 ---- assert timestamp != 0L; ! this.uuid = UUID.randomUUID(); this.branchingFactor = branchingFactor; *************** *** 294,301 **** this.height = height; - this.useChecksum = useChecksum; - - this.useRecordCompressor = useRecordCompressor; - this.nleaves = nleaves; --- 313,316 ---- *************** *** 306,309 **** --- 321,326 ---- this.maxNodeOrLeafLength = maxNodeOrLeafLength; + this.useChecksum = useChecksum; + this.addrLeaves = addrLeaves; *************** *** 312,324 **** this.addrRoot = addrRoot; ! this.errorRate = errorRate; ! this.addrBloom = addrBloom; this.length = length; this.timestamp = timestamp; - - this.name = name; } --- 329,341 ---- this.addrRoot = addrRoot; ! this.addrExtensionMetadata = addrExtensionMetadata; ! 
this.addrBloom = addrBloom; + this.errorRate = errorRate; + this.length = length; this.timestamp = timestamp; } *************** *** 339,351 **** raf.writeInt(VERSION0); raf.writeInt(branchingFactor); raf.writeInt(height); - raf.writeBoolean(useChecksum); - - raf.writeBoolean(useRecordCompressor); - raf.writeInt(nleaves); --- 356,370 ---- raf.writeInt(VERSION0); + + raf.writeLong(timestamp); + raf.writeLong(uuid.getMostSignificantBits()); + + raf.writeLong(uuid.getLeastSignificantBits()); + raf.writeInt(branchingFactor); raf.writeInt(height); raf.writeInt(nleaves); *************** *** 356,359 **** --- 375,380 ---- raf.writeInt(maxNodeOrLeafLength); + raf.writeBoolean(useChecksum); + raf.writeLong(addrLeaves); *************** *** 362,375 **** raf.writeLong(addrRoot); ! raf.writeDouble(errorRate); raf.writeLong(addrBloom); raf.writeLong(length); raf.writeLong(timestamp); - raf.writeUTF(name); - } --- 383,398 ---- raf.writeLong(addrRoot); ! raf.writeLong(addrExtensionMetadata); raf.writeLong(addrBloom); + raf.writeDouble(errorRate); + raf.writeLong(length); + + raf.skipBytes(SIZEOF_UNUSED); raf.writeLong(timestamp); } *************** *** 382,401 **** sb.append("magic="+Integer.toHexString(MAGIC)); sb.append(", branchingFactor="+branchingFactor); sb.append(", height=" + height); - sb.append(", useChecksum=" + useChecksum); - sb.append(", useRecordCompressor=" + useRecordCompressor); sb.append(", nleaves=" + nleaves); sb.append(", nnodes=" + nnodes); sb.append(", nentries=" + nentries); sb.append(", maxNodeOrLeafLength=" + maxNodeOrLeafLength); ! sb.append(", addrLeaves=" + addrLeaves); ! sb.append(", addrNodes=" + addrNodes); sb.append(", addrRoot=" + Addr.toString(addrRoot)); ! sb.append(", errorRate=" + errorRate); sb.append(", addrBloom=" + Addr.toString(addrBloom)); sb.append(", length=" + length); sb.append(", timestamp=" + new Date(timestamp)); - sb.append(", name="+name); return sb.toString(); --- 405,424 ---- sb.append("magic="+Integer.toHexString(MAGIC)); + sb.append(", uuid="+uuid); sb.append(", branchingFactor="+branchingFactor); sb.append(", height=" + height); sb.append(", nleaves=" + nleaves); sb.append(", nnodes=" + nnodes); sb.append(", nentries=" + nentries); sb.append(", maxNodeOrLeafLength=" + maxNodeOrLeafLength); ! sb.append(", useChecksum=" + useChecksum); ! sb.append(", addrLeaves=" + Addr.toString(addrLeaves)); ! sb.append(", addrNodes=" + Addr.toString(addrNodes)); sb.append(", addrRoot=" + Addr.toString(addrRoot)); ! sb.append(", addrExtensionMetadata=" + Addr.toString(addrExtensionMetadata)); sb.append(", addrBloom=" + Addr.toString(addrBloom)); + sb.append(", errorRate=" + errorRate); sb.append(", length=" + length); sb.append(", timestamp=" + new Date(timestamp)); return sb.toString(); Index: BTreeMetadata.java =================================================================== RCS file: /cvsroot/cweb/bigdata/src/java/com/bigdata/objndx/BTreeMetadata.java,v retrieving revision 1.11 retrieving revision 1.12 diff -C2 -d -r1.11 -r1.12 *** BTreeMetadata.java 21 Feb 2007 20:17:21 -0000 1.11 --- BTreeMetadata.java 8 Mar 2007 18:14:05 -0000 1.12 *************** *** 7,11 **** import com.bigdata.io.SerializerUtil; - import com.bigdata.isolation.IConflictResolver; import com.bigdata.rawstore.Addr; import com.bigdata.rawstore.IRawStore; --- 7,10 ---- *************** *** 155,159 **** /** * Re-load the {@link BTree} or derived class from the store. The ! 
* {@link BTree} or derived class MUST provide a public construct with the * following signature: <code> * --- 154,158 ---- /** * Re-load the {@link BTree} or derived class from the store. The ! * {@link BTree} or derived class MUST provide a public constructor with the * following signature: <code> * *************** *** 171,175 **** * {@link BTreeMetadata} record. * ! * @see BTree#newMetadata(), which MUST be overloaded if you subclass extend * {@link BTreeMetadata}. */ --- 170,174 ---- * {@link BTreeMetadata} record. * ! * @see BTree#newMetadata(), which MUST be overridden if you subclass * {@link BTreeMetadata}. */ Index: IndexSegment.java =================================================================== RCS file: /cvsroot/cweb/bigdata/src/java/com/bigdata/objndx/IndexSegment.java,v retrieving revision 1.16 retrieving revision 1.17 diff -C2 -d -r1.16 -r1.17 *** IndexSegment.java 12 Feb 2007 21:51:07 -0000 1.16 --- IndexSegment.java 8 Mar 2007 18:14:05 -0000 1.17 *************** *** 1,16 **** package com.bigdata.objndx; - import it.unimi.dsi.mg4j.util.BloomFilter; - import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.IOException; - import java.io.ObjectInputStream; - import java.nio.ByteBuffer; import org.CognitiveWeb.extser.LongPacker; import com.bigdata.cache.HardReferenceQueue; - import com.bigdata.io.ByteBufferInputStream; import com.bigdata.rawstore.Addr; import com.bigdata.rawstore.Bytes; --- 1,11 ---- *************** *** 36,40 **** * @version $Id$ */ ! public class IndexSegment extends AbstractBTree implements IIndex { /** --- 31,35 ---- * @version $Id$ */ ! public class IndexSegment extends AbstractBTree { /** *************** *** 43,52 **** final protected IndexSegmentFileStore fileStore; - // /** - // * The root of the btree. Since this is a read-only index the root can never - // * be replaced. - // */ - // final protected AbstractNode root; - /** * An optional bloom filter that will be used to filter point tests. Since --- 38,41 ---- *************** *** 87,96 **** } - // public AbstractNode getRoot() { - // - // return root; - // - // } - public int getEntryCount() { --- 76,79 ---- } ! public IndexSegment(IndexSegmentFileStore fileStore, IValueSerializer valSer) { ! this(fileStore, new HardReferenceQueue<PO>( new DefaultEvictionListener(), BTree.DEFAULT_HARD_REF_QUEUE_CAPACITY, ! BTree.DEFAULT_HARD_REF_QUEUE_SCAN), valSer); } --- 82,91 ---- } ! public IndexSegment(IndexSegmentFileStore fileStore ) { ! this(fileStore, new HardReferenceQueue<PO>( new DefaultEvictionListener(), BTree.DEFAULT_HARD_REF_QUEUE_CAPACITY, ! BTree.DEFAULT_HARD_REF_QUEUE_SCAN)); } *************** *** 121,147 **** * large, e.g., try with 100 and 20 respectively. * @param valSer * @throws IOException * * @todo explore good defaults for the hard reference queue, which should * probably be much smaller as the branching factor grows larger. - * - * FIXME move the value serializer into the metadata record. - * - * FIXME add a boolean flag to mark index segments that are the final result - * of a compacting merge. This will make it possible to reconstruct from the - * file system which index segments are part of the consistent state for a - * given restart time. */ ! public IndexSegment(IndexSegmentFileStore fileStore, ! HardReferenceQueue<PO> hardReferenceQueue, ! IValueSerializer valSer) { super(fileStore, fileStore.metadata.branchingFactor, fileStore.metadata.maxNodeOrLeafLength, hardReferenceQueue, new CustomAddressSerializer(Addr ! 
.getOffset(fileStore.metadata.addrNodes)), valSer, ImmutableNodeFactory.INSTANCE, ! fileStore.metadata.useRecordCompressor ? new RecordCompressor() ! : null, fileStore.metadata.useChecksum); // Type-safe reference to the backing store. --- 104,124 ---- * large, e.g., try with 100 and 20 respectively. * @param valSer + * * @throws IOException * * @todo explore good defaults for the hard reference queue, which should * probably be much smaller as the branching factor grows larger. */ ! protected IndexSegment(IndexSegmentFileStore fileStore, ! HardReferenceQueue<PO> hardReferenceQueue) { super(fileStore, fileStore.metadata.branchingFactor, fileStore.metadata.maxNodeOrLeafLength, hardReferenceQueue, new CustomAddressSerializer(Addr ! .getOffset(fileStore.metadata.addrNodes)), ! fileStore.extensionMetadata.valSer, ImmutableNodeFactory.INSTANCE, ! fileStore.extensionMetadata.recordCompressor, ! fileStore.metadata.useChecksum); // Type-safe reference to the backing store. *************** *** 167,171 **** try { ! this.bloomFilter = readBloomFilter(fileStore.metadata.addrBloom); } catch (IOException ex) { --- 144,148 ---- try { ! this.bloomFilter = fileStore.readBloomFilter(); } catch (IOException ex) { *************** *** 179,258 **** } - /** - * Reads the bloom filter from the file. - * - * Note: this goes around the {@link IndexSegmentFileStore} API since the bloom filter - * is not (currently) written as a compressed record and since the size of - * the largest compressed record does not pay attention to the serialized - * size of the optional bloom filter. - */ - protected BloomFilter readBloomFilter(long addr) throws IOException { - - assert addr != 0L; - - System.err.println("reading bloom filter: "+Addr.toString(addr)); - - final int off = Addr.getOffset(addr); - - final int len = Addr.getByteCount(addr); - - ByteBuffer buf = ByteBuffer.allocate(len); - - buf.limit(len); - - buf.position(0); - - try { - - // read into [dst] - does not modify the channel's position(). - final int nread = fileStore.raf.getChannel().read(buf, off); - - assert nread == len; - - buf.flip(); // Flip buffer for reading. - - } catch (IOException ex) { - - throw new RuntimeException(ex); - - } - - assert buf.position() == 0; - assert buf.limit() == len; - - ByteBufferInputStream bais = new ByteBufferInputStream(buf); - - // ByteArrayInputStream bais = new ByteArrayInputStream(buf.array()); - - ObjectInputStream ois = new ObjectInputStream(bais); - - try { - - BloomFilter bloomFilter = (BloomFilter) ois.readObject(); - - log.info("Read bloom filter: minKeys=" + bloomFilter.size() - + ", entryCount=" + getEntryCount() + ", bytesOnDisk=" - + len + ", errorRate=" + fileStore.metadata.errorRate); - - return bloomFilter; - - } - - catch(Exception ex) { - - IOException ex2 = new IOException("Could not read bloom filter: "+ex); - - ex2.initCause(ex); - - throw ex2; - - } - - } - - /** - * @todo move to parent class and have various methods test to validate that - * the index is open (lookup, insert, remove, scan). - */ public void close() { --- 156,159 ---- Index: AbstractBTree.java =================================================================== RCS file: /cvsroot/cweb/bigdata/src/java/com/bigdata/objndx/AbstractBTree.java,v retrieving revision 1.17 retrieving revision 1.18 diff -C2 -d -r1.17 -r1.18 *** AbstractBTree.java 21 Feb 2007 20:17:21 -0000 1.17 --- AbstractBTree.java 8 Mar 2007 18:14:05 -0000 1.18 *************** *** 106,113 **** /** * Log for btree opeations. - * - * @todo consider renaming the logger. */ ! 
protected static final Logger log = Logger.getLogger(BTree.class); /** --- 106,111 ---- /** * Log for btree operations. */ ! protected static final Logger log = Logger.getLogger(AbstractBTree.class); /**
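
To make the new bootstrap path concrete, here is a minimal usage sketch for loading an index segment through the reflective protocol documented on IndexSegmentFileStore#load() above. The segment file name is hypothetical; the one-argument IndexSegmentFileStore constructor, load(), and close() are as committed in this revision:

    import java.io.File;

    import com.bigdata.objndx.IndexSegment;
    import com.bigdata.objndx.IndexSegmentFileStore;

    public class LoadSegmentExample {

        public static void main(String[] args) {

            // Opens the file, reads the fixed length IndexSegmentMetadata
            // record and the IndexSegmentExtensionMetadata record, and
            // buffers the index nodes (when present).
            IndexSegmentFileStore store = new IndexSegmentFileStore(
                    new File("Test.seg")); // hypothetical file name.

            // Instantiates the class named by the extension metadata record
            // using its public (IndexSegmentFileStore) constructor.
            IndexSegment seg = store.load();

            // ... run reads against the segment, then release its resources.
            seg.close();

        }

    }

The old pattern of constructing an IndexSegment directly with an externally supplied IValueSerializer goes away since the value serializer (and the optional RecordCompressor) now travel inside the extension metadata record.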
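Since the extension metadata records the implementation class by name, a derived index segment only has to honor the constructor contract that load() expects. A hypothetical subclass (the name and body are illustrative, not part of this commit):

    package com.bigdata.objndx;

    /**
     * Hypothetical derived class. {@link IndexSegmentFileStore#load()} will
     * discover this class name in the {@link IndexSegmentExtensionMetadata}
     * record and invoke the constructor reflectively, so the public
     * (IndexSegmentFileStore) signature is mandatory.
     */
    public class MyIndexSegment extends IndexSegment {

        public MyIndexSegment(IndexSegmentFileStore fileStore) {

            super(fileStore);

        }

    }

Note that in this revision the builder still hardwires the choice between IndexSegment and UnisolatedIndexSegment (see the FIXME in the diff above), so a class like this will only be instantiated once the builder accepts the implementation class as an argument.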
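The builder side, for comparison. This fragment assumes a source BTree named btree exposing getEntryCount() and an entry iterator (the entryIterator() method name is an assumption here), plus an IValueSerializer named valueSerializer; the output file and branching factor are illustrative. The constructor signature matches revision 1.27 above, performs the entire build, and throws IOException on failure:

    // Builds the segment: leaves, then nodes, then the optional bloom
    // filter, then the extension metadata record, and finally seeks back
    // to the head of the file to write the fixed length metadata record.
    // An errorRate of 0.0 means that no bloom filter is constructed.
    new IndexSegmentBuilder(new File("Test.seg"), // hypothetical output file.
            new File(System.getProperty("java.io.tmpdir")), // tmpDir.
            btree.getEntryCount(), btree.entryIterator(), 64/* m */,
            valueSerializer, false/* useChecksum */,
            null/* recordCompressor */, 0.0/* errorRate */);

    // Re-opens the result through the new bootstrap path.
    IndexSegment seg = new IndexSegmentFileStore(new File("Test.seg")).load();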