[Bigdata-commit] SF.net SVN: bigdata:[4480] branches/QUADS_QUERY_BRANCH/bigdata

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 422-6466

Revision: 4480
          http://bigdata.svn.sourceforge.net/bigdata/?rev=4480&view=rev
Author:   mrpersonick
Date:     2011-05-11 03:19:09 +0000 (Wed, 11 May 2011)

Log Message:
-----------
inline sids and reverse lookup

Modified Paths:
--------------
    branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/util/TestAll.java
    branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/axioms/BaseAxioms.java
    branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/changesets/StatementWriter.java
    branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/inf/Justification.java
    branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/ILexiconConfiguration.java
    branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/IVUtility.java
    branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/LexiconConfiguration.java
    branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/LexiconRelation.java
    branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/model/BigdataStatementImpl.java
    branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/model/StatementEnum.java
    branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/rio/AsynchronousStatementBufferFactory.java
    branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/BulkCompleteConverter.java
    branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/ISPO.java
    branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPO.java
    branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOIndexRemover.java
    branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOIndexWriteProc.java
    branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOIndexWriter.java
    branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOKeyOrder.java
    branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPORelation.java
    branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOTupleSerializer.java
    branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/AbstractTripleStore.java
    branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/internal/TestEncodeDecodeKeys.java
    branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/spo/TestSPO.java
    branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/spo/TestSPOKeyCoders.java
    branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/spo/TestSPOTupleSerializer.java
    branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/spo/TestSPOValueCoders.java
    branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/store/TestStatementIdentifiers.java
    branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailWithSids.java
    branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestChangeSets.java
    branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestSids.java

Added Paths:
-----------
    branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/util/TestBits.java
    branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/SidIV.java
    branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/ModifiedEnum.java

Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/util/TestAll.java
===================================================================

--- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/util/TestAll.java	2011-05-10 19:39:39 UTC (rev 4479)
+++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/util/TestAll.java	2011-05-11 03:19:09 UTC (rev 4480)
@@ -80,6 +80,8 @@
         
         suite.addTest(com.bigdata.util.httpd.TestAll.suite());
 
+        suite.addTestSuite( TestBits.class );
+        
         return suite;
         
     }

Added: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/util/TestBits.java
===================================================================
--- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/util/TestBits.java	                        (rev 0)
+++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/util/TestBits.java	2011-05-11 03:19:09 UTC (rev 4480)
@@ -0,0 +1,166 @@
+/*
+
+Copyright (C) SYSTAP, LLC 2006-2008.  All rights reserved.
+
+Contact:
+     SYSTAP, LLC
+     4501 Tower Road
+     Greensboro, NC 27410
+     lic...@bi...
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+/*
+ * Created on Aug 6, 2009
+ */
+
+package com.bigdata.util;
+
+import junit.framework.TestCase2;
+
+/**
+ * Test suite for {@link Bits}.
+ */
+public class TestBits extends TestCase2 {
+
+    /**
+     * 
+     */
+    public TestBits() {
+    }
+
+    /**
+     * @param name
+     */
+    public TestBits(String name) {
+        super(name);
+    }
+
+    public void test_ctor1() {
+
+//        final byte[] d = new byte[1];
+//        final ByteBuffer b = ByteBuffer.wrap(d);
+//        final BitVector v = new ByteBufferBitVector(b);
+//
+//        assertEquals("length", 8L, v.length());
+    	
+    	byte v = 0;
+
+        // verify range check.
+        try {
+            Bits.get(v, -1);
+            fail("Expecting: " + IndexOutOfBoundsException.class);
+        } catch (IndexOutOfBoundsException ex) {
+            if (log.isInfoEnabled())
+                log.info("Ignoring expected exception: " + ex);
+        }
+
+        // verify range check.
+        try {
+            Bits.get(v, 8);
+            fail("Expecting: " + IndexOutOfBoundsException.class);
+        } catch (IndexOutOfBoundsException ex) {
+            if (log.isInfoEnabled())
+                log.info("Ignoring expected exception: " + ex);
+        }
+
+        for (int i = 0; i < 8; i++)
+            assertEquals(false, Bits.get(v, i));
+
+        // set bit zero.
+//        d[0] |= (1 << 0);
+        v = Bits.set(v, 0, true);
+        
+        if (log.isInfoEnabled())
+            log.info(Bits.toString(v));
+        assertEquals(true, Bits.get(v, 0));
+
+        // clear bit zero.
+//        d[0] &= ~(1 << 0);
+        v = Bits.set(v, 0, false);
+        
+        if (log.isInfoEnabled())
+            log.info(Bits.toString(v));
+        assertEquals(false, Bits.get(v, 0));
+
+    }
+
+    /**
+     * Verify set/clear of each bit in the first byte.
+     */
+    public void test_getBoolean() {
+
+//        final byte[] d = new byte[1];
+//        final ByteBuffer b = ByteBuffer.wrap(d);
+//        final BitVector v = new ByteBufferBitVector(b);
+    	
+    	byte v = 0;
+
+        // verify all bits are zero.
+        for (int i = 0; i < 8; i++)
+            assertEquals(false, Bits.get(v, i));
+
+        // set/clear each bit in the first byte in turn.
+        for (int i = 0; i < 8; i++) {
+
+            // set bit
+//            d[0] |= (1 << i);
+        	v = Bits.set(v, i, true);
+            
+            if (log.isInfoEnabled())
+                log.info(Bits.toString(v) + " : i=" + i + ", (1<<" + i + ")="
+                        + (1 << i));
+            assertEquals(true, Bits.get(v, i));
+
+            // clear bit
+//            d[0] &= ~(1 << i);
+        	v = Bits.set(v, i, false);
+            
+            if (log.isInfoEnabled())
+                log.info(Bits.toString(v));
+            assertEquals(false, Bits.get(v, i));
+
+        }
+
+    }
+
+    /**
+     * Verify that masking a fully-set byte to bits 0..1 leaves only those bits.
+     */
+    public void test_getMask() {
+
+    	byte v = 0;
+
+        // verify all bits are zero.
+        for (int i = 0; i < 8; i++)
+            assertEquals(false, Bits.get(v, i));
+
+        // set each bit in the byte
+        for (int i = 0; i < 8; i++) {
+
+            // set bit
+        	v = Bits.set(v, i, true);
+            assertEquals(true, Bits.get(v, i));
+            
+        }
+        
+        // mask off all but the 0 and 1 bits
+        v = Bits.mask(v, 0, 1);
+        if (log.isInfoEnabled())
+            log.info(Bits.toString(v));
+        assertEquals(3, v);
+
+    }
+
+}

Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/axioms/BaseAxioms.java
===================================================================
--- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/axioms/BaseAxioms.java	2011-05-10 19:39:39 UTC (rev 4479)
+++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/axioms/BaseAxioms.java	2011-05-11 03:19:09 UTC (rev 4480)
@@ -285,7 +285,7 @@
         
         metadata.setBranchingFactor(branchingFactor);
 
-        tupleSer = new SPOTupleSerializer(SPOKeyOrder.SPO);
+        tupleSer = new SPOTupleSerializer(SPOKeyOrder.SPO, false/* sids */);
         
         metadata.setTupleSerializer(tupleSer);
         

Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/changesets/StatementWriter.java
===================================================================
--- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/changesets/StatementWriter.java	2011-05-10 19:39:39 UTC (rev 4479)
+++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/changesets/StatementWriter.java	2011-05-11 03:19:09 UTC (rev 4480)
@@ -1,14 +1,13 @@
 package com.bigdata.rdf.changesets;
 
 import java.util.Iterator;
-import java.util.Map;
+
 import org.apache.log4j.Logger;
+
 import com.bigdata.rdf.changesets.IChangeRecord.ChangeAction;
-import com.bigdata.rdf.internal.IV;
-import com.bigdata.rdf.model.BigdataBNode;
 import com.bigdata.rdf.spo.ISPO;
+import com.bigdata.rdf.spo.ModifiedEnum;
 import com.bigdata.rdf.spo.SPO;
-import com.bigdata.rdf.spo.ISPO.ModifiedEnum;
 import com.bigdata.rdf.store.AbstractTripleStore;
 import com.bigdata.relation.accesspath.IElementFilter;
 import com.bigdata.striterator.ChunkedArrayIterator;

Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/inf/Justification.java
===================================================================
--- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/inf/Justification.java	2011-05-10 19:39:39 UTC (rev 4479)
+++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/inf/Justification.java	2011-05-11 03:19:09 UTC (rev 4480)
@@ -921,7 +921,7 @@
 //            assert arity == 3;
             
             // Note: keys are SPOs; no values stored for the tuples.
-            tupleSer = new SPOTupleSerializer(SPOKeyOrder.SPO,
+            tupleSer = new SPOTupleSerializer(SPOKeyOrder.SPO, false/* sids */,
                     DefaultTupleSerializer.getDefaultLeafKeysCoder(),
                     EmptyRabaValueCoder.INSTANCE);
             

Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/ILexiconConfiguration.java
===================================================================
--- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/ILexiconConfiguration.java	2011-05-10 19:39:39 UTC (rev 4479)
+++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/ILexiconConfiguration.java	2011-05-11 03:19:09 UTC (rev 4480)
@@ -29,7 +29,6 @@
 import com.bigdata.rdf.lexicon.LexiconRelation;
 import com.bigdata.rdf.model.BigdataValue;
 import com.bigdata.rdf.model.BigdataValueFactory;
-import com.bigdata.rdf.store.AbstractTripleStore;
 
 /**
  * Configuration determines which RDF Values are inlined into the statement
@@ -72,91 +71,4 @@
      */
     void initExtensions(final LexiconRelation lex);
     
-    /**
-     * <code>true</code> iff the <code>vte</code> and <code>dte</code> 
-     * should be inlined.
-     * 
-     * @param vte
-     *          the term type
-     * @param dte
-     *          the data type
-    public boolean isInline(VTE vte, DTE dte);
-     */
-    
-//    /**
-//     * <code>true</code> iff <code>xsd:boolean</code> should be inlined.
-//     */
-//    public boolean isBooleanInline();
-//
-//    /**
-//     * <code>true</code> iff the fixed size numerics (<code>xsd:int</code>,
-//     * <code>xsd:short</code>, <code>xsd:float</code>, etc) should be inlined.
-//     */
-//    public boolean isSmallNumericInline();
-//
-//    /**
-//     * <code>true</code> iff xsd:integer should be inlined.
-//     * <p>
-//     * Note: The maximum length for the encoding is ~32kb per key. With a B+Tree
-//     * branching factor of 256 that is ~ 8MB per leaf before compression. While
-//     * that is definitely large, it is not so outrageous that we need to forbid
-//     * it.
-//     */
-//    public boolean isXSDIntegerInline();
-//
-//    /**
-//     * <code>true</code> iff <code>xsd:decimal</code> should be inlined.
-//     */
-//    public boolean isXSDDecimalInline();
-//
-//    /**
-//     * <code>true</code> iff blank node identifiers should be inlined. This
-//     * is only possible when the blank node identifiers are internally
-//     * generated {@link UUID}s since otherwise they can be arbitrary Unicode
-//     * strings which, like text-based Literals, can not be inlined.
-//     * <p>
-//     * This option is NOT compatible with
-//     * {@link AbstractTripleStore.Options#STORE_BLANK_NODES}.
-//     */
-//    public boolean isBlankNodeInline();
-//
-//    /**
-//     * <code>true</code> if UUID values (other than blank nodes) should be
-//     * inlined.
-//     */
-//    public boolean isUUIDInline();
-//
-//    /**
-//     * Option to enable storing of long literals (over a configured
-//     *       threshold) as blob references. The TERM2ID index would have a
-//     *       hash function (MD5, SHA-1, SHA-2, etc) of the value and assign
-//     *       a termId. The ID2TERM index would map the termId to a blob
-//     *       reference. The blob data would be stored in the journal and
-//     *       migrate into index segments during overflow processing for
-//     *       scale-out.
-//     */
-//    public boolean isLongLiteralAsBlob();
-//
-//    /**
-//     * Return the {@link MessageDigest} used to compute a hash code for a long
-//     * literal. The message digest should compute a hash function with a very
-//     * small probability of collisions. In general, <code>SHA-256</code> (32
-//     * bytes), <code>SHA-384</code> (48 bytes) and <code>SHA-512</code> (64
-//     * byte) should be reasonable choices.
-//     * <p>
-//     * Appropriate hash algorithms are defined in the <a
-//     * href="http://csrc.nist.gov/publications/fips/index.html">FIPS PUB
-//     * 180-2</a> (which has been replaced by <a href=
-//     * "http://csrc.nist.gov/publications/fips/fips180-3/fips180-3_final.pdf"
-//     * >FIPS PUB 180-3</a>. Also see Recommendation for Applications Using
-//     * Approved Hash Algorithms in <a href=
-//     * "http://csrc.nist.gov/publications/nistpubs/800-107/NIST-SP-800-107.pdf"
-//     * >SP 800-107</a>, which provides information about the collision
-//     * resistance of these hash algorithms.
-//     * 
-//     * @return A {@link MessageDigest} object which can be used to compute the
-//     *         hash code for a long literal.
-//     */
-//    public MessageDigest getLongLiteralMessageDigest();
-//    
 }

Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/IVUtility.java
===================================================================
--- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/IVUtility.java	2011-05-10 19:39:39 UTC (rev 4479)
+++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/IVUtility.java	2011-05-11 03:19:09 UTC (rev 4480)
@@ -32,12 +32,17 @@
 import java.util.ArrayList;
 import java.util.UUID;
 
+import org.apache.log4j.Logger;
+
 import com.bigdata.btree.keys.IKeyBuilder;
 import com.bigdata.btree.keys.KeyBuilder;
 import com.bigdata.rawstore.Bytes;
 import com.bigdata.rdf.internal.constraints.MathBOp.MathOp;
 import com.bigdata.rdf.model.BigdataBNode;
 import com.bigdata.rdf.model.BigdataLiteral;
+import com.bigdata.rdf.model.StatementEnum;
+import com.bigdata.rdf.spo.ISPO;
+import com.bigdata.rdf.spo.SPOKeyOrder;
 
 
 /**
@@ -45,6 +50,8 @@
  */
 public class IVUtility {
 
+	private static final transient Logger log = Logger.getLogger(IVUtility.class);
+	
     public static boolean equals(IV iv1, IV iv2) {
         
         // same IV or both null
@@ -421,23 +428,42 @@
      */
     public static IV[] decode(final byte[] key, final int numTerms) {
 
+    	return decode(key, 0 /* offset */, numTerms);
+    	
+    }
+    
+    /**
+     * Decodes numTerms {@link IV}s from a byte[], starting at the given offset.
+     * 
+     * @param key
+     *            The byte[].
+     * @param offset
+     *            The offset into the byte[] key.
+     * @param numTerms
+     *            The number of terms to decode.
+     *            
+     * @return The set of {@link IV}s.
+     */
+    public static IV[] decode(final byte[] key, final int offset, 
+    		final int numTerms) {
+        	
         if (numTerms <= 0)
             return new IV[0];
         
         final IV[] ivs = new IV[numTerms];
         
-        int offset = 0;
+        int o = offset;
         
         for (int i = 0; i < numTerms; i++) {
 
-            if (offset >= key.length)
+            if (o >= key.length)
                 throw new IllegalArgumentException(
                         "key is not long enough to decode " 
                         + numTerms + " terms.");
             
-            ivs[i] = decodeFromOffset(key, offset);
+            ivs[i] = decodeFromOffset(key, o);
             
-            offset += ivs[i] == null 
+            o += ivs[i] == null 
                     ? NullIV.INSTANCE.byteLength() : ivs[i].byteLength();
             
         }
@@ -502,6 +528,22 @@
         // The value type (URI, Literal, BNode, SID)
         final VTE vte = AbstractIV.getInternalValueTypeEnum(flags);
 
+        // handle inline sids
+        if (vte == VTE.STATEMENT) {
+        	
+        	// spo is directly decodable from key
+        	final ISPO spo = SPOKeyOrder.SPO.decodeKey(key, o);
+        	
+        	// all spos that have a sid are explicit
+        	spo.setStatementType(StatementEnum.Explicit);
+        	spo.setStatementIdentifier(true);
+        	
+        	// create a sid iv and return it
+        	final SidIV sid = new SidIV(spo);
+        	return sid;
+        	
+        }
+        
         // The data type
         final DTE dte = AbstractIV.getInternalDataTypeEnum(flags);
         

Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/LexiconConfiguration.java
===================================================================
--- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/LexiconConfiguration.java	2011-05-10 19:39:39 UTC (rev 4479)
+++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/LexiconConfiguration.java	2011-05-11 03:19:09 UTC (rev 4480)
@@ -318,6 +318,8 @@
     public boolean isInline(final VTE vte, final DTE dte) {
 
         switch (vte) {
+	        case STATEMENT:
+	            return true;
             case BNODE:
                 return inlineBNodes && isSupported(dte);
             case LITERAL:

Added: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/SidIV.java
===================================================================
--- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/SidIV.java	                        (rev 0)
+++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/SidIV.java	2011-05-11 03:19:09 UTC (rev 4480)
@@ -0,0 +1,212 @@
+/**
+
+Copyright (C) SYSTAP, LLC 2006-2011.  All rights reserved.
+
+Contact:
+     SYSTAP, LLC
+     4501 Tower Road
+     Greensboro, NC 27410
+     lic...@bi...
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+package com.bigdata.rdf.internal;
+
+import java.math.BigInteger;
+
+import org.apache.log4j.Logger;
+
+import com.bigdata.btree.keys.IKeyBuilder;
+import com.bigdata.btree.keys.KeyBuilder;
+import com.bigdata.rdf.lexicon.LexiconRelation;
+import com.bigdata.rdf.model.BigdataBNode;
+import com.bigdata.rdf.model.BigdataValueFactory;
+import com.bigdata.rdf.spo.ISPO;
+import com.bigdata.rdf.spo.SPO;
+import com.bigdata.rdf.spo.SPOComparator;
+import com.bigdata.rdf.spo.SPOKeyOrder;
+
+/**
+ * Internal value representing an inline statement identifier. Uses the
+ * {@link ISPO} supplied in the ctor as the inline value. The
+ * {@link #asValue(LexiconRelation)} method returns a
+ * {@link BigdataBNode} that is used to represent the sid in serialization
+ * formats (such as the custom RDF/XML extension for sids). The bnode is
+ * guaranteed to always have the same bnode id for a given inlined SPO. This is
+ * accomplished using the byte[] key encoding for the spo along with the
+ * BigInteger class.
+ * <p>
+ * This internal value has a {@link VTE} of {@link VTE#STATEMENT}. It is encoded
+ * into the statement indices by directly encoding the spo using
+ * {@link SPOKeyOrder#encodeKey(IKeyBuilder, ISPO)} via the
+ * {@link SPOKeyOrder#SPO} key order. Thus when decoded from the statement
+ * indices, the spo associated with this sid is materialized directly from the
+ * sid itself. See {@link IVUtility#decode(byte[])}. The spo decoded from the
+ * sid IV will be marked as explicit (only explicit statements have sids) and
+ * this SidIV will be attached to it. This completely eliminates the need for a
+ * reverse index from sid->spo, as the spo is encoded inline into the SidIV
+ * itself.  This replaces the TermId model for representing sids.
+ * <p>
+ * {@inheritDoc}
+ */
+public class SidIV<V extends BigdataBNode> extends
+        AbstractInlineIV<V, ISPO> {
+
+    /**
+	 * 
+	 */
+	private static final long serialVersionUID = 685148537376856907L;
+	
+	protected static final Logger log = Logger.getLogger(SidIV.class);
+
+	/**
+	 * The inline spo.
+	 */
+	private final ISPO spo;
+	
+	/**
+	 * The cached byte[] key for the encoding of this IV.
+	 */
+	private transient byte[] key;
+	
+	/**
+	 * The cached materialized BigdataValue for this sid.
+	 */
+	private transient V bnode;
+
+	/**
+	 * Ctor with internal value spo specified.
+	 */
+	public SidIV(final ISPO spo) {
+
+        /*
+         * Note: XSDBoolean happens to be assigned the code value of 0, which is
+         * the value we want when the data type enumeration will be ignored.
+         */
+        super(VTE.STATEMENT, DTE.XSDBoolean);
+        
+        this.spo = spo;
+        
+    }
+
+	/**
+	 * Returns the inline spo.
+	 */
+	public ISPO getInlineValue() throws UnsupportedOperationException {
+		return spo;
+	}
+
+	/**
+	 * Returns the bnode representation of this IV, useful for serialization
+	 * formats such as RDF/XML.  See {@link #bnodeId()}.
+	 */
+    public V asValue(final LexiconRelation lex) {
+    	if (bnode == null) {
+	        bnode = (V) lex.getValueFactory().createBNode(bnodeId());
+	        bnode.setIV(this);
+	        bnode.setStatementIdentifier(true);
+    	}
+        return bnode;
+    }
+
+    /**
+     * Return the byte length for the byte[] encoded representation of this
+     * internal value.  Depends on the byte length of the encoded inline spo.
+     */
+	public int byteLength() {
+		return 1 + key().length;
+	}
+
+	public String toString() {
+		return stringValue();
+	}
+	
+	public String stringValue() {
+		return "Sid("+toString(spo)+")";
+	}
+	
+	/**
+	 * Pretty print the inline spo.  Calling SPO.toString() results in an
+	 * infinite loop.
+	 */
+	private static String toString(final ISPO spo) {
+        return (SPO.toString(spo.s()) + ":" + 
+        		SPO.toString(spo.p()) + ":" + 
+        		SPO.toString(spo.o()));
+	}
+
+	public int hashCode() {
+		return spo.hashCode();
+	}
+	
+	/**
+	 * Using the BigInteger class to create a unique bnode id based on the 
+	 * byte[] key of the inline spo.
+	 */
+	private String bnodeId() {
+//		// just use the hash code.  can result in collisions
+//		return String.valueOf(hashCode());
+		
+		// create a big integer using the spo key.  should result in unique ids
+		final byte[] key = key();
+		final int signum = key.length > 0 ? 1 : 0;
+		final BigInteger bi = new BigInteger(signum, key);
+		return bi.toString();
+	}
+
+	public boolean equals(Object o) {
+        if (this == o)
+            return true;
+        if (o instanceof SidIV) {
+    		final ISPO spo2 = ((SidIV) o).spo;
+            return spo.equals(spo2);
+        }
+        return false;
+	}
+
+	protected int _compareTo(IV o) {
+		final ISPO spo2 = ((SidIV) o).spo;
+        return SPOComparator.INSTANCE.compare(spo, spo2);
+	}
+	
+    /**
+     * Encode this internal value into the supplied key builder.  Emits the
+     * flags, followed by the encoded byte[] representing the spo, in SPO
+     * key order.
+     * <p>
+     * {@inheritDoc}
+     */
+    public IKeyBuilder encode(final IKeyBuilder keyBuilder) {
+
+        // First emit the flags byte.
+        keyBuilder.append(flags());
+		
+		// Then append the SPO's key in SPOKeyOrder.SPO
+        keyBuilder.append(key());
+        
+        return keyBuilder;
+            
+    }
+    
+    private byte[] key() {
+    	if (key == null) {
+            /*
+    		 * Build the SPO's key in SPOKeyOrder.SPO.
+    		 */
+    		key = SPOKeyOrder.SPO.encodeKey(new KeyBuilder(), spo);
+    	}
+    	return key;
+    }
+
+}
\ No newline at end of file

Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/LexiconRelation.java
===================================================================
--- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/LexiconRelation.java	2011-05-10 19:39:39 UTC (rev 4479)
+++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/LexiconRelation.java	2011-05-11 03:19:09 UTC (rev 4480)
@@ -72,7 +72,6 @@
 import com.bigdata.btree.keys.KVO;
 import com.bigdata.btree.keys.KeyBuilder;
 import com.bigdata.btree.keys.StrengthEnum;
-import com.bigdata.btree.proc.IResultHandler;
 import com.bigdata.btree.proc.AbstractKeyArrayIndexProcedure.ResultBuffer;
 import com.bigdata.btree.proc.AbstractKeyArrayIndexProcedure.ResultBufferHandler;
 import com.bigdata.btree.proc.BatchLookup.BatchLookupConstructor;
@@ -82,7 +81,6 @@
 import com.bigdata.journal.IResourceLock;
 import com.bigdata.journal.ITx;
 import com.bigdata.journal.TimestampUtility;
-import com.bigdata.rawstore.Bytes;
 import com.bigdata.rdf.internal.IDatatypeURIResolver;
 import com.bigdata.rdf.internal.IExtensionFactory;
 import com.bigdata.rdf.internal.ILexiconConfiguration;
@@ -90,18 +88,14 @@
 import com.bigdata.rdf.internal.IVUtility;
 import com.bigdata.rdf.internal.LexiconConfiguration;
 import com.bigdata.rdf.internal.TermId;
-import com.bigdata.rdf.lexicon.Term2IdWriteProc.Term2IdWriteProcConstructor;
 import com.bigdata.rdf.model.BigdataBNode;
 import com.bigdata.rdf.model.BigdataLiteral;
 import com.bigdata.rdf.model.BigdataURI;
 import com.bigdata.rdf.model.BigdataValue;
 import com.bigdata.rdf.model.BigdataValueFactory;
 import com.bigdata.rdf.model.BigdataValueFactoryImpl;
-import com.bigdata.rdf.model.StatementEnum;
 import com.bigdata.rdf.rio.IStatementBuffer;
 import com.bigdata.rdf.rio.StatementBuffer;
-import com.bigdata.rdf.spo.ISPO;
-import com.bigdata.rdf.spo.SPOComparator;
 import com.bigdata.rdf.store.AbstractTripleStore;
 import com.bigdata.rdf.store.IRawTripleStore;
 import com.bigdata.relation.AbstractRelation;
@@ -111,7 +105,6 @@
 import com.bigdata.relation.rule.IRule;
 import com.bigdata.search.FullTextIndex;
 import com.bigdata.service.IBigdataFederation;
-import com.bigdata.service.Split;
 import com.bigdata.striterator.ChunkedArrayIterator;
 import com.bigdata.striterator.IChunkedOrderedIterator;
 import com.bigdata.striterator.IKeyOrder;
@@ -1468,197 +1461,197 @@
         
     }
 
-    /**
-     * Assign unique statement identifiers to triples.
-     * <p>
-     * Each distinct {@link StatementEnum#Explicit} {s,p,o} is assigned a unique
-     * statement identifier using the {@link LexiconKeyOrder#TERM2ID} index. The
-     * assignment of statement identifiers is <i>consistent</i> using an
-     * unisolated atomic write operation similar to
-     * {@link #addTerms(BigdataValue[], int, boolean)}
-     * <p>
-     * Note: Statement identifiers are NOT inserted into the reverse (id:term)
-     * index. Instead, they are written into the values associated with the
-     * {s,p,o} in each of the statement indices. That is handled by
-     * {@link AbstractTripleStore#addStatements(AbstractTripleStore, boolean, IChunkedOrderedIterator, IElementFilter)}
-     * , which is also responsible for invoking this method in order to have the
-     * statement identifiers on hand before it writes on the statement indices.
-     * <p>
-     * Note: The caller's {@link ISPO}[] is sorted into SPO order as a
-     * side-effect.
-     * <p>
-     * Note: The statement identifiers are assigned to the {@link ISPO}s as a
-     * side-effect.
-     * <p>
-     * Note: SIDs are NOT supported for quads, so this code is never executed
-     * for quads.
-     */
-    public void addStatementIdentifiers(final ISPO[] a, final int n) {
+//    /**
+//     * Assign unique statement identifiers to triples.
+//     * <p>
+//     * Each distinct {@link StatementEnum#Explicit} {s,p,o} is assigned a unique
+//     * statement identifier using the {@link LexiconKeyOrder#TERM2ID} index. The
+//     * assignment of statement identifiers is <i>consistent</i> using an
+//     * unisolated atomic write operation similar to
+//     * {@link #addTerms(BigdataValue[], int, boolean)}
+//     * <p>
+//     * Note: Statement identifiers are NOT inserted into the reverse (id:term)
+//     * index. Instead, they are written into the values associated with the
+//     * {s,p,o} in each of the statement indices. That is handled by
+//     * {@link AbstractTripleStore#addStatements(AbstractTripleStore, boolean, IChunkedOrderedIterator, IElementFilter)}
+//     * , which is also responsible for invoking this method in order to have the
+//     * statement identifiers on hand before it writes on the statement indices.
+//     * <p>
+//     * Note: The caller's {@link ISPO}[] is sorted into SPO order as a
+//     * side-effect.
+//     * <p>
+//     * Note: The statement identifiers are assigned to the {@link ISPO}s as a
+//     * side-effect.
+//     * <p>
+//     * Note: SIDs are NOT supported for quads, so this code is never executed
+//     * for quads.
+//     */
+//    public void addStatementIdentifiers(final ISPO[] a, final int n) {
+//
+//        //        * @throws UnsupportedOperationException
+////        *             if {@link Options#STATEMENT_IDENTIFIERS} was not specified.
+////        * 
+////        if (!statementIdentifiers)
+////            throw new UnsupportedOperationException();
+//
+//        if (n == 0)
+//            return;
+//
+//        final long begin = System.currentTimeMillis();
+//        final long keyGenTime; // time to convert {s,p,o} to byte[] sort keys.
+//        final long sortTime; // time to sort terms by assigned byte[] keys.
+//        final long insertTime; // time to insert terms into the term:id index.
+//
+//        /*
+//         * Sort the caller's array into SPO order. This order will correspond to
+//         * the total order of the term:id index.
+//         * 
+//         * Note: This depends critically on SPOComparator producing the same
+//         * total order as we would obtain by an unsigned byte[] sort of the
+//         * generated sort keys.
+//         * 
+//         * Note: the keys for the term:id index are NOT precisely the keys used
+//         * by the SPO index since there is a prefix code used to mark the keys
+//         * are Statements (vs Literals, BNodes, or URIs).
+//         */
+//        {
+//
+//            final long _begin = System.currentTimeMillis();
+//
+//            Arrays.sort(a, 0, n, SPOComparator.INSTANCE);
+//
+//            sortTime = System.currentTimeMillis() - _begin;
+//
+//        }
+//
+//        /*
+//         * Insert into the forward index (term -> id). This will either assign a
+//         * statement identifier or return the existing statement identifier if
+//         * the statement is already in the lexicon (the statement identifier is
+//         * in a sense a term identifier since it is assigned by the term:id
+//         * index).
+//         * 
+//         * Note: Since we only assign statement identifiers for explicit
+//         * statements the caller's SPO[] can not be directly correlated to the
+//         * keys[]. We copy the references into b[] so that we can keep that
+//         * correlation 1:1.
+//         */
+//        final byte[][] keys = new byte[n][];
+//        final ISPO[] b = new ISPO[n];
+//
+//        /*
+//         * Generate the sort keys for the term:id index.
+//         */
+//        int nexplicit = 0;
+//        {
+//
+//            final long _begin = System.currentTimeMillis();
+//
+//            // local instance, no unicode support.
+//            final IKeyBuilder keyBuilder = KeyBuilder
+//                    .newInstance(1/* statement byte */+ (3/* triple */* Bytes.SIZEOF_LONG));
+//
+//            for (int i = 0; i < n; i++) {
+//
+//                final ISPO spo = a[i];
+//
+//                if (!spo.isExplicit())
+//                    continue;
+//                
+//                if (!spo.isFullyBound())
+//                    throw new IllegalArgumentException("Not fully bound: "
+//                            + spo.toString(/*this*/));
+//
+//                /*
+//                 * Creating a dummy term for the Term2Id index.
+//                 */
+//                keyBuilder.reset().append(ITermIndexCodes.TERM_CODE_STMT);
+//                spo.s().encode(keyBuilder);
+//                spo.p().encode(keyBuilder);
+//                spo.o().encode(keyBuilder);
+//                keys[nexplicit] = keyBuilder.getKey();
+//
+//                // Note: keeps correlation between key and SPO.
+//                b[nexplicit] = spo;
+//
+//                nexplicit++;
+//
+//            }
+//
+//            keyGenTime = System.currentTimeMillis() - _begin;
+//
+//        }
+//
+//        /*
+//         * Execute a remote unisolated batch operation that assigns the
+//         * statement identifier.
+//         */
+//        {
+//
+//            final long _begin = System.currentTimeMillis();
+//
+//            final IIndex termIdIndex = getTerm2IdIndex();
+//
+//            // run the procedure.
+//            if (nexplicit > 0) {
+//
+//                termIdIndex.submit(0/* fromIndex */, nexplicit/* toIndex */,
+//                        keys, null/* vals */, new Term2IdWriteProcConstructor(
+//                                false/* readOnly */, storeBlankNodes, //scaleOutTermIds,
+//                                termIdBitsToReverse),
+//                        new IResultHandler<Term2IdWriteProc.Result, Void>() {
+//
+//                            /**
+//                             * Copy the assigned / discovered statement
+//                             * identifiers onto the corresponding elements of
+//                             * the SPO[].
+//                             */
+//                            public void aggregate(Term2IdWriteProc.Result result,
+//                                    Split split) {
+//
+//                                for (int i = split.fromIndex, j = 0; i < split.toIndex; i++, j++) {
+//
+////                                    if (b[i].c() != 0L
+////                                            && b[i].c() != result.ids[j]) {
+////                                        System.err.println("spo="
+////                                                + getContainer().toString(b[i])
+////                                                + ", sid="
+////                                                + getContainer().toString(
+////                                                        result.ids[j]));
+////                                    }
+//
+//                                    b[i].setStatementIdentifier(result.ivs[j]);
+//
+//                                }
+//
+//                            }
+//
+//                            public Void getResult() {
+//
+//                                return null;
+//
+//                            }
+//
+//                        });
+//
+//            }
+//
+//            insertTime = System.currentTimeMillis() - _begin;
+//
+//        }
+//
+//        final long elapsed = System.currentTimeMillis() - begin;
+//
+//        if (log.isInfoEnabled() && n > 1000 || elapsed > 3000) {
+//
+//            log.info("Wrote " + n + " in " + elapsed + "ms; keygen="
+//                    + keyGenTime + "ms, sort=" + sortTime + "ms, insert="
+//                    + insertTime + "ms");
+//
+//        }
+//
+//    }
 
-        //        * @throws UnsupportedOperationException
-//        *             if {@link Options#STATEMENT_IDENTIFIERS} was not specified.
-//        * 
-//        if (!statementIdentifiers)
-//            throw new UnsupportedOperationException();
-
-        if (n == 0)
-            return;
-
-        final long begin = System.currentTimeMillis();
-        final long keyGenTime; // time to convert {s,p,o} to byte[] sort keys.
-        final long sortTime; // time to sort terms by assigned byte[] keys.
-        final long insertTime; // time to insert terms into the term:id index.
-
-        /*
-         * Sort the caller's array into SPO order. This order will correspond to
-         * the total order of the term:id index.
-         * 
-         * Note: This depends critically on SPOComparator producing the same
-         * total order as we would obtain by an unsigned byte[] sort of the
-         * generated sort keys.
-         * 
-         * Note: the keys for the term:id index are NOT precisely the keys used
-         * by the SPO index since there is a prefix code used to mark the keys
-         * are Statements (vs Literals, BNodes, or URIs).
-         */
-        {
-
-            final long _begin = System.currentTimeMillis();
-
-            Arrays.sort(a, 0, n, SPOComparator.INSTANCE);
-
-            sortTime = System.currentTimeMillis() - _begin;
-
-        }
-
-        /*
-         * Insert into the forward index (term -> id). This will either assign a
-         * statement identifier or return the existing statement identifier if
-         * the statement is already in the lexicon (the statement identifier is
-         * in a sense a term identifier since it is assigned by the term:id
-         * index).
-         * 
-         * Note: Since we only assign statement identifiers for explicit
-         * statements the caller's SPO[] can not be directly correlated to the
-         * keys[]. We copy the references into b[] so that we can keep that
-         * correlation 1:1.
-         */
-        final byte[][] keys = new byte[n][];
-        final ISPO[] b = new ISPO[n];
-
-        /*
-         * Generate the sort keys for the term:id index.
-         */
-        int nexplicit = 0;
-        {
-
-            final long _begin = System.currentTimeMillis();
-
-            // local instance, no unicode support.
-            final IKeyBuilder keyBuilder = KeyBuilder
-                    .newInstance(1/* statement byte */+ (3/* triple */* Bytes.SIZEOF_LONG));
-
-            for (int i = 0; i < n; i++) {
-
-                final ISPO spo = a[i];
-
-                if (!spo.isExplicit())
-                    continue;
-                
-                if (!spo.isFullyBound())
-                    throw new IllegalArgumentException("Not fully bound: "
-                            + spo.toString(/*this*/));
-
-                /*
-                 * Creating a dummy term for the Term2Id index.
-                 */
-                keyBuilder.reset().append(ITermIndexCodes.TERM_CODE_STMT);
-                spo.s().encode(keyBuilder);
-                spo.p().encode(keyBuilder);
-                spo.o().encode(keyBuilder);
-                keys[nexplicit] = keyBuilder.getKey();
-
-                // Note: keeps correlation between key and SPO.
-                b[nexplicit] = spo;
-
-                nexplicit++;
-
-            }
-
-            keyGenTime = System.currentTimeMillis() - _begin;
-
-        }
-
-        /*
-         * Execute a remote unisolated batch operation that assigns the
-         * statement identifier.
-         */
-        {
-
-            final long _begin = System.currentTimeMillis();
-
-            final IIndex termIdIndex = getTerm2IdIndex();
-
-            // run the procedure.
-            if (nexplicit > 0) {
-
-                termIdIndex.submit(0/* fromIndex */, nexplicit/* toIndex */,
-                        keys, null/* vals */, new Term2IdWriteProcConstructor(
-                                false/* readOnly */, storeBlankNodes, //scaleOutTermIds,
-                                termIdBitsToReverse),
-                        new IResultHandler<Term2IdWriteProc.Result, Void>() {
-
-                            /**
-                             * Copy the assigned / discovered statement
-                             * identifiers onto the corresponding elements of
-                             * the SPO[].
-                             */
-                            public void aggregate(Term2IdWriteProc.Result result,
-                                    Split split) {
-
-                                for (int i = split.fromIndex, j = 0; i < split.toIndex; i++, j++) {
-
-//                                    if (b[i].c() != 0L
-//                                            && b[i].c() != result.ids[j]) {
-//                                        System.err.println("spo="
-//                                                + getContainer().toString(b[i])
-//                                                + ", sid="
-//                                                + getContainer().toString(
-//                                                        result.ids[j]));
-//                                    }
-
-                                    b[i].setStatementIdentifier(result.ivs[j]);
-
-                                }
-
-                            }
-
-                            public Void getResult() {
-
-                                return null;
-
-                            }
-
-                        });
-
-            }
-
-            insertTime = System.currentTimeMillis() - _begin;
-
-        }
-
-        final long elapsed = System.currentTimeMillis() - begin;
-
-        if (log.isInfoEnabled() && n > 1000 || elapsed > 3000) {
-
-            log.info("Wrote " + n + " in " + elapsed + "ms; keygen="
-                    + keyGenTime + "ms, sort=" + sortTime + "ms, insert="
-                    + insertTime + "ms");
-
-        }
-
-    }
-
     /**
      * <p>
      * Add the terms to the full text index so that we can do fast lookup of the
@@ -2365,26 +2358,28 @@
         
         if (tid.isStatement()) {
 
-            /*
-             * Statement identifiers are not stored in the reverse lexicon (or
-             * the cache).
-             * 
-             * A statement identifier is externalized as a BNode. The "S" prefix
-             * is a syntactic marker for those in the know to indicate that the
-             * BNode corresponds to a statement identifier.
-             */
+//            /*
+//             * Statement identifiers are not stored in the reverse lexicon (or
+//             * the cache).
+//             * 
+//             * A statement identifier is externalized as a BNode. The "S" prefix
+//             * is a syntactic marker for those in the know to indicate that the
+//             * BNode corresponds to a statement identifier.
+//             */
+//
+//			final BigdataBNode stmt = valueFactory.createBNode("S"
+//                    + Long.toString(tid.getTermId()));
+//
+//            // set the term identifier on the object.
+//            stmt.setIV(tid);
+//
+//            // mark as a statement identifier.
+//			stmt.setStatementIdentifier(true);
+//			
+//            return stmt;
 
-			final BigdataBNode stmt = valueFactory.createBNode("S"
-                    + Long.toString(tid.getTermId()));
+        	throw new IllegalArgumentException("sids should be inline");
 
-            // set the term identifier on the object.
-            stmt.setIV(tid);
-
-            // mark as a statement identifier.
-			stmt.setStatementIdentifier(true);
-
-            return stmt;
-
         }
 
         if (!storeBlankNodes && tid.isBNode()) {

Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/model/BigdataStatementImpl.java
===================================================================
--- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/model/BigdataStatementImpl.java	2011-05-10 19:39:39 UTC (rev 4479)
+++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/model/BigdataStatementImpl.java	2011-05-11 03:19:09 UTC (rev 4480)
@@ -27,9 +27,9 @@
 import org.openrdf.model.Statement;
 import org.openrdf.model.Value;
 
-import com.bigdata.io.ByteArrayBuffer;
 import com.bigdata.rdf.internal.IV;
-import com.bigdata.rdf.spo.SPO;
+import com.bigdata.rdf.internal.SidIV;
+import com.bigdata.rdf.spo.ModifiedEnum;
 import com.bigdata.rdf.store.AbstractTripleStore;
 import com.bigdata.rdf.store.IRawTripleStore;
 
@@ -282,29 +282,24 @@
 
     }
 
-    public final void setStatementIdentifier(final IV sid) {
+    public final void setStatementIdentifier(final boolean sidable) {
 
-        if (sid == null)
-            throw new IllegalArgumentException();
+        if (sidable && type != StatementEnum.Explicit) {
 
-        if (!sid.isStatement())
-            throw new IllegalArgumentException("Not a statement identifier: "
-                    + sid);
-
-        if (type != StatementEnum.Explicit) {
-
             // Only allowed for explicit statements.
             throw new IllegalStateException();
 
         }
 
-        if (c != null && c.getIV() != sid)
-            throw new IllegalStateException(
-                    "Different statement identifier already defined: "
-                            + toString() + ", new=" + sid);
+        if (c == null) {
+        	
+        	// this SHOULD not ever happen
+        	throw new IllegalStateException();
+        	
+        }
+        
+        c.setIV(new SidIV(this));
 
-        c.setIV(sid);
-
     }
 
     public final IV getStatementIdentifier() {
@@ -335,13 +330,6 @@
         
     }
 
-    public byte[] serializeValue(final ByteArrayBuffer buf) {
-
-        return SPO.serializeValue(buf, override, userFlag, type, 
-        	c != null ? c.getIV() : null);
-
-    }
-
     /**
      * Note: this implementation is equivalent to {@link #toString()} since the
      * {@link Value}s are already resolved.

Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/model/StatementEnum.java
===================================================================
--- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/model/StatementEnum.java	2011-05-10 19:39:39 UTC (rev 4479)
+++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/model/StatementEnum.java	2011-05-11 03:19:09 UTC (rev 4480)
@@ -132,11 +132,10 @@
 
     static public StatementEnum deserialize(final byte[] val) {
 
-        if (val.length != 1 && val.length != (1 + 8)) {
+        if (val.length != 1) {
 
             throw new RuntimeException(
-                    "Expecting either one byte or nine bytes, not "
-                            + val.length);
+                    "Expecting one byte, not " + val.length);
             
         }
         
@@ -183,7 +182,6 @@
     public static boolean isOverride(final byte b) {
 
         return (b & StatementEnum.MASK_OVERRIDE) != 0;
-//        return (b & StatementEnum.MASK_OVERRIDE) == 1;
         
     }
     

Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/rio/AsynchronousStatementBufferFactory.java
===================================================================
--- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/rio/AsynchronousStatementBufferFactory.java	2011-05-10 19:39:39 UTC (rev 4479)
+++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/rio/AsynchronousStatementBufferFactory.java	2011-05-11 03:19:09 UTC (rev 4480)
@@ -3204,8 +3204,6 @@
             long chunksOut = 0;
             long elementsOut = 0;
 
-            final ByteArrayBuffer vbuf = new ByteArrayBuffer(1 + 8/* max length */);
-
             latch.inc();
 
             try {
@@ -3233,7 +3231,7 @@
                         final byte[] key = tupleSer.serializeKey(spo);
 
                         // generate value for the index.
-                        final byte[] val = spo.serializeValue(vbuf);
+                        final byte[] val = tupleSer.serializeVal(spo);
 
                         /*
                          * Note: The SPO is deliberately not provided to the KVO

Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/BulkCompleteConverter.java
===================================================================
--- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/BulkCompleteConverter.java	2011-05-10 19:39:39 UTC (rev 4479)
+++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/BulkCompleteConverter.java	2011-05-11 03:19:09 UTC (rev 4480)
@@ -136,7 +136,7 @@
             
             if (val != null) {
                 
-                SPO.decodeValue(chunk[i], val);
+                tupleSer.decodeValue(chunk[i], val);
                 
             } else {
                 

Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/ISPO.java
===================================================================
--- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/ISPO.java	2011-05-10 19:39:39 UTC (rev 4479)
+++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/ISPO.java	2011-05-11 03:19:09 UTC (rev 4480)
@@ -28,10 +28,7 @@
 
 package com.bigdata.rdf.spo;
 
-import org.openrdf.model.Value;
-
 import com.bigdata.bop.IElement;
-import com.bigdata.io.ByteArrayBuffer;
 import com.bigdata.rdf.inf.Justification;
 import com.bigdata.rdf.inf.TruthMaintenance;
 import com.bigdata.rdf.internal.IV;
@@ -179,20 +176,30 @@
      */
     boolean isAxiom();
 
+//    /**
+//     * Set the statement identifier. This sets the 4th position of the quad, but
+//     * some constraints are imposed on its argument.
+//     * 
+//     * @param sid
+//     *            The statement identifier.
+//     * 
+//     * @throws IllegalArgumentException
+//     *             if <i>sid</i> is {@link #NULL}.
+//     * @throws IllegalStateException
+//     *             if the statement identifier is already set.
+//     */
+//    void setStatementIdentifier(final IV sid);
+
     /**
      * Set the statement identifier. This sets the 4th position of the quad, but
      * some constraints are imposed on its argument.
      * 
      * @param sid
-     *            The statement identifier.
-     * 
-     * @throws IllegalArgumentException
-     *             if <i>sid</i> is {@link #NULL}.
-     * @throws IllegalStateException
-     *             if the statement identifier is already set.
+     *            If sid is true, this ISPO will produce a sid on-demand when
+     *            requested.
      */
-    void setStatementIdentifier(final IV sid);
-
+    void setStatementIdentifier(final boolean sidable);
+    
     /**
      * The statement identifier (optional). This has nearly identical semantics
      * to {@link #c()}, but will throw an exception if the 4th position is not
@@ -272,25 +279,6 @@
     public boolean isModified();
 
     /**
-     * Return the byte[] that would be written into a statement index for this
-     * {@link ISPO}, including the optional {@link StatementEnum#MASK_OVERRIDE}
-     * bit. If the {@link #hasStatementIdentifier()} would return
-     * <code>true</code>, then the SID will be included in the returned byte[].
-     * Note that {@link #hasStatementIdentifier()} is defined in terms of the
-     * bit pattern of the SID identifiers and therefore will be
-     * <code>true</code> ONLY for a statement identifier and NOT for an RDF
-     * {@link Value} identifier.
-     * 
-     * @param buf
-     *            A buffer supplied by the caller. The buffer will be reset
-     *            before the value is written on the buffer.
-     * 
-     * @return The value that would be written into a statement index for this
-     *         {@link ISPO}.
-     */
-    public byte[] serializeValue(ByteArrayBuffer buf);
-
-    /**
      * Method may be used to externalize the {@link BigdataValue}s in the
      * {@link ISPO}.
      * 
@@ -299,59 +287,4 @@
      */
     public String toString(IRawTripleStore db);
     
-    public enum ModifiedEnum {
-        
-        INSERTED, REMOVED, UPDATED, NONE;
-        
-        public static boolean[] toBooleans(final ModifiedEnum[] modified, final int n) {
-            
-            final boolean[] b = new boolean[n*2];
-            for (int i = 0; i < n; i++) {
-                switch(modified[i]) {
-                case INSERTED:
-                    b[i*2] = true;
-                    b[i*2+1] = false;
-                    break;
-                case REMOVED:
-                    b[i*2] = false;
-                    b[i*2+1] = true;
-                    break;
-                case UPDATED:
-                    b[i*2] = true;
-                    b[i*2+1] = true;
-                    break;
-                case NONE:
-                default:
-                    b[i*2] = false;
-                    b[i*2+1] = false;
-                    break;
-                }
-            }
-            
-            return b;
-            
-        }
-        
-        public static ModifiedEnum[] fromBooleans(final boolean[] b, final int n) {
-            
-            assert n <= b.length && n % 2 == 0 : "n="+n+", b.length="+b.length;
-            
-            final ModifiedEnum[] m = new ModifiedEnum[n/2];
-            for (int i = 0; i < n; i+=2) {
-                if (b[i] && !b[i+1])
-                    m[i/2] = INSERTED;
-                else if (!b[i] && b[i+1])
-                    m[i/2] = REMOVED;
-                else if (b[i] && b[i+1])
-                    m[i/2] = UPDATED;
-                else
-                    m[i/2] = NONE;
-            }
-            
-            return m;
-            
-        }
-        
-    }
-
 }

Added: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/ModifiedEnum.java
===================================================================
--- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/ModifiedEnum.java	                        (rev 0)
+++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/ModifiedEnum.java	2011-05-11 03:19:09 UTC (rev 4480)
@@ -0,0 +1,56 @@
+package com.bigdata.rdf.spo;
+
+public enum ModifiedEnum {
+
+    INSERTED, REMOVED, UPDATED, NONE;
+    
+    public static boolean[] toBooleans(final ModifiedEnum[] modified, final int n) {
+        
+        final boolean[] b = new boolean[n*2];
+        for (int i = 0; i < n; i++) {
+            switch(modified[i]) {
+            case INSERTED:
+                b[i*2] = true;
+                b[i*2+1] = false;
+                break;
+            case REMOVED:
+                b[i*2] = false;
+                b[i*2+1] = true;
+                break;
+            case UPDATED:
+                b[i*2] = true;
+                b[i*2+1] = true;
+                break;
+            case NONE:
+            default:
+                b[i*2] = false;
+                b[i*2+1] = false;
+                break;
+            }
+        }
+        
+        return b;
+        
+    }
+    
+    public static ModifiedEnum[] fromBooleans(final boolean[] b, final int n) {
+        
+        assert n <= b.length && n % 2 == 0 : "n="+n+", b.length="+b.length;
+        
+        final ModifiedEnum[] m = new ModifiedEnum[n/2];
+        for (int i = 0; i < n; i+=2) {
+            if (b[i] && !b[i+1])
+                m[i/2] = INSERTED;
+            else if (!b[i] && b[i+1])
+                m[i/2] = REMOVED;
+            else if (b[i] && b[i+1])
+                m[i/2] = UPDATED;
+            else
+                m[i/2] = NONE;
+        }
+        
+        return m;
+        
+    }
+    
+}

Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPO.java
===================================================================
--- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPO.java	2011-05-10 19:39:39 UTC (rev 4479)
+++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPO.java	2011-05-11 03:19:09 UTC (rev 4480)
@@ -28,12 +28,10 @@
 import com.bigdata.bop.IConstant;
 import com.bigdata.bop.IPredicate;
 import com.bigdata.bop.IVariableOrConstant;
-import com.bigdata.io.ByteArrayBuffer;
 import com.bigdata.rdf.inf.Justification;
 import com.bigdata.rdf.internal.IV;
 import com.bigdata.rdf.internal.IVUtility;
-import com.bigdata.rdf.internal.TermId;
-import com.bigdata.rdf.internal.VTE;
+import com.bigdata.rdf.internal.SidIV;
 import com.bigdata.rdf.model.BigdataResource;
 import com.bigdata.rdf.model.BigdataStatement;
 import com.bigdata.rdf.model.BigdataStatementImpl;
@@ -43,6 +41,7 @@
 import com.bigdata.rdf.store.AbstractTripleStore;
 import com.bigdata.rdf.store.IRawTripleStore;
 import com.bigdata.relation.accesspath.IAccessPath;
+import com.bigdata.util.Bits;
 
 /**
  * Represents a triple, triple+SID, or quad. When used to represent a triple,
@@ -74,31 +73,73 @@
      */
     private IV c = null;
 
+//    /**
+//     * Statement type (inferred, explicit, or axiom).
+//     */
+//    private StatementEnum type;
+//    
+//    /**
+//     * User flag
+//     */
+//    private boolean userFlag;
+//    
+//    /**
+//     * Override flag used for downgrading statements during truth maintenance.
+//     */
+//    private transient boolean override = false;
+//    
+////    private transient boolean modified = false;
+//    private transient ModifiedEnum modified = ModifiedEnum.NONE;
+//    
+//   /**
+//    * If sidable, we will lazily instantiate a sid when requested via
+//    * {@link #c()}, {@link #getStatementIdentifier()}, and {@link SPO#get(int)}
+//    * with a parameter of 3. This should reduce heap pressure by only creating
+//    * sids on-demand on an as-needed basis.
+//    */
+//    private boolean sidable = false;    
+  
+	/**
+	 * Bit flags used to represent statement type, user flag, override, 
+	 * modified enum, and sidable flag.  Much more compact representation.
+	 */
+	private byte flags = 0;
+
+	/**
+	 * Denotes which bit to find the StatementType within the {@link #flags}.
+	 * Type takes two bits.
+	 */
+    private static int TYPE_BIT = 0;
+
     /**
-     * Statement type (inferred, explicit, or axiom).
-   ...
 
[truncated message content]

[Bigdata-commit] SF.net SVN: bigdata:[4480] branches/QUADS_QUERY_BRANCH/bigdata

Fast, scalable, robust graph database platform

[Bigdata-commit] SF.net SVN: bigdata:[4480] branches/QUADS_QUERY_BRANCH/bigdata