From: <mrp...@us...> - 2011-05-11 03:19:18
|
Revision: 4480 http://bigdata.svn.sourceforge.net/bigdata/?rev=4480&view=rev Author: mrpersonick Date: 2011-05-11 03:19:09 +0000 (Wed, 11 May 2011) Log Message: ----------- inline sids and reverse lookup Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/util/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/axioms/BaseAxioms.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/changesets/StatementWriter.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/inf/Justification.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/ILexiconConfiguration.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/IVUtility.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/LexiconConfiguration.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/LexiconRelation.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/model/BigdataStatementImpl.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/model/StatementEnum.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/rio/AsynchronousStatementBufferFactory.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/BulkCompleteConverter.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/ISPO.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPO.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOIndexRemover.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOIndexWriteProc.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOIndexWriter.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOKeyOrder.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPORelation.java 
branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOTupleSerializer.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/AbstractTripleStore.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/internal/TestEncodeDecodeKeys.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/spo/TestSPO.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/spo/TestSPOKeyCoders.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/spo/TestSPOTupleSerializer.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/spo/TestSPOValueCoders.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/store/TestStatementIdentifiers.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailWithSids.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestChangeSets.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestSids.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/util/TestBits.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/SidIV.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/ModifiedEnum.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/util/TestAll.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/util/TestAll.java 2011-05-10 19:39:39 UTC (rev 4479) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/util/TestAll.java 2011-05-11 03:19:09 UTC (rev 4480) @@ -80,6 +80,8 @@ suite.addTest(com.bigdata.util.httpd.TestAll.suite()); + suite.addTestSuite( TestBits.class ); + return suite; } Added: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/util/TestBits.java =================================================================== --- 
branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/util/TestBits.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/util/TestBits.java 2011-05-11 03:19:09 UTC (rev 4480) @@ -0,0 +1,166 @@ +/* + +Copyright (C) SYSTAP, LLC 2006-2008. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Aug 6, 2009 + */ + +package com.bigdata.util; + +import junit.framework.TestCase2; + +/** + * Test suite for {@link Bits}. + */ +public class TestBits extends TestCase2 { + + /** + * + */ + public TestBits() { + } + + /** + * @param name + */ + public TestBits(String name) { + super(name); + } + + public void test_ctor1() { + +// final byte[] d = new byte[1]; +// final ByteBuffer b = ByteBuffer.wrap(d); +// final BitVector v = new ByteBufferBitVector(b); +// +// assertEquals("length", 8L, v.length()); + + byte v = 0; + + // verify range check. + try { + Bits.get(v, -1); + fail("Expecting: " + IndexOutOfBoundsException.class); + } catch (IndexOutOfBoundsException ex) { + if (log.isInfoEnabled()) + log.info("Ignoring expected exception: " + ex); + } + + // verify range check. 
+ try { + Bits.get(v, 8); + fail("Expecting: " + IndexOutOfBoundsException.class); + } catch (IndexOutOfBoundsException ex) { + if (log.isInfoEnabled()) + log.info("Ignoring expected exception: " + ex); + } + + for (int i = 0; i < 8; i++) + assertEquals(false, Bits.get(v, i)); + + // set bit zero. +// d[0] |= (1 << 0); + v = Bits.set(v, 0, true); + + if (log.isInfoEnabled()) + log.info(Bits.toString(v)); + assertEquals(true, Bits.get(v, 0)); + + // clear bit zero. +// d[0] &= ~(1 << 0); + v = Bits.set(v, 0, false); + + if (log.isInfoEnabled()) + log.info(Bits.toString(v)); + assertEquals(false, Bits.get(v, 0)); + + } + + /** + * Verify set/clear of each bit in the first byte. + */ + public void test_getBoolean() { + +// final byte[] d = new byte[1]; +// final ByteBuffer b = ByteBuffer.wrap(d); +// final BitVector v = new ByteBufferBitVector(b); + + byte v = 0; + + // verify all bits are zero. + for (int i = 0; i < 8; i++) + assertEquals(false, Bits.get(v, i)); + + // set/clear each bit in the first byte in turn. + for (int i = 0; i < 8; i++) { + + // set bit +// d[0] |= (1 << i); + v = Bits.set(v, i, true); + + if (log.isInfoEnabled()) + log.info(Bits.toString(v) + " : i=" + i + ", (1<<" + i + ")=" + + (1 << i)); + assertEquals(true, Bits.get(v, i)); + + // clear bit +// d[0] &= ~(1 << i); + v = Bits.set(v, i, false); + + if (log.isInfoEnabled()) + log.info(Bits.toString(v)); + assertEquals(false, Bits.get(v, i)); + + } + + } + + /** + * Verify that masking a byte with every bit set retains only the requested bits (bits 0 and 1). + */ + public void test_getMask() { + + byte v = 0; + + // verify all bits are zero. 
+ for (int i = 0; i < 8; i++) + assertEquals(false, Bits.get(v, i)); + + // set each bit in the byte + for (int i = 0; i < 8; i++) { + + // set bit + v = Bits.set(v, i, true); + assertEquals(true, Bits.get(v, i)); + + } + + // mask off all but the 0 and 1 bits + v = Bits.mask(v, 0, 1); + if (log.isInfoEnabled()) + log.info(Bits.toString(v)); + assertEquals(3, v); + + } + +} Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/axioms/BaseAxioms.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/axioms/BaseAxioms.java 2011-05-10 19:39:39 UTC (rev 4479) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/axioms/BaseAxioms.java 2011-05-11 03:19:09 UTC (rev 4480) @@ -285,7 +285,7 @@ metadata.setBranchingFactor(branchingFactor); - tupleSer = new SPOTupleSerializer(SPOKeyOrder.SPO); + tupleSer = new SPOTupleSerializer(SPOKeyOrder.SPO, false/* sids */); metadata.setTupleSerializer(tupleSer); Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/changesets/StatementWriter.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/changesets/StatementWriter.java 2011-05-10 19:39:39 UTC (rev 4479) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/changesets/StatementWriter.java 2011-05-11 03:19:09 UTC (rev 4480) @@ -1,14 +1,13 @@ package com.bigdata.rdf.changesets; import java.util.Iterator; -import java.util.Map; + import org.apache.log4j.Logger; + import com.bigdata.rdf.changesets.IChangeRecord.ChangeAction; -import com.bigdata.rdf.internal.IV; -import com.bigdata.rdf.model.BigdataBNode; import com.bigdata.rdf.spo.ISPO; +import com.bigdata.rdf.spo.ModifiedEnum; import com.bigdata.rdf.spo.SPO; -import com.bigdata.rdf.spo.ISPO.ModifiedEnum; import com.bigdata.rdf.store.AbstractTripleStore; import 
com.bigdata.relation.accesspath.IElementFilter; import com.bigdata.striterator.ChunkedArrayIterator; Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/inf/Justification.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/inf/Justification.java 2011-05-10 19:39:39 UTC (rev 4479) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/inf/Justification.java 2011-05-11 03:19:09 UTC (rev 4480) @@ -921,7 +921,7 @@ // assert arity == 3; // Note: keys are SPOs; no values stored for the tuples. - tupleSer = new SPOTupleSerializer(SPOKeyOrder.SPO, + tupleSer = new SPOTupleSerializer(SPOKeyOrder.SPO, false/* sids */, DefaultTupleSerializer.getDefaultLeafKeysCoder(), EmptyRabaValueCoder.INSTANCE); Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/ILexiconConfiguration.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/ILexiconConfiguration.java 2011-05-10 19:39:39 UTC (rev 4479) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/ILexiconConfiguration.java 2011-05-11 03:19:09 UTC (rev 4480) @@ -29,7 +29,6 @@ import com.bigdata.rdf.lexicon.LexiconRelation; import com.bigdata.rdf.model.BigdataValue; import com.bigdata.rdf.model.BigdataValueFactory; -import com.bigdata.rdf.store.AbstractTripleStore; /** * Configuration determines which RDF Values are inlined into the statement @@ -72,91 +71,4 @@ */ void initExtensions(final LexiconRelation lex); - /** - * <code>true</code> iff the <code>vte</code> and <code>dte</code> - * should be inlined. - * - * @param vte - * the term type - * @param dte - * the data type - public boolean isInline(VTE vte, DTE dte); - */ - -// /** -// * <code>true</code> iff <code>xsd:boolean</code> should be inlined. 
-// */ -// public boolean isBooleanInline(); -// -// /** -// * <code>true</code> iff the fixed size numerics (<code>xsd:int</code>, -// * <code>xsd:short</code>, <code>xsd:float</code>, etc) should be inlined. -// */ -// public boolean isSmallNumericInline(); -// -// /** -// * <code>true</code> iff xsd:integer should be inlined. -// * <p> -// * Note: The maximum length for the encoding is ~32kb per key. With a B+Tree -// * branching factor of 256 that is ~ 8MB per leaf before compression. While -// * that is definitely large, it is not so outrageous that we need to forbid -// * it. -// */ -// public boolean isXSDIntegerInline(); -// -// /** -// * <code>true</code> iff <code>xsd:decimal</code> should be inlined. -// */ -// public boolean isXSDDecimalInline(); -// -// /** -// * <code>true</code> iff blank node identifiers should be inlined. This -// * is only possible when the blank node identifiers are internally -// * generated {@link UUID}s since otherwise they can be arbitrary Unicode -// * strings which, like text-based Literals, can not be inlined. -// * <p> -// * This option is NOT compatible with -// * {@link AbstractTripleStore.Options#STORE_BLANK_NODES}. -// */ -// public boolean isBlankNodeInline(); -// -// /** -// * <code>true</code> if UUID values (other than blank nodes) should be -// * inlined. -// */ -// public boolean isUUIDInline(); -// -// /** -// * Option to enable storing of long literals (over a configured -// * threshold) as blob references. The TERM2ID index would have a -// * hash function (MD5, SHA-1, SHA-2, etc) of the value and assign -// * a termId. The ID2TERM index would map the termId to a blob -// * reference. The blob data would be stored in the journal and -// * migrate into index segments during overflow processing for -// * scale-out. -// */ -// public boolean isLongLiteralAsBlob(); -// -// /** -// * Return the {@link MessageDigest} used to compute a hash code for a long -// * literal. 
The message digest should compute a hash function with a very -// * small probability of collisions. In general, <code>SHA-256</code> (32 -// * bytes), <code>SHA-384</code> (48 bytes) and <code>SHA-512</code> (64 -// * byte) should be reasonable choices. -// * <p> -// * Appropriate hash algorithms are defined in the <a -// * href="http://csrc.nist.gov/publications/fips/index.html">FIPS PUB -// * 180-2</a> (which has been replaced by <a href= -// * "http://csrc.nist.gov/publications/fips/fips180-3/fips180-3_final.pdf" -// * >FIPS PUB 180-3</a>. Also see Recommendation for Applications Using -// * Approved Hash Algorithms in <a href= -// * "http://csrc.nist.gov/publications/nistpubs/800-107/NIST-SP-800-107.pdf" -// * >SP 800-107</a>, which provides information about the collision -// * resistance of these hash algorithms. -// * -// * @return A {@link MessageDigest} object which can be used to compute the -// * hash code for a long literal. -// */ -// public MessageDigest getLongLiteralMessageDigest(); -// } Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/IVUtility.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/IVUtility.java 2011-05-10 19:39:39 UTC (rev 4479) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/IVUtility.java 2011-05-11 03:19:09 UTC (rev 4480) @@ -32,12 +32,17 @@ import java.util.ArrayList; import java.util.UUID; +import org.apache.log4j.Logger; + import com.bigdata.btree.keys.IKeyBuilder; import com.bigdata.btree.keys.KeyBuilder; import com.bigdata.rawstore.Bytes; import com.bigdata.rdf.internal.constraints.MathBOp.MathOp; import com.bigdata.rdf.model.BigdataBNode; import com.bigdata.rdf.model.BigdataLiteral; +import com.bigdata.rdf.model.StatementEnum; +import com.bigdata.rdf.spo.ISPO; +import com.bigdata.rdf.spo.SPOKeyOrder; /** @@ -45,6 +50,8 @@ */ public class IVUtility { + private 
static final transient Logger log = Logger.getLogger(IVUtility.class); + public static boolean equals(IV iv1, IV iv2) { // same IV or both null @@ -421,23 +428,42 @@ */ public static IV[] decode(final byte[] key, final int numTerms) { + return decode(key, 0 /* offset */, numTerms); + + } + + /** + * Decodes up to numTerms {@link IV}s from a byte[]. + * + * @param key + * The byte[]. + * @param offset + * The offset into the byte[] key. + * @param numTerms + * The number of terms to decode. + * + * @return The set of {@link IV}s. + */ + public static IV[] decode(final byte[] key, final int offset, + final int numTerms) { + if (numTerms <= 0) return new IV[0]; final IV[] ivs = new IV[numTerms]; - int offset = 0; + int o = offset; for (int i = 0; i < numTerms; i++) { - if (offset >= key.length) + if (o >= key.length) throw new IllegalArgumentException( "key is not long enough to decode " + numTerms + " terms."); - ivs[i] = decodeFromOffset(key, offset); + ivs[i] = decodeFromOffset(key, o); - offset += ivs[i] == null + o += ivs[i] == null ? 
NullIV.INSTANCE.byteLength() : ivs[i].byteLength(); } @@ -502,6 +528,22 @@ // The value type (URI, Literal, BNode, SID) final VTE vte = AbstractIV.getInternalValueTypeEnum(flags); + // handle inline sids + if (vte == VTE.STATEMENT) { + + // spo is directly decodable from key + final ISPO spo = SPOKeyOrder.SPO.decodeKey(key, o); + + // all spos that have a sid are explicit + spo.setStatementType(StatementEnum.Explicit); + spo.setStatementIdentifier(true); + + // create a sid iv and return it + final SidIV sid = new SidIV(spo); + return sid; + + } + // The data type final DTE dte = AbstractIV.getInternalDataTypeEnum(flags); Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/LexiconConfiguration.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/LexiconConfiguration.java 2011-05-10 19:39:39 UTC (rev 4479) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/LexiconConfiguration.java 2011-05-11 03:19:09 UTC (rev 4480) @@ -318,6 +318,8 @@ public boolean isInline(final VTE vte, final DTE dte) { switch (vte) { + case STATEMENT: + return true; case BNODE: return inlineBNodes && isSupported(dte); case LITERAL: Added: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/SidIV.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/SidIV.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/SidIV.java 2011-05-11 03:19:09 UTC (rev 4480) @@ -0,0 +1,212 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2011. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... 
+ +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +package com.bigdata.rdf.internal; + +import java.math.BigInteger; + +import org.apache.log4j.Logger; + +import com.bigdata.btree.keys.IKeyBuilder; +import com.bigdata.btree.keys.KeyBuilder; +import com.bigdata.rdf.lexicon.LexiconRelation; +import com.bigdata.rdf.model.BigdataBNode; +import com.bigdata.rdf.model.BigdataValueFactory; +import com.bigdata.rdf.spo.ISPO; +import com.bigdata.rdf.spo.SPO; +import com.bigdata.rdf.spo.SPOComparator; +import com.bigdata.rdf.spo.SPOKeyOrder; + +/** + * Internal value representing an inline statement identifier. Uses the + * {@link ISPO} supplied in the ctor as the inline value. The + * {@link #asValue(BigdataValueFactory, ILexiconConfiguration)} method returns a + * {@link BigdataBNode} that is used to represent the sid in serialization + * formats (such as the custom RDF/XML extension for sids). The bnode is + * guaranteed to always have the same bnode id for a given inlined SPO. This is + * accomplished using the byte[] key encoding for the spo along with the + * BigInteger class. + * <p> + * This internal value has a {@link VTE} of {@link VTE#STATEMENT}. It is encoded + * into the statement indices by directly encoding the spo using + * {@link SPOKeyOrder#encodeKey(IKeyBuilder, ISPO)} via the + * {@link SPOKeyOrder#SPO} key order. 
Thus when decoded from the statement + * indices, the spo associated with this sid is materialized directly from the + * sid itself. See {@link IVUtility#decode(byte[])}. The spo decoded from the + * sid IV will be marked as explicit (only explicit statements have sids) and + * this SidIV will be attached to it. This completely eliminates the need for a + * reverse index from sid->spo, as the spo is encoded inline into the SidIV + * itself. This replaces the TermId model for representing sids. + * <p> + * {@inheritDoc} + */ +public class SidIV<V extends BigdataBNode> extends + AbstractInlineIV<V, ISPO> { + + /** + * + */ + private static final long serialVersionUID = 685148537376856907L; + + protected static final Logger log = Logger.getLogger(SidIV.class); + + /** + * The inline spo. + */ + private final ISPO spo; + + /** + * The cached byte[] key for the encoding of this IV. + */ + private transient byte[] key; + + /** + * The cached materialized BigdataValue for this sid. + */ + private transient V bnode; + + /** + * Ctor with internal value spo specified. + */ + public SidIV(final ISPO spo) { + + /* + * Note: XSDBoolean happens to be assigned the code value of 0, which is + * the value we want when the data type enumeration will be ignored. + */ + super(VTE.STATEMENT, DTE.XSDBoolean); + + this.spo = spo; + + } + + /** + * Returns the inline spo. + */ + public ISPO getInlineValue() throws UnsupportedOperationException { + return spo; + } + + /** + * Returns the bnode representation of this IV, useful for serialization + * formats such as RDF/XML. See {@link #bnodeId()}. + */ + public V asValue(final LexiconRelation lex) { + if (bnode == null) { + bnode = (V) lex.getValueFactory().createBNode(bnodeId()); + bnode.setIV(this); + bnode.setStatementIdentifier(true); + } + return bnode; + } + + /** + * Return the byte length for the byte[] encoded representation of this + * internal value. Depends on the byte length of the encoded inline spo. 
+ */ + public int byteLength() { + return 1 + key().length; + } + + public String toString() { + return stringValue(); + } + + public String stringValue() { + return "Sid("+toString(spo)+")"; + } + + /** + * Pretty print the inline spo. Calling SPO.toString() results in an + * infinite loop. + */ + private static String toString(final ISPO spo) { + return (SPO.toString(spo.s()) + ":" + + SPO.toString(spo.p()) + ":" + + SPO.toString(spo.o())); + } + + public int hashCode() { + return spo.hashCode(); + } + + /** + * Using the BigInteger class to create a unique bnode id based on the + * byte[] key of the inline spo. + */ + private String bnodeId() { +// // just use the hash code. can result in collisions +// return String.valueOf(hashCode()); + + // create a big integer using the spo key. should result in unique ids + final byte[] key = key(); + final int signum = key.length > 0 ? 1 : 0; + final BigInteger bi = new BigInteger(signum, key); + return bi.toString(); + } + + public boolean equals(Object o) { + if (this == o) + return true; + if (o instanceof SidIV) { + final ISPO spo2 = ((SidIV) o).spo; + return spo.equals(spo2); + } + return false; + } + + protected int _compareTo(IV o) { + final ISPO spo2 = ((SidIV) o).spo; + return SPOComparator.INSTANCE.compare(spo, spo2); + } + + /** + * Encode this internal value into the supplied key builder. Emits the + * flags, followed by the encoded byte[] representing the spo, in SPO + * key order. + * <p> + * {@inheritDoc} + */ + public IKeyBuilder encode(final IKeyBuilder keyBuilder) { + + // First emit the flags byte. + keyBuilder.append(flags()); + + // Then append the SPO's key in SPOKeyOrder.SPO + keyBuilder.append(key()); + + return keyBuilder; + + } + + private byte[] key() { + if (key == null) { + /* + * Build the SPO's key in SPOKeyOrder.SPO. 
+ */ + key = SPOKeyOrder.SPO.encodeKey(new KeyBuilder(), spo); + } + return key; + } + +} \ No newline at end of file Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/LexiconRelation.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/LexiconRelation.java 2011-05-10 19:39:39 UTC (rev 4479) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/LexiconRelation.java 2011-05-11 03:19:09 UTC (rev 4480) @@ -72,7 +72,6 @@ import com.bigdata.btree.keys.KVO; import com.bigdata.btree.keys.KeyBuilder; import com.bigdata.btree.keys.StrengthEnum; -import com.bigdata.btree.proc.IResultHandler; import com.bigdata.btree.proc.AbstractKeyArrayIndexProcedure.ResultBuffer; import com.bigdata.btree.proc.AbstractKeyArrayIndexProcedure.ResultBufferHandler; import com.bigdata.btree.proc.BatchLookup.BatchLookupConstructor; @@ -82,7 +81,6 @@ import com.bigdata.journal.IResourceLock; import com.bigdata.journal.ITx; import com.bigdata.journal.TimestampUtility; -import com.bigdata.rawstore.Bytes; import com.bigdata.rdf.internal.IDatatypeURIResolver; import com.bigdata.rdf.internal.IExtensionFactory; import com.bigdata.rdf.internal.ILexiconConfiguration; @@ -90,18 +88,14 @@ import com.bigdata.rdf.internal.IVUtility; import com.bigdata.rdf.internal.LexiconConfiguration; import com.bigdata.rdf.internal.TermId; -import com.bigdata.rdf.lexicon.Term2IdWriteProc.Term2IdWriteProcConstructor; import com.bigdata.rdf.model.BigdataBNode; import com.bigdata.rdf.model.BigdataLiteral; import com.bigdata.rdf.model.BigdataURI; import com.bigdata.rdf.model.BigdataValue; import com.bigdata.rdf.model.BigdataValueFactory; import com.bigdata.rdf.model.BigdataValueFactoryImpl; -import com.bigdata.rdf.model.StatementEnum; import com.bigdata.rdf.rio.IStatementBuffer; import com.bigdata.rdf.rio.StatementBuffer; -import com.bigdata.rdf.spo.ISPO; -import 
com.bigdata.rdf.spo.SPOComparator; import com.bigdata.rdf.store.AbstractTripleStore; import com.bigdata.rdf.store.IRawTripleStore; import com.bigdata.relation.AbstractRelation; @@ -111,7 +105,6 @@ import com.bigdata.relation.rule.IRule; import com.bigdata.search.FullTextIndex; import com.bigdata.service.IBigdataFederation; -import com.bigdata.service.Split; import com.bigdata.striterator.ChunkedArrayIterator; import com.bigdata.striterator.IChunkedOrderedIterator; import com.bigdata.striterator.IKeyOrder; @@ -1468,197 +1461,197 @@ } - /** - * Assign unique statement identifiers to triples. - * <p> - * Each distinct {@link StatementEnum#Explicit} {s,p,o} is assigned a unique - * statement identifier using the {@link LexiconKeyOrder#TERM2ID} index. The - * assignment of statement identifiers is <i>consistent</i> using an - * unisolated atomic write operation similar to - * {@link #addTerms(BigdataValue[], int, boolean)} - * <p> - * Note: Statement identifiers are NOT inserted into the reverse (id:term) - * index. Instead, they are written into the values associated with the - * {s,p,o} in each of the statement indices. That is handled by - * {@link AbstractTripleStore#addStatements(AbstractTripleStore, boolean, IChunkedOrderedIterator, IElementFilter)} - * , which is also responsible for invoking this method in order to have the - * statement identifiers on hand before it writes on the statement indices. - * <p> - * Note: The caller's {@link ISPO}[] is sorted into SPO order as a - * side-effect. - * <p> - * Note: The statement identifiers are assigned to the {@link ISPO}s as a - * side-effect. - * <p> - * Note: SIDs are NOT supported for quads, so this code is never executed - * for quads. - */ - public void addStatementIdentifiers(final ISPO[] a, final int n) { +// /** +// * Assign unique statement identifiers to triples. 
+// * <p> +// * Each distinct {@link StatementEnum#Explicit} {s,p,o} is assigned a unique +// * statement identifier using the {@link LexiconKeyOrder#TERM2ID} index. The +// * assignment of statement identifiers is <i>consistent</i> using an +// * unisolated atomic write operation similar to +// * {@link #addTerms(BigdataValue[], int, boolean)} +// * <p> +// * Note: Statement identifiers are NOT inserted into the reverse (id:term) +// * index. Instead, they are written into the values associated with the +// * {s,p,o} in each of the statement indices. That is handled by +// * {@link AbstractTripleStore#addStatements(AbstractTripleStore, boolean, IChunkedOrderedIterator, IElementFilter)} +// * , which is also responsible for invoking this method in order to have the +// * statement identifiers on hand before it writes on the statement indices. +// * <p> +// * Note: The caller's {@link ISPO}[] is sorted into SPO order as a +// * side-effect. +// * <p> +// * Note: The statement identifiers are assigned to the {@link ISPO}s as a +// * side-effect. +// * <p> +// * Note: SIDs are NOT supported for quads, so this code is never executed +// * for quads. +// */ +// public void addStatementIdentifiers(final ISPO[] a, final int n) { +// +// // * @throws UnsupportedOperationException +//// * if {@link Options#STATEMENT_IDENTIFIERS} was not specified. +//// * +//// if (!statementIdentifiers) +//// throw new UnsupportedOperationException(); +// +// if (n == 0) +// return; +// +// final long begin = System.currentTimeMillis(); +// final long keyGenTime; // time to convert {s,p,o} to byte[] sort keys. +// final long sortTime; // time to sort terms by assigned byte[] keys. +// final long insertTime; // time to insert terms into the term:id index. +// +// /* +// * Sort the caller's array into SPO order. This order will correspond to +// * the total order of the term:id index. 
+// * +// * Note: This depends critically on SPOComparator producing the same +// * total order as we would obtain by an unsigned byte[] sort of the +// * generated sort keys. +// * +// * Note: the keys for the term:id index are NOT precisely the keys used +// * by the SPO index since there is a prefix code used to mark the keys +// * are Statements (vs Literals, BNodes, or URIs). +// */ +// { +// +// final long _begin = System.currentTimeMillis(); +// +// Arrays.sort(a, 0, n, SPOComparator.INSTANCE); +// +// sortTime = System.currentTimeMillis() - _begin; +// +// } +// +// /* +// * Insert into the forward index (term -> id). This will either assign a +// * statement identifier or return the existing statement identifier if +// * the statement is already in the lexicon (the statement identifier is +// * in a sense a term identifier since it is assigned by the term:id +// * index). +// * +// * Note: Since we only assign statement identifiers for explicit +// * statements the caller's SPO[] can not be directly correlated to the +// * keys[]. We copy the references into b[] so that we can keep that +// * correlation 1:1. +// */ +// final byte[][] keys = new byte[n][]; +// final ISPO[] b = new ISPO[n]; +// +// /* +// * Generate the sort keys for the term:id index. +// */ +// int nexplicit = 0; +// { +// +// final long _begin = System.currentTimeMillis(); +// +// // local instance, no unicode support. +// final IKeyBuilder keyBuilder = KeyBuilder +// .newInstance(1/* statement byte */+ (3/* triple */* Bytes.SIZEOF_LONG)); +// +// for (int i = 0; i < n; i++) { +// +// final ISPO spo = a[i]; +// +// if (!spo.isExplicit()) +// continue; +// +// if (!spo.isFullyBound()) +// throw new IllegalArgumentException("Not fully bound: " +// + spo.toString(/*this*/)); +// +// /* +// * Creating a dummy term for the Term2Id index. 
+// */ +// keyBuilder.reset().append(ITermIndexCodes.TERM_CODE_STMT); +// spo.s().encode(keyBuilder); +// spo.p().encode(keyBuilder); +// spo.o().encode(keyBuilder); +// keys[nexplicit] = keyBuilder.getKey(); +// +// // Note: keeps correlation between key and SPO. +// b[nexplicit] = spo; +// +// nexplicit++; +// +// } +// +// keyGenTime = System.currentTimeMillis() - _begin; +// +// } +// +// /* +// * Execute a remote unisolated batch operation that assigns the +// * statement identifier. +// */ +// { +// +// final long _begin = System.currentTimeMillis(); +// +// final IIndex termIdIndex = getTerm2IdIndex(); +// +// // run the procedure. +// if (nexplicit > 0) { +// +// termIdIndex.submit(0/* fromIndex */, nexplicit/* toIndex */, +// keys, null/* vals */, new Term2IdWriteProcConstructor( +// false/* readOnly */, storeBlankNodes, //scaleOutTermIds, +// termIdBitsToReverse), +// new IResultHandler<Term2IdWriteProc.Result, Void>() { +// +// /** +// * Copy the assigned / discovered statement +// * identifiers onto the corresponding elements of +// * the SPO[]. 
+// */ +// public void aggregate(Term2IdWriteProc.Result result, +// Split split) { +// +// for (int i = split.fromIndex, j = 0; i < split.toIndex; i++, j++) { +// +//// if (b[i].c() != 0L +//// && b[i].c() != result.ids[j]) { +//// System.err.println("spo=" +//// + getContainer().toString(b[i]) +//// + ", sid=" +//// + getContainer().toString( +//// result.ids[j])); +//// } +// +// b[i].setStatementIdentifier(result.ivs[j]); +// +// } +// +// } +// +// public Void getResult() { +// +// return null; +// +// } +// +// }); +// +// } +// +// insertTime = System.currentTimeMillis() - _begin; +// +// } +// +// final long elapsed = System.currentTimeMillis() - begin; +// +// if (log.isInfoEnabled() && n > 1000 || elapsed > 3000) { +// +// log.info("Wrote " + n + " in " + elapsed + "ms; keygen=" +// + keyGenTime + "ms, sort=" + sortTime + "ms, insert=" +// + insertTime + "ms"); +// +// } +// +// } - // * @throws UnsupportedOperationException -// * if {@link Options#STATEMENT_IDENTIFIERS} was not specified. -// * -// if (!statementIdentifiers) -// throw new UnsupportedOperationException(); - - if (n == 0) - return; - - final long begin = System.currentTimeMillis(); - final long keyGenTime; // time to convert {s,p,o} to byte[] sort keys. - final long sortTime; // time to sort terms by assigned byte[] keys. - final long insertTime; // time to insert terms into the term:id index. - - /* - * Sort the caller's array into SPO order. This order will correspond to - * the total order of the term:id index. - * - * Note: This depends critically on SPOComparator producing the same - * total order as we would obtain by an unsigned byte[] sort of the - * generated sort keys. - * - * Note: the keys for the term:id index are NOT precisely the keys used - * by the SPO index since there is a prefix code used to mark the keys - * are Statements (vs Literals, BNodes, or URIs). 
- */ - { - - final long _begin = System.currentTimeMillis(); - - Arrays.sort(a, 0, n, SPOComparator.INSTANCE); - - sortTime = System.currentTimeMillis() - _begin; - - } - - /* - * Insert into the forward index (term -> id). This will either assign a - * statement identifier or return the existing statement identifier if - * the statement is already in the lexicon (the statement identifier is - * in a sense a term identifier since it is assigned by the term:id - * index). - * - * Note: Since we only assign statement identifiers for explicit - * statements the caller's SPO[] can not be directly correlated to the - * keys[]. We copy the references into b[] so that we can keep that - * correlation 1:1. - */ - final byte[][] keys = new byte[n][]; - final ISPO[] b = new ISPO[n]; - - /* - * Generate the sort keys for the term:id index. - */ - int nexplicit = 0; - { - - final long _begin = System.currentTimeMillis(); - - // local instance, no unicode support. - final IKeyBuilder keyBuilder = KeyBuilder - .newInstance(1/* statement byte */+ (3/* triple */* Bytes.SIZEOF_LONG)); - - for (int i = 0; i < n; i++) { - - final ISPO spo = a[i]; - - if (!spo.isExplicit()) - continue; - - if (!spo.isFullyBound()) - throw new IllegalArgumentException("Not fully bound: " - + spo.toString(/*this*/)); - - /* - * Creating a dummy term for the Term2Id index. - */ - keyBuilder.reset().append(ITermIndexCodes.TERM_CODE_STMT); - spo.s().encode(keyBuilder); - spo.p().encode(keyBuilder); - spo.o().encode(keyBuilder); - keys[nexplicit] = keyBuilder.getKey(); - - // Note: keeps correlation between key and SPO. - b[nexplicit] = spo; - - nexplicit++; - - } - - keyGenTime = System.currentTimeMillis() - _begin; - - } - - /* - * Execute a remote unisolated batch operation that assigns the - * statement identifier. - */ - { - - final long _begin = System.currentTimeMillis(); - - final IIndex termIdIndex = getTerm2IdIndex(); - - // run the procedure. 
- if (nexplicit > 0) { - - termIdIndex.submit(0/* fromIndex */, nexplicit/* toIndex */, - keys, null/* vals */, new Term2IdWriteProcConstructor( - false/* readOnly */, storeBlankNodes, //scaleOutTermIds, - termIdBitsToReverse), - new IResultHandler<Term2IdWriteProc.Result, Void>() { - - /** - * Copy the assigned / discovered statement - * identifiers onto the corresponding elements of - * the SPO[]. - */ - public void aggregate(Term2IdWriteProc.Result result, - Split split) { - - for (int i = split.fromIndex, j = 0; i < split.toIndex; i++, j++) { - -// if (b[i].c() != 0L -// && b[i].c() != result.ids[j]) { -// System.err.println("spo=" -// + getContainer().toString(b[i]) -// + ", sid=" -// + getContainer().toString( -// result.ids[j])); -// } - - b[i].setStatementIdentifier(result.ivs[j]); - - } - - } - - public Void getResult() { - - return null; - - } - - }); - - } - - insertTime = System.currentTimeMillis() - _begin; - - } - - final long elapsed = System.currentTimeMillis() - begin; - - if (log.isInfoEnabled() && n > 1000 || elapsed > 3000) { - - log.info("Wrote " + n + " in " + elapsed + "ms; keygen=" - + keyGenTime + "ms, sort=" + sortTime + "ms, insert=" - + insertTime + "ms"); - - } - - } - /** * <p> * Add the terms to the full text index so that we can do fast lookup of the @@ -2365,26 +2358,28 @@ if (tid.isStatement()) { - /* - * Statement identifiers are not stored in the reverse lexicon (or - * the cache). - * - * A statement identifier is externalized as a BNode. The "S" prefix - * is a syntactic marker for those in the know to indicate that the - * BNode corresponds to a statement identifier. - */ +// /* +// * Statement identifiers are not stored in the reverse lexicon (or +// * the cache). +// * +// * A statement identifier is externalized as a BNode. The "S" prefix +// * is a syntactic marker for those in the know to indicate that the +// * BNode corresponds to a statement identifier. 
+// */ +// +// final BigdataBNode stmt = valueFactory.createBNode("S" +// + Long.toString(tid.getTermId())); +// +// // set the term identifier on the object. +// stmt.setIV(tid); +// +// // mark as a statement identifier. +// stmt.setStatementIdentifier(true); +// +// return stmt; - final BigdataBNode stmt = valueFactory.createBNode("S" - + Long.toString(tid.getTermId())); + throw new IllegalArgumentException("sids should be inline"); - // set the term identifier on the object. - stmt.setIV(tid); - - // mark as a statement identifier. - stmt.setStatementIdentifier(true); - - return stmt; - } if (!storeBlankNodes && tid.isBNode()) { Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/model/BigdataStatementImpl.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/model/BigdataStatementImpl.java 2011-05-10 19:39:39 UTC (rev 4479) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/model/BigdataStatementImpl.java 2011-05-11 03:19:09 UTC (rev 4480) @@ -27,9 +27,9 @@ import org.openrdf.model.Statement; import org.openrdf.model.Value; -import com.bigdata.io.ByteArrayBuffer; import com.bigdata.rdf.internal.IV; -import com.bigdata.rdf.spo.SPO; +import com.bigdata.rdf.internal.SidIV; +import com.bigdata.rdf.spo.ModifiedEnum; import com.bigdata.rdf.store.AbstractTripleStore; import com.bigdata.rdf.store.IRawTripleStore; @@ -282,29 +282,24 @@ } - public final void setStatementIdentifier(final IV sid) { + public final void setStatementIdentifier(final boolean sidable) { - if (sid == null) - throw new IllegalArgumentException(); + if (sidable && type != StatementEnum.Explicit) { - if (!sid.isStatement()) - throw new IllegalArgumentException("Not a statement identifier: " - + sid); - - if (type != StatementEnum.Explicit) { - // Only allowed for explicit statements. 
throw new IllegalStateException(); } - if (c != null && c.getIV() != sid) - throw new IllegalStateException( - "Different statement identifier already defined: " - + toString() + ", new=" + sid); + if (c == null) { + + // this SHOULD not ever happen + throw new IllegalStateException(); + + } + + c.setIV(new SidIV(this)); - c.setIV(sid); - } public final IV getStatementIdentifier() { @@ -335,13 +330,6 @@ } - public byte[] serializeValue(final ByteArrayBuffer buf) { - - return SPO.serializeValue(buf, override, userFlag, type, - c != null ? c.getIV() : null); - - } - /** * Note: this implementation is equivalent to {@link #toString()} since the * {@link Value}s are already resolved. Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/model/StatementEnum.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/model/StatementEnum.java 2011-05-10 19:39:39 UTC (rev 4479) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/model/StatementEnum.java 2011-05-11 03:19:09 UTC (rev 4480) @@ -132,11 +132,10 @@ static public StatementEnum deserialize(final byte[] val) { - if (val.length != 1 && val.length != (1 + 8)) { + if (val.length != 1) { throw new RuntimeException( - "Expecting either one byte or nine bytes, not " - + val.length); + "Expecting one byte, not " + val.length); } @@ -183,7 +182,6 @@ public static boolean isOverride(final byte b) { return (b & StatementEnum.MASK_OVERRIDE) != 0; -// return (b & StatementEnum.MASK_OVERRIDE) == 1; } Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/rio/AsynchronousStatementBufferFactory.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/rio/AsynchronousStatementBufferFactory.java 2011-05-10 19:39:39 UTC (rev 4479) +++ 
branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/rio/AsynchronousStatementBufferFactory.java 2011-05-11 03:19:09 UTC (rev 4480) @@ -3204,8 +3204,6 @@ long chunksOut = 0; long elementsOut = 0; - final ByteArrayBuffer vbuf = new ByteArrayBuffer(1 + 8/* max length */); - latch.inc(); try { @@ -3233,7 +3231,7 @@ final byte[] key = tupleSer.serializeKey(spo); // generate value for the index. - final byte[] val = spo.serializeValue(vbuf); + final byte[] val = tupleSer.serializeVal(spo); /* * Note: The SPO is deliberately not provided to the KVO Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/BulkCompleteConverter.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/BulkCompleteConverter.java 2011-05-10 19:39:39 UTC (rev 4479) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/BulkCompleteConverter.java 2011-05-11 03:19:09 UTC (rev 4480) @@ -136,7 +136,7 @@ if (val != null) { - SPO.decodeValue(chunk[i], val); + tupleSer.decodeValue(chunk[i], val); } else { Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/ISPO.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/ISPO.java 2011-05-10 19:39:39 UTC (rev 4479) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/ISPO.java 2011-05-11 03:19:09 UTC (rev 4480) @@ -28,10 +28,7 @@ package com.bigdata.rdf.spo; -import org.openrdf.model.Value; - import com.bigdata.bop.IElement; -import com.bigdata.io.ByteArrayBuffer; import com.bigdata.rdf.inf.Justification; import com.bigdata.rdf.inf.TruthMaintenance; import com.bigdata.rdf.internal.IV; @@ -179,20 +176,30 @@ */ boolean isAxiom(); +// /** +// * Set the statement identifier. This sets the 4th position of the quad, but +// * some constraints are imposed on its argument. 
+// * +// * @param sid +// * The statement identifier. +// * +// * @throws IllegalArgumentException +// * if <i>sid</i> is {@link #NULL}. +// * @throws IllegalStateException +// * if the statement identifier is already set. +// */ +// void setStatementIdentifier(final IV sid); + /** * Set the statement identifier. This sets the 4th position of the quad, but * some constraints are imposed on its argument. * * @param sid - * The statement identifier. - * - * @throws IllegalArgumentException - * if <i>sid</i> is {@link #NULL}. - * @throws IllegalStateException - * if the statement identifier is already set. + * If sid is true, this ISPO will produce a sid on-demand when + * requested. */ - void setStatementIdentifier(final IV sid); - + void setStatementIdentifier(final boolean sidable); + /** * The statement identifier (optional). This has nearly identical semantics * to {@link #c()}, but will throw an exception if the 4th position is not @@ -272,25 +279,6 @@ public boolean isModified(); /** - * Return the byte[] that would be written into a statement index for this - * {@link ISPO}, including the optional {@link StatementEnum#MASK_OVERRIDE} - * bit. If the {@link #hasStatementIdentifier()} would return - * <code>true</code>, then the SID will be included in the returned byte[]. - * Note that {@link #hasStatementIdentifier()} is defined in terms of the - * bit pattern of the SID identifiers and therefore will be - * <code>true</code> ONLY for a statement identifier and NOT for an RDF - * {@link Value} identifier. - * - * @param buf - * A buffer supplied by the caller. The buffer will be reset - * before the value is written on the buffer. - * - * @return The value that would be written into a statement index for this - * {@link ISPO}. - */ - public byte[] serializeValue(ByteArrayBuffer buf); - - /** * Method may be used to externalize the {@link BigdataValue}s in the * {@link ISPO}. 
* @@ -299,59 +287,4 @@ */ public String toString(IRawTripleStore db); - public enum ModifiedEnum { - - INSERTED, REMOVED, UPDATED, NONE; - - public static boolean[] toBooleans(final ModifiedEnum[] modified, final int n) { - - final boolean[] b = new boolean[n*2]; - for (int i = 0; i < n; i++) { - switch(modified[i]) { - case INSERTED: - b[i*2] = true; - b[i*2+1] = false; - break; - case REMOVED: - b[i*2] = false; - b[i*2+1] = true; - break; - case UPDATED: - b[i*2] = true; - b[i*2+1] = true; - break; - case NONE: - default: - b[i*2] = false; - b[i*2+1] = false; - break; - } - } - - return b; - - } - - public static ModifiedEnum[] fromBooleans(final boolean[] b, final int n) { - - assert n <= b.length && n % 2 == 0 : "n="+n+", b.length="+b.length; - - final ModifiedEnum[] m = new ModifiedEnum[n/2]; - for (int i = 0; i < n; i+=2) { - if (b[i] && !b[i+1]) - m[i/2] = INSERTED; - else if (!b[i] && b[i+1]) - m[i/2] = REMOVED; - else if (b[i] && b[i+1]) - m[i/2] = UPDATED; - else - m[i/2] = NONE; - } - - return m; - - } - - } - } Added: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/ModifiedEnum.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/ModifiedEnum.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/ModifiedEnum.java 2011-05-11 03:19:09 UTC (rev 4480) @@ -0,0 +1,56 @@ +package com.bigdata.rdf.spo; + +public enum ModifiedEnum { + + INSERTED, REMOVED, UPDATED, NONE; + + public static boolean[] toBooleans(final ModifiedEnum[] modified, final int n) { + + final boolean[] b = new boolean[n*2]; + for (int i = 0; i < n; i++) { + switch(modified[i]) { + case INSERTED: + b[i*2] = true; + b[i*2+1] = false; + break; + case REMOVED: + b[i*2] = false; + b[i*2+1] = true; + break; + case UPDATED: + b[i*2] = true; + b[i*2+1] = true; + break; + case NONE: + default: + b[i*2] = false; + b[i*2+1] = false; + break; + } + } + + return 
b; + + } + + public static ModifiedEnum[] fromBooleans(final boolean[] b, final int n) { + + assert n <= b.length && n % 2 == 0 : "n="+n+", b.length="+b.length; + + final ModifiedEnum[] m = new ModifiedEnum[n/2]; + for (int i = 0; i < n; i+=2) { + if (b[i] && !b[i+1]) + m[i/2] = INSERTED; + else if (!b[i] && b[i+1]) + m[i/2] = REMOVED; + else if (b[i] && b[i+1]) + m[i/2] = UPDATED; + else + m[i/2] = NONE; + } + + return m; + + } + +} Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPO.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPO.java 2011-05-10 19:39:39 UTC (rev 4479) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPO.java 2011-05-11 03:19:09 UTC (rev 4480) @@ -28,12 +28,10 @@ import com.bigdata.bop.IConstant; import com.bigdata.bop.IPredicate; import com.bigdata.bop.IVariableOrConstant; -import com.bigdata.io.ByteArrayBuffer; import com.bigdata.rdf.inf.Justification; import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.internal.IVUtility; -import com.bigdata.rdf.internal.TermId; -import com.bigdata.rdf.internal.VTE; +import com.bigdata.rdf.internal.SidIV; import com.bigdata.rdf.model.BigdataResource; import com.bigdata.rdf.model.BigdataStatement; import com.bigdata.rdf.model.BigdataStatementImpl; @@ -43,6 +41,7 @@ import com.bigdata.rdf.store.AbstractTripleStore; import com.bigdata.rdf.store.IRawTripleStore; import com.bigdata.relation.accesspath.IAccessPath; +import com.bigdata.util.Bits; /** * Represents a triple, triple+SID, or quad. When used to represent a triple, @@ -74,31 +73,73 @@ */ private IV c = null; +// /** +// * Statement type (inferred, explicit, or axiom). +// */ +// private StatementEnum type; +// +// /** +// * User flag +// */ +// private boolean userFlag; +// +// /** +// * Override flag used for downgrading statements during truth maintenance. 
+// */ +// private transient boolean override = false; +// +//// private transient boolean modified = false; +// private transient ModifiedEnum modified = ModifiedEnum.NONE; +// +// /** +// * If sidable, we will lazily instantiate a sid when requested via +// * {@link #c()}, {@link #getStatementIdentifier()}, and {@link SPO#get(int)} +// * with a parameter of 3. This should reduce heap pressure by only creating +// * sids on-demand on an as-needed basis. +// */ +// private boolean sidable = false; + + /** + * Bit flags used to represent statement type, user flag, override, + * modified enum, and sidable flag. Much more compact representation. + */ + private byte flags = 0; + + /** + * Denotes which bit to find the StatementType within the {@link #flags}. + * Type takes two bits. + */ + private static int TYPE_BIT = 0; + /** - * Statement type (inferred, explicit, or axiom). - ... [truncated message content] |