From: <mrp...@us...> - 2010-07-21 19:48:49
|
Revision: 3260 http://bigdata.svn.sourceforge.net/bigdata/?rev=3260&view=rev Author: mrpersonick Date: 2010-07-21 19:48:42 +0000 (Wed, 21 Jul 2010) Log Message: ----------- renamed a bunch of stuff and added support for inline bnodes Modified Paths: -------------- branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/DTE.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/DummyIV.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/ILexiconConfiguration.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/IVUtility.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/LegacyTermIdUtility.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/LexiconConfiguration.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/TermId.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/VTE.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSD.java Added Paths: ----------- branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/AbstractIV.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/AbstractInlineIV.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/AbstractLiteralIV.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/UUIDBNodeIV.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/UUIDLiteralIV.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDBooleanIV.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDByteIV.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDDecimalIV.java 
branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDDoubleIV.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDFloatIV.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDIntIV.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDIntegerIV.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDLongIV.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDShortIV.java Removed Paths: ------------- branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/AbstractDatatypeLiteralInternalValue.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/AbstractInlineInternalValue.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/AbstractInternalValue.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/AbstractLiteralInternalValue.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/BNodeInternalValue.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/UUIDInternalValue.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDBooleanInternalValue.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDByteInternalValue.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDDecimalInternalValue.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDDoubleInternalValue.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDFloatInternalValue.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDIntInternalValue.java 
branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDIntegerInternalValue.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDLongInternalValue.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDShortInternalValue.java Deleted: branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/AbstractDatatypeLiteralInternalValue.java =================================================================== --- branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/AbstractDatatypeLiteralInternalValue.java 2010-07-21 18:04:22 UTC (rev 3259) +++ branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/AbstractDatatypeLiteralInternalValue.java 2010-07-21 19:48:42 UTC (rev 3260) @@ -1,128 +0,0 @@ -/** - -Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. - -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ -package com.bigdata.rdf.internal; - -import java.math.BigDecimal; -import java.math.BigInteger; - -import com.bigdata.rdf.model.BigdataLiteral; - -/** - * Abstract base class for RDF datatype literals adds primitive data type - * value access methods. 
- * <p> - * {@inheritDoc} - * - * @todo What are the SPARQL semantics for casting among these datatypes? - * They should probably be reflected here since that is the real use - * case. I believe that those casts also require failing a solution if - * the cast is not legal, in which case these methods might not be all - * that useful. - * <p> - * Also see BigdataLiteralImpl and XMLDatatypeUtil. It handles the - * conversions by reparsing, but there is no reason to do that here - * since we have the canonical point in the value space. - * - * @see http://www.w3.org/TR/rdf-sparql-query/#FunctionMapping, The casting - * rules for SPARQL - * - * @author <a href="mailto:tho...@us...">Bryan - * Thompson</a> - * @version $Id: TestEncodeDecodeKeys.java 2753 2010-05-01 16:36:59Z - * thompsonbry $ - */ -abstract public class AbstractDatatypeLiteralInternalValue<V extends BigdataLiteral, T> - extends AbstractLiteralInternalValue<V, T> { - - /** - * - */ - private static final long serialVersionUID = 5962615541158537189L; - - protected AbstractDatatypeLiteralInternalValue(final DTE dte) { - - super(dte); - - } - - final public long getTermId() { - throw new UnsupportedOperationException(); - } - - /** Return the <code>boolean</code> value of <i>this</i> value. */ - abstract public boolean booleanValue(); - - /** - * Return the <code>byte</code> value of <i>this</i> value. - * <p> - * Note: Java lacks unsigned data types. For safety, operations on - * unsigned XSD data types should be conducted after a widening - * conversion. For example, operations on <code>xsd:unsignedByte</code> - * should be performed using {@link #shortValue()}. - */ - abstract public byte byteValue(); - - /** - * Return the <code>short</code> value of <i>this</i> value. - * <p> - * Note: Java lacks unsigned data types. For safety, operations on - * unsigned XSD data types should be conducted after a widening - * conversion. 
For example, operations on <code>xsd:unsignedShort</code> - * should be performed using {@link #intValue()}. - */ - abstract public short shortValue(); - - /** - * Return the <code>int</code> value of <i>this</i> value. - * <p> - * Note: Java lacks unsigned data types. For safety, operations on - * unsigned XSD data types should be conducted after a widening - * conversion. For example, operations on <code>xsd:unsignedInt</code> - * should be performed using {@link #longValue()}. - */ - abstract public int intValue(); - - /** - * Return the <code>long</code> value of <i>this</i> value. - * <p> - * Note: Java lacks unsigned data types. For safety, operations on - * unsigned XSD data types should be conducted after a widening - * conversion. For example, operations on <code>xsd:unsignedLong</code> - * should be performed using {@link #integerValue()}. - */ - abstract public long longValue(); - - /** Return the <code>float</code> value of <i>this</i> value. */ - abstract public float floatValue(); - - /** Return the <code>double</code> value of <i>this</i> value. */ - abstract public double doubleValue(); - - /** Return the {@link BigInteger} value of <i>this</i> value. */ - abstract public BigInteger integerValue(); - - /** Return the {@link BigDecimal} value of <i>this</i> value. */ - abstract public BigDecimal decimalValue(); - -} \ No newline at end of file Copied: branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/AbstractIV.java (from rev 3258, branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/AbstractInternalValue.java) =================================================================== --- branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/AbstractIV.java (rev 0) +++ branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/AbstractIV.java 2010-07-21 19:48:42 UTC (rev 3260) @@ -0,0 +1,680 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. 
+ +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on May 3, 2010 + */ + +package com.bigdata.rdf.internal; + +import java.io.DataOutput; +import java.io.IOException; +import java.util.UUID; +import org.deri.iris.basics.Literal; +import org.openrdf.model.Value; +import com.bigdata.btree.keys.IKeyBuilder; +import com.bigdata.btree.keys.KeyBuilder; +import com.bigdata.rdf.model.BigdataValue; + +/** + * Abstract base class for the inline representation of an RDF Value (the + * representation which is encoded in to the keys of the statement indices). + * This class is responsible for combining the {@link VTE} and the {@link DTE} + * together into the flags byte used as a common prefix for all keys formed from + * RDF Values regardless of whether they are based on an assigned term + * identifier or the inlining of the RDF Value. + * + * <h3>Binary record format</h3> + * + * We currently have 14 built-in (or intrinsic) data types (see {@link DTE}). + * Each of those types has a natural order which we can encode and decode from + * the B+Tree key. In general, there is a relatively limited set of interesting + * intrinsic codings, which is important since we will dedicate just 4 bits + * to code the natural order of the value space, which allows only 16 + * distinctions. 
Given that we have 14 intrinsic data types, that leaves room + * for just two more. One of those bits is reserved against (see + * {@link DTE#Reserved1}). The other bit is reserved for extensibility in the + * framework itself as described below (see {@link DTE#Extension}). + * <p> + * The header byte contains various bit flags which are laid out as follows: + * + * <pre> + * [valueType] : 2 bits + * [inline] : 1 bit + * [extension] : 1 bit + * [dataTypeCode] : 4 bits + * </pre> + * + * <dl> + * <dt>valueType</dt> + * <dd>These 2 bits distinguish between URIs, Literals, Blank Nodes, and + * statement identifiers (SIDs). These bits are up front and therefore partition + * the key space first by the RDF Value type. See {@link VTE} which governs + * these bits.</dd> + * <dt>inline</dt> + * <dd>This bit indicates whether the value is inline or represented by a term + * identifier in the key. This bit is set based on how a given triple store or + * quad store instance is configured. However, because the bit is present in the + * flags, we know how to decode the key without reference to this configuration + * metadata.</dd> + * <dt>extension</dt> + * <dd>This bit is ignored (and should be zero) unless the RDF Value is a + * Literal with a data type URI which is being inlined. For data type literals, + * this bit is set if the actual data type is not one of those which we handle + * intrinsically but is one of those which has been registered (by the + * application) as an "extended" data type projected onto one of the intrinsic + * data types. Thus, this bit partitions the key space into the intrinsic data + * types and the extended data types.<br/> + * When <code>true</code>, this bit signals that information about the actual + * RDF Value data type will follow (see below). 
When <code>false</code>, the + * datatype URI is directly recoverable (for a data type Literal) from the + * <code>dataTypeCode</code>.</dd> + * <dt>dataTypeCode</dt> + * <dd>These 4 bits indicate the intrinsic data type for the inline value and + * are ignored (and should be zero) unless a data type Literal is being inlined. + * These bits partition the key space. However, since the <code>extension</code> bit + * comes first this will not interleave inline values for intrinsic and extended + * data types having the same <code>dataTypeCode</code>. <br/> + * Note: The <code>dataTypeCode</code> <code>0xf</code> ({@link DTE#Extension}) + * is reserved for extending the set of intrinsic data types. When the code is + * <code>0xf</code> the next byte must be considered as well to determine the + * actual intrinsic data type code.</dd> + * </dl> + * + * <pre> + * ---------- byte boundary ---------- + * </pre> + * + * If <code>extension</code> was true, then the next byte(s) encode + * information about the source data type URI and the key space will be + * partitioned based on the extended data type URI [the precise format of that + * data has not yet been decided -- see below]. + * + * <pre> + * ---------- byte boundary ---------- + * </pre> + * + * The unsigned byte[] representation of the value in the value space for one of + * the intrinsic types. The length of this byte[] may be directly determined + * from the [dataTypeCode] for most data types. However, for xsd:integer and + * xsd:decimal, the length is part of the representation. + * + * <pre> + * ---------- byte boundary and end of the record ---------- + * </pre> + * + * <h3>Extensibility</h3> + * + * There are three core use cases for extensibility: + * <dl> + * <dt>projections</dt> + * <dd>A projection takes an application specific data type and maps it onto one + * of the intrinsic data types (int, float, double, etc). 
Projections provide an + * extensible mechanism which allows an application to benefit from inline + * representation of RDF Values and allows the query optimizer to choose + * key-range scans for application defined data types if they can be projected + * onto intrinsic data types. For example, if you define an application specific + * data type <code>foo:milliseconds</code> representing milliseconds since the + * epoch, then the value space of that data type can be projected onto an + * <code>xsd:long</code>.</dd> + * <dt>enumerations</dt> + * <dd>An enumeration is an application specific data type having a specific set + * of values. Those values are then projected onto an intrinsic data type such + * as <code>byte</code> (256 distinctions) or <code>short</code> (64k + * distinctions). Enumerations make it possible to inline application specific + * data types while benefiting from XSD validation of those RDF Values. When an + * enumeration is registered, the order in which the members of the enumeration + * are given may optionally specify the natural order of that enumeration. The + * natural order is imposed by projecting the first member of the enumeration + * onto ZERO, the second member onto ONE, etc. An enumeration with a natural + * order will be sorted based on that defined order and query optimizations may + * perform key-range scans informed by that natural order.<br/> + * Enumerations may be used in cases where you might otherwise use short + * character codes. For example, an enumeration could be defined for the two + * character abbreviations for the 50 US States. That enumeration could be + * mapped onto a single byte.</dd> + * <dt>custom indices</dt> + * <dd>The best example here is spatial data, which requires literals which + * represent points, rectangles, circles, arcs, clouds, etc to be inserted into + * special spatial indices. 
Queries must be aware of spatial data and must be + * rewritten to run against the appropriate spatial indices.<br/> + * Another use case would be carrying specialized indices for bioinformatics or + * genomics data.</dd> + * </dl> + * Note: Both projected and enumerated extensible data types MAY map many RDF + * Values onto the same internal value but each internal value MUST map onto a + * single RDF Value (materialization must be deterministic). This can be seen as + * normalization imposed by the database. + * + * @todo Note: There can be more than one URI for the same XSD datatype (there + * is more than one accepted namespace - see <a + * href="http://www.w3.org/TR/xmlschema-2/#namespaces"> XML Schema + * Datatypes namespaces </a>). I propose that we collapse these by default + * onto a canonical datatype URI. + * + * @todo For an extensible data type which is being projected onto an intrinsic + * data type we would need both (a) a method to project the RDF Value onto + * the appropriate intrinsic data type; and (b) a method to materialize an + * RDF Value from the inline representation. + * <p> + * If we put the registrations into their own index, then we could use a + * more compact representation (the term identifier of the datatype URI is + * 8 bytes, but we could do with 2 or 4 bytes). Alternatively, we could + * use the LongPacker to pack an unsigned long integer into as few bytes + * as possible. This would break the natural ordering across the + * dataTypeIds, but I can not see how that would matter since the term + * identifiers are essentially arbitrary anyway so their order has little + * value. + * + * @todo Can we inline the language code for a literal? I think that the + * language code must be ASCII and might be restricted to two characters. + * This might use up our {@link DTE#Reserved1} bit. + * + * @todo One consequence of this refactor is that you must use equals() rather + * than == to compare internal values, including term identifiers. 
This + * boils down to verifying that the two internal values are the same type + * (same VTE, DTE, etc) and have the same value (termId, long, etc). That + * can all be done rather quickly, but it is more overhead than testing a + * == b. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id: TestEncodeDecodeKeys.java 2753 2010-05-01 16:36:59Z thompsonbry + * $ + * @param <V> + * The generic type for the RDF {@link Value} implementation. + * @param <T> + * The generic type for the inline value. + */ +public abstract class AbstractIV<V extends BigdataValue, T> + implements IV<V, T> { + + /** + * + */ + private static final long serialVersionUID = 4710700756635103123L; + + /** + * Bit flags indicating the kind of RDF Value ({@link VTE}), whether the RDF + * Value is inline, whether this is an extension datatype, and the natural + * order and binary representation of the inline value (see {@link DTE}). + * + * @see VTE + * @see DTE + */ + private final byte flags; + + /** + * The RDF Value type (URI, BNode, Literal or Statement) and the data type + * are combined and stored in a single byte together with whether the RDF + * value has been inlined (an <i>inline</i> bit) and whether the RDF Value + * is an extended data type (the <i>extension</i> bit). The <i>vte</i> has 4 + * distinctions and is thus TWO (2) bits wide. The <i>dte</i> allows for up + * to 16 distinctions and is FOUR (4) bits wide. The bits allocated to these + * sets of distinctions are combined within a single byte as follows: + * + * <pre> + * [vviedddd] + * </pre> + * + * where <code>v</code> is a {@link VTE} bit, <code>i</code> is the + * <i>inline</i> bit, <code>e</code> is the extension bit, and + * <code>d</code> is a {@link DTE} bit. + * + * @param vte + * The RDF Value type (URI, BNode, Literal, or Statement). + * @param inline + * <code>true</code> iff the RDF value will be represented inline + * in the key. 
When <code>false</code>, the term identifier of + * the RDF Value will be represented inline instead of its actual + * value. + * @param extension + * When <code>true</code>, the actual RDF data type URI differs + * from the intrinsic data type (the {@link DTE}) but has been + * projected onto the natural order of the intrinsic data type. + * @param dte + * The internal datatype for the RDF value (termId, xsd:int, + * xsd:long, xsd:float, xsd:double, etc). + * + * @see VTE + * @see DTE + */ + protected AbstractIV(final VTE vte, final boolean inline, + final boolean extension, final DTE dte) { + + // vte << 6 bits (it is in the high 2 bits). + // inline << 5 bits + // extension << 4 bits + // dte is in the low 4 bits. + this( (byte) ((// + (((int) vte.v) << VTE_SHIFT)// + | ((inline ? 1 : 0) << INLINE_SHIFT)// + | ((extension ? 1 : 0) << EXTENSION_SHIFT) // + | (dte.v)// + ) & 0xff)); + + } + + /** + * Constructor used when decoding since you already have the flags. + * + * @param flags + * The flags. + */ + protected AbstractIV(final byte flags) { + + this.flags = flags; + + } + + final public byte flags() { + + return flags; + + } + + /** + * The #of bits (SIX) that the {@link VTE} is shifted to + * the left when encoding it into the {@link #flags}. + */ + private final static int VTE_SHIFT = 6; + + /** + * The bit mask that is bit-wise ANDed with the flags in order to reveal the + * {@link VTE}. The high TWO (2) bits of the low byte in the mask are set. + */ + private final static int VTE_MASK = 0xC0; + + /** + * The #of bits (FIVE) that the <i>inline</i> flag is shifted to the left + * when encoding it into the {@link #flags}. + */ + private final static int INLINE_SHIFT = 5; + + /** + * The bit mask that is bit-wise ANDed with the flags in order to reveal + * the <code>inline</code> bit. 
+ */ + private final static int INLINE_MASK = 0x20; + + /** + * The #of bits (FOUR) that the <i>extension</i> flag is shifted to the left + * when encoding it into the {@link #flags}. + */ + private final static int EXTENSION_SHIFT = 4; + + /** + * The bit mask that is bit-wise ANDed with the flags in order to reveal + * the <code>extension</code> bit. + */ + private final static int EXTENSION_MASK = 0x10; + + /** + * The bit mask that is bit-wise ANDed with the flags in order to reveal the + * {@link DTE}. The low FOUR (4) bits in the mask are set. + */ + private final static int DTE_MASK = 0x0f; + + /** + * Return <code>true</code> if the flags byte has its <code>inline</code> + * bit set. + * + * @param flags + * The flags byte. + */ + static public boolean isInline(final byte flags) { + + return (flags & INLINE_MASK) != 0; + + } + + /** + * FIXME I think we really need to be able to say from the flags whether + * an IV is null or non-null. The context position of statements can + * often be null. + * + * @param flags + * The flags byte. + */ + static public boolean isNull(final byte flags) { + + return false; + + } + + /** + * Return <code>true</code> if the flags byte has its <code>extension</code> + * bit set. + * + * @param flags + * The flags byte. + * + * @todo unit test for this. + */ + static public boolean isExtension(final byte flags) { + + return (flags & EXTENSION_MASK) != 0; + + } + + final public VTE getVTE() { + + return VTE + .valueOf((byte) (((flags & VTE_MASK) >>> VTE_SHIFT) & 0xff)); + + } + + /** + * Return the {@link DTE} for this {@link IV}. + */ + final public DTE getDTE() { + + return DTE.valueOf((byte) ((flags & DTE_MASK) & 0xff)); + + } + + /** + * Helper method decodes a flags byte as found in a statement index key to + * an {@link VTE}. + * + * @param flags + * The flags byte. 
+ * + * @return The {@link VTE} + */ + static final public VTE getInternalValueTypeEnum( + final byte flags) { + + return VTE + .valueOf((byte) (((flags & VTE_MASK) >>> VTE_SHIFT) & 0xff)); + + } + + /** + * Helper method decodes a flags byte as found in a statement index key to + * an {@link DTE}. + * + * @param flags + * The flags byte. + * @return The {@link DTE} + */ + static public DTE getInternalDataTypeEnum(final byte flags) { + + return DTE.valueOf((byte) ((flags & DTE_MASK) & 0xff)); + + } + + final public boolean isLiteral() { + + return (flags & VTE_MASK) >>> VTE_SHIFT == VTE.LITERAL.v; + + } + + final public boolean isBNode() { + + return (flags & VTE_MASK) >>> VTE_SHIFT == VTE.BNODE.v; + + } + + final public boolean isURI() { + + return (flags & VTE_MASK) >>> VTE_SHIFT == VTE.URI.v; + + } + + final public boolean isStatement() { + + return (flags & VTE_MASK) >>> VTE_SHIFT == VTE.STATEMENT.v; + + } + + /** + * {@inheritDoc} + * <p> + * This implementation based on the <code>inline</code> bit flag. This can + * be overridden in many derived classes which have compile time knowledge + * of whether the RDF value is inline or not. + */ + public boolean isInline() { + return isInline(flags); + } + + /** + * {@inheritDoc} + * <p> + * This implementation based on the <code>extension</code> bit flag. Since + * the extension flag is only used for datatype literals, this method can be + * overridden in many derived classes which have compile time knowledge of + * whether the value is an RDF {@link Literal} or not. + */ + public boolean isExtension() { + return isExtension(flags); + } + + /** + * {@inheritDoc} + * <p> + * This implementation based on the <code>inline</code> bit flag. This can + * be overridden in many derived classes which have compile time knowledge + * of whether the RDF value is inline or not. 
+ */ + public boolean isTermId() { + return !isInline(); + } + + final public boolean isNumeric() { + return isInline() && getDTE().isNumeric(); + } + + final public boolean isSignedNumeric() { + return isInline() && getDTE().isSignedNumeric(); + } + + final public boolean isUnsignedNumeric() { + return isInline() && getDTE().isUnsignedNumeric(); + } + + final public boolean isFixedNumeric() { + return isInline() && getDTE().isFixedNumeric(); + } + + final public boolean isBigNumeric() { + return isInline() && getDTE().isBigNumeric(); + } + + final public boolean isFloatingPointNumeric() { + return isInline() && getDTE().isFloatingPointNumeric(); + } + + /** + * Return a hash code based on the value of the point in the value space. + */ + abstract public int hashCode(); + + /** + * Return true iff the two values are the same point in the same value + * space. Points in different value spaces (as identified by different + * datatype URIs) are NOT equal even if they have the same value in the + * corresponding primitive data type. + */ + abstract public boolean equals(Object o); + + /** + * Imposes an ordering of IVs based on their natural sort ordering in the + * index as unsigned byte[]s. + */ + public int compareTo(final IV o) { + + if (this == o) + return 0; + + if (o == null) + return 1; + + /* + * First order based on the flags byte. This is the first byte of the + * key, so it always partitions the key space and hence provides the + * initial dimension of the total IV ordering. + * + * Note: This comparison will always sort out things such that URIs, + * Literals, BNodes, and SIDs will never compare as equals. It will also + * sort out extension types and datatype literals with a natural + * datatype. 
+ */ + int ret = (int) flags - (int) o.flags(); + + if (ret < 0) + return -1; + + if (ret > 0) + return 1; + + if(this instanceof TermId) { + + final long tid1 = ((TermId<?>) this).getTermId(); + final long tid2 = ((TermId<?>) o).getTermId(); + + /* + * Note: logic avoids possible overflow of [long] by not computing + * the difference between two longs. + */ + + ret = tid1 < tid2 ? -1 : tid1 > tid2 ? 1 : 0; + + return ret; + + } + + if(isExtension()) { + /* + * @todo we may need to handle extension types here explicitly once + * their semantics are firmed up further. + */ + throw new UnsupportedOperationException(); + } + + /* + * At this point we are comparing two IVs of the same intrinsic + * datatype. That is, they are both datatype literals expressed using + * one of the predefined datatypes. These can be compared by directly + * comparing their primitive values. E.g., long to long, int to int, + * etc. + */ + return _compareTo(o); + + } + + /** + * Compare two {@link IV}s having the same intrinsic datatype. + * + * @todo This should probably be moved to + * {@link AbstractInlineIV} and implementations provided + * for each concrete instance of that abstract class. + */ + protected int _compareTo(IV o) { + + throw new UnsupportedOperationException(getClass().toString()); + + } + + /** + * {@inheritDoc} + * + * FIXME Handle extension types, probably in a subclass, and maybe requiring + * the caller to pass in an object with the context for the extension types. + */ + public IKeyBuilder encode(final IKeyBuilder keyBuilder) { + + // First emit the flags byte. + keyBuilder.append(flags); + + if (!isInline()) { + /* + * Since the RDF Value is not inline, it will be represented as a + * term identifier. + */ + keyBuilder.append(getTermId()); + return keyBuilder; + } + + /* + * Append the natural value type representation. + * + * Note: We have to handle the unsigned byte, short, int and long values + * specially to get the correct total key order. 
+ */ + final DTE dte = getDTE(); + + final AbstractLiteralIV<?, ?> t = (AbstractLiteralIV<?, ?>) this; + + switch (dte) { + case XSDBoolean: + keyBuilder.append((byte) (t.booleanValue() ? 1 : 0)); + break; + case XSDByte: + keyBuilder.append(t.byteValue()); + break; + case XSDShort: + keyBuilder.append(t.shortValue()); + break; + case XSDInt: + keyBuilder.append(t.intValue()); + break; + case XSDLong: + keyBuilder.append(t.longValue()); + break; + case XSDFloat: + keyBuilder.append(t.floatValue()); + break; + case XSDDouble: + keyBuilder.append(t.doubleValue()); + break; + case XSDInteger: + keyBuilder.append(t.integerValue()); + break; + case XSDDecimal: + keyBuilder.append(t.decimalValue()); + break; + case UUID: + keyBuilder.append((UUID)t.getInlineValue()); + break; +// case XSDUnsignedByte: +// keyBuilder.appendUnsigned(t.byteValue()); +// break; +// case XSDUnsignedShort: +// keyBuilder.appendUnsigned(t.shortValue()); +// break; +// case XSDUnsignedInt: +// keyBuilder.appendUnsigned(t.intValue()); +// break; +// case XSDUnsignedLong: +// keyBuilder.appendUnsigned(t.longValue()); +// break; + default: + throw new AssertionError(toString()); + } + + return keyBuilder; + + } + +} Copied: branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/AbstractInlineIV.java (from rev 3258, branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/AbstractInlineInternalValue.java) =================================================================== --- branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/AbstractInlineIV.java (rev 0) +++ branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/AbstractInlineIV.java 2010-07-21 19:48:42 UTC (rev 3260) @@ -0,0 +1,91 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... 
+ +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +package com.bigdata.rdf.internal; + +import org.openrdf.model.Value; + +import com.bigdata.rdf.model.BigdataValue; + +/** + * Abstract base class for inline RDF values (literals, blank nodes, and + * statement identifiers can be inlined). + * <p> + * {@inheritDoc} + * + * @author <a href="mailto:tho...@us...">Bryan + * Thompson</a> + * @version $Id: TestEncodeDecodeKeys.java 2753 2010-05-01 16:36:59Z + * thompsonbry $ + */ +abstract public class AbstractInlineIV<V extends BigdataValue, T> + extends AbstractIV<V, T> { + + /** + * + */ + private static final long serialVersionUID = -2847844163772097836L; + + protected AbstractInlineIV(final VTE vte, + final DTE dte) { + + super(vte, true/* inline */, false/* extension */, dte); + + } + + /** + * Returns the String-value of a Value object. This returns either a + * Literal's label, a URI's URI or a BNode's ID. + * + * @see Value#stringValue() + */ + abstract public String stringValue(); + + /** + * Always returns <code>true</code> since the value is inline. + */ + final public boolean isInline() { + return true; + } + + /** + * Always returns <code>false</code> since the value is inline. + */ + final public boolean isTermId() { + return false; + } + + /** + * Always returns <code>false</code> since the value is inline. 
+ */ + final public boolean isNull() { + return false; + } + + public String toString() { + + return super.getDTE() + "(" + stringValue() + ")"; + + } + +} \ No newline at end of file Deleted: branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/AbstractInlineInternalValue.java =================================================================== --- branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/AbstractInlineInternalValue.java 2010-07-21 18:04:22 UTC (rev 3259) +++ branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/AbstractInlineInternalValue.java 2010-07-21 19:48:42 UTC (rev 3260) @@ -1,91 +0,0 @@ -/** - -Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. - -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ -package com.bigdata.rdf.internal; - -import org.openrdf.model.Value; - -import com.bigdata.rdf.model.BigdataValue; - -/** - * Abstract base class for inline RDF values (literals, blank nodes, and - * statement identifiers can be inlined). 
- * <p> - * {@inheritDoc} - * - * @author <a href="mailto:tho...@us...">Bryan - * Thompson</a> - * @version $Id: TestEncodeDecodeKeys.java 2753 2010-05-01 16:36:59Z - * thompsonbry $ - */ -abstract public class AbstractInlineInternalValue<V extends BigdataValue, T> - extends AbstractInternalValue<V, T> { - - /** - * - */ - private static final long serialVersionUID = -2847844163772097836L; - - protected AbstractInlineInternalValue(final VTE vte, - final DTE dte) { - - super(vte, true/* inline */, false/* extension */, dte); - - } - - /** - * Returns the String-value of a Value object. This returns either a - * Literal's label, a URI's URI or a BNode's ID. - * - * @see Value#stringValue() - */ - abstract public String stringValue(); - - /** - * Always returns <code>true</code> since the value is inline. - */ - final public boolean isInline() { - return true; - } - - /** - * Always returns <code>false</code> since the value is inline. - */ - final public boolean isTermId() { - return false; - } - - /** - * Always returns <code>false</code> since the value is inline. - */ - final public boolean isNull() { - return false; - } - - public String toString() { - - return super.getDTE() + "(" + stringValue() + ")"; - - } - -} \ No newline at end of file Deleted: branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/AbstractInternalValue.java =================================================================== --- branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/AbstractInternalValue.java 2010-07-21 18:04:22 UTC (rev 3259) +++ branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/AbstractInternalValue.java 2010-07-21 19:48:42 UTC (rev 3260) @@ -1,680 +0,0 @@ -/** - -Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. - -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... 
- -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ -/* - * Created on May 3, 2010 - */ - -package com.bigdata.rdf.internal; - -import java.io.DataOutput; -import java.io.IOException; -import java.util.UUID; -import org.deri.iris.basics.Literal; -import org.openrdf.model.Value; -import com.bigdata.btree.keys.IKeyBuilder; -import com.bigdata.btree.keys.KeyBuilder; -import com.bigdata.rdf.model.BigdataValue; - -/** - * Abstract base class for the inline representation of an RDF Value (the - * representation which is encoded in to the keys of the statement indices). - * This class is responsible for combining the {@link VTE} and the {@link DTE} - * together into the flags byte used as a common prefix for all keys formed from - * RDF Values regardless of whether they are based on an assigned term - * identifier or the inlining of the RDF Value. - * - * <h3>Binary record format</h3> - * - * We currently have 14 built-in (or intrinsic) data types (see {@link DTE}). - * Each of those types has a natural order which we can encode and decode from - * the B+Tree key. In general, there is a relatively limited set of interesting - * intrinsic codings, which is important since we will dedicate just 4 bits for - * to code the natural order of the value space, which is just only 16 - * distinctions. Given that we have 14 intrinsic data types, that leaves room - * for just two more. 
One of those bits is reserved against (see - * {@link DTE#Reserved1}). The other bit is reserved for extensibility in the - * framework itself as described below (see {@link DTE#Extension}). - * <p> - * The header byte contains various bit flags which are laid out as follows: - * - * <pre> - * [valueType] : 2 bits - * [inline] : 1 bit - * [extension] : 1 bit - * [dataTypeCode] : 4 bits - * </pre> - * - * <dl> - * <dt>valueType</dt> - * <dd>These 2 bits distinguish between URIs, Literals, Blank Nodes, and - * statement identifiers (SIDs). These bits are up front and therefore partition - * the key space first by the RDF Value type. See {@link VTE} which governs - * these bits.</dd> - * <dt>inline</dt> - * <dd>This bit indicates whether the value is inline or represented by a term - * identifier in the key. This bit is set based on how a given triple store or - * quad store instance is configured. However, because the bit is present in the - * flags, we know how to decode the key without reference to this configuration - * metadata.</dd> - * <dt>extension</dt> - * <dd>This bit is ignored (and should be zero) unless the RDF Value is a - * Literal with a data type URI which is being inlined. For data type literals, - * this bit is set if the actual data type is not one of those which we handle - * intrinsically but is one of those which has been registered (by the - * application) as an "extended" data type projected onto one of the intrinsic - * data types. Thus, this bit partitions the key space into the intrinsic data - * types and the extended data types.<br/> - * When <code>true</code>, this bit signals that information about the actual - * RDF Value data type will follow (see below). 
When <code>false</code>, the - * datatype URI is directly recoverable (for a data type Literal) from the - * <code>dataTypeCode</code>.</dd> - * <dt>dataTypeCode</dt> - * <dd>These 4 bits indicate the intrinsic data type for the inline value and - * are ignored (and should be zero) unless a data type Literal is being inlined. - * These bits partition the key space. However, since the <code>extension</code> bit - * comes first this will not interleave inline values for intrinsic and extended - * data types having the same <code>dataTypeCode</code>. <br/> - * Note: The <code>dataTypeCode</code> <code>0xf</code> ({@link DTE#Extension}) - * is reserved for extending the set of intrinsic data types. When the code is - * <code>0xf</code> the next byte must be considered as well to determine the - * actual intrinsic data type code.</dd> - * </dl> - * - * <pre> - * ---------- byte boundary ---------- - * </pre> - * - * If <code>extension</code> was true, then the next byte(s) encode - * information about the source data type URI and the key space will be - * partitioned based on the extended data type URI [the precise format of that - * data has not yet been decided -- see below]. - * - * <pre> - * ---------- byte boundary ---------- - * </pre> - * - * The unsigned byte[] representation of the value in the value space for one of - * the intrinsic types. The length of this byte[] may be directly determined - * from the [dataTypeCode] for most data types. However, for xsd:integer and - * xsd:decimal, the length is part of the representation. - * - * <pre> - * ---------- byte boundary and end of the record ---------- - * </pre> - * - * <h3>Extensibility</h3> - * - * There are three core use cases for extensibility: - * <dl> - * <dt>projections</dt> - * <dd>A projection takes an application specific data type and maps it onto one - * of the intrinsic data types (int, float, double, etc). 
Projections provide an - * extensible mechanism which allows an application to benefit from inline - * representation of RDF Values and allows the query optimizer to choose - * key-range scans for application defined data types if they can be projected - * onto intrinsic data types. For example, if you define an application specific - * data type <code>foo:milliseconds</code> representing milliseconds since the - * epoch, then the value space of that data type can be projected onto an - * <code>xsd:long</code>.</dd> - * <dt>enumerations</dt> - * <dd>An enumeration is an application specific data type having a specific set - * of values. Those values are then projected onto an intrinsic data type such - * as <code>byte</code> (256 distinctions) or <code>short</code> (64k - * distinctions). Enumerations make it possible to inline application specific - * data types while benefiting from XSD validation of those RDF Values. When an - * enumeration is registered, the order in which the members of the enumeration - * are given may optionally specify the natural order of that enumeration. The - * natural order is imposed by projecting the first member of the enumeration - * onto ZERO, the second member onto ONE, etc. An enumeration with a natural - * order will be sorted based on that defined order and query optimizations may - * perform key-range scans informed by that natural order.<br/> - * Enumerations may be used in cases where you might otherwise use short - * character codes. For example, an enumeration could be defined for the two - * character abbreviations for the 50 US States. That enumeration could be - * mapped onto a single byte.</dd> - * <dt>custom indices</dt> - * <dd>The best example here is spatial data, which requires literals which - * represent points, rectangles, circles, arcs, clouds, etc to be inserted into - * special spatial indices. 
Queries must be aware of spatial data and must be - * rewritten to run against the appropriate spatial indices.<br/> - * Another use case would be carrying specialized indices for bioinformatics or - * genomics data.</dd> - * </dl> - * Note: Both projected and enumerated extensible data types MAY map many RDF - * Values onto the same internal value but each internal value MUST map onto a - * single RDF Value (materialization must be deterministic). This can be seen as - * normalization imposed by the database. - * - * @todo Note: There can be more than one URI for the same XSD datatype (there - * is more than one accepted namespace - see <a - * href="http://www.w3.org/TR/xmlschema-2/#namespaces"> XML Schema - * Datatypes namespaces </a>). I propose that we collapse these by default - * onto a canonical datatype URI. - * - * @todo For an extensible data type which is being projected onto an intrinsic - * data type we would need both (a) a method to project the RDF Value onto - * the appropriate intrinsic data type; and (b) a method to materialize an - * RDF Value from the inline representation. - * <p> - * If we put the registrations into their own index, then we could use a - * more compact representation (the term identifier of the datatype URI is - * 8 bytes, but we could do with 2 or 4 bytes). Alternatively, we could - * use the LongPacker to pack an unsigned long integer into as few bytes - * as possible. This would break the natural ordering across the - * dataTypeIds, but I can not see how that would matter since the term - * identifiers are essentially arbitrary anyway so their order has little - * value. - * - * @todo Can we inline the language code for a literal? I think that the - * language code must be ASCII and might be restricted to two characters. - * This might use up our {@link DTE#Reserved1} bit. - * - * @todo One consequence of this refactor is that you must use equals() rather - * than == to compare internal values, including term identifiers. 
This - * boils down to verifying that the two internal values are the same type - * (same VTE, DTE, etc) and have the same value (termId, long, etc). That - * can all be done rather quickly, but it is more overhead than testing a - * == b. - * - * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id: TestEncodeDecodeKeys.java 2753 2010-05-01 16:36:59Z thompsonbry - * $ - * @param <V> - * The generic type for the RDF {@link Value} implementation. - * @param <T> - * The generic type for the inline value. - */ -public abstract class AbstractInternalValue<V extends BigdataValue, T> - implements IV<V, T> { - - /** - * - */ - private static final long serialVersionUID = 4710700756635103123L; - - /** - * Bit flags indicating the kind of RDF Value ({@link VTE}), whether the RDF - * Value is inline, whether this is an extension datatype, and the natural - * order and binary representation of the inline value (see {@link DTE}). - * - * @see VTE - * @see DTE - */ - private final byte flags; - - /** - * The RDF Value type (URI, BNode, Literal or Statement) and the data type - * are combined and stored in a single byte together with whether the RDF - * value has been inlined (an <i>inline</i> bit) and whether the RDF Value - * is an extended data type (the <i>extension</i> bit). The <i>vte</i> has 4 - * distinctions and is thus TWO (2) bits wide. The <i>dte</i> allows for up - * to 16 distinctions and is FOUR (4) bits wide. The bits allocated to these - * sets of distinctions are combined within a single byte as follows: - * - * <pre> - * [vviedddd] - * </pre> - * - * where <code>v</code> is a {@link VTE} bit, <code>i</code> is the - * <i>inline</i> bit, <code>e</code> is the extension bit, and - * <code>d</code> is a {@link DTE} bit. - * - * @param vte - * The RDF Value type (URI, BNode, Literal, or Statement). - * @param inline - * <code>true</code> iff the RDF value will be represented inline - * in the key. 
When <code>false</code>, the term identifier of - * the RDF Value will be represented inline instead of its actual - * value. - * @param extension - * When <code>true</code>, the actual RDF data type URI differs - * from the intrinsic data type (the {@link DTE}) but has been - * projected onto the natural order of the intrinsic data type. - * @param dte - * The internal datatype for the RDF value (termId, xsd:int, - * xsd:long, xsd:float, xsd:double, etc). - * - * @see VTE - * @see DTE - */ - protected AbstractInternalValue(final VTE vte, final boolean inline, - final boolean extension, final DTE dte) { - - // vte << 6 bits (it is in the high 2 bits). - // inline << 5 bits - // extension << 4 bits - // dte is in the low 4 bits. - this( (byte) ((// - (((int) vte.v) << VTE_SHIFT)// - | ((inline ? 1 : 0) << INLINE_SHIFT)// - | ((extension ? 1 : 0) << EXTENSION_SHIFT) // - | (dte.v)// - ) & 0xff)); - - } - - /** - * Constructor used when decoding since you already have the flags. - * - * @param flags - * The flags. - */ - protected AbstractInternalValue(final byte flags) { - - this.flags = flags; - - } - - final public byte flags() { - - return flags; - - } - - /** - * The #of bits (SIX) that the {@link VTE} is shifted to - * the left when encoding it into the {@link #flags}. - */ - private final static int VTE_SHIFT = 6; - - /** - * The bit mask that is bit-wise ANDed with the flags in order to reveal the - * {@link VTE}. The high TWO (2) bits of the low byte in the mask are set. - */ - private final static int VTE_MASK = 0xC0; - - /** - * The #of bits (FIVE) that the <i>inline</i> flag is shifted to the left - * when encoding it into the {@link #flags}. - */ - private final static int INLINE_SHIFT = 5; - - /** - * The bit mask that is bit-wise ANDed with the flags in order to reveal - * the <code>inline</code> bit. 
- */ - private final static int INLINE_MASK = 0x20; - - /** - * The #of bits (FOUR) that the <i>extension</i> flag is shifted to the left - * when encoding it into the {@link #flags}. - */ - private final static int EXTENSION_SHIFT = 4; - - /** - * The bit mask that is bit-wise ANDed with the flags in order to reveal - * the <code>extension</code> bit. - */ - private final static int EXTENSION_MASK = 0x10; - - /** - * The bit mask that is bit-wise ANDed with the flags in order to reveal the - * {@link DTE}. The low FOUR (4) bits in the mask are set. - */ - private final static int DTE_MASK = 0x0f; - - /** - * Return <code>true</code> if the flags byte has its <code>inline</code> - * bit set. - * - * @param flags - * The flags byte. - */ - static public boolean isInline(final byte flags) { - - return (flags & INLINE_MASK) != 0; - - } - - /** - * FIXME I think we really need to be able to say from the flags whether - * an IV is null or non-null. The context position of statements can - * often be null. - * - * @param flags - * The flags byte. - */ - static public boolean isNull(final byte flags) { - - return false; - - } - - /** - * Return <code>true</code> if the flags byte has its <code>extension</code> - * bit set. - * - * @param flags - * The flags byte. - * - * @todo unit test for this. - */ - static public boolean isExtension(final byte flags) { - - return (flags & EXTENSION_MASK) != 0; - - } - - final public VTE getVTE() { - - return VTE - .valueOf((byte) (((flags & VTE_MASK) >>> VTE_SHIFT) & 0xff)); - - } - - /** - * Return the {@link DTE} for this {@link IV}. - */ - final public DTE getDTE() { - - return DTE.valueOf((byte) ((flags & DTE_MASK) & 0xff)); - - } - - /** - * Helper method decodes a flags byte as found in a statement index key to - * an {@link VTE}. - * - * @param flags - * The flags byte. 
- * - * @return The {@link VTE} - */ - static final public VTE getInternalValueTypeEnum( - final byte flags) { - - return VTE - .valueOf((byte) (((flags & VTE_MASK) >>> VTE_SHIFT) & 0xff)); - - } - - /** - * Helper method decodes a flags byte as found in a statement index key to - * an {@link DTE}. - * - * @param flags - * The flags byte. - * @return The {@link DTE} - */ - static public DTE getInternalDataTypeEnum(final byte flags) { - - return DTE.valueOf((byte) ((flags & DTE_MASK) & 0xff)); - - } - - final public boolean isLiteral() { - - return (flags & VTE_MASK) >>> VTE_SHIFT == VTE.LITERAL.v; - - } - - final public boolean isBNode() { - - return (flags & VTE_MASK) >>> VTE_SHIFT == VTE.BNODE.v; - - } - - final public boolean isURI() { - - return (flags & VTE_MASK) >>> VTE_SHIFT == VTE.URI.v; - - } - - final public boolean isStatement() { - - return (flags & VTE_MASK) >>> VTE_SHIFT == VTE.STATEMENT.v; - - } - - /** - * {@inheritDoc} - * <p> - * This implementation based on the <code>inline</code> bit flag. This can - * be overridden in many derived classes which have compile time knowledge - * of whether the RDF value is inline or not. - */ - public boolean isInline() { - return isInline(flags); - } - - /** - * {@inheritDoc} - * <p> - * This implementation based on the <code>extension</code> bit flag. Since - * the extension flag is only used for datatype literals, this method can be - * overridden in many derived classes which have compile time knowledge of - * whether the value is an RDF {@link Literal} or not. - */ - public boolean isExtension() { - return isExtension(flags); - } - - /** - * {@inheritDoc} - * <p> - * This implementation based on the <code>inline</code> bit flag. This can - * be overridden in many derived classes which have compile time knowledge - * of whether the RDF value is inline or not. 
- */ - public boolean isTermId() { - return !isInline(); - } - - final public boolean isNumeric() { - return isInline() && getDTE().isNumeric(); - } - - final public boolean isSignedNumeric() { - return isInline() && getDTE().isSignedNumeric(); - } - - final public boolean isUnsignedNumeric() { - return isInline() && getDTE().isUnsignedNumeric(); - } - - final public boolean isFixedNumeric() { - return isInline() && getDTE().isFixedNumeric(); - } - - final public boolean isBigNumeric() { - return isInline() && g... [truncated message content] |