From: <mrp...@us...> - 2010-07-21 19:49:16
|
Revision: 3261 http://bigdata.svn.sourceforge.net/bigdata/?rev=3261&view=rev Author: mrpersonick Date: 2010-07-21 19:49:10 +0000 (Wed, 21 Jul 2010) Log Message: ----------- renamed a bunch of stuff and added support for inline bnodes Added Paths: ----------- branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/AbstractBNodeIV.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/NumericBNodeIV.java Added: branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/AbstractBNodeIV.java =================================================================== --- branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/AbstractBNodeIV.java (rev 0) +++ branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/AbstractBNodeIV.java 2010-07-21 19:49:10 UTC (rev 3261) @@ -0,0 +1,68 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +package com.bigdata.rdf.internal; + +import com.bigdata.rdf.model.BigdataBNode; +import com.bigdata.rdf.model.BigdataValueFactory; +import com.bigdata.rdf.store.AbstractTripleStore; + +/** + * Class for inline RDF blank nodes. 
Blank nodes MUST be based on UUIDs or + * some other numeric in order to be inlined. + * <p> + * {@inheritDoc} + * + * @author <a href="mailto:tho...@us...">Bryan + * Thompson</a> + * @version $Id: TestEncodeDecodeKeys.java 2753 2010-05-01 16:36:59Z + * thompsonbry $ + * + * @see AbstractTripleStore.Options + */ +abstract public class AbstractBNodeIV<V extends BigdataBNode, T> extends + AbstractInlineIV<V, T> { + + /** + * + */ + private static final long serialVersionUID = -4560216387427028030L; + + public AbstractBNodeIV(DTE dte) { + + super(VTE.BNODE, dte); + + } + + final public long getTermId() { + throw new UnsupportedOperationException(); + } + + public V asValue(BigdataValueFactory f) + throws UnsupportedOperationException { + final V bnode = (V) f.createBNode(stringValue()); + bnode.setIV(this); + return bnode; + } + +} \ No newline at end of file Added: branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/NumericBNodeIV.java =================================================================== --- branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/NumericBNodeIV.java (rev 0) +++ branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/NumericBNodeIV.java 2010-07-21 19:49:10 UTC (rev 3261) @@ -0,0 +1,87 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +package com.bigdata.rdf.internal; + +import com.bigdata.rawstore.Bytes; +import com.bigdata.rdf.model.BigdataBNode; +import com.bigdata.rdf.store.AbstractTripleStore; + +/** + * Class for inline RDF blank nodes. Blank nodes MUST be based on a numeric + * value to be inlined with this class. + * <p> + * {@inheritDoc} + * + * @author <a href="mailto:tho...@us...">Bryan + * Thompson</a> + * @version $Id: TestEncodeDecodeKeys.java 2753 2010-05-01 16:36:59Z + * thompsonbry $ + * + * @see AbstractTripleStore.Options + */ +public class NumericBNodeIV<V extends BigdataBNode> extends + AbstractBNodeIV<V, Integer> { + + /** + * + */ + private static final long serialVersionUID = -2057725744604560753L; + + private final int id; + + public NumericBNodeIV(final int id) { + + super(DTE.XSDInt); + + this.id = id; + + } + + @Override + public String stringValue() { + return String.valueOf(id); + } + + final public Integer getInlineValue() { + return id; + } + + public boolean equals(Object o) { + if (this == o) + return true; + if (o instanceof NumericBNodeIV<?>) { + return this.id == ((NumericBNodeIV<?>) o).id; + } + return false; + } + + public int hashCode() { + return id; + } + + public int byteLength() { + return 1 + Bytes.SIZEOF_INT; + } + +} \ No newline at end of file This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <mrp...@us...> - 2010-07-21 19:48:49
|
Revision: 3260 http://bigdata.svn.sourceforge.net/bigdata/?rev=3260&view=rev Author: mrpersonick Date: 2010-07-21 19:48:42 +0000 (Wed, 21 Jul 2010) Log Message: ----------- renamed a bunch of stuff and added support for inline bnodes Modified Paths: -------------- branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/DTE.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/DummyIV.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/ILexiconConfiguration.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/IVUtility.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/LegacyTermIdUtility.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/LexiconConfiguration.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/TermId.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/VTE.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSD.java Added Paths: ----------- branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/AbstractIV.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/AbstractInlineIV.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/AbstractLiteralIV.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/UUIDBNodeIV.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/UUIDLiteralIV.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDBooleanIV.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDByteIV.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDDecimalIV.java 
branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDDoubleIV.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDFloatIV.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDIntIV.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDIntegerIV.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDLongIV.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDShortIV.java Removed Paths: ------------- branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/AbstractDatatypeLiteralInternalValue.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/AbstractInlineInternalValue.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/AbstractInternalValue.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/AbstractLiteralInternalValue.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/BNodeInternalValue.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/UUIDInternalValue.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDBooleanInternalValue.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDByteInternalValue.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDDecimalInternalValue.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDDoubleInternalValue.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDFloatInternalValue.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDIntInternalValue.java 
branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDIntegerInternalValue.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDLongInternalValue.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDShortInternalValue.java Deleted: branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/AbstractDatatypeLiteralInternalValue.java =================================================================== --- branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/AbstractDatatypeLiteralInternalValue.java 2010-07-21 18:04:22 UTC (rev 3259) +++ branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/AbstractDatatypeLiteralInternalValue.java 2010-07-21 19:48:42 UTC (rev 3260) @@ -1,128 +0,0 @@ -/** - -Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. - -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ -package com.bigdata.rdf.internal; - -import java.math.BigDecimal; -import java.math.BigInteger; - -import com.bigdata.rdf.model.BigdataLiteral; - -/** - * Abstract base class for RDF datatype literals adds primitive data type - * value access methods. 
- * <p> - * {@inheritDoc} - * - * @todo What are the SPARQL semantics for casting among these datatypes? - * They should probably be reflected here since that is the real use - * case. I believe that those casts also require failing a solution if - * the cast is not legal, in which case these methods might not be all - * that useful. - * <p> - * Also see BigdataLiteralImpl and XMLDatatypeUtil. It handles the - * conversions by reparsing, but there is no reason to do that here - * since we have the canonical point in the value space. - * - * @see http://www.w3.org/TR/rdf-sparql-query/#FunctionMapping, The casting - * rules for SPARQL - * - * @author <a href="mailto:tho...@us...">Bryan - * Thompson</a> - * @version $Id: TestEncodeDecodeKeys.java 2753 2010-05-01 16:36:59Z - * thompsonbry $ - */ -abstract public class AbstractDatatypeLiteralInternalValue<V extends BigdataLiteral, T> - extends AbstractLiteralInternalValue<V, T> { - - /** - * - */ - private static final long serialVersionUID = 5962615541158537189L; - - protected AbstractDatatypeLiteralInternalValue(final DTE dte) { - - super(dte); - - } - - final public long getTermId() { - throw new UnsupportedOperationException(); - } - - /** Return the <code>boolean</code> value of <i>this</i> value. */ - abstract public boolean booleanValue(); - - /** - * Return the <code>byte</code> value of <i>this</i> value. - * <p> - * Note: Java lacks unsigned data types. For safety, operations on - * unsigned XSD data types should be conducted after a widening - * conversion. For example, operations on <code>xsd:unsignedByte</code> - * should be performed using {@link #shortValue()}. - */ - abstract public byte byteValue(); - - /** - * Return the <code>short</code> value of <i>this</i> value. - * <p> - * Note: Java lacks unsigned data types. For safety, operations on - * unsigned XSD data types should be conducted after a widening - * conversion. 
For example, operations on <code>xsd:unsignedShort</code> - * should be performed using {@link #intValue()}. - */ - abstract public short shortValue(); - - /** - * Return the <code>int</code> value of <i>this</i> value. - * <p> - * Note: Java lacks unsigned data types. For safety, operations on - * unsigned XSD data types should be conducted after a widening - * conversion. For example, operations on <code>xsd:unsignedInt</code> - * should be performed using {@link #longValue()}. - */ - abstract public int intValue(); - - /** - * Return the <code>long</code> value of <i>this</i> value. - * <p> - * Note: Java lacks unsigned data types. For safety, operations on - * unsigned XSD data types should be conducted after a widening - * conversion. For example, operations on <code>xsd:unsignedLong</code> - * should be performed using {@link #integerValue()}. - */ - abstract public long longValue(); - - /** Return the <code>float</code> value of <i>this</i> value. */ - abstract public float floatValue(); - - /** Return the <code>double</code> value of <i>this</i> value. */ - abstract public double doubleValue(); - - /** Return the {@link BigInteger} value of <i>this</i> value. */ - abstract public BigInteger integerValue(); - - /** Return the {@link BigDecimal} value of <i>this</i> value. */ - abstract public BigDecimal decimalValue(); - -} \ No newline at end of file Copied: branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/AbstractIV.java (from rev 3258, branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/AbstractInternalValue.java) =================================================================== --- branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/AbstractIV.java (rev 0) +++ branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/AbstractIV.java 2010-07-21 19:48:42 UTC (rev 3260) @@ -0,0 +1,680 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. 
+ +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on May 3, 2010 + */ + +package com.bigdata.rdf.internal; + +import java.io.DataOutput; +import java.io.IOException; +import java.util.UUID; +import org.deri.iris.basics.Literal; +import org.openrdf.model.Value; +import com.bigdata.btree.keys.IKeyBuilder; +import com.bigdata.btree.keys.KeyBuilder; +import com.bigdata.rdf.model.BigdataValue; + +/** + * Abstract base class for the inline representation of an RDF Value (the + * representation which is encoded in to the keys of the statement indices). + * This class is responsible for combining the {@link VTE} and the {@link DTE} + * together into the flags byte used as a common prefix for all keys formed from + * RDF Values regardless of whether they are based on an assigned term + * identifier or the inlining of the RDF Value. + * + * <h3>Binary record format</h3> + * + * We currently have 14 built-in (or intrinsic) data types (see {@link DTE}). + * Each of those types has a natural order which we can encode and decode from + * the B+Tree key. In general, there is a relatively limited set of interesting + * intrinsic codings, which is important since we will dedicate just 4 bits for + * to code the natural order of the value space, which is just only 16 + * distinctions. 
Given that we have 14 intrinsic data types, that leaves room + * for just two more. One of those bits is reserved against (see + * {@link DTE#Reserved1}). The other bit is reserved for extensibility in the + * framework itself as described below (see {@link DTE#Extension}). + * <p> + * The header byte contains various bit flags which are laid out as follows: + * + * <pre> + * [valueType] : 2 bits + * [inline] : 1 bit + * [extension] : 1 bit + * [dataTypeCode] : 4 bits + * </pre> + * + * <dl> + * <dt>valueType</dt> + * <dd>These 2 bits distinguish between URIs, Literals, Blank Nodes, and + * statement identifiers (SIDs). These bits are up front and therefore partition + * the key space first by the RDF Value type. See {@link VTE} which governs + * these bits.</dd> + * <dt>inline</dt> + * <dd>This bit indicates whether the value is inline or represented by a term + * identifier in the key. This bit is set based on how a given triple store or + * quad store instance is configured. However, because the bit is present in the + * flags, we know how to decode the key without reference to this configuration + * metadata.</dd> + * <dt>extension</dt> + * <dd>This bit is ignored (and should be zero) unless the RDF Value is a + * Literal with a data type URI which is being inlined. For data type literals, + * this bit is set if the actual data type is not one of those which we handle + * intrinsically but is one of those which has been registered (by the + * application) as an "extended" data type projected onto one of the intrinsic + * data types. Thus, this bit partitions the key space into the intrinsic data + * types and the extended data types.<br/> + * When <code>true</code>, this bit signals that information about the actual + * RDF Value data type will follow (see below). 
When <code>false</code>, the + * datatype URI is directly recoverable (for a data type Literal) from the + * <code>dataTypeCode</code>.</dd> + * <dt>dataTypeCode</dt> + * <dd>These 4 bits indicate the intrinsic data type for the inline value and + * are ignored (and should be zero) unless a data type Literal is being inlined. + * These bits partition the key space. However, since the <code>extension</code> bit + * comes first this will not interleave inline values for intrinsic and extended + * data types having the same <code>dataTypeCode</code>. <br/> + * Note: The <code>dataTypeCode</code> <code>0xf</code> ({@link DTE#Extension}) + * is reserved for extending the set of intrinsic data types. When the code is + * <code>0xf</code> the next byte must be considered as well to determine the + * actual intrinsic data type code.</dd> + * </dl> + * + * <pre> + * ---------- byte boundary ---------- + * </pre> + * + * If <code>extension</code> was true, then the next byte(s) encode + * information about the source data type URI and the key space will be + * partitioned based on the extended data type URI [the precise format of that + * data has not yet been decided -- see below]. + * + * <pre> + * ---------- byte boundary ---------- + * </pre> + * + * The unsigned byte[] representation of the value in the value space for one of + * the intrinsic types. The length of this byte[] may be directly determined + * from the [dataTypeCode] for most data types. However, for xsd:integer and + * xsd:decimal, the length is part of the representation. + * + * <pre> + * ---------- byte boundary and end of the record ---------- + * </pre> + * + * <h3>Extensibility</h3> + * + * There are three core use cases for extensibility: + * <dl> + * <dt>projections</dt> + * <dd>A projection takes an application specific data type and maps it onto one + * of the intrinsic data types (int, float, double, etc).
Projections provide an + * extensible mechanism which allows an application to benefit from inline + * representation of RDF Values and allows the query optimizer to choose + * key-range scans for application defined data types if they can be projected + * onto intrinsic data types. For example, if you define an application specific + * data type <code>foo:milliseconds</code> representing milliseconds since the + * epoch, then the value space of that data type can be projected onto an + * <code>xsd:long</code>.</dd> + * <dt>enumerations</dt> + * <dd>An enumeration is an application specific data type having a specific set + * of values. Those values are then projected onto an intrinsic data type such + * as <code>byte</code> (256 distinctions) or <code>short</code> (64k + * distinctions). Enumerations make it possible to inline application specific + * data types while benefiting from XSD validation of those RDF Values. When an + * enumeration is registered, the order in which the members of the enumeration + * are given may optionally specify the natural order of that enumeration. The + * natural order is imposed by projecting the first member of the enumeration + * onto ZERO, the second member onto ONE, etc. An enumeration with a natural + * order will be sorted based on that defined order and query optimizations may + * perform key-range scans informed by that natural order.<br/> + * Enumerations may be used in cases where you might otherwise use short + * character codes. For example, an enumeration could be defined for the two + * character abbreviations for the 50 US States. That enumeration could be + * mapped onto a single byte.</dd> + * <dt>custom indices</dt> + * <dd>The best example here is spatial data, which requires literals which + * represent points, rectangles, circles, arcs, clouds, etc to be inserted into + * special spatial indices.
Queries must be aware of spatial data and must be + * rewritten to run against the appropriate spatial indices.<br/> + * Another use case would be carrying specialized indices for bioinformatics or + * genomics data.</dd> + * </dl> + * Note: Both projected and enumerated extensible data types MAY map many RDF + * Values onto the same internal value but each internal value MUST map onto a + * single RDF Value (materialization must be deterministic). This can be seen as + * normalization imposed by the database. + * + * @todo Note: There can be more than one URI for the same XSD datatype (there + * is more than one accepted namespace - see <a + * href="http://www.w3.org/TR/xmlschema-2/#namespaces"> XML Schema + * Datatypes namespaces </a>). I propose that we collapse these by default + * onto a canonical datatype URI. + * + * @todo For a extensible data type which is being projected onto an intrinsic + * data type we would need both (a) a method to project the RDF Value onto + * the appropriate intrinsic data type; and (b) a method to materialize an + * RDF Value from the inline representation. + * <p> + * If we put the registrations into their own index, then we could use a + * more compact representation (the term identifier of the datatype URI is + * 8 bytes, but we could do with 2 or 4 bytes). Alternatively, we could + * use the LongPacker to pack an unsigned long integer into as few bytes + * as possible. This would break the natural ordering across the + * dataTypeIds, but I can not see how that would matter since the term + * identifiers are essentially arbitrary anyway so their order has little + * value. + * + * @todo Can we inline the language code for a literal? I think that the + * language code must be ASCII and might be restricted to two characters. + * This might use up our {@link DTE#Reserved1} bit. + * + * @todo One consequences of this refactor is that you must use equals() rather + * than == to compare internal values, including term identifiers. 
This + * boils down to verifying that the two internal values are the same type + * (same VTE, DTE, etc) and have the same value (termId, long, etc). That + * can all be done rather quickly, but it is more overhead than testing a + * == b. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id: TestEncodeDecodeKeys.java 2753 2010-05-01 16:36:59Z thompsonbry + * $ + * @param <V> + * The generic type for the RDF {@link Value} implementation. + * @param <T> + * The generic type for the inline value. + */ +public abstract class AbstractIV<V extends BigdataValue, T> + implements IV<V, T> { + + /** + * + */ + private static final long serialVersionUID = 4710700756635103123L; + + /** + * Bit flags indicating the kind of RDF Value ({@link VTE}), whether the RDF + * Value is inline, whether this is an extension datatype, and the natural + * order and binary representation of the inline value (see {@link DTE}). + * + * @see VTE + * @see DTE + */ + private final byte flags; + + /** + * The RDF Value type (URI, BNode, Literal or Statement) and the data type + * are combined and stored in a single byte together with whether the RDF + * value has been inlined (an <i>inline</i> bit) and whether the RDF Value + * is an extended data type (the <i>extension</i> bit). The <i>vte</i> has 4 + * distinctions and is thus TWO (2) bits wide. The <i>dte</i> allows for up + * to 16 distinctions and is FOUR (4) bits wide. The bits allocated to these + * sets of distinctions are combined within a single byte as follows: + * + * <pre> + * [vviedddd] + * </pre> + * + * where <code>v</code> is a {@link VTE} bit, <code>i</code> is the + * <i>inline</i> bit, <code>e</code> is the extension bit, and + * <code>d</code> is a {@link DTE} bit. + * + * @param vte + * The RDF Value type (URI, BNode, Literal, or Statement). + * @param inline + * <code>true</code> iff the RDF value will be represented inline + * in the key.
When <code>false</code>, the term identifier of + * the RDF Value will be represented inline instead of its actual + * value. + * @param extension + * When <code>true</code>, the actual RDF data type URI differs + * from the intrinsic data type (the {@link DTE}) but has been + * projected onto the natural order of the intrinsic data type. + * @param dte + * The internal datatype for the RDF value (termId, xsd:int, + * xsd:long, xsd:float, xsd:double, etc). + * + * @see VTE + * @see DTE + */ + protected AbstractIV(final VTE vte, final boolean inline, + final boolean extension, final DTE dte) { + + // vte << 6 bits (it is in the high 2 bits). + // inline << 5 bits + // extension << 4 bits + // dte is in the low 4 bits. + this( (byte) ((// + (((int) vte.v) << VTE_SHIFT)// + | ((inline ? 1 : 0) << INLINE_SHIFT)// + | ((extension ? 1 : 0) << EXTENSION_SHIFT) // + | (dte.v)// + ) & 0xff)); + + } + + /** + * Constructor used when decoding since you already have the flags. + * + * @param flags + * The flags. + */ + protected AbstractIV(final byte flags) { + + this.flags = flags; + + } + + final public byte flags() { + + return flags; + + } + + /** + * The #of bits (SIX) that the {@link VTE} is shifted to + * the left when encoding it into the {@link #flags}. + */ + private final static int VTE_SHIFT = 6; + + /** + * The bit mask that is bit-wise ANDed with the flags in order to reveal the + * {@link VTE}. The high TWO (2) bits of the low byte in the mask are set. + */ + private final static int VTE_MASK = 0xC0; + + /** + * The #of bits (FIVE) that the <i>inline</i> flag is shifted to the left + * when encoding it into the {@link #flags}. + */ + private final static int INLINE_SHIFT = 5; + + /** + * The bit mask that is bit-wise ANDed with the flags in order to reveal + * the <code>inline</code> bit. 
+ */ + private final static int INLINE_MASK = 0x20; + + /** + * The #of bits (FOUR) that the <i>extension</i> flag is shifted to the left + * when encoding it into the {@link #flags}. + */ + private final static int EXTENSION_SHIFT = 4; + + /** + * The bit mask that is bit-wise ANDed with the flags in order to reveal + * the <code>extension</code> bit. + */ + private final static int EXTENSION_MASK = 0x10; + + /** + * The bit mask that is bit-wise ANDed with the flags in order to reveal the + * {@link DTE}. The low FOUR (4) bits in the mask are set. + */ + private final static int DTE_MASK = 0x0f; + + /** + * Return <code>true</code> if the flags byte has its <code>inline</code> + * bit set. + * + * @param flags + * The flags byte. + */ + static public boolean isInline(final byte flags) { + + return (flags & INLINE_MASK) != 0; + + } + + /** + * FIXME I think we really need to be able to say from the flags whether + * an IV is null or non-null. The context position of statements can + * often be null. + * + * @param flags + * The flags byte. + */ + static public boolean isNull(final byte flags) { + + return false; + + } + + /** + * Return <code>true</code> if the flags byte has its <code>extension</code> + * bit set. + * + * @param flags + * The flags byte. + * + * @todo unit test for this. + */ + static public boolean isExtension(final byte flags) { + + return (flags & EXTENSION_MASK) != 0; + + } + + final public VTE getVTE() { + + return VTE + .valueOf((byte) (((flags & VTE_MASK) >>> VTE_SHIFT) & 0xff)); + + } + + /** + * Return the {@link DTE} for this {@link IV}. + */ + final public DTE getDTE() { + + return DTE.valueOf((byte) ((flags & DTE_MASK) & 0xff)); + + } + + /** + * Helper method decodes a flags byte as found in a statement index key to + * an {@link VTE}. + * + * @param flags + * The flags byte.
+ * + * @return The {@link VTE} + */ + static final public VTE getInternalValueTypeEnum( + final byte flags) { + + return VTE + .valueOf((byte) (((flags & VTE_MASK) >>> VTE_SHIFT) & 0xff)); + + } + + /** + * Helper method decodes a flags byte as found in a statement index key to + * an {@link DTE}. + * + * @param flags + * The flags byte. + * @return The {@link DTE} + */ + static public DTE getInternalDataTypeEnum(final byte flags) { + + return DTE.valueOf((byte) ((flags & DTE_MASK) & 0xff)); + + } + + final public boolean isLiteral() { + + return (flags & VTE_MASK) >>> VTE_SHIFT == VTE.LITERAL.v; + + } + + final public boolean isBNode() { + + return (flags & VTE_MASK) >>> VTE_SHIFT == VTE.BNODE.v; + + } + + final public boolean isURI() { + + return (flags & VTE_MASK) >>> VTE_SHIFT == VTE.URI.v; + + } + + final public boolean isStatement() { + + return (flags & VTE_MASK) >>> VTE_SHIFT == VTE.STATEMENT.v; + + } + + /** + * {@inheritDoc} + * <p> + * This implementation based on the <code>inline</code> bit flag. This can + * be overridden in many derived classes which have compile time knowledge + * of whether the RDF value is inline or not. + */ + public boolean isInline() { + return isInline(flags); + } + + /** + * {@inheritDoc} + * <p> + * This implementation based on the <code>extension</code> bit flag. Since + * the extension flag is only used for datatype literals, this method can be + * overridden in many derived classes which have compile time knowledge of + * whether the value is an RDF {@link Literal} or not. + */ + public boolean isExtension() { + return isExtension(flags); + } + + /** + * {@inheritDoc} + * <p> + * This implementation based on the <code>inline</code> bit flag. This can + * be overridden in many derived classes which have compile time knowledge + * of whether the RDF value is inline or not. 
+ */ + public boolean isTermId() { + return !isInline(); + } + + final public boolean isNumeric() { + return isInline() && getDTE().isNumeric(); + } + + final public boolean isSignedNumeric() { + return isInline() && getDTE().isSignedNumeric(); + } + + final public boolean isUnsignedNumeric() { + return isInline() && getDTE().isUnsignedNumeric(); + } + + final public boolean isFixedNumeric() { + return isInline() && getDTE().isFixedNumeric(); + } + + final public boolean isBigNumeric() { + return isInline() && getDTE().isBigNumeric(); + } + + final public boolean isFloatingPointNumeric() { + return isInline() && getDTE().isFloatingPointNumeric(); + } + + /** + * Return a hash code based on the value of the point in the value space. + */ + abstract public int hashCode(); + + /** + * Return true iff the two values are the same point in the same value + * space. Points in different value spaces (as identified by different + * datatype URIs) are NOT equal even if they have the same value in the + * corresponding primitive data type. + */ + abstract public boolean equals(Object o); + + /** + * Imposes an ordering of IVs based on their natural sort ordering in the + * index as unsigned byte[]s. + */ + public int compareTo(final IV o) { + + if (this == o) + return 0; + + if (o == null) + return 1; + + /* + * First order based on the flags byte. This is the first byte of the + * key, so it always partitions the key space and hence provides the + * initial dimension of the total IV ordering. + * + * Note: This comparison will always sort out things such that URIs, + * Literals, BNodes, and SIDs will never compare as equals. It will also + * sort out extension types and datatype literals with a natural + * datatype. 
+ */ + int ret = (int) flags - (int) o.flags(); + + if (ret < 0) + return -1; + + if (ret > 0) + return 1; + + if(this instanceof TermId) { + + final long tid1 = ((TermId<?>) this).getTermId(); + final long tid2 = ((TermId<?>) o).getTermId(); + + /* + * Note: logic avoids possible overflow of [long] by not computing + * the difference between two longs. + */ + + ret = tid1 < tid2 ? -1 : tid1 > tid2 ? 1 : 0; + + return ret; + + } + + if(isExtension()) { + /* + * @todo we may need to handle extension types here explicitly once + * their semantics are firmed up further. + */ + throw new UnsupportedOperationException(); + } + + /* + * At this point we are comparing two IVs of the same intrinsic + * datatype. That is, they are both datatype literals expressed using + * one of the predefined datatypes. These can be compared by directly + * comparing their primitive values. E.g., long to long, int to int, + * etc. + */ + return _compareTo(o); + + } + + /** + * Compare two {@link IV}s having the same intrinsic datatype. + * + * @todo This should probably be moved to + * {@link AbstractInlineIV} and implementations provided + * for each concrete instance of that abstract class. + */ + protected int _compareTo(IV o) { + + throw new UnsupportedOperationException(getClass().toString()); + + } + + /** + * {@inheritDoc} + * + * FIXME Handle extension types, probably in a subclass, and maybe requiring + * the caller to pass in an object with the context for the extension types. + */ + public IKeyBuilder encode(final IKeyBuilder keyBuilder) { + + // First emit the flags byte. + keyBuilder.append(flags); + + if (!isInline()) { + /* + * Since the RDF Value is not inline, it will be represented as a + * term identifier. + */ + keyBuilder.append(getTermId()); + return keyBuilder; + } + + /* + * Append the natural value type representation. + * + * Note: We have to handle the unsigned byte, short, int and long values + * specially to get the correct total key order. 
+ */ + final DTE dte = getDTE(); + + final AbstractLiteralIV<?, ?> t = (AbstractLiteralIV<?, ?>) this; + + switch (dte) { + case XSDBoolean: + keyBuilder.append((byte) (t.booleanValue() ? 1 : 0)); + break; + case XSDByte: + keyBuilder.append(t.byteValue()); + break; + case XSDShort: + keyBuilder.append(t.shortValue()); + break; + case XSDInt: + keyBuilder.append(t.intValue()); + break; + case XSDLong: + keyBuilder.append(t.longValue()); + break; + case XSDFloat: + keyBuilder.append(t.floatValue()); + break; + case XSDDouble: + keyBuilder.append(t.doubleValue()); + break; + case XSDInteger: + keyBuilder.append(t.integerValue()); + break; + case XSDDecimal: + keyBuilder.append(t.decimalValue()); + break; + case UUID: + keyBuilder.append((UUID)t.getInlineValue()); + break; +// case XSDUnsignedByte: +// keyBuilder.appendUnsigned(t.byteValue()); +// break; +// case XSDUnsignedShort: +// keyBuilder.appendUnsigned(t.shortValue()); +// break; +// case XSDUnsignedInt: +// keyBuilder.appendUnsigned(t.intValue()); +// break; +// case XSDUnsignedLong: +// keyBuilder.appendUnsigned(t.longValue()); +// break; + default: + throw new AssertionError(toString()); + } + + return keyBuilder; + + } + +} Copied: branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/AbstractInlineIV.java (from rev 3258, branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/AbstractInlineInternalValue.java) =================================================================== --- branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/AbstractInlineIV.java (rev 0) +++ branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/AbstractInlineIV.java 2010-07-21 19:48:42 UTC (rev 3260) @@ -0,0 +1,91 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... 
+ +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +package com.bigdata.rdf.internal; + +import org.openrdf.model.Value; + +import com.bigdata.rdf.model.BigdataValue; + +/** + * Abstract base class for inline RDF values (literals, blank nodes, and + * statement identifiers can be inlined). + * <p> + * {@inheritDoc} + * + * @author <a href="mailto:tho...@us...">Bryan + * Thompson</a> + * @version $Id: TestEncodeDecodeKeys.java 2753 2010-05-01 16:36:59Z + * thompsonbry $ + */ +abstract public class AbstractInlineIV<V extends BigdataValue, T> + extends AbstractIV<V, T> { + + /** + * + */ + private static final long serialVersionUID = -2847844163772097836L; + + protected AbstractInlineIV(final VTE vte, + final DTE dte) { + + super(vte, true/* inline */, false/* extension */, dte); + + } + + /** + * Returns the String-value of a Value object. This returns either a + * Literal's label, a URI's URI or a BNode's ID. + * + * @see Value#stringValue() + */ + abstract public String stringValue(); + + /** + * Always returns <code>true</code> since the value is inline. + */ + final public boolean isInline() { + return true; + } + + /** + * Always returns <code>false</code> since the value is inline. + */ + final public boolean isTermId() { + return false; + } + + /** + * Always returns <code>false</code> since the value is inline. 
+ */ + final public boolean isNull() { + return false; + } + + public String toString() { + + return super.getDTE() + "(" + stringValue() + ")"; + + } + +} \ No newline at end of file Deleted: branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/AbstractInlineInternalValue.java =================================================================== --- branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/AbstractInlineInternalValue.java 2010-07-21 18:04:22 UTC (rev 3259) +++ branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/AbstractInlineInternalValue.java 2010-07-21 19:48:42 UTC (rev 3260) @@ -1,91 +0,0 @@ -/** - -Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. - -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ -package com.bigdata.rdf.internal; - -import org.openrdf.model.Value; - -import com.bigdata.rdf.model.BigdataValue; - -/** - * Abstract base class for inline RDF values (literals, blank nodes, and - * statement identifiers can be inlined). 
- * <p> - * {@inheritDoc} - * - * @author <a href="mailto:tho...@us...">Bryan - * Thompson</a> - * @version $Id: TestEncodeDecodeKeys.java 2753 2010-05-01 16:36:59Z - * thompsonbry $ - */ -abstract public class AbstractInlineInternalValue<V extends BigdataValue, T> - extends AbstractInternalValue<V, T> { - - /** - * - */ - private static final long serialVersionUID = -2847844163772097836L; - - protected AbstractInlineInternalValue(final VTE vte, - final DTE dte) { - - super(vte, true/* inline */, false/* extension */, dte); - - } - - /** - * Returns the String-value of a Value object. This returns either a - * Literal's label, a URI's URI or a BNode's ID. - * - * @see Value#stringValue() - */ - abstract public String stringValue(); - - /** - * Always returns <code>true</code> since the value is inline. - */ - final public boolean isInline() { - return true; - } - - /** - * Always returns <code>false</code> since the value is inline. - */ - final public boolean isTermId() { - return false; - } - - /** - * Always returns <code>false</code> since the value is inline. - */ - final public boolean isNull() { - return false; - } - - public String toString() { - - return super.getDTE() + "(" + stringValue() + ")"; - - } - -} \ No newline at end of file Deleted: branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/AbstractInternalValue.java =================================================================== --- branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/AbstractInternalValue.java 2010-07-21 18:04:22 UTC (rev 3259) +++ branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/AbstractInternalValue.java 2010-07-21 19:48:42 UTC (rev 3260) @@ -1,680 +0,0 @@ -/** - -Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. - -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... 
- -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ -/* - * Created on May 3, 2010 - */ - -package com.bigdata.rdf.internal; - -import java.io.DataOutput; -import java.io.IOException; -import java.util.UUID; -import org.deri.iris.basics.Literal; -import org.openrdf.model.Value; -import com.bigdata.btree.keys.IKeyBuilder; -import com.bigdata.btree.keys.KeyBuilder; -import com.bigdata.rdf.model.BigdataValue; - -/** - * Abstract base class for the inline representation of an RDF Value (the - * representation which is encoded in to the keys of the statement indices). - * This class is responsible for combining the {@link VTE} and the {@link DTE} - * together into the flags byte used as a common prefix for all keys formed from - * RDF Values regardless of whether they are based on an assigned term - * identifier or the inlining of the RDF Value. - * - * <h3>Binary record format</h3> - * - * We currently have 14 built-in (or intrinsic) data types (see {@link DTE}). - * Each of those types has a natural order which we can encode and decode from - * the B+Tree key. In general, there is a relatively limited set of interesting - * intrinsic codings, which is important since we will dedicate just 4 bits for - * to code the natural order of the value space, which is just only 16 - * distinctions. Given that we have 14 intrinsic data types, that leaves room - * for just two more. 
One of those bits is reserved against (see - * {@link DTE#Reserved1}). The other bit is reserved for extensibility in the - * framework itself as described below (see {@link DTE#Extension}). - * <p> - * The header byte contains various bit flags which are laid out as follows: - * - * <pre> - * [valueType] : 2 bits - * [inline] : 1 bit - * [extension] : 1 bit - * [dataTypeCode] : 4 bits - * </pre> - * - * <dl> - * <dt>valueType</dt> - * <dd>These 2 bits distinguish between URIs, Literals, Blank Nodes, and - * statement identifiers (SIDs). These bits are up front and therefore partition - * the key space first by the RDF Value type. See {@link VTE} which governs - * these bits.</dd> - * <dt>inline</dt> - * <dd>This bit indicates whether the value is inline or represented by a term - * identifier in the key. This bit is set based on how a given triple store or - * quad store instance is configured. However, because the bit is present in the - * flags, we know how to decode the key without reference to this configuration - * metadata.</dd> - * <dt>extension</dt> - * <dd>This bit is ignored (and should be zero) unless the RDF Value is a - * Literal with a data type URI which is being inlined. For data type literals, - * this bit is set if the actual data type is not one of those which we handle - * intrinsically but is one of those which has been registered (by the - * application) as an "extended" data type projected onto one of the intrinsic - * data types. Thus, this bit partitions the key space into the intrinsic data - * types and the extended data types.<br/> - * When <code>true</code>, this bit signals that information about the actual - * RDF Value data type will follow (see below). 
When <code>false</code>, the - * datatype URI is directly recoverable (for a data type Literal) from the - * <code>dataTypeCode</code>.</dd> - * <dt>dataTypeCode</dt> - * <dd>These 4 bits indicate the intrinsic data type for the inline value and - * are ignored (and should be zero) unless a data type Literal is being inlined. - * These bits partition the key space. However, since the <code>extension</code> bit - * comes first this will not interleave inline values for intrinsic and extended - * data types having the same <code>dataTypeCode</code>. <br/> - * Note: The <code>dataTypeCode</code> <code>0xf</code> ({@link DTE#Extension}) - * is reserved for extending the set of intrinsic data types. When the code is - * <code>0xf</code> the next byte must be considered as well to determine the - * actual intrinsic data type code.</dd> - * </dl> - * - * <pre> - * ---------- byte boundary ---------- - * </pre> - * - * If <code>extension</code> was true, then the next byte(s) encode - * information about the source data type URI and the key space will be - * partitioned based on the extended data type URI [the precise format of that - * data has not yet been decided -- see below]. - * - * <pre> - * ---------- byte boundary ---------- - * </pre> - * - * The unsigned byte[] representation of the value in the value space for one of - * the intrinsic types. The length of this byte[] may be directly determined - * from the [dataTypeCode] for most data types. However, for xsd:integer and - * xsd:decimal, the length is part of the representation. - * - * <pre> - * ---------- byte boundary and end of the record ---------- - * </pre> - * - * <h3>Extensibility</h3> - * - * There are three core use cases for extensibility: - * <dl> - * <dt>projections</dt> - * <dd>A projection takes an application specific data type and maps it onto one - * of the intrinsic data types (int, float, double, etc). 
Projections provide an - * extensible mechanism which allows an application to benefit from inline - * representation of RDF Values and allows the query optimizer to choose - * key-range scans for application defined data types if they can be projected - * onto intrinsic data types. For example, if you define an application specific - * data type <code>foo:milliseconds</code> representing milliseconds since the - * epoch, then the value space of that data type can be projected onto an - * <code>xsd:long</code>.</dd> - * <dt>enumerations</dt> - * <dd>An enumeration is an application specific data type having a specific set - * of values. Those values are then projected onto an intrinsic data type such - * as <code>byte</code> (256 distinctions) or <code>short</code> (64k - * distinctions). Enumerations make it possible to inline application specific - * data types while benefiting from XSD validation of those RDF Values. When an - * enumeration is registered, the order in which the members of the enumeration - * are given may optionally specify the natural order of that enumeration. The - * natural order is imposed by projecting the first member of the enumeration - * onto ZERO, the second member onto ONE, etc. An enumeration with a natural - * order will be sorted based on that defined order and query optimizations may - * perform key-range scans informed by that natural order.<br/> - * Enumerations may be used in cases where you might otherwise use short - * character codes. For example, an enumeration could be defined for the two - * character abbreviations for the 50 US States. That enumeration could be - * mapped onto a single byte.</dd> - * <dt>custom indices</dt> - * <dd>The best example here is spatial data, which requires literals which - * represent points, rectangles, circles, arcs, clouds, etc to be inserted into - * special spatial indices. 
Queries must be aware of spatial data and must be - * rewritten to run against the appropriate spatial indices.<br/> - * Another use case would be carrying specialized indices for bioinformatics or - * genomics data.</dd> - * </dl> - * Note: Both projected and enumerated extensible data types MAY map many RDF - * Values onto the same internal value but each internal value MUST map onto a - * single RDF Value (materialization must be deterministic). This can be seen as - * normalization imposed by the database. - * - * @todo Note: There can be more than one URI for the same XSD datatype (there - * is more than one accepted namespace - see <a - * href="http://www.w3.org/TR/xmlschema-2/#namespaces"> XML Schema - * Datatypes namespaces </a>). I propose that we collapse these by default - * onto a canonical datatype URI. - * - * @todo For a extensible data type which is being projected onto an intrinsic - * data type we would need both (a) a method to project the RDF Value onto - * the appropriate intrinsic data type; and (b) a method to materialize an - * RDF Value from the inline representation. - * <p> - * If we put the registrations into their own index, then we could use a - * more compact representation (the term identifier of the datatype URI is - * 8 bytes, but we could do with 2 or 4 bytes). Alternatively, we could - * use the LongPacker to pack an unsigned long integer into as few bytes - * as possible. This would break the natural ordering across the - * dataTypeIds, but I can not see how that would matter since the term - * identifiers are essentially arbitrary anyway so their order has little - * value. - * - * @todo Can we inline the language code for a literal? I think that the - * language code must be ASCII and might be restricted to two characters. - * This might use up our {@link DTE#Reserved1} bit. - * - * @todo One consequences of this refactor is that you must use equals() rather - * than == to compare internal values, including term identifiers. 
This - * boils down to verifying that the two internal values are the same type - * (same VTE, DTE, etc) and have the same value (termId, long, etc). That - * can all be done rather quickly, but it is more overhead than testing a - * == b. - * - * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id: TestEncodeDecodeKeys.java 2753 2010-05-01 16:36:59Z thompsonbry - * $ - * @param <V> - * The generic type for the RDF {@link Value} implementation. - * @param <T> - * The generic type for the inline value. - */ -public abstract class AbstractInternalValue<V extends BigdataValue, T> - implements IV<V, T> { - - /** - * - */ - private static final long serialVersionUID = 4710700756635103123L; - - /** - * Bit flags indicating the kind of RDF Value ({@link VTE}), whether the RDF - * Value is inline, whether this is an extension datatype, and the natural - * order and binary representation of the inline value (see {@link DTE}). - * - * @see VTE - * @see DTE - */ - private final byte flags; - - /** - * The RDF Value type (URI, BNode, Literal or Statement) and the data type - * are combined and stored in a single byte together with whether the RDF - * value has been inlined (an <i>inline</i> bit) and whether the RDF Value - * is an extended data type (the <i>extension</i> bit). The <i>vte</i> has 4 - * distinctions and is thus TWO (2) bits wide. The <i>dte</i> allows for up - * to 16 distinctions and is FOUR (4) bits wide. The bits allocated to these - * sets of distinctions are combined within a single byte as follows: - * - * <pre> - * [vviedddd] - * </pre> - * - * where <code>v</code> is a {@link VTE} bit, <code>i</code> is the - * <i>inline</i> bit, <code>e</code> is the extension bit, and - * <code>d</code> is a {@link DTE} bit. - * - * @param vte - * The RDF Value type (URI, BNode, Literal, or Statement). - * @param inline - * <code>true</code> iff the RDF value will be represented inline - * in the key. 
When <code>false</code>, the term identifier of - * the RDF Value will be represented inline instead of its actual - * value. - * @param extension - * When <code>true</code>, the actual RDF data type URI differs - * from the intrinsic data type (the {@link DTE}) but has been - * projected onto the natural order of the intrinsic data type. - * @param dte - * The internal datatype for the RDF value (termId, xsd:int, - * xsd:long, xsd:float, xsd:double, etc). - * - * @see VTE - * @see DTE - */ - protected AbstractInternalValue(final VTE vte, final boolean inline, - final boolean extension, final DTE dte) { - - // vte << 6 bits (it is in the high 2 bits). - // inline << 5 bits - // extension << 4 bits - // dte is in the low 4 bits. - this( (byte) ((// - (((int) vte.v) << VTE_SHIFT)// - | ((inline ? 1 : 0) << INLINE_SHIFT)// - | ((extension ? 1 : 0) << EXTENSION_SHIFT) // - | (dte.v)// - ) & 0xff)); - - } - - /** - * Constructor used when decoding since you already have the flags. - * - * @param flags - * The flags. - */ - protected AbstractInternalValue(final byte flags) { - - this.flags = flags; - - } - - final public byte flags() { - - return flags; - - } - - /** - * The #of bits (SIX) that the {@link VTE} is shifted to - * the left when encoding it into the {@link #flags}. - */ - private final static int VTE_SHIFT = 6; - - /** - * The bit mask that is bit-wise ANDed with the flags in order to reveal the - * {@link VTE}. The high TWO (2) bits of the low byte in the mask are set. - */ - private final static int VTE_MASK = 0xC0; - - /** - * The #of bits (FIVE) that the <i>inline</i> flag is shifted to the left - * when encoding it into the {@link #flags}. - */ - private final static int INLINE_SHIFT = 5; - - /** - * The bit mask that is bit-wise ANDed with the flags in order to reveal - * the <code>inline</code> bit. 
- */ - private final static int INLINE_MASK = 0x20; - - /** - * The #of bits (FOUR) that the <i>extension</i> flag is shifted to the left - * when encoding it into the {@link #flags}. - */ - private final static int EXTENSION_SHIFT = 4; - - /** - * The bit mask that is bit-wise ANDed with the flags in order to reveal - * the <code>extension</code> bit. - */ - private final static int EXTENSION_MASK = 0x10; - - /** - * The bit mask that is bit-wise ANDed with the flags in order to reveal the - * {@link DTE}. The low FOUR (4) bits in the mask are set. - */ - private final static int DTE_MASK = 0x0f; - - /** - * Return <code>true</code> if the flags byte has its <code>inline</code> - * bit set. - * - * @param flags - * The flags byte. - */ - static public boolean isInline(final byte flags) { - - return (flags & INLINE_MASK) != 0; - - } - - /** - * FIXME I think we really need to be able to say from the flags whether - * an IV is null or non-null. The context position of statements can - * often be null. - * - * @param flags - * The flags byte. - */ - static public boolean isNull(final byte flags) { - - return false; - - } - - /** - * Return <code>true</code> if the flags byte has its <code>extension</code> - * bit set. - * - * @param flags - * The flags byte. - * - * @todo unit test for this. - */ - static public boolean isExtension(final byte flags) { - - return (flags & EXTENSION_MASK) != 0; - - } - - final public VTE getVTE() { - - return VTE - .valueOf((byte) (((flags & VTE_MASK) >>> VTE_SHIFT) & 0xff)); - - } - - /** - * Return the {@link DTE} for this {@link IV}. - */ - final public DTE getDTE() { - - return DTE.valueOf((byte) ((flags & DTE_MASK) & 0xff)); - - } - - /** - * Helper method decodes a flags byte as found in a statement index key to - * an {@link VTE}. - * - * @param flags - * The flags byte. 
- * - * @return The {@link VTE} - */ - static final public VTE getInternalValueTypeEnum( - final byte flags) { - - return VTE - .valueOf((byte) (((flags & VTE_MASK) >>> VTE_SHIFT) & 0xff)); - - } - - /** - * Helper method decodes a flags byte as found in a statement index key to - * an {@link DTE}. - * - * @param flags - * The flags byte. - * @return The {@link DTE} - */ - static public DTE getInternalDataTypeEnum(final byte flags) { - - return DTE.valueOf((byte) ((flags & DTE_MASK) & 0xff)); - - } - - final public boolean isLiteral() { - - return (flags & VTE_MASK) >>> VTE_SHIFT == VTE.LITERAL.v; - - } - - final public boolean isBNode() { - - return (flags & VTE_MASK) >>> VTE_SHIFT == VTE.BNODE.v; - - } - - final public boolean isURI() { - - return (flags & VTE_MASK) >>> VTE_SHIFT == VTE.URI.v; - - } - - final public boolean isStatement() { - - return (flags & VTE_MASK) >>> VTE_SHIFT == VTE.STATEMENT.v; - - } - - /** - * {@inheritDoc} - * <p> - * This implementation based on the <code>inline</code> bit flag. This can - * be overridden in many derived classes which have compile time knowledge - * of whether the RDF value is inline or not. - */ - public boolean isInline() { - return isInline(flags); - } - - /** - * {@inheritDoc} - * <p> - * This implementation based on the <code>extension</code> bit flag. Since - * the extension flag is only used for datatype literals, this method can be - * overridden in many derived classes which have compile time knowledge of - * whether the value is an RDF {@link Literal} or not. - */ - public boolean isExtension() { - return isExtension(flags); - } - - /** - * {@inheritDoc} - * <p> - * This implementation based on the <code>inline</code> bit flag. This can - * be overridden in many derived classes which have compile time knowledge - * of whether the RDF value is inline or not. 
- */ - public boolean isTermId() { - return !isInline(); - } - - final public boolean isNumeric() { - return isInline() && getDTE().isNumeric(); - } - - final public boolean isSignedNumeric() { - return isInline() && getDTE().isSignedNumeric(); - } - - final public boolean isUnsignedNumeric() { - return isInline() && getDTE().isUnsignedNumeric(); - } - - final public boolean isFixedNumeric() { - return isInline() && getDTE().isFixedNumeric(); - } - - final public boolean isBigNumeric() { - return isInline() && g... [truncated message content] |
From: <mrp...@us...> - 2010-07-23 17:36:44
|
Revision: 3272 http://bigdata.svn.sourceforge.net/bigdata/?rev=3272&view=rev Author: mrpersonick Date: 2010-07-23 17:36:38 +0000 (Fri, 23 Jul 2010) Log Message: ----------- adding support for extensions Added Paths: ----------- branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/ColorsEnumExtension.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/DefaultExtensionFactory.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/EpochExtension.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/ExtensionIV.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/IDatatypeURIResolver.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/IExtension.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/IExtensionFactory.java Added: branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/ColorsEnumExtension.java =================================================================== --- branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/ColorsEnumExtension.java (rev 0) +++ branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/ColorsEnumExtension.java 2010-07-23 17:36:38 UTC (rev 3272) @@ -0,0 +1,148 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +package com.bigdata.rdf.internal; + +import org.openrdf.model.Literal; +import org.openrdf.model.URI; +import org.openrdf.model.Value; +import org.openrdf.model.impl.URIImpl; +import com.bigdata.rdf.model.BigdataURI; +import com.bigdata.rdf.model.BigdataValueFactory; +import com.bigdata.rdf.store.BD; + + +public class ColorsEnumExtension implements IExtension { + + public static final URI COLOR = new URIImpl(BD.NAMESPACE + "Color"); + + private final BigdataURI color; + + public ColorsEnumExtension(final IDatatypeURIResolver resolver) { + + this.color = resolver.resolve(COLOR); + + } + + public BigdataURI getDatatype() { + + return color; + + } + + public ExtensionIV createIV(final Value value) { + + if (value instanceof Literal == false) + throw new IllegalArgumentException(); + + final Literal l = (Literal) value; + + if (l.getDatatype() == null || !color.equals(l.getDatatype())) + throw new IllegalArgumentException(); + + final String s = value.stringValue(); + + final Color c = Enum.valueOf(Color.class, s); + + // not a valid color + if (c == null) + return null; + + final AbstractLiteralIV delegate = new XSDByteIV(c.getByte()); + + return new ExtensionIV(delegate, (TermId) getDatatype().getIV()); + + } + + public Value asValue(final ExtensionIV iv, final BigdataValueFactory vf) { + + final byte b = iv.getDelegate().byteValue(); + + final Color c = Color.valueOf(b); + + return vf.createLiteral(c.toString(), color); + + } + + /** + * Simple demonstration enum for some common colors. Can fit up to 256 enum + * values into an enum projected onto a byte. 
+ */ + public enum Color { + + Red((byte) 0), + Blue((byte) 1), + Green((byte) 2), + Yellow((byte) 3), + Orange((byte) 4), + Purple((byte) 5), + Black((byte) 6), + White((byte) 7), + Brown((byte) 8); + + private Color(final byte b) { + this.b = b; + } + + static final public Color valueOf(final byte b) { + /* + * Note: This switch MUST correspond to the declarations above (you can + * not made the cases of the switch from [v] since it is not considered + * a to be constant by the compiler). + * + * Note: This masks off everything but the lower 4 bits. + */ + switch (b) { + case 0: + return Red; + case 1: + return Blue; + case 2: + return Green; + case 3: + return Yellow; + case 4: + return Orange; + case 5: + return Purple; + case 6: + return Black; + case 7: + return White; + case 8: + return Brown; + default: + throw new IllegalArgumentException(Byte.toString(b)); + } + } + + private final byte b; + + public byte getByte() { + return b; + } + + } + +} Added: branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/DefaultExtensionFactory.java =================================================================== --- branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/DefaultExtensionFactory.java (rev 0) +++ branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/DefaultExtensionFactory.java 2010-07-23 17:36:38 UTC (rev 3272) @@ -0,0 +1,22 @@ +package com.bigdata.rdf.internal; + +public class DefaultExtensionFactory implements IExtensionFactory { + + private final IExtension[] extensions; + + public DefaultExtensionFactory(final IDatatypeURIResolver resolver) { + + extensions = new IExtension[] { + new EpochExtension(resolver), + new ColorsEnumExtension(resolver) + }; + + } + + public IExtension[] getExtensions() { + + return extensions; + + } + +} Added: branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/EpochExtension.java 
=================================================================== --- branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/EpochExtension.java (rev 0) +++ branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/EpochExtension.java 2010-07-23 17:36:38 UTC (rev 3272) @@ -0,0 +1,86 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +package com.bigdata.rdf.internal; + +import org.openrdf.model.Literal; +import org.openrdf.model.URI; +import org.openrdf.model.Value; +import org.openrdf.model.datatypes.XMLDatatypeUtil; +import org.openrdf.model.impl.URIImpl; +import com.bigdata.rdf.model.BigdataURI; +import com.bigdata.rdf.model.BigdataValueFactory; +import com.bigdata.rdf.store.BD; + + +public class EpochExtension implements IExtension { + + public static final URI EPOCH = new URIImpl(BD.NAMESPACE + "Epoch"); + + private final BigdataURI epoch; + + public EpochExtension(final IDatatypeURIResolver resolver) { + + this.epoch = resolver.resolve(EPOCH); + + } + + public BigdataURI getDatatype() { + + return epoch; + + } + + public ExtensionIV createIV(final Value value) { + + if (value instanceof Literal == false) + throw new IllegalArgumentException(); + + final Literal lit = (Literal) 
value; + + if (lit.getDatatype() == null || + !EPOCH.stringValue().equals(lit.getDatatype().stringValue())) + throw new IllegalArgumentException(); + + final String s = value.stringValue(); + + final long l = XMLDatatypeUtil.parseLong(s); + + // can't have negative epoch values + if (l < 0) + return null; + + final AbstractLiteralIV delegate = new XSDLongIV(l); + + return new ExtensionIV(delegate, (TermId) getDatatype().getIV()); + + } + + public Value asValue(final ExtensionIV iv, final BigdataValueFactory vf) { + + return vf.createLiteral(iv.stringValue(), epoch); + + } + +} Added: branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/ExtensionIV.java =================================================================== --- branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/ExtensionIV.java (rev 0) +++ branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/ExtensionIV.java 2010-07-23 17:36:38 UTC (rev 3272) @@ -0,0 +1,88 @@ +package com.bigdata.rdf.internal; + +import com.bigdata.rawstore.Bytes; +import com.bigdata.rdf.model.BigdataLiteral; +import com.bigdata.rdf.model.BigdataValue; +import com.bigdata.rdf.model.BigdataValueFactory; + +public class ExtensionIV<V extends BigdataLiteral> + extends AbstractInlineIV<V, Object> { + + /** + * + */ + private static final long serialVersionUID = 8267554196603121194L; + + private final AbstractLiteralIV delegate; + + private final TermId datatype; + + public ExtensionIV(final AbstractLiteralIV delegate, + final TermId datatype) { + super(VTE.LITERAL, DTE.Extension); + + this.delegate = delegate; + this.datatype = datatype; + } + + public AbstractLiteralIV getDelegate() { + return delegate; + } + + @Override + public String stringValue() { + return delegate.stringValue(); + } + + public Object getInlineValue() { + return delegate.getInlineValue(); + } + + @Override + public TermId getExtensionDatatype() { + return datatype; + } + + /** + * Return 
the hash code of the delegate inline value. + */ + public int hashCode() { + return delegate.hashCode(); + } + + public boolean equals(final Object o) { + if(this==o) return true; + if(o instanceof ExtensionIV) { + return this.delegate.equals(((ExtensionIV) o).delegate) && + this.datatype.equals(((ExtensionIV) o).datatype); + } + return false; + } + + protected int _compareTo(final IV o) { + + int ret = datatype._compareTo(((ExtensionIV) o).datatype); + + if (ret != 0) + return ret; + + return delegate._compareTo(((ExtensionIV) o).delegate); + + } + + /** + * Return the normal length of the delegate plus 8 bytes for the term ID + * of the extension datatype. + */ + public int byteLength() { + return delegate.byteLength() + Bytes.SIZEOF_LONG; + } + + @SuppressWarnings("unchecked") + public V asValue(final BigdataValueFactory f, + final ILexiconConfiguration config) + throws UnsupportedOperationException { + return (V) config.getExtension(datatype).asValue(this, f); + } + +} Added: branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/IDatatypeURIResolver.java =================================================================== --- branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/IDatatypeURIResolver.java (rev 0) +++ branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/IDatatypeURIResolver.java 2010-07-23 17:36:38 UTC (rev 3272) @@ -0,0 +1,53 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +package com.bigdata.rdf.internal; + +import org.openrdf.model.URI; +import com.bigdata.rdf.lexicon.LexiconRelation; +import com.bigdata.rdf.model.BigdataURI; + +public interface IDatatypeURIResolver { + + /** + * Returns a fully resolved datatype URI with the {@link TermId} set. + * {@link IExtension}s will handle encoding and decoding of inline literals + * for custom datatypes, however to do so they will need the term identifier + * for the custom datatype. By passing an instance of this interface into + * the constructor for the {@link IExtension}, it will be able to resolve + * its datatype URI and cache it for future use. + * <p> + * If the datatype URI is not already in the lexicon this method MUST add + * it to the lexicon so that it has an assigned term identifier. + * <p> + * This is implemented by the {@link LexiconRelation}. + * + * @param datatypeURI + * the datatype URI to resolve + * @return + * the fully resolved term + */ + BigdataURI resolve(final URI datatypeURI); + +} Added: branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/IExtension.java =================================================================== --- branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/IExtension.java (rev 0) +++ branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/IExtension.java 2010-07-23 17:36:38 UTC (rev 3272) @@ -0,0 +1,40 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... 
+ +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +package com.bigdata.rdf.internal; + +import org.openrdf.model.Value; +import com.bigdata.rdf.model.BigdataURI; +import com.bigdata.rdf.model.BigdataValueFactory; + + +public interface IExtension { + + BigdataURI getDatatype(); + + ExtensionIV createIV(final Value value); + + Value asValue(final ExtensionIV iv, final BigdataValueFactory vf); + +} Added: branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/IExtensionFactory.java =================================================================== --- branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/IExtensionFactory.java (rev 0) +++ branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/IExtensionFactory.java 2010-07-23 17:36:38 UTC (rev 3272) @@ -0,0 +1,31 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +package com.bigdata.rdf.internal; + +public interface IExtensionFactory { + + IExtension[] getExtensions(); + +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <mrp...@us...> - 2010-07-23 18:53:54
|
Revision: 3277 http://bigdata.svn.sourceforge.net/bigdata/?rev=3277&view=rev Author: mrpersonick Date: 2010-07-23 18:53:48 +0000 (Fri, 23 Jul 2010) Log Message: ----------- cleaning up, documenting Modified Paths: -------------- branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/ColorsEnumExtension.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/EpochExtension.java Modified: branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/ColorsEnumExtension.java =================================================================== --- branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/ColorsEnumExtension.java 2010-07-23 18:43:00 UTC (rev 3276) +++ branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/ColorsEnumExtension.java 2010-07-23 18:53:48 UTC (rev 3277) @@ -32,9 +32,15 @@ import com.bigdata.rdf.model.BigdataValueFactory; import com.bigdata.rdf.store.BD; - +/** + * Example of how to do a custom enum and map that enum over a byte using a + * native inline {@link XSDByteIV}. + */ public class ColorsEnumExtension implements IExtension { + /** + * The datatype URI for the colors enum extension. + */ public static final URI COLOR = new URIImpl(BD.NAMESPACE + "Color"); private BigdataURI color; @@ -55,6 +61,15 @@ } + /** + * Attempts to convert the supplied RDF value into a colors enum + * representation. Tests for a literal value with the correct datatype + * that can be converted to one of the colors in the {@link Color} enum + * based on the string value of the literal's label. Each {@link Color} + * in the enum maps to a particular byte. This byte is encoded in a + * delegate {@link XSDByteIV}, and an {@link ExtensionIV} is returned that + * wraps the native type. 
+ */ public ExtensionIV createIV(final Value value) { if (value instanceof Literal == false) @@ -79,12 +94,20 @@ } + /** + * Attempt to convert the {@link AbstractLiteralIV#byteValue()} back into + * a {@link Color}, and then use the string value of the {@link Color} to + * create an RDF literal. + */ public Value asValue(final ExtensionIV iv, final BigdataValueFactory vf) { final byte b = iv.getDelegate().byteValue(); final Color c = Color.valueOf(b); + if (c == null) + throw new RuntimeException("bad color got encoded somehow"); + return vf.createLiteral(c.toString(), color); } @@ -110,13 +133,6 @@ } static final public Color valueOf(final byte b) { - /* - * Note: This switch MUST correspond to the declarations above (you can - * not made the cases of the switch from [v] since it is not considered - * a to be constant by the compiler). - * - * Note: This masks off everything but the lower 4 bits. - */ switch (b) { case 0: return Red; Modified: branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/EpochExtension.java =================================================================== --- branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/EpochExtension.java 2010-07-23 18:43:00 UTC (rev 3276) +++ branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/EpochExtension.java 2010-07-23 18:53:48 UTC (rev 3277) @@ -33,15 +33,21 @@ import com.bigdata.rdf.model.BigdataValueFactory; import com.bigdata.rdf.store.BD; - +/** + * This implementation of {@link IExtension} implements inlining for literals + * that represent time in milliseconds since the epoch. The milliseconds are + * encoded as an inline long. + */ public class EpochExtension implements IExtension { + /** + * The datatype URI for the epoch extension. 
+ */ public static final URI EPOCH = new URIImpl(BD.NAMESPACE + "Epoch"); private BigdataURI epoch; public EpochExtension() { - } public void resolveDatatype(final IDatatypeURIResolver resolver) { @@ -56,6 +62,13 @@ } + /** + * Attempts to convert the supplied value into an epoch representation. + * Tests for a literal value with the correct datatype that can be converted + * to a positive long integer. Encodes the long in a delegate + * {@link XSDLongIV}, and returns an {@link ExtensionIV} to wrap the native + * type. + */ public ExtensionIV createIV(final Value value) { if (value instanceof Literal == false) @@ -63,8 +76,9 @@ final Literal lit = (Literal) value; - if (lit.getDatatype() == null || - !EPOCH.stringValue().equals(lit.getDatatype().stringValue())) + final URI dt = lit.getDatatype(); + + if (dt == null || !EPOCH.stringValue().equals(dt.stringValue())) throw new IllegalArgumentException(); final String s = value.stringValue(); @@ -81,6 +95,11 @@ } + /** + * Use the string value of the {@link ExtensionIV} (which defers to the + * string value of the native type) to create a literal with the epoch + * datatype. + */ public Value asValue(final ExtensionIV iv, final BigdataValueFactory vf) { return vf.createLiteral(iv.stringValue(), epoch); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <mrp...@us...> - 2010-07-23 20:29:50
|
Revision: 3279 http://bigdata.svn.sourceforge.net/bigdata/?rev=3279&view=rev Author: mrpersonick Date: 2010-07-23 20:29:43 +0000 (Fri, 23 Jul 2010) Log Message: ----------- Added generics for extension classes Modified Paths: -------------- branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/ColorsEnumExtension.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/EpochExtension.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/IExtension.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/ILexiconConfiguration.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/LexiconConfiguration.java Modified: branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/ColorsEnumExtension.java =================================================================== --- branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/ColorsEnumExtension.java 2010-07-23 18:56:20 UTC (rev 3278) +++ branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/ColorsEnumExtension.java 2010-07-23 20:29:43 UTC (rev 3279) @@ -29,6 +29,7 @@ import org.openrdf.model.Value; import org.openrdf.model.impl.URIImpl; import com.bigdata.rdf.model.BigdataURI; +import com.bigdata.rdf.model.BigdataValue; import com.bigdata.rdf.model.BigdataValueFactory; import com.bigdata.rdf.store.BD; @@ -36,7 +37,7 @@ * Example of how to do a custom enum and map that enum over a byte using a * native inline {@link XSDByteIV}. */ -public class ColorsEnumExtension implements IExtension { +public class ColorsEnumExtension<V extends BigdataValue> implements IExtension<V> { /** * The datatype URI for the colors enum extension. @@ -99,7 +100,7 @@ * a {@link Color}, and then use the string value of the {@link Color} to * create an RDF literal. 
*/ - public Value asValue(final ExtensionIV iv, final BigdataValueFactory vf) { + public V asValue(final ExtensionIV iv, final BigdataValueFactory vf) { final byte b = iv.getDelegate().byteValue(); @@ -108,7 +109,7 @@ if (c == null) throw new RuntimeException("bad color got encoded somehow"); - return vf.createLiteral(c.toString(), color); + return (V) vf.createLiteral(c.toString(), color); } Modified: branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/EpochExtension.java =================================================================== --- branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/EpochExtension.java 2010-07-23 18:56:20 UTC (rev 3278) +++ branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/EpochExtension.java 2010-07-23 20:29:43 UTC (rev 3279) @@ -30,6 +30,7 @@ import org.openrdf.model.datatypes.XMLDatatypeUtil; import org.openrdf.model.impl.URIImpl; import com.bigdata.rdf.model.BigdataURI; +import com.bigdata.rdf.model.BigdataValue; import com.bigdata.rdf.model.BigdataValueFactory; import com.bigdata.rdf.store.BD; @@ -38,7 +39,7 @@ * that represent time in milliseconds since the epoch. The milliseconds are * encoded as an inline long. */ -public class EpochExtension implements IExtension { +public class EpochExtension<V extends BigdataValue> implements IExtension<V> { /** * The datatype URI for the epoch extension. @@ -100,9 +101,9 @@ * string value of the native type) to create a literal with the epoch * datatype. 
*/ - public Value asValue(final ExtensionIV iv, final BigdataValueFactory vf) { + public V asValue(final ExtensionIV iv, final BigdataValueFactory vf) { - return vf.createLiteral(iv.stringValue(), epoch); + return (V) vf.createLiteral(iv.stringValue(), epoch); } Modified: branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/IExtension.java =================================================================== --- branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/IExtension.java 2010-07-23 18:56:20 UTC (rev 3278) +++ branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/IExtension.java 2010-07-23 20:29:43 UTC (rev 3279) @@ -26,6 +26,7 @@ import org.openrdf.model.Value; import com.bigdata.rdf.model.BigdataURI; +import com.bigdata.rdf.model.BigdataValue; import com.bigdata.rdf.model.BigdataValueFactory; /** @@ -37,7 +38,7 @@ * the datatype URI it needs resolved and the resolver will lookup (or create) * the {@link TermId}. */ -public interface IExtension { +public interface IExtension<V extends BigdataValue> { /** * This will be called very early in the IExtension lifecycle so that the @@ -77,6 +78,6 @@ * @return * the RDF value */ - Value asValue(final ExtensionIV iv, final BigdataValueFactory vf); + V asValue(final ExtensionIV iv, final BigdataValueFactory vf); } Modified: branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/ILexiconConfiguration.java =================================================================== --- branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/ILexiconConfiguration.java 2010-07-23 18:56:20 UTC (rev 3278) +++ branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/ILexiconConfiguration.java 2010-07-23 20:29:43 UTC (rev 3279) @@ -33,7 +33,7 @@ * Configuration determines which RDF Values are inlined into the statement * indices rather than being assigned term identifiers by the lexicon. 
*/ -public interface ILexiconConfiguration { +public interface ILexiconConfiguration<V extends BigdataValue> { /** * Create an inline {@link IV} for the supplied RDF value if inlining is @@ -61,7 +61,7 @@ * @return * the RDF value */ - Value asValue(final ExtensionIV iv, final BigdataValueFactory vf); + V asValue(final ExtensionIV iv, final BigdataValueFactory vf); /** * <code>true</code> iff the <code>vte</code> and <code>dte</code> Modified: branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/LexiconConfiguration.java =================================================================== --- branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/LexiconConfiguration.java 2010-07-23 18:56:20 UTC (rev 3278) +++ branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/LexiconConfiguration.java 2010-07-23 20:29:43 UTC (rev 3279) @@ -50,7 +50,8 @@ * @todo large literal size boundary. * @todo other configuration options. */ -public class LexiconConfiguration implements ILexiconConfiguration { +public class LexiconConfiguration<V extends BigdataValue> + implements ILexiconConfiguration { private boolean inlineLiterals, inlineBNodes; @@ -76,9 +77,9 @@ } - public Value asValue(final ExtensionIV iv, final BigdataValueFactory vf) { + public V asValue(final ExtensionIV iv, final BigdataValueFactory vf) { final TermId datatype = iv.getExtensionDatatype(); - return termIds.get(datatype).asValue(iv, vf); + return (V) termIds.get(datatype).asValue(iv, vf); } public IV createInlineIV(final Value value) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |