|
From: <dm...@us...> - 2010-09-14 13:50:38
|
Revision: 3542
http://bigdata.svn.sourceforge.net/bigdata/?rev=3542&view=rev
Author: dmacgbr
Date: 2010-09-14 13:50:31 +0000 (Tue, 14 Sep 2010)
Log Message:
-----------
See trac #146. Allow specification of a default graph when running a bulk load of RDF triple data into a quad store. This is achieved by setting com.bigdata.rdf.load.MappedRDFDataLoadMaster.defaultGraph to the desired value, e.g. "http://xyz.com/data/defaultGraph", in the bigdata configuration file. This parameter has no effect when loading a triple store. Further, if it is not specified when loading a quad store, the system's behaviour is unaffected by this change, i.e. the graph/context co-ordinate in each quad remains null. Several of the unit tests touched by this change have been modified on the assumption that the default graph has not been specified.
Modified Paths:
--------------
trunk/bigdata-rdf/src/java/com/bigdata/rdf/load/MappedRDFDataLoadMaster.java
trunk/bigdata-rdf/src/java/com/bigdata/rdf/load/MappedRDFFileLoadTask.java
trunk/bigdata-rdf/src/java/com/bigdata/rdf/load/SingleResourceReaderTask.java
trunk/bigdata-rdf/src/java/com/bigdata/rdf/rio/AsynchronousStatementBufferFactory.java
trunk/bigdata-rdf/src/java/com/bigdata/rdf/rio/BasicRioLoader.java
trunk/bigdata-rdf/src/java/com/bigdata/rdf/rio/IRioLoader.java
trunk/bigdata-rdf/src/java/com/bigdata/rdf/rio/PresortRioLoader.java
trunk/bigdata-rdf/src/java/com/bigdata/rdf/store/DataLoader.java
trunk/bigdata-rdf/src/java/com/bigdata/rdf/util/Splitter.java
trunk/bigdata-rdf/src/test/com/bigdata/rdf/rio/AbstractRIOTestCase.java
trunk/bigdata-rdf/src/test/com/bigdata/rdf/rio/EDSAsyncLoader.java
trunk/bigdata-rdf/src/test/com/bigdata/rdf/rio/TestAsynchronousStatementBufferFactory.java
trunk/bigdata-sails/src/test/com/bigdata/rdf/stress/LoadClosureAndQueryTest.java
Modified: trunk/bigdata-rdf/src/java/com/bigdata/rdf/load/MappedRDFDataLoadMaster.java
===================================================================
--- trunk/bigdata-rdf/src/java/com/bigdata/rdf/load/MappedRDFDataLoadMaster.java 2010-09-14 10:57:21 UTC (rev 3541)
+++ trunk/bigdata-rdf/src/java/com/bigdata/rdf/load/MappedRDFDataLoadMaster.java 2010-09-14 13:50:31 UTC (rev 3542)
@@ -272,7 +272,18 @@
//
// /** {@value #DEFAULT_MAX_TRIES} */
// int DEFAULT_MAX_TRIES = 3;
-
+
+ /**
+ * The value that will be used for the graph/context co-ordinate when
+ * loading data represented in a triple format into a quad store.
+ */
+ String DEFAULT_GRAPH = "defaultGraph" ;
+
+ /**
+ * TODO Should we always enforce a real value? i.e. provide a real default
+ * or abort the load.
+ */
+ String DEFAULT_DEFAULT_GRAPH = null ;
}
/**
@@ -402,6 +413,12 @@
private transient RDFFormat rdfFormat;
/**
+ * The value that will be used for the graph/context co-ordinate when
+ * loading data represented in a triple format into a quad store.
+ */
+ public final String defaultGraph ;
+
+ /**
* Force the load of the NxParser integration class and its registration
* of the NQuadsParser#nquads RDFFormat.
*
@@ -496,6 +513,8 @@
sb.append(", " + ConfigurationOptions.RDF_FORMAT + "=" + rdfFormat);
+ sb.append(", " + ConfigurationOptions.DEFAULT_GRAPH + "=" + defaultGraph) ;
+
sb.append(", " + ConfigurationOptions.FORCE_OVERFLOW_BEFORE_CLOSURE + "="
+ forceOverflowBeforeClosure);
@@ -601,6 +620,10 @@
}
+ defaultGraph = (String) config.getEntry(component,
+ ConfigurationOptions.DEFAULT_GRAPH, String.class,
+ ConfigurationOptions.DEFAULT_DEFAULT_GRAPH);
+
rejectedExecutionDelay = (Long) config.getEntry(
component,
ConfigurationOptions.REJECTED_EXECUTION_DELAY, Long.TYPE,
@@ -979,6 +1002,7 @@
jobState.ontology,//file
jobState.ontology.getPath(),//baseURI
jobState.getRDFFormat(),//
+ jobState.defaultGraph,
jobState.ontologyFileFilter //
);
Modified: trunk/bigdata-rdf/src/java/com/bigdata/rdf/load/MappedRDFFileLoadTask.java
===================================================================
--- trunk/bigdata-rdf/src/java/com/bigdata/rdf/load/MappedRDFFileLoadTask.java 2010-09-14 10:57:21 UTC (rev 3541)
+++ trunk/bigdata-rdf/src/java/com/bigdata/rdf/load/MappedRDFFileLoadTask.java 2010-09-14 13:50:31 UTC (rev 3542)
@@ -223,6 +223,7 @@
jobState.valuesInitialCapacity,//
jobState.bnodesInitialCapacity,//
jobState.getRDFFormat(), //
+ jobState.defaultGraph,
parserOptions,//
false, // deleteAfter is handled by the master!
jobState.parserPoolSize, //
Modified: trunk/bigdata-rdf/src/java/com/bigdata/rdf/load/SingleResourceReaderTask.java
===================================================================
--- trunk/bigdata-rdf/src/java/com/bigdata/rdf/load/SingleResourceReaderTask.java 2010-09-14 10:57:21 UTC (rev 3541)
+++ trunk/bigdata-rdf/src/java/com/bigdata/rdf/load/SingleResourceReaderTask.java 2010-09-14 13:50:31 UTC (rev 3542)
@@ -186,7 +186,7 @@
// run the parser.
// @todo reuse the same underlying parser instance?
- loader.loadRdf(reader, baseURL, rdfFormat, parserOptions);
+ loader.loadRdf(reader, baseURL, rdfFormat, null, parserOptions);
success = true;
Modified: trunk/bigdata-rdf/src/java/com/bigdata/rdf/rio/AsynchronousStatementBufferFactory.java
===================================================================
--- trunk/bigdata-rdf/src/java/com/bigdata/rdf/rio/AsynchronousStatementBufferFactory.java 2010-09-14 10:57:21 UTC (rev 3541)
+++ trunk/bigdata-rdf/src/java/com/bigdata/rdf/rio/AsynchronousStatementBufferFactory.java 2010-09-14 13:50:31 UTC (rev 3542)
@@ -356,8 +356,14 @@
* The default {@link RDFFormat}.
*/
private final RDFFormat defaultFormat;
-
+
/**
+ * The value that will be used for the graph/context co-ordinate when
+ * loading data represented in a triple format into a quad store.
+ */
+ private final String defaultGraph;
+
+ /**
* Options for the {@link RDFParser}.
*/
private final RDFParserOptions parserOptions;
@@ -1423,7 +1429,7 @@
try {
// run the parser.
new PresortRioLoader(buffer).loadRdf(reader, baseURL,
- rdfFormat, parserOptions);
+ rdfFormat, defaultGraph, parserOptions);
} finally {
reader.close();
}
@@ -1490,6 +1496,9 @@
* {@link BNode}s parsed from a single document.
* @param defaultFormat
* The default {@link RDFFormat} which will be assumed.
+ * @param defaultGraph
+ * The value that will be used for the graph/context co-ordinate when
+ * loading data represented in a triple format into a quad store.
* @param parserOptions
* Options for the {@link RDFParser}.
* @param deleteAfter
@@ -1529,6 +1538,7 @@
final int valuesInitialCapacity,//
final int bnodesInitialCapacity, //
final RDFFormat defaultFormat,//
+ final String defaultGraph,//
final RDFParserOptions parserOptions,//
final boolean deleteAfter,//
final int parserPoolSize,//
@@ -1566,6 +1576,8 @@
this.defaultFormat = defaultFormat;
+ this.defaultGraph = defaultGraph;
+
this.parserOptions = parserOptions;
this.deleteAfter = deleteAfter;
Modified: trunk/bigdata-rdf/src/java/com/bigdata/rdf/rio/BasicRioLoader.java
===================================================================
--- trunk/bigdata-rdf/src/java/com/bigdata/rdf/rio/BasicRioLoader.java 2010-09-14 10:57:21 UTC (rev 3541)
+++ trunk/bigdata-rdf/src/java/com/bigdata/rdf/rio/BasicRioLoader.java 2010-09-14 13:50:31 UTC (rev 3542)
@@ -37,6 +37,8 @@
import org.openrdf.rio.RDFParser;
import org.openrdf.rio.Rio;
+import com.bigdata.rdf.model.BigdataURI;
+
/**
* Parses data but does not load it into the indices.
*
@@ -74,6 +76,8 @@
private final ValueFactory valueFactory;
+ protected String defaultGraph;
+
public BasicRioLoader(final ValueFactory valueFactory) {
if (valueFactory == null)
@@ -153,18 +157,20 @@
}
final public void loadRdf(final InputStream is, final String baseURI,
- final RDFFormat rdfFormat, final RDFParserOptions options)
+ final RDFFormat rdfFormat, final String defaultGraph,
+ final RDFParserOptions options)
throws Exception {
- loadRdf2(is, baseURI, rdfFormat, options);
+ loadRdf2(is, baseURI, rdfFormat, defaultGraph, options);
}
final public void loadRdf(final Reader reader, final String baseURI,
- final RDFFormat rdfFormat, final RDFParserOptions options)
+ final RDFFormat rdfFormat, final String defaultGraph,
+ final RDFParserOptions options)
throws Exception {
- loadRdf2(reader, baseURI, rdfFormat, options);
+ loadRdf2(reader, baseURI, rdfFormat, defaultGraph, options);
}
@@ -180,7 +186,7 @@
* @throws Exception
*/
protected void loadRdf2(final Object source, final String baseURI,
- final RDFFormat rdfFormat, final RDFParserOptions options)
+ final RDFFormat rdfFormat, final String defaultGraph, final RDFParserOptions options)
throws Exception {
if (source == null)
@@ -198,6 +204,8 @@
if (log.isInfoEnabled())
log.info("format=" + rdfFormat + ", options=" + options);
+ this.defaultGraph = defaultGraph ;
+
final RDFParser parser = getParser(rdfFormat);
// apply options to the parser
@@ -212,7 +220,7 @@
// Note: reset so that rates are correct for each source loaded.
stmtsAdded = 0;
-
+
try {
before();
Modified: trunk/bigdata-rdf/src/java/com/bigdata/rdf/rio/IRioLoader.java
===================================================================
--- trunk/bigdata-rdf/src/java/com/bigdata/rdf/rio/IRioLoader.java 2010-09-14 10:57:21 UTC (rev 3541)
+++ trunk/bigdata-rdf/src/java/com/bigdata/rdf/rio/IRioLoader.java 2010-09-14 13:50:31 UTC (rev 3542)
@@ -72,12 +72,14 @@
* The base URL for those data.
* @param rdfFormat
* The interchange format.
+ * @param defaultGraph
+ * The default graph.
* @param options
* Options to be applied to the {@link RDFParser}.
* @throws Exception
*/
public void loadRdf(Reader reader, String baseURL, RDFFormat rdfFormat,
- RDFParserOptions options) throws Exception;
+ String defaultGraph, RDFParserOptions options) throws Exception;
/**
* Parse RDF data.
@@ -88,11 +90,13 @@
* The base URL for those data.
* @param rdfFormat
* The interchange format.
+ * @param defaultGraph
+ * The default graph.
* @param options
* Options to be applied to the {@link RDFParser}.
* @throws Exception
*/
public void loadRdf(InputStream is, String baseURI, RDFFormat rdfFormat,
- RDFParserOptions options) throws Exception;
+ String defaultGraph, RDFParserOptions options) throws Exception;
}
Modified: trunk/bigdata-rdf/src/java/com/bigdata/rdf/rio/PresortRioLoader.java
===================================================================
--- trunk/bigdata-rdf/src/java/com/bigdata/rdf/rio/PresortRioLoader.java 2010-09-14 10:57:21 UTC (rev 3541)
+++ trunk/bigdata-rdf/src/java/com/bigdata/rdf/rio/PresortRioLoader.java 2010-09-14 13:50:31 UTC (rev 3542)
@@ -23,11 +23,14 @@
*/
package com.bigdata.rdf.rio;
+import org.openrdf.model.Resource;
import org.openrdf.model.Statement;
import org.openrdf.model.Value;
import org.openrdf.rio.RDFHandler;
import org.openrdf.rio.RDFHandlerException;
+import com.bigdata.rdf.model.BigdataURI;
+
/**
* Statement handler for the RIO RDF Parser that writes on a
* {@link StatementBuffer}.
@@ -45,6 +48,12 @@
final protected IStatementBuffer<?> buffer;
/**
+ * The value that will be used for the graph/context co-ordinate when
+ * loading data represented in a triple format into a quad store.
+ */
+ private BigdataURI defaultGraphURI = null ;
+
+ /**
* Sets up parser to load RDF.
*
* @param buffer
@@ -58,7 +67,7 @@
this.buffer = buffer;
}
-
+
/**
* bulk insert the buffered data into the store.
*/
@@ -87,8 +96,11 @@
public RDFHandler newRDFHandler() {
+ defaultGraphURI = null != defaultGraph && 4 == buffer.getDatabase ().getSPOKeyArity ()
+ ? buffer.getDatabase ().getValueFactory ().createURI ( defaultGraph )
+ : null
+ ;
return this;
-
}
public void handleStatement( final Statement stmt ) {
@@ -98,9 +110,13 @@
log.debug(stmt);
}
-
+
+ Resource graph = stmt.getContext() ;
+ if ( null == graph
+ && null != defaultGraphURI ) // only true when we know we are loading a quad store
+ graph = defaultGraphURI ;
// buffer the write (handles overflow).
- buffer.add( stmt.getSubject(), stmt.getPredicate(), stmt.getObject(), stmt.getContext() );
+ buffer.add( stmt.getSubject(), stmt.getPredicate(), stmt.getObject(), graph );
stmtsAdded++;
Modified: trunk/bigdata-rdf/src/java/com/bigdata/rdf/store/DataLoader.java
===================================================================
--- trunk/bigdata-rdf/src/java/com/bigdata/rdf/store/DataLoader.java 2010-09-14 10:57:21 UTC (rev 3541)
+++ trunk/bigdata-rdf/src/java/com/bigdata/rdf/store/DataLoader.java 2010-09-14 13:50:31 UTC (rev 3542)
@@ -640,7 +640,7 @@
final LoadStats totals = new LoadStats();
- loadData3(totals, reader, baseURL, rdfFormat, true/*endOfBatch*/);
+ loadData3(totals, reader, baseURL, rdfFormat, null, true/*endOfBatch*/);
return totals;
@@ -668,7 +668,7 @@
final LoadStats totals = new LoadStats();
- loadData3(totals, is, baseURL, rdfFormat, true/* endOfBatch */);
+ loadData3(totals, is, baseURL, rdfFormat, null, true/* endOfBatch */);
return totals;
@@ -704,7 +704,7 @@
final LoadStats totals = new LoadStats();
- loadData3(totals, is, baseURL, rdfFormat, true/*endOfBatch*/);
+ loadData3(totals, is, baseURL, rdfFormat, null, true/*endOfBatch*/);
return totals;
@@ -762,7 +762,7 @@
if(file.exists()) {
loadFiles(totals, 0/* depth */, file, baseURL,
- rdfFormat, filter, endOfBatch);
+ rdfFormat, null, filter, endOfBatch);
return;
@@ -789,7 +789,7 @@
try {
- loadData3(totals, reader, baseURL, rdfFormat, endOfBatch);
+ loadData3(totals, reader, baseURL, rdfFormat, null, endOfBatch);
} catch (Exception ex) {
@@ -817,6 +817,9 @@
* The format of the file (optional, when not specified the
* format is deduced for each file in turn using the
* {@link RDFFormat} static methods).
+ * @param defaultGraph
+ * The value that will be used for the graph/context co-ordinate when
+ * loading data represented in a triple format into a quad store.
* @param filter
* A filter selecting the file names that will be loaded
* (optional). When specified, the filter MUST accept directories
@@ -827,7 +830,8 @@
* @throws IOException
*/
public LoadStats loadFiles(final File file, final String baseURI,
- final RDFFormat rdfFormat, final FilenameFilter filter)
+ final RDFFormat rdfFormat, final String defaultGraph,
+ final FilenameFilter filter)
throws IOException {
if (file == null)
@@ -835,7 +839,7 @@
final LoadStats totals = new LoadStats();
- loadFiles(totals, 0/* depth */, file, baseURI, rdfFormat, filter, true/* endOfBatch */
+ loadFiles(totals, 0/* depth */, file, baseURI, rdfFormat, defaultGraph, filter, true/* endOfBatch */
);
return totals;
@@ -844,7 +848,8 @@
protected void loadFiles(final LoadStats totals, final int depth,
final File file, final String baseURI, final RDFFormat rdfFormat,
- final FilenameFilter filter, final boolean endOfBatch)
+ final String defaultGraph, final FilenameFilter filter,
+ final boolean endOfBatch)
throws IOException {
if (file.isDirectory()) {
@@ -864,7 +869,7 @@
// final RDFFormat fmt = RDFFormat.forFileName(f.toString(),
// rdfFormat);
- loadFiles(totals, depth + 1, f, baseURI, rdfFormat, filter,
+ loadFiles(totals, depth + 1, f, baseURI, rdfFormat, defaultGraph, filter,
(depth == 0 && i < files.length ? false : endOfBatch));
}
@@ -919,7 +924,7 @@
final String s = baseURI != null ? baseURI : file.toURI()
.toString();
- loadData3(totals, reader, s, fmt, endOfBatch);
+ loadData3(totals, reader, s, fmt, defaultGraph, endOfBatch);
return;
@@ -955,7 +960,7 @@
*/
protected void loadData3(final LoadStats totals, final Object source,
final String baseURL, final RDFFormat rdfFormat,
- final boolean endOfBatch) throws IOException {
+ final String defaultGraph, final boolean endOfBatch) throws IOException {
final long begin = System.currentTimeMillis();
@@ -978,11 +983,10 @@
}
// Setup the loader.
- final PresortRioLoader loader = new PresortRioLoader(buffer);
+ final PresortRioLoader loader = new PresortRioLoader ( buffer ) ;
// @todo review: disable auto-flush - caller will handle flush of the buffer.
// loader.setFlush(false);
-
// add listener to log progress.
loader.addRioLoaderListener( new RioLoaderListener() {
@@ -1006,12 +1010,12 @@
if(source instanceof Reader) {
- loader.loadRdf((Reader) source, baseURL, rdfFormat, parserOptions);
+ loader.loadRdf((Reader) source, baseURL, rdfFormat, defaultGraph, parserOptions);
} else if (source instanceof InputStream) {
loader.loadRdf((InputStream) source, baseURL, rdfFormat,
- parserOptions);
+ defaultGraph, parserOptions);
} else
throw new AssertionError();
@@ -1360,7 +1364,7 @@
// rdfFormat, filter);
dataLoader.loadFiles(totals, 0/* depth */, fileOrDir, baseURI,
- rdfFormat, filter, true/* endOfBatch */
+ rdfFormat, null, filter, true/* endOfBatch */
);
}
Modified: trunk/bigdata-rdf/src/java/com/bigdata/rdf/util/Splitter.java
===================================================================
--- trunk/bigdata-rdf/src/java/com/bigdata/rdf/util/Splitter.java 2010-09-14 10:57:21 UTC (rev 3541)
+++ trunk/bigdata-rdf/src/java/com/bigdata/rdf/util/Splitter.java 2010-09-14 13:50:31 UTC (rev 3542)
@@ -714,7 +714,7 @@
try {
// run the parser.
new MyLoader(buffer).loadRdf(reader, baseURL,
- defaultRDFFormat, s.parserOptions);
+ defaultRDFFormat, null, s.parserOptions);
} finally {
reader.close();
}
Modified: trunk/bigdata-rdf/src/test/com/bigdata/rdf/rio/AbstractRIOTestCase.java
===================================================================
--- trunk/bigdata-rdf/src/test/com/bigdata/rdf/rio/AbstractRIOTestCase.java 2010-09-14 10:57:21 UTC (rev 3541)
+++ trunk/bigdata-rdf/src/test/com/bigdata/rdf/rio/AbstractRIOTestCase.java 2010-09-14 13:50:31 UTC (rev 3542)
@@ -401,7 +401,7 @@
});
- loader.loadRdf((Reader) reader, baseURI, rdfFormat, options);
+ loader.loadRdf((Reader) reader, baseURI, rdfFormat, null, options);
if (log.isInfoEnabled())
log.info("Done: " + resource);
@@ -681,7 +681,7 @@
loader.loadRdf(new BufferedReader(new InputStreamReader(
new FileInputStream(resource))), baseURI, rdfFormat,
- options);
+ null, options);
if(log.isInfoEnabled())
log.info("End of reparse: nerrors=" + nerrs + ", file="
Modified: trunk/bigdata-rdf/src/test/com/bigdata/rdf/rio/EDSAsyncLoader.java
===================================================================
--- trunk/bigdata-rdf/src/test/com/bigdata/rdf/rio/EDSAsyncLoader.java 2010-09-14 10:57:21 UTC (rev 3541)
+++ trunk/bigdata-rdf/src/test/com/bigdata/rdf/rio/EDSAsyncLoader.java 2010-09-14 13:50:31 UTC (rev 3542)
@@ -161,6 +161,7 @@
valuesInitialCapacity,//
bnodesInitialCapacity,//
RDFFormat.RDFXML, // defaultFormat
+ null, // defaultGraph
parserOptions, // parserOptions
false, // deleteAfter
poolSize, // parserPoolSize,
Modified: trunk/bigdata-rdf/src/test/com/bigdata/rdf/rio/TestAsynchronousStatementBufferFactory.java
===================================================================
--- trunk/bigdata-rdf/src/test/com/bigdata/rdf/rio/TestAsynchronousStatementBufferFactory.java 2010-09-14 10:57:21 UTC (rev 3541)
+++ trunk/bigdata-rdf/src/test/com/bigdata/rdf/rio/TestAsynchronousStatementBufferFactory.java 2010-09-14 13:50:31 UTC (rev 3542)
@@ -400,6 +400,7 @@
valuesInitialCapacity,//
bnodesInitialCapacity,//
RDFFormat.RDFXML, // defaultFormat
+ null, // defaultGraph
parserOptions, //
false, // deleteAfter
parallel?5:1, // parserPoolSize,
Modified: trunk/bigdata-sails/src/test/com/bigdata/rdf/stress/LoadClosureAndQueryTest.java
===================================================================
--- trunk/bigdata-sails/src/test/com/bigdata/rdf/stress/LoadClosureAndQueryTest.java 2010-09-14 10:57:21 UTC (rev 3541)
+++ trunk/bigdata-sails/src/test/com/bigdata/rdf/stress/LoadClosureAndQueryTest.java 2010-09-14 13:50:31 UTC (rev 3542)
@@ -1204,7 +1204,7 @@
try {
dataLoader.loadFiles(dataDir, null/* baseURI */,
- null/* rdfFormat */, filter);
+ null/* rdfFormat */, null, /* defaultGraph */filter);
} catch (IOException ex) {
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|