From: <dm...@us...> - 2010-09-14 13:50:38
|
Revision: 3542 http://bigdata.svn.sourceforge.net/bigdata/?rev=3542&view=rev Author: dmacgbr Date: 2010-09-14 13:50:31 +0000 (Tue, 14 Sep 2010) Log Message: ----------- See trac #146. Allow specification of a default graph when running a bulk load of RDF triple data into a quad store. This is achieved by setting com.bigdata.rdf.load.MappedRDFDataLoadMaster.defaultGraph to the desired value, e.g. "http://xyz.com/data/defaultGraph", in the bigdata configuration file. This parameter has no effect when loading a triple store. Further, if not specified when loading a quad store, the system's behaviour is unaffected by this change, i.e. the graph/context co-ordinate in each quad remains null. Several of the unit tests touched by this change have been modified so that they effectively assume that no default graph has been specified. Modified Paths: -------------- trunk/bigdata-rdf/src/java/com/bigdata/rdf/load/MappedRDFDataLoadMaster.java trunk/bigdata-rdf/src/java/com/bigdata/rdf/load/MappedRDFFileLoadTask.java trunk/bigdata-rdf/src/java/com/bigdata/rdf/load/SingleResourceReaderTask.java trunk/bigdata-rdf/src/java/com/bigdata/rdf/rio/AsynchronousStatementBufferFactory.java trunk/bigdata-rdf/src/java/com/bigdata/rdf/rio/BasicRioLoader.java trunk/bigdata-rdf/src/java/com/bigdata/rdf/rio/IRioLoader.java trunk/bigdata-rdf/src/java/com/bigdata/rdf/rio/PresortRioLoader.java trunk/bigdata-rdf/src/java/com/bigdata/rdf/store/DataLoader.java trunk/bigdata-rdf/src/java/com/bigdata/rdf/util/Splitter.java trunk/bigdata-rdf/src/test/com/bigdata/rdf/rio/AbstractRIOTestCase.java trunk/bigdata-rdf/src/test/com/bigdata/rdf/rio/EDSAsyncLoader.java trunk/bigdata-rdf/src/test/com/bigdata/rdf/rio/TestAsynchronousStatementBufferFactory.java trunk/bigdata-sails/src/test/com/bigdata/rdf/stress/LoadClosureAndQueryTest.java Modified: trunk/bigdata-rdf/src/java/com/bigdata/rdf/load/MappedRDFDataLoadMaster.java =================================================================== --- 
trunk/bigdata-rdf/src/java/com/bigdata/rdf/load/MappedRDFDataLoadMaster.java 2010-09-14 10:57:21 UTC (rev 3541) +++ trunk/bigdata-rdf/src/java/com/bigdata/rdf/load/MappedRDFDataLoadMaster.java 2010-09-14 13:50:31 UTC (rev 3542) @@ -272,7 +272,18 @@ // // /** {@value #DEFAULT_MAX_TRIES} */ // int DEFAULT_MAX_TRIES = 3; - + + /** + * The value that will be used for the graph/context co-ordinate when + * loading data represented in a triple format into a quad store. + */ + String DEFAULT_GRAPH = "defaultGraph" ; + + /** + * TODO Should we always enforce a real value? i.e. provide a real default + * or abort the load. + */ + String DEFAULT_DEFAULT_GRAPH = null ; } /** @@ -402,6 +413,12 @@ private transient RDFFormat rdfFormat; /** + * The value that will be used for the graph/context co-ordinate when + * loading data represented in a triple format into a quad store. + */ + public final String defaultGraph ; + + /** * Force the load of the NxParser integration class and its registration * of the NQuadsParser#nquads RDFFormat. 
* @@ -496,6 +513,8 @@ sb.append(", " + ConfigurationOptions.RDF_FORMAT + "=" + rdfFormat); + sb.append(", " + ConfigurationOptions.DEFAULT_GRAPH + "=" + defaultGraph) ; + sb.append(", " + ConfigurationOptions.FORCE_OVERFLOW_BEFORE_CLOSURE + "=" + forceOverflowBeforeClosure); @@ -601,6 +620,10 @@ } + defaultGraph = (String) config.getEntry(component, + ConfigurationOptions.DEFAULT_GRAPH, String.class, + ConfigurationOptions.DEFAULT_DEFAULT_GRAPH); + rejectedExecutionDelay = (Long) config.getEntry( component, ConfigurationOptions.REJECTED_EXECUTION_DELAY, Long.TYPE, @@ -979,6 +1002,7 @@ jobState.ontology,//file jobState.ontology.getPath(),//baseURI jobState.getRDFFormat(),// + jobState.defaultGraph, jobState.ontologyFileFilter // ); Modified: trunk/bigdata-rdf/src/java/com/bigdata/rdf/load/MappedRDFFileLoadTask.java =================================================================== --- trunk/bigdata-rdf/src/java/com/bigdata/rdf/load/MappedRDFFileLoadTask.java 2010-09-14 10:57:21 UTC (rev 3541) +++ trunk/bigdata-rdf/src/java/com/bigdata/rdf/load/MappedRDFFileLoadTask.java 2010-09-14 13:50:31 UTC (rev 3542) @@ -223,6 +223,7 @@ jobState.valuesInitialCapacity,// jobState.bnodesInitialCapacity,// jobState.getRDFFormat(), // + jobState.defaultGraph, parserOptions,// false, // deleteAfter is handled by the master! jobState.parserPoolSize, // Modified: trunk/bigdata-rdf/src/java/com/bigdata/rdf/load/SingleResourceReaderTask.java =================================================================== --- trunk/bigdata-rdf/src/java/com/bigdata/rdf/load/SingleResourceReaderTask.java 2010-09-14 10:57:21 UTC (rev 3541) +++ trunk/bigdata-rdf/src/java/com/bigdata/rdf/load/SingleResourceReaderTask.java 2010-09-14 13:50:31 UTC (rev 3542) @@ -186,7 +186,7 @@ // run the parser. // @todo reuse the same underlying parser instance? 
- loader.loadRdf(reader, baseURL, rdfFormat, parserOptions); + loader.loadRdf(reader, baseURL, rdfFormat, null, parserOptions); success = true; Modified: trunk/bigdata-rdf/src/java/com/bigdata/rdf/rio/AsynchronousStatementBufferFactory.java =================================================================== --- trunk/bigdata-rdf/src/java/com/bigdata/rdf/rio/AsynchronousStatementBufferFactory.java 2010-09-14 10:57:21 UTC (rev 3541) +++ trunk/bigdata-rdf/src/java/com/bigdata/rdf/rio/AsynchronousStatementBufferFactory.java 2010-09-14 13:50:31 UTC (rev 3542) @@ -356,8 +356,14 @@ * The default {@link RDFFormat}. */ private final RDFFormat defaultFormat; - + /** + * The value that will be used for the graph/context co-ordinate when + * loading data represented in a triple format into a quad store. + */ + private final String defaultGraph; + + /** * Options for the {@link RDFParser}. */ private final RDFParserOptions parserOptions; @@ -1423,7 +1429,7 @@ try { // run the parser. new PresortRioLoader(buffer).loadRdf(reader, baseURL, - rdfFormat, parserOptions); + rdfFormat, defaultGraph, parserOptions); } finally { reader.close(); } @@ -1490,6 +1496,9 @@ * {@link BNode}s parsed from a single document. * @param defaultFormat * The default {@link RDFFormat} which will be assumed. + * @param defaultGraph + * The value that will be used for the graph/context co-ordinate when + * loading data represented in a triple format into a quad store. * @param parserOptions * Options for the {@link RDFParser}. 
* @param deleteAfter @@ -1529,6 +1538,7 @@ final int valuesInitialCapacity,// final int bnodesInitialCapacity, // final RDFFormat defaultFormat,// + final String defaultGraph,// final RDFParserOptions parserOptions,// final boolean deleteAfter,// final int parserPoolSize,// @@ -1566,6 +1576,8 @@ this.defaultFormat = defaultFormat; + this.defaultGraph = defaultGraph; + this.parserOptions = parserOptions; this.deleteAfter = deleteAfter; Modified: trunk/bigdata-rdf/src/java/com/bigdata/rdf/rio/BasicRioLoader.java =================================================================== --- trunk/bigdata-rdf/src/java/com/bigdata/rdf/rio/BasicRioLoader.java 2010-09-14 10:57:21 UTC (rev 3541) +++ trunk/bigdata-rdf/src/java/com/bigdata/rdf/rio/BasicRioLoader.java 2010-09-14 13:50:31 UTC (rev 3542) @@ -37,6 +37,8 @@ import org.openrdf.rio.RDFParser; import org.openrdf.rio.Rio; +import com.bigdata.rdf.model.BigdataURI; + /** * Parses data but does not load it into the indices. * @@ -74,6 +76,8 @@ private final ValueFactory valueFactory; + protected String defaultGraph; + public BasicRioLoader(final ValueFactory valueFactory) { if (valueFactory == null) @@ -153,18 +157,20 @@ } final public void loadRdf(final InputStream is, final String baseURI, - final RDFFormat rdfFormat, final RDFParserOptions options) + final RDFFormat rdfFormat, final String defaultGraph, + final RDFParserOptions options) throws Exception { - loadRdf2(is, baseURI, rdfFormat, options); + loadRdf2(is, baseURI, rdfFormat, defaultGraph, options); } final public void loadRdf(final Reader reader, final String baseURI, - final RDFFormat rdfFormat, final RDFParserOptions options) + final RDFFormat rdfFormat, final String defaultGraph, + final RDFParserOptions options) throws Exception { - loadRdf2(reader, baseURI, rdfFormat, options); + loadRdf2(reader, baseURI, rdfFormat, defaultGraph, options); } @@ -180,7 +186,7 @@ * @throws Exception */ protected void loadRdf2(final Object source, final String baseURI, - final 
RDFFormat rdfFormat, final RDFParserOptions options) + final RDFFormat rdfFormat, final String defaultGraph, final RDFParserOptions options) throws Exception { if (source == null) @@ -198,6 +204,8 @@ if (log.isInfoEnabled()) log.info("format=" + rdfFormat + ", options=" + options); + this.defaultGraph = defaultGraph ; + final RDFParser parser = getParser(rdfFormat); // apply options to the parser @@ -212,7 +220,7 @@ // Note: reset so that rates are correct for each source loaded. stmtsAdded = 0; - + try { before(); Modified: trunk/bigdata-rdf/src/java/com/bigdata/rdf/rio/IRioLoader.java =================================================================== --- trunk/bigdata-rdf/src/java/com/bigdata/rdf/rio/IRioLoader.java 2010-09-14 10:57:21 UTC (rev 3541) +++ trunk/bigdata-rdf/src/java/com/bigdata/rdf/rio/IRioLoader.java 2010-09-14 13:50:31 UTC (rev 3542) @@ -72,12 +72,14 @@ * The base URL for those data. * @param rdfFormat * The interchange format. + * @param defaultGraph + * The default graph. * @param options * Options to be applied to the {@link RDFParser}. * @throws Exception */ public void loadRdf(Reader reader, String baseURL, RDFFormat rdfFormat, - RDFParserOptions options) throws Exception; + String defaultGraph, RDFParserOptions options) throws Exception; /** * Parse RDF data. @@ -88,11 +90,13 @@ * The base URL for those data. * @param rdfFormat * The interchange format. + * @param defaultGraph + * The default graph. * @param options * Options to be applied to the {@link RDFParser}. 
* @throws Exception */ public void loadRdf(InputStream is, String baseURI, RDFFormat rdfFormat, - RDFParserOptions options) throws Exception; + String defaultGraph, RDFParserOptions options) throws Exception; } Modified: trunk/bigdata-rdf/src/java/com/bigdata/rdf/rio/PresortRioLoader.java =================================================================== --- trunk/bigdata-rdf/src/java/com/bigdata/rdf/rio/PresortRioLoader.java 2010-09-14 10:57:21 UTC (rev 3541) +++ trunk/bigdata-rdf/src/java/com/bigdata/rdf/rio/PresortRioLoader.java 2010-09-14 13:50:31 UTC (rev 3542) @@ -23,11 +23,14 @@ */ package com.bigdata.rdf.rio; +import org.openrdf.model.Resource; import org.openrdf.model.Statement; import org.openrdf.model.Value; import org.openrdf.rio.RDFHandler; import org.openrdf.rio.RDFHandlerException; +import com.bigdata.rdf.model.BigdataURI; + /** * Statement handler for the RIO RDF Parser that writes on a * {@link StatementBuffer}. @@ -45,6 +48,12 @@ final protected IStatementBuffer<?> buffer; /** + * The value that will be used for the graph/context co-ordinate when + * loading data represented in a triple format into a quad store. + */ + private BigdataURI defaultGraphURI = null ; + + /** * Sets up parser to load RDF. * * @param buffer @@ -58,7 +67,7 @@ this.buffer = buffer; } - + /** * bulk insert the buffered data into the store. */ @@ -87,8 +96,11 @@ public RDFHandler newRDFHandler() { + defaultGraphURI = null != defaultGraph && 4 == buffer.getDatabase ().getSPOKeyArity () + ? buffer.getDatabase ().getValueFactory ().createURI ( defaultGraph ) + : null + ; return this; - } public void handleStatement( final Statement stmt ) { @@ -98,9 +110,13 @@ log.debug(stmt); } - + + Resource graph = stmt.getContext() ; + if ( null == graph + && null != defaultGraphURI ) // only true when we know we are loading a quad store + graph = defaultGraphURI ; // buffer the write (handles overflow). 
- buffer.add( stmt.getSubject(), stmt.getPredicate(), stmt.getObject(), stmt.getContext() ); + buffer.add( stmt.getSubject(), stmt.getPredicate(), stmt.getObject(), graph ); stmtsAdded++; Modified: trunk/bigdata-rdf/src/java/com/bigdata/rdf/store/DataLoader.java =================================================================== --- trunk/bigdata-rdf/src/java/com/bigdata/rdf/store/DataLoader.java 2010-09-14 10:57:21 UTC (rev 3541) +++ trunk/bigdata-rdf/src/java/com/bigdata/rdf/store/DataLoader.java 2010-09-14 13:50:31 UTC (rev 3542) @@ -640,7 +640,7 @@ final LoadStats totals = new LoadStats(); - loadData3(totals, reader, baseURL, rdfFormat, true/*endOfBatch*/); + loadData3(totals, reader, baseURL, rdfFormat, null, true/*endOfBatch*/); return totals; @@ -668,7 +668,7 @@ final LoadStats totals = new LoadStats(); - loadData3(totals, is, baseURL, rdfFormat, true/* endOfBatch */); + loadData3(totals, is, baseURL, rdfFormat, null, true/* endOfBatch */); return totals; @@ -704,7 +704,7 @@ final LoadStats totals = new LoadStats(); - loadData3(totals, is, baseURL, rdfFormat, true/*endOfBatch*/); + loadData3(totals, is, baseURL, rdfFormat, null, true/*endOfBatch*/); return totals; @@ -762,7 +762,7 @@ if(file.exists()) { loadFiles(totals, 0/* depth */, file, baseURL, - rdfFormat, filter, endOfBatch); + rdfFormat, null, filter, endOfBatch); return; @@ -789,7 +789,7 @@ try { - loadData3(totals, reader, baseURL, rdfFormat, endOfBatch); + loadData3(totals, reader, baseURL, rdfFormat, null, endOfBatch); } catch (Exception ex) { @@ -817,6 +817,9 @@ * The format of the file (optional, when not specified the * format is deduced for each file in turn using the * {@link RDFFormat} static methods). + * @param defaultGraph + * The value that will be used for the graph/context co-ordinate when + * loading data represented in a triple format into a quad store. * @param filter * A filter selecting the file names that will be loaded * (optional). 
When specified, the filter MUST accept directories @@ -827,7 +830,8 @@ * @throws IOException */ public LoadStats loadFiles(final File file, final String baseURI, - final RDFFormat rdfFormat, final FilenameFilter filter) + final RDFFormat rdfFormat, final String defaultGraph, + final FilenameFilter filter) throws IOException { if (file == null) @@ -835,7 +839,7 @@ final LoadStats totals = new LoadStats(); - loadFiles(totals, 0/* depth */, file, baseURI, rdfFormat, filter, true/* endOfBatch */ + loadFiles(totals, 0/* depth */, file, baseURI, rdfFormat, defaultGraph, filter, true/* endOfBatch */ ); return totals; @@ -844,7 +848,8 @@ protected void loadFiles(final LoadStats totals, final int depth, final File file, final String baseURI, final RDFFormat rdfFormat, - final FilenameFilter filter, final boolean endOfBatch) + final String defaultGraph, final FilenameFilter filter, + final boolean endOfBatch) throws IOException { if (file.isDirectory()) { @@ -864,7 +869,7 @@ // final RDFFormat fmt = RDFFormat.forFileName(f.toString(), // rdfFormat); - loadFiles(totals, depth + 1, f, baseURI, rdfFormat, filter, + loadFiles(totals, depth + 1, f, baseURI, rdfFormat, defaultGraph, filter, (depth == 0 && i < files.length ? false : endOfBatch)); } @@ -919,7 +924,7 @@ final String s = baseURI != null ? baseURI : file.toURI() .toString(); - loadData3(totals, reader, s, fmt, endOfBatch); + loadData3(totals, reader, s, fmt, defaultGraph, endOfBatch); return; @@ -955,7 +960,7 @@ */ protected void loadData3(final LoadStats totals, final Object source, final String baseURL, final RDFFormat rdfFormat, - final boolean endOfBatch) throws IOException { + final String defaultGraph, final boolean endOfBatch) throws IOException { final long begin = System.currentTimeMillis(); @@ -978,11 +983,10 @@ } // Setup the loader. 
- final PresortRioLoader loader = new PresortRioLoader(buffer); + final PresortRioLoader loader = new PresortRioLoader ( buffer ) ; // @todo review: disable auto-flush - caller will handle flush of the buffer. // loader.setFlush(false); - // add listener to log progress. loader.addRioLoaderListener( new RioLoaderListener() { @@ -1006,12 +1010,12 @@ if(source instanceof Reader) { - loader.loadRdf((Reader) source, baseURL, rdfFormat, parserOptions); + loader.loadRdf((Reader) source, baseURL, rdfFormat, defaultGraph, parserOptions); } else if (source instanceof InputStream) { loader.loadRdf((InputStream) source, baseURL, rdfFormat, - parserOptions); + defaultGraph, parserOptions); } else throw new AssertionError(); @@ -1360,7 +1364,7 @@ // rdfFormat, filter); dataLoader.loadFiles(totals, 0/* depth */, fileOrDir, baseURI, - rdfFormat, filter, true/* endOfBatch */ + rdfFormat, null, filter, true/* endOfBatch */ ); } Modified: trunk/bigdata-rdf/src/java/com/bigdata/rdf/util/Splitter.java =================================================================== --- trunk/bigdata-rdf/src/java/com/bigdata/rdf/util/Splitter.java 2010-09-14 10:57:21 UTC (rev 3541) +++ trunk/bigdata-rdf/src/java/com/bigdata/rdf/util/Splitter.java 2010-09-14 13:50:31 UTC (rev 3542) @@ -714,7 +714,7 @@ try { // run the parser. 
new MyLoader(buffer).loadRdf(reader, baseURL, - defaultRDFFormat, s.parserOptions); + defaultRDFFormat, null, s.parserOptions); } finally { reader.close(); } Modified: trunk/bigdata-rdf/src/test/com/bigdata/rdf/rio/AbstractRIOTestCase.java =================================================================== --- trunk/bigdata-rdf/src/test/com/bigdata/rdf/rio/AbstractRIOTestCase.java 2010-09-14 10:57:21 UTC (rev 3541) +++ trunk/bigdata-rdf/src/test/com/bigdata/rdf/rio/AbstractRIOTestCase.java 2010-09-14 13:50:31 UTC (rev 3542) @@ -401,7 +401,7 @@ }); - loader.loadRdf((Reader) reader, baseURI, rdfFormat, options); + loader.loadRdf((Reader) reader, baseURI, rdfFormat, null, options); if (log.isInfoEnabled()) log.info("Done: " + resource); @@ -681,7 +681,7 @@ loader.loadRdf(new BufferedReader(new InputStreamReader( new FileInputStream(resource))), baseURI, rdfFormat, - options); + null, options); if(log.isInfoEnabled()) log.info("End of reparse: nerrors=" + nerrs + ", file=" Modified: trunk/bigdata-rdf/src/test/com/bigdata/rdf/rio/EDSAsyncLoader.java =================================================================== --- trunk/bigdata-rdf/src/test/com/bigdata/rdf/rio/EDSAsyncLoader.java 2010-09-14 10:57:21 UTC (rev 3541) +++ trunk/bigdata-rdf/src/test/com/bigdata/rdf/rio/EDSAsyncLoader.java 2010-09-14 13:50:31 UTC (rev 3542) @@ -161,6 +161,7 @@ valuesInitialCapacity,// bnodesInitialCapacity,// RDFFormat.RDFXML, // defaultFormat + null, // defaultGraph parserOptions, // parserOptions false, // deleteAfter poolSize, // parserPoolSize, Modified: trunk/bigdata-rdf/src/test/com/bigdata/rdf/rio/TestAsynchronousStatementBufferFactory.java =================================================================== --- trunk/bigdata-rdf/src/test/com/bigdata/rdf/rio/TestAsynchronousStatementBufferFactory.java 2010-09-14 10:57:21 UTC (rev 3541) +++ trunk/bigdata-rdf/src/test/com/bigdata/rdf/rio/TestAsynchronousStatementBufferFactory.java 2010-09-14 13:50:31 UTC (rev 3542) @@ -400,6 +400,7 
@@ valuesInitialCapacity,// bnodesInitialCapacity,// RDFFormat.RDFXML, // defaultFormat + null, // defaultGraph parserOptions, // false, // deleteAfter parallel?5:1, // parserPoolSize, Modified: trunk/bigdata-sails/src/test/com/bigdata/rdf/stress/LoadClosureAndQueryTest.java =================================================================== --- trunk/bigdata-sails/src/test/com/bigdata/rdf/stress/LoadClosureAndQueryTest.java 2010-09-14 10:57:21 UTC (rev 3541) +++ trunk/bigdata-sails/src/test/com/bigdata/rdf/stress/LoadClosureAndQueryTest.java 2010-09-14 13:50:31 UTC (rev 3542) @@ -1204,7 +1204,7 @@ try { dataLoader.loadFiles(dataDir, null/* baseURI */, - null/* rdfFormat */, filter); + null/* rdfFormat */, null, /* defaultGraph */filter); } catch (IOException ex) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |