Update of /cvsroot/cweb/bigdata-rdf/src/java/com/bigdata/rdf/rio In directory sc8-pr-cvs4.sourceforge.net:/tmp/cvs-serv22601/src/java/com/bigdata/rdf/rio Modified Files: PresortRioLoader.java BasicRioLoader.java IRioLoader.java MultiThreadedPresortRioLoader.java BulkRioLoader.java Log Message: testing SAIL and lubm, including adding BTree#removeAll(), touching up some inferences, making it possible to load different RDF interchange formats, and adding JOIN ordering based on the sesame optimizer and the actual triple pattern selectivity in the data. Index: IRioLoader.java =================================================================== RCS file: /cvsroot/cweb/bigdata-rdf/src/java/com/bigdata/rdf/rio/IRioLoader.java,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** IRioLoader.java 6 Feb 2007 23:06:43 -0000 1.3 --- IRioLoader.java 18 Apr 2007 17:29:07 -0000 1.4 *************** *** 48,51 **** --- 48,52 ---- package com.bigdata.rdf.rio; + import java.io.InputStream; import java.io.Reader; *************** *** 70,74 **** public void removeRioLoaderListener( RioLoaderListener l ); ! public void loadRdfXml( Reader reader, String baseURI ) throws Exception; } --- 71,77 ---- public void removeRioLoaderListener( RioLoaderListener l ); ! public void loadRdf( Reader reader, String baseURI ) throws Exception; ! ! // public void loadRdf( InputStream is, String baseURI ) throws Exception; } Index: MultiThreadedPresortRioLoader.java =================================================================== RCS file: /cvsroot/cweb/bigdata-rdf/src/java/com/bigdata/rdf/rio/MultiThreadedPresortRioLoader.java,v retrieving revision 1.6 retrieving revision 1.7 diff -C2 -d -r1.6 -r1.7 *** MultiThreadedPresortRioLoader.java 13 Apr 2007 15:02:34 -0000 1.6 --- MultiThreadedPresortRioLoader.java 18 Apr 2007 17:29:07 -0000 1.7 *************** *** 250,254 **** */ ! public void loadRdfXml( Reader reader, String baseURI ) throws Exception { OptimizedValueFactory valueFac = new OptimizedValueFactory(); --- 250,254 ---- */ ! public void loadRdf( Reader reader, String baseURI ) throws Exception { OptimizedValueFactory valueFac = new OptimizedValueFactory(); Index: PresortRioLoader.java =================================================================== RCS file: /cvsroot/cweb/bigdata-rdf/src/java/com/bigdata/rdf/rio/PresortRioLoader.java,v retrieving revision 1.7 retrieving revision 1.8 diff -C2 -d -r1.7 -r1.8 *** PresortRioLoader.java 6 Feb 2007 23:06:43 -0000 1.7 --- PresortRioLoader.java 18 Apr 2007 17:29:07 -0000 1.8 *************** *** 44,48 **** --- 44,52 ---- package com.bigdata.rdf.rio; + import java.io.BufferedInputStream; + import java.io.IOException; + import java.io.InputStream; import java.io.Reader; + import java.net.URL; import java.util.Iterator; import java.util.Vector; *************** *** 52,58 **** --- 56,66 ---- import org.openrdf.model.URI; import org.openrdf.model.Value; + import org.openrdf.model.ValueFactory; import org.openrdf.rio.Parser; import org.openrdf.rio.StatementHandler; + import org.openrdf.rio.ntriples.NTriplesParser; import org.openrdf.rio.rdfxml.RdfXmlParser; + import org.openrdf.rio.turtle.TurtleParser; + import org.openrdf.sesame.constants.RDFFormat; import com.bigdata.rdf.TripleStore; *************** *** 65,71 **** * batch. * - * @todo try optimization using async IO to write data buffered on the journal - * to disk. - * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ --- 73,76 ---- *************** *** 88,91 **** --- 93,106 ---- /** + * The RDF syntax to be parsed. + */ + protected final RDFFormat rdfFormat; + + /** + * Controls the {@link Parser#setVerifyData(boolean)} option. + */ + protected final boolean verifyData; + + /** * The bufferQueue capacity -or- <code>-1</code> if the {@link Buffer} * object is signaling that no more buffers will be placed onto the *************** *** 113,126 **** */ Buffer buffer; ! public PresortRioLoader( TripleStore store ) { ! ! this(store, DEFAULT_BUFFER_SIZE, true ); } ! public PresortRioLoader(TripleStore store, int capacity, boolean distinct) { assert store != null; assert capacity > 0; --- 128,186 ---- */ Buffer buffer; + + /** + * Used as the value factory for the {@link Parser}. + */ + OptimizedValueFactory valueFac = new OptimizedValueFactory(); ! /** ! * Sets up parser to load RDF/XML - {@link #verifyData} is NOT enabled. ! * ! * @param store ! * The store into which to insert the loaded statements. ! */ ! public PresortRioLoader(TripleStore store) { ! ! this(store, RDFFormat.RDFXML, false /*verifyData*/); } ! /** ! * Sets up parser to load the indicated RDF interchange syntax. ! * ! * @param store ! * The store into which to insert the loaded statements. ! * @param rdfFormat ! * The RDF interchange syntax to be parsed. ! * @param verifyData ! * Controls the {@link Parser#setVerifyData(boolean)} option. ! */ ! public PresortRioLoader( TripleStore store, RDFFormat rdfFormat, boolean verifyData ) { ! ! this(store, rdfFormat, verifyData, DEFAULT_BUFFER_SIZE, true); ! ! } ! ! /** ! * Sets up parser to load RDF. ! * ! * @param store ! * The store into which to insert the loaded statements. ! * @param rdfFormat ! * The RDF interchange syntax to be parsed. ! * @param verifyData ! * Controls the {@link Parser#setVerifyData(boolean)} option. ! * @param capacity ! * The capacity of the buffer. ! * @param distinct ! * Whether or not terms and statements are made distinct in the ! * buffer. ! */ ! public PresortRioLoader(TripleStore store, RDFFormat rdfFormat, ! boolean verifyData, int capacity, boolean distinct) { assert store != null; + + assert rdfFormat != null; assert capacity > 0; *************** *** 128,131 **** --- 188,195 ---- this.store = store; + this.rdfFormat = rdfFormat; + + this.verifyData = verifyData; + this.capacity = capacity; *************** *** 196,199 **** --- 260,334 ---- /** + * Choose the parser based on the {@link RDFFormat} specified to the + * constructor. + * + * @param valFactory + * The value factory. + * + * @return The parser. + */ + protected Parser newParser(ValueFactory valFactory) { + + final Parser parser; + + if (RDFFormat.RDFXML.equals(rdfFormat)) { + + parser = new RdfXmlParser(valFactory); + + } else if (RDFFormat.NTRIPLES.equals(rdfFormat)) { + + parser = new NTriplesParser(valFactory); + + } else if (RDFFormat.TURTLE.equals(rdfFormat)) { + + parser = new TurtleParser(valFactory); + + } else { + + throw new IllegalArgumentException("Format not supported: " + + rdfFormat); + + } + + parser.setVerifyData( verifyData ); + + parser.setStatementHandler( this ); + + return parser; + + } + + // InputStream rdfStream = getClass().getResourceAsStream(ontology); + // + // if (rdfStream == null) { + // + // /* + // * If we do not find as a Resource then try as a URL. + // * + // */ + // try { + // + // rdfStream = new URL(ontology).openConnection().getInputStream(); + // + // } catch (IOException ex) { + // + // ex.printStackTrace(System.err); + // + // return false; + // + // } + // + // } + // + // rdfStream = new BufferedInputStream(rdfStream); + // + // ... + // + // finally { + // rdfStream.close(); + // } + // + + /** * We need to collect two (three including bnode) term arrays and one * statement array. These should be buffers of a settable size. *************** *** 214,227 **** * The baseURI or "" if none is known. */ ! ! public void loadRdfXml( Reader reader, String baseURI ) throws Exception { ! ! OptimizedValueFactory valueFac = new OptimizedValueFactory(); ! ! Parser parser = new RdfXmlParser( valueFac ); ! ! parser.setVerifyData( false ); ! parser.setStatementHandler( this ); // Note: reset to that rates reflect load times not clock times. --- 349,355 ---- * The baseURI or "" if none is known. */ ! public void loadRdf( Reader reader, String baseURI ) throws Exception { ! Parser parser = newParser(valueFac); // Note: reset to that rates reflect load times not clock times. Index: BasicRioLoader.java =================================================================== RCS file: /cvsroot/cweb/bigdata-rdf/src/java/com/bigdata/rdf/rio/BasicRioLoader.java,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** BasicRioLoader.java 6 Feb 2007 23:06:43 -0000 1.4 --- BasicRioLoader.java 18 Apr 2007 17:29:07 -0000 1.5 *************** *** 132,136 **** } ! public void loadRdfXml( Reader reader, String baseURI ) throws Exception { Parser parser = new RdfXmlParser(); --- 132,136 ---- } ! public void loadRdf( Reader reader, String baseURI ) throws Exception { Parser parser = new RdfXmlParser(); Index: BulkRioLoader.java =================================================================== RCS file: /cvsroot/cweb/bigdata-rdf/src/java/com/bigdata/rdf/rio/BulkRioLoader.java,v retrieving revision 1.13 retrieving revision 1.14 diff -C2 -d -r1.13 -r1.14 *** BulkRioLoader.java 13 Apr 2007 15:02:34 -0000 1.13 --- BulkRioLoader.java 18 Apr 2007 17:29:07 -0000 1.14 *************** *** 249,253 **** * the RDF/XML source */ ! public void loadRdfXml( Reader reader, String baseURI ) throws Exception { OptimizedValueFactory valueFac = new OptimizedValueFactory(); --- 249,253 ---- * the RDF/XML source */ ! public void loadRdf( Reader reader, String baseURI ) throws Exception { OptimizedValueFactory valueFac = new OptimizedValueFactory(); |