From: <tho...@us...> - 2012-07-24 14:05:11
|
Revision: 6390 http://bigdata.svn.sourceforge.net/bigdata/?rev=6390&view=rev Author: thompsonbry Date: 2012-07-24 14:04:39 +0000 (Tue, 24 Jul 2012) Log Message: ----------- I've extended the VoID support to include the description of each named graph. This behavior can be disabled using the webapp configuration parameters but defaults to true. I have relayered the service description into a set of protected methods and added a constructor. This should make it easier to extend the base behavior. I've also added some reporting for interesting bigdata features. Modified Paths: -------------- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/LexiconRelation.java branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/BigdataRDFServletContextListener.java branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/BigdataServlet.java branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/ConfigParams.java branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/QueryServlet.java branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/SD.java branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/SparqlEndpointConfig.java Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/LexiconRelation.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/LexiconRelation.java 2012-07-24 00:33:16 UTC (rev 6389) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/LexiconRelation.java 2012-07-24 14:04:39 UTC (rev 6390) @@ -890,7 +890,7 @@ } /** - * <code>true</code> iff the full text index is enabled. + * <code>true</code> iff the (value centric) full text index is enabled. 
* * @see AbstractTripleStore.Options#TEXT_INDEX */ Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/BigdataRDFServletContextListener.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/BigdataRDFServletContextListener.java 2012-07-24 00:33:16 UTC (rev 6389) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/BigdataRDFServletContextListener.java 2012-07-24 14:04:39 UTC (rev 6390) @@ -286,8 +286,23 @@ } + final boolean describeEachNamedGraph; + { + + final String s = context + .getInitParameter(ConfigParams.DESCRIBE_EACH_NAMED_GRAPH); + + describeEachNamedGraph = s == null ? ConfigParams.DEFAULT_DESCRIBE_EACH_NAMED_GRAPH + : Boolean.valueOf(s); + + if (log.isInfoEnabled()) + log.info(ConfigParams.DESCRIBE_EACH_NAMED_GRAPH + "=" + + describeEachNamedGraph); + + } + final SparqlEndpointConfig config = new SparqlEndpointConfig(namespace, - timestamp, queryThreadPoolSize); + timestamp, queryThreadPoolSize, describeEachNamedGraph); rdfContext = new BigdataRDFContext(config, indexManager); Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/BigdataServlet.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/BigdataServlet.java 2012-07-24 00:33:16 UTC (rev 6389) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/BigdataServlet.java 2012-07-24 14:04:39 UTC (rev 6390) @@ -32,6 +32,7 @@ import javax.servlet.ServletContext; import javax.servlet.http.HttpServlet; +import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import org.apache.log4j.Logger; @@ -248,4 +249,25 @@ } + /** + * Figure out and return the service end point. 
+ */ + protected String getServiceURI(final HttpServletRequest req) { + + final String serviceURI; + + final StringBuffer sb = req.getRequestURL(); + + final int indexOf = sb.indexOf("?"); + + if (indexOf == -1) { + serviceURI = sb.toString(); + } else { + serviceURI = sb.substring(0, indexOf); + } + + return serviceURI; + + } + } Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/ConfigParams.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/ConfigParams.java 2012-07-24 00:33:16 UTC (rev 6389) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/ConfigParams.java 2012-07-24 14:04:39 UTC (rev 6390) @@ -80,4 +80,15 @@ */ String READ_LOCK = "read-lock"; + /** + * When <code>true</code> and the KB instance is in the <code>quads</code> + * mode, each named graph will also be described in in the same level of + * detail as the default graph (default + * {@value #DEFAULT_DESCRIBE_EACH_NAMED_GRAPH}). Otherwise only the default + * graph will be described. 
+ */ + String DESCRIBE_EACH_NAMED_GRAPH = "describe-each-named-graph"; + + boolean DEFAULT_DESCRIBE_EACH_NAMED_GRAPH = true; + } Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/QueryServlet.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/QueryServlet.java 2012-07-24 00:33:16 UTC (rev 6389) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/QueryServlet.java 2012-07-24 14:04:39 UTC (rev 6390) @@ -28,7 +28,6 @@ import java.io.PrintWriter; import java.io.StringWriter; import java.io.Writer; -import java.nio.charset.Charset; import java.util.Iterator; import java.util.Map; import java.util.TreeMap; @@ -48,6 +47,7 @@ import org.openrdf.model.Statement; import org.openrdf.model.URI; import org.openrdf.model.Value; +import org.openrdf.model.impl.GraphImpl; import org.openrdf.query.MalformedQueryException; import org.openrdf.rio.RDFFormat; import org.openrdf.rio.RDFHandlerException; @@ -175,25 +175,22 @@ buildResponse(resp, HTTP_NOTFOUND, MIME_TEXT_PLAIN); return; } + + // Figure out the service end point. + final String serviceURI = getServiceURI(req); /* - * Figure out the service end point. + * TODO Resolve the SD class name and ctor via a configuration property + * for extensible descriptions. */ - final String serviceURI; + final Graph g = new GraphImpl(); { - final StringBuffer sb = req.getRequestURL(); - final int indexOf = sb.indexOf("?"); + final SD sd = new SD(g, tripleStore, serviceURI); - if (indexOf == -1) { - serviceURI = sb.toString(); - } else { - serviceURI = sb.substring(0, indexOf); - } + sd.describeService(getBigdataRDFContext().getConfig().describeEachNamedGraph); } - - final Graph g = SD.describeService(tripleStore, serviceURI); /* * CONNEG for the MIME type. 
Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/SD.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/SD.java 2012-07-24 00:33:16 UTC (rev 6389) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/SD.java 2012-07-24 14:04:39 UTC (rev 6390) @@ -30,14 +30,17 @@ import java.util.Arrays; import java.util.LinkedHashMap; import java.util.LinkedHashSet; +import java.util.LinkedList; +import java.util.List; import java.util.Map; +import java.util.Properties; import java.util.Set; import org.openrdf.model.BNode; import org.openrdf.model.Graph; +import org.openrdf.model.Resource; import org.openrdf.model.URI; import org.openrdf.model.ValueFactory; -import org.openrdf.model.impl.GraphImpl; import org.openrdf.model.impl.URIImpl; import org.openrdf.model.vocabulary.RDF; @@ -47,10 +50,14 @@ import com.bigdata.rdf.axioms.RdfsAxioms; import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.internal.NotMaterializedException; +import com.bigdata.rdf.model.BigdataResource; import com.bigdata.rdf.model.BigdataURI; import com.bigdata.rdf.model.BigdataValue; +import com.bigdata.rdf.sail.BigdataSail; import com.bigdata.rdf.spo.SPOKeyOrder; +import com.bigdata.rdf.spo.SPORelation; import com.bigdata.rdf.store.AbstractTripleStore; +import com.bigdata.rdf.store.BD; import com.bigdata.rdf.store.BigdataValueIterator; import com.bigdata.rdf.store.BigdataValueIteratorImpl; import com.bigdata.rdf.vocab.decls.VoidVocabularyDecl; @@ -139,6 +146,36 @@ static public final URI BasicFederatedQuery = new URIImpl(NS + "BasicFederatedQuery"); /** + * The namespace for the bigdata specific features. + */ + static public final String BDFNS = BD.NAMESPACE+"/features/"; + + /* + * KB modes. 
+ */ + static public final URI ModeTriples = new URIImpl(BDFNS + "KB/Mode/Triples"); + static public final URI ModeQuads = new URIImpl(BDFNS + "KB/Mode/Quads"); + static public final URI ModeSids = new URIImpl(BDFNS + "KB/Mode/Sids"); + + /* + * Text Index modes. + */ + static public final URI TextIndexValueCentric = new URIImpl(BDFNS + + "KB/TextIndex/ValueCentric"); + + static public final URI TextIndexSubjectCentric = new URIImpl(BDFNS + + "KB/TextIndex/SubjectCentric"); + + /* + * Misc KB features. + */ + static public final URI TruthMaintenance = new URIImpl(BDFNS + + "KB/TruthMaintenance"); + + static public final URI IsolatableIndices = new URIImpl(BDFNS + + "KB/IsolatableIndices"); + + /** * <pre> * http://www.w3.org/ns/entailment/OWL-RDF-Based * http://www.w3.org/ns/entailment/RDFS @@ -337,285 +374,548 @@ // "http://www.w3.org/ns/formats/SPARQL_Results_TSV"); /** - * Collect various information, building up a service description graph. + * The graph in which the service description is accumulated (from the + * constructor). + */ + protected final Graph g; + + /** + * The KB instance that is being described (from the constructor). + */ + protected final AbstractTripleStore tripleStore; + + /** + * The service end point (from the constructor). + */ + protected final String serviceURI; + + /** + * The value factory used to create values for the service description graph + * {@link #g}. + */ + protected final ValueFactory f; + + /** + * The resource which models the service. + */ + protected final BNode aService; + + /** + * The resource which models the default data set for the service. + */ + protected final BNode aDefaultDataset; + + /** + * The resource which models the default graph in the default data set for + * the service. + */ + protected final BNode aDefaultGraph; + + /** * + * @param g + * Where to assemble the description. * @param tripleStore * The KB instance to be described. 
* @param serviceURIs * One or more service end points for that KB instance. + * + * @see #describeService() */ - public static Graph describeService(final AbstractTripleStore tripleStore, + public SD(final Graph g, final AbstractTripleStore tripleStore, final String serviceURI) { - final Graph g = new GraphImpl(); + if (g == null) + throw new IllegalArgumentException(); - final ValueFactory f = g.getValueFactory(); + if (tripleStore == null) + throw new IllegalArgumentException(); + + if (serviceURI == null) + throw new IllegalArgumentException(); - final BNode service = f.createBNode("service"); + this.g = g; + + this.tripleStore = tripleStore; + + this.serviceURI = serviceURI; + + this.f = g.getValueFactory(); + + aService = f.createBNode("service"); - final BNode defaultDataset = f.createBNode("defaultDataset"); + aDefaultDataset = f.createBNode("defaultDataset"); - final BNode defaultGraph = f.createBNode("defaultGraph"); + aDefaultGraph = f.createBNode("defaultGraph"); + + } + + /** + * Collect various information, building up a service description graph. + * + * @param describeNamedGraphs + * When <code>true</code> and the KB instance is in the + * <code>quads</code> mode, each named graph will also be + * described in in the same level of detail as the default graph. + * Otherwise only the default graph will be described. + */ + public void describeService(final boolean describeNamedGraphs) { - g.add(service, RDF.TYPE, SD.Service); + g.add(aService, RDF.TYPE, SD.Service); // Service end point. - g.add(service, SD.endpoint, g.getValueFactory().createURI(serviceURI)); + describeServiceEndpoints(); - /* - * Supported Query Languages - */ - g.add(service, SD.supportedLanguage, SD.SPARQL10Query); - g.add(service, SD.supportedLanguage, SD.SPARQL11Query); - g.add(service, SD.supportedLanguage, SD.SPARQL11Update); + // Describe the supported languages (SPARQL Query, Update, etc). + describeLanguages(); - /* - * RDF and SPARQL Formats. 
- * - * @see http://www.openrdf.org/issues/browse/RIO-79 (Request for unique - * URIs) - * - * TODO Add an explicit declaration for SIDS mode data interchange? - */ + // Describe other supported features. + describeOtherFeatures(); + + // Describe the supported data formats. + describeInputFormats(); + describeResultFormats(); - // InputFormats - { - g.add(service, SD.inputFormat, SD.RDFXML); - g.add(service, SD.inputFormat, SD.NTRIPLES); - g.add(service, SD.inputFormat, SD.TURTLE); - g.add(service, SD.inputFormat, SD.N3); - // g.add(service, SD.inputFormat, SD.TRIX); // TODO TRIX - g.add(service, SD.inputFormat, SD.TRIG); - // g.add(service, SD.inputFormat, SD.BINARY); // TODO BINARY - g.add(service, SD.inputFormat, SD.NQUADS); + // Describe the entailment regime (if any). + describeEntailmentRegime(); + + // Describe the default graph of the default data set. + describeDefaultDataSet(describeNamedGraphs); - g.add(service, SD.inputFormat, SD.SPARQL_RESULTS_XML); - g.add(service, SD.inputFormat, SD.SPARQL_RESULTS_JSON); - g.add(service, SD.inputFormat, SD.SPARQL_RESULTS_CSV); - g.add(service, SD.inputFormat, SD.SPARQL_RESULTS_TSV); - // g.add(service, SD.inputFormat, - // SD.SPARQL_RESULTS_OPENRDF_BINARY); + } - } + /** + * Describe the service end point(s). 
+ * + * @see #endpoint + */ + protected void describeServiceEndpoints() { - // ResultFormats - { - g.add(service, SD.resultFormat, SD.RDFXML); - g.add(service, SD.resultFormat, SD.NTRIPLES); - g.add(service, SD.resultFormat, SD.TURTLE); - g.add(service, SD.resultFormat, SD.N3); - // g.add(service, SD.resultFormat, SD.TRIX); // TODO TRIX - g.add(service, SD.resultFormat, SD.TRIG); - // g.add(service, SD.resultFormat, SD.BINARY); // TODO BINARY - // g.add(service, SD.resultFormat, SD.NQUADS); // TODO NQuads - // writer + g.add(aService, SD.endpoint, g.getValueFactory().createURI(serviceURI)); - g.add(service, SD.resultFormat, SD.SPARQL_RESULTS_XML); - g.add(service, SD.resultFormat, SD.SPARQL_RESULTS_JSON); - g.add(service, SD.resultFormat, SD.SPARQL_RESULTS_CSV); - g.add(service, SD.resultFormat, SD.SPARQL_RESULTS_TSV); - // g.add(service, SD.resultFormat, - // SD.SPARQL_RESULTS_OPENRDF_BINARY); - } + } + + /** + * Describe the supported Query Languages + */ + protected void describeLanguages() { - /* - * TODO Report out the database mode {triples, provenance, or quads} and - * the entailment regieme for that mode (only for triples or quads at - * this point). Report out when truth maintenance is enabled and whether - * or not the full text index is enabled. - * - * TODO sd:languageExtension or sd:feature could be used for query - * hints, NAMED SUBQUERY, etc. - */ + g.add(aService, SD.supportedLanguage, SD.SPARQL10Query); + + g.add(aService, SD.supportedLanguage, SD.SPARQL11Query); + g.add(aService, SD.supportedLanguage, SD.SPARQL11Update); + + } + + /** + * Describe the supported input formats. + * + * @see http://www.openrdf.org/issues/browse/RIO-79 (Request for unique + * URIs) + * + * @see #inputFormat + * + * TODO Add an explicit declaration for SIDS mode data interchange? 
+ */ + protected void describeInputFormats() { + + g.add(aService, SD.inputFormat, SD.RDFXML); + g.add(aService, SD.inputFormat, SD.NTRIPLES); + g.add(aService, SD.inputFormat, SD.TURTLE); + g.add(aService, SD.inputFormat, SD.N3); + // g.add(service, SD.inputFormat, SD.TRIX); // TODO TRIX + g.add(aService, SD.inputFormat, SD.TRIG); + // g.add(service, SD.inputFormat, SD.BINARY); // TODO BINARY + g.add(aService, SD.inputFormat, SD.NQUADS); + + g.add(aService, SD.inputFormat, SD.SPARQL_RESULTS_XML); + g.add(aService, SD.inputFormat, SD.SPARQL_RESULTS_JSON); + g.add(aService, SD.inputFormat, SD.SPARQL_RESULTS_CSV); + g.add(aService, SD.inputFormat, SD.SPARQL_RESULTS_TSV); + // g.add(service,SD.inputFormat,SD.SPARQL_RESULTS_OPENRDF_BINARY); + + } + + /** + * Describe the supported result formats. + * + * @see #resultFormat + */ + protected void describeResultFormats() { + + g.add(aService, SD.resultFormat, SD.RDFXML); + g.add(aService, SD.resultFormat, SD.NTRIPLES); + g.add(aService, SD.resultFormat, SD.TURTLE); + g.add(aService, SD.resultFormat, SD.N3); + // g.add(service, SD.resultFormat, SD.TRIX); // TODO TRIX + g.add(aService, SD.resultFormat, SD.TRIG); + // g.add(service, SD.resultFormat, SD.BINARY); // TODO BINARY + // g.add(service, SD.resultFormat, SD.NQUADS); // TODO NQuads + // writer + + g.add(aService, SD.resultFormat, SD.SPARQL_RESULTS_XML); + g.add(aService, SD.resultFormat, SD.SPARQL_RESULTS_JSON); + g.add(aService, SD.resultFormat, SD.SPARQL_RESULTS_CSV); + g.add(aService, SD.resultFormat, SD.SPARQL_RESULTS_TSV); + // g.add(service, SD.resultFormat, + // SD.SPARQL_RESULTS_OPENRDF_BINARY); + + } + + /** + * Describe non-language features. + * + * TODO sd:languageExtension or sd:feature could be used for query hints, + * NAMED SUBQUERY, the NSS REST API features, etc. + * + * TODO Report the backing store type (Journal, TemporaryStore, Federation) + * and the journal buffer mode. 
+ */ + protected void describeOtherFeatures() { + + g.add(aService, SD.feature, SD.BasicFederatedQuery); + if (tripleStore.isQuads()) { - g.add(service, SD.feature, SD.UnionDefaultGraph); + g.add(aService, SD.feature, SD.UnionDefaultGraph); + g.add(aService, SD.feature, ModeQuads); + + } else if(tripleStore.isStatementIdentifiers()) { + + g.add(aService, SD.feature, ModeSids); + } else { - /* - * TODO The Axioms interface could self-report this. - */ - final URI entailmentRegime; - final Axioms axioms = tripleStore.getAxioms(); - if (axioms == null || axioms instanceof NoAxioms) { - entailmentRegime = SD.simpleEntailment; - } else if (axioms instanceof OwlAxioms) { - // TODO This is really the RDFS+ entailment regime. - entailmentRegime = SD.rdfsEntailment; - } else if (axioms instanceof RdfsAxioms) { - entailmentRegime = SD.rdfsEntailment; - } else { - // Unknown. - entailmentRegime = null; - } - if (entailmentRegime != null) - g.add(service, SD.entailmentRegime, entailmentRegime); + g.add(aService, SD.feature, ModeTriples); } - // Other features. - g.add(service, SD.feature, SD.BasicFederatedQuery); + if (tripleStore.getLexiconRelation().isTextIndex()) { + g.add(aService, SD.feature, TextIndexValueCentric); + + } + + if (tripleStore.getLexiconRelation().isSubjectCentricTextIndex()) { + + g.add(aService, SD.feature, TextIndexSubjectCentric); + + } + + // TODO Report when justification chains are being tracked? +// if(tripleStore.isJustify()) + + final Properties properties = tripleStore.getProperties(); + + if (Boolean.valueOf(properties.getProperty( + BigdataSail.Options.TRUTH_MAINTENANCE, + BigdataSail.Options.DEFAULT_TRUTH_MAINTENANCE))) { + + g.add(aService, SD.feature, TruthMaintenance); + + } + + if (Boolean.valueOf(properties.getProperty( + BigdataSail.Options.ISOLATABLE_INDICES, + BigdataSail.Options.DEFAULT_ISOLATABLE_INDICES))) { + + g.add(aService, SD.feature, IsolatableIndices); + + } + /* - * Information about the defaultGraph. 
+ * TODO Report properties from the BigdataSail * - * TODO This could all be generalized and then run for each known named - * graph as well. + * NAMESPACE? */ + + } + + /** + * Describe the entailment regime. + * + * TODO The Axioms interface could self-report this. + */ + protected void describeEntailmentRegime() { + + if (!tripleStore.isQuads()) + return; + + final URI entailmentRegime; + final Axioms axioms = tripleStore.getAxioms(); + if (axioms == null || axioms instanceof NoAxioms) { + entailmentRegime = SD.simpleEntailment; + } else if (axioms instanceof OwlAxioms) { + // TODO This is really the RDFS+ entailment regime. + entailmentRegime = SD.rdfsEntailment; + } else if (axioms instanceof RdfsAxioms) { + entailmentRegime = SD.rdfsEntailment; + } else { + // Unknown. + entailmentRegime = null; + } + if (entailmentRegime != null) + g.add(aService, SD.entailmentRegime, entailmentRegime); + + } + + /** + * Describe the default data set. + * + * @param describeNamedGraph + * When <code>true</code>, each named graph will also be + * described in in the same level of detail as the default graph. + * Otherwise only the default graph will be described. + */ + protected void describeDefaultDataSet(final boolean describeNamedGraph) { + + // Default data set + g.add(aService, SD.defaultDataset, aDefaultDataset); + + g.add(aDefaultDataset, RDF.TYPE, SD.Dataset); + + // any URI is considered to be an entity. + g.add(aDefaultDataset, VoidVocabularyDecl.uriRegexPattern, + f.createLiteral("^.*")); + + // Frequency count of the predicates in the default graph. + final IVCount[] predicatePartitionCounts = predicateUsage( + tripleStore); + + // Frequency count of the classes in the default graph. + final IVCount[] classPartitionCounts = classUsage(tripleStore); + + // Describe vocabularies based on the predicate partitions. + describeVocabularies(predicatePartitionCounts); + + // defaultGraph description. { + + // Default graph in the default data set. 
+ g.add(aDefaultDataset, SD.defaultGraph, aDefaultGraph); - // Default data set - g.add(service, SD.defaultDataset, defaultDataset); + // Describe the default graph using statistics. + describeGraph(aDefaultGraph, predicatePartitionCounts, + classPartitionCounts); - g.add(defaultDataset, RDF.TYPE, SD.Dataset); + } // end defaultGraph - // any URI is considered to be an entity. - g.add(defaultDataset, VoidVocabularyDecl.uriRegexPattern, - f.createLiteral("^.*")); + // sb.append("termCount\t = " + tripleStore.getTermCount() + "\n"); + // + // sb.append("uriCount\t = " + tripleStore.getURICount() + "\n"); + // + // sb.append("literalCount\t = " + tripleStore.getLiteralCount() + + // "\n"); + // + // /* + // * Note: The blank node count is only available when using the told + // * bnodes mode. + // */ + // sb + // .append("bnodeCount\t = " + // + (tripleStore.getLexiconRelation() + // .isStoreBlankNodes() ? "" + // + tripleStore.getBNodeCount() : "N/A") + // + "\n"); - // Default graph in the default data set. - g.add(defaultDataset, SD.defaultGraph, defaultGraph); + /* + * Report for each named graph. + */ + if (describeNamedGraph && tripleStore.isQuads()) { - // defautGraph description. - { + final SPORelation r = tripleStore.getSPORelation(); - // #of triples in the default graph - g.add(defaultGraph, VoidVocabularyDecl.triples, - f.createLiteral(tripleStore.getStatementCount())); + // the index to use for distinct term scan. + final SPOKeyOrder keyOrder = SPOKeyOrder.CSPO; - // #of entities in the default graph. - g.add(defaultGraph, VoidVocabularyDecl.entities, - f.createLiteral(tripleStore.getURICount())); + // visit distinct IVs for context position on that index. + @SuppressWarnings("rawtypes") + final IChunkedIterator<IV> itr = r.distinctTermScan(keyOrder); - // The distinct vocabularies in use. - final Set<String> namespaces = new LinkedHashSet<String>(); + // resolve IVs to terms efficiently during iteration. 
+ final BigdataValueIterator itr2 = new BigdataValueIteratorImpl( + tripleStore/* resolveTerms */, itr); - /* - * property partition statistics & used vocabularies. - * - * Note: A temporary graph is used to hold these data so we can - * first output the vocabulary summary. This gives the output a - * neater (and more human consumable) appearance. - */ - final Graph propertyPartitions = new GraphImpl(); + try { - // Frequency count of the predicates in the default graph. - final IVCount[] predicatePartitionCounts = predicateUsage(tripleStore); + while (itr2.hasNext()) { - // Frequency count of the classes in the default graph. - final IVCount[] classPartitionCounts = classUsage(tripleStore); + /* + * Describe this named graph. + * + * Note: This is using the predicate and class partition + * statistics from the default graph (RDF merge) to identify + * the set of all possible predicates and classes within + * each named graph. It then tests each predicate and class + * partition against the named graph and ignores those which + * are not present in a given named graph. This is being + * done because we do not have a CPxx index. + */ - { + final BigdataResource graph = (BigdataResource) itr2.next(); - // property partitions. - for (IVCount tmp : predicatePartitionCounts) { + final IVCount[] predicatePartitionCounts2 = predicateUsage( + tripleStore, graph.getIV(), + predicatePartitionCounts); - final BNode propertyPartition = f.createBNode(); + final IVCount[] classPartitionCounts2 = classUsage( + tripleStore, graph.getIV(), classPartitionCounts); - final URI p = (URI) tmp.getValue(); + final BNode aNamedGraph = f.createBNode(); - propertyPartitions.add(defaultGraph, - VoidVocabularyDecl.propertyPartition, - propertyPartition); + // Named graph in the default data set. + g.add(aDefaultDataset, SD.namedGraph, aNamedGraph); - propertyPartitions.add(propertyPartition, - VoidVocabularyDecl.property, p); + // The name of that named graph. 
+ g.add(aNamedGraph, SD.name, graph); - propertyPartitions.add(propertyPartition, - VoidVocabularyDecl.triples, - f.createLiteral(tmp.count)); + // Describe the named graph. + describeGraph(aNamedGraph, predicatePartitionCounts2, + classPartitionCounts2); - String namespace = p.getNamespace(); + } + + } finally { - if (namespace.endsWith("#")) { + itr2.close(); - // Strip trailing '#' per VoID specification. - namespace = namespace.substring(0, - namespace.length() - 1); + } - } + } - namespaces.add(namespace); + } - } + /** + * Describe the vocabularies which are in use in the KB based on the + * predicate partition statistics. + * + * @param predicateParitionCounts + * The predicate partition statistics. + */ + protected void describeVocabularies(final IVCount[] predicatePartitionCounts) { - } + // Find the distinct vocabularies in use. + final Set<String> namespaces = new LinkedHashSet<String>(); + { - // emit the in use vocabularies. - for (String namespace : namespaces) { + // property partitions. + for (IVCount tmp : predicatePartitionCounts) { - g.add(defaultDataset, VoidVocabularyDecl.vocabulary, - f.createURI(namespace)); + final URI p = (URI) tmp.getValue(); + String namespace = p.getNamespace(); + + if (namespace.endsWith("#")) { + + // Strip trailing '#' per VoID specification. + namespace = namespace.substring(0, + namespace.length() - 1); + } - // #of distinct predicates in the default graph. - g.add(defaultGraph, VoidVocabularyDecl.properties, - f.createLiteral(predicatePartitionCounts.length)); + namespaces.add(namespace); - // #of distinct classes in the default graph. - g.add(defaultGraph, VoidVocabularyDecl.classes, - f.createLiteral(classPartitionCounts.length)); + } - // now emit the property partition statistics. - g.addAll(propertyPartitions); - - // class partition statistics. - { + } - // per class partition statistics. - for (IVCount tmp : classPartitionCounts) { + // Sort into dictionary order. 
+ final String[] a = namespaces.toArray(new String[namespaces.size()]); + + Arrays.sort(a); - final BNode classPartition = f.createBNode(); + for (String namespace : a) { - final BigdataValue cls = tmp.getValue(); + g.add(aDefaultDataset, VoidVocabularyDecl.vocabulary, + f.createURI(namespace)); - g.add(defaultGraph, VoidVocabularyDecl.classPartition, - classPartition); + } - g.add(classPartition, VoidVocabularyDecl.class_, cls); + } + + /** + * Describe a named or default graph. + * + * @param graph + * The named graph. + * @param predicatePartitionCounts + * The predicate partition statistics for that graph. + * @param classPartitionCounts + * The class partition statistics for that graph. + */ + protected void describeGraph(final Resource graph, + final IVCount[] predicatePartitionCounts, + final IVCount[] classPartitionCounts) { - g.add(classPartition, VoidVocabularyDecl.triples, - f.createLiteral(tmp.count)); + // The graph is a Graph. + g.add(graph, RDF.TYPE, SD.Graph); - } + // #of triples in the default graph + g.add(graph, VoidVocabularyDecl.triples, + f.createLiteral(tripleStore.getStatementCount())); - } // end class partition statistics. + // #of entities in the default graph. + g.add(graph, VoidVocabularyDecl.entities, + f.createLiteral(tripleStore.getURICount())); - } // end defaultGraph - -// sb.append("termCount\t = " + tripleStore.getTermCount() + "\n"); -// -// sb.append("uriCount\t = " + tripleStore.getURICount() + "\n"); -// -// sb.append("literalCount\t = " + tripleStore.getLiteralCount() + "\n"); -// -// /* -// * Note: The blank node count is only available when using the told -// * bnodes mode. -// */ -// sb -// .append("bnodeCount\t = " -// + (tripleStore.getLexiconRelation() -// .isStoreBlankNodes() ? "" -// + tripleStore.getBNodeCount() : "N/A") -// + "\n"); + // #of distinct predicates in the default graph. 
+ g.add(graph, VoidVocabularyDecl.properties, + f.createLiteral(predicatePartitionCounts.length)); + // #of distinct classes in the default graph. + g.add(graph, VoidVocabularyDecl.classes, + f.createLiteral(classPartitionCounts.length)); + + // property partition statistics. + for (IVCount tmp : predicatePartitionCounts) { + + final BNode propertyPartition = f.createBNode(); + + final URI p = (URI) tmp.getValue(); + + g.add(graph, VoidVocabularyDecl.propertyPartition, + propertyPartition); + + g.add(propertyPartition, VoidVocabularyDecl.property, p); + + g.add(propertyPartition, VoidVocabularyDecl.triples, + f.createLiteral(tmp.count)); + } - return g; - + // class partition statistics. + { + + // per class partition statistics. + for (IVCount tmp : classPartitionCounts) { + + final BNode classPartition = f.createBNode(); + + final BigdataValue cls = tmp.getValue(); + + g.add(graph, VoidVocabularyDecl.classPartition, + classPartition); + + g.add(classPartition, VoidVocabularyDecl.class_, cls); + + g.add(classPartition, VoidVocabularyDecl.triples, + f.createLiteral(tmp.count)); + + } + + } // end class partition statistics. + } /** * An {@link IV} and a counter for that {@link IV}. */ - private static class IVCount implements Comparable<IVCount> { + protected static class IVCount implements Comparable<IVCount> { - public final IV<?,?> iv; - + public final IV<?, ?> iv; + public final long count; private BigdataValue val; @@ -677,16 +977,21 @@ } } - + /** * Return an array of the distinct predicates in the KB ordered by their * descending frequency of use. The {@link IV}s in the returned array will * have been resolved to the corresponding {@link BigdataURI}s which can be * accessed using {@link IV#getValue()}. + * + * @param kb + * The KB instance. 
*/ - private static IVCount[] predicateUsage(final AbstractTripleStore kb) { + protected static IVCount[] predicateUsage(final AbstractTripleStore kb) { - if (kb.getSPORelation().oneAccessPath) { + final SPORelation r = kb.getSPORelation(); + + if (r.oneAccessPath) { // The necessary index (POS or POCS) does not exist. throw new UnsupportedOperationException(); @@ -695,10 +1000,12 @@ final boolean quads = kb.isQuads(); - // visit distinct term identifiers for the predicate position. + // the index to use for distinct predicate scan. + final SPOKeyOrder keyOrder = quads ? SPOKeyOrder.POCS : SPOKeyOrder.POS; + + // visit distinct term identifiers for predicate position on that index. @SuppressWarnings("rawtypes") - final IChunkedIterator<IV> itr = kb.getSPORelation().distinctTermScan( - quads ? SPOKeyOrder.POCS : SPOKeyOrder.POS); + final IChunkedIterator<IV> itr = r.distinctTermScan(keyOrder); // resolve term identifiers to terms efficiently during iteration. final BigdataValueIterator itr2 = new BigdataValueIteratorImpl( @@ -716,8 +1023,7 @@ final IV<?,?> iv = term.getIV(); - final long n = kb.getSPORelation() - .getAccessPath(null, iv, null, null) + final long n = r.getAccessPath(null, iv, null, null) .rangeCount(false/* exact */); ivs.add(iv); @@ -755,6 +1061,61 @@ } /** + * Return the predicate partition statistics for the named graph. + * + * @param kb + * The KB instance. + * @param civ + * The {@link IV} of a named graph (required). + * + * @return The predicate partition statistics for that named graph. Only + * predicate partitions which are non-empty are returned. + */ + protected static IVCount[] predicateUsage(final AbstractTripleStore kb, + final IV<?, ?> civ, final IVCount[] predicatePartitionCounts) { + + final SPORelation r = kb.getSPORelation(); + + final boolean quads = kb.isQuads(); + + if (!quads) { + + // Named graph only valid in quads mode. + throw new IllegalArgumentException(); + + } + + // The non-zero counts. 
+ final List<IVCount> counts = new LinkedList<IVCount>(); + + // Check the known non-empty predicate partitions. + for(IVCount in : predicatePartitionCounts){ + + final long n = r.getAccessPath(null, in.iv, null, civ).rangeCount( + false/* exact */); + + if (n == 0) + continue; + + final IVCount out = new IVCount(in.iv, n); + + out.setValue(in.getValue()); + + counts.add(out); + + } + + + final IVCount[] a = counts.toArray(new IVCount[counts.size()]); + + // Order by descending count. + Arrays.sort(a); + + return a; + + } + + /** * Return an efficient statistical summary for the class partitions. The * SPARQL query for this is * @@ -781,10 +1142,12 @@ * * @return The class usage statistics. */ - private static IVCount[] classUsage(final AbstractTripleStore kb) { + protected static IVCount[] classUsage(final AbstractTripleStore kb) { - if (kb.getSPORelation().oneAccessPath) { + final SPORelation r = kb.getSPORelation(); + if (r.oneAccessPath) { + // The necessary index (POS or POCS) does not exist. throw new UnsupportedOperationException(); @@ -809,9 +1172,8 @@ // visit distinct term identifiers for the rdf:type predicate. @SuppressWarnings("rawtypes") - final IChunkedIterator<IV> itr = kb - .getSPORelation() - .distinctMultiTermScan(keyOrder, new IV[] { rdfType.getIV() }/* knownTerms */); + final IChunkedIterator<IV> itr = r.distinctMultiTermScan(keyOrder, + new IV[] { rdfType.getIV() }/* knownTerms */); // resolve term identifiers to terms efficiently during iteration. final BigdataValueIterator itr2 = new BigdataValueIteratorImpl( @@ -829,10 +1191,8 @@ final IV<?,?> iv = term.getIV(); - final long n = kb - .getSPORelation() - .getAccessPath(null, rdfType.getIV()/* p */, iv/* o */, - null).rangeCount(false/* exact */); + final long n = r.getAccessPath(null, rdfType.getIV()/* p */, + iv/* o */, null).rangeCount(false/* exact */); ivs.add(iv); @@ -868,4 +1228,71 @@ } + /** + * Return the class partition statistics for the named graph. 
+ *
+ * @param kb
+ * The KB instance.
+ * @param civ
+ * The {@link IV} of a named graph (required).
+ *
+ * @return The class partition statistics for that named graph. Only class
+ * partitions which are non-empty are returned.
+ */
+ protected static IVCount[] classUsage(final AbstractTripleStore kb,
+ final IV<?, ?> civ, final IVCount[] classPartitionCounts) {
+
+ final SPORelation r = kb.getSPORelation();
+
+ final boolean quads = kb.isQuads();
+
+ if (!quads) {
+
+ // Named graph only valid in quads mode.
+ throw new IllegalArgumentException();
+
+ }
+
+ // Resolve IV for rdf:type
+ final BigdataURI rdfType = kb.getValueFactory().asValue(RDF.TYPE);
+
+ kb.getLexiconRelation().addTerms(new BigdataValue[] { rdfType },
+ 1/* numTerms */, true/* readOnly */);
+
+ if (rdfType.getIV() == null) {
+
+ // No rdf:type assertions since rdf:type is unknown term.
+ return new IVCount[0];
+
+ }
+
+ // The non-zero counts.
+ final List<IVCount> counts = new LinkedList<IVCount>();
+
+ // Check the known non-empty class partitions.
+ for (IVCount in : classPartitionCounts) {
+
+ final long n = r.getAccessPath(null, rdfType.getIV()/* p */,
+ in.iv/* o */, civ).rangeCount(false/* exact */);
+
+ if (n == 0)
+ continue;
+
+ final IVCount out = new IVCount(in.iv, n);
+
+ out.setValue(in.getValue());
+
+ counts.add(out);
+
+ }
+
+ final IVCount[] a = counts.toArray(new IVCount[counts.size()]);
+
+ // Order by descending count. 
+ Arrays.sort(a);
+
+ return a;
+
+ }
+
 }

Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/SparqlEndpointConfig.java
===================================================================
--- branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/SparqlEndpointConfig.java	2012-07-24 00:33:16 UTC (rev 6389)
+++ branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/SparqlEndpointConfig.java	2012-07-24 14:04:39 UTC (rev 6390)
@@ -50,8 +50,16 @@
 */
 final public int queryThreadPoolSize;

+ /**
+ * When <code>true</code> and the KB instance is in the <code>quads</code>
+ * mode, each named graph will also be described in the same level of
+ * detail as the default graph. Otherwise only the default graph will be
+ * described.
+ */
+ final public boolean describeEachNamedGraph;
+
 public SparqlEndpointConfig(final String namespace, final long timestamp,
- final int queryThreadPoolSize) {
+ final int queryThreadPoolSize, final boolean describeEachNamedGraph) {

 if (namespace == null)
 throw new IllegalArgumentException();

@@ -61,6 +69,8 @@

 this.timestamp = timestamp;

 this.queryThreadPoolSize = queryThreadPoolSize;
+
+ this.describeEachNamedGraph = describeEachNamedGraph;

 }

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |