This list is closed, nobody may subscribe to it.
2010 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
(139) |
Aug
(94) |
Sep
(232) |
Oct
(143) |
Nov
(138) |
Dec
(55) |
---|---|---|---|---|---|---|---|---|---|---|---|---|
2011 |
Jan
(127) |
Feb
(90) |
Mar
(101) |
Apr
(74) |
May
(148) |
Jun
(241) |
Jul
(169) |
Aug
(121) |
Sep
(157) |
Oct
(199) |
Nov
(281) |
Dec
(75) |
2012 |
Jan
(107) |
Feb
(122) |
Mar
(184) |
Apr
(73) |
May
(14) |
Jun
(49) |
Jul
(26) |
Aug
(103) |
Sep
(133) |
Oct
(61) |
Nov
(51) |
Dec
(55) |
2013 |
Jan
(59) |
Feb
(72) |
Mar
(99) |
Apr
(62) |
May
(92) |
Jun
(19) |
Jul
(31) |
Aug
(138) |
Sep
(47) |
Oct
(83) |
Nov
(95) |
Dec
(111) |
2014 |
Jan
(125) |
Feb
(60) |
Mar
(119) |
Apr
(136) |
May
(270) |
Jun
(83) |
Jul
(88) |
Aug
(30) |
Sep
(47) |
Oct
(27) |
Nov
(23) |
Dec
|
2015 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
(3) |
Oct
|
Nov
|
Dec
|
2016 |
Jan
|
Feb
|
Mar
(4) |
Apr
(1) |
May
|
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
|
Dec
|
From: <tho...@us...> - 2014-02-24 13:47:32
|
Revision: 7882 http://sourceforge.net/p/bigdata/code/7882 Author: thompsonbry Date: 2014-02-24 13:47:27 +0000 (Mon, 24 Feb 2014) Log Message: ----------- Added a link attribute type constraint as part of the RDR support. This constraint is not yet integrated into the BigdataGASEngine. It is exposed to the GASService and can be set, but it is currently ignored. There is now a unit test for scatter out edges with the link type constraint. The test for the link attribute access is stil failing since that feature has not yet been implemented in the BigdataGASEngine. Modified Paths: -------------- branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/IGASContext.java branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/impl/GASContext.java branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/bd/BigdataGASEngine.java branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/bd/GASService.java branches/RDR/bigdata-rdf/src/test/com/bigdata/rdf/graph/impl/bd/AbstractBigdataGraphTestCase.java branches/RDR/bigdata-rdf/src/test/com/bigdata/rdf/graph/impl/bd/TestSSSP.java Modified: branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/IGASContext.java =================================================================== --- branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/IGASContext.java 2014-02-24 13:41:17 UTC (rev 7881) +++ branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/IGASContext.java 2014-02-24 13:47:27 UTC (rev 7882) @@ -116,13 +116,11 @@ * @return The {@link Value} for the predicate that identifies the desired * link type (there can be many types of links - the return value * specifies which attribute is of interest). - * - * FIXME define getLinkAttribType() (RDR) */ URI getLinkType(); /** - * Set an optional constraint on the type of the visited links. + * Set an optional restriction on the type of the visited links. * <p> * Note: When this option is used, the scatter and gather will not visit the * property set for the vertex. Instead, the graph is treated as if it were @@ -136,6 +134,32 @@ void setLinkType(URI linkType); /** + * Return non-<code>null</code> iff there is a single link attribute type to + * be visited. This imposes a restriction on which link attributes are + * considered by the algorithm. The link attribute type restriction may be + * (and often is) paired with a link type restriction. + * + * @return The {@link Value} for the predicate that identifies the desired + * link attribute type. + * + * @see #setLinkType(URI) + */ + URI getLinkAttributeType(); + + /** + * Imposes an optional restriction on which link attributes are considered + * by the algorithm. The link attribute type restriction may be (and often + * is) paired with a link type restriction. + * + * @param linkAttributeType + * The link attribute type to visit (optional). When + * <code>null</code>, the link attributes for the visited links + * are NOT visited (the topology of the graph is visited, but not + * the attributes for the edges). + */ + void setLinkAttributeType(URI linkType); + + /** * Set an optional {@link IReducer} that will run after the * {@link IGASProgram} is terminated. This may be used to extract results * from the visited vertices. @@ -163,10 +187,10 @@ * TODO Rename as constrainEdgeFilter or even split into a * constrainGatherFilter and a constraintScatterFilter. * - * FIXME APPLY : If we need access to the vertex property values in - * APPLY (which we probably do, at least optionally), then there - * should be a similar method to decide whether the property values - * for the vertex are made available during the APPLY. + * TODO APPLY : If we need access to the vertex property values in + * APPLY (which we probably do, at least optionally), then perhaps + * there should be a similar method to decide whether the property + * values for the vertex are made available during the APPLY. */ IStriterator constrainFilter(IStriterator eitr); Modified: branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/impl/GASContext.java =================================================================== --- branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/impl/GASContext.java 2014-02-24 13:41:17 UTC (rev 7881) +++ branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/impl/GASContext.java 2014-02-24 13:47:27 UTC (rev 7882) @@ -81,6 +81,11 @@ private final AtomicReference<URI> linkType = new AtomicReference<URI>(null); /** + * An optional constraint on the type of the visited link attributes. + */ + private final AtomicReference<URI> linkAttributeType = new AtomicReference<URI>(null); + + /** * An optional {@link IReducer} that will executed after the * {@link IGASProgram}. */ @@ -824,12 +829,6 @@ } - /** - * {@inheritDoc} - * <p> - * The default implementation does not restrict the visitation to a - * connectivity matrix (returns <code>null</code>). - */ @Override public URI getLinkType() { @@ -844,6 +843,20 @@ } + @Override + public URI getLinkAttributeType() { + + return linkAttributeType.get(); + + } + + @Override + public void setLinkAttributeType(final URI linkAttributeType) { + + this.linkAttributeType.set(linkAttributeType); + + } + /** * {@inheritDoc} * <p> Modified: branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/bd/BigdataGASEngine.java =================================================================== --- branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/bd/BigdataGASEngine.java 2014-02-24 13:41:17 UTC (rev 7881) +++ branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/bd/BigdataGASEngine.java 2014-02-24 13:47:27 UTC (rev 7882) @@ -36,6 +36,7 @@ import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.internal.IVUtility; import com.bigdata.rdf.internal.NotMaterializedException; +import com.bigdata.rdf.internal.impl.bnode.SidIV; import com.bigdata.rdf.model.BigdataValue; import com.bigdata.rdf.sail.BigdataSail; import com.bigdata.rdf.spo.ISPO; @@ -348,6 +349,7 @@ private final IV u; // ctor (computed) private final IV linkTypeIV; + private final IV linkAttrTypeIV; private final boolean posOptimization; private final SPOKeyOrder keyOrder; private final IIndex ndx; @@ -364,6 +366,8 @@ this.u = u; linkTypeIV = getIV(ctx.getLinkType()); + + linkAttrTypeIV = getIV(ctx.getLinkAttributeType()); final IKeyBuilder keyBuilder; /* @@ -376,7 +380,7 @@ * We use the POS(C) index. The S values give us the in-edges * for that [u] and the specified link type. * - * FIXME POS OPTIMIZATION: write unit test for this option to + * TODO POS OPTIMIZATION: write unit test for this option to * make sure that the right filter is imposed. write performance * test to verify expected benefit. Watch out for the in-edges * vs out-edges since only one is optimized. @@ -397,16 +401,74 @@ keyBuilder.reset(); - // Bind P as a constant. - IVUtility.encode(keyBuilder, linkTypeIV); +// if (linkAttrTypeIV != null) { +// +// /* +// * RDR optimization for POS(C) index: +// * +// * P:= linkAttributeType +// * +// * O:= unbound (the SID is in SPO(C) order, but we do +// * not have S. P would be the linkType, but without S we +// * can not form a prefix). +// * +// * S:= unbound +// * +// * C:= unbound +// * +// * Note: We can only optimize this when both the +// * linkType and linkAttributeType are specified. +// */ +// +// // P +// IVUtility.encode(keyBuilder, linkAttrTypeIV); +// +// // O is a SID prefix. +// { +// +// // RDR prefix byte. +// keyBuilder.append(SidIV.toFlags()); +// +// // SID.P:=linkType +// IVUtility.encode(keyBuilder, linkTypeIV); +// +// // SID.O:=u +// IVUtility.encode(keyBuilder, u); +// +// } +// +// // The rest of the key is unbound. +// +// } else { - // Bind O for this key-range scan. - IVUtility.encode(keyBuilder, u); + // Bind P as a constant. + IVUtility.encode(keyBuilder, linkTypeIV); + // Bind O for this key-range scan. + IVUtility.encode(keyBuilder, u); + +// } + } else { /* * SPO(C) or OSP(C) + * + * FIXME RDR: For RDR link attribute access, the keys are + * formed differently. Lower case letters are used for + * variables. Upper case letters for constants. + * + * For SPO(C): S:=SID(Spo(c)), P:=linkAttributeType (must + * filter), O:=linkAttributeValue (read it off the index + * when the filter is satisfied). + * + * For OSP(C): OL=SID(Osp(c)), P:=linkAttributeType (must + * filter), S:=linkAttributeValue (read it off the index + * when the filter is satisfied). + * + * FIXME RDR should also be supported in the SAIL and RAM + * GAS engine implementations. The statements about + * statements would be modeled as reified statement models. */ keyOrder = getKeyOrder(kb, inEdges); Modified: branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/bd/GASService.java =================================================================== --- branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/bd/GASService.java 2014-02-24 13:41:17 UTC (rev 7881) +++ branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/bd/GASService.java 2014-02-24 13:47:27 UTC (rev 7882) @@ -217,9 +217,19 @@ * * @see IGASContext#setLinkType(URI) */ - URI LINK_TYPE = new URIImpl(NAMESPACE+"linkType"); - + URI LINK_TYPE = new URIImpl(NAMESPACE + "linkType"); + /** + * An optional constraint on the types of the link attributes that will + * be visited by the algorithm - the use of this option is required if + * you want to process some specific link weight rather than the simple + * topology of the graph. + * + * @see IGASContext#setLinkAttributeType(URI) + */ + URI LINK_ATTR_TYPE = new URIImpl(NAMESPACE + "linkAttrType"); + + /** * The {@link IGASScheduler} (default is {@link #DEFAULT_SCHEDULER}). * Class must implement {@link IGASSchedulerImpl}. */ @@ -367,7 +377,7 @@ private final int nthreads; private final int maxIterations; private final int maxVisited; - private final URI linkType; + private final URI linkType, linkAttrType; private final Class<IGASProgram<VS, ES, ST>> gasClass; private final Class<IGASSchedulerImpl> schedulerClass; private final Value[] initialFrontier; @@ -412,6 +422,9 @@ this.linkType = (URI) getOnlyArg(Options.PROGRAM, Options.LINK_TYPE, null/* default */); + this.linkAttrType = (URI) getOnlyArg(Options.PROGRAM, + Options.LINK_ATTR_TYPE, null/* default */); + // GASProgram (required) { @@ -719,9 +732,14 @@ gasContext.setMaxVisited(maxVisited); + // Optional link type constraint. if (linkType != null) gasContext.setLinkType(linkType); + // Optional link attribute constraint. + if (linkAttrType != null) + gasContext.setLinkAttributeType(linkAttrType); + final IGASState<VS, ES, ST> gasState = gasContext.getGASState(); // TODO We should look at this when extracting the parameters from the SERVICE's graph pattern. Modified: branches/RDR/bigdata-rdf/src/test/com/bigdata/rdf/graph/impl/bd/AbstractBigdataGraphTestCase.java =================================================================== --- branches/RDR/bigdata-rdf/src/test/com/bigdata/rdf/graph/impl/bd/AbstractBigdataGraphTestCase.java 2014-02-24 13:41:17 UTC (rev 7881) +++ branches/RDR/bigdata-rdf/src/test/com/bigdata/rdf/graph/impl/bd/AbstractBigdataGraphTestCase.java 2014-02-24 13:47:27 UTC (rev 7882) @@ -204,7 +204,7 @@ */ static private final String smallWeightedGraph = "bigdata-gas/src/test/com/bigdata/rdf/graph/data/smallWeightedGraph.ttl"; - private final BigdataURI foafKnows, v1, v2, v3, v4, v5; + private final BigdataURI foafKnows, linkWeight, v1, v2, v3, v4, v5; public SmallWeightedGraphProblem() throws Exception { @@ -216,6 +216,9 @@ foafKnows = (BigdataURI) vf .createURI("http://xmlns.com/foaf/0.1/knows"); + + linkWeight = (BigdataURI) vf + .createURI("http://www.bigdata.com/weight"); v1 = (BigdataURI) vf.createURI("http://www.bigdata.com/1"); v2 = (BigdataURI) vf.createURI("http://www.bigdata.com/2"); @@ -223,8 +226,8 @@ v4 = (BigdataURI) vf.createURI("http://www.bigdata.com/4"); v5 = (BigdataURI) vf.createURI("http://www.bigdata.com/5"); - final BigdataValue[] terms = new BigdataValue[] { foafKnows, v1, - v2, v3, v4, v5 }; + final BigdataValue[] terms = new BigdataValue[] { foafKnows, + linkWeight, v1, v2, v3, v4, v5 }; // batch resolve existing IVs. ((BigdataSail) sail).getDatabase().getLexiconRelation() @@ -243,6 +246,11 @@ } @SuppressWarnings("rawtypes") + public IV getLinkWeight() { + return linkWeight.getIV(); + } + + @SuppressWarnings("rawtypes") public IV getV1() { return v1.getIV(); } Modified: branches/RDR/bigdata-rdf/src/test/com/bigdata/rdf/graph/impl/bd/TestSSSP.java =================================================================== --- branches/RDR/bigdata-rdf/src/test/com/bigdata/rdf/graph/impl/bd/TestSSSP.java 2014-02-24 13:41:17 UTC (rev 7881) +++ branches/RDR/bigdata-rdf/src/test/com/bigdata/rdf/graph/impl/bd/TestSSSP.java 2014-02-24 13:47:27 UTC (rev 7882) @@ -23,6 +23,8 @@ */ package com.bigdata.rdf.graph.impl.bd; +import org.openrdf.model.URI; + import com.bigdata.rdf.graph.IGASContext; import com.bigdata.rdf.graph.IGASEngine; import com.bigdata.rdf.graph.IGASState; @@ -84,7 +86,64 @@ } /** + * A unit test based on graph with link weights - in this version of the + * test we constrain the link type but do not specify the link attribute + * type. Hence it ignores the link weights. This provides a test of the + * optimized access path when just the link type constraint is specified. + */ + public void test_sssp_linkType_constraint() throws Exception { + + final SmallWeightedGraphProblem p = setupSmallWeightedGraphProblem(); + + final IGASEngine gasEngine = getGraphFixture() + .newGASEngine(1/* nthreads */); + + try { + + final IGraphAccessor graphAccessor = getGraphFixture() + .newGraphAccessor(null/* ignored */); + + final IGASContext<SSSP.VS, SSSP.ES, Integer> gasContext = gasEngine + .newGASContext(graphAccessor, new SSSP()); + + // Set constraint on the visited link types. + gasContext.setLinkType((URI) p.getFoafKnows()); + + final IGASState<SSSP.VS, SSSP.ES, Integer> gasState = gasContext.getGASState(); + + // Initialize the froniter. + gasState.setFrontier(gasContext, p.getV1()); + + // Converge. + gasContext.call(); + + assertEquals(0, gasState.getState(p.getV1()).dist()); + + assertEquals(1, gasState.getState(p.getV2()).dist()); + + assertEquals(1, gasState.getState(p.getV3()).dist()); + + assertEquals(2, gasState.getState(p.getV4()).dist()); + + assertEquals(2, gasState.getState(p.getV5()).dist()); + + } finally { + + gasEngine.shutdownNow(); + + } + + } + + /** * A unit test based on graph with link weights. + * + * FIXME Test with just the linkAttributeType constraint and with both a + * linkType and linkAttributeType constraint. (We already have a test with + * just the linkType constraint above). + * + * FIXME This is only testing the scatter AP. We also need to test the + * gather AP. */ public void test_sssp_weightedGraph() throws Exception { @@ -101,6 +160,12 @@ final IGASContext<SSSP.VS, SSSP.ES, Integer> gasContext = gasEngine .newGASContext(graphAccessor, new SSSP()); + // Set constraint on the visited link types. + gasContext.setLinkType((URI) p.getFoafKnows()); + + // Set constraint on the visited link attribute types. + gasContext.setLinkAttributeType((URI) p.getLinkWeight()); + final IGASState<SSSP.VS, SSSP.ES, Integer> gasState = gasContext.getGASState(); // Initialize the froniter. This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2014-02-24 13:41:21
|
Revision: 7881 http://sourceforge.net/p/bigdata/code/7881 Author: thompsonbry Date: 2014-02-24 13:41:17 +0000 (Mon, 24 Feb 2014) Log Message: ----------- final attributes, etc. Modified Paths: -------------- branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/internal/impl/bnode/SidIV.java Modified: branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/internal/impl/bnode/SidIV.java =================================================================== --- branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/internal/impl/bnode/SidIV.java 2014-02-24 12:51:24 UTC (rev 7880) +++ branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/internal/impl/bnode/SidIV.java 2014-02-24 13:41:17 UTC (rev 7881) @@ -42,6 +42,7 @@ import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.internal.IVUtility; import com.bigdata.rdf.internal.VTE; +import com.bigdata.rdf.internal.impl.AbstractIV; import com.bigdata.rdf.internal.impl.AbstractInlineIV; import com.bigdata.rdf.lexicon.LexiconRelation; import com.bigdata.rdf.model.BigdataBNode; @@ -100,6 +101,7 @@ */ private transient V bnode; + @Override public IV<V, ISPO> clone(final boolean clearCache) { final SidIV<V> tmp = new SidIV<V>(spo); @@ -135,9 +137,23 @@ } - /** + + /** + * Return the <code>flags</code> byte for a {@link SidIV}. + */ + public static final byte toFlags() { + /* + * Note: XSDBoolean happens to be assigned the code value of 0, which is + * the value we want when the data type enumeration will be ignored. + */ + return AbstractIV.toFlags(VTE.STATEMENT, true/* inline */, + false/* extension */, DTE.XSDBoolean); + } + + /** * Returns the inline spo. */ + @Override public ISPO getInlineValue() throws UnsupportedOperationException { return spo; } @@ -146,12 +162,14 @@ * Returns the bnode representation of this IV, useful for serialization * formats such as RDF/XML. See {@link #bnodeId()}. */ + @SuppressWarnings("unchecked") + @Override public V asValue(final LexiconRelation lex) { - if (bnode == null) { + if (bnode == null) { bnode = (V) lex.getValueFactory().createBNode(getID()); bnode.setIV(this); bnode.setStatementIdentifier(true); - } + } return bnode; } @@ -159,10 +177,12 @@ * Return the byte length for the byte[] encoded representation of this * internal value. Depends on the byte length of the encoded inline spo. */ + @Override public int byteLength() { return 1 + key().length; } + @Override public String toString() { return "Sid("+toString(spo)+")"; } @@ -177,6 +197,7 @@ SPO.toString(spo.o())); } + @Override public int hashCode() { return spo.hashCode(); } @@ -204,6 +225,7 @@ /** * Two {@link SidIV} are equal if their (s,p,o) IVs are equal. */ + @Override public boolean equals(final Object o) { if (this == o) return true; @@ -219,7 +241,7 @@ return false; } - public int _compareTo(IV o) { + public int _compareTo(final IV o) { /* * Note: This works, but it might be more expensive. @@ -299,7 +321,8 @@ this.key = iv.key(); } - public void readExternal(ObjectInput in) throws IOException, + @Override + public void readExternal(final ObjectInput in) throws IOException, ClassNotFoundException { // flags = in.readByte(); final int nbytes = LongPacker.unpackInt(in); @@ -307,7 +330,8 @@ in.readFully(key); } - public void writeExternal(ObjectOutput out) throws IOException { + @Override + public void writeExternal(final ObjectOutput out) throws IOException { // out.writeByte(flags); LongPacker.packLong(out, key.length); out.write(key); @@ -352,5 +376,4 @@ } - } \ No newline at end of file This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2014-02-24 12:51:26
|
Revision: 7880 http://sourceforge.net/p/bigdata/code/7880 Author: thompsonbry Date: 2014-02-24 12:51:24 +0000 (Mon, 24 Feb 2014) Log Message: ----------- Modified IGASEngine to report the #of threads in the thread pool. Javadoc related to an approach for implementing a parallel reduction operator. Javadoc linking to the wiki page for the GAS API and GASService. Modified Paths: -------------- branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/IGASEngine.java branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/IGASState.java branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/impl/GASEngine.java branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/impl/GASState.java Modified: branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/IGASEngine.java =================================================================== --- branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/IGASEngine.java 2014-02-24 12:49:05 UTC (rev 7879) +++ branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/IGASEngine.java 2014-02-24 12:51:24 UTC (rev 7880) @@ -64,8 +64,13 @@ */ void shutdownNow(); + /** + * The parallelism for the SCATTER and GATHER phases. + */ + int getNThreads(); + /* - * TODO This is a problem since we then need to scope the SailConnection + * Note: This is a problem since we then need to scope the SailConnection * internally. */ // /** Modified: branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/IGASState.java =================================================================== --- branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/IGASState.java 2014-02-24 12:49:05 UTC (rev 7879) +++ branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/IGASState.java 2014-02-24 12:51:24 UTC (rev 7880) @@ -238,7 +238,11 @@ * @return The edge decoded from that vertex and <code>null</code> iff the * vertex is not an edge. * - * TODO RDR : Link to an RDR wiki page as well. + * @see <a href="http://www.bigdata.com/whitepapers/reifSPARQL.pdf" > + * Reification Done Right </a> + * + * @see <a href="http://wiki.bigdata.com/wiki/index.php/RDF_GAS_API" > RDF + * GAS API</a> */ Statement decodeStatement(Value v); Modified: branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/impl/GASEngine.java =================================================================== --- branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/impl/GASEngine.java 2014-02-24 12:49:05 UTC (rev 7879) +++ branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/impl/GASEngine.java 2014-02-24 12:51:24 UTC (rev 7880) @@ -90,9 +90,7 @@ */ private final AtomicReference<Class<IGASSchedulerImpl>> schedulerClassRef; - /** - * The parallelism for the SCATTER and GATHER phases. - */ + @Override public int getNThreads() { return nthreads; Modified: branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/impl/GASState.java =================================================================== --- branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/impl/GASState.java 2014-02-24 12:49:05 UTC (rev 7879) +++ branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/impl/GASState.java 2014-02-24 12:51:24 UTC (rev 7880) @@ -324,6 +324,12 @@ * Note: We can not do a parallel reduction right now because the backing * class does not expose a parallel iterator, e.g., a segment-wise iterator. * The reduction over the {@link #vertexState} is quite slow as a result. + * <p> + * It looks like bulk parallel operators will be eventually introduced into + * the Java concurrency collections. For now, it seems like the short term + * solution would be to drop them onto stripped lists at the same time that + * they are first inserted into the CHM. I could then read over those + * striped lists in parallel during the reduction. */ @Override public <T> T reduce(final IReducer<VS, ES, ST, T> op) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2014-02-24 12:49:10
|
Revision: 7879 http://sourceforge.net/p/bigdata/code/7879 Author: thompsonbry Date: 2014-02-24 12:49:05 +0000 (Mon, 24 Feb 2014) Log Message: ----------- javadoc edit. Modified Paths: -------------- branches/RDR/bigdata/src/java/com/bigdata/relation/accesspath/ThreadLocalBufferFactory.java Modified: branches/RDR/bigdata/src/java/com/bigdata/relation/accesspath/ThreadLocalBufferFactory.java =================================================================== --- branches/RDR/bigdata/src/java/com/bigdata/relation/accesspath/ThreadLocalBufferFactory.java 2014-02-24 01:52:13 UTC (rev 7878) +++ branches/RDR/bigdata/src/java/com/bigdata/relation/accesspath/ThreadLocalBufferFactory.java 2014-02-24 12:49:05 UTC (rev 7879) @@ -37,7 +37,7 @@ /** * A factory pattern for per-thread objects whose life cycle is tied to some - * container. . The pool can be torn down when the container is torn down, which + * container. The pool can be torn down when the container is torn down, which * prevents its thread-local references from escaping. * <p> * Note: This implementation uses a true thread local buffers managed by a This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2014-02-24 01:52:19
|
Revision: 7878 http://sourceforge.net/p/bigdata/code/7878 Author: thompsonbry Date: 2014-02-24 01:52:13 +0000 (Mon, 24 Feb 2014) Log Message: ----------- Checkpoint on the GASService. See #810. Modified Paths: -------------- branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/IGASContext.java branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/IGASOptions.java branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/IGASProgram.java branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/IReducer.java branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/analytics/BFS.java branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/analytics/CC.java branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/analytics/PR.java branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/analytics/SSSP.java branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/impl/BaseGASProgram.java branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/impl/GASContext.java branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/impl/GASEngine.java branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/impl/GASState.java branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/impl/ram/RAMGASEngine.java branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/impl/sail/SAILGASEngine.java branches/RDR/bigdata-gas/src/test/com/bigdata/rdf/graph/impl/ram/TestGather.java branches/RDR/bigdata-gas/src/test/com/bigdata/rdf/graph/impl/sail/TestGather.java branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/bd/BigdataGASEngine.java branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/bd/GASService.java branches/RDR/bigdata-rdf/src/test/com/bigdata/rdf/graph/impl/bd/TestBFS.java branches/RDR/bigdata-rdf/src/test/com/bigdata/rdf/graph/impl/bd/TestGather.java branches/RDR/bigdata-sails/src/java/com/bigdata/rdf/sail/sparql/PrefixDeclProcessor.java Modified: branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/IGASContext.java =================================================================== --- branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/IGASContext.java 2014-02-23 23:24:57 UTC (rev 7877) +++ branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/IGASContext.java 2014-02-24 01:52:13 UTC (rev 7878) @@ -18,6 +18,11 @@ import java.util.concurrent.Callable; import java.util.concurrent.ExecutionException; +import org.openrdf.model.URI; +import org.openrdf.model.Value; + +import cutthecrap.utils.striterators.IStriterator; + /** * Execution context for an {@link IGASProgram}. This is distinct from the * {@link IGASEngine} so we can support distributed evaluation and concurrent @@ -36,6 +41,15 @@ * the generic type for the per-edge state, but that is not always * true. The SUM type is scoped to the GATHER + SUM operation (NOT * the computation). + * + * TODO Add option to order the vertices to provide a serializable + * execution plan (like GraphChi). I believe that this reduces to + * computing a DAG over the frontier before executing the GATHER and + * then executing the frontier such that the parallel execution is + * constrained by arcs in the DAG that do not have mutual + * dependencies. This would have to place a partial ordering over the + * vertices in the frontier and then process the frontier with + * limited parallelism based on that partial ordering. */ public interface IGASContext<VS, ES, ST> extends Callable<IGASStats> { @@ -90,6 +104,73 @@ int getMaxVisited(); /** + * Return non-<code>null</code> iff there is a single link type to be + * visited. This corresponds to a view of the graph as sparse connectivity + * matrix. The {@link IGASEngine} can optimize traversal patterns using the + * <code>POS</code> index. + * <p> + * Note: When this option is used, the scatter and gather will not visit the + * property set for the vertex. Instead, the graph is treated as if it were + * an unattributed graph and only mined for the connectivity data. + * + * @return The {@link Value} for the predicate that identifies the desired + * link type (there can be many types of links - the return value + * specifies which attribute is of interest). + * + * FIXME define getLinkAttribType() (RDR) + */ + URI getLinkType(); + + /** + * Set an optional constraint on the type of the visited links. + * <p> + * Note: When this option is used, the scatter and gather will not visit the + * property set for the vertex. Instead, the graph is treated as if it were + * an unattributed graph and only mined for the connectivity data (which may + * include a link weight). + * + * @param linkType + * The link type to visit (optional). When <code>null</code>, all + * link types are visited. + */ + void setLinkType(URI linkType); + + /** + * Set an optional {@link IReducer} that will run after the + * {@link IGASProgram} is terminated. This may be used to extract results + * from the visited vertices. + * + * @param afterOp + * The {@link IReducer}. + */ + <T> void setRunAfterOp(IReducer<VS, ES, ST, T> afterOp); + + /** + * Return an optional {@link IReducer} that will run after the + * {@link IGASProgram} is terminated. This may be used to extract results + * from the visited vertices. + */ + <T> IReducer<VS, ES, ST, T> getRunAfterOp(); + + /** + * Hook to impose a constraint on the visited edges and/or property values. + * + * @param itr + * The iterator visiting those edges and/or property values. + * + * @return Either the same iterator or a constrained iterator. + * + * TODO Rename as constrainEdgeFilter or even split into a + * constrainGatherFilter and a constraintScatterFilter. + * + * FIXME APPLY : If we need access to the vertex property values in + * APPLY (which we probably do, at least optionally), then there + * should be a similar method to decide whether the property values + * for the vertex are made available during the APPLY. + */ + IStriterator constrainFilter(IStriterator eitr); + + /** * Execute one iteration. * * @param stats Modified: branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/IGASOptions.java =================================================================== --- branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/IGASOptions.java 2014-02-23 23:24:57 UTC (rev 7877) +++ branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/IGASOptions.java 2014-02-24 01:52:13 UTC (rev 7878) @@ -16,24 +16,15 @@ package com.bigdata.rdf.graph; import org.openrdf.model.Statement; -import org.openrdf.model.URI; import org.openrdf.model.Value; -import cutthecrap.utils.striterators.IStriterator; +import com.bigdata.rdf.graph.analytics.CC; +import com.bigdata.rdf.graph.impl.util.GASRunnerBase; /** * Interface for options that are understood by the {@link IGASEngine} and which * may be declared by the {@link IGASProgram}. * - * TODO Add option to order the vertices to provide a serializable execution - * plan (like GraphChi). I believe that this reduces to computing a DAG over the - * frontier before executing the GATHER and then executing the frontier such - * that the parallel execution is constrained by arcs in the DAG that do not - * have mutual dependencies. This is really an option that would be implemented - * by the {@link IGASContext}, which would have to place a partial ordering over - * the vertices in the frontier and then process the frontier with limited - * parallelism based on that partial ordering. - * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> */ public interface IGASOptions<VS, ES, ST> { @@ -51,6 +42,10 @@ * sample all vertices regardless of their edges, specify * {@value EdgesEnum#NoEdges}. To require that each vertex has at least one * in-edge and one out-edge, specify {@link EdgesEnum#AllEdges}. + * + * FIXME This should be moved into {@link GASRunnerBase}. The only class + * that customizes this is {@link CC}. (For {@link CC} we need to put all + * vertices into the frontier, even those without edges.) */ EdgesEnum getSampleEdgesFilter(); @@ -86,40 +81,4 @@ */ Factory<Statement, ES> getEdgeStateFactory(); - /** - * Return non-<code>null</code> iff there is a single link type to be - * visited. This corresponds to a view of the graph as sparse connectivity - * matrix. The {@link IGASEngine} can optimize traversal patterns using the - * <code>POS</code> index. - * <p> - * Note: When this option is used, the scatter and gather will not visit the - * property set for the vertex. The graph is treated as if it were an - * unattributed graph and only mined for the connectivity data. - * - * @return The {@link Value} for the predicate that identifies the desired - * link type (there can be many types of links - the return value - * specifies which attribute is of interest). - * - * @see #getLinkAttribType() - */ - URI getLinkType(); - - /** - * Hook to impose a constraint on the visited edges and/or property values. - * - * @param itr - * The iterator visiting those edges and/or property values. - * - * @return Either the same iterator or a constrained iterator. - * - * TODO Rename as constrainEdgeFilter or even split into a - * constrainGatherFilter and a constraintScatterFilter. - * - * FIXME APPLY : If we need access to the vertex property values in - * APPLY (which we probably do, at least optionally), then there - * should be a similar method to decide whether the property values - * for the vertex are made available during the APPLY. - */ - IStriterator constrainFilter(IGASContext<VS, ES, ST> ctx, IStriterator eitr); - } \ No newline at end of file Modified: branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/IGASProgram.java =================================================================== --- branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/IGASProgram.java 2014-02-23 23:24:57 UTC (rev 7877) +++ branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/IGASProgram.java 2014-02-24 01:52:13 UTC (rev 7878) @@ -15,8 +15,11 @@ */ package com.bigdata.rdf.graph; +import java.util.List; + import org.openrdf.model.Statement; import org.openrdf.model.Value; +import org.openrdf.model.ValueFactory; /** * Abstract interface for GAS programs. @@ -51,12 +54,13 @@ void before(IGASContext<VS, ES, ST> ctx); /** - * One time initialization after the {@link IGASProgram} is executed. + * Return a default reduction that will be applied after the + * {@link IGASProgram} is executed. * - * @param ctx - * The evaluation context. + * @return The default reduction -or- <code>null</code> if no such reduction + * is defined. */ - void after(IGASContext<VS, ES, ST> ctx); + <T> IReducer<VS, ES, ST, T> getDefaultAfterOp(); /** * Callback to initialize the state for each vertex in the initial frontier @@ -200,5 +204,42 @@ * the frontier is non-empty). */ boolean nextRound(IGASContext<VS, ES, ST> ctx); + + /** + * Return a list of interfaces that may be used to extract variable bindings + * for the vertices visited by the algorithm. + */ + List<IBinder<VS, ES, ST>> getBinderList(); + /** + * An interface that may be used to extract variable bindings for the + * vertices visited by the algorithm. + * + * @author <a href="mailto:tho...@us...">Bryan + * Thompson</a> + */ + public interface IBinder<VS, ES, ST> { + + /** + * The ordinal index of the variable that is bound by this + * {@link IBinder}. By convention, index ZERO is the vertex. Indices + * greater than ZERO are typically aspects of the state of the vertex. + */ + int getIndex(); + + /** + * @param vf + * The {@link ValueFactory} used to create the return + * {@link Value}. + * @param u + * The vertex. + * + * @return The {@link Value} for that ordinal variable or + * <code>null</code> if there is no binding for that ordinal + * variable. + */ + Value bind(ValueFactory vf, final IGASState<VS, ES, ST> state, Value u); + + } + } \ No newline at end of file Modified: branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/IReducer.java =================================================================== --- branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/IReducer.java 2014-02-23 23:24:57 UTC (rev 7877) +++ branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/IReducer.java 2014-02-24 01:52:13 UTC (rev 7878) @@ -26,7 +26,7 @@ * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id: IResultHandler.java 2265 2009-10-26 12:51:06Z thompsonbry $ */ -public interface IReducer<VS,ES, ST, T> { +public interface IReducer<VS, ES, ST, T> { /** * Method is invoked for each result and is responsible for combining the Modified: branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/analytics/BFS.java =================================================================== --- branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/analytics/BFS.java 2014-02-23 23:24:57 UTC (rev 7877) +++ branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/analytics/BFS.java 2014-02-24 01:52:13 UTC (rev 7878) @@ -15,8 +15,8 @@ */ package com.bigdata.rdf.graph.analytics; -import java.util.Arrays; import java.util.Collections; +import java.util.List; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicInteger; @@ -24,6 +24,7 @@ import org.openrdf.model.Statement; import org.openrdf.model.Value; +import org.openrdf.model.ValueFactory; import com.bigdata.rdf.graph.EdgesEnum; import com.bigdata.rdf.graph.Factory; @@ -34,8 +35,6 @@ import com.bigdata.rdf.graph.IReducer; import com.bigdata.rdf.graph.impl.BaseGASProgram; -import cutthecrap.utils.striterators.IStriterator; - /** * Breadth First Search (BFS) is an iterative graph traversal primitive. The * frontier is expanded iteratively until no new vertices are discovered. Each @@ -158,19 +157,6 @@ } /** - * {@inheritDoc} - * <p> - * Overridden to only visit the edges of the graph. - */ - @Override - public IStriterator constrainFilter( - final IGASContext<BFS.VS, BFS.ES, Void> ctx, final IStriterator itr) { - - return itr.addFilter(getEdgeOnlyFilter(ctx)); - - } - - /** * Not used. */ @Override @@ -260,6 +246,39 @@ } /** + * {@inheritDoc} + * <p> + * <dl> + * <dt>1</dt> + * <dd>The depth at which the vertex was first encountered during traversal.</dd> + * </dl> + */ + @Override + public List<IBinder<BFS.VS, BFS.ES, Void>> getBinderList() { + + final List<IBinder<BFS.VS, BFS.ES, Void>> tmp = super.getBinderList(); + + tmp.add(new IBinder<BFS.VS, BFS.ES, Void>() { + + @Override + public int getIndex() { + return 1; + } + + @Override + public Value bind(final ValueFactory vf, + final IGASState<BFS.VS, BFS.ES, Void> state, final Value u) { + + return vf.createLiteral(state.getState(u).depth.get()); + + } + }); + + return tmp; + + } + + /** * Reduce the active vertex state, returning a histogram reporting the #of * vertices at each distance from the starting vertex. There will always be * one vertex at depth zero - this is the starting vertex. For each @@ -272,11 +291,9 @@ * Thompson</a> * * TODO Do another reducer that reports the actual BFS tree rather - * than a histogram. For each depth, it needs to have the set of - * vertices that are at that number of hops from the starting - * vertex. So, there is an outer map from depth to set. The inner - * set should also be concurrent if we allow concurrent reduction of - * the activated vertex state. + * than a histogram. We need to store the predecessor for this. That + * will allow us to trivially report the BFS route between any two + * vertices. */ protected static class HistogramReducer implements IReducer<VS, ES, Void, Map<Integer, AtomicLong>> { @@ -323,54 +340,71 @@ } - @Override - public void after(final IGASContext<BFS.VS, BFS.ES, Void> ctx) { +// @Override +// public <T> IReducer<VS, ES, Void, T> getDefaultAfterOp() { +// +// class NV implements Comparable<NV> { +// public final int n; +// public final long v; +// public NV(final int n, final long v) { +// this.n = n; +// this.v = v; +// } +// @Override +// public int compareTo(final NV o) { +// if (o.n > this.n) +// return -1; +// if (o.n < this.n) +// return 1; +// return 0; +// } +// } +// +// final IReducer<VS, ES, Void, T> outerReducer = new IReducer<VS, ES, Void, T>() { +// +// final HistogramReducer innerReducer = new HistogramReducer(); +// +// @Override +// public void visit(IGASState<VS, ES, Void> state, Value u) { +// +// innerReducer.visit(state, u); +// +// } +// +// @Override +// public T get() { +// +// final Map<Integer, AtomicLong> h = innerReducer.get(); +// +// final NV[] a = new NV[h.size()]; +// +// int i = 0; +// +// for (Map.Entry<Integer, AtomicLong> e : h.entrySet()) { +// +// a[i++] = new NV(e.getKey().intValue(), e.getValue().get()); +// +// } +// +// Arrays.sort(a); +// +// System.out.println("distance, frontierSize, sumFrontierSize"); +// long sum = 0L; +// for (NV t : a) { +// +// System.out.println(t.n + ", " + t.v + ", " + sum); +// +// sum += t.v; +// +// } +// +// return null; +// } +// +// }; +// +// return outerReducer; +// +// } - final HistogramReducer r = new HistogramReducer(); - - ctx.getGASState().reduce(r); - - class NV implements Comparable<NV> { - public final int n; - public final long v; - public NV(final int n, final long v) { - this.n = n; - this.v = v; - } - @Override - public int compareTo(final NV o) { - if (o.n > this.n) - return -1; - if (o.n < this.n) - return 1; - return 0; - } - } - - final Map<Integer, AtomicLong> h = r.get(); - - final NV[] a = new NV[h.size()]; - - int i = 0; - - for (Map.Entry<Integer, AtomicLong> e : h.entrySet()) { - - a[i++] = new NV(e.getKey().intValue(), e.getValue().get()); - - } - - Arrays.sort(a); - - System.out.println("distance, frontierSize, sumFrontierSize"); - long sum = 0L; - for (NV t : a) { - - System.out.println(t.n + ", " + t.v + ", " + sum); - - sum += t.v; - - } - - } - } Modified: branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/analytics/CC.java =================================================================== --- branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/analytics/CC.java 2014-02-23 23:24:57 UTC (rev 7877) +++ branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/analytics/CC.java 2014-02-24 01:52:13 UTC (rev 7878) @@ -15,7 +15,6 @@ */ package com.bigdata.rdf.graph.analytics; -import java.util.Arrays; import java.util.Collections; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; @@ -29,14 +28,11 @@ import com.bigdata.rdf.graph.EdgesEnum; import com.bigdata.rdf.graph.Factory; import com.bigdata.rdf.graph.FrontierEnum; -import com.bigdata.rdf.graph.IGASContext; import com.bigdata.rdf.graph.IGASScheduler; import com.bigdata.rdf.graph.IGASState; import com.bigdata.rdf.graph.IReducer; import com.bigdata.rdf.graph.impl.BaseGASProgram; -import cutthecrap.utils.striterators.IStriterator; - /** * Connected components computes the distinct sets of non-overlapping subgraphs * within a graph. All vertices within a connected component are connected along @@ -190,19 +186,6 @@ /** * {@inheritDoc} * <p> - * Overridden to only visit the edges of the graph. - */ - @Override - public IStriterator constrainFilter( - final IGASContext<CC.VS, CC.ES, Value> ctx, final IStriterator itr) { - - return itr.addFilter(getEdgeOnlyFilter(ctx)); - - } - - /** - * {@inheritDoc} - * <p> * Return the label of the remote vertex. */ @Override @@ -325,87 +308,95 @@ * Returns a map containing the labels assigned to each connected component * (which gives you a vertex in that connected component) and the #of * vertices in each connected component. + * + * @author <a href="mailto:tho...@us...">Bryan + * Thompson</a> */ - public Map<Value, AtomicInteger> getConnectedComponents( - final IGASState<CC.VS, CC.ES, Value> state) { + public class ConnectedComponentsReducer implements IReducer<CC.VS,CC.ES,Value,Map<Value,AtomicInteger>> { final ConcurrentHashMap<Value, AtomicInteger> labels = new ConcurrentHashMap<Value, AtomicInteger>(); - return state - .reduce(new IReducer<CC.VS, CC.ES, Value, Map<Value, AtomicInteger>>() { + @Override + public void visit(final IGASState<VS, ES, Value> state, final Value u) { - @Override - public void visit(final IGASState<VS, ES, Value> state, - final Value u) { + final VS us = state.getState(u); - final VS us = state.getState(u); + if (us != null) { - if (us != null) { + final Value label = us.getLabel(); - final Value label = us.getLabel(); + if (log.isDebugEnabled()) + log.debug("v=" + u + ", label=" + label); - if (log.isDebugEnabled()) - log.debug("v=" + u + ", label=" + label); + final AtomicInteger oldval = labels.putIfAbsent(label, + new AtomicInteger(1)); - final AtomicInteger oldval = labels.putIfAbsent( - label, new AtomicInteger(1)); + if (oldval != null) { - if (oldval != null) { + // lost race. increment existing counter. + oldval.incrementAndGet(); - // lost race. increment existing counter. - oldval.incrementAndGet(); - - } - - } + } - } + } - @Override - public Map<Value, AtomicInteger> get() { + } - return Collections.unmodifiableMap(labels); + @Override + public Map<Value, AtomicInteger> get() { - } - }); + return Collections.unmodifiableMap(labels); + } + } - @Override - public void after(final IGASContext<CC.VS, CC.ES, Value> ctx) { + /** + * Returns a map containing the labels assigned to each connected component + * (which gives you a vertex in that connected component) and the #of + * vertices in each connected component. + */ + public Map<Value, AtomicInteger> getConnectedComponents( + final IGASState<CC.VS, CC.ES, Value> state) { - final Map<Value, AtomicInteger> labels = getConnectedComponents(ctx - .getGASState()); - - System.out.println("There are " + labels.size() - + " connected components"); - - class NV implements Comparable<NV> { - public final int n; - public final Value v; - public NV(int n, Value v) { - this.n = n; - this.v = v; - } - @Override - public int compareTo(final NV o) { - return o.n - this.n; - } - } - - final NV[] a = new NV[labels.size()]; - int i = 0; - for (Map.Entry<Value, AtomicInteger> e : labels.entrySet()) { - a[i++] = new NV(e.getValue().intValue(), e.getKey()); - } - - Arrays.sort(a); - - System.out.println("size, label"); - for(NV t : a) { - System.out.println(t.n + ", " + t.v); - } - + return state.reduce(new ConnectedComponentsReducer()); } +// @Override +// public void after(final IGASContext<CC.VS, CC.ES, Value> ctx) { +// +// final Map<Value, AtomicInteger> labels = getConnectedComponents(ctx +// .getGASState()); +// +// System.out.println("There are " + labels.size() +// + " connected components"); +// +// class NV implements Comparable<NV> { +// public final int n; +// public final Value v; +// public NV(int n, Value v) { +// this.n = n; +// this.v = v; +// } +// @Override +// public int compareTo(final NV o) { +// return o.n - this.n; +// } +// } +// +// final NV[] a = new NV[labels.size()]; +// int i = 0; +// for (Map.Entry<Value, AtomicInteger> e : labels.entrySet()) { +// a[i++] = new NV(e.getValue().intValue(), e.getKey()); +// } +// +// Arrays.sort(a); +// +// System.out.println("size, label"); +// for(NV t : a) { +// System.out.println(t.n + ", " + t.v); +// } +// +// } + } Modified: branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/analytics/PR.java =================================================================== --- branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/analytics/PR.java 2014-02-23 23:24:57 UTC (rev 7877) +++ branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/analytics/PR.java 2014-02-24 01:52:13 UTC (rev 7878) @@ -15,7 +15,6 @@ */ package com.bigdata.rdf.graph.analytics; -import java.util.Arrays; import java.util.Collections; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; @@ -33,8 +32,6 @@ import com.bigdata.rdf.graph.IReducer; import com.bigdata.rdf.graph.impl.BaseGASProgram; -import cutthecrap.utils.striterators.IStriterator; - /** * Page rank assigns weights to the vertices in a graph based by on the relative * "importance" as determined by the patterns of directed links in the graph. @@ -186,19 +183,6 @@ /** * {@inheritDoc} * <p> - * Overridden to only visit the edges of the graph. - */ - @Override - public IStriterator constrainFilter( - final IGASContext<PR.VS, PR.ES, Double> ctx, final IStriterator itr) { - - return itr.addFilter(getEdgeOnlyFilter(ctx)); - - } - - /** - * {@inheritDoc} - * <p> * Each vertex is initialized to the reset probability. * * FIXME We need to do this efficiently. E.g., using a scan to find all of @@ -332,97 +316,107 @@ } - @Override - public void after(final IGASContext<PR.VS, PR.ES, Double> ctx) { + /** + * Class reports a map containing the page rank associated with each visited + * vertex. + * + * @author <a href="mailto:tho...@us...">Bryan + * Thompson</a> + */ + public class PageRankReducer implements IReducer<PR.VS, PR.ES, Double, Map<Value,Double>> { - final ConcurrentHashMap<Value, Double> values = new ConcurrentHashMap<Value, Double>(); + private final ConcurrentHashMap<Value, Double> values = new ConcurrentHashMap<Value, Double>(); + + @Override + public void visit(final IGASState<VS, ES, Double> state, + final Value u) { - ctx.getGASState().reduce( - new IReducer<PR.VS, PR.ES, Double, Map<Value, Double>>() { + final VS us = state.getState(u); - @Override - public void visit(final IGASState<VS, ES, Double> state, - final Value u) { + if (us != null) { - final VS us = state.getState(u); + final double pageRank = us.getValue(); - if (us != null) { + // FIXME Why are NaNs showing up? + if (Double.isNaN(pageRank)) + return; - final double pageRank = us.getValue(); + // FIXME Do infinite values show up? + if (Double.isInfinite(pageRank)) + return; + + if (pageRank < minPageRank) { + // Ignore small values. + return; + } - // FIXME Why are NaNs showing up? - if (Double.isNaN(pageRank)) - return; + /* + * Only report the larger ranked values. + */ - // FIXME Do infinite values show up? - if (Double.isInfinite(pageRank)) - return; - - if (pageRank < minPageRank) { - // Ignore small values. - return; - } + if (log.isDebugEnabled()) + log.debug("v=" + u + ", pageRank=" + pageRank); - /* - * Only report the larger ranked values. - */ + values.put(u, Double.valueOf(pageRank)); - if (log.isDebugEnabled()) - log.debug("v=" + u + ", pageRank=" + pageRank); - - values.put(u, Double.valueOf(pageRank)); - - } - - } - - @Override - public Map<Value, Double> get() { - - return Collections.unmodifiableMap(values); - - } - }); - - class NV implements Comparable<NV> { - public final double n; - public final Value v; - public NV(double n, Value v) { - this.n = n; - this.v = v; } - @Override - public int compareTo(final NV o) { - if (o.n > this.n) - return 1; - if (o.n < this.n) - return -1; - return 0; - } - } - final NV[] a = new NV[values.size()]; - - int i = 0; - - for (Map.Entry<Value, Double> e : values.entrySet()) { - - a[i++] = new NV(e.getValue().doubleValue(), e.getKey()); - } - Arrays.sort(a); + @Override + public Map<Value, Double> get() { - System.out.println("rank, pageRank, vertex"); - i = 0; - for (NV t : a) { + return Collections.unmodifiableMap(values); - System.out.println(i + ", " + t.n + ", " + t.v); - - i++; - } - + } + +// @Override +// public void after(final IGASContext<PR.VS, PR.ES, Double> ctx) { +// +// final Map<Value, Double> values = ctx.getGASState().reduce( +// new PageRankReducer()); +// +// class NV implements Comparable<NV> { +// public final double n; +// public final Value v; +// public NV(double n, Value v) { +// this.n = n; +// this.v = v; +// } +// @Override +// public int compareTo(final NV o) { +// if (o.n > this.n) +// return 1; +// if (o.n < this.n) +// return -1; +// return 0; +// } +// } +// +// final NV[] a = new NV[values.size()]; +// +// int i = 0; +// +// for (Map.Entry<Value, Double> e : values.entrySet()) { +// +// a[i++] = new NV(e.getValue().doubleValue(), e.getKey()); +// +// } +// +// Arrays.sort(a); +// +// System.out.println("rank, pageRank, vertex"); +// i = 0; +// for (NV t : a) { +// +// System.out.println(i + ", " + t.n + ", " + t.v); +// +// i++; +// +// } +// +// } } Modified: branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/analytics/SSSP.java =================================================================== --- branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/analytics/SSSP.java 2014-02-23 23:24:57 UTC (rev 7877) +++ branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/analytics/SSSP.java 2014-02-24 01:52:13 UTC (rev 7878) @@ -15,9 +15,12 @@ */ package com.bigdata.rdf.graph.analytics; +import java.util.List; + import org.apache.log4j.Logger; import org.openrdf.model.Statement; import org.openrdf.model.Value; +import org.openrdf.model.ValueFactory; import com.bigdata.rdf.graph.EdgesEnum; import com.bigdata.rdf.graph.Factory; @@ -27,8 +30,6 @@ import com.bigdata.rdf.graph.IGASState; import com.bigdata.rdf.graph.impl.BaseGASProgram; -import cutthecrap.utils.striterators.IStriterator; - /** * SSSP (Single Source, Shortest Path). This analytic computes the shortest path * to each connected vertex in the graph starting from the given vertex. Only @@ -52,9 +53,10 @@ * phase is executed to update the state of the distinct vertices in the * frontier. * - * TODO Add a reducer to report the actual minimum length paths. This is - * similar to a BFS tree, but the path lengths are not integer values so - * we need a different data structure to collect them. + * FIXME Add a reducer to report the actual minimum length paths. This + * is similar to a BFS tree, but the path lengths are not integer values + * so we need a different data structure to collect them (we need to + * store the predecesor when we run SSSP to do this). */ public class SSSP extends BaseGASProgram<SSSP.VS, SSSP.ES, Integer/* dist */> { @@ -200,20 +202,6 @@ } /** - * {@inheritDoc} - * <p> - * Overridden to only visit the edges of the graph. - */ - @Override - public IStriterator constrainFilter( - final IGASContext<SSSP.VS, SSSP.ES, Integer> ctx, - final IStriterator itr) { - - return itr.addFilter(getEdgeOnlyFilter(ctx)); - - } - - /** * Set the {@link VS#dist()} to ZERO (0). * <p> * {@inheritDoc} @@ -394,4 +382,39 @@ } + /** + * {@inheritDoc} + * <p> + * <dl> + * <dt>1</dt> + * <dd>The shortest distance from the initial frontier to the vertex.</dd> + * </dl> + */ + @Override + public List<IBinder<SSSP.VS, SSSP.ES, Integer>> getBinderList() { + + final List<IBinder<SSSP.VS, SSSP.ES, Integer>> tmp = super + .getBinderList(); + + tmp.add(new IBinder<SSSP.VS, SSSP.ES, Integer>() { + + @Override + public int getIndex() { + return 1; + } + + @Override + public Value bind(final ValueFactory vf, + final IGASState<SSSP.VS, SSSP.ES, Integer> state, + final Value u) { + + return vf.createLiteral(state.getState(u).dist()); + + } + }); + + return tmp; + + } + } Modified: branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/impl/BaseGASProgram.java =================================================================== --- branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/impl/BaseGASProgram.java 2014-02-23 23:24:57 UTC (rev 7877) +++ branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/impl/BaseGASProgram.java 2014-02-24 01:52:13 UTC (rev 7878) @@ -16,13 +16,15 @@ package com.bigdata.rdf.graph.impl; import java.util.Arrays; +import java.util.LinkedList; +import java.util.List; import java.util.Random; import org.apache.log4j.Logger; import org.openrdf.model.Resource; import org.openrdf.model.Statement; -import org.openrdf.model.URI; import org.openrdf.model.Value; +import org.openrdf.model.ValueFactory; import com.bigdata.rdf.graph.EdgesEnum; import com.bigdata.rdf.graph.Factory; @@ -30,12 +32,9 @@ import com.bigdata.rdf.graph.IGASContext; import com.bigdata.rdf.graph.IGASProgram; import com.bigdata.rdf.graph.IGASState; +import com.bigdata.rdf.graph.IReducer; import com.bigdata.rdf.graph.impl.util.VertexDistribution; -import cutthecrap.utils.striterators.Filter; -import cutthecrap.utils.striterators.IFilter; -import cutthecrap.utils.striterators.IStriterator; - /** * Abstract base class with some useful defaults. * @@ -49,103 +48,6 @@ private static final Logger log = Logger.getLogger(BaseGASProgram.class); - /** - * {@inheritDoc} - * <p> - * The default implementation does not restrict the visitation to a - * connectivity matrix (returns <code>null</code>). - */ - @Override - public URI getLinkType() { - - return null; - - } - - /** - * {@inheritDoc} - * <p> - * The default implementation returns its argument. - */ - @Override - public IStriterator constrainFilter(final IGASContext<VS, ES, ST> ctx, - final IStriterator itr) { - - return itr; - - } - - /** - * Return an {@link IFilter} that will only visit the edges of the graph. - * - * @see IGASState#isEdge(Statement) - */ - protected IFilter getEdgeOnlyFilter(final IGASContext<VS, ES, ST> ctx) { - - return new EdgeOnlyFilter(ctx); - - } - - /** - * Filter visits only edges (filters out attribute values). - * <p> - * Note: This filter is pushed down onto the AP and evaluated close to the - * data. - */ - private class EdgeOnlyFilter extends Filter { - private static final long serialVersionUID = 1L; - private final IGASState<VS, ES, ST> gasState; - private EdgeOnlyFilter(final IGASContext<VS, ES, ST> ctx) { - this.gasState = ctx.getGASState(); - } - @Override - public boolean isValid(final Object e) { - return gasState.isEdge((Statement) e); - } - }; - - /** - * Return a filter that only visits the edges of graph that are instances of - * the specified link attribute type. - * <p> - * Note: For bigdata, the visited edges can be decoded to recover the - * original link as well. - * - * @see IGASState#isLinkAttrib(Statement, URI) - * @see IGASState#decodeStatement(Value) - */ - protected IFilter getLinkAttribFilter(final IGASContext<VS, ES, ST> ctx, - final URI linkAttribType) { - - return new LinkAttribFilter(ctx, linkAttribType); - - } - - /** - * Filter visits only edges where the {@link Statement} is an instance of - * the specified link attribute type. For bigdata, the visited edges can be - * decoded to recover the original link as well. - */ - private class LinkAttribFilter extends Filter { - private static final long serialVersionUID = 1L; - - private final IGASState<VS, ES, ST> gasState; - private final URI linkAttribType; - - public LinkAttribFilter(final IGASContext<VS, ES, ST> ctx, - final URI linkAttribType) { - if (linkAttribType == null) - throw new IllegalArgumentException(); - this.gasState = ctx.getGASState(); - this.linkAttribType = linkAttribType; - } - - @Override - public boolean isValid(final Object e) { - return gasState.isLinkAttrib((Statement) e, linkAttribType); - } - } - // /** // * If the vertex is actually an edge, then return the decoded edge. // * @@ -229,9 +131,9 @@ * The default implementation is a NOP. */ @Override - public void after(final IGASContext<VS, ES, ST> ctx) { + public <T> IReducer<VS, ES, ST, T> getDefaultAfterOp() { - // NOP + return null; // NOP } @@ -319,4 +221,49 @@ } + /** + * Return an {@link IBinder} for the vertex itself + */ + private IBinder<VS, ES, ST> getBinder0() { + + return new IBinder<VS, ES, ST>() { + + @Override + public int getIndex() { + + return 0; + + } + + @Override + public Value bind(final ValueFactory vf, + final IGASState<VS, ES, ST> state, final Value u) { + + return u; + + } + + }; + + } + + /** + * {@inheritDoc} + * <p> + * <dl> + * <dt>0</dt> + * <dd>The visited vertex itself.</dd> + * </dl> + */ + @Override + public List<IBinder<VS, ES, ST>> getBinderList() { + + final List<IBinder<VS, ES, ST>> tmp = new LinkedList<IBinder<VS, ES, ST>>(); + + tmp.add(getBinder0()); + + return tmp; + + } + } Modified: branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/impl/GASContext.java =================================================================== --- branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/impl/GASContext.java 2014-02-23 23:24:57 UTC (rev 7877) +++ branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/impl/GASContext.java 2014-02-24 01:52:13 UTC (rev 7878) @@ -20,9 +20,11 @@ import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicReference; import org.apache.log4j.Logger; import org.openrdf.model.Statement; +import org.openrdf.model.URI; import org.openrdf.model.Value; import com.bigdata.rdf.graph.EdgesEnum; @@ -36,6 +38,10 @@ import com.bigdata.rdf.graph.IStaticFrontier; import com.bigdata.rdf.graph.util.GASUtil; +import cutthecrap.utils.striterators.Filter; +import cutthecrap.utils.striterators.IFilter; +import cutthecrap.utils.striterators.IStriterator; + public class GASContext<VS, ES, ST> implements IGASContext<VS, ES, ST> { private static final Logger log = Logger.getLogger(GASContext.class); @@ -70,6 +76,18 @@ Integer.MAX_VALUE); /** + * An optional constraint on the type of the visited links. + */ + private final AtomicReference<URI> linkType = new AtomicReference<URI>(null); + + /** + * An optional {@link IReducer} that will executed after the + * {@link IGASProgram}. + */ + private final AtomicReference<IReducer<VS, ES, ST, ?>> afterOp = new AtomicReference<IReducer<VS, ES, ST, ?>>( + null); + + /** * * @param namespace * The namespace of the graph (KB instance). @@ -168,8 +186,19 @@ gasState.traceState(); - program.after(this); - + // Optional post-reduction. + { + + final IReducer<VS, ES, ST, ?> op = getRunAfterOp(); + + if (op != null) { + + gasState.reduce(op); + + } + + } + // Done return total; @@ -374,26 +403,93 @@ /** * Do APPLY. * - * TODO The apply() should be parallelized. For some algorithms, there is a - * moderate amount of work per vertex in apply(). Use {@link #nthreads} to - * set the parallelism. - * <p> - * Note: This is very similar to the {@link IGASState#reduce(IReducer)} - * operation. This operates over the frontier. reduce() operates over the - * activated vertices. Both need fine grained parallelism. Both can have - * either light or moderately heavy operations (a dot product would be an - * example of a heavier operation). + * @return The #of vertices for which the operation was executed. + * + * @throws Exception */ - private void apply(final IStaticFrontier f) { + private void apply(final IStaticFrontier f) throws Exception { - for (Value u : f) { +// for (Value u : f) { +// +// program.apply(gasState, u, null/* sum */); +// +// } - program.apply(gasState, u, null/* sum */); + // Note: Return value of ApplyReducer is currently ignored. + reduceOverFrontier(f, new ApplyReducer<Void>()); + + } + private class ApplyReducer<T> implements IReducer<VS, ES, ST, T> { + + @Override + public void visit(final IGASState<VS, ES, ST> state, final Value u) { + + program.apply(state, u, null/* sum */); + } + @Override + public T get() { + + // Note: Nothing returned right now. + return null; + + } + } + + /** + * Reduce over the frontier (used for apply()). + * + * @param f + * The frontier. + * @param op + * The {@link IReducer}. + * + * @return The {@link IReducer#get() result}. + * + * @throws Exception + */ + public <T> T reduceOverFrontier(final IStaticFrontier f, + final IReducer<VS, ES, ST, T> op) throws Exception { + if (f == null) + throw new IllegalArgumentException(); + + if (op == null) + throw new IllegalArgumentException(); + + class ReduceVertexTaskFactory implements VertexTaskFactory<Long> { + + @Override + public Callable<Long> newVertexTask(final Value u) { + + return new Callable<Long>() { + + @Override + public Long call() { + + // program.apply(gasState, u, null/* sum */); + op.visit(gasState, u); + + // Nothing returned by visit(). + return ONE; + + }; + }; + + }; + } + + gasEngine.newFrontierStrategy(new ReduceVertexTaskFactory(), f).call(); + + // Return reduction. + return op.get(); + + } + private static final Long ONE = Long.valueOf(1L); + /** * @param inEdges * when <code>true</code> the GATHER is over the in-edges. @@ -728,4 +824,122 @@ } + /** + * {@inheritDoc} + * <p> + * The default implementation does not restrict the visitation to a + * connectivity matrix (returns <code>null</code>). + */ + @Override + public URI getLinkType() { + + return linkType.get(); + + } + + @Override + public void setLinkType(final URI linkType) { + + this.linkType.set(linkType); + + } + + /** + * {@inheritDoc} + * <p> + * The default implementation only visits the edges. + */ + @Override + public IStriterator constrainFilter(final IStriterator itr) { + + return itr.addFilter(getEdgeOnlyFilter()); + + } + + /** + * Return an {@link IFilter} that will only visit the edges of the graph. + * + * @see IGASState#isEdge(Statement) + */ + protected IFilter getEdgeOnlyFilter() { + + return new EdgeOnlyFilter(this); + + } + + /** + * Filter visits only edges (filters out attribute values). + * <p> + * Note: This filter is pushed down onto the AP and evaluated close to the + * data. + */ + private class EdgeOnlyFilter extends Filter { + private static final long serialVersionUID = 1L; + private final IGASState<VS, ES, ST> gasState; + private EdgeOnlyFilter(final IGASContext<VS, ES, ST> ctx) { + this.gasState = ctx.getGASState(); + } + @Override + public boolean isValid(final Object e) { + return gasState.isEdge((Statement) e); + } + }; + + /** + * Return a filter that only visits the edges of graph that are instances of + * the specified link attribute type. + * <p> + * Note: For bigdata, the visited edges can be decoded to recover the + * original link as well. + * + * @see IGASState#isLinkAttrib(Statement, URI) + * @see IGASState#decodeStatement(Value) + */ + protected IFilter getLinkAttribFilter(final IGASContext<VS, ES, ST> ctx, + final URI linkAttribType) { + + return new LinkAttribFilter(ctx, linkAttribType); + + } + + /** + * Filter visits only edges where the {@link Statement} is an instance of + * the specified link attribute type. For bigdata, the visited edges can be + * decoded to recover the original link as well. + */ + private class LinkAttribFilter extends Filter { + private static final long serialVersionUID = 1L; + + private final IGASState<VS, ES, ST> gasState; + private final URI linkAttribType; + + public LinkAttribFilter(final IGASContext<VS, ES, ST> ctx, + final URI linkAttribType) { + if (linkAttribType == null) + throw new IllegalArgumentException(); + this.gasState = ctx.getGASState(); + this.linkAttribType = linkAttribType; + } + + @Override + public boolean isValid(final Object e) { + return gasState.isLinkAttrib((Statement) e, linkAttribType); + } + } + + @Override + public <T> void setRunAfterOp(final IReducer<VS, ES, ST, T> afterOp) { + + this.afterOp.set(afterOp); + + } + + @SuppressWarnings("unchecked") + @Override + public <T> IReducer<VS, ES, ST, T> getRunAfterOp() { + + return (IReducer<VS, ES, ST, T>) afterOp.get(); + + } + } // GASContext Modified: branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/impl/GASEngine.java =================================================================== --- branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/impl/GASEngine.java 2014-02-23 23:24:57 UTC (rev 7877) +++ branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/impl/GASEngine.java 2014-02-24 01:52:13 UTC (rev 7878) @@ -210,6 +210,7 @@ } + @Override public Long call() throws Exception { long nedges = 0L; Modified: branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/impl/GASState.java =================================================================== --- branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/impl/GASState.java 2014-02-23 23:24:57 UTC (rev 7877) +++ branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/impl/GASState.java 2014-02-24 01:52:13 UTC (rev 7878) @@ -320,6 +320,10 @@ * * TODO REDUCE : parallelize with nthreads. The reduce operations are often * lightweight, so maybe a fork/join pool would work better? + * <p> + * Note: We can not do a parallel reduction right now because the backing + * class does not expose a parallel iterator, e.g., a segment-wise iterator. + * The reduction over the {@link #vertexState} is quite slow as a result. */ @Override public <T> T reduce(final IReducer<VS, ES, ST, T> op) { Modified: branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/impl/ram/RAMGASEngine.java =================================================================== --- branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/impl/ram/RAMGASEngine.java 2014-02-23 23:24:57 UTC (rev 7877) +++ branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/impl/ram/RAMGASEngine.java 2014-02-24 01:52:13 UTC (rev 7878) @@ -32,7 +32,6 @@ import com.bigdata.rdf.graph.EdgesEnum; import com.bigdata.rdf.graph.IGASContext; -import com.bigdata.rdf.graph.IGASProgram; import com.bigdata.rdf.graph.IGraphAccessor; import com.bigdata.rdf.graph.impl.GASEngine; import com.bigdata.rdf.graph.impl.util.VertexDistribution; @@ -325,12 +324,11 @@ } - @SuppressWarnings({ "unchecked", "rawtypes" }) private IStriterator getEdges(final boolean inEdges, final IGASContext<?, ?, ?> ctx, final Value u) throws SailException { - final URI linkTypeIV = (URI) ctx.getGASProgram().getLinkType(); + final URI linkTypeIV = (URI) ctx.getLinkType(); if(linkTypeIV != null) { /* * FIXME RDR: We need to use a union of access paths for link @@ -351,8 +349,7 @@ /* * Optionally wrap the program specified filter. */ - return ((IGASProgram) ctx.getGASProgram()).constrainFilter(ctx, - sitr); + return ctx.constrainFilter(sitr); } Modified: branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/impl/sail/SAILGASEngine.java =================================================================== --- branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/impl/sail/SAILGASEngine.java 2014-02-23 23:24:57 UTC (rev 7877) +++ branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/impl/sail/SAILGASEngine.java 2014-02-24 01:52:13 UTC (rev 7878) @@ -31,7 +31,6 @@ import com.bigdata.rdf.graph.EdgesEnum; import com.bigdata.rdf.graph.IGASContext; -import com.bigdata.rdf.graph.IGASProgram; import com.bigdata.rdf.graph.IGraphAccessor; import com.bigdata.rdf.graph.impl.GASEngine; import com.bigdata.rdf.graph.impl.util.VertexDistribution; @@ -148,7 +147,7 @@ final IGASContext<?, ?, ?> ctx, final Value u) throws SailException { - final URI linkTypeIV = (URI) ctx.getGASProgram().getLinkType(); + final URI linkTypeIV = (URI) ctx.getLinkType(); if(linkTypeIV != null) { /* * FIXME RDR: We need to use a union of access paths for link @@ -176,7 +175,7 @@ * since only one is optimized. */ final boolean posOptimization = linkTypeIV != null - && !inEdges; + && inEdges; final CloseableIteration<? extends Statement, SailException> citr; if (posOptimization) { @@ -238,9 +237,9 @@ * much more efficient. (If the index is local, then simply stacking * striterators is just as efficient.) */ - return ((IGASProgram) ctx.getGASProgram()).constrainFilter(ctx, - sitr); + return ctx.constrainFilter(sitr); + } @Override Modified: branches/RDR/bigdata-gas/src/test/com/bigdata/rdf/graph/impl/ram/TestGather.java =================================================================== --- branches/RDR/bigdata-gas/src/test/com/bigdata/rdf/graph/impl/ram/TestGather.java 2014-02-23 23:24:57 UTC (rev 7877) +++ branches/RDR/bigdata-gas/src/test/com/bigdata/rdf/graph/impl/ram/TestGather.java 2014-02-24 01:52:13 UTC (rev 7878) @@ -36,8 +36,6 @@ import com.bigdata.rdf.graph.impl.ram.RAMGASEngine.RAMGraph; import com.bigdata.rdf.graph.impl.ram.RAMGASEngine.RAMGraphAccessor; -import cutthecrap.utils.striterators.IStriterator; - /** * Test class for GATHER. * @@ -89,21 +87,7 @@ return EdgesEnum.NoEdges; } - /** - * {@inheritDoc} - * <p> - * Overridden to only visit the edges of the graph. - */ @Override - public IStriterator constrainFilter( - final IGASContext<Set<Statement>, Set<Statement>, Set<Statement>> ctx, - final IStriterator itr) { - - return itr.addFilter(getEdgeOnlyFilter(ctx)); - - } - - @Override public Factory<Value, Set<Statement>> getVertexStateFactory() { return new Factory<Value, Set<Statement>>() { @Override Modified: branches/RDR/bigdata-gas/src/test/com/bigdata/rdf/graph/impl/sail/TestGather.java =================================================================== --- branches/RDR/bigdata-gas/src/test/com/bigdata/rdf/graph/impl/sail/TestGather.java 2014-02-23 23:24:57 UTC (rev 7877) +++ branches/RDR/bigdata-gas/src/test/com/bigdata/rdf/graph/impl/sail/TestGather.java 2014-02-24 01:52:13 UTC (rev 7878) @@ -35,8 +35,6 @@ import com.bigdata.rdf.graph.impl.BaseGASProgram; import com.bigdata.rdf.graph.impl.GASStats; -import cutthecrap.utils.striterators.IStriterator; - /** * Test class for GATHER. * @@ -87,22 +85,8 @@ public EdgesEnum getScatterEdges() { return EdgesEnum.NoEdges; } - - /** - * {@inheritDoc} - * <p> - * Overridden to only visit the edges of the graph. - */ + @Override - public IStriterator constrainFilter( - final IGASContext<Set<Statement>, Set<Statement>, Set<Statement>> ctx, - final IStriterator itr) { - - return itr.addFilter(getEdgeOnlyFilter(ctx)); - - } - - @Override public Factory<Value, Set<Statement>> getVertexStateFactory() { return new Factory<Value, Set<Statement>>() { @Override Modified: branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/bd/BigdataGASEngine.java =================================================================== --- branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/bd/BigdataGASEngine.java 2014-02-23 23:24:57 UTC (rev 7877) +++ branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/bd/BigdataGASEngine.java 2014-02-24 01:52:13 UTC (rev 7878) @@ -35,6 +35,8 @@ import com.bigdata.rdf.graph.impl.util.VertexDistribution; import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.internal.IVUtility; +import com.bigdata.rdf.internal.NotMaterializedException; +import com.bigdata.rdf.model.BigdataValue; import com.bigdata.rdf.sail.BigdataSail; import com.bigdata.rdf.spo.ISPO; import com.bigdata.rdf.spo.SPOKeyOrder; @@ -361,7 +363,7 @@ this.ctx = ctx; this.u = u; - linkTypeIV = (IV) ctx.getGASProgram().getLinkType(); + linkTypeIV = getIV(ctx.getLinkType()); final IKeyBuilder keyBuilder; /* @@ -371,7 +373,7 @@ * * [u] gets bound on O. * - * We use... [truncated message content] |
From: <tho...@us...> - 2014-02-23 23:25:02
|
Revision: 7877 http://sourceforge.net/p/bigdata/code/7877 Author: thompsonbry Date: 2014-02-23 23:24:57 +0000 (Sun, 23 Feb 2014) Log Message: ----------- Bug fix for #816 (static analysis of SERVICE ignores variables declared in the SERVICE's graph pattern). Modified Paths: -------------- branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/StaticAnalysis.java branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/StaticAnalysisBase.java branches/RDR/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/TestStaticAnalysis.java Modified: branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/StaticAnalysis.java =================================================================== --- branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/StaticAnalysis.java 2014-02-23 12:15:58 UTC (rev 7876) +++ branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/StaticAnalysis.java 2014-02-23 23:24:57 UTC (rev 7877) @@ -1547,7 +1547,11 @@ /** * Report "MUST" bound bindings projected by the SERVICE. This involves * checking the graph pattern reported by - * {@link ServiceNode#getGraphPattern()} . + * {@link ServiceNode#getGraphPattern()}. + * <p> + * Note: If the SERVICE URI is a variable, then it can only become bound + * through some other operation. If the SERVICE variable never becomes + * bound, then the SERVICE call can not run. */ // MUST : ServiceNode public Set<IVariable<?>> getDefinitelyProducedBindings( Modified: branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/StaticAnalysisBase.java =================================================================== --- branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/StaticAnalysisBase.java 2014-02-23 12:15:58 UTC (rev 7876) +++ branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/StaticAnalysisBase.java 2014-02-23 23:24:57 UTC (rev 7877) @@ -38,8 +38,8 @@ import com.bigdata.bop.IConstant; import com.bigdata.bop.IVariable; import com.bigdata.rdf.sparql.ast.eval.IEvaluationContext; +import com.bigdata.rdf.sparql.ast.service.ServiceNode; import com.bigdata.rdf.sparql.ast.ssets.ISolutionSetManager; -import com.bigdata.rdf.store.ITripleStore; /** * Base class for static analysis. @@ -171,7 +171,26 @@ // do not recurse return varSet; - + + } else if (op instanceof ServiceNode) { + + // @see http://trac.bigdata.com/ticket/816 + final ServiceNode serviceNode = (ServiceNode) op; + + // Look for the SERVICE URI, it might be a variable as well. + final TermNode uriRef = serviceNode.getServiceRef(); + + if (uriRef instanceof VarNode) { + + varSet.add(((VarNode) uriRef).getValueExpression()); + + } + + // pick up anything in the group graph pattern. + getSpannedVariables(serviceNode.getGraphPattern(), filters, varSet); + + // fall through - look for attached filters. + } else if (op instanceof FilterNode && !filters) { // DO NOT RECURSE INTO THE FILTER! Modified: branches/RDR/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/TestStaticAnalysis.java =================================================================== --- branches/RDR/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/TestStaticAnalysis.java 2014-02-23 12:15:58 UTC (rev 7876) +++ branches/RDR/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/TestStaticAnalysis.java 2014-02-23 23:24:57 UTC (rev 7877) @@ -270,6 +270,70 @@ } /** + * Unit test of static analysis for a SERVICE call. + * + * @see <a href="http://trac.bigdata.com/ticket/816" > Wildcard projection + * ignores variables inside a SERVICE call </a> + */ + public void test_static_analysis05() + throws MalformedQueryException { + + final String queryStr = "" + + "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> \n"+ + "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> \n"+ + "PREFIX foaf: <http://xmlns.com/foaf/0.1/> \n"+ + "select ?x (12 as ?y)\n" + + " where {\n" + + " service ?uri {\n" + + " ?x rdf:type foaf:Person .\n" + + " ?x rdfs:label ?z .\n" + + " }\n" + + "}"; + + final QueryRoot queryRoot = new Bigdata2ASTSPARQLParser(store) + .parseQuery2(queryStr, baseURI).getOriginalAST(); + + final StaticAnalysis sa = new StaticAnalysis(queryRoot); + + final Set<IVariable<?>> expectedProjected = new LinkedHashSet<IVariable<?>>(); + + expectedProjected.add(Var.var("x")); + expectedProjected.add(Var.var("y")); + + assertEquals(expectedProjected, sa.getDefinitelyProducedBindings(queryRoot)); + + // The spanned variables includes the SERVICE URI (if it is a variable). + { + + final Set<IVariable<?>> expectedWhereClause = new LinkedHashSet<IVariable<?>>(); + + expectedWhereClause.add(Var.var("uri")); + expectedWhereClause.add(Var.var("x")); + expectedWhereClause.add(Var.var("z")); + + assertEquals(expectedWhereClause, sa.getSpannedVariables( + queryRoot.getWhereClause(), + new LinkedHashSet<IVariable<?>>())); + } + + // The definitely bound variables does NOT include the SERVICE URI. When + // that is a variable it needs to become bound through other means. + { + + final Set<IVariable<?>> expectedWhereClause = new LinkedHashSet<IVariable<?>>(); + + expectedWhereClause.add(Var.var("x")); + expectedWhereClause.add(Var.var("z")); + + assertEquals(expectedWhereClause, sa.getDefinitelyProducedBindings( + queryRoot.getWhereClause(), + new LinkedHashSet<IVariable<?>>(), true/* recursive */)); + + } + + } + + /** * Unit test for computing the join variables for a named subquery based on * the analysis of the bindings which MUST be produced by the subquery and * those which MUST be bound on entry into the group in which the subquery This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2014-02-23 12:16:04
|
Revision: 7876 http://sourceforge.net/p/bigdata/code/7876 Author: thompsonbry Date: 2014-02-23 12:15:58 +0000 (Sun, 23 Feb 2014) Log Message: ----------- replaced ConcurrentSkipList with Collections.newSetFromMap() per feedback on possible performance impact from the skip list. Modified Paths: -------------- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/ASTEvalHelper.java Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/ASTEvalHelper.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/ASTEvalHelper.java 2014-02-23 00:21:30 UTC (rev 7875) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/ASTEvalHelper.java 2014-02-23 12:15:58 UTC (rev 7876) @@ -29,6 +29,7 @@ import info.aduna.iteration.CloseableIteration; +import java.util.Collections; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.LinkedHashSet; @@ -36,7 +37,7 @@ import java.util.List; import java.util.Map; import java.util.Set; -import java.util.concurrent.ConcurrentSkipListSet; +import java.util.concurrent.ConcurrentHashMap; import org.apache.log4j.Logger; import org.apache.log4j.MDC; @@ -540,8 +541,9 @@ */ // Concurrency safe set. - describedResources = new ConcurrentSkipListSet<BigdataValue>(); - + describedResources = Collections + .newSetFromMap(new ConcurrentHashMap<BigdataValue, Boolean>()); + // Collect the bindings on those variables. solutions2 = new DescribeBindingsCollector(// describeVars,// what to collect This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2014-02-23 00:21:35
|
Revision: 7875 http://sourceforge.net/p/bigdata/code/7875 Author: thompsonbry Date: 2014-02-23 00:21:30 +0000 (Sun, 23 Feb 2014) Log Message: ----------- Adjusted the fence post for maxIterations such that a value of ONE causes the algorithm to halt after the first round. Updated the documentation to match. Modified Paths: -------------- branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/IGASContext.java branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/impl/GASContext.java Modified: branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/IGASContext.java =================================================================== --- branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/IGASContext.java 2014-02-23 00:18:40 UTC (rev 7874) +++ branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/IGASContext.java 2014-02-23 00:21:30 UTC (rev 7875) @@ -55,7 +55,8 @@ IGraphAccessor getGraphAccessor(); /** - * Specify the maximum number of iterations for the algorithm. + * Specify the maximum number of iterations for the algorithm. A value of + * ONE means that the algorithm will halt after the first round. * * @param newValue * The maximum number of iterations. Modified: branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/impl/GASContext.java =================================================================== --- branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/impl/GASContext.java 2014-02-23 00:18:40 UTC (rev 7874) +++ branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/impl/GASContext.java 2014-02-23 00:21:30 UTC (rev 7875) @@ -137,7 +137,7 @@ * GASStats. */ - if (total.getNRounds() >= getMaxIterations()) { + if (total.getNRounds() + 1 >= getMaxIterations()) { log.warn("Halting: maxIterations=" + getMaxIterations() + ", #rounds=" + total.getNRounds()); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2014-02-23 00:18:43
|
Revision: 7874 http://sourceforge.net/p/bigdata/code/7874 Author: thompsonbry Date: 2014-02-23 00:18:40 +0000 (Sun, 23 Feb 2014) Log Message: ----------- Since I committed the change to extract depth from BFS, I just made that code conditional so it will not break other algorithms (e.g., SSSP). This still needs to be abstracted correctly so it will report the "state". I think that the right approach might be to hand the IGASProgram the binding set and ordered list of variables (out, out1, out2) and let it create the bindings as appropriate for that algorithm. Modified Paths: -------------- branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/bd/GASService.java Modified: branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/bd/GASService.java =================================================================== --- branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/bd/GASService.java 2014-02-23 00:14:26 UTC (rev 7873) +++ branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/bd/GASService.java 2014-02-23 00:18:40 UTC (rev 7874) @@ -755,7 +755,7 @@ if (outVar != null) { vals[j++] = new Constant(v); } - if (stateVar != null) { + if (stateVar != null && gasProgram instanceof BFS) { /* * FIXME Need an API for self-reporting of an IV by * the IGASProgram. This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2014-02-23 00:14:32
|
Revision: 7873 http://sourceforge.net/p/bigdata/code/7873 Author: thompsonbry Date: 2014-02-23 00:14:26 +0000 (Sun, 23 Feb 2014) Log Message: ----------- Integrated the maxIterations and maxVertices constraints into IGASContext, GASContext, and GASService. The algorithm now halts if those thresholds are reached. We could also do this for #edges visited since that is tracked by IGASStats. Modified Paths: -------------- branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/IGASContext.java branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/impl/GASContext.java branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/bd/GASService.java Modified: branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/IGASContext.java =================================================================== --- branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/IGASContext.java 2014-02-22 22:38:43 UTC (rev 7872) +++ branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/IGASContext.java 2014-02-23 00:14:26 UTC (rev 7873) @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. -*/ + */ package com.bigdata.rdf.graph; import java.util.concurrent.Callable; @@ -48,13 +48,47 @@ * The computation state. */ IGASState<VS, ES, ST> getGASState(); - + /** * The graph access object. */ IGraphAccessor getGraphAccessor(); - + /** + * Specify the maximum number of iterations for the algorithm. + * + * @param newValue + * The maximum number of iterations. + * + * @throws IllegalArgumentException + * if the new value is non-positive. + */ + void setMaxIterations(int newValue); + + /** + * Return the maximum number iterations for the algorithm. + */ + int getMaxIterations(); + + /** + * Specify the maximum number of vertices that may be visited. The algorithm + * will halt if this value is exceeded. + * + * @param newValue + * The maximum number of vertices in the frontier. + * + * @throws IllegalArgumentException + * if the new value is non-positive. + */ + void setMaxVisited(int newValue); + + /** + * Return the maximum number of vertices that may be visited. The algorithm + * will halt if this value is exceeded. + */ + int getMaxVisited(); + + /** * Execute one iteration. * * @param stats @@ -65,11 +99,11 @@ */ boolean doRound(IGASStats stats) throws Exception, ExecutionException, InterruptedException; - + /** * Execute the associated {@link IGASProgram}. */ @Override IGASStats call() throws Exception; - + } \ No newline at end of file Modified: branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/impl/GASContext.java =================================================================== --- branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/impl/GASContext.java 2014-02-22 22:38:43 UTC (rev 7872) +++ branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/impl/GASContext.java 2014-02-23 00:14:26 UTC (rev 7873) @@ -19,6 +19,7 @@ import java.util.concurrent.Callable; import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; import org.apache.log4j.Logger; import org.openrdf.model.Statement; @@ -57,6 +58,18 @@ private final IGASProgram<VS, ES, ST> program; /** + * The maximum number of iterations (defaults to {@link Integer#MAX_VALUE}). + */ + private final AtomicInteger maxIterations = new AtomicInteger( + Integer.MAX_VALUE); + + /** + * The maximum number of vertices (defaults to {@link Integer#MAX_VALUE}). + */ + private final AtomicInteger maxVertices = new AtomicInteger( + Integer.MAX_VALUE); + + /** * * @param namespace * The namespace of the graph (KB instance). @@ -117,6 +130,31 @@ while (!gasState.frontier().isEmpty()) { + /* + * Check halting conditions. + * + * Note: We could also halt on maxEdges since that is tracked in the + * GASStats. + */ + + if (total.getNRounds() >= getMaxIterations()) { + + log.warn("Halting: maxIterations=" + getMaxIterations() + + ", #rounds=" + total.getNRounds()); + + break; + + } + + if (total.getFrontierSize() >= getMaxVisited()) { + + log.warn("Halting: maxVertices=" + getMaxVisited() + + ", frontierSize=" + total.getFrontierSize()); + + break; + + } + final GASStats roundStats = new GASStats(); doRound(roundStats); @@ -656,4 +694,38 @@ } // GatherTask + @Override + public void setMaxIterations(final int newValue) { + + if (newValue <= 0) + throw new IllegalArgumentException(); + + this.maxIterations.set(newValue); + + } + + @Override + public int getMaxIterations() { + + return maxIterations.get(); + + } + + @Override + public void setMaxVisited(int newValue) { + + if (newValue <= 0) + throw new IllegalArgumentException(); + + this.maxVertices.set(newValue); + + } + + @Override + public int getMaxVisited() { + + return maxVertices.get(); + + } + } // GASContext Modified: branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/bd/GASService.java =================================================================== --- branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/bd/GASService.java 2014-02-22 22:38:43 UTC (rev 7872) +++ branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/bd/GASService.java 2014-02-23 00:14:26 UTC (rev 7873) @@ -53,10 +53,13 @@ import com.bigdata.rdf.graph.IGASStats; import com.bigdata.rdf.graph.IGraphAccessor; import com.bigdata.rdf.graph.IReducer; +import com.bigdata.rdf.graph.analytics.BFS; import com.bigdata.rdf.graph.impl.GASEngine; import com.bigdata.rdf.graph.impl.GASState; import com.bigdata.rdf.graph.impl.bd.BigdataGASEngine.BigdataGraphAccessor; import com.bigdata.rdf.graph.impl.scheduler.CHMScheduler; +import com.bigdata.rdf.internal.IV; +import com.bigdata.rdf.internal.impl.literal.XSDNumericIV; import com.bigdata.rdf.model.BigdataValue; import com.bigdata.rdf.sail.BigdataSail.BigdataSailConnection; import com.bigdata.rdf.sparql.ast.GraphPatternGroup; @@ -223,6 +226,12 @@ */ URI OUT = new URIImpl(NAMESPACE + "out"); + /** + * The state of the visited vertex (algorithm dependent, but something + * like traversal depth is common). + */ + URI STATE = new URIImpl(NAMESPACE + "state"); + } static private transient final Logger log = Logger @@ -323,12 +332,13 @@ // options extracted from the SERVICE's graph pattern. private final int nthreads; - private final int maxIterations; // FIXME set as limit on GASState. - private final int maxVisited; // FIXME set as limit on GASState. + private final int maxIterations; + private final int maxVisited; private final Class<IGASProgram<VS, ES, ST>> gasClass; private final Class<IGASSchedulerImpl> schedulerClass; private final Value[] initialFrontier; private final IVariable<?> outVar; + private final IVariable<?> stateVar; public GASServiceCall(final AbstractTripleStore store, final ServiceNode serviceNode, @@ -434,6 +444,9 @@ // The output variable (bound to the visited set). this.outVar = getVar(Options.PROGRAM, Options.OUT); + // The state variable (bound to the state associated with each visited vertex). + this.stateVar = getVar(Options.PROGRAM, Options.STATE); + } /** @@ -652,6 +665,10 @@ final IGASContext<VS, ES, ST> gasContext = gasEngine.newGASContext( graphAccessor, gasProgram); + gasContext.setMaxIterations(maxIterations); + + gasContext.setMaxVisited(maxVisited); + final IGASState<VS, ES, ST> gasState = gasContext.getGASState(); // TODO We should look at this when extracting the parameters from the SERVICE's graph pattern. @@ -710,17 +727,48 @@ } }); + /* + * Bind output variables (if any). + */ final IBindingSet[] out = new IBindingSet[visitedSet.size()]; { - final IVariable[] vars = new IVariable[] { outVar }; + + final List<IVariable> tmp = new LinkedList<IVariable>(); + + if (outVar != null) + tmp.add(outVar); + + if (stateVar != null) + tmp.add(stateVar); + + final IVariable[] vars = tmp.toArray(new IVariable[tmp + .size()]); + + final IConstant[] vals = new IConstant[vars.length]; + int i = 0; + for (Value v : visitedSet) { - out[i++] = new ListBindingSet(vars, - new IConstant[] { new Constant(v) }); + int j = 0; + if (outVar != null) { + vals[j++] = new Constant(v); + } + if (stateVar != null) { + /* + * FIXME Need an API for self-reporting of an IV by + * the IGASProgram. + */ + final int depth = ((BFS.VS)gasState.getState(v)).depth(); + final IV depthIV = new XSDNumericIV(depth); + vals[j++] = new Constant(depthIV); + } + out[i++] = new ListBindingSet(vars, vals); + } + } return new ChunkedArrayIterator<IBindingSet>(out); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2014-02-22 22:38:49
|
Revision: 7872 http://sourceforge.net/p/bigdata/code/7872 Author: thompsonbry Date: 2014-02-22 22:38:43 +0000 (Sat, 22 Feb 2014) Log Message: ----------- added GASService. Works. Sort of. The next steps are to interpret maxIterations and maxVertices as GASState constraints, not GASProgram options. The same for the link type URI and the access to the link weights. This will all make it much easier to paramterize the algorithms through the SPARQL SERVICE invocation. Modified Paths: -------------- branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/bd/GASService.java branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/service/ServiceRegistry.java Modified: branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/bd/GASService.java =================================================================== --- branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/bd/GASService.java 2014-02-22 22:37:27 UTC (rev 7871) +++ branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/bd/GASService.java 2014-02-22 22:38:43 UTC (rev 7872) @@ -57,10 +57,12 @@ import com.bigdata.rdf.graph.impl.GASState; import com.bigdata.rdf.graph.impl.bd.BigdataGASEngine.BigdataGraphAccessor; import com.bigdata.rdf.graph.impl.scheduler.CHMScheduler; +import com.bigdata.rdf.model.BigdataValue; import com.bigdata.rdf.sail.BigdataSail.BigdataSailConnection; import com.bigdata.rdf.sparql.ast.GraphPatternGroup; import com.bigdata.rdf.sparql.ast.IGroupMemberNode; import com.bigdata.rdf.sparql.ast.StatementPatternNode; +import com.bigdata.rdf.sparql.ast.VarNode; import com.bigdata.rdf.sparql.ast.service.BigdataNativeServiceOptions; import com.bigdata.rdf.sparql.ast.service.BigdataServiceCall; import com.bigdata.rdf.sparql.ast.service.CustomServiceFactory; @@ -79,9 +81,9 @@ * For example, the following would run a depth-limited BFS traversal: * * <pre> - * PREFIX gas <http://www.bigdata.com/rdf/gas#> + * PREFIX gas: <http://www.bigdata.com/rdf/gas#> * #... - * SERVICE <GAS> { + * SERVICE <gas#service> { * gas:program gas:gasClass "com.bigdata.rdf.graph.analytics.BFS" . * gas:program gas:in <IRI> . # one or more times, specifies the initial frontier. * gas:program gas:out ?out . # exactly once - will be bound to the visited vertices. @@ -94,9 +96,9 @@ * Or the following would run the FuzzySSSP algorithm. * * <pre> - * PREFIX gas <http://www.bigdata.com/rdf/gas#> + * PREFIX gas: <http://www.bigdata.com/rdf/gas#> * #... - * SERVICE <GAS> { + * SERVICE <gas:service> { * gas:program gas:gasClass "com.bigdata.rdf.graph.analytics.FuzzySSSP" . * gas:program gas:in <IRI> . # one or more times, specifies the initial frontier. * gas:program gas:target <IRI> . # one or more times, identifies the target vertices and hence the paths of interest. @@ -150,6 +152,11 @@ * The namespace used for bigdata GAS API. */ String NAMESPACE = "http://www.bigdata.com/rdf/gas#"; + + /** + * The URL at which the {@link GASService} will respond. + */ + URI SERVICE_KEY = new URIImpl(NAMESPACE + "service"); /** * Used as the subject in the GAS SERVICE invocation pattern. @@ -479,7 +486,7 @@ tmp = new LinkedList<Value>(); // found an o. - return (IVariable<?>) sp.o(); + return ((VarNode)sp.o()).getValueExpression(); } @@ -614,10 +621,9 @@ } /** + * Execute the GAS program. + * <p> * {@inheritDoc} - * - * TODO Join with the source solutions? Or is that handled by the - * caller? */ @Override public ICloseableIterator<IBindingSet> call( @@ -657,10 +663,19 @@ // Setup the initial frontier. for (Value startingVertex : initialFrontier) { - gasState.setFrontier(gasContext, startingVertex); + /* + * FIXME Why can't we pass in the Value (with a defined + * IV) and not the IV? This should work. Passing in the + * IV is against the grain of the API and the + * generalized abstraction as Values. Of course, having + * the IV is necessary since this is an internal, high + * performance, and close to the indices operation. + */ + gasState.setFrontier(gasContext, + ((BigdataValue) startingVertex).getIV()); } - + } // Run the analytic. @@ -702,7 +717,7 @@ int i = 0; for (Value v : visitedSet) { - out[i] = new ListBindingSet(vars, + out[i++] = new ListBindingSet(vars, new IConstant[] { new Constant(v) }); } Modified: branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/service/ServiceRegistry.java =================================================================== --- branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/service/ServiceRegistry.java 2014-02-22 22:37:27 UTC (rev 7871) +++ branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/service/ServiceRegistry.java 2014-02-22 22:38:43 UTC (rev 7872) @@ -10,6 +10,7 @@ import org.openrdf.model.URI; import org.openrdf.model.impl.URIImpl; +import com.bigdata.rdf.graph.impl.bd.GASService; import com.bigdata.rdf.sparql.ast.QueryHints; import com.bigdata.rdf.sparql.ast.cache.DescribeServiceFactory; import com.bigdata.rdf.sparql.ast.eval.SampleServiceFactory; @@ -112,6 +113,9 @@ } + // The Gather-Apply-Scatter RDF Graph Mining service. + add(GASService.Options.SERVICE_KEY, new GASService()); + } /** This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2014-02-22 22:37:32
|
Revision: 7871 http://sourceforge.net/p/bigdata/code/7871 Author: thompsonbry Date: 2014-02-22 22:37:27 +0000 (Sat, 22 Feb 2014) Log Message: ----------- javadoc Modified Paths: -------------- branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/impl/util/GASRunnerBase.java Modified: branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/impl/util/GASRunnerBase.java =================================================================== --- branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/impl/util/GASRunnerBase.java 2014-02-22 18:42:26 UTC (rev 7870) +++ branches/RDR/bigdata-gas/src/java/com/bigdata/rdf/graph/impl/util/GASRunnerBase.java 2014-02-22 22:37:27 UTC (rev 7871) @@ -548,6 +548,7 @@ gasState.setFrontier(gasContext, startingVertex); + // Run analytic. final IGASStats stats = (IGASStats) gasContext.call(); if (stats.getFrontierSize() == 1) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <mrp...@us...> - 2014-02-22 18:42:32
|
Revision: 7870 http://sourceforge.net/p/bigdata/code/7870 Author: mrpersonick Date: 2014-02-22 18:42:26 +0000 (Sat, 22 Feb 2014) Log Message: ----------- get the asBound predicate right when the sidVar is bound in the incoming solution Modified Paths: -------------- branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOPredicate.java Modified: branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOPredicate.java =================================================================== --- branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOPredicate.java 2014-02-22 18:33:55 UTC (rev 7869) +++ branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOPredicate.java 2014-02-22 18:42:26 UTC (rev 7870) @@ -387,7 +387,7 @@ // inconsistent return null; - } else { + } else if (this.s().isVar()) { bindingSet.set((IVariable) this.s(), new Constant<IV>(s)); @@ -398,7 +398,7 @@ // inconsistent return null; - } else { + } else if (this.p().isVar()) { bindingSet.set((IVariable) this.p(), new Constant<IV>(p)); @@ -409,7 +409,7 @@ // inconsistent return null; - } else { + } else if (this.o().isVar()) { bindingSet.set((IVariable) this.o(), new Constant<IV>(o)); @@ -421,7 +421,7 @@ // // inconsistent // return null; // -// } else { +// } else if (this.c().isVar()) { // // bindingSet.set((IVariable) this.c(), new Constant<IV>(c)); // This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <mrp...@us...> - 2014-02-22 18:34:02
|
Revision: 7869 http://sourceforge.net/p/bigdata/code/7869 Author: mrpersonick Date: 2014-02-22 18:33:55 +0000 (Sat, 22 Feb 2014) Log Message: ----------- get the asBound predicate right when the sidVar is bound in the incoming solution Modified Paths: -------------- branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/StaticOptimizer.java branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOPredicate.java Modified: branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/StaticOptimizer.java =================================================================== --- branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/StaticOptimizer.java 2014-02-22 18:22:38 UTC (rev 7868) +++ branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/StaticOptimizer.java 2014-02-22 18:33:55 UTC (rev 7869) @@ -14,7 +14,9 @@ import com.bigdata.rdf.sparql.ast.IReorderableNode; import com.bigdata.rdf.sparql.ast.QueryHints; import com.bigdata.rdf.sparql.ast.QueryRoot; +import com.bigdata.rdf.sparql.ast.StatementPatternNode; import com.bigdata.rdf.sparql.ast.StaticAnalysis; +import com.bigdata.rdf.sparql.ast.VarNode; import com.bigdata.rdf.sparql.ast.eval.AST2BOpContext; import com.bigdata.rdf.sparql.ast.optimizers.ASTStaticJoinOptimizer.Annotations; @@ -231,8 +233,20 @@ order[0] = preferredFirstTail; order[1] = preferredFirstTail == 0 ? 1 : 0; } else { - order[0] = cardinality(0) <= cardinality(1) ? 0 : 1; - order[1] = cardinality(0) <= cardinality(1) ? 1 : 0; + if (cardinality(0) == cardinality(1) && + nodes.get(0) instanceof StatementPatternNode && + nodes.get(1) instanceof StatementPatternNode) { + final VarNode sid0 = ((StatementPatternNode) nodes.get(0)).sid(); + final VarNode sid1 = ((StatementPatternNode) nodes.get(1)).sid(); + if (sid0 != null && sid1 == null) { + order[0] = 1; + order[1] = 0; + } + } + if (order[0] == -1) { + order[0] = cardinality(0) <= cardinality(1) ? 0 : 1; + order[1] = cardinality(0) <= cardinality(1) ? 1 : 0; + } } return computeJoinCardinality(getTail(0), getTail(1)); } Modified: branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOPredicate.java =================================================================== --- branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOPredicate.java 2014-02-22 18:22:38 UTC (rev 7868) +++ branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOPredicate.java 2014-02-22 18:33:55 UTC (rev 7869) @@ -26,6 +26,7 @@ import java.util.Map; import com.bigdata.bop.BOp; +import com.bigdata.bop.Constant; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IVariable; import com.bigdata.bop.IVariableOrConstant; @@ -33,6 +34,7 @@ import com.bigdata.bop.ap.Predicate; import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.internal.impl.bnode.SidIV; +import com.bigdata.rdf.sparql.ast.FilterNode; import com.bigdata.relation.rule.IAccessPathExpander; /** @@ -353,6 +355,80 @@ @Override public SPOPredicate asBound(final IBindingSet bindingSet) { + final IVariable<?> sidVar = sid(); + + if (sidVar != null && bindingSet.isBound(sidVar)) { + + final Object obj = bindingSet.get(sidVar).get(); + + // prior predicate bound something other than a sid to a sid var + if (obj instanceof SidIV == false) { + + // inconsistent + return null; + + } + + final SidIV sidIV = (SidIV) obj; + + final ISPO spo = sidIV.getInlineValue(); + + final IV s = spo.s(); + + final IV p = spo.p(); + + final IV o = spo.o(); + + // TODO implement RDR in quads mode +// final IV c = spo.c(); + + if (this.s().isConstant() && !this.s().get().equals(s)) { + + // inconsistent + return null; + + } else { + + bindingSet.set((IVariable) this.s(), new Constant<IV>(s)); + + } + + if (this.p().isConstant() && !this.p().get().equals(p)) { + + // inconsistent + return null; + + } else { + + bindingSet.set((IVariable) this.p(), new Constant<IV>(p)); + + } + + if (this.o().isConstant() && !this.o().get().equals(o)) { + + // inconsistent + return null; + + } else { + + bindingSet.set((IVariable) this.o(), new Constant<IV>(o)); + + } + + // TODO implement RDR in quads mode +// if (this.c().isConstant() && !this.c().get().equals(c)) { +// +// // inconsistent +// return null; +// +// } else { +// +// bindingSet.set((IVariable) this.c(), new Constant<IV>(c)); +// +// } + + } + return (SPOPredicate) new SPOPredicate(argsCopy(), annotationsRef()) ._asBound(bindingSet); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2014-02-22 18:22:41
|
Revision: 7868 http://sourceforge.net/p/bigdata/code/7868 Author: thompsonbry Date: 2014-02-22 18:22:38 +0000 (Sat, 22 Feb 2014) Log Message: ----------- IPredicate.asBound() - javadoc. now allows a null return. BOpContext.bind() - the bind() version associated with the IElement based PipelineJoin code path has been deprecated. PipelineJoin: - the handleJoin() method has been deprecated. It is associated with the IElement process rather than "solutions" based AP reads. - the code in the BindingSetConsumerTask has been modified to accomodate the possible null return from SPOPreciate.asBound() when the SPOPredicate is associated with an RDR access path (the SID variable is defined). Incoming binding sets that cause SPOPredicate.asBound() to fail will cause the join to fail for that source solution. For these cases, the join for that source solution is simply not run (we do not create the associated AccessPathTest). @see #815 (RDR query does too much work) Modified Paths: -------------- branches/RDR/bigdata/src/java/com/bigdata/bop/BOpContext.java branches/RDR/bigdata/src/java/com/bigdata/bop/IPredicate.java branches/RDR/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java Modified: branches/RDR/bigdata/src/java/com/bigdata/bop/BOpContext.java =================================================================== --- branches/RDR/bigdata/src/java/com/bigdata/bop/BOpContext.java 2014-02-21 20:04:18 UTC (rev 7867) +++ branches/RDR/bigdata/src/java/com/bigdata/bop/BOpContext.java 2014-02-22 18:22:38 UTC (rev 7868) @@ -707,7 +707,7 @@ * * @throws NullPointerException * if an argument is <code>null</code>. - */ + */ @Deprecated// with PipelineJoin.JoinTask.AccessPathTask.handleJoin() final static public boolean bind(final IPredicate<?> pred, final IConstraint[] constraints, final Object e, final IBindingSet bindings) { Modified: branches/RDR/bigdata/src/java/com/bigdata/bop/IPredicate.java =================================================================== --- branches/RDR/bigdata/src/java/com/bigdata/bop/IPredicate.java 2014-02-21 20:04:18 UTC (rev 7867) +++ branches/RDR/bigdata/src/java/com/bigdata/bop/IPredicate.java 2014-02-22 18:22:38 UTC (rev 7868) @@ -622,6 +622,12 @@ * * @param bindingSet * The binding set. + * + * @return The as-bound {@link IPredicate} -or- <code>null</code> if the + * {@link IPredicate} can not be unified with the + * {@link IBindingSet}. + * + * @see #815 (RDR query does too much work) */ public IPredicate<E> asBound(IBindingSet bindingSet); Modified: branches/RDR/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java =================================================================== --- branches/RDR/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java 2014-02-21 20:04:18 UTC (rev 7867) +++ branches/RDR/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java 2014-02-22 18:22:38 UTC (rev 7868) @@ -1012,6 +1012,22 @@ // constrain the predicate to the given bindings. IPredicate<E> asBound = predicate.asBound(bindingSet); + if (asBound == null) { + + /* + * This can happen for a SIDS mode join if some of the + * (s,p,o,[c]) and SID are bound on entry and they can not + * be unified. For example, the s position might be + * inconsistent with the Subject that can be decoded from + * the SID binding. + * + * @see #815 (RDR query does too much work) + */ + + return; + + } + if (partitionId != -1) { /* @@ -1049,10 +1065,11 @@ if (coalesceAccessPaths) { - /* - * Aggregate the source bindingSets that license the same - * asBound predicate. - */ + /* + * Aggregate the source bindingSets that license the same + * asBound predicate. The predicates in the keys of this map + * as "as-bound". + */ final Map<IPredicate<E>, Collection<IBindingSet>> map = combineBindingSets(chunk); /* @@ -1069,7 +1086,7 @@ * Do not coalesce access paths. */ - tasks = new JoinTask.AccessPathTask[chunk.length]; + final List<AccessPathTask> tmp = new LinkedList<AccessPathTask>(); for (int i = 0; i < chunk.length; i++) { @@ -1078,8 +1095,24 @@ // constrain the predicate to the given bindings. IPredicate<E> asBound = predicate.asBound(bindingSet); - if (partitionId != -1) { + if (asBound == null) { + /* + * This can happen for a SIDS mode join if some of the + * (s,p,o,[c]) and SID are bound on entry and they can not + * be unified. For example, the s position might be + * inconsistent with the Subject that can be decoded from + * the SID binding. + * + * @see #815 (RDR query does too much work) + */ + + continue; + + } + + if (partitionId != -1) { + /* * Constrain the predicate to the desired index * partition. @@ -1095,11 +1128,15 @@ } - tasks[i] = new AccessPathTask(asBound, Collections - .singletonList(bindingSet)); + tmp.add(new AccessPathTask(asBound, Collections + .singletonList(bindingSet))); } + // Exact fit array. + tasks = tmp + .toArray(new JoinTask.AccessPathTask[tmp.size()]); + } return tasks; @@ -1143,8 +1180,24 @@ // constrain the predicate to the given bindings. IPredicate<E> asBound = predicate.asBound(bindingSet); - if (partitionId != -1) { + if (asBound == null) { + /* + * This can happen for a SIDS mode join if some of the + * (s,p,o,[c]) and SID are bound on entry and they can not + * be unified. For example, the s position might be + * inconsistent with the Subject that can be decoded from + * the SID binding. + * + * @see #815 (RDR query does too much work) + */ + + continue; + + } + + if (partitionId != -1) { + /* * Constrain the predicate to the desired index * partition. @@ -1597,7 +1650,7 @@ /** * A vectored pipeline join (chunk at a time processing) for * {@link IElement}s. - */ + */@Deprecated // by handleJoin2() protected void handleJoin() { final long cutoffLimit = predicate.getProperty( This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2014-02-21 20:04:23
|
Revision: 7867 http://sourceforge.net/p/bigdata/code/7867 Author: thompsonbry Date: 2014-02-21 20:04:18 +0000 (Fri, 21 Feb 2014) Log Message: ----------- Commit of unit test for #815. The test passes. The problem is how the joins are being executed (underconstrained, too much work). Modified Paths: -------------- branches/RDR/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/TestReificationDoneRightEval.java branches/RDR/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-04.srx Added Paths: ----------- branches/RDR/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-04.rq branches/RDR/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-04.ttl Modified: branches/RDR/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/TestReificationDoneRightEval.java =================================================================== --- branches/RDR/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/TestReificationDoneRightEval.java 2014-02-21 19:59:39 UTC (rev 7866) +++ branches/RDR/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/TestReificationDoneRightEval.java 2014-02-21 20:04:18 UTC (rev 7867) @@ -387,6 +387,22 @@ } + /** + * <pre> + * </pre> + * @see <a href="http://trac.bigdata.com/ticket/815"> RDR query does too + * much work</a> + */ + public void test_reificationDoneRight_04() throws Exception { + + new TestHelper("reif/rdr-04", // testURI, + "reif/rdr-04.rq",// queryFileURL + "reif/rdr-04.ttl",// dataFileURL + "reif/rdr-04.srx"// resultFileURL + ).runTest(); + + } + @Override public Properties getProperties() { Added: branches/RDR/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-04.rq =================================================================== --- branches/RDR/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-04.rq (rev 0) +++ branches/RDR/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-04.rq 2014-02-21 20:04:18 UTC (rev 7867) @@ -0,0 +1,5 @@ +prefix : <http://example.com/> + +select ?s ?p ?o ?p1 ?o1 where { + <<?s ?p ?o>> ?p1 ?o1 . +} Modified: branches/RDR/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-04.srx =================================================================== --- branches/RDR/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-04.srx 2014-02-21 19:59:39 UTC (rev 7866) +++ branches/RDR/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-04.srx 2014-02-21 20:04:18 UTC (rev 7867) @@ -13,7 +13,7 @@ </head> <results> <result> - <binding name="a"> + <binding name="s"> <uri>http://example.com/a1</uri> </binding> <binding name="p"> @@ -30,7 +30,7 @@ </binding> </result> <result> - <binding name="a"> + <binding name="s"> <uri>http://example.com/a2</uri> </binding> <binding name="p"> Added: branches/RDR/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-04.ttl =================================================================== --- branches/RDR/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-04.ttl (rev 0) +++ branches/RDR/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-04.ttl 2014-02-21 20:04:18 UTC (rev 7867) @@ -0,0 +1,7 @@ +@prefix : <http://example.com/> . +@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> . + +:a1 :b :c . +:a2 :b :c . +<<:a1 :b :c>> :d :e1 . +<<:a2 :b :c>> :d :e2 . This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2014-02-21 19:59:43
|
Revision: 7866 http://sourceforge.net/p/bigdata/code/7866 Author: thompsonbry Date: 2014-02-21 19:59:39 +0000 (Fri, 21 Feb 2014) Log Message: ----------- expected results for a new RDR test Added Paths: ----------- branches/RDR/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-04.srx Added: branches/RDR/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-04.srx =================================================================== --- branches/RDR/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-04.srx (rev 0) +++ branches/RDR/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-04.srx 2014-02-21 19:59:39 UTC (rev 7866) @@ -0,0 +1,50 @@ +<?xml version="1.0"?> +<sparql + xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" + xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" + xmlns:xs="http://www.w3.org/2001/XMLSchema#" + xmlns="http://www.w3.org/2005/sparql-results#" > + <head> + <variable name="s"/> + <variable name="p"/> + <variable name="o"/> + <variable name="p1"/> + <variable name="o1"/> + </head> + <results> + <result> + <binding name="a"> + <uri>http://example.com/a1</uri> + </binding> + <binding name="p"> + <uri>http://example.com/b</uri> + </binding> + <binding name="o"> + <uri>http://example.com/c</uri> + </binding> + <binding name="p1"> + <uri>http://example.com/d</uri> + </binding> + <binding name="o1"> + <uri>http://example.com/e1</uri> + </binding> + </result> + <result> + <binding name="a"> + <uri>http://example.com/a2</uri> + </binding> + <binding name="p"> + <uri>http://example.com/b</uri> + </binding> + <binding name="o"> + <uri>http://example.com/c</uri> + </binding> + <binding name="p1"> + <uri>http://example.com/d</uri> + </binding> + <binding name="o1"> + <uri>http://example.com/e2</uri> + </binding> + </result> + </results> +</sparql> \ No newline at end of file This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2014-02-21 19:46:44
|
Revision: 7865 http://sourceforge.net/p/bigdata/code/7865 Author: thompsonbry Date: 2014-02-21 19:46:40 +0000 (Fri, 21 Feb 2014) Log Message: ----------- more RDR parser tests Modified Paths: -------------- branches/RDR/bigdata-sails/src/test/com/bigdata/rdf/sail/sparql/TestReificationDoneRightParser.java Modified: branches/RDR/bigdata-sails/src/test/com/bigdata/rdf/sail/sparql/TestReificationDoneRightParser.java =================================================================== --- branches/RDR/bigdata-sails/src/test/com/bigdata/rdf/sail/sparql/TestReificationDoneRightParser.java 2014-02-21 19:39:06 UTC (rev 7864) +++ branches/RDR/bigdata-sails/src/test/com/bigdata/rdf/sail/sparql/TestReificationDoneRightParser.java 2014-02-21 19:46:40 UTC (rev 7865) @@ -591,7 +591,171 @@ * <pre> * prefix : <http://example.com/> * SELECT ?a { - * BIND( <<?a ?b ?c>> >> as ?sid ) . + * ?d ?e <<?a ?b ?c>> . + * } + * </pre> + * + * Should be translated as : + * + * <pre> + * SP(?a, ?b, ?c) as ?sid-1 . + * SP(?d, ?e, ?-sid-1). + * </pre> + * + * Note that the SP for the first bound triple pattern will enforce the + * semantics that the triple must exist in the data in order for the query + * to succeed. + */ + public void test_triple_ref_pattern_all_vars() + throws MalformedQueryException, TokenMgrError, ParseException { + + final String sparql // + = "prefix : <http://example.com/>\n" // + + "select ?a {\n"// + + " ?d ?e <<?a ?b ?c>> .\n" + + "}"; + + final QueryRoot expected = new QueryRoot(QueryType.SELECT); + { + + final VarNode a = new VarNode("a"); + final VarNode b = new VarNode("b"); + final VarNode c = new VarNode("c"); + final VarNode d = new VarNode("d"); + final VarNode e = new VarNode("e"); + final VarNode sid = new VarNode("-sid-1"); + + { + final Map<String, String> prefixDecls = new LinkedHashMap<String, String>(); + expected.setPrefixDecls(prefixDecls); + prefixDecls.put("", "http://example.com/"); + } + + final ProjectionNode projection = new ProjectionNode(); + projection.addProjectionVar(new VarNode("a")); + expected.setProjection(projection); + + final JoinGroupNode whereClause = new JoinGroupNode(); + expected.setWhereClause(whereClause); + + // SP(?a, ?b, ?c) as ?sid . + final StatementPatternNode sp1 = new StatementPatternNode(// + a,// + b,// + c,// + null/* c */,// + Scope.DEFAULT_CONTEXTS); + sp1.setSid(sid); + + // SP(?d, ?e, ?sid). + final StatementPatternNode sp2 = new StatementPatternNode(// + d,// + e,// + sid,// + null/* c */,// + Scope.DEFAULT_CONTEXTS); + + whereClause.addChild(sp1); + + whereClause.addChild(sp2); + + } + + final QueryRoot actual = parse(sparql, baseURI); + + assertSameAST(sparql, expected, actual); + + } + + /** + * A unit test when the triple reference pattern is a constant. + * + * <pre> + * prefix : <http://example.com/> + * SELECT ?a { + * <<?a ?b ?c>> ?d ?e . + * } + * </pre> + * + * Should be translated as : + * + * <pre> + * SP(?a, ?b, ?c) as ?sid-1 . + * SP(?-sid-1, ?d, ?e). + * </pre> + * + * Note that the SP for the first bound triple pattern will enforce the + * semantics that the triple must exist in the data in order for the query + * to succeed. + */ + public void test_triple_ref_pattern_all_vars2() + throws MalformedQueryException, TokenMgrError, ParseException { + + final String sparql // + = "prefix : <http://example.com/>\n" // + + "select ?a {\n"// + + " <<?a ?b ?c>> ?d ?e .\n" + + "}"; + + final QueryRoot expected = new QueryRoot(QueryType.SELECT); + { + + final VarNode a = new VarNode("a"); + final VarNode b = new VarNode("b"); + final VarNode c = new VarNode("c"); + final VarNode d = new VarNode("d"); + final VarNode e = new VarNode("e"); + final VarNode sid = new VarNode("-sid-1"); + + { + final Map<String, String> prefixDecls = new LinkedHashMap<String, String>(); + expected.setPrefixDecls(prefixDecls); + prefixDecls.put("", "http://example.com/"); + } + + final ProjectionNode projection = new ProjectionNode(); + projection.addProjectionVar(new VarNode("a")); + expected.setProjection(projection); + + final JoinGroupNode whereClause = new JoinGroupNode(); + expected.setWhereClause(whereClause); + + // SP(?a, ?b, ?c) as ?sid . + final StatementPatternNode sp1 = new StatementPatternNode(// + a,// + b,// + c,// + null/* c */,// + Scope.DEFAULT_CONTEXTS); + sp1.setSid(sid); + + // SP(?sid, ?d, ?e). + final StatementPatternNode sp2 = new StatementPatternNode(// + sid,// + d,// + e,// + null/* c */,// + Scope.DEFAULT_CONTEXTS); + + whereClause.addChild(sp1); + + whereClause.addChild(sp2); + + } + + final QueryRoot actual = parse(sparql, baseURI); + + assertSameAST(sparql, expected, actual); + + } + + /** + * A unit test when the triple reference pattern is a constant. + * + * <pre> + * prefix : <http://example.com/> + * SELECT ?a { + * BIND( <<?a ?b ?c>> as ?sid ) . * ?d ?e ?sid . * } * </pre> @@ -607,7 +771,7 @@ * semantics that the triple must exist in the data in order for the query * to succeed. */ - public void test_triple_ref_pattern_all_vars() + public void test_triple_ref_pattern_all_vars_with_explicit_bind() throws MalformedQueryException, TokenMgrError, ParseException { final String sparql // This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2014-02-21 19:39:09
|
Revision: 7864 http://sourceforge.net/p/bigdata/code/7864 Author: thompsonbry Date: 2014-02-21 19:39:06 +0000 (Fri, 21 Feb 2014) Log Message: ----------- added new RDR test. Modified Paths: -------------- branches/RDR/bigdata-sails/src/test/com/bigdata/rdf/sail/sparql/TestReificationDoneRightParser.java Modified: branches/RDR/bigdata-sails/src/test/com/bigdata/rdf/sail/sparql/TestReificationDoneRightParser.java =================================================================== --- branches/RDR/bigdata-sails/src/test/com/bigdata/rdf/sail/sparql/TestReificationDoneRightParser.java 2014-02-21 09:09:39 UTC (rev 7863) +++ branches/RDR/bigdata-sails/src/test/com/bigdata/rdf/sail/sparql/TestReificationDoneRightParser.java 2014-02-21 19:39:06 UTC (rev 7864) @@ -585,4 +585,88 @@ } + /** + * A unit test when the triple reference pattern is a constant. + * + * <pre> + * prefix : <http://example.com/> + * SELECT ?a { + * BIND( <<?a ?b ?c>> >> as ?sid ) . + * ?d ?e ?sid . + * } + * </pre> + * + * Should be translated as : + * + * <pre> + * SP(?a, ?b, ?c) as ?sid . + * SP(?d, ?e, ?sid). + * </pre> + * + * Note that the SP for the first bound triple pattern will enforce the + * semantics that the triple must exist in the data in order for the query + * to succeed. + */ + public void test_triple_ref_pattern_all_vars() + throws MalformedQueryException, TokenMgrError, ParseException { + + final String sparql // + = "prefix : <http://example.com/>\n" // + + "select ?a {\n"// + + " BIND( <<?a ?b ?c>> AS ?sid) .\n"// + + " ?d ?e ?sid.\n" + + "}"; + + final QueryRoot expected = new QueryRoot(QueryType.SELECT); + { + + final VarNode a = new VarNode("a"); + final VarNode b = new VarNode("b"); + final VarNode c = new VarNode("c"); + final VarNode d = new VarNode("d"); + final VarNode e = new VarNode("e"); + final VarNode sid = new VarNode("sid"); + + { + final Map<String, String> prefixDecls = new LinkedHashMap<String, String>(); + expected.setPrefixDecls(prefixDecls); + prefixDecls.put("", "http://example.com/"); + } + + final ProjectionNode projection = new ProjectionNode(); + projection.addProjectionVar(new VarNode("a")); + expected.setProjection(projection); + + final JoinGroupNode whereClause = new JoinGroupNode(); + expected.setWhereClause(whereClause); + + // SP(?a, ?b, ?c) as ?sid . + final StatementPatternNode sp1 = new StatementPatternNode(// + a,// + b,// + c,// + null/* c */,// + Scope.DEFAULT_CONTEXTS); + sp1.setSid(sid); + + // SP(?d, ?e, ?sid). + final StatementPatternNode sp2 = new StatementPatternNode(// + d,// + e,// + sid,// + null/* c */,// + Scope.DEFAULT_CONTEXTS); + + whereClause.addChild(sp1); + + whereClause.addChild(sp2); + + } + + final QueryRoot actual = parse(sparql, baseURI); + + assertSameAST(sparql, expected, actual); + + } + } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <mar...@us...> - 2014-02-21 09:09:43
|
Revision: 7863 http://sourceforge.net/p/bigdata/code/7863 Author: martyncutcher Date: 2014-02-21 09:09:39 +0000 (Fri, 21 Feb 2014) Log Message: ----------- Branch to develop HA5 tests, and and required HA amendments Added Paths: ----------- branches/BIGDATA_MGC_HA5/ This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2014-02-20 21:14:58
|
Revision: 7862 http://sourceforge.net/p/bigdata/code/7862 Author: thompsonbry Date: 2014-02-20 21:14:56 +0000 (Thu, 20 Feb 2014) Log Message: ----------- javadoc Modified Paths: -------------- branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/bd/GASService.java Modified: branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/bd/GASService.java =================================================================== --- branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/bd/GASService.java 2014-02-20 21:14:41 UTC (rev 7861) +++ branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/bd/GASService.java 2014-02-20 21:14:56 UTC (rev 7862) @@ -82,12 +82,12 @@ * PREFIX gas <http://www.bigdata.com/rdf/gas#> * #... * SERVICE <GAS> { - * gas:program gas:gasClass "com.bigdata.rdf.graph.analytics.BFS" - * gas:program gas:in <IRI> # one or more times, specifies the initial frontier. - * gas:program gas:out ?out # exactly once - will be bound to the visited vertices. - * gas:program gas:maxIterations 4 # optional limit on breadth first expansion. - * gas:program gas:maxVisited 2000 # optional limit on the #of visited vertices. - * gas:program gas:nthreads 4 # specify the #of threads to use (optional) + * gas:program gas:gasClass "com.bigdata.rdf.graph.analytics.BFS" . + * gas:program gas:in <IRI> . # one or more times, specifies the initial frontier. + * gas:program gas:out ?out . # exactly once - will be bound to the visited vertices. + * gas:program gas:maxIterations 4 . # optional limit on breadth first expansion. + * gas:program gas:maxVisited 2000 . # optional limit on the #of visited vertices. + * gas:program gas:nthreads 4 . # specify the #of threads to use (optional) * } * </pre> * @@ -97,12 +97,12 @@ * PREFIX gas <http://www.bigdata.com/rdf/gas#> * #... * SERVICE <GAS> { - * gas:program gas:gasClass "com.bigdata.rdf.graph.analytics.FuzzySSSP" - * gas:program gas:in <IRI> # one or more times, specifies the initial frontier. - * gas:program gas:target <IRI> # one or more times, identifies the target vertices and hence the paths of interest. - * gas:program gas:out ?out # exactly once - will be bound to the visited vertices laying within N-hops of the shortest paths. - * gas:program gas:maxIterations 4 # optional limit on breadth first expansion. - * gas:program gas:maxVisited 2000 # optional limit on the #of visited vertices. + * gas:program gas:gasClass "com.bigdata.rdf.graph.analytics.FuzzySSSP" . + * gas:program gas:in <IRI> . # one or more times, specifies the initial frontier. + * gas:program gas:target <IRI> . # one or more times, identifies the target vertices and hence the paths of interest. + * gas:program gas:out ?out . # exactly once - will be bound to the visited vertices laying within N-hops of the shortest paths. + * gas:program gas:maxIterations 4 . # optional limit on breadth first expansion. + * gas:program gas:maxVisited 2000 . # optional limit on the #of visited vertices. * } * </pre> * @@ -114,8 +114,16 @@ * * TODO The input frontier could be a variable, in which case we would pull out * the column for that variable rather than running the algorithm once per - * source binding set, right? Or maybe not. + * source binding set, right? Or maybe not. * + * TODO Allow {@link IReducer} that binds the visited vertex and also the + * dynamic state associated with that vertex. For BFS and SSSP, this could be + * depth/distance and the predecessor (for path information). For BFS and SSSP, + * we could also have a specific target vertex (or vertices) and then report out + * the path for that vertex/vertices. This would significantly reduce the data + * reported back. (Could we run SSSP in both directions to accelerate the + * convergence?) + * * TODO Also support export. This could be easily done using a SPARQL SELECT * * <pre> @@ -655,6 +663,7 @@ } + // Run the analytic. final IGASStats stats = (IGASStats) gasContext.call(); if (log.isInfoEnabled()) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2014-02-20 21:14:46
|
Revision: 7861 http://sourceforge.net/p/bigdata/code/7861 Author: thompsonbry Date: 2014-02-20 21:14:41 +0000 (Thu, 20 Feb 2014) Log Message: ----------- javadoc Modified Paths: -------------- branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/service/ServiceRegistry.java Modified: branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/service/ServiceRegistry.java =================================================================== --- branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/service/ServiceRegistry.java 2014-02-20 16:37:21 UTC (rev 7860) +++ branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/service/ServiceRegistry.java 2014-02-20 21:14:41 UTC (rev 7861) @@ -141,7 +141,15 @@ return defaultServiceFactoryRef.get(); } - + + /** + * Register a service. + * + * @param serviceURI + * The service URI. + * @param factory + * The factory to execute calls against that service. + */ public final void add(final URI serviceURI, final ServiceFactory factory) { synchronized (this) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <mrp...@us...> - 2014-02-20 16:37:24
|
Revision: 7860 http://sourceforge.net/p/bigdata/code/7860 Author: mrpersonick Date: 2014-02-20 16:37:21 +0000 (Thu, 20 Feb 2014) Log Message: ----------- fixed a bug where the full text query was not being interrupted properly Modified Paths: -------------- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/BigdataValueCentricFullTextIndex.java Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/BigdataValueCentricFullTextIndex.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/BigdataValueCentricFullTextIndex.java 2014-02-20 16:19:41 UTC (rev 7859) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/BigdataValueCentricFullTextIndex.java 2014-02-20 16:37:21 UTC (rev 7860) @@ -423,6 +423,12 @@ for (Map.Entry<IV<?,?>, BigdataValue> e : terms.entrySet()) { + if (Thread.interrupted()) { + + throw new RuntimeException(new InterruptedException()); + + } + final IV<?,?> iv = e.getKey(); final BigdataValue term = e.getValue(); @@ -488,6 +494,12 @@ for (Map.Entry<IV<?,?>, BigdataValue> e : terms.entrySet()) { + if (Thread.interrupted()) { + + throw new RuntimeException(new InterruptedException()); + + } + final IV<?,?> iv = e.getKey(); final BigdataValue term = e.getValue(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <mrp...@us...> - 2014-02-20 16:19:43
|
Revision: 7859 http://sourceforge.net/p/bigdata/code/7859 Author: mrpersonick Date: 2014-02-20 16:19:41 +0000 (Thu, 20 Feb 2014) Log Message: ----------- fixed a bug where the full text query was not being interrupted properly Modified Paths: -------------- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/search/FullTextIndex.java Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/search/FullTextIndex.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/search/FullTextIndex.java 2014-02-20 14:19:11 UTC (rev 7858) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/search/FullTextIndex.java 2014-02-20 16:19:41 UTC (rev 7859) @@ -1151,6 +1151,14 @@ log.info("Interrupted - only partial results will be returned."); } + /* + * Yes, let's toss it. We were getting into a situation + * where the ExecutionHelper above received an interrupt + * but we still went through the heavy-weight filtering + * operations below (matchExact or matchRegex). + */ + throw new RuntimeException(ex); + } catch (ExecutionException ex) { throw new RuntimeException(ex); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2014-02-20 14:19:15
|
Revision: 7858 http://sourceforge.net/p/bigdata/code/7858 Author: thompsonbry Date: 2014-02-20 14:19:11 +0000 (Thu, 20 Feb 2014) Log Message: ----------- Basic implementation of the GASService sufficient to invoke BFS, SSSP, CC, or PR. This is not yet tested. It is not sufficient to invoke the FuzzySSSP algorithm since that does not implement the IGASProgram interface. javadoc on ServiceCall. Modified Paths: -------------- branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/service/ServiceCall.java Added Paths: ----------- branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/bd/GASService.java Added: branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/bd/GASService.java =================================================================== --- branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/bd/GASService.java (rev 0) +++ branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/bd/GASService.java 2014-02-20 14:19:11 UTC (rev 7858) @@ -0,0 +1,774 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2011. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +package com.bigdata.rdf.graph.impl.bd; + +import java.lang.reflect.Constructor; +import java.util.Collections; +import java.util.Iterator; +import java.util.LinkedHashSet; +import java.util.LinkedList; +import java.util.List; +import java.util.Set; +import java.util.concurrent.ConcurrentSkipListSet; + +import org.apache.log4j.Logger; +import org.openrdf.model.Literal; +import org.openrdf.model.URI; +import org.openrdf.model.Value; +import org.openrdf.model.impl.URIImpl; + +import com.bigdata.bop.Constant; +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IConstant; +import com.bigdata.bop.IVariable; +import com.bigdata.bop.bindingSet.ListBindingSet; +import com.bigdata.journal.IIndexManager; +import com.bigdata.rdf.graph.IGASContext; +import com.bigdata.rdf.graph.IGASEngine; +import com.bigdata.rdf.graph.IGASProgram; +import com.bigdata.rdf.graph.IGASScheduler; +import com.bigdata.rdf.graph.IGASSchedulerImpl; +import com.bigdata.rdf.graph.IGASState; +import com.bigdata.rdf.graph.IGASStats; +import com.bigdata.rdf.graph.IGraphAccessor; +import com.bigdata.rdf.graph.IReducer; +import com.bigdata.rdf.graph.impl.GASEngine; +import com.bigdata.rdf.graph.impl.GASState; +import com.bigdata.rdf.graph.impl.bd.BigdataGASEngine.BigdataGraphAccessor; +import com.bigdata.rdf.graph.impl.scheduler.CHMScheduler; +import com.bigdata.rdf.sail.BigdataSail.BigdataSailConnection; +import com.bigdata.rdf.sparql.ast.GraphPatternGroup; +import com.bigdata.rdf.sparql.ast.IGroupMemberNode; +import com.bigdata.rdf.sparql.ast.StatementPatternNode; +import com.bigdata.rdf.sparql.ast.service.BigdataNativeServiceOptions; +import com.bigdata.rdf.sparql.ast.service.BigdataServiceCall; +import com.bigdata.rdf.sparql.ast.service.CustomServiceFactory; +import com.bigdata.rdf.sparql.ast.service.IServiceOptions; +import com.bigdata.rdf.sparql.ast.service.ServiceCall; +import com.bigdata.rdf.sparql.ast.service.ServiceCallCreateParams; +import com.bigdata.rdf.sparql.ast.service.ServiceNode; +import com.bigdata.rdf.store.AbstractTripleStore; +import com.bigdata.striterator.ChunkedArrayIterator; + +import cutthecrap.utils.striterators.ICloseableIterator; + +/** + * A SERVICE that exposes {@link IGASProgram}s for SPARQL execution. + * <p> + * For example, the following would run a depth-limited BFS traversal: + * + * <pre> + * PREFIX gas <http://www.bigdata.com/rdf/gas#> + * #... + * SERVICE <GAS> { + * gas:program gas:gasClass "com.bigdata.rdf.graph.analytics.BFS" + * gas:program gas:in <IRI> # one or more times, specifies the initial frontier. + * gas:program gas:out ?out # exactly once - will be bound to the visited vertices. + * gas:program gas:maxIterations 4 # optional limit on breadth first expansion. + * gas:program gas:maxVisited 2000 # optional limit on the #of visited vertices. + * gas:program gas:nthreads 4 # specify the #of threads to use (optional) + * } + * </pre> + * + * Or the following would run the FuzzySSSP algorithm. + * + * <pre> + * PREFIX gas <http://www.bigdata.com/rdf/gas#> + * #... + * SERVICE <GAS> { + * gas:program gas:gasClass "com.bigdata.rdf.graph.analytics.FuzzySSSP" + * gas:program gas:in <IRI> # one or more times, specifies the initial frontier. + * gas:program gas:target <IRI> # one or more times, identifies the target vertices and hence the paths of interest. + * gas:program gas:out ?out # exactly once - will be bound to the visited vertices laying within N-hops of the shortest paths. + * gas:program gas:maxIterations 4 # optional limit on breadth first expansion. + * gas:program gas:maxVisited 2000 # optional limit on the #of visited vertices. + * } + * </pre> + * + * TODO Also allow the execution of gas workflows. A workflow would be more + * along the lines of a callable, but one where the initial source and/or target + * vertices could be identified. Or have an interface that wraps the analytics + * (including things like FuzzySSSP) so they can declare their own arguments for + * invocation as a SERVICE. + * + * TODO The input frontier could be a variable, in which case we would pull out + * the column for that variable rather than running the algorithm once per + * source binding set, right? Or maybe not. + * + * TODO Also support export. This could be easily done using a SPARQL SELECT + * + * <pre> + * SELECT ?src ?tgt ?edgeWeight { + * <<?src linkType ?tgt> propertyType ?edgeWeight> + * } + * </pre> + * + * or (if you have a simple topology without edge weights) + * + * <pre> + * SELECT ?src ?tgt bind(?edgeWeight,1) { + * ?src linkType ?tgt + * } + * </pre> + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + */ +public class GASService implements CustomServiceFactory { + + public interface Options { + + /** + * The namespace used for bigdata GAS API. + */ + String NAMESPACE = "http://www.bigdata.com/rdf/gas#"; + + /** + * Used as the subject in the GAS SERVICE invocation pattern. + */ + URI PROGRAM = new URIImpl(NAMESPACE + "program"); + + /** + * Magic predicate identifies the fully qualified class name of the + * {@link IGASProgram} to be executed. + */ + URI GAS_CLASS = new URIImpl(NAMESPACE + "gasClass"); + + /** + * The #of threads that will be used to expand the frontier in each + * iteration of the algorithm (optional, default + * {@value #DEFAULT_NTHREADS}). + * + * @see #DEFAULT_NTHREADS + */ + URI NTHREADS = new URIImpl(NAMESPACE + "nthreads"); + + int DEFAULT_NTHREADS = 4; + + /** + * The maximum #of iterations for the GAS program (optional, default + * {@value #DEFAULT_MAX_ITERATIONS}). + * + * @see #DEFAULT_MAX_ITERATIONS + */ + URI MAX_ITERATIONS = new URIImpl(NAMESPACE + "maxIterations"); + + int DEFAULT_MAX_ITERATIONS = Integer.MAX_VALUE; + + /** + * The maximum #of vertices in the visited set for the GAS program + * (optional, default {@value #DEFAULT_MAX_VISITED}). + * + * @see #DEFAULT_MAX_VISITED + */ + URI MAX_VISITED = new URIImpl(NAMESPACE + "maxVisited"); + + int DEFAULT_MAX_VISITED = Integer.MAX_VALUE; + + /** + * The {@link IGASScheduler} (default is {@link #DEFAULT_SCHEDULER}). + * Class must implement {@link IGASSchedulerImpl}. + */ + URI SCHEDULER_CLASS = new URIImpl(NAMESPACE + "schedulerClass"); + + Class<? extends IGASSchedulerImpl> DEFAULT_SCHEDULER = CHMScheduler.class; + + /** + * Magic predicate used to specify a vertex in the initial frontier. + */ + URI IN = new URIImpl(NAMESPACE + "in"); + + /** + * Magic predicate used to specify a variable that will become bound to + * each vertex in the visited set for the analytic. + * + * TODO This is not always what we want to report. We really need to + * specify the {@link IReducer} to run and then get the output of that + * to the caller. + */ + URI OUT = new URIImpl(NAMESPACE + "out"); + + } + + static private transient final Logger log = Logger + .getLogger(GASService.class); + + private final BigdataNativeServiceOptions serviceOptions; + + public GASService() { + + serviceOptions = new BigdataNativeServiceOptions(); + + /* + * TODO Review decision to make this a runFirst service. The rational is + * that this service can only apply a very limited set of restrictions + * during query, therefore it will often make sense to run it first. + * However, the fromTime and toTime could be bound by the query and the + * service can filter some things more efficiently internally than if we + * generated a bunch of intermediate solutions for those things. + */ + serviceOptions.setRunFirst(true); + + } + + /** + * The known URIs. + * <p> + * Note: We can recognize anything in {@link Options#NAMESPACE}, but the + * predicate still has to be something that we know how to interpret. + */ + static final Set<URI> gasUris; + + static { + + final Set<URI> set = new LinkedHashSet<URI>(); + + set.add(Options.PROGRAM); + + gasUris = Collections.unmodifiableSet(set); + + } + + @Override + public IServiceOptions getServiceOptions() { + + return serviceOptions; + + } + + /** + * NOP + * <p> + * {@inheritDoc} + */ + @Override + public void startConnection(BigdataSailConnection conn) { + // NOP + } + + @Override + public ServiceCall<?> create(final ServiceCallCreateParams params) { + + if (params == null) + throw new IllegalArgumentException(); + + final AbstractTripleStore store = params.getTripleStore(); + + if (store == null) + throw new IllegalArgumentException(); + + /* + * Create and return the ServiceCall object which will execute this + * query. + */ + + return new GASServiceCall(store, params.getServiceNode(), + getServiceOptions()); + + } + + /** + * Execute the service call (run the GAS program). + * + * @author <a href="mailto:tho...@us...">Bryan + * Thompson</a> + * + * TODO Validate the service call parameters, including whether they + * are understood by the specific algorithm. + * + * TODO Both maxIterations and maxVertexSetSize should be part of + * the GASState to be constraints that are applied for any + * algorithm. + */ + private static class GASServiceCall<VS, ES, ST> implements BigdataServiceCall { + + private final AbstractTripleStore store; + private final GraphPatternGroup<IGroupMemberNode> graphPattern; + private final IServiceOptions serviceOptions; + + // options extracted from the SERVICE's graph pattern. + private final int nthreads; + private final int maxIterations; // FIXME set as limit on GASState. + private final int maxVisited; // FIXME set as limit on GASState. + private final Class<IGASProgram<VS, ES, ST>> gasClass; + private final Class<IGASSchedulerImpl> schedulerClass; + private final Value[] initialFrontier; + private final IVariable<?> outVar; + + public GASServiceCall(final AbstractTripleStore store, + final ServiceNode serviceNode, + final IServiceOptions serviceOptions) { + + if (store == null) + throw new IllegalArgumentException(); + + if (serviceNode == null) + throw new IllegalArgumentException(); + + if (serviceOptions == null) + throw new IllegalArgumentException(); + + this.store = store; + + this.graphPattern = serviceNode.getGraphPattern(); + + this.serviceOptions = serviceOptions; + + this.nthreads = ((Literal) getOnlyArg( + Options.PROGRAM, + Options.NTHREADS, + store.getValueFactory().createLiteral( + Options.DEFAULT_NTHREADS))).intValue(); + + this.maxIterations = ((Literal) getOnlyArg(Options.PROGRAM, + Options.MAX_ITERATIONS, store.getValueFactory() + .createLiteral(Options.DEFAULT_MAX_ITERATIONS))) + .intValue(); + + this.maxVisited = ((Literal) getOnlyArg( + Options.PROGRAM, + Options.MAX_VISITED, + store.getValueFactory().createLiteral( + Options.DEFAULT_MAX_VISITED))).intValue(); + + // GASProgram (required) + { + + final Literal tmp = (Literal) getOnlyArg(Options.PROGRAM, + Options.GAS_CLASS); + + if (tmp == null) + throw new IllegalArgumentException( + "Required predicate not specified: " + + Options.GAS_CLASS); + + final String className = tmp.stringValue(); + + final Class<?> cls; + try { + cls = Class.forName(className); + } catch (ClassNotFoundException e) { + throw new IllegalArgumentException("No such class: " + + className); + } + + if (!IGASProgram.class.isAssignableFrom(cls)) + throw new IllegalArgumentException(Options.GAS_CLASS + + " must extend " + IGASProgram.class.getName()); + + this.gasClass = (Class<IGASProgram<VS, ES, ST>>) cls; + + } + + // Scheduler (optional). + { + + final Literal tmp = (Literal) getOnlyArg(Options.PROGRAM, + Options.SCHEDULER_CLASS); + + if (tmp == null) { + + this.schedulerClass = null; + + } else { + + final String className = tmp.stringValue(); + + final Class<?> cls; + try { + cls = Class.forName(className); + } catch (ClassNotFoundException e) { + throw new IllegalArgumentException("No such class: " + + className); + } + + if (!IGASSchedulerImpl.class.isAssignableFrom(cls)) + throw new IllegalArgumentException( + Options.SCHEDULER_CLASS + " must extend " + + IGASSchedulerImpl.class.getName()); + + this.schedulerClass = (Class<IGASSchedulerImpl>) cls; + + } + + } + + // Initial frontier. + this.initialFrontier = getArg(Options.PROGRAM, Options.IN); + + // The output variable (bound to the visited set). + this.outVar = getVar(Options.PROGRAM, Options.OUT); + + } + + /** + * Return the variable associated with the first instandce of the + * specified subject and predicate in the service's graph pattern. Only + * the simple {@link StatementPatternNode}s are visited. + * + * @param s + * The subject. + * @param p + * The predicate. + * + * @return The variable -or- <code>null</code> if the specified subject + * and predicate do not appear. + */ + private IVariable<?> getVar(final URI s, final URI p) { + + if (s == null) + throw new IllegalArgumentException(); + if (p == null) + throw new IllegalArgumentException(); + + List<Value> tmp = null; + + final Iterator<IGroupMemberNode> itr = graphPattern.getChildren() + .iterator(); + + while (itr.hasNext()) { + + final IGroupMemberNode child = itr.next(); + + if (!(child instanceof StatementPatternNode)) + continue; + + final StatementPatternNode sp = (StatementPatternNode) child; + + // s and p are constants. + if (!sp.s().isConstant()) + continue; + if (!sp.p().isConstant()) + continue; + + // constants match. + if (!s.equals(sp.s().getValue())) + continue; + if (!p.equals(sp.p().getValue())) + continue; + + if (tmp == null) + tmp = new LinkedList<Value>(); + + // found an o. + return (IVariable<?>) sp.o(); + + } + + return null; // not found. + + } + + /** + * Return the object bindings from the service's graph pattern for the + * specified subject and predicate. Only the simple + * {@link StatementPatternNode}s are visited. + * + * @param s + * The subject. + * @param p + * The predicate. + * + * @return An array containing one or more bindings -or- + * <code>null</code> if the specified subject and predicate do + * not appear. + */ + private Value[] getArg(final URI s, final URI p) { + + if (s == null) + throw new IllegalArgumentException(); + if (p == null) + throw new IllegalArgumentException(); + + List<Value> tmp = null; + + final Iterator<IGroupMemberNode> itr = graphPattern.getChildren() + .iterator(); + + while (itr.hasNext()) { + + final IGroupMemberNode child = itr.next(); + + if (!(child instanceof StatementPatternNode)) + continue; + + final StatementPatternNode sp = (StatementPatternNode) child; + + // s and p are constants. + if (!sp.s().isConstant()) + continue; + if (!sp.p().isConstant()) + continue; + + // constants match. + if (!s.equals(sp.s().getValue())) + continue; + if (!p.equals(sp.p().getValue())) + continue; + + if (tmp == null) + tmp = new LinkedList<Value>(); + + // found an o. + tmp.add(sp.o().getValue()); + + } + + if (tmp == null) + return null; + + return tmp.toArray(new Value[tmp.size()]); + + } + + /** + * Return the sole {@link Value} for the given s and p. + * + * @param s + * The subject. + * @param p + * The predicate. + * + * @return The sole {@link Value} for that s and p -or- + * <code>null</code> if no value was given. + * + * @throws RuntimeException + * if there are multiple values. + */ + private Value getOnlyArg(final URI s, final URI p) { + + final Value[] tmp = getArg(s, p); + + if (tmp == null) + return null; + + if (tmp.length > 1) + throw new IllegalArgumentException("Multiple values: s=" + s + + ", p=" + p); + + return tmp[0]; + + } + + /** + * Return the sole {@link Value} for the given s and p and the default + * value if no value was explicitly provided. + * + * @param s + * The subject. + * @param p + * The predicate. + * @param def + * The default value. + * + * @return The sole {@link Value} for that s and p -or- the default + * value if no value was given. + * + * @throws RuntimeException + * if there are multiple values. + */ + private Value getOnlyArg(final URI s, final URI p, final Value def) { + + final Value tmp = getOnlyArg(s, p); + + if (tmp == null) + return def; + + return tmp; + + } + + @Override + public IServiceOptions getServiceOptions() { + + return serviceOptions; + + } + + /** + * {@inheritDoc} + * + * TODO Join with the source solutions? Or is that handled by the + * caller? + */ + @Override + public ICloseableIterator<IBindingSet> call( + final IBindingSet[] bindingSets) throws Exception { + + /* + * Try/finally pattern to setup the BigdataGASEngine, execute the + * algorithm, and return the results. + */ + IGASEngine gasEngine = null; + + try { + + gasEngine = newGasEngine(store.getIndexManager(), nthreads); + + if (schedulerClass != null) { + + ((GASEngine) gasEngine).setSchedulerClass(schedulerClass); + + } + + final IGASProgram<VS, ES, ST> gasProgram = newGASProgram(gasClass); + + final IGraphAccessor graphAccessor = newGraphAccessor(store); + + final IGASContext<VS, ES, ST> gasContext = gasEngine.newGASContext( + graphAccessor, gasProgram); + + final IGASState<VS, ES, ST> gasState = gasContext.getGASState(); + + // TODO We should look at this when extracting the parameters from the SERVICE's graph pattern. +// final FrontierEnum frontierEnum = gasProgram +// .getInitialFrontierEnum(); + + if (initialFrontier != null) { + + // Setup the initial frontier. + for (Value startingVertex : initialFrontier) { + + gasState.setFrontier(gasContext, startingVertex); + + } + + } + + final IGASStats stats = (IGASStats) gasContext.call(); + + if (log.isInfoEnabled()) { + final StringBuilder sb = new StringBuilder(); + sb.append("GAS"); + sb.append(": analytic=" + gasProgram.getClass().getSimpleName()); + sb.append(", nthreads=" + nthreads); + sb.append(", scheduler=" + ((GASState<VS, ES, ST>)gasState).getScheduler().getClass().getSimpleName()); + sb.append(", gasEngine=" + gasEngine.getClass().getSimpleName()); + sb.append(", stats=" + stats); + log.info(sb.toString()); + } + + /* TODO We should be able to run a REDUCER here, not just + * report what is in the visited set. + */ + final Set<Value> visitedSet = new ConcurrentSkipListSet<Value>(); + + gasState.reduce(new IReducer<VS, ES, ST, Void>() { + + @Override + public void visit(IGASState<VS, ES, ST> state, Value u) { + visitedSet.add(u); + } + + @Override + public Void get() { + return null; + } + }); + + final IBindingSet[] out = new IBindingSet[visitedSet.size()]; + + { + final IVariable[] vars = new IVariable[] { outVar }; + int i = 0; + for (Value v : visitedSet) { + + out[i] = new ListBindingSet(vars, + new IConstant[] { new Constant(v) }); + + } + } + + return new ChunkedArrayIterator<IBindingSet>(out); + + } finally { + + if (gasEngine != null) { + + gasEngine.shutdownNow(); + + gasEngine = null; + + } + + } + + } + + /** + * Factory for the {@link IGASEngine}. + */ + private IGASEngine newGasEngine(final IIndexManager indexManager, + final int nthreads) { + + return new BigdataGASEngine(indexManager, nthreads); + + } + + /** + * Return an instance of the {@link IGASProgram} to be evaluated. + */ + private IGASProgram<VS, ES, ST> newGASProgram( + final Class<IGASProgram<VS, ES, ST>> cls) { + + if (cls == null) + throw new IllegalArgumentException(); + + try { + + final Constructor<IGASProgram<VS, ES, ST>> ctor = cls + .getConstructor(new Class[] {}); + + final IGASProgram<VS, ES, ST> gasProgram = ctor + .newInstance(new Object[] {}); + + return gasProgram; + + } catch (Exception e) { + + throw new RuntimeException(e); + + } + + } + + /** + * Return the object used to access the as-configured graph. + */ + private IGraphAccessor newGraphAccessor(final AbstractTripleStore kb) { + + /* + * Use a read-only view (sampling depends on access to the BTree rather + * than the ReadCommittedIndex). + */ + final BigdataGraphAccessor graphAccessor = new BigdataGraphAccessor( + kb.getIndexManager(), kb.getNamespace(), kb + .getIndexManager().getLastCommitTime()); + + return graphAccessor; + + } + + } + +} Modified: branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/service/ServiceCall.java =================================================================== --- branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/service/ServiceCall.java 2014-02-20 12:13:07 UTC (rev 7857) +++ branches/RDR/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/service/ServiceCall.java 2014-02-20 14:19:11 UTC (rev 7858) @@ -60,13 +60,16 @@ IServiceOptions getServiceOptions(); /** - * Invoke an service. + * Invoke an service. The caller will join the results from the service with + * the solutions in the context in which the service was invoked (using a + * solution set hash join pattern). * * @param bindingSets * The binding sets flowing into the service. * * @return An iterator from which the solutions can be drained. If the * iterator is closed, the service invocation must be cancelled. + * * @throws Exception * * TODO RECHUNKING: This should probably return an This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |