From: <mar...@us...> - 2013-08-28 14:23:52
Revision: 7351 http://bigdata.svn.sourceforge.net/bigdata/?rev=7351&view=rev Author: martyncutcher Date: 2013-08-28 14:23:42 +0000 (Wed, 28 Aug 2013) Log Message: ----------- Modify DumpLogDigest to use logicalServicePath and Quorum client to access service HAGlue, and remove use of SimpleDiscovery class Modified Paths: -------------- branches/READ_CACHE2/bigdata-jini/src/java/com/bigdata/journal/jini/ha/DumpLogDigests.java branches/READ_CACHE2/bigdata-jini/src/test/com/bigdata/journal/jini/ha/AbstractHA3JournalServerTestCase.java branches/READ_CACHE2/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA3DumpLogs.java branches/READ_CACHE2/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA3JournalServer.java Removed Paths: ------------- branches/READ_CACHE2/bigdata-jini/src/java/com/bigdata/journal/jini/ha/SimpleDiscovery.java Modified: branches/READ_CACHE2/bigdata-jini/src/java/com/bigdata/journal/jini/ha/DumpLogDigests.java =================================================================== --- branches/READ_CACHE2/bigdata-jini/src/java/com/bigdata/journal/jini/ha/DumpLogDigests.java 2013-08-28 14:14:07 UTC (rev 7350) +++ branches/READ_CACHE2/bigdata-jini/src/java/com/bigdata/journal/jini/ha/DumpLogDigests.java 2013-08-28 14:23:42 UTC (rev 7351) @@ -32,6 +32,7 @@ import java.util.Iterator; import java.util.List; import java.util.NoSuchElementException; +import java.util.UUID; import java.util.concurrent.Callable; import java.util.concurrent.ConcurrentSkipListSet; import java.util.concurrent.ExecutionException; @@ -49,7 +50,10 @@ import com.bigdata.ha.IndexManagerCallable; import com.bigdata.ha.halog.IHALogReader; import com.bigdata.journal.ITransactionService; +import com.bigdata.journal.jini.ha.HAClient.HAConnection; import com.bigdata.journal.jini.ha.HALogIndex.IHALogRecord; +import com.bigdata.quorum.Quorum; +import com.bigdata.quorum.QuorumClient; import cutthecrap.utils.striterators.EmptyIterator; @@ -79,19 +83,33 @@ * @author Martyn Cutcher * */ -public class DumpLogDigests extends SimpleDiscovery { +public class DumpLogDigests { private static final Logger log = Logger.getLogger(DumpLogDigests.class); private static final int DEFAULT_SERVICE_THREADS = 5; private static final int DEFAULT_BATCH = 50; - /** - * Just needs the configuration file for the discovery service and the local zookeeper port - */ - public DumpLogDigests(final String[] configFiles) throws ConfigurationException, IOException, InterruptedException { - super(configFiles); - } + final HAClient client; + + /** + * Just needs the configuration file for the discovery service and the local + * zookeeper port + */ + public DumpLogDigests(final String[] configFiles) + throws ConfigurationException, IOException, InterruptedException { + + // wait for zk services to register! 
+ Thread.sleep(1000); + + client = new HAClient(configFiles); + } + + public void shutdown() { + client.disconnect(true); + } + + public Iterator<ServiceLogs> summary(final String serviceRoot) throws IOException, ExecutionException { return summary(dump(serviceRoot, DEFAULT_BATCH, DEFAULT_SERVICE_THREADS)); @@ -546,5 +564,27 @@ return tmp; } - + + + private List<HAGlue> services(final String serviceRoot) throws IOException, + ExecutionException, KeeperException, InterruptedException { + + + final List<HAGlue> ret = new ArrayList<HAGlue>(); + + final HAConnection cnxn = client.connect(); + + final Quorum<HAGlue, QuorumClient<HAGlue>> quorum = cnxn.getHAGlueQuorum(serviceRoot); + final UUID[] uuids = quorum.getJoined(); + + QuorumClient<HAGlue> qclient = quorum.getClient(); + + for (UUID uuid : uuids) { + ret.add(qclient.getService(uuid)); + } + + return ret; + + } + } Deleted: branches/READ_CACHE2/bigdata-jini/src/java/com/bigdata/journal/jini/ha/SimpleDiscovery.java =================================================================== --- branches/READ_CACHE2/bigdata-jini/src/java/com/bigdata/journal/jini/ha/SimpleDiscovery.java 2013-08-28 14:14:07 UTC (rev 7350) +++ branches/READ_CACHE2/bigdata-jini/src/java/com/bigdata/journal/jini/ha/SimpleDiscovery.java 2013-08-28 14:23:42 UTC (rev 7351) @@ -1,106 +0,0 @@ -/** - -Copyright (C) SYSTAP, LLC 2006-2013. All rights reserved. - -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ - -package com.bigdata.journal.jini.ha; - -import java.io.IOException; -import java.net.InetAddress; -import java.util.ArrayList; -import java.util.List; -import java.util.concurrent.ExecutionException; - -import net.jini.config.Configuration; -import net.jini.config.ConfigurationException; -import net.jini.config.ConfigurationProvider; -import net.jini.core.lookup.ServiceItem; -import net.jini.discovery.DiscoveryEvent; -import net.jini.discovery.DiscoveryListener; -import net.jini.discovery.LookupDiscoveryManager; -import net.jini.lease.LeaseRenewalManager; -import net.jini.lookup.ServiceDiscoveryManager; - -import org.apache.zookeeper.KeeperException; -import org.apache.zookeeper.ZooKeeper; - -import com.bigdata.ha.HAGlue; -import com.bigdata.io.SerializerUtil; -import com.bigdata.journal.jini.ha.HAClient.HAConnection; -import com.bigdata.quorum.zk.QuorumServiceState; -import com.bigdata.service.jini.JiniClientConfig; -import com.bigdata.util.config.NicUtil; -import com.bigdata.zookeeper.ZooHelper; -import com.bigdata.zookeeper.ZooKeeperAccessor; - -public class SimpleDiscovery { - - final HAClient client; - - /** - * Just needs the configuration file for the discovery service and the local - * zookeeper port - */ - public SimpleDiscovery(final String[] configFiles) - throws ConfigurationException, IOException, InterruptedException { - - // wait for zk services to register! 
- Thread.sleep(1000); - - client = new HAClient(configFiles); - } - - public void shutdown() { - client.disconnect(true); - } - - - public List<HAGlue> services(final String serviceRoot) throws IOException, - ExecutionException, KeeperException, InterruptedException { - - final HAConnection cnxn = client.connect(); - - List<HAGlue> ret = new ArrayList<HAGlue>(); - - final ZooKeeper zk = cnxn.getZookeeperAccessor().getZookeeper(); - - final List<String> data = zk.getChildren( - serviceRoot + "/quorum/joined", null); - - // Now access serviceIDs so that we can use discovery to gain HAGlue - // interface - for (final String d : data) { - final byte[] bytes = zk.getData( - serviceRoot + "/quorum/joined/" + d, false, null); - - final QuorumServiceState qs = (QuorumServiceState) SerializerUtil - .deserialize(bytes); - - ret.add(cnxn.getHAGlueService(qs - .serviceUUID())); - } - - return ret; - - } - -} Modified: branches/READ_CACHE2/bigdata-jini/src/test/com/bigdata/journal/jini/ha/AbstractHA3JournalServerTestCase.java =================================================================== --- branches/READ_CACHE2/bigdata-jini/src/test/com/bigdata/journal/jini/ha/AbstractHA3JournalServerTestCase.java 2013-08-28 14:14:07 UTC (rev 7350) +++ branches/READ_CACHE2/bigdata-jini/src/test/com/bigdata/journal/jini/ha/AbstractHA3JournalServerTestCase.java 2013-08-28 14:23:42 UTC (rev 7351) @@ -1241,7 +1241,7 @@ * test and a {@link UUID} in order to keep HAJournalServer processes that * do not die nicely from causing crosstalk between the unit tests. */ - private String getLogicalServiceId() { + protected String getLogicalServiceId() { return logicalServiceId; Modified: branches/READ_CACHE2/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA3DumpLogs.java =================================================================== --- branches/READ_CACHE2/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA3DumpLogs.java 2013-08-28 14:14:07 UTC (rev 7350) +++ branches/READ_CACHE2/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA3DumpLogs.java 2013-08-28 14:23:42 UTC (rev 7351) @@ -29,7 +29,6 @@ import java.util.UUID; import com.bigdata.ha.HAGlue; -import com.bigdata.ha.HAStatusEnum; public class TestHA3DumpLogs extends AbstractHA3JournalServerTestCase { @@ -47,6 +46,7 @@ startB(); awaitMetQuorum(); + final UUID leaderId = quorum.getLeaderId(); final HAGlue leader = quorum.getClient().getService(leaderId); awaitNSSAndHAReady(leader); @@ -76,10 +76,11 @@ public void testBatchDumpLogs() throws Exception { // only start 2 services to ensure logs are maintained - startA(); - startB(); + startA(); + startB(); - awaitMetQuorum(); + awaitMetQuorum(); + final UUID leaderId = quorum.getLeaderId(); final HAGlue leader = quorum.getClient().getService(leaderId); awaitNSSAndHAReady(leader); @@ -124,9 +125,9 @@ try { final StringWriter sw = new StringWriter(); final PrintWriter pw = new PrintWriter(sw); + final String logicalServiceId = getLogicalServiceId(); + final Iterator<DumpLogDigests.ServiceLogs> serviceLogInfo = dlds.dump(logicalServiceId /*logicalServiceZPath*/, 20/*batchlogs*/, 5/*serviceThreads*/); - final Iterator<DumpLogDigests.ServiceLogs> serviceLogInfo = dlds.dump(logicalServiceZPath, 20/*batchlogs*/, 5/*serviceThreads*/); - while (serviceLogInfo.hasNext()) { DumpLogDigests.ServiceLogs info = serviceLogInfo.next(); pw.println(info); @@ -143,13 +144,14 @@ /** * Tests main entry for DumpLogDigests - * @param b + * @param summary */ private void mainShowLogs(boolean summary) throws Exception { + final 
String logicalServiceId = getLogicalServiceId(); DumpLogDigests.main( new String[] { SRC_PATH + "dumpFile.config", - logicalServiceZPath, + logicalServiceId /*logicalServiceZPath*/, (summary ? "summary" : "full") } ); Modified: branches/READ_CACHE2/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA3JournalServer.java =================================================================== --- branches/READ_CACHE2/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA3JournalServer.java 2013-08-28 14:14:07 UTC (rev 7350) +++ branches/READ_CACHE2/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA3JournalServer.java 2013-08-28 14:23:42 UTC (rev 7351) @@ -28,11 +28,17 @@ import java.io.File; import java.io.IOException; +import java.io.PrintWriter; +import java.io.StringWriter; +import java.net.InetAddress; import java.text.SimpleDateFormat; import java.util.Date; +import java.util.Iterator; +import java.util.List; import java.util.Properties; import java.util.UUID; import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; import java.util.concurrent.Executors; import java.util.concurrent.FutureTask; import java.util.concurrent.ScheduledExecutorService; @@ -41,18 +47,31 @@ import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicLong; +import org.apache.zookeeper.ZooKeeper; + import net.jini.config.Configuration; +import net.jini.config.ConfigurationException; +import net.jini.config.ConfigurationProvider; +import net.jini.core.lookup.ServiceItem; +import net.jini.discovery.LookupDiscoveryManager; +import net.jini.lease.LeaseRenewalManager; +import net.jini.lookup.ServiceDiscoveryManager; import com.bigdata.ha.HAGlue; import com.bigdata.ha.HAStatusEnum; import com.bigdata.ha.halog.HALogWriter; import com.bigdata.ha.halog.IHALogReader; import com.bigdata.ha.msg.HARootBlockRequest; +import com.bigdata.io.SerializerUtil; import com.bigdata.journal.AbstractJournal; import com.bigdata.quorum.Quorum; +import com.bigdata.quorum.zk.QuorumServiceState; import com.bigdata.rdf.sail.webapp.client.RemoteRepository; import com.bigdata.service.jini.JiniClientConfig; import com.bigdata.util.NV; +import com.bigdata.util.config.NicUtil; +import com.bigdata.zookeeper.ZooHelper; +import com.bigdata.zookeeper.ZooKeeperAccessor; /** * Test suites for an {@link HAJournalServer} quorum with a replication factor @@ -268,6 +287,49 @@ } + public void _testStartABCSimultaneousLookupRestarts() throws Exception { + + final ABC abc = new ABC(false/*sequential*/); // simultaneous start. + + final HAGlue serverA = abc.serverA, serverB = abc.serverB, serverC = abc.serverC; + + // Verify quorum is FULLY met. + final long t1 = awaitFullyMetQuorum(); + +// // Verify KB exists on leader. +// final HAGlue leader = quorum.getClient().getLeader(token); + + // await the KB create commit point to become visible on each service. + awaitCommitCounter(1L, new HAGlue[] { serverA, serverB, serverC }); + + // Verify binary equality of ALL journals. 
+ assertDigestsEquals(new HAGlue[] { serverA, serverB, serverC }); + + // Now let's stop zookeeper + + ((HAJournalTest.HAGlueTest) abc.serverA).log("About to stop zookeeper"); + + System.err.println("stop zookeeper"); + for (int i = 5; i > 0; i--) { + Thread.sleep(1000); + System.err.println("WAIT: " + i); + } + + ((HAJournalTest.HAGlueTest) abc.serverA).log("About to start zookeeper"); + System.err.println("ServerA: " + abc.serverA.getHAStatus()); + + System.err.println("start zookeeper"); + + Thread.sleep(5000); + + ((HAJournalTest.HAGlueTest) abc.serverA).log("Waiting for quorum meet"); + + // Verify quorum is FULLY met. + final long t2 = awaitFullyMetQuorum(); + + assertFalse(t1 == t2); + } + /** * Start 3 services. Verify quorum meets and is fully met and that the * journal digests are equals. Verify that there are no HALog files since @@ -2252,7 +2314,7 @@ */ // disable from standard test runs public void _testSANDBOXStressABC_Restart() throws Exception { - for (int i = 1; i <= 20; i++) { + for (int i = 1; i <= 40; i++) { try { new ABC(true/* sequential */); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
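
The substance of revision 7351 is the removal of the SimpleDiscovery zookeeper tree walk in favour of a quorum-based lookup. Condensed from the diff above for reference (a sketch only: the surrounding method, imports, and exception handling are elided, and all names are taken from the diff itself):

    // Resolve the HAGlue proxies of the joined services via the quorum,
    // rather than by reading the zookeeper children directly.
    final HAConnection cnxn = client.connect();

    final Quorum<HAGlue, QuorumClient<HAGlue>> quorum =
            cnxn.getHAGlueQuorum(serviceRoot);

    // The service UUIDs currently joined with the met quorum.
    final UUID[] uuids = quorum.getJoined();

    final QuorumClient<HAGlue> qclient = quorum.getClient();

    final List<HAGlue> ret = new ArrayList<HAGlue>();
    for (UUID uuid : uuids) {
        ret.add(qclient.getService(uuid)); // UUID -> RMI proxy
    }

This pushes the knowledge of the zookeeper layout (the quorum/joined zpath and the QuorumServiceState deserialization that SimpleDiscovery performed by hand) down into the Quorum implementation.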
From: <tho...@us...> - 2013-08-28 14:14:19
Revision: 7350 http://bigdata.svn.sourceforge.net/bigdata/?rev=7350&view=rev Author: thompsonbry Date: 2013-08-28 14:14:07 +0000 (Wed, 28 Aug 2013) Log Message: ----------- Fixed bug in AbstractCachingServiceClient where it was hitting an assertion error. The root cause is that the ServiceDiscoveryEvent is delivered to the ServiceCache asynchronously, so this is a data race. The assert was removed and the rationale was documented in the code. Cleared up some confusion over logicalServiceId and logicalServiceZPath. Modified TestHA3DumpLogs to use awaitNSSAndHAReady(). Simplified dumpFile.config. Fixed some problems where interrupts were swallowed (forcing an early return, but not propagated to the caller) in the jini related code. See #728 (Refactor to create HA Client) See #687 (HAJournalServer Cache not populated) Modified Paths: -------------- branches/READ_CACHE2/bigdata/src/java/com/bigdata/quorum/AbstractQuorum.java branches/READ_CACHE2/bigdata/src/java/com/bigdata/quorum/AbstractQuorumClient.java branches/READ_CACHE2/bigdata/src/java/com/bigdata/quorum/AbstractQuorumMember.java branches/READ_CACHE2/bigdata/src/java/com/bigdata/quorum/QuorumClient.java branches/READ_CACHE2/bigdata/src/java/com/bigdata/quorum/QuorumMember.java branches/READ_CACHE2/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAClient.java branches/READ_CACHE2/bigdata-jini/src/java/com/bigdata/quorum/zk/ZKQuorumImpl.java branches/READ_CACHE2/bigdata-jini/src/java/com/bigdata/service/jini/lookup/AbstractCachingServiceClient.java branches/READ_CACHE2/bigdata-jini/src/java/com/bigdata/service/jini/lookup/ServiceCache.java branches/READ_CACHE2/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestAll.java branches/READ_CACHE2/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA3DumpLogs.java branches/READ_CACHE2/bigdata-jini/src/test/com/bigdata/journal/jini/ha/dumpFile.config branches/READ_CACHE2/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/HAStatusServletUtil.java Modified: branches/READ_CACHE2/bigdata/src/java/com/bigdata/quorum/AbstractQuorum.java =================================================================== --- branches/READ_CACHE2/bigdata/src/java/com/bigdata/quorum/AbstractQuorum.java 2013-08-28 09:56:00 UTC (rev 7349) +++ branches/READ_CACHE2/bigdata/src/java/com/bigdata/quorum/AbstractQuorum.java 2013-08-28 14:14:07 UTC (rev 7350) @@ -445,7 +445,7 @@ // addListener(client); if (client instanceof QuorumMember<?>) { // create actor for that service. - this.actor = newActor(client.getLogicalServiceId(), + this.actor = newActor(client.getLogicalServiceZPath(), ((QuorumMember<?>) client).getServiceId()); } if (singleThreadWatcher) { @@ -464,7 +464,7 @@ // Reach out to the durable quorum and get the lastValidToken this.lastValidToken = getLastValidTokenFromQuorumState(client); // Setup the watcher. - this.watcher = newWatcher(client.getLogicalServiceId()); + this.watcher = newWatcher(client.getLogicalServiceZPath()); this.eventService = (sendSynchronous ? 
null : Executors .newSingleThreadExecutor(new DaemonThreadFactory( "QuorumEventService"))); Modified: branches/READ_CACHE2/bigdata/src/java/com/bigdata/quorum/AbstractQuorumClient.java =================================================================== --- branches/READ_CACHE2/bigdata/src/java/com/bigdata/quorum/AbstractQuorumClient.java 2013-08-28 09:56:00 UTC (rev 7349) +++ branches/READ_CACHE2/bigdata/src/java/com/bigdata/quorum/AbstractQuorumClient.java 2013-08-28 14:14:07 UTC (rev 7350) @@ -47,21 +47,27 @@ private final AtomicReference<Quorum<?, ?>> quorum = new AtomicReference<Quorum<?, ?>>(); - private final String logicalServiceId; + private final String logicalServiceZPath; @Override - final public String getLogicalServiceId() { + final public String getLogicalServiceZPath() { - return logicalServiceId; + return logicalServiceZPath; } - protected AbstractQuorumClient(final String logicalServiceId) { + /** + * + * @param logicalServiceZPath + * the fully qualified logical service identifier (for zookeeper, + * this is the logicalServiceZPath). + */ + protected AbstractQuorumClient(final String logicalServiceZPath) { - if(logicalServiceId == null) + if(logicalServiceZPath == null) throw new IllegalArgumentException(); - this.logicalServiceId = logicalServiceId; + this.logicalServiceZPath = logicalServiceZPath; } Modified: branches/READ_CACHE2/bigdata/src/java/com/bigdata/quorum/AbstractQuorumMember.java =================================================================== --- branches/READ_CACHE2/bigdata/src/java/com/bigdata/quorum/AbstractQuorumMember.java 2013-08-28 09:56:00 UTC (rev 7349) +++ branches/READ_CACHE2/bigdata/src/java/com/bigdata/quorum/AbstractQuorumMember.java 2013-08-28 14:14:07 UTC (rev 7350) @@ -43,7 +43,8 @@ private final UUID serviceId; - protected AbstractQuorumMember(final String logicalServiceId, UUID serviceId) { + protected AbstractQuorumMember(final String logicalServiceId, + final UUID serviceId) { super(logicalServiceId); Modified: branches/READ_CACHE2/bigdata/src/java/com/bigdata/quorum/QuorumClient.java =================================================================== --- branches/READ_CACHE2/bigdata/src/java/com/bigdata/quorum/QuorumClient.java 2013-08-28 09:56:00 UTC (rev 7349) +++ branches/READ_CACHE2/bigdata/src/java/com/bigdata/quorum/QuorumClient.java 2013-08-28 14:14:07 UTC (rev 7350) @@ -44,13 +44,22 @@ public interface QuorumClient<S extends Remote> extends QuorumListener { /** - * The identifier of the logical service whose quorum state will be - * monitored. A highly available service is comprised of multiple physical - * services which are instances of the same logical service. + * The fully qualified identifier of the logical service whose quorum state + * will be monitored (for zookeeper, this is the logicalServiceZPath). A + * highly available service is comprised of multiple physical services which + * are instances of the same logical service. + * <p> + * Note: The method was renamed from <code>getLogicalServiceId()</code> to + * {@link #getLogicalServiceZPath()} to avoid confusion with the zookeeper + * integration which has both a logicalServiceId (just the last component of + * the zpath) and a logicalServiceZPath. The {@link Quorum} and + * {@link QuorumClient} interfaces only understand a single logical service + * identifier - this is what corresponds to the + * <code?logicalServiceZPath</code> for the zookeeper integration. 
* * @see QuorumMember#getServiceId() */ - String getLogicalServiceId(); + String getLogicalServiceZPath(); /** * Life cycle message sent when the client will begin to receive messages Modified: branches/READ_CACHE2/bigdata/src/java/com/bigdata/quorum/QuorumMember.java =================================================================== --- branches/READ_CACHE2/bigdata/src/java/com/bigdata/quorum/QuorumMember.java 2013-08-28 09:56:00 UTC (rev 7349) +++ branches/READ_CACHE2/bigdata/src/java/com/bigdata/quorum/QuorumMember.java 2013-08-28 14:14:07 UTC (rev 7350) @@ -56,7 +56,7 @@ * The identifier for this service (the quorum member). Each quorum member * is a physical service instance of some highly available logical service. * - * @see QuorumClient#getLogicalServiceId() + * @see QuorumClient#getLogicalServiceZPath() */ UUID getServiceId(); Modified: branches/READ_CACHE2/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAClient.java =================================================================== --- branches/READ_CACHE2/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAClient.java 2013-08-28 09:56:00 UTC (rev 7349) +++ branches/READ_CACHE2/bigdata-jini/src/java/com/bigdata/journal/jini/ha/HAClient.java 2013-08-28 14:14:07 UTC (rev 7350) @@ -109,7 +109,10 @@ * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ * - * TODO Refactor the HA3 test suite to use the HAClient class. + * @see <a href="http://sourceforge.net/apps/trac/bigdata/ticket/728" > Refactor + * to create HAClient</a> + * + * TODO Refactor the HA3 test suite to use the HAClient class. */ public class HAClient { @@ -820,7 +823,7 @@ // Setup discovery for HAGlue clients. discoveryClient = new HAGlueServicesClient( serviceDiscoveryManager, - null/* serviceDiscoveryListener */, cacheMissTimeout); + this/* serviceDiscoveryListener */, cacheMissTimeout); // And set the reference. The client is now "connected". this.clientRef.set(client); @@ -1028,11 +1031,33 @@ */ public HAGlue getHAGlueService(final UUID serviceUUID) { - return discoveryClient.getService(); + return discoveryClient.getService(serviceUUID); } /** + * Resolve the array of service {@link UUID}s to their RMI proxies. + * + * @param serviceUUIDs + * The service {@link UUID}s. + * + * @return The correlated array of RMI proxies. + */ + public HAGlue[] getHAGlueService(final UUID[] serviceUUIDs) { + + final HAGlue[] a = new HAGlue[serviceUUIDs.length]; + + for (int i = 0; i < a.length; i++) { + + a[i] = discoveryClient.getService(serviceUUIDs[i]); + + } + + return a; + + } + + /** * Return an array UUIDs for discovered {@link HAGlue} services. * * @param maxCount @@ -1058,7 +1083,8 @@ * Return the set of known logical service identifiers for HA * replication clusters. These are extracted from zookeeper. * - * @return The known logical service identifiers. + * @return The known logical service identifiers (just the last + * component of the zpath). * * @throws InterruptedException * @throws KeeperException @@ -1240,9 +1266,9 @@ private class MyQuorumClient extends AbstractQuorumClient<HAGlue> { - protected MyQuorumClient(final String logicalServiceId) { + protected MyQuorumClient(final String logicalServiceZPath) { - super(logicalServiceId); + super(logicalServiceZPath); } @@ -1265,6 +1291,7 @@ * <p> * {@inheritDoc} */ + @Override public void serviceAdded(final ServiceDiscoveryEvent e) { final ServiceItem serviceItem = e.getPostEventServiceItem(); @@ -1368,6 +1395,8 @@ } catch (InterruptedException ex) { + // Propagate interrupt. 
+ Thread.currentThread().interrupt(); return; } @@ -1398,6 +1427,8 @@ } catch (InterruptedException ex) { + // Propagate interrupt. + Thread.currentThread().interrupt(); return; } Modified: branches/READ_CACHE2/bigdata-jini/src/java/com/bigdata/quorum/zk/ZKQuorumImpl.java =================================================================== --- branches/READ_CACHE2/bigdata-jini/src/java/com/bigdata/quorum/zk/ZKQuorumImpl.java 2013-08-28 09:56:00 UTC (rev 7349) +++ branches/READ_CACHE2/bigdata-jini/src/java/com/bigdata/quorum/zk/ZKQuorumImpl.java 2013-08-28 14:14:07 UTC (rev 7350) @@ -2168,8 +2168,8 @@ @Override protected long getLastValidTokenFromQuorumState(final C client) { // super.getLastValidTokenFromQuorumState(client); - final String logicalServiceId = client.getLogicalServiceId(); - final String quorumZPath = logicalServiceId + "/" + QUORUM; + final String logicalServiceZPath = client.getLogicalServiceZPath(); + final String quorumZPath = logicalServiceZPath + "/" + QUORUM; while (true) { final byte[] data; try { Modified: branches/READ_CACHE2/bigdata-jini/src/java/com/bigdata/service/jini/lookup/AbstractCachingServiceClient.java =================================================================== --- branches/READ_CACHE2/bigdata-jini/src/java/com/bigdata/service/jini/lookup/AbstractCachingServiceClient.java 2013-08-28 09:56:00 UTC (rev 7349) +++ branches/READ_CACHE2/bigdata-jini/src/java/com/bigdata/service/jini/lookup/AbstractCachingServiceClient.java 2013-08-28 14:14:07 UTC (rev 7350) @@ -329,6 +329,8 @@ if (log.isInfoEnabled()) log.info("Interrupted - no match."); + // Propagate the interrupt. + Thread.currentThread().interrupt(); return null; } @@ -428,6 +430,8 @@ if (log.isInfoEnabled()) log.info("Interrupted - no match."); + // Propagate the interrupt. + Thread.currentThread().interrupt(); return null; } @@ -448,20 +452,45 @@ log.info("Found: " + item); /** - * Verify that the discovered item was entered into the ServiceCache. - * The ServiceCache implements the ServiceDiscoveryListener interface. - * The LookupCache should have delivered a ServiceDiscoverEvent to the - * ServiceDiscoveryListener intercace on the ServiceCache when we - * performed the lookup at the top of this method. + * Note: See below. We can not verify this. [Verify that the discovered + * item was entered into the ServiceCache. The ServiceCache implements + * the ServiceDiscoveryListener interface. The LookupCache should have + * delivered a ServiceDiscoverEvent to the ServiceDiscoveryListener + * interface on the ServiceCache when we performed the lookup at the top + * of this method. If you hit this, then a likely explanation is that + * the service interface specified to the constructor was not compatible + * with the template (i.e., one or the other was incorrect).] * - * Note: If you hit this, then a likely explanation is that the service - * interface specified to the constructor was not compatible with the - * template (i.e., one or the other was incorrect). + * Note: The ServiceDiscoveryEvent messages are delivered by a thread + * started by the ServiceDiscoveryManager as observed in the trace below + * (the serviceAdded() method was modified to generate this trace and + * does not normally throw the UnsupportedOperationException). This is + * the reason why the asserts below are sometimes tripped. The event + * delivery in another thread amounts to a data race between the lookup + * of the service (above) and its insert into the ServiceCache (which is + * asynchronous). 
* + * <pre> + * Aug 28, 2013 9:35:00 AM com.sun.jini.thread.TaskManager$TaskThread run + * WARNING: Task.run exception + * java.lang.UnsupportedOperationException + * at com.bigdata.journal.jini.ha.HAClient$HAConnection.serviceAdded(HAClient.java:1270) + * at com.bigdata.service.jini.lookup.ServiceCache.serviceAdded(ServiceCache.java:125) + * at net.jini.lookup.ServiceDiscoveryManager$LookupCacheImpl.serviceNotifyDo(ServiceDiscoveryManager.java:2181) + * at net.jini.lookup.ServiceDiscoveryManager$LookupCacheImpl.serviceNotifyDo(ServiceDiscoveryManager.java:2168) + * at net.jini.lookup.ServiceDiscoveryManager$LookupCacheImpl.addServiceNotify(ServiceDiscoveryManager.java:2128) + * at net.jini.lookup.ServiceDiscoveryManager$LookupCacheImpl.access$2200(ServiceDiscoveryManager.java:843) + * at net.jini.lookup.ServiceDiscoveryManager$LookupCacheImpl$NewOldServiceTask.run(ServiceDiscoveryManager.java:1417) + * at com.sun.jini.thread.TaskManager$TaskThread.run(TaskManager.java:331) + * </pre> + * * @see <a href="http://sourceforge.net/apps/trac/bigdata/ticket/687" > * HAJournalServer Cache not populated </a> + * + * @see <a href="http://sourceforge.net/apps/trac/bigdata/ticket/728" > + * Refactor to create HAClient</a> */ - assert serviceCache.getServiceItemByID(serviceId) != null; +// assert serviceCache.getServiceItemByID(serviceId) != null; // if (serviceCache.getServiceItemByID(serviceId) == null) { // throw new AssertionError( // "Failed to install service into cache: serviceId=" Modified: branches/READ_CACHE2/bigdata-jini/src/java/com/bigdata/service/jini/lookup/ServiceCache.java =================================================================== --- branches/READ_CACHE2/bigdata-jini/src/java/com/bigdata/service/jini/lookup/ServiceCache.java 2013-08-28 09:56:00 UTC (rev 7349) +++ branches/READ_CACHE2/bigdata-jini/src/java/com/bigdata/service/jini/lookup/ServiceCache.java 2013-08-28 14:14:07 UTC (rev 7350) @@ -108,6 +108,7 @@ * this class is listening. The set of distinct joined services is * accessible via the {@link LookupCache}. */ + @Override public void serviceAdded(final ServiceDiscoveryEvent e) { final ServiceItem item = e.getPostEventServiceItem(); @@ -130,6 +131,7 @@ /** * updates the map and delegates to the {@link #listener}. */ + @Override public void serviceChanged(final ServiceDiscoveryEvent e) { final ServiceItem item = e.getPostEventServiceItem(); @@ -157,6 +159,7 @@ * explanation for those transients is that the host with the service is * swapping and has failed to keep up its pings to the jini registrar(s). */ + @Override public void serviceRemoved(final ServiceDiscoveryEvent e) { final ServiceItem item = e.getPreEventServiceItem(); @@ -165,6 +168,9 @@ final Object service = item.service; + if (log.isInfoEnabled()) + log.info("" + e + ", class=" + item); + if (service instanceof IService) { try { @@ -183,9 +189,6 @@ } - if (log.isInfoEnabled()) - log.info("" + e + ", class=" + item); - serviceIdMap.remove(serviceID); if (listener != null) { Modified: branches/READ_CACHE2/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestAll.java =================================================================== --- branches/READ_CACHE2/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestAll.java 2013-08-28 09:56:00 UTC (rev 7349) +++ branches/READ_CACHE2/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestAll.java 2013-08-28 14:14:07 UTC (rev 7350) @@ -110,6 +110,9 @@ // // Test suite for the global write lock. 
// suite.addTestSuite(TestHAJournalServerGlobalWriteLock.class); + // Test suite for utility to compute and compare HALog digests. + suite.addTestSuite(TestHA3DumpLogs.class); + // Test suite for direct IBufferStrategy data xfer tests. suite.addTestSuite(TestRawTransfers.class); Modified: branches/READ_CACHE2/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA3DumpLogs.java =================================================================== --- branches/READ_CACHE2/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA3DumpLogs.java 2013-08-28 09:56:00 UTC (rev 7349) +++ branches/READ_CACHE2/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA3DumpLogs.java 2013-08-28 14:14:07 UTC (rev 7350) @@ -26,7 +26,11 @@ import java.io.PrintWriter; import java.io.StringWriter; import java.util.Iterator; +import java.util.UUID; +import com.bigdata.ha.HAGlue; +import com.bigdata.ha.HAStatusEnum; + public class TestHA3DumpLogs extends AbstractHA3JournalServerTestCase { public TestHA3DumpLogs() { @@ -43,6 +47,9 @@ startB(); awaitMetQuorum(); + final UUID leaderId = quorum.getLeaderId(); + final HAGlue leader = quorum.getClient().getService(leaderId); + awaitNSSAndHAReady(leader); // Run through a few transactions to generate some log files simpleTransaction(); @@ -69,10 +76,13 @@ public void testBatchDumpLogs() throws Exception { // only start 2 services to ensure logs are maintained - startA(); - startB(); + startA(); + startB(); - awaitMetQuorum(); + awaitMetQuorum(); + final UUID leaderId = quorum.getLeaderId(); + final HAGlue leader = quorum.getClient().getService(leaderId); + awaitNSSAndHAReady(leader); // Run through a few transactions to generate some log files // Ensure that services use multiple batches Modified: branches/READ_CACHE2/bigdata-jini/src/test/com/bigdata/journal/jini/ha/dumpFile.config =================================================================== --- branches/READ_CACHE2/bigdata-jini/src/test/com/bigdata/journal/jini/ha/dumpFile.config 2013-08-28 09:56:00 UTC (rev 7349) +++ branches/READ_CACHE2/bigdata-jini/src/test/com/bigdata/journal/jini/ha/dumpFile.config 2013-08-28 14:14:07 UTC (rev 7350) @@ -132,33 +132,7 @@ */ private static namespace = "kb"; - - private static kb = new NV[] { - - /* Setup for QUADS mode without the full text index. */ - - new NV(BigdataSail.Options.TRUTH_MAINTENANCE, "false" ), - new NV(BigdataSail.Options.QUADS, "true"), - new NV(BigdataSail.Options.STATEMENT_IDENTIFIERS, "false"), - new NV(BigdataSail.Options.TEXT_INDEX, "false"), - new NV(BigdataSail.Options.AXIOMS_CLASS,"com.bigdata.rdf.axioms.NoAxioms"), - new NV(BigdataSail.Options.QUERY_TIME_EXPANDER, "false"), - // Bump up the branching factor for the lexicon indices on the named kb. - // com.bigdata.namespace.kb.lex.com.bigdata.btree.BTree.branchingFactor=400 - new NV(com.bigdata.config.Configuration.getOverrideProperty - ( namespace + "." + LexiconRelation.NAME_LEXICON_RELATION, - IndexMetadata.Options.BTREE_BRANCHING_FACTOR - ), "400"), - - // Bump up the branching factor for the statement indices on the named kb. - // com.bigdata.namespace.kb.spo.com.bigdata.btree.BTree.branchingFactor=1024 - new NV(com.bigdata.config.Configuration.getOverrideProperty - ( namespace + "." + SPORelation.NAME_SPO_RELATION, - IndexMetadata.Options.BTREE_BRANCHING_FACTOR - ), "1024"), - }; - } /* @@ -229,88 +203,3 @@ maxLeaseDuration = bigdata.leaseTimeout; } - -/* - * Server configuration options. 
- */ -com.bigdata.journal.jini.ha.HAJournalServer { - - args = new String[] { - "-showversion", - "-Djava.security.policy=policy.all", - "-Dlog4j.configuration=file:log4j-A.properties", - "-Djava.util.logging.config.file=logging-A.properties", - "-server", - "-Xmx1G", - "-ea", - "-Xdebug","-Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=1080" - }; - - serviceDir = bigdata.serviceDir; - - // Default policy. - restorePolicy = new com.bigdata.journal.jini.ha.DefaultRestorePolicy(); - - // Suppress automatic snapshots. - snapshotPolicy = new com.bigdata.journal.jini.ha.NoSnapshotPolicy(); - - logicalServiceId = bigdata.logicalServiceId; - - writePipelineAddr = new InetSocketAddress("localhost",bigdata.haPort); - - /* - writePipelineAddr = new InetSocketAddress(// - InetAddress.getByName(// - NicUtil.getIpAddress("default.nic", "default", - false// loopbackOk - )), // - bigdata.haPort - ); - */ - - replicationFactor = bigdata.replicationFactor; - - // Use the overridden version of the HAJournal by default so we get the - // HAGlueTest API for every test. - HAJournalClass = "com.bigdata.journal.jini.ha.HAJournalTest"; - -} - -/* - * Journal configuration. - */ -com.bigdata.journal.jini.ha.HAJournal { - - properties = (NV[]) ConfigMath.concat(new NV[] { - - new NV(Options.FILE, - ConfigMath.getAbsolutePath(new File(bigdata.dataDir,"bigdata-ha.jnl"))), - - new NV(Options.BUFFER_MODE,""+BufferMode.DiskRW), - - new NV(IndexMetadata.Options.WRITE_RETENTION_QUEUE_CAPACITY,"4000"), - - new NV(IndexMetadata.Options.BTREE_BRANCHING_FACTOR,"128"), - - new NV(AbstractTransactionService.Options.MIN_RELEASE_AGE,"1"), - - }, bigdata.kb); - -} - -/* - * NanoSparqlServer configuration. - */ -com.bigdata.rdf.sail.webapp.NanoSparqlServer { - - namespace = bigdata.namespace; - - create = true; - - queryThreadPoolSize = 16; - - describeEachNamedGraph = true; - - port = bigdata.nssPort; - -} Modified: branches/READ_CACHE2/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/HAStatusServletUtil.java =================================================================== --- branches/READ_CACHE2/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/HAStatusServletUtil.java 2013-08-28 09:56:00 UTC (rev 7349) +++ branches/READ_CACHE2/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/HAStatusServletUtil.java 2013-08-28 14:14:07 UTC (rev 7350) @@ -156,7 +156,7 @@ p.text("quorumToken=" + quorumToken + ", lastValidToken=" + lastValidToken).node("br").close(); - p.text("logicalServiceId=" + quorumService.getLogicalServiceId()) + p.text("logicalServiceZPath=" + quorumService.getLogicalServiceZPath()) .node("br").close(); // Note: This is the *local* value of getHAStatus(). @@ -648,7 +648,7 @@ quorum.getZookeeper()); dump.dump(out, true/* showDatatrue */, - quorumService.getLogicalServiceId()/* zpath */, + quorumService.getLogicalServiceZPath()/* zpath */, 0/* depth */); } catch (InterruptedException e) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
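
A pattern worth noting in revision 7350 is the interrupt handling: several catch blocks caught InterruptedException and returned early, silently clearing the thread's interrupt status. The fix restores the status before returning. Below is a minimal, self-contained illustration of the idiom; blockingLookup() is a hypothetical stand-in, the real call sites are in AbstractCachingServiceClient and HAClient as shown in the diff:

    import java.util.concurrent.SynchronousQueue;

    public class InterruptPropagationSketch {

        // Stand-in for a jini lookup that can block indefinitely.
        private static String blockingLookup() throws InterruptedException {
            return new SynchronousQueue<String>().take(); // blocks until interrupted
        }

        static String lookupOrNull() {
            try {
                return blockingLookup();
            } catch (InterruptedException ex) {
                // Propagate the interrupt: restore the thread's interrupt
                // status so callers up the stack can still observe it.
                Thread.currentThread().interrupt();
                return null; // early return, but the status is preserved
            }
        }

        public static void main(String[] args) throws Exception {
            final Thread t = new Thread() {
                public void run() {
                    lookupOrNull();
                    // Prints "interrupted? true" - the status survived.
                    System.out.println("interrupted? " + isInterrupted());
                }
            };
            t.start();
            Thread.sleep(100);
            t.interrupt();
            t.join();
        }
    }

Without the Thread.currentThread().interrupt() call the caller has no way to distinguish "no match" from "interrupted", which is exactly the confusion the commit message describes: interrupts swallowed, forcing an early return but never propagated.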
From: <mar...@us...> - 2013-08-28 09:56:07
Revision: 7349 http://bigdata.svn.sourceforge.net/bigdata/?rev=7349&view=rev Author: martyncutcher Date: 2013-08-28 09:56:00 +0000 (Wed, 28 Aug 2013) Log Message: ----------- minor code tidy Modified Paths: -------------- branches/READ_CACHE2/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA3DumpLogs.java Modified: branches/READ_CACHE2/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA3DumpLogs.java =================================================================== --- branches/READ_CACHE2/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA3DumpLogs.java 2013-08-28 09:54:27 UTC (rev 7348) +++ branches/READ_CACHE2/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA3DumpLogs.java 2013-08-28 09:56:00 UTC (rev 7349) @@ -29,6 +29,14 @@ public class TestHA3DumpLogs extends AbstractHA3JournalServerTestCase { + public TestHA3DumpLogs() { + + } + + public TestHA3DumpLogs(final String name) { + super(name); + } + public void testSimpleDumpLogs() throws Exception { // only start 2 services to ensure logs are maintained startA(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
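
The constructors added here are the conventional JUnit 3 pair: TestAll (revision 7350 above) registers the class via suite.addTestSuite(TestHA3DumpLogs.class), and the JUnit 3 suite machinery reflectively creates one test instance per test method, preferring the String constructor so it can pass the method name. A generic sketch of the pattern (ExampleTest is an illustrative name, not part of the codebase):

    import junit.framework.TestCase;

    public class ExampleTest extends TestCase {

        // Used when the test is instantiated directly.
        public ExampleTest() {
            super();
        }

        // Used by TestSuite, which passes in the test method name.
        public ExampleTest(final String name) {
            super(name);
        }

        public void testSanity() {
            assertEquals(2, 1 + 1);
        }
    }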
From: <mar...@us...> - 2013-08-28 09:54:34
Revision: 7348 http://bigdata.svn.sourceforge.net/bigdata/?rev=7348&view=rev Author: martyncutcher Date: 2013-08-28 09:54:27 +0000 (Wed, 28 Aug 2013) Log Message: ----------- minor code tidy Modified Paths: -------------- branches/READ_CACHE2/bigdata-jini/src/java/com/bigdata/journal/jini/ha/DumpLogDigests.java Modified: branches/READ_CACHE2/bigdata-jini/src/java/com/bigdata/journal/jini/ha/DumpLogDigests.java =================================================================== --- branches/READ_CACHE2/bigdata-jini/src/java/com/bigdata/journal/jini/ha/DumpLogDigests.java 2013-08-28 09:51:56 UTC (rev 7347) +++ branches/READ_CACHE2/bigdata-jini/src/java/com/bigdata/journal/jini/ha/DumpLogDigests.java 2013-08-28 09:54:27 UTC (rev 7348) @@ -26,9 +26,7 @@ import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; -import java.io.PrintWriter; import java.io.Serializable; -import java.io.StringWriter; import java.security.MessageDigest; import java.util.ArrayList; import java.util.Iterator; @@ -309,7 +307,7 @@ for (long cc = startCC; cc <= endCC; cc++) { final long cur = cc; - final Future res = es.submit(new Callable<Void>() { + final Future<Void> res = es.submit(new Callable<Void>() { @Override public Void call() throws Exception { try { @@ -351,7 +349,7 @@ } @SuppressWarnings("serial") - static public class HALogInfo implements Serializable, Comparable { + static public class HALogInfo implements Serializable, Comparable<HALogInfo> { final public String logName; final public boolean isOpen; final public byte[] digest; @@ -363,11 +361,7 @@ } @Override - public int compareTo(Object o) { - if (! (o instanceof HALogInfo)) - return -1; - final HALogInfo other = (HALogInfo) o; - + public int compareTo(HALogInfo other) { return logName.compareTo(other.logName); } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
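
Both edits in this tidy replace raw types with parameterized ones: the raw Future becomes Future<Void>, and HALogInfo implements Comparable<HALogInfo> instead of the raw Comparable. With the typed interface the compiler guarantees the argument type, so the instanceof check and cast in the old compareTo(Object) can be dropped, as the diff shows. A self-contained sketch of the same change (class renamed LogInfoSketch to keep the example standalone):

    import java.io.Serializable;

    public class LogInfoSketch implements Serializable, Comparable<LogInfoSketch> {

        private static final long serialVersionUID = 1L;

        final String logName;

        LogInfoSketch(final String logName) {
            this.logName = logName;
        }

        @Override
        public int compareTo(final LogInfoSketch other) {
            // The raw form needed:
            //   if (!(o instanceof LogInfoSketch)) return -1;
            //   final LogInfoSketch other = (LogInfoSketch) o;
            // The typed parameter makes both lines unnecessary.
            return logName.compareTo(other.logName);
        }
    }

Note that the raw version's "return -1 for a foreign type" also violated the compareTo contract (it is not antisymmetric), so the tidy removes a latent correctness issue along with the cast.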
From: <mar...@us...> - 2013-08-28 09:52:10
Revision: 7347 http://bigdata.svn.sourceforge.net/bigdata/?rev=7347&view=rev Author: martyncutcher Date: 2013-08-28 09:51:56 +0000 (Wed, 28 Aug 2013) Log Message: ----------- First version of DumpLogDigests utility for ticket #726. Can be called from code or command line to output the digest data from a set of joined services. An option supports output only if deltas are found. The TestHA3DumpLogs unit tests demonstrate use. Added Paths: ----------- branches/READ_CACHE2/bigdata-jini/src/java/com/bigdata/journal/jini/ha/DumpLogDigests.java branches/READ_CACHE2/bigdata-jini/src/java/com/bigdata/journal/jini/ha/SimpleDiscovery.java branches/READ_CACHE2/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA3DumpLogs.java branches/READ_CACHE2/bigdata-jini/src/test/com/bigdata/journal/jini/ha/dumpFile.config Added: branches/READ_CACHE2/bigdata-jini/src/java/com/bigdata/journal/jini/ha/DumpLogDigests.java =================================================================== --- branches/READ_CACHE2/bigdata-jini/src/java/com/bigdata/journal/jini/ha/DumpLogDigests.java (rev 0) +++ branches/READ_CACHE2/bigdata-jini/src/java/com/bigdata/journal/jini/ha/DumpLogDigests.java 2013-08-28 09:51:56 UTC (rev 7347) @@ -0,0 +1,556 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2013. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +package com.bigdata.journal.jini.ha; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.PrintWriter; +import java.io.Serializable; +import java.io.StringWriter; +import java.security.MessageDigest; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.NoSuchElementException; +import java.util.concurrent.Callable; +import java.util.concurrent.ConcurrentSkipListSet; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.ThreadPoolExecutor; + +import net.jini.config.ConfigurationException; + +import org.apache.log4j.Logger; +import org.apache.zookeeper.KeeperException; + +import com.bigdata.btree.BytesUtil; +import com.bigdata.ha.HAGlue; +import com.bigdata.ha.IndexManagerCallable; +import com.bigdata.ha.halog.IHALogReader; +import com.bigdata.journal.ITransactionService; +import com.bigdata.journal.jini.ha.HALogIndex.IHALogRecord; + +import cutthecrap.utils.striterators.EmptyIterator; + +/** + * Accesses Zookeeper state and then connects to each service member to + * retrieve log digest information. + * + * The states are pinned by starting a read transaction on any of the services. + * + * This task will return an object with the transaction reference to be released + * and the range of commit counters for the logs to be checked. 
+ * + * Then tasks should be submitted for each service for a subset of the total logs available. + * + * Each subset is processed and a subset summary created. + * + * The dump method returns an Iterator providing the option to block on hasNext. This handles + * the batching of log computations for each service, the idea being that for large + * numbers of logs comparisons could be made across a subset of the logs. + * + * An intermediate ServiceLogWait class wraps the future to allow an iterator to + * block on get() before returning the wrapped values as ServiceLogs instances. + * + * Internal to each service the "serviceThreads" value defines the number of concurrent + * log digest computations at any point. + * + * @author Martyn Cutcher + * + */ +public class DumpLogDigests extends SimpleDiscovery { + + private static final Logger log = Logger.getLogger(DumpLogDigests.class); + + private static final int DEFAULT_SERVICE_THREADS = 5; + private static final int DEFAULT_BATCH = 50; + + /** + * Just needs the configuration file for the discovery service and the local zookeeper port + */ + public DumpLogDigests(final String[] configFiles) throws ConfigurationException, IOException, InterruptedException { + super(configFiles); + } + + public Iterator<ServiceLogs> summary(final String serviceRoot) throws IOException, ExecutionException { + return summary(dump(serviceRoot, DEFAULT_BATCH, DEFAULT_SERVICE_THREADS)); + } + + public Iterator<ServiceLogs> dump(final String serviceRoot) throws IOException, ExecutionException { + return dump(serviceRoot, DEFAULT_BATCH, DEFAULT_SERVICE_THREADS); + } + + public Iterator<ServiceLogs> dump(final String serviceRoot, final int batchSize, final int serviceThreads) throws IOException, ExecutionException { + try { + // wait for zk services to register! + Thread.sleep(1000); + + List<HAGlue> services = services(serviceRoot); + + if (services.isEmpty()) + throw new IllegalArgumentException("No services found for " + serviceRoot); + + // Start by grabbing a nominal service to pin the logs + final HAGlue pinner = services.get(0); + + final LogDigestParams params = pinner.submit(new PinLogs(), false).get(); + + if (log.isInfoEnabled()) + log.info("Pinning startCC: " + params.startCC + ", endCC: " + params.endCC); + + /** + * Now access serviceIDs so that we can use discovery to gain HAGlue interface. 
+ * + * Submit all requests for concurrent processing, then add results + */ + List<Future<List<HALogInfo>>> results = new ArrayList<Future<List<HALogInfo>>>(); + long batchStart = params.startCC; + long batchEnd = batchStart + batchSize - 1; + int tasks = 0; + while (true) { + if (batchEnd > params.endCC) + batchEnd = params.endCC; + + if (log.isInfoEnabled()) + log.info("Running batch start: " + batchStart + ", end: " + batchEnd + " across " + services); + + for (final HAGlue glue : services) { + + results.add(glue.submit(new GetLogInfo(batchStart, batchEnd, serviceThreads), false)); + + tasks++; + } + + if (batchEnd == params.endCC) + break; + + batchStart += batchSize; + batchEnd += batchSize; + } + + final ArrayList<ServiceLogWait> logs = new ArrayList<ServiceLogWait>(); + for (int t = 0; t < tasks; t++) { + final int s = t % services.size(); + logs.add(new ServiceLogWait(services.get(s).getServiceUUID().toString(), results.get(t), s, services.size())); + } + + // now submit task to release the pinning transaction and wait for it to complete + pinner.submit(new UnpinLogs(params.tx), false).get(); + + // return an Iterator blocking on the Future value of the next source item before + // creating a return value + return new Iterator<ServiceLogs>() { + final Iterator<ServiceLogWait> src = logs.iterator(); + + @Override + public boolean hasNext() { + return src.hasNext(); + } + + @Override + public ServiceLogs next() { + final ServiceLogWait data = src.next(); + + try { + // This will block on the future.get() + return new ServiceLogs(data.service, data.waitlogInfos.get(), data.item, data.batch); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } catch (ExecutionException e) { + throw new RuntimeException(e); + } + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + }; + + } catch (InterruptedException e) { + throw new RuntimeException(e); + } catch (KeeperException e) { + throw new RuntimeException(e); + } + } + + /** + * LogDigestParams with PinLogs and UnpinLogs tasks ensure that + * transactions (and logs?) will not be removed while the digests + * are computed. + * TODO: Check if this will really pin the logs? 
+ */ + @SuppressWarnings("serial") + static public class LogDigestParams implements Serializable { + final public long tx; + final public long startCC; + final public long endCC; + + LogDigestParams(final long tx, final long startCC, final long endCC) { + this.tx = tx; + this.startCC = startCC; + this.endCC = endCC; + } + } + + @SuppressWarnings("serial") + static class PinLogs extends IndexManagerCallable<LogDigestParams> { + + @Override + public LogDigestParams call() throws Exception { + final HAJournal ha = (HAJournal) this.getIndexManager(); + + final ITransactionService ts = ha.getTransactionService(); + final long relTime = ts.getReleaseTime(); + final long tx = ts.newTx(relTime+1); + + final HALogNexus nexus = ha.getHALogNexus(); + Iterator<IHALogRecord> logs = nexus.getHALogs(); + final long startCC; + long endCC = nexus.getCommitCounter()+1; // endCC + if (logs.hasNext()) { + startCC = logs.next().getCommitCounter(); + } else { + startCC = endCC; + } + + return new LogDigestParams(tx, startCC, endCC); + } + + } + + @SuppressWarnings("serial") + static class UnpinLogs extends IndexManagerCallable<Void> { + long tx; + + UnpinLogs(long tx) { + this.tx = tx; + } + + @Override + public Void call() throws Exception { + final HAJournal ha = (HAJournal) this.getIndexManager(); + + final ITransactionService ts = ha.getTransactionService(); + + ts.abort(tx); + + return null; + } + + } + + /** + * The GetLogInfo callable is submitted to each service, retrieving a + * List of HALogInfo data elements for each commit counter log + * requested. + * + * It is parameterized for start and end commit counters for the logs and + * the number of serviceThreads to split the digest computations across. + */ + + @SuppressWarnings("serial") + static class GetLogInfo extends IndexManagerCallable<List<HALogInfo>> { + long startCC; + long endCC; + int serviceThreads; + GetLogInfo(long startCC, long endCC, int serviceThreads) { + this.startCC = startCC; + this.endCC = endCC; + this.serviceThreads = serviceThreads; + + if (serviceThreads < 1 || serviceThreads > 20) { + throw new IllegalArgumentException(); + } + } + + @Override + public List<HALogInfo> call() throws Exception { + final ConcurrentSkipListSet<HALogInfo> infos = new ConcurrentSkipListSet<HALogInfo>(); + + HAJournal ha = (HAJournal) this.getIndexManager(); + + final HALogNexus nexus = ha.getHALogNexus(); + long openCC = nexus.getCommitCounter(); + log.warn("Open Commit Counter: " + openCC + ", startCC: " + startCC + ", endCC: " + endCC); + + /** + * Submit each computation as task to pooled executor service - say maximum of + * five threads + */ + final ThreadPoolExecutor es = (ThreadPoolExecutor) Executors + .newFixedThreadPool(serviceThreads); + + final List<Future<Void>> results = new ArrayList<Future<Void>>(); + + for (long cc = startCC; cc <= endCC; cc++) { + final long cur = cc; + + final Future res = es.submit(new Callable<Void>() { + @Override + public Void call() throws Exception { + try { + final File file = nexus.getHALogFile(cur); + + log.warn("Found log file: " + file.getName()); + + // compute file digest + final IHALogReader r = nexus.getReader(cur); + + final MessageDigest digest = MessageDigest.getInstance("MD5"); + + r.computeDigest(digest); + + infos.add(new HALogInfo(file.getName(), r.isLive(), digest.digest())); + } catch (FileNotFoundException fnf) { + // half expected + } catch (Throwable t) { + log.warn("Unexpected error", t); + } + + return null; + } + + }); + + results.add(res); + } + + for (Future<Void> res : results) { 
+ res.get(); + } + + es.shutdown(); + + return new ArrayList<HALogInfo>(infos); + } + + } + + @SuppressWarnings("serial") + static public class HALogInfo implements Serializable, Comparable { + final public String logName; + final public boolean isOpen; + final public byte[] digest; + + HALogInfo(final String logName, final boolean isOpen, final byte[] bs) { + this.logName = logName; + this.isOpen = isOpen; + this.digest = bs; + } + + @Override + public int compareTo(Object o) { + if (! (o instanceof HALogInfo)) + return -1; + final HALogInfo other = (HALogInfo) o; + + return logName.compareTo(other.logName); + } + } + + /** + * The ServiceLogs data is a list of digest results for a specific service. + */ + @SuppressWarnings("serial") + static public class ServiceLogs implements Serializable { + + final public List<HALogInfo> logInfos; + final public String service; + final public int item; + final public int batch; + + ServiceLogs(final String service, final List<HALogInfo> logInfos, final int batch, final int item) { + this.logInfos = logInfos; + this.service = service; + this.item = item; + this.batch = batch; + } + + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("Service: " + service + "\n"); + for (HALogInfo li : logInfos) { + sb.append(li.logName); + sb.append(" " + BytesUtil.toHexString(li.digest)); + sb.append(" " + (li.isOpen ? "open" : "closed")); + sb.append("\n"); + } + + return sb.toString(); + } + } + + /** + * The ServiceLogWait supports a delayed iteration where the iterator handles + * the wait on the Future, rather than the client needing to be aware of the + * concurrent evaluation. + */ + class ServiceLogWait { + final Future<List<HALogInfo>> waitlogInfos; + final String service; + final int item; + final int batch; + + ServiceLogWait(final String service, final Future<List<HALogInfo>> waitlogInfos, final int batch, final int item) { + this.waitlogInfos = waitlogInfos; + this.service = service; + this.item = item; + this.batch = batch; + } + } + + /** + * Two required arguments: + * <HA configuration file> + * <ServiceRoot> + * + * A third optional argument + * "summary" + * which, if present, only generates output for HALogInfo that is different for teh joined services. + * + * The DumpLogDigests dump method returns an Iterator over the halog files. + */ + static public void main(final String[] args) throws ConfigurationException, IOException, InterruptedException, ExecutionException { + if (args.length < 2 || args.length > 3) { + System.err.println("required arguments: <configFile> <serviceRoot> [\"summary\"]"); + + return; + } + + final String configFile = args[0]; + final String serviceRoot = args[1]; + + final boolean summary = args.length > 2 ? "summary".equals(args[2]) : false; + + final DumpLogDigests dld = new DumpLogDigests(new String[] {configFile}); + + final Iterator<ServiceLogs> slogs; + + if (summary) { + slogs = dld.summary(serviceRoot); + } else { + slogs = dld.dump(serviceRoot); + } + + while (slogs.hasNext()) { + final ServiceLogs sl = slogs.next(); + + if (sl.logInfos.size() > 0) + System.out.println(sl.toString()); + } + } + + /** + * Summary will return any reported differences. 
+ * + * @param slogs + * @return + */ + static public Iterator<ServiceLogs> summary(final Iterator<ServiceLogs> slogs) { + return new Iterator<ServiceLogs>() { + + Iterator<ServiceLogs> delta = new EmptyIterator<ServiceLogs>(); + + @Override + public boolean hasNext() { + return delta.hasNext() || slogs.hasNext(); + } + + @Override + public ServiceLogs next() { + if (!hasNext()) + throw new NoSuchElementException(); + + if (delta.hasNext()) { + return delta.next(); + } else { + delta = delta(slogs).iterator(); + + return delta.next(); + } + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + + }; + } + + /** + * Buffers the responses to allow HALogInfo comparisons and remove entries + * that are similar. + * + * @param slogs + * @return the service log deltas for a single batch + */ + static List<ServiceLogs> delta(final Iterator<ServiceLogs> slogs) { + final ArrayList<ServiceLogs> tmp = new ArrayList<ServiceLogs>(); + + // retrieve batch results + while (slogs.hasNext()) { + ServiceLogs sl = slogs.next(); // will block if not ready + tmp.add(sl); + if (sl.item == (sl.batch-1)) // break on last entry for batch + break; + } + + // size is number of services with batch info + if (tmp.size() > 0) { + // select first service to compare with + final ServiceLogs t = tmp.get(0); + + // first check if all others have same number of entries, if not then + // do not attempt to compare further. + for (int n = 1; n < tmp.size(); n++) { + if (tmp.get(n).logInfos.size() != t.logInfos.size()) + return tmp; + } + + // next compare digests, if all the same then remove the entry + for (int li = t.logInfos.size()-1; li >= 0; li--) { + final HALogInfo s = t.logInfos.get(li); + boolean include = false; + for (int n = 1; n < tmp.size(); n++) { + final HALogInfo tst = tmp.get(n).logInfos.get(li); + if (BytesUtil.compareBytes(tst.digest, s.digest) != 0) + include = true; + } + + if (!include) { // remove HALogInfo + for (int n = 0; n < tmp.size(); n++) { + tmp.get(n).logInfos.remove(li); + } + } + } + } + + return tmp; + } + +} Added: branches/READ_CACHE2/bigdata-jini/src/java/com/bigdata/journal/jini/ha/SimpleDiscovery.java =================================================================== --- branches/READ_CACHE2/bigdata-jini/src/java/com/bigdata/journal/jini/ha/SimpleDiscovery.java (rev 0) +++ branches/READ_CACHE2/bigdata-jini/src/java/com/bigdata/journal/jini/ha/SimpleDiscovery.java 2013-08-28 09:51:56 UTC (rev 7347) @@ -0,0 +1,106 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2013. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +package com.bigdata.journal.jini.ha; + +import java.io.IOException; +import java.net.InetAddress; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.ExecutionException; + +import net.jini.config.Configuration; +import net.jini.config.ConfigurationException; +import net.jini.config.ConfigurationProvider; +import net.jini.core.lookup.ServiceItem; +import net.jini.discovery.DiscoveryEvent; +import net.jini.discovery.DiscoveryListener; +import net.jini.discovery.LookupDiscoveryManager; +import net.jini.lease.LeaseRenewalManager; +import net.jini.lookup.ServiceDiscoveryManager; + +import org.apache.zookeeper.KeeperException; +import org.apache.zookeeper.ZooKeeper; + +import com.bigdata.ha.HAGlue; +import com.bigdata.io.SerializerUtil; +import com.bigdata.journal.jini.ha.HAClient.HAConnection; +import com.bigdata.quorum.zk.QuorumServiceState; +import com.bigdata.service.jini.JiniClientConfig; +import com.bigdata.util.config.NicUtil; +import com.bigdata.zookeeper.ZooHelper; +import com.bigdata.zookeeper.ZooKeeperAccessor; + +public class SimpleDiscovery { + + final HAClient client; + + /** + * Just needs the configuration file for the discovery service and the local + * zookeeper port + */ + public SimpleDiscovery(final String[] configFiles) + throws ConfigurationException, IOException, InterruptedException { + + // wait for zk services to register! + Thread.sleep(1000); + + client = new HAClient(configFiles); + } + + public void shutdown() { + client.disconnect(true); + } + + + public List<HAGlue> services(final String serviceRoot) throws IOException, + ExecutionException, KeeperException, InterruptedException { + + final HAConnection cnxn = client.connect(); + + List<HAGlue> ret = new ArrayList<HAGlue>(); + + final ZooKeeper zk = cnxn.getZookeeperAccessor().getZookeeper(); + + final List<String> data = zk.getChildren( + serviceRoot + "/quorum/joined", null); + + // Now access serviceIDs so that we can use discovery to gain HAGlue + // interface + for (final String d : data) { + final byte[] bytes = zk.getData( + serviceRoot + "/quorum/joined/" + d, false, null); + + final QuorumServiceState qs = (QuorumServiceState) SerializerUtil + .deserialize(bytes); + + ret.add(cnxn.getHAGlueService(qs + .serviceUUID())); + } + + return ret; + + } + +} Added: branches/READ_CACHE2/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA3DumpLogs.java =================================================================== --- branches/READ_CACHE2/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA3DumpLogs.java (rev 0) +++ branches/READ_CACHE2/bigdata-jini/src/test/com/bigdata/journal/jini/ha/TestHA3DumpLogs.java 2013-08-28 09:51:56 UTC (rev 7347) @@ -0,0 +1,141 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2013. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +package com.bigdata.journal.jini.ha; + +import java.io.PrintWriter; +import java.io.StringWriter; +import java.util.Iterator; + +public class TestHA3DumpLogs extends AbstractHA3JournalServerTestCase { + + public void testSimpleDumpLogs() throws Exception { + // only start 2 services to ensure logs are maintained + startA(); + startB(); + + awaitMetQuorum(); + + // Run through a few transactions to generate some log files + simpleTransaction(); + simpleTransaction(); + simpleTransaction(); + + log.warn("After 3 met quorum transactions"); + showLogs(); + + // now start third + startC(); + + awaitFullyMetQuorum(); + + log.warn("After full quorum"); + showLogs(); + + simpleTransaction(); + + log.warn("After full quorum commit"); + showLogs(); + + } + + public void testBatchDumpLogs() throws Exception { + // only start 2 services to ensure logs are maintained + startA(); + startB(); + + awaitMetQuorum(); + + // Run through a few transactions to generate some log files + // Ensure that services use multiple batches + for (int t = 0; t < 200; t++) { + simpleTransaction(); + } + + log.warn("After 200 met quorum transactions"); + +// log.warn("Remove a couple of log files to force delta"); +// Thread.sleep(20000); + + mainShowLogs(true/*summary*/); + + // now start third + startC(); + + awaitFullyMetQuorum(); + + log.warn("After full quorum"); + mainShowLogs(true/*summary*/); + + simpleTransaction(); + + log.warn("After full quorum commit"); + mainShowLogs(false/*summary*/); + + } + + /** + * Enable simple testing of DumpLogDigests utility + */ + private void showLogs() throws Exception { + final DumpLogDigests dlds = new DumpLogDigests( + new String[] { + SRC_PATH + "dumpFile.config" + } + ); + try { + final StringWriter sw = new StringWriter(); + final PrintWriter pw = new PrintWriter(sw); + + final Iterator<DumpLogDigests.ServiceLogs> serviceLogInfo = dlds.dump(logicalServiceZPath, 20/*batchlogs*/, 5/*serviceThreads*/); + + while (serviceLogInfo.hasNext()) { + DumpLogDigests.ServiceLogs info = serviceLogInfo.next(); + pw.println(info); + } + + pw.flush(); + + log.warn(sw.toString()); + + } finally { + dlds.shutdown(); + } + } + + /** + * Tests main entry for DumpLogDigests + * @param b + */ + private void mainShowLogs(boolean summary) throws Exception { + DumpLogDigests.main( + new String[] { + SRC_PATH + "dumpFile.config", + logicalServiceZPath, + (summary ? 
"summary" : "full") + } + ); + } + + +} Added: branches/READ_CACHE2/bigdata-jini/src/test/com/bigdata/journal/jini/ha/dumpFile.config =================================================================== --- branches/READ_CACHE2/bigdata-jini/src/test/com/bigdata/journal/jini/ha/dumpFile.config (rev 0) +++ branches/READ_CACHE2/bigdata-jini/src/test/com/bigdata/journal/jini/ha/dumpFile.config 2013-08-28 09:51:56 UTC (rev 7347) @@ -0,0 +1,316 @@ +import net.jini.jeri.BasicILFactory; +import net.jini.jeri.BasicJeriExporter; +import net.jini.jeri.tcp.TcpServerEndpoint; + +import net.jini.discovery.LookupDiscovery; +import net.jini.core.discovery.LookupLocator; +import net.jini.core.entry.Entry; +import net.jini.lookup.entry.Name; +import net.jini.lookup.entry.Comment; +import net.jini.lookup.entry.Address; +import net.jini.lookup.entry.Location; +import net.jini.lookup.entry.ServiceInfo; +import net.jini.core.lookup.ServiceTemplate; + +import java.io.File; +import java.net.InetAddress; +import java.net.InetSocketAddress; +import java.util.UUID; + +import com.bigdata.util.NV; +import com.bigdata.util.config.NicUtil; +import com.bigdata.journal.Options; +import com.bigdata.journal.BufferMode; +import com.bigdata.journal.jini.ha.HAJournal; +import com.bigdata.jini.lookup.entry.*; +import com.bigdata.service.IBigdataClient; +import com.bigdata.service.AbstractTransactionService; +import com.bigdata.service.jini.*; +import com.bigdata.service.jini.lookup.DataServiceFilter; +import com.bigdata.service.jini.master.ServicesTemplate; +import com.bigdata.jini.start.config.*; +import com.bigdata.jini.util.ConfigMath; + +import org.apache.zookeeper.ZooDefs; +import org.apache.zookeeper.data.ACL; +import org.apache.zookeeper.data.Id; + +// imports for various options. +import com.bigdata.btree.IndexMetadata; +import com.bigdata.btree.keys.KeyBuilder; +import com.bigdata.rdf.sail.BigdataSail; +import com.bigdata.rdf.spo.SPORelation; +import com.bigdata.rdf.spo.SPOKeyOrder; +import com.bigdata.rdf.lexicon.LexiconRelation; +import com.bigdata.rdf.lexicon.LexiconKeyOrder; +import com.bigdata.rawstore.Bytes; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeUnit.*; + +/* + * This is a sample configuration file for a highly available Journal. A + * version of this file must be available to each HAJournalServer in the + * pipeline. + */ + +/* + * Globals. + */ +bigdata { + + private static fedname = "benchmark"; + + // NanoSparqlServer (http) port. + private static nssPort = ConfigMath.add(8090,10); + + // write replication pipeline port (listener). + private static haPort = ConfigMath.add(9090,10); + + // The #of services in the write pipeline. + private static replicationFactor = 3; + + // The logical service identifier shared by all members of the quorum. + private static logicalServiceId = System.getProperty("test.logicalServiceId","CI-HAJournal-1"); + + // The service directory. + // Note: Overridden by environment property when deployed. + private static serviceDir = new File(System.getProperty("test.serviceDir",ConfigMath.getAbsolutePath(new File(new File(fedname,logicalServiceId),"Z")))); + //new File(new File(fedname,logicalServiceId),"Z"); + + // journal data directory. + private static dataDir = serviceDir; + + // one federation, multicast discovery. + //static private groups = LookupDiscovery.ALL_GROUPS; + + // unicast discovery or multiple setups, MUST specify groups. 
+ static private groups = new String[]{bigdata.fedname}; + + /** + * One or more unicast URIs of the form <code>jini://host/</code> + * or <code>jini://host:port/</code> (no default). + * + * This MAY be an empty array if you want to use multicast + * discovery <strong>and</strong> you have specified the groups as + * LookupDiscovery.ALL_GROUPS (a <code>null</code>). + */ + static private locators = new LookupLocator[] { + + // runs jini on the localhost using unicast locators. + new LookupLocator("jini://localhost/") + + // runs jini on one or more hosts using unicast locators. + //new LookupLocator("jini://"+jini1), + //new LookupLocator("jini://"+jini2), + + }; + + /** + * A common point to set the Zookeeper client's requested + * sessionTimeout and the jini lease timeout. The default lease + * renewal period for jini is 5 minutes while for zookeeper it is + * more like 5 seconds. This puts the two systems onto a similar + * timeout period so that a disconnected client is more likely to + * be noticed in roughly the same period of time for either + * system. A value larger than the zookeeper default helps to + * prevent client disconnects under sustained heavy load. + * + * If you use a short lease timeout (LT 20s), then you need to override + * properties properties for the net.jini.lease.LeaseRenewalManager + * or it will run in a tight loop (it's default roundTripTime is 10s + * and it schedules lease renewals proactively.) + */ + + // jini + static private leaseTimeout = ConfigMath.s2ms(20); + + // zookeeper + static private sessionTimeout = (int)ConfigMath.s2ms(5); + + /* + * Configuration for default KB. + */ + + private static namespace = "kb"; + + private static kb = new NV[] { + + /* Setup for QUADS mode without the full text index. */ + + new NV(BigdataSail.Options.TRUTH_MAINTENANCE, "false" ), + new NV(BigdataSail.Options.QUADS, "true"), + new NV(BigdataSail.Options.STATEMENT_IDENTIFIERS, "false"), + new NV(BigdataSail.Options.TEXT_INDEX, "false"), + new NV(BigdataSail.Options.AXIOMS_CLASS,"com.bigdata.rdf.axioms.NoAxioms"), + new NV(BigdataSail.Options.QUERY_TIME_EXPANDER, "false"), + + // Bump up the branching factor for the lexicon indices on the named kb. + // com.bigdata.namespace.kb.lex.com.bigdata.btree.BTree.branchingFactor=400 + new NV(com.bigdata.config.Configuration.getOverrideProperty + ( namespace + "." + LexiconRelation.NAME_LEXICON_RELATION, + IndexMetadata.Options.BTREE_BRANCHING_FACTOR + ), "400"), + + // Bump up the branching factor for the statement indices on the named kb. + // com.bigdata.namespace.kb.spo.com.bigdata.btree.BTree.branchingFactor=1024 + new NV(com.bigdata.config.Configuration.getOverrideProperty + ( namespace + "." + SPORelation.NAME_SPO_RELATION, + IndexMetadata.Options.BTREE_BRANCHING_FACTOR + ), "1024"), + }; + +} + +/* + * Zookeeper client configuration. + */ +org.apache.zookeeper.ZooKeeper { + + /* Root znode for the federation instance. */ + zroot = "/" + bigdata.fedname; + + /* A comma separated list of host:port pairs, where the port is + * the CLIENT port for the zookeeper server instance. + */ + // standalone. + servers = "localhost:2081"; + // ensemble +// servers = bigdata.zoo1+":2181" +// + ","+bigdata.zoo2+":2181" +// + ","+bigdata.zoo3+":2181" +// ; + + /* Session timeout (optional). */ + sessionTimeout = bigdata.sessionTimeout; + + /* + * ACL for the zookeeper nodes created by the bigdata federation. 
+ * + * Note: zookeeper ACLs are not transmitted over secure channels + * and are placed into plain text Configuration files by the + * ServicesManagerServer. + */ + acl = new ACL[] { + + new ACL(ZooDefs.Perms.ALL, new Id("world", "anyone")) + + }; + +} + +/* + * You should not have to edit below this line. + */ + +/* + * Jini client configuration. + */ +com.bigdata.service.jini.JiniClient { + + groups = bigdata.groups; + + locators = bigdata.locators; + +/* + entries = new Entry[] { + + // Optional metadata entries. + new Name("Z"), + + // Note: Used to assign the ServiceID to the service. + new ServiceUUID(UUID.fromString(System.getProperty("test.serviceId"))) + + }; +*/ +} + +net.jini.lookup.JoinManager { + + maxLeaseDuration = bigdata.leaseTimeout; + +} + +/* + * Server configuration options. + */ +com.bigdata.journal.jini.ha.HAJournalServer { + + args = new String[] { + "-showversion", + "-Djava.security.policy=policy.all", + "-Dlog4j.configuration=file:log4j-A.properties", + "-Djava.util.logging.config.file=logging-A.properties", + "-server", + "-Xmx1G", + "-ea", + "-Xdebug","-Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=1080" + }; + + serviceDir = bigdata.serviceDir; + + // Default policy. + restorePolicy = new com.bigdata.journal.jini.ha.DefaultRestorePolicy(); + + // Suppress automatic snapshots. + snapshotPolicy = new com.bigdata.journal.jini.ha.NoSnapshotPolicy(); + + logicalServiceId = bigdata.logicalServiceId; + + writePipelineAddr = new InetSocketAddress("localhost",bigdata.haPort); + + /* + writePipelineAddr = new InetSocketAddress(// + InetAddress.getByName(// + NicUtil.getIpAddress("default.nic", "default", + false// loopbackOk + )), // + bigdata.haPort + ); + */ + + replicationFactor = bigdata.replicationFactor; + + // Use the overridden version of the HAJournal by default so we get the + // HAGlueTest API for every test. + HAJournalClass = "com.bigdata.journal.jini.ha.HAJournalTest"; + +} + +/* + * Journal configuration. + */ +com.bigdata.journal.jini.ha.HAJournal { + + properties = (NV[]) ConfigMath.concat(new NV[] { + + new NV(Options.FILE, + ConfigMath.getAbsolutePath(new File(bigdata.dataDir,"bigdata-ha.jnl"))), + + new NV(Options.BUFFER_MODE,""+BufferMode.DiskRW), + + new NV(IndexMetadata.Options.WRITE_RETENTION_QUEUE_CAPACITY,"4000"), + + new NV(IndexMetadata.Options.BTREE_BRANCHING_FACTOR,"128"), + + new NV(AbstractTransactionService.Options.MIN_RELEASE_AGE,"1"), + + }, bigdata.kb); + +} + +/* + * NanoSparqlServer configuration. + */ +com.bigdata.rdf.sail.webapp.NanoSparqlServer { + + namespace = bigdata.namespace; + + create = true; + + queryThreadPoolSize = 16; + + describeEachNamedGraph = true; + + port = bigdata.nssPort; + +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
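A note on what the delta() comparison above actually keys on: each joined service reports one byte[] digest per HALog file, and an entry is dropped from the summary only when every service's digest for that file matches. A minimal, self-contained sketch of that digest-and-compare contract, using plain JDK APIs (the MD5 choice and the helper names here are illustrative assumptions, not the actual HALog digest routine):

import java.io.FileInputStream;
import java.io.IOException;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;

public class LogDigestSketch {

    // Hypothetical stand-in for the per-file digest a service would report.
    static byte[] digestFile(final String path) throws IOException,
            NoSuchAlgorithmException {
        final MessageDigest md = MessageDigest.getInstance("MD5");
        final FileInputStream in = new FileInputStream(path);
        try {
            final byte[] buf = new byte[8192];
            int n;
            while ((n = in.read(buf)) != -1) {
                md.update(buf, 0, n); // fold the whole file into the digest
            }
        } finally {
            in.close();
        }
        return md.digest();
    }

    public static void main(final String[] args) throws Exception {
        final byte[] a = digestFile(args[0]); // same log name, service A
        final byte[] b = digestFile(args[1]); // same log name, service B
        // Equal digests are exactly the case delta() removes from the output.
        System.out.println(MessageDigest.isEqual(a, b) ? "same" : "differ");
    }
}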
From: <tho...@us...> - 2013-08-27 19:24:34
Revision: 7346 http://bigdata.svn.sourceforge.net/bigdata/?rev=7346&view=rev Author: thompsonbry Date: 2013-08-27 19:24:25 +0000 (Tue, 27 Aug 2013) Log Message: ----------- Refactored to add link weight support (still need the RDR parser to use this except in SIDs mode and still need to co-locate that SIDs with the ground triples in the SPO(C) and OSP(C) indices). Refactored nearly everything to abstract out a concrete implementation that does not depend on the bigdata backend. The next step will be to replace the use of IV and ISPO throughput with openrdf Value and openrdf Statement. We can then easily implement a variant over the MemorySail for openrdf (or over some custom classes that are optimized for the GAS traversal patterns). That will give us a fully functioning implementation outside of the core bigdata code base. We can then isolate the remaining dependencies (striterator and a few other items) and create an Apache 2 package for the APIs and the non-bigdata specific implementation. See #629 Modified Paths: -------------- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/relation/accesspath/ElementFilter.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGASOptions.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGASProgram.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGraphAccessor.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/analytics/BFS.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/analytics/SSSP.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASContext.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASState.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/bd/BigdataGASEngine.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/graph/impl/TestGather.java Added Paths: ----------- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/BaseGASProgram.java Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/relation/accesspath/ElementFilter.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/relation/accesspath/ElementFilter.java 2013-08-27 17:05:59 UTC (rev 7345) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/relation/accesspath/ElementFilter.java 2013-08-27 19:24:25 UTC (rev 7346) @@ -33,6 +33,7 @@ import com.bigdata.btree.filter.TupleFilter; import cutthecrap.utils.striterators.IFilter; +import cutthecrap.utils.striterators.IFilterTest; /** * Align the predicate's {@link IElementFilter} constraint with @@ -69,6 +70,26 @@ } + /** + * Helper method conditionally wraps the <i>test</i>. + * + * @param <R> + * @param test + * The test. + * + * @return The wrapper test -or- <code>null</code> iff the <i>test</i> is + * <code>null</code>. 
+ */ + @SuppressWarnings("rawtypes") + public static <R> IFilter newInstance(final IFilterTest test) { + + if (test == null) + return null; + + return new ElementFilter(test); + + } + public ElementFilter(final IElementFilter<R> test) { if (test == null) @@ -78,6 +99,37 @@ } + @SuppressWarnings("unchecked") + public ElementFilter(final IFilterTest test) { + + if (test == null) + throw new IllegalArgumentException(); + + this.test = new FilterTestWrapper(test); + + } + + @SuppressWarnings("rawtypes") + private static class FilterTestWrapper implements IElementFilter { + private static final long serialVersionUID = 1L; + private final IFilterTest test; + private FilterTestWrapper(final IFilterTest test) { + this.test = test; + } + + @Override + public boolean isValid(final Object e) { + return test.isValid(e); + } + + @Override + public boolean canAccept(Object o) { + return true; + } + + } + + @Override public boolean isValid(final ITuple<R> tuple) { final R obj = (R) tuple.getObject(); Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGASOptions.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGASOptions.java 2013-08-27 17:05:59 UTC (rev 7345) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGASOptions.java 2013-08-27 19:24:25 UTC (rev 7346) @@ -3,6 +3,8 @@ import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.spo.ISPO; +import cutthecrap.utils.striterators.IStriterator; + /** * Interface for options that are understood by the {@link IGASEngine} and which * may be declared by the {@link IGASProgram}. @@ -11,7 +13,10 @@ * plan (like GraphChi). I believe that this reduces to computing a DAG over the * frontier before executing the GATHER and then executing the frontier such * that the parallel execution is constrained by arcs in the DAG that do not - * have mutual dependencies. + * have mutual dependencies. This is really an option that would be implemented + * by the {@link IGASContext}, which would have to place a partial ordering over + * the vertices in the frontier and then process the frontier with limited + * parallelism based on that partial ordering. * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> */ @@ -46,4 +51,65 @@ */ Factory<ISPO, ES> getEdgeStateFactory(); + /** + * Return non-<code>null</code> iff there is a single link type to be + * visited. This corresponds to a view of the graph as a sparse\xCAconnectivity + * matrix. The {@link IGASEngine} can optimize traversal patterns using the + * <code>POS</code> index. + * <p> + * Note: When this option is used, the scatter and gather will not visit the + * property set for the vertex. The graph is treated as if it were an + * unattributed graph and only mined for the connectivity data. + * + * @return The {@link IV} for the predicate that identifies the desired link + * type (there can be many types of links - the return value + * specifies which attribute is of interest). + * + * @see #getLinkAttribType() + */ + @SuppressWarnings("rawtypes") + IV getLinkType(); + +// /** +// * Return non-<code>null</code> iff there is a single link type to be +// * visited. This corresponds to a view of the graph as a sparse\xCAmatrix where +// * the data in the matrix provides the link weights. The type of the visited +// * link weights is specified by the return value for this method. 
The +// * {@link IGASEngine} can optimize traversal patterns using the +// * <code>POS</code> index. +// * <p> +// * Note: When this option is used, the scatter and gather will not visit the +// * property set for the vertex. The graph is treated as if it were an +// * unattributed graph and only mined for the connectivity data. +// * +// * @return The {@link IV} for the predicate that identifies the desired link +// * attribute type (a link can have many attributes - the return +// * value specifies which attribute is of interest). +// * +// * @see #getLinkType() +// */ +// IV getLinkAttribType(); +// +// /** +// * When non-<code>null</code>, the specified {@link Filter} will be used to +// * restrict the visited edges. For example, you can restrict the visitation +// * to a subset of the predicates that are of interest, to only visit edges +// * that have link edges, to visit only select property values, etc. Some +// * useful filters are defined in an abstract implementation of this +// * interface. +// * +// * @see #visitPropertySet() +// */ +// IFilterTest getEdgeFilter(); + + /** + * Hook to impose a constraint on the visited edges and/or property values. + * + * @param itr + * The iterator visiting those edges and/or property values. + * + * @return Either the same iterator or a constrained iterator. + */ + IStriterator constrainFilter(IStriterator eitr); + } \ No newline at end of file Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGASProgram.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGASProgram.java 2013-08-27 17:05:59 UTC (rev 7345) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGASProgram.java 2013-08-27 19:24:25 UTC (rev 7346) @@ -18,12 +18,6 @@ * true. The SUM type is scoped to the GATHER + SUM operation (NOT * the computation). * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * - * TODO There should be a means to specify a filter on the possible - * predicates to be used for traversal. If there is a single predicate, - * then that gives us S+P bound. If there are multiple predicates, then - * we have an IElementFilter on P (in addition to the filter that is - * removing the Literals from the scan). */ @SuppressWarnings("rawtypes") public interface IGASProgram<VS, ES, ST> extends IGASOptions<VS, ES> { @@ -238,6 +232,6 @@ * @return <code>true</code> if the algorithm should continue (as long as * the frontier is non-empty). */ - boolean nextRound(IGASContext ctx); + boolean nextRound(IGASContext<VS, ES, ST> ctx); } \ No newline at end of file Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGraphAccessor.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGraphAccessor.java 2013-08-27 17:05:59 UTC (rev 7345) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGraphAccessor.java 2013-08-27 19:24:25 UTC (rev 7346) @@ -1,8 +1,9 @@ package com.bigdata.rdf.graph; +import java.util.Iterator; + import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.spo.ISPO; -import com.bigdata.striterator.ICloseableIterator; /** * Interface abstracts access to a backend graph implementation. @@ -14,19 +15,20 @@ /** * Return the edges for the vertex. * + * @param p The {@link IGASProgram} * @param u * The vertex. 
* @param edges * Typesafe enumeration indicating which edges should be visited. * @return An iterator that will visit the edges for that vertex. */ - ICloseableIterator<ISPO> getEdges(@SuppressWarnings("rawtypes") final IV u, - final EdgesEnum edges); + @SuppressWarnings("rawtypes") + Iterator<ISPO> getEdges(IGASProgram<?, ?, ?> p, IV u, EdgesEnum edges); /** * Hook to advance the view of the graph. This is invoked at the end of each * GAS computation round for a given {@link IGASProgram}. */ void advanceView(); - + } Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/analytics/BFS.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/analytics/BFS.java 2013-08-27 17:05:59 UTC (rev 7345) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/analytics/BFS.java 2013-08-27 19:24:25 UTC (rev 7346) @@ -5,12 +5,14 @@ import com.bigdata.rdf.graph.EdgesEnum; import com.bigdata.rdf.graph.Factory; import com.bigdata.rdf.graph.IGASContext; -import com.bigdata.rdf.graph.IGASProgram; +import com.bigdata.rdf.graph.IGASScheduler; import com.bigdata.rdf.graph.IGASState; -import com.bigdata.rdf.graph.IGASScheduler; +import com.bigdata.rdf.graph.impl.BaseGASProgram; import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.spo.ISPO; +import cutthecrap.utils.striterators.IStriterator; + /** * Breadth First Search (BFS) is an iterative graph traversal primitive. The * frontier is expanded iteratively until no new vertices are discovered. Each @@ -20,7 +22,7 @@ * @author <a href="mailto:tho...@us...">Bryan Thompson</a> */ @SuppressWarnings("rawtypes") -public class BFS implements IGASProgram<BFS.VS, BFS.ES, Void> { +public class BFS extends BaseGASProgram<BFS.VS, BFS.ES, Void> { static class VS { @@ -125,10 +127,23 @@ } /** + * {@inheritDoc} + * <p> + * Overridden to only visit the edges of the graph. + */ + @Override + public IStriterator constrainFilter(IStriterator itr) { + + return itr.addFilter(edgeOnlyFilter); + + } + + /** * Not used. */ @Override - public void init(IGASState<BFS.VS, BFS.ES, Void> state, IV u) { + public void init(final IGASState<BFS.VS, BFS.ES, Void> state, final IV u) { + state.getState(u).visit(0); } Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/analytics/SSSP.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/analytics/SSSP.java 2013-08-27 17:05:59 UTC (rev 7345) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/analytics/SSSP.java 2013-08-27 19:24:25 UTC (rev 7346) @@ -2,16 +2,16 @@ import org.apache.log4j.Logger; -import com.bigdata.rdf.graph.EdgesEnum; import com.bigdata.rdf.graph.Factory; import com.bigdata.rdf.graph.GASUtil; -import com.bigdata.rdf.graph.IGASContext; -import com.bigdata.rdf.graph.IGASProgram; +import com.bigdata.rdf.graph.IGASScheduler; import com.bigdata.rdf.graph.IGASState; -import com.bigdata.rdf.graph.IGASScheduler; +import com.bigdata.rdf.graph.impl.BaseGASProgram; import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.spo.ISPO; +import cutthecrap.utils.striterators.IStriterator; + /** * SSSP (Single Source, Shortest Path). This analytic computes the shortest path * to each vertex in the graph starting from the given vertex. Only connected @@ -32,7 +32,7 @@ * undirected scatter/gather. Add unit test for undirected. 
*/ @SuppressWarnings("rawtypes") -public class SSSP implements IGASProgram<SSSP.VS, SSSP.ES, Integer/* dist */> { +public class SSSP extends BaseGASProgram<SSSP.VS, SSSP.ES, Integer/* dist */> { private static final Logger log = Logger.getLogger(SSSP.class); @@ -43,10 +43,16 @@ * a pattern to get the link attributes materialized with the {@link ISPO} * for the link. That could be done using a read-ahead filter on the * striterator if the link weights are always clustered with the ground - * triple. - * + * triple. See {@link #decodeStatement(IV)}. + * <P> * When we make this change, the distance should be of the same type as the * link weight or generalized as <code>double</code>. + * <p> + * Maybe add a factory method or alternative constructor for the version of + * SSSP that uses link weights? All we need to do is filter out anything + * that is not a link weight. In addition, it will often be true that there + * is a single link attribute type that is of interest, so the caller should + * also be able to specify that. */ private final static int EDGE_LENGTH = 1; @@ -127,27 +133,39 @@ } - @Override - public Factory<ISPO, SSSP.ES> getEdgeStateFactory() { +// @Override +// public Factory<ISPO, SSSP.ES> getEdgeStateFactory() { +// +// return null; +// +// } +// +// @Override +// public EdgesEnum getGatherEdges() { +// +// return EdgesEnum.InEdges; +// +// } +// +// @Override +// public EdgesEnum getScatterEdges() { +// +// return EdgesEnum.OutEdges; +// +// } - return null; - - } - + /** + * {@inheritDoc} + * <p> + * Overridden to only visit the edges of the graph. + */ @Override - public EdgesEnum getGatherEdges() { + public IStriterator constrainFilter(IStriterator itr) { - return EdgesEnum.InEdges; - + return itr.addFilter(edgeOnlyFilter); + } - @Override - public EdgesEnum getScatterEdges() { - - return EdgesEnum.OutEdges; - - } - /** * Set the {@link VS#dist()} to ZERO (0). * <p> @@ -259,8 +277,8 @@ * <p> * {@inheritDoc} * - * FIXME Test both variations on a variety of data sets and see which is - * better: + * FIXME OPTIMIZE: Test both variations on a variety of data sets and see + * which is better: * * <p> * Zhisong wrote: In the original GASengine, the scatter operator only need @@ -311,11 +329,11 @@ } - @Override - public boolean nextRound(IGASContext ctx) { +// @Override +// public boolean nextRound(IGASContext ctx) { +// +// return true; +// +// } - return true; - - } - } Added: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/BaseGASProgram.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/BaseGASProgram.java (rev 0) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/BaseGASProgram.java 2013-08-27 19:24:25 UTC (rev 7346) @@ -0,0 +1,225 @@ +package com.bigdata.rdf.graph.impl; + +import com.bigdata.rdf.graph.EdgesEnum; +import com.bigdata.rdf.graph.Factory; +import com.bigdata.rdf.graph.IGASContext; +import com.bigdata.rdf.graph.IGASProgram; +import com.bigdata.rdf.graph.IGASState; +import com.bigdata.rdf.internal.IV; +import com.bigdata.rdf.internal.impl.bnode.SidIV; +import com.bigdata.rdf.spo.ISPO; + +import cutthecrap.utils.striterators.Filter; +import cutthecrap.utils.striterators.IFilter; +import cutthecrap.utils.striterators.IStriterator; + +/** + * Abstract base class with some useful defaults. 
+ * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @param <VS> + * @param <ES> + * @param <ST> + */ +@SuppressWarnings("rawtypes") +abstract public class BaseGASProgram<VS, ES, ST> implements + IGASProgram<VS, ES, ST> { + + /** + * Filter visits only edges (filters out attribute values). + * <p> + * Note: This filter is pushed down onto the AP and evaluated close to the + * data. + */ + protected static final IFilter edgeOnlyFilter = new Filter() { + private static final long serialVersionUID = 1L; + + @Override + public boolean isValid(final Object e) { + return ((ISPO) e).o().isURI(); + } + }; + + /** + * Return <code>true</code> iff the visited {@link ISPO} is an instance + * of the specified link attribute type. + * + * @return + */ + protected static final IFilter newLinkAttribFilter(final IV linkAttribType) { + + return new LinkAttribFilter(linkAttribType); + + } + + static class LinkAttribFilter extends Filter { + + private static final long serialVersionUID = 1L; + + private final IV linkAttribType; + + public LinkAttribFilter(final IV linkAttribType) { + + if (linkAttribType == null) + throw new IllegalArgumentException(); + + this.linkAttribType = linkAttribType; + + } + + @Override + public boolean isValid(final Object e) { + final ISPO edge = (ISPO) e; + if(!edge.p().equals(linkAttribType)) { + // Edge does not use the specified link attribute type. + return false; + } + if (!(edge.s() instanceof SidIV)) { + // The subject of the edge is not a Statement. + return false; + } + return true; + } + + } + + /** + * If the vertex is actually an edge, then return the decoded edge. + * <p> + * Note: A vertex may be an edge. A link attribute is modeled by treating + * the link as a vertex and then asserting a property value about that + * "link vertex". For bigdata, this is handled efficiently as inline + * statements about statements. This approach subsumes the property graph + * model (property graphs do not permit recursive nesting of these + * relationships) and is 100% consistent with RDF reification, except that + * the link attributes are modeled efficiently inline with the links. This + * is what we call <a + * href="http://www.bigdata.com/whitepapers/reifSPARQL.pdf" > Reification + * Done Right </a>. + * + * @param v + * The vertex. + * + * @return The edge decoded from that vertex and <code>null</code> iff the + * vertex is not an edge. + * + * TODO RDR : Link to an RDR wiki page as well. + * + * TODO We can almost write the same logic at the openrdf layer + * using <code>v instanceof Statement</code>. However, v can not be + * a Statement for openrdf and there is no way to decode the vertex + * as a Statement in openrdf. + */ + protected ISPO decodeStatement(final IV v) { + + if (!v.isStatement()) + return null; + + final ISPO decodedEdge = (ISPO) v.getInlineValue(); + + return decodedEdge; + + } + + /** + * {@inheritDoc} + * <p> + * The default implementation does not restrict the visitation to a + * connectivity matrix (returns <code>null</code>). + */ + @Override + public IV getLinkType() { + + return null; + + } + + /** + * {@inheritDoc} + * <p> + * The default implementation returns its argument. + */ + @Override + public IStriterator constrainFilter(IStriterator itr) { + + return itr; + + } + + /** + * {@inheritDoc} + * <p> + * The default gathers on the {@link EdgesEnum#InEdges}. 
+ */ + @Override + public EdgesEnum getGatherEdges() { + + return EdgesEnum.InEdges; + + } + + /** + * {@inheritDoc} + * <p> + * The default scatters on the {@link EdgesEnum#OutEdges}. + */ + @Override + public EdgesEnum getScatterEdges() { + + return EdgesEnum.OutEdges; + + } + + /** + * {@inheritDoc} + * <p> + * The default is a NOP. + */ + @Override + public void init(final IGASState<VS, ES, ST> state, final IV u) { + + // NOP + + } + +// public Factory<IV, VS> getVertexStateFactory(); + + /** + * {@inheritDoc} + * <p> + * The default implementation returns <code>null</code>. Override this if + * the algorithm uses per-edge computation state. + */ + @Override + public Factory<ISPO, ES> getEdgeStateFactory() { + + return null; + + } + + /** + * {@inheritDoc} + * <p> + * The default implementation returns <code>true</code>. Override this if + * you know whether or not the computation state of this vertex has changed. + */ + @Override + public boolean isChanged(IGASState<VS, ES, ST> state, IV u) { + + return true; + + } + + /** + * {@inheritDoc} + * <p> + * The default returns <code>true</code>. + */ + @Override + public boolean nextRound(IGASContext<VS, ES, ST> ctx) { + + return true; + + } + +} Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASContext.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASContext.java 2013-08-27 17:05:59 UTC (rev 7345) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASContext.java 2013-08-27 19:24:25 UTC (rev 7346) @@ -1,5 +1,6 @@ package com.bigdata.rdf.graph.impl; +import java.util.Iterator; import java.util.concurrent.Callable; import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; @@ -14,10 +15,10 @@ import com.bigdata.rdf.graph.IGASState; import com.bigdata.rdf.graph.IGASStats; import com.bigdata.rdf.graph.IGraphAccessor; +import com.bigdata.rdf.graph.IReducer; import com.bigdata.rdf.graph.IStaticFrontier; import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.spo.ISPO; -import com.bigdata.striterator.ICloseableIterator; @SuppressWarnings("rawtypes") public class GASContext<VS, ES, ST> implements IGASContext<VS, ES, ST> { @@ -321,6 +322,12 @@ * TODO The apply() should be parallelized. For some algorithms, there is a * moderate amount of work per vertex in apply(). Use {@link #nthreads} to * set the parallelism. + * <p> + * Note: This is very similar to the {@link IGASState#reduce(IReducer)} + * operation. This operates over the frontier. reduce() operates over the + * activated vertices. Both need fine grained parallelism. Both can have + * either light or moderately heavy operations (a dot product would be an + * example of a heavier operation). 
*/ private void apply(final IStaticFrontier f) { @@ -519,7 +526,7 @@ final IGASScheduler sch = scheduler(); - final ICloseableIterator<ISPO> eitr = graphAccessor.getEdges(u, + final Iterator<ISPO> eitr = graphAccessor.getEdges(program, u, getEdgesEnum()); try { @@ -540,7 +547,7 @@ } finally { - eitr.close(); +// eitr.close(); } @@ -569,7 +576,7 @@ long nedges = 0; - final ICloseableIterator<ISPO> eitr = graphAccessor.getEdges(u, + final Iterator<ISPO> eitr = graphAccessor.getEdges(program, u, getEdgesEnum()); try { @@ -622,7 +629,7 @@ } finally { - eitr.close(); +// eitr.close(); } Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASState.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASState.java 2013-08-27 17:05:59 UTC (rev 7345) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASState.java 2013-08-27 19:24:25 UTC (rev 7346) @@ -273,7 +273,12 @@ } - // TODO REDUCE : parallelize with nthreads. + /** + * {@inheritDoc} + * + * TODO REDUCE : parallelize with nthreads. The reduce operations are often + * lightweight, so maybe a fork/join pool would work better? + */ @Override public <T> T reduce(final IReducer<VS, ES, ST, T> op) { Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/bd/BigdataGASEngine.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/bd/BigdataGASEngine.java 2013-08-27 17:05:59 UTC (rev 7345) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/bd/BigdataGASEngine.java 2013-08-27 19:24:25 UTC (rev 7346) @@ -6,6 +6,7 @@ import com.bigdata.btree.IIndex; import com.bigdata.btree.IRangeQuery; import com.bigdata.btree.ITuple; +import com.bigdata.btree.ITupleIterator; import com.bigdata.btree.keys.IKeyBuilder; import com.bigdata.btree.keys.SuccessorUtil; import com.bigdata.journal.IIndexManager; @@ -23,17 +24,16 @@ import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.internal.IVUtility; import com.bigdata.rdf.spo.ISPO; -import com.bigdata.rdf.spo.SPOFilter; import com.bigdata.rdf.spo.SPOKeyOrder; import com.bigdata.rdf.store.AbstractTripleStore; -import com.bigdata.relation.accesspath.ElementFilter; import com.bigdata.relation.accesspath.EmptyCloseableIterator; -import com.bigdata.relation.accesspath.IElementFilter; import com.bigdata.service.IBigdataFederation; -import com.bigdata.striterator.ICloseableIterator; -import com.bigdata.striterator.Resolver; -import com.bigdata.striterator.Striterator; +import cutthecrap.utils.striterators.Filter; +import cutthecrap.utils.striterators.IStriterator; +import cutthecrap.utils.striterators.Resolver; +import cutthecrap.utils.striterators.Striterator; + /** * {@link IGASEngine} for dynamic activation of vertices. This implementation * maintains a frontier and lazily initializes the vertex state when the vertex @@ -104,21 +104,6 @@ // private static final Logger log = Logger.getLogger(GASEngine.class); /** - * Filter visits only edges (filters out attribute values). - * <p> - * Note: This filter is pushed down onto the AP and evaluated close to the - * data. 
- */ - static private final IElementFilter<ISPO> edgeOnlyFilter = new SPOFilter<ISPO>() { - private static final long serialVersionUID = 1L; - - @Override - public boolean isValid(final Object e) { - return ((ISPO) e).o().isURI(); - } - }; - - /** * The {@link IIndexManager} is used to access the graph. */ private final IIndexManager indexManager; @@ -317,48 +302,127 @@ } @SuppressWarnings({ "unchecked", "rawtypes" }) - private Striterator<Iterator<ISPO>, ISPO> getEdges( - final AbstractTripleStore kb, final boolean inEdges, final IV u) { + private IStriterator getEdges(final AbstractTripleStore kb, + final boolean inEdges, final IGASProgram<?, ?, ?> program, + final IV u) { - final SPOKeyOrder keyOrder = getKeyOrder(kb, inEdges); + final SPOKeyOrder keyOrder; + final IIndex ndx; + final IKeyBuilder keyBuilder; + final IV linkTypeIV = program.getLinkType(); + /* + * Optimize case where P is a constant and O is known (2 bound). + * + * P is a constant. + * + * [u] gets bound on O. + * + * We use the POS(C) index. The S values give us the out-edges for + * that [u] and the specified link type. + * + * FIXME POS OPTIMIZATION: write unit test for this option to make + * sure that the right filter is imposed. write performance test to + * verify expected benefit. Watch out for the in-edges vs out-edges + * since only one is optimized. + */ + final boolean posOptimization = linkTypeIV != null + && !inEdges; - final IIndex ndx = kb.getSPORelation().getIndex(keyOrder); + if (posOptimization) { + + /* + * POS(C) + */ + keyOrder = kb.isQuads() ? SPOKeyOrder.POCS : SPOKeyOrder.POS; - final IKeyBuilder keyBuilder = ndx.getIndexMetadata().getKeyBuilder(); + ndx = kb.getSPORelation().getIndex(keyOrder); - keyBuilder.reset(); + keyBuilder = ndx.getIndexMetadata().getKeyBuilder(); - IVUtility.encode(keyBuilder, u); + keyBuilder.reset(); + // Bind P as a constant. + IVUtility.encode(keyBuilder, linkTypeIV); + + // Bind O for this key-range scan. + IVUtility.encode(keyBuilder, u); + + } else { + + /* + * SPO(C) or OSP(C) + */ + + keyOrder = getKeyOrder(kb, inEdges); + + ndx = kb.getSPORelation().getIndex(keyOrder); + + keyBuilder = ndx.getIndexMetadata().getKeyBuilder(); + + keyBuilder.reset(); + + IVUtility.encode(keyBuilder, u); + + } + final byte[] fromKey = keyBuilder.getKey(); final byte[] toKey = SuccessorUtil.successor(fromKey.clone()); - return (Striterator<Iterator<ISPO>, ISPO>) new Striterator( - ndx.rangeIterator(fromKey, toKey, 0/* capacity */, - IRangeQuery.DEFAULT, - ElementFilter.newInstance(edgeOnlyFilter))) - .addFilter(new Resolver() { - private static final long serialVersionUID = 1L; - @Override - protected Object resolve(final Object e) { - final ITuple<ISPO> t = (ITuple<ISPO>) e; - return t.getObject(); - } - }); + final ITupleIterator<ISPO> titr = ndx.rangeIterator(fromKey, toKey, + 0/* capacity */, IRangeQuery.DEFAULT, null/* filter */); + final IStriterator sitr = new Striterator(titr); + + sitr.addFilter(new Resolver() { + private static final long serialVersionUID = 1L; + @Override + protected Object resolve(final Object e) { + final ITuple<ISPO> t = (ITuple<ISPO>) e; + return t.getObject(); + } + }); + + if (linkTypeIV != null && !posOptimization) { + /* + * A link type constraint was specified, but we were not able to + * use the POS(C) index optimization. In this case we have to + * add a filter to impose that link type constraint. 
+ */ + sitr.addFilter(new Filter() { + private static final long serialVersionUID = 1L; + @Override + public boolean isValid(final Object e) { + return ((ISPO) e).p().equals(linkTypeIV); + } + }); + } + + /* + * Optionally wrap the program specified filter. This filter will be + * pushed down onto the index. If the index is remote, then this is + * much more efficient. (If the index is local, then simply stacking + * striterators is just as efficient.) + */ + return program.constrainFilter(sitr); + } /** * Return the edges for the vertex. * + * @param p + * The {@link IGASProgram}. * @param u * The vertex. * @param edges - * Typesafe enumeration indicating which edges should be visited. + * Typesafe enumeration indicating which edges should be + * visited. + * * @return An iterator that will visit the edges for that vertex. */ - public ICloseableIterator<ISPO> getEdges( + @SuppressWarnings("unchecked") + public Iterator<ISPO> getEdges(final IGASProgram<?, ?, ?> p, @SuppressWarnings("rawtypes") final IV u, final EdgesEnum edges) { final AbstractTripleStore kb = getKB(); @@ -367,17 +431,14 @@ case NoEdges: return new EmptyCloseableIterator<ISPO>(); case InEdges: - return (ICloseableIterator<ISPO>) getEdges(kb, true/* inEdges */, u); + return getEdges(kb, true/* inEdges */, p, u); case OutEdges: - return (ICloseableIterator<ISPO>) getEdges(kb, false/* inEdges */, - u); + return getEdges(kb, false/* inEdges */, p, u); case AllEdges: { - final Striterator<Iterator<ISPO>, ISPO> a = getEdges(kb, - true/* inEdges */, u); - final Striterator<Iterator<ISPO>, ISPO> b = getEdges(kb, - false/* outEdges */, u); + final IStriterator a = getEdges(kb, true/* inEdges */, p, u); + final IStriterator b = getEdges(kb, false/* outEdges */, p, u); a.append(b); - return (ICloseableIterator<ISPO>) a; + return a; } default: throw new UnsupportedOperationException(edges.name()); Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/graph/impl/TestGather.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/graph/impl/TestGather.java 2013-08-27 17:05:59 UTC (rev 7345) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/graph/impl/TestGather.java 2013-08-27 19:24:25 UTC (rev 7346) @@ -32,9 +32,8 @@ import com.bigdata.rdf.graph.Factory; import com.bigdata.rdf.graph.IGASContext; import com.bigdata.rdf.graph.IGASEngine; -import com.bigdata.rdf.graph.IGASProgram; +import com.bigdata.rdf.graph.IGASScheduler; import com.bigdata.rdf.graph.IGASState; -import com.bigdata.rdf.graph.IGASScheduler; import com.bigdata.rdf.graph.impl.bd.BigdataGASEngine; import com.bigdata.rdf.graph.impl.bd.BigdataGASEngine.BigdataGraphAccessor; import com.bigdata.rdf.internal.IV; @@ -42,6 +41,8 @@ import com.bigdata.rdf.spo.ISPO; import com.bigdata.rdf.spo.SPO; +import cutthecrap.utils.striterators.IStriterator; + /** * Test class for GATHER. * @@ -65,8 +66,8 @@ * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> */ - private static class MockGASProgram implements - IGASProgram<Set<ISPO>, Set<ISPO>, Set<ISPO>> { + private static class MockGASProgram extends + BaseGASProgram<Set<ISPO>, Set<ISPO>, Set<ISPO>> { private final EdgesEnum gatherEdges; @@ -86,7 +87,19 @@ return EdgesEnum.NoEdges; } + /** + * {@inheritDoc} + * <p> + * Overridden to only visit the edges of the graph. 
+ */ @Override + public IStriterator constrainFilter(IStriterator itr) { + + return itr.addFilter(edgeOnlyFilter); + + } + + @Override public Factory<IV, Set<ISPO>> getVertexStateFactory() { return new Factory<IV, Set<ISPO>>() { @Override This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
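The pattern this refactoring sets up for every analytic: extend BaseGASProgram and override only the hooks that matter. The sketch below shows a hypothetical program restricted to a single link type, written against just the hooks visible in the diff above (the class name and constructor argument are invented for illustration; the concrete gather/apply/scatter methods are omitted):

import com.bigdata.rdf.graph.impl.BaseGASProgram;
import com.bigdata.rdf.internal.IV;

import cutthecrap.utils.striterators.IStriterator;

// Hypothetical analytic that traverses only one predicate (link type).
@SuppressWarnings("rawtypes")
abstract class SingleLinkTypeProgram<VS, ES, ST> extends
        BaseGASProgram<VS, ES, ST> {

    private final IV linkTypeIV; // the one predicate of interest (assumed)

    SingleLinkTypeProgram(final IV linkTypeIV) {
        this.linkTypeIV = linkTypeIV;
    }

    @Override
    public IV getLinkType() {
        // Returning non-null is what lets BigdataGASEngine bind P as a
        // constant and take the POS(C) key-range scan for the out-edges.
        return linkTypeIV;
    }

    @Override
    public IStriterator constrainFilter(final IStriterator itr) {
        // Visit edges only; attribute values are filtered out close to the
        // data, reusing the filter inherited from BaseGASProgram.
        return itr.addFilter(edgeOnlyFilter);
    }

}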
From: <tho...@us...> - 2013-08-27 17:06:06
Revision: 7345 http://bigdata.svn.sourceforge.net/bigdata/?rev=7345&view=rev Author: thompsonbry Date: 2013-08-27 17:05:59 +0000 (Tue, 27 Aug 2013) Log Message: ----------- replace Collections.emptyIterator() with the CTC EmptyIterator. The Collections class method was not introduced until 1.7. Modified Paths: -------------- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/util/GASImplUtil.java Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/util/GASImplUtil.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/util/GASImplUtil.java 2013-08-27 15:25:55 UTC (rev 7344) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/util/GASImplUtil.java 2013-08-27 17:05:59 UTC (rev 7345) @@ -1,11 +1,12 @@ package com.bigdata.rdf.graph.impl.util; -import java.util.Collections; import java.util.Iterator; import java.util.Set; import com.bigdata.rdf.internal.IV; +import cutthecrap.utils.striterators.EmptyIterator; + /** * Some utility method for the GAS Engine implementation. * @@ -14,7 +15,7 @@ public class GASImplUtil { @SuppressWarnings({ "unchecked", "rawtypes" }) - public static final Iterator<IV> EMPTY_VERTICES_ITERATOR = (Iterator) Collections.emptyIterator(); + public static final Iterator<IV> EMPTY_VERTICES_ITERATOR = EmptyIterator.DEFAULT; /** * Compact a collection of vertices into an ordered frontier. This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
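The compatibility point in that one-liner: Collections.emptyIterator() first appeared in JDK 1.7, while Collections.emptyList() has been there since 1.5, so on a pre-1.7 toolchain an empty typed iterator has to come from somewhere else (here, the CTC EmptyIterator). A plain-JDK sketch of the difference:

import java.util.Collections;
import java.util.Iterator;
import java.util.List;

public class EmptyIteratorCompat {

    public static void main(final String[] args) {
        // Portable back to JDK 1.5: an empty list hands out an empty iterator.
        final List<String> empty = Collections.emptyList();
        final Iterator<String> it = empty.iterator();
        System.out.println(it.hasNext()); // prints: false

        // JDK 1.7+ only -- this is the call the commit above backs out:
        // final Iterator<String> it7 = Collections.emptyIterator();
    }

}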
From: <tho...@us...> - 2013-08-27 15:26:02
Revision: 7344 http://bigdata.svn.sourceforge.net/bigdata/?rev=7344&view=rev Author: thompsonbry Date: 2013-08-27 15:25:55 +0000 (Tue, 27 Aug 2013) Log Message: ----------- Updated GAS properties for execution on mac mini environment. Modified Paths: -------------- branches/BIGDATA_RELEASE_1_2_0/bigdata-perf/gas/build.properties branches/BIGDATA_RELEASE_1_2_0/bigdata-perf/gas/build.xml Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-perf/gas/build.properties =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-perf/gas/build.properties 2013-08-27 14:23:55 UTC (rev 7343) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-perf/gas/build.properties 2013-08-27 15:25:55 UTC (rev 7344) @@ -53,7 +53,7 @@ # files to load. #load=-load xxx -load yyy -load= +load=-load ${bigdata.dir}/bigdata-rdf/src/resources/data/foaf # The #of threads to use for GATHER and SCATTER nthreads=4 @@ -67,6 +67,12 @@ # BFS, SSSP, etc. Will run corresponding XXX class. analytic=BFS +# The class used to schedule and compact the new frontier. +#scheduler=-schedulerClass com.bigdata.rdf.graph.impl.GASState$STScheduler +#scheduler=-schedulerClass com.bigdata.rdf.graph.impl.GASState$CHSScheduler +scheduler=-schedulerClass com.bigdata.rdf.graph.impl.scheduler.CHMScheduler +#scheduler=-schedulerClass com.bigdata.rdf.graph.impl.GASState$TLScheduler + # # Profiler parameters. # @@ -74,7 +80,8 @@ # No profiler. profilerAgent= # linux-64 -#profilerAgent=-agentpath:/usr/java/yjp-9.0.3/bin/linux-x86-64/libyjpagent.so +#profilerAgent=-agentpath:/nas/install/yjp-10.0.1/bin/linux-x86-64/libyjpagent.so +profilerAgent=-agentpath:/nas/install/yjp-12.0.6/bin/linux-x86-64/libyjpagent.so # Windows #profilerAgent="-agentpath:C:/Program Files/YourKit Java Profiler 9.0.2/bin/win32/yjpagent.dll" # Windows Server 2008 @@ -102,5 +109,5 @@ #gcdebug=-XX:+PrintGCDetails -XX:+PrintGCTimeStamps -Xloggc:jvm_gc.log # all jvm args for query. -jvmArgs=-server -Xmx${maxMem} -showversion ${gcopts} ${gcdebug} ${profiler} -Dlog4j.configuration=file:log4j.properties +jvmArgs=-server -Xmx${maxMem} -XX:MaxDirectMemorySize=2g -showversion ${gcopts} ${gcdebug} ${profiler} -Dlog4j.configuration=file:log4j.properties # -Dlog4j.debug Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-perf/gas/build.xml =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-perf/gas/build.xml 2013-08-27 14:23:55 UTC (rev 7343) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-perf/gas/build.xml 2013-08-27 15:25:55 UTC (rev 7344) @@ -29,10 +29,10 @@ <target name="run-gas-engine" description="Run a GAS Engine performance test."> - <java classname="com.bigdata.rdf.graph.analytics.${analytic}" + <java classname="com.bigdata.rdf.graph.impl.GASRunner" fork="true" failonerror="true" > - <arg line="-bufferMode ${bufferMode} -namespace ${namespace} -seed ${seed} -nsamples ${nsamples} -nthreads ${nthreads} ${journalPropertyFile}" /> + <arg line="-bufferMode ${bufferMode} -namespace ${namespace} -seed ${seed} -nsamples ${nsamples} -nthreads ${nthreads} ${load} com.bigdata.rdf.graph.analytics.${analytic} ${journalPropertyFile}" /> <!-- specify/override the journal file name. --> <jvmarg line="${jvmArgs}" /> <classpath> This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
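Reading the two files together: the run-gas-engine target just launches GASRunner with an argument line assembled from build.properties (note the scheduler property is defined there but not yet spliced into the <arg line> shown). A programmatic equivalent of that launch, with illustrative values (the journal property file name is an assumption):

public class RunGasSketch {

    public static void main(final String[] args) throws Exception {
        // Mirrors the <arg line> the run-gas-engine target assembles.
        com.bigdata.rdf.graph.impl.GASRunner.main(new String[] {
                "-bufferMode", "DiskRW",   // illustrative buffer mode
                "-namespace", "kb",
                "-seed", "217",            // illustrative seed
                "-nsamples", "100",
                "-nthreads", "4",
                "-load", "bigdata-rdf/src/resources/data/foaf",
                "com.bigdata.rdf.graph.analytics.BFS", // the analytic class
                "RWStore.properties"       // journal property file (assumed)
        });
    }

}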
From: <tho...@us...> - 2013-08-27 14:24:07
Revision: 7343 http://bigdata.svn.sourceforge.net/bigdata/?rev=7343&view=rev Author: thompsonbry Date: 2013-08-27 14:23:55 +0000 (Tue, 27 Aug 2013) Log Message: ----------- Refactored to abstract everything except for the GASRunner completely away from the IIndexManager and Journal. See #629 Modified Paths: -------------- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGASContext.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGASProgram.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGASState.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGraphAccessor.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/analytics/BFS.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/analytics/SSSP.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASContext.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASEngine.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASRunner.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASState.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASStats.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/VertexTaskFactory.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/graph/analytics/TestBFS.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/graph/analytics/TestBFS0.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/graph/analytics/TestSSSP.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/graph/impl/TestGather.java Added Paths: ----------- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGASScheduler.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGASSchedulerImpl.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IStaticFrontier.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/bd/ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/bd/BigdataGASEngine.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/bd/BigdataGASState.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/bd/BigdataGASUtil.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/scheduler/ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/scheduler/CHMScheduler.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/scheduler/CHSScheduler.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/scheduler/STScheduler.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/scheduler/TLScheduler.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/util/ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/util/GASImplUtil.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/util/MergeSortIterator.java Removed Paths: ------------- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IScheduler.java 
branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASGraphUtil.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/IStaticFrontier.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/MergeSortIterator.java Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGASContext.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGASContext.java 2013-08-26 19:13:58 UTC (rev 7342) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGASContext.java 2013-08-27 14:23:55 UTC (rev 7343) @@ -45,16 +45,5 @@ */ boolean doRound(IGASStats stats) throws Exception, ExecutionException, InterruptedException; - - /** - * Compute a reduction over the vertex state table (all vertices that have - * had their vertex state materialized). - * - * @param op - * The reduction operation. - * - * @return The reduction. - */ - <T> T reduce(IReducer<VS, ES, ST, T> op); } \ No newline at end of file Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGASProgram.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGASProgram.java 2013-08-26 19:13:58 UTC (rev 7342) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGASProgram.java 2013-08-27 14:23:55 UTC (rev 7343) @@ -17,9 +17,13 @@ * the generic type for the per-edge state, but that is not always * true. The SUM type is scoped to the GATHER + SUM operation (NOT * the computation). - * - * @author <a href="mailto:tho...@us...">Bryan - * Thompson</a> + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * + * TODO There should be a means to specify a filter on the possible + * predicates to be used for traversal. If there is a single predicate, + * then that gives us S+P bound. If there are multiple predicates, then + * we have an IElementFilter on P (in addition to the filter that is + * removing the Literals from the scan). */ @SuppressWarnings("rawtypes") public interface IGASProgram<VS, ES, ST> extends IGASOptions<VS, ES> { @@ -111,7 +115,7 @@ * @param u * The vertex. */ - void init(IGASState<VS, ES, ST> ctx, IV u); + void init(IGASState<VS, ES, ST> state, IV u); /** * GATHER is a map/reduce over the edges of the vertex. The SUM provides @@ -145,7 +149,7 @@ * depends on the algorithm. How can we get these constraints into * the API? */ - ST gather(IGASState<VS, ES, ST> ctx, IV u, ISPO e); + ST gather(IGASState<VS, ES, ST> state, IV u, ISPO e); /** * SUM is a pair-wise reduction that is applied during the GATHER. @@ -192,7 +196,7 @@ * when compared to either the frontier or the set of states that * have been in the frontier during the computation. */ - VS apply(IGASState<VS, ES, ST> ctx, IV u, ST sum); + VS apply(IGASState<VS, ES, ST> state, IV u, ST sum); /** * Return <code>true</code> iff the vertex should run its SCATTER phase. @@ -200,21 +204,40 @@ * on the APPLY) that the vertex has not changed. This can save a * substantial amount of effort. * - * @param ctx + * @param state * @param u * The vertex. * @return */ - boolean isChanged(IGASState<VS, ES, ST> ctx, IV u); + boolean isChanged(IGASState<VS, ES, ST> state, IV u); /** * - * @param ctx + * @param state * @param u * The vertex for which the scatter will being performed. * @param e * The edge. 
*/ - void scatter(IGASState<VS, ES, ST> ctx, IScheduler sch, IV u, ISPO e); + void scatter(IGASState<VS, ES, ST> state, IGASScheduler sch, IV u, ISPO e); + /** + * Return <code>true</code> iff the algorithm should continue. This is + * invoked after every iteration, once the new frontier has been computed + * and {@link IGASState#round()} has been advanced. An implementation may + * simple return <code>true</code>, in which case the algorithm will + * continue IFF the current frontier is not empty. + * <p> + * Note: While this can be used to make custom decisions concerning the + * halting criteria, it can also be used as an opportunity to handshake with + * a custom {@link IGraphAccessor} in order to process a dynamic graph. + * + * @param ctx + * The evaluation context. + * + * @return <code>true</code> if the algorithm should continue (as long as + * the frontier is non-empty). + */ + boolean nextRound(IGASContext ctx); + } \ No newline at end of file Copied: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGASScheduler.java (from rev 7335, branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IScheduler.java) =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGASScheduler.java (rev 0) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGASScheduler.java 2013-08-27 14:23:55 UTC (rev 7343) @@ -0,0 +1,21 @@ +package com.bigdata.rdf.graph; + +import com.bigdata.rdf.internal.IV; + +/** + * Interface schedules a vertex for execution. This interface is exposed to the + * {@link IGASProgram}. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + */ +public interface IGASScheduler { + + /** + * Add the vertex to the schedule. + * + * @param v + * The vertex. + */ + void schedule(@SuppressWarnings("rawtypes") IV v); + +} Added: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGASSchedulerImpl.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGASSchedulerImpl.java (rev 0) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGASSchedulerImpl.java 2013-08-27 14:23:55 UTC (rev 7343) @@ -0,0 +1,31 @@ +package com.bigdata.rdf.graph; + +import com.bigdata.rdf.internal.IV; + +/** + * Extended {@link IGASScheduler} interface. This interface is exposed to the + * implementation of the GAS Engine. The methods on this interface are NOT for + * use by the {@link IGASProgram} and MIGHT NOT (really, should not) be + * available on the {@link IGASScheduler} supplied to an {@link IGASProgram}. + * + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + */ +public interface IGASSchedulerImpl extends IGASScheduler { + + /** + * Compact the schedule into the new frontier. + * <p> + * Note: Typical contracts ensure that the frontier is compact (no + * duplicates) and in ascending {@link IV} order (this provides cache + * locality for the index reads, even if those reads are against indices + * wired into RAM). + */ + void compactFrontier(IStaticFrontier frontier); + + /** + * Reset all internal state (and get rid of any thread locals). 
+ */ + void clear(); + +} Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGASState.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGASState.java 2013-08-26 19:13:58 UTC (rev 7342) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGASState.java 2013-08-27 14:23:55 UTC (rev 7343) @@ -85,5 +85,47 @@ * @see IGASProgram#getEdgeStateFactory() */ ES getState(ISPO e); - + + /** + * The current frontier. + */ + IStaticFrontier frontier(); + + /** + * Return the {@link IGASSchedulerImpl}. + */ + IGASSchedulerImpl getScheduler(); + + /** + * Compute a reduction over the vertex state table (all vertices that have + * had their vertex state materialized). + * + * @param op + * The reduction operation. + * + * @return The reduction. + */ + <T> T reduce(IReducer<VS, ES, ST, T> op); + + /** + * End the current round, advance the round counter, and compact the new + * frontier. + */ + void endRound(); + + /** + * Conditionally log various interesting information about the state of the + * computation. + */ + void traceState(); + + /** + * Return a useful representation of an edge (non-batch API, debug only). + * + * @param e + * The edge. + * @return The representation of that edge. + */ + String toString(ISPO e); + } Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGraphAccessor.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGraphAccessor.java 2013-08-26 19:13:58 UTC (rev 7342) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGraphAccessor.java 2013-08-27 14:23:55 UTC (rev 7343) @@ -1,5 +1,9 @@ package com.bigdata.rdf.graph; +import com.bigdata.rdf.internal.IV; +import com.bigdata.rdf.spo.ISPO; +import com.bigdata.striterator.ICloseableIterator; + /** * Interface abstracts access to a backend graph implementation. * @@ -7,4 +11,22 @@ */ public interface IGraphAccessor { + /** + * Return the edges for the vertex. + * + * @param u + * The vertex. + * @param edges + * Typesafe enumeration indicating which edges should be visited. + * @return An iterator that will visit the edges for that vertex. + */ + ICloseableIterator<ISPO> getEdges(@SuppressWarnings("rawtypes") final IV u, + final EdgesEnum edges); + + /** + * Hook to advance the view of the graph. This is invoked at the end of each + * GAS computation round for a given {@link IGASProgram}. + */ + void advanceView(); + } Deleted: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IScheduler.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IScheduler.java 2013-08-26 19:13:58 UTC (rev 7342) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IScheduler.java 2013-08-27 14:23:55 UTC (rev 7343) @@ -1,20 +0,0 @@ -package com.bigdata.rdf.graph; - -import com.bigdata.rdf.internal.IV; - -/** - * Interface schedules a vertex for execution. - * - * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - */ -public interface IScheduler { - - /** - * Add the vertex to the schedule. - * - * @param v - * The vertex. 
- */ - void schedule(@SuppressWarnings("rawtypes") IV v); - -} Copied: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IStaticFrontier.java (from rev 7335, branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/IStaticFrontier.java) =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IStaticFrontier.java (rev 0) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IStaticFrontier.java 2013-08-27 14:23:55 UTC (rev 7343) @@ -0,0 +1,41 @@ +package com.bigdata.rdf.graph; + +import java.util.Iterator; + +import com.bigdata.rdf.internal.IV; + +/** + * Interface abstracts the fixed frontier as known on entry into a new + * round. + * + * @author <a href="mailto:tho...@us...">Bryan + * Thompson</a> + */ +@SuppressWarnings("rawtypes") +public interface IStaticFrontier extends Iterable<IV> { + + /** + * The number of vertices in the frontier. + * + * TODO Long? Or just do not allow in scale-out? + */ + int size(); + + /** + * Return <code>true</code> if the frontier is known to be empty. + */ + boolean isEmpty(); + + /** + * Reset the frontier from the {@link IV}s. + * + * @param minCapacity + * The minimum capacity of the new frontier. (A minimum capacity + * is specified since many techniques to compact the frontier can + * only estimate the required capacity.) + * @param vertices + * The vertices in the new frontier. + */ + void resetFrontier(int minCapacity, Iterator<IV> vertices); + +} Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/analytics/BFS.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/analytics/BFS.java 2013-08-26 19:13:58 UTC (rev 7342) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/analytics/BFS.java 2013-08-27 14:23:55 UTC (rev 7343) @@ -4,9 +4,10 @@ import com.bigdata.rdf.graph.EdgesEnum; import com.bigdata.rdf.graph.Factory; +import com.bigdata.rdf.graph.IGASContext; import com.bigdata.rdf.graph.IGASProgram; import com.bigdata.rdf.graph.IGASState; -import com.bigdata.rdf.graph.IScheduler; +import com.bigdata.rdf.graph.IGASScheduler; import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.spo.ISPO; @@ -178,7 +179,7 @@ */ @Override public void scatter(final IGASState<BFS.VS, BFS.ES, Void> state, - final IScheduler sch, final IV u, final ISPO e) { + final IGASScheduler sch, final IV u, final ISPO e) { // remote vertex state. final VS otherState = state.getState(e.o()); @@ -197,22 +198,11 @@ } -// /** -// * Performance testing harness. 
-// */ -// public static void main(final String[] args) throws Exception { -// -// new GASRunner<BFS.VS, BFS.ES, Void>(args) { -// -// @Override -// protected IGASProgram<BFS.VS, BFS.ES, Void> newGASProgram() { -// -// return new BFS(); -// -// } -// -// }.call(); -// -// } + @Override + public boolean nextRound(IGASContext ctx) { + return true; + + } + } Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/analytics/SSSP.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/analytics/SSSP.java 2013-08-26 19:13:58 UTC (rev 7342) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/analytics/SSSP.java 2013-08-27 14:23:55 UTC (rev 7343) @@ -5,9 +5,10 @@ import com.bigdata.rdf.graph.EdgesEnum; import com.bigdata.rdf.graph.Factory; import com.bigdata.rdf.graph.GASUtil; +import com.bigdata.rdf.graph.IGASContext; import com.bigdata.rdf.graph.IGASProgram; import com.bigdata.rdf.graph.IGASState; -import com.bigdata.rdf.graph.IScheduler; +import com.bigdata.rdf.graph.IGASScheduler; import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.spo.ISPO; @@ -278,7 +279,7 @@ */ @Override public void scatter(final IGASState<SSSP.VS, SSSP.ES, Integer> state, - final IScheduler sch, final IV u, final ISPO e) { + final IGASScheduler sch, final IV u, final ISPO e) { final IV other = GASUtil.getOtherVertex(u, e); @@ -310,22 +311,11 @@ } -// /** -// * Performance test harness. -// */ -// public static void main(final String[] args) throws Exception { -// -// new GASRunner<SSSP.VS, SSSP.ES, Integer>(args) { -// -// @Override -// protected IGASProgram<SSSP.VS, SSSP.ES, Integer> newGASProgram() { -// -// return new SSSP(); -// -// } -// -// }.call(); -// -// } + @Override + public boolean nextRound(IGASContext ctx) { + return true; + + } + } Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASContext.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASContext.java 2013-08-26 19:13:58 UTC (rev 7342) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASContext.java 2013-08-27 14:23:55 UTC (rev 7343) @@ -1,70 +1,40 @@ package com.bigdata.rdf.graph.impl; -import java.util.Iterator; import java.util.concurrent.Callable; import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; import org.apache.log4j.Logger; -import com.bigdata.btree.IIndex; -import com.bigdata.btree.IRangeQuery; -import com.bigdata.btree.ITuple; -import com.bigdata.btree.keys.IKeyBuilder; -import com.bigdata.btree.keys.SuccessorUtil; import com.bigdata.rdf.graph.EdgesEnum; import com.bigdata.rdf.graph.GASUtil; import com.bigdata.rdf.graph.IGASContext; import com.bigdata.rdf.graph.IGASProgram; +import com.bigdata.rdf.graph.IGASScheduler; import com.bigdata.rdf.graph.IGASState; import com.bigdata.rdf.graph.IGASStats; -import com.bigdata.rdf.graph.IReducer; -import com.bigdata.rdf.graph.IScheduler; -import com.bigdata.rdf.graph.impl.GASEngine.BigdataGraphAccessor; +import com.bigdata.rdf.graph.IGraphAccessor; +import com.bigdata.rdf.graph.IStaticFrontier; import com.bigdata.rdf.internal.IV; -import com.bigdata.rdf.internal.IVUtility; import com.bigdata.rdf.spo.ISPO; -import com.bigdata.rdf.spo.SPOFilter; -import com.bigdata.rdf.spo.SPOKeyOrder; -import com.bigdata.rdf.store.AbstractTripleStore; 
-import com.bigdata.relation.accesspath.ElementFilter; -import com.bigdata.relation.accesspath.EmptyCloseableIterator; -import com.bigdata.relation.accesspath.IElementFilter; import com.bigdata.striterator.ICloseableIterator; -import com.bigdata.striterator.Resolver; -import com.bigdata.striterator.Striterator; @SuppressWarnings("rawtypes") public class GASContext<VS, ES, ST> implements IGASContext<VS, ES, ST> { private static final Logger log = Logger.getLogger(GASContext.class); - /** - * Filter visits only edges (filters out attribute values). - * <p> - * Note: This filter is pushed down onto the AP and evaluated close to the - * data. - */ - static final IElementFilter<ISPO> edgeOnlyFilter = new SPOFilter<ISPO>() { - private static final long serialVersionUID = 1L; - - @Override - public boolean isValid(final Object e) { - return ((ISPO) e).o().isURI(); - } - }; - private final GASEngine gasEngine; /** * Used to access the graph (a KB instance). */ - private final BigdataGraphAccessor graphAccessor; + private final IGraphAccessor graphAccessor; /** * This {@link IGASState}. */ - private final GASState<VS, ES, ST> state; + private final IGASState<VS, ES, ST> gasState; /** * The graph analytic to be executed. @@ -78,12 +48,13 @@ * @param timestamp * The timestamp of the graph view (this should be a read-only * view for non-blocking index reads). - * @param program + * @param gasProgram * The program to execute against that graph. */ public GASContext(final GASEngine gasEngine, - final BigdataGraphAccessor graphAccessor, - final IGASProgram<VS, ES, ST> program) { + final IGraphAccessor graphAccessor, + final IGASState<VS, ES, ST> gasState, + final IGASProgram<VS, ES, ST> gasProgram) { if (gasEngine == null) throw new IllegalArgumentException(); @@ -91,22 +62,25 @@ if (graphAccessor == null) throw new IllegalArgumentException(); - if (program == null) + if (gasState == null) throw new IllegalArgumentException(); + if (gasProgram == null) + throw new IllegalArgumentException(); + this.gasEngine = gasEngine; this.graphAccessor = graphAccessor; - this.program = program; + this.program = gasProgram; - this.state = new GASState<VS, ES, ST>(gasEngine, this, program); + this.gasState = gasState; } @Override public IGASState<VS, ES, ST> getGASState() { - return state; + return gasState; } @Override @@ -119,7 +93,7 @@ final GASStats total = new GASStats(); - while (!state.frontier().isEmpty()) { + while (!gasState.frontier().isEmpty()) { final GASStats roundStats = new GASStats(); @@ -132,7 +106,7 @@ if (log.isInfoEnabled()) log.info("Done: " + total); - state.traceState(graphAccessor.getKB()); + gasState.traceState(); // Done return total; @@ -160,51 +134,11 @@ public boolean doRound(final IGASStats stats) throws InterruptedException, ExecutionException, Exception { - /* - * This is the new frontier. It is initially empty. All newly - * discovered vertices are inserted into this frontier. - * - * TODO This assumes that only SCATTER can schedule new vertices. If - * we also permit scheduling during GATHER (or APPLY), then that - * will require us to communicate about the new frontier during - * operations other than SCATTER. On a cluster, the communication - * overhead is real. On a single machine, it is completely - * artificial. (Some GAS programs visit all vertices in every round - * and thus do not use a scheduler at all and would not need to - * implement a SCATTER phase, at least, not to schedule vertices.) 
- */ - - final IScheduler sch = state.getScheduler(); - - try { - - return _doRound(stats, sch); - - } finally { - - // Ensure that thread-locals are released. - state.resetScheduler(); - - } - - - } - - private boolean _doRound(final IGASStats stats, final IScheduler sch) - throws InterruptedException, ExecutionException, Exception { - - /* - * Obtain a view on the graph. - * - * Note: This will automatically advance if there has been an - * intervening commit and the caller specified ITx.READ_COMMITTED. - */ - final AbstractTripleStore kb = graphAccessor.getKB(); - // The fontier for this round. - final IStaticFrontier f = state.frontier(); + final IStaticFrontier f = gasState.frontier(); - state.traceState(kb); + // Conditionally log the computation state. + gasState.traceState(); /* * TODO This logic allows us to push down the APPLY into the GATHER or @@ -263,7 +197,7 @@ } else { - gatherEdgeCount = gatherEdges(kb, f, gatherEdges, + gatherEdgeCount = gatherEdges(graphAccessor, f, gatherEdges, pushDownApplyInGather); } @@ -304,7 +238,8 @@ } else { - scatterEdgeCount = scatterEdges(kb, f, sch, scatterEdges, + scatterEdgeCount = scatterEdges(graphAccessor, f, + gasState.getScheduler(), scatterEdges, pushDownApplyInScatter); } @@ -358,11 +293,26 @@ } // End the round, advance the counter, and compact new frontier. - state.endRound(); + gasState.endRound(); - // True if the new frontier is empty. - return state.frontier().isEmpty(); + /* + * Handshake with the GASProgram. If it votes to continue -OR- the new + * frontier is not empty, then we will do another round. + */ + final boolean nextRound = program.nextRound(this) || !gasState.frontier().isEmpty(); + + if(nextRound) { + + /* + * Optionally advance the view of the graph before the next round. + */ + graphAccessor.advanceView(); + + } + + return nextRound; + } // doRound() /** @@ -376,166 +326,13 @@ for (IV u : f) { - program.apply(state, u, null/* sum */); + program.apply(gasState, u, null/* sum */); } } - static private final SPOKeyOrder getKeyOrder(final AbstractTripleStore kb, - final boolean inEdges) { - final SPOKeyOrder keyOrder; - if (inEdges) { - // in-edges: OSP / OCSP : [u] is the Object. - keyOrder = kb.isQuads() ? SPOKeyOrder.OCSP : SPOKeyOrder.OSP; - } else { - // out-edges: SPO / (SPOC|SOPC) : [u] is the Subject. - keyOrder = kb.isQuads() ? SPOKeyOrder.SPOC : SPOKeyOrder.SPO; - } - return keyOrder; - } - - @SuppressWarnings("unchecked") - static private Striterator<Iterator<ISPO>, ISPO> getEdges( - final AbstractTripleStore kb, final boolean inEdges, final IV u) { - - final SPOKeyOrder keyOrder = getKeyOrder(kb, inEdges); - - final IIndex ndx = kb.getSPORelation().getIndex(keyOrder); - - final IKeyBuilder keyBuilder = ndx.getIndexMetadata().getKeyBuilder(); - - keyBuilder.reset(); - - IVUtility.encode(keyBuilder, u); - - final byte[] fromKey = keyBuilder.getKey(); - - final byte[] toKey = SuccessorUtil.successor(fromKey.clone()); - - return (Striterator<Iterator<ISPO>, ISPO>) new Striterator( - ndx.rangeIterator(fromKey, toKey, 0/* capacity */, - IRangeQuery.DEFAULT, - ElementFilter.newInstance(edgeOnlyFilter))) - .addFilter(new Resolver() { - private static final long serialVersionUID = 1L; - - @Override - protected Object resolve(final Object e) { - final ITuple<ISPO> t = (ITuple<ISPO>) e; - return t.getObject(); - } - }); - - } - /** - * Return the edges for the vertex. - * - * @param u - * The vertex. - * @param edges - * Typesafe enumeration indicating which edges should be visited. 
- * @return An iterator that will visit the edges for that vertex. - * - * TODO There should be a means to specify a filter on the possible - * predicates to be used for traversal. If there is a single - * predicate, then that gives us S+P bound. If there are multiple - * predicates, then we have an IElementFilter on P (in addition to - * the filter that is removing the Literals from the scan). - */ - static private ICloseableIterator<ISPO> getEdges( - final AbstractTripleStore kb, final IV u, final EdgesEnum edges) { - - switch (edges) { - case NoEdges: - return new EmptyCloseableIterator<ISPO>(); - case InEdges: - return (ICloseableIterator<ISPO>) getEdges(kb, true/* inEdges */, u); - case OutEdges: - return (ICloseableIterator<ISPO>) getEdges(kb, false/* inEdges */, - u); - case AllEdges: { - final Striterator<Iterator<ISPO>, ISPO> a = getEdges(kb, - true/* inEdges */, u); - final Striterator<Iterator<ISPO>, ISPO> b = getEdges(kb, - false/* outEdges */, u); - a.append(b); - return (ICloseableIterator<ISPO>) a; - } - default: - throw new UnsupportedOperationException(edges.name()); - } - - } - - // private IChunkedIterator<ISPO> getInEdges(final AbstractTripleStore kb, - // final IV u) { - // - // // in-edges: OSP / OCSP : [u] is the Object. - // return kb - // .getSPORelation() - // .getAccessPath(null/* s */, null/* p */, u/* o */, null/* c */, - // edgeOnlyFilter).iterator(); - // - // } - // - // private IChunkedIterator<ISPO> getOutEdges(final AbstractTripleStore kb, - // final IV u) { - // - // // out-edges: SPO / SPOC : [u] is the Subject. - // return kb - // .getSPORelation() - // .getAccessPath(u/* s */, null/* p */, null/* o */, - // null/* c */, edgeOnlyFilter).iterator(); - // - // } - // - // /** - // * Return the edges for the vertex. - // * - // * @param u - // * The vertex. - // * @param edges - // * Typesafe enumeration indicating which edges should be visited. - // * @return An iterator that will visit the edges for that vertex. - // * - // * TODO There should be a means to specify a filter on the possible - // * predicates to be used for traversal. If there is a single - // * predicate, then that gives us S+P bound. If there are multiple - // * predicates, then we have an IElementFilter on P (in addition to - // * the filter that is removing the Literals from the scan). - // * - // * TODO Use the chunk parallelism? Explicit for(x : chunk)? This - // * could make it easier to collect the edges into an array (but that - // * is not required for powergraph). - // */ - // @SuppressWarnings("unchecked") - // private IChunkedIterator<ISPO> getEdges(final AbstractTripleStore kb, - // final IV u, final EdgesEnum edges) { - // - // switch (edges) { - // case NoEdges: - // return new EmptyChunkedIterator<ISPO>(null/* keyOrder */); - // case InEdges: - // return getInEdges(kb, u); - // case OutEdges: - // return getOutEdges(kb, u); - // case AllEdges:{ - // final IChunkedIterator<ISPO> a = getInEdges(kb, u); - // final IChunkedIterator<ISPO> b = getOutEdges(kb, u); - // final IChunkedIterator<ISPO> c = (IChunkedIterator<ISPO>) new - // ChunkedStriterator<IChunkedIterator<ISPO>, ISPO>( - // a).append(b); - // return c; - // } - // default: - // throw new UnsupportedOperationException(edges.name()); - // } - // - // } - - /** * @param inEdges * when <code>true</code> the GATHER is over the in-edges. * Otherwise it is over the out-edges. 
@@ -546,8 +343,8 @@ * @throws ExecutionException * @throws InterruptedException */ - private long scatterEdges(final AbstractTripleStore kb, - final IStaticFrontier f, final IScheduler sch, + private long scatterEdges(final IGraphAccessor graphAccessor, + final IStaticFrontier f, final IGASScheduler sch, final EdgesEnum scatterEdges, final boolean pushDownApply) throws InterruptedException, ExecutionException, Exception { @@ -558,7 +355,7 @@ public Callable<Long> newVertexTask(final IV u) { - return new ScatterTask(kb, u) { + return new ScatterTask(u) { @Override protected boolean pushDownApply() { return pushDownApply; @@ -570,9 +367,14 @@ } @Override - protected IScheduler scheduler() { + protected IGASScheduler scheduler() { return sch; } + + @Override + protected IGraphAccessor graphAccessor() { + return graphAccessor; + } }; }; } @@ -592,7 +394,7 @@ * @throws ExecutionException * @throws InterruptedException */ - private long gatherEdges(final AbstractTripleStore kb, + private long gatherEdges(final IGraphAccessor graphAccessor, final IStaticFrontier f, //final IScheduler sch, final EdgesEnum gatherEdges, final boolean pushDownApply) throws InterruptedException, ExecutionException, Exception { @@ -604,7 +406,7 @@ public Callable<Long> newVertexTask(final IV u) { - return new GatherTask(kb, u) { + return new GatherTask(u) { @Override protected boolean pushDownApply() { return pushDownApply; @@ -620,9 +422,14 @@ * for execution during the GATHER phase. */ @Override - protected IScheduler scheduler() { + protected IGASScheduler scheduler() { throw new UnsupportedOperationException(); } + + @Override + protected IGraphAccessor graphAccessor() { + return graphAccessor; + } }; }; } @@ -644,22 +451,21 @@ */ abstract private class VertexEdgesTask implements Callable<Long> { - protected final AbstractTripleStore kb; protected final IV u; - public VertexEdgesTask(final AbstractTripleStore kb, final IV u) { + public VertexEdgesTask(final IV u) { - this.kb = kb; - this.u = u; } + abstract protected IGraphAccessor graphAccessor(); + abstract protected boolean pushDownApply(); abstract protected EdgesEnum getEdgesEnum(); - abstract protected IScheduler scheduler(); + abstract protected IGASScheduler scheduler(); } @@ -671,9 +477,9 @@ */ abstract private class ScatterTask extends VertexEdgesTask { - public ScatterTask(final AbstractTripleStore kb, final IV u) { + public ScatterTask(final IV u) { - super(kb, u); + super(u); } @@ -695,11 +501,11 @@ * since we know that there are no duplicates in the frontier. */ - program.apply(state, u, null/* sum */); + program.apply(gasState, u, null/* sum */); } - if (!program.isChanged(state, u)) { + if (!program.isChanged(gasState, u)) { // Unchanged. Do not scatter. 
return 0L; @@ -711,9 +517,9 @@ */ long nedges = 0L; - final IScheduler sch = scheduler(); + final IGASScheduler sch = scheduler(); - final ICloseableIterator<ISPO> eitr = getEdges(kb, u, + final ICloseableIterator<ISPO> eitr = graphAccessor.getEdges(u, getEdgesEnum()); try { @@ -726,9 +532,9 @@ nedges++; if (TRACE) // TODO Batch resolve if @ TRACE - log.trace("e=" + kb.toString(e)); + log.trace("e=" + gasState.toString(e)); - program.scatter(state, sch, u, e); + program.scatter(gasState, sch, u, e); } @@ -752,9 +558,9 @@ */ abstract private class GatherTask extends VertexEdgesTask { - public GatherTask(final AbstractTripleStore kb, final IV u) { + public GatherTask(final IV u) { - super(kb, u); + super(u); } @@ -763,7 +569,7 @@ long nedges = 0; - final ICloseableIterator<ISPO> eitr = getEdges(kb, u, + final ICloseableIterator<ISPO> eitr = graphAccessor.getEdges(u, getEdgesEnum()); try { @@ -781,10 +587,10 @@ final ISPO e = eitr.next(); if (log.isTraceEnabled()) // TODO Batch resolve if @ TRACE - log.trace("u=" + u + ", e=" + kb.toString(e) + ", sum=" + log.trace("u=" + u + ", e=" + gasState.toString(e) + ", sum=" + left); - final ST right = program.gather(state, u, e); + final ST right = program.gather(gasState, u, e); if (first) { @@ -810,7 +616,7 @@ * frontier. */ - program.apply(state, u, left/* sum */); + program.apply(gasState, u, left/* sum */); } @@ -826,18 +632,4 @@ } // GatherTask - // TODO REDUCE : parallelize with nthreads. - @Override - public <T> T reduce(final IReducer<VS, ES, ST, T> op) { - - for (IV v : state.getKnownVertices()) { - - op.visit(state, v); - - } - - return op.get(); - - } - } // GASContext Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASEngine.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASEngine.java 2013-08-26 19:13:58 UTC (rev 7342) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASEngine.java 2013-08-27 14:23:55 UTC (rev 7343) @@ -1,7 +1,7 @@ package com.bigdata.rdf.graph.impl; import java.lang.reflect.Constructor; -import java.util.ArrayList; +import java.util.LinkedList; import java.util.List; import java.util.concurrent.Callable; import java.util.concurrent.ExecutorService; @@ -9,19 +9,15 @@ import java.util.concurrent.FutureTask; import java.util.concurrent.atomic.AtomicReference; -import com.bigdata.journal.IIndexManager; -import com.bigdata.journal.ITx; -import com.bigdata.journal.TimestampUtility; -import com.bigdata.rdf.graph.IGASContext; import com.bigdata.rdf.graph.IGASEngine; import com.bigdata.rdf.graph.IGASProgram; +import com.bigdata.rdf.graph.IGASScheduler; +import com.bigdata.rdf.graph.IGASSchedulerImpl; +import com.bigdata.rdf.graph.IGASState; import com.bigdata.rdf.graph.IGraphAccessor; -import com.bigdata.rdf.graph.IScheduler; -import com.bigdata.rdf.graph.impl.GASState.CHMScheduler; -import com.bigdata.rdf.graph.impl.GASState.MyScheduler; +import com.bigdata.rdf.graph.IStaticFrontier; +import com.bigdata.rdf.graph.impl.scheduler.CHMScheduler; import com.bigdata.rdf.internal.IV; -import com.bigdata.rdf.store.AbstractTripleStore; -import com.bigdata.service.IBigdataFederation; import com.bigdata.util.concurrent.DaemonThreadFactory; /** @@ -56,16 +52,11 @@ * @author <a href="mailto:tho...@us...">Bryan Thompson</a> */ @SuppressWarnings("rawtypes") -public class GASEngine implements IGASEngine { +abstract public class GASEngine implements IGASEngine { 
// private static final Logger log = Logger.getLogger(GASEngine.class); /** - * The {@link IIndexManager} is used to access the graph. - */ - private final IIndexManager indexManager; - - /** * The {@link ExecutorService} used to parallelize tasks (iff * {@link #nthreads} GT ONE). */ @@ -77,9 +68,9 @@ private final int nthreads; /** - * The factory for the {@link IScheduler}. + * The factory for the {@link IGASScheduler}. */ - private final AtomicReference<Class<MyScheduler>> schedulerClassRef; + private final AtomicReference<Class<IGASSchedulerImpl>> schedulerClassRef; /** * The parallelism for the SCATTER and GATHER phases. @@ -97,62 +88,26 @@ * @param nthreads * The number of threads to use for the SCATTER and GATHER * phases. - * - * TODO Scale-out: The {@link IIndexmanager} MAY be an - * {@link IBigdataFederation}. The {@link GASEngine} would - * automatically use remote indices. However, for proper - * scale-out we want to partition the work and the VS/ES so that - * would imply a different {@link IGASEngine} design. */ @SuppressWarnings("unchecked") - public GASEngine(final IIndexManager indexManager, final int nthreads) { + public GASEngine(final int nthreads) { - if (indexManager == null) - throw new IllegalArgumentException(); - if (nthreads <= 0) throw new IllegalArgumentException(); - this.indexManager = indexManager; - this.nthreads = nthreads; this.executorService = nthreads == 0 ? null : Executors .newFixedThreadPool(nthreads, new DaemonThreadFactory( GASEngine.class.getSimpleName())); - this.schedulerClassRef = new AtomicReference<Class<MyScheduler>>(); + this.schedulerClassRef = new AtomicReference<Class<IGASSchedulerImpl>>(); this.schedulerClassRef.set((Class) CHMScheduler.class); } - /** - * {@inheritDoc} - * - * FIXME Dynamic graphs: Allowing {@link ITx#READ_COMMITTED} to be specified - * for the timestamp this class provides some support for dynamic graphs, - * but for some use cases we would want to synchronize things such the - * iteration is performed (or re-converged) with each commit point or to - * replay a series of commit points (either through the commit record index - * or through the history index). - * <p> - * Note: READ_COMMITTED is NOT a good idea. It will use the wrong kind of - * index object (ReadCommittedView, which has a nasty synchronization hot - * spot). - */ @Override - public <VS, ES, ST> IGASContext<VS, ES, ST> newGASContext( - final IGraphAccessor graphAccessor, - final IGASProgram<VS, ES, ST> program) { - - final BigdataGraphAccessor tmp = (BigdataGraphAccessor) graphAccessor; - - return new GASContext<VS, ES, ST>(this/* GASEngine */, tmp, program); - - } - - @Override public void shutdown() { if (executorService != null) { @@ -176,14 +131,18 @@ /** * Factory for the parallelism strategy that is used to map a task across - * the frontier. + * the frontier. The returned {@link Callable} should be executed in the + * caller's thread. The {@link Callable} will schedule tasks that consume + * the frontier. A variety of frontier strategies are implemented. Those + * that execute in parallel do so using the thread pool associated with the + * {@link IGASEngine}. * * @param taskFactory * The task to be mapped across the frontier. * * @return The strategy that will map that task across the frontier. 
*/ - Callable<Long> newFrontierStrategy( + protected Callable<Long> newFrontierStrategy( final VertexTaskFactory<Long> taskFactory, final IStaticFrontier f) { if (nthreads == 1) @@ -273,8 +232,20 @@ @Override public Long call() throws Exception { - final List<FutureTask<Long>> tasks = new ArrayList<FutureTask<Long>>( - f.size()); + /* + * Note: This places the tasks onto the queue for the executor + * service in the caller's thread. Tasks begin executing as soon as + * they are submitted. This allows the threads that will consume the + * frontier to get started before all of the tasks have been + * created. + * + * TODO This does not check the futures until all tasks have been + * created. It would be nicer if we had a queue model going with one + * queue to submit the tasks and another to drain them. This would + * require either non-blocking operations in a single thread or two + * threads. + */ + final List<FutureTask<Long>> tasks = new LinkedList<FutureTask<Long>>(); long nedges = 0L; @@ -340,7 +311,7 @@ } - void setSchedulerClass(final Class<MyScheduler> newValue) { + public void setSchedulerClass(final Class<IGASSchedulerImpl> newValue) { if(newValue == null) throw new IllegalArgumentException(); @@ -349,110 +320,42 @@ } - MyScheduler newScheduler(final GASContext<?, ?, ?> gasContext) { + public Class<IGASSchedulerImpl> getSchedulerClass() { - final Class<MyScheduler> cls = schedulerClassRef.get(); + return schedulerClassRef.get(); - try { - - final Constructor<MyScheduler> ctor = cls - .getConstructor(new Class[] { GASEngine.class }); - - final MyScheduler sch = ctor.newInstance(new Object[] { this }); - - return sch; - - } catch (Exception e) { - - throw new RuntimeException(e); - - } - } - - public class BigdataGraphAccessor implements IGraphAccessor { - - private final String namespace; - private final long timestamp; - - /** - * - * @param namespace - * The namespace of the graph. - * @param timestamp - * The timestamp of the view. - */ - private BigdataGraphAccessor(final String namespace,final long timestamp) { - this.namespace = namespace; - this.timestamp = timestamp; - - } - - /** - * Return a view of the specified graph (aka KB) as of the specified - * timestamp. - * - * @return The graph. - * - * @throws RuntimeException - * if the graph could not be resolved. - */ - public AbstractTripleStore getKB() { + public IGASSchedulerImpl newScheduler() { - long timestamp = this.timestamp; + final Class<IGASSchedulerImpl> cls = schedulerClassRef.get(); - if (timestamp == ITx.READ_COMMITTED) { + try { - /** - * Note: This code is request the view as of the the last commit - * time. If we use ITx.READ_COMMITTED here then it will cause - * the Journal to provide us with a ReadCommittedIndex and that - * has a synchronization hot spot! 
- */ + final Constructor<IGASSchedulerImpl> ctor = cls + .getConstructor(new Class[] { GASEngine.class }); - timestamp = indexManager.getLastCommitTime(); + final IGASSchedulerImpl sch = ctor + .newInstance(new Object[] { this }); - } + return sch; - final AbstractTripleStore kb = (AbstractTripleStore) indexManager - .getResourceLocator().locate(namespace, timestamp); + } catch (Exception e) { - if (kb == null) { + throw new RuntimeException(e); - throw new RuntimeException("Not found: namespace=" + namespace - + ", timestamp=" + TimestampUtility.toString(timestamp)); - - } - - return kb; - } - public String getNamespace() { - return namespace; - } + } - public Long getTimestamp() { - return timestamp; - } + public <VS, ES, ST> IGASState<VS, ES, ST> newGASState( + final IGraphAccessor graphAccessor, + final IGASProgram<VS, ES, ST> gasProgram) { - } + final IGASSchedulerImpl gasScheduler = newScheduler(); - /** - * - * - * @param namespace - * The namespace of the graph. - * @param timestamp - * The timestamp of the view. - * @return - */ - public BigdataGraphAccessor newGraphAccessor(final String namespace, - final long timestamp) { + return new GASState<VS, ES, ST>(graphAccessor, gasScheduler, gasProgram); - return new BigdataGraphAccessor(namespace, timestamp); - } - + } // GASEngine Deleted: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASGraphUtil.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASGraphUtil.java 2013-08-26 19:13:58 UTC (rev 7342) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASGraphUtil.java 2013-08-27 14:23:55 UTC (rev 7343) @@ -1,159 +0,0 @@ -package com.bigdata.rdf.graph.impl; - -import java.util.HashSet; -import java.util.Random; -import java.util.Set; - -import org.apache.log4j.Logger; - -import com.bigdata.btree.BTree; -import com.bigdata.btree.IRangeQuery; -import com.bigdata.btree.Tuple; -import com.bigdata.rdf.internal.IV; -import com.bigdata.rdf.spo.ISPO; -import com.bigdata.rdf.store.AbstractTripleStore; - -/** - * Utility class for operations on the backing graph (sampling and the like). - * - * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * - * TODO Add a utility to find the vertex with the maximum degree. That requires - * a scan on SPO (or a distinct term advancer that computes the range count of - * the skipped interval). - */ -public class GASGraphUtil { - - private static final Logger log = Logger.getLogger(GASGraphUtil.class); - - /** - * Return a sample (without duplicates) of vertices from the graph. - * <p> - * Note: This sampling procedure has a bias in favor of the vertices with - * the most edges and properties (vertices are choosen randomly in - * proportion to the #of edges and properties for the vertex). - * - * @param desiredSampleSize - * The desired sample size. - * - * @return The distinct samples that were found. - */ - @SuppressWarnings("rawtypes") - static public IV[] getRandomSample(final Random r, - final AbstractTripleStore kb, final int desiredSampleSize) { - -// /* -// * TODO This assumes a local, non-sharded index. The specific approach -// * to identifying a starting vertex relies on the ILinearList API. If -// * the caller is specifying the starting vertex then we do not need to -// * do this. 
-// * -// * TODO The bias here is towards vertices having more out-edges and/or -// * attributes since the sample is uniform over the triples in the index -// * and a triple may be either an edge or an attribute value (or a link -// * attribute using RDR). -// */ -// final BTree ndx = (BTree) kb.getSPORelation().getPrimaryIndex(); -// -// // Truncate at MAX_INT. -// final int size = (int) Math.min(ndx.rangeCount(), Integer.MAX_VALUE); - - // Maximum number of samples to attempt. - final int limit = (int) Math.min(desiredSampleSize * 3L, - Integer.MAX_VALUE); - - final Set<IV> samples = new HashSet<IV>(); - - int round = 0; - while (samples.size() < desiredSampleSize && round++ < limit) { - - final IV iv = GASGraphUtil.getRandomVertex(r, kb); - - samples.add(iv); - - } - - return samples.toArray(new IV[samples.size()]); - - } - - /** - * Return a random vertex. - * - * @param kb - * @return - */ - @SuppressWarnings("rawtypes") - public static IV getRandomVertex(final Random r, final AbstractTripleStore kb) { - - /* - * TODO This assumes a local, non-sharded index. The specific - * approach to identifying a starting vertex relies on the - * ILinearList API. If the caller is specifying the starting vertex - * then we do not need to do this. - * - * TODO The bias here is towards vertices having more out-edges - * and/or attributes since the sample is uniform over the triples in - * the index and a triple may be either an edge or an attribute - * value (or a link attribute using RDR). - */ - final BTree ndx = (BTree) kb.getSPORelation().getPrimaryIndex(); - - // Select a random starting vertex. - IV startingVertex = null; - { - - // Truncate at MAX_INT. - final int size = (int) Math - .min(ndx.rangeCount(), Integer.MAX_VALUE); - - while (size > 0L && startingVertex == null) { - - final int rindex = r.nextInt(size); - - /* - * Use tuple that will return both the key and the value so we - * can decode the entire tuple. - */ - final Tuple<ISPO> tuple = new Tuple<ISPO>(ndx, IRangeQuery.KEYS - | IRangeQuery.VALS); - - if (ndx.valueAt(rindex, tuple) == null) { - - /* - * This is a deleted tuple. Try again. - * - * Note: This retry is NOT safe for production use. The - * index might not have any undeleted tuples. However, we - * should not be using an index without any undeleted tuples - * for a test harness, so this should be safe enough here. - * If you want to use this production, at a mimimum make - * sure that you limit the #of attempts for the outer loop. - */ - continue; - - } - - // Decode the selected edge. - final ISPO edge = (ISPO) ndx.getIndexMetadata() - .getTupleSerializer().deserialize(tuple); - - // Use the subject for that edge as the starting vertex. 
- startingVertex = edge.s(); - - if (log.isInfoEnabled()) - log.info("Starting vertex: " + startingVertex); - - } - - } - - if (startingVertex == null) - throw new RuntimeException("No starting vertex: nedges=" - + ndx.rangeCount()); - - return startingVertex; - - } - -} Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASRunner.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASRunner.java 2013-08-26 19:13:58 UTC (rev 7342) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASRunner.java 2013-08-27 14:23:55 UTC (rev 7343) @@ -23,10 +23,12 @@ import com.bigdata.rdf.graph.IGASContext; import com.bigdata.rdf.graph.IGASEngine; import com.bigdata.rdf.graph.IGASProgram; +import com.bigdata.rdf.graph.IGASScheduler; +import com.bigdata.rdf.graph.IGASSchedulerImpl; import com.bigdata.rdf.graph.IGASState; -import com.bigdata.rdf.graph.IScheduler; -import com.bigdata.rdf.graph.impl.GASEngine.BigdataGraphAccessor; -import com.bigdata.rdf.graph.impl.GASState.MyScheduler; +import com.bigdata.rdf.graph.impl.bd.BigdataGASEngine; +import com.bigdata.rdf.graph.impl.bd.BigdataGASEngine.BigdataGraphAccessor; +import com.bigdata.rdf.graph.impl.bd.BigdataGASUtil; import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.rio.LoadStats; import com.bigdata.rdf.sail.BigdataSail; @@ -86,9 +88,9 @@ private final String namespaceOverride; /** - * The {@link MyScheduler} class to use. + * The {@link IGASSchedulerImpl} class to use. */ - private final Class<MyScheduler> schedulerClassOverride; + private final Class<IGASSchedulerImpl> schedulerClassOverride; /** * When non-<code>null</code>, a list of zero or more resources to be @@ -148,8 +150,8 @@ * <dd>Overrides the {@link BufferMode} (if any) specified in the * <code>propertyFile</code>.</dd> * <dt>-schedulerClass</dt> - * <dd>Override the default {@link IScheduler}. Class must - * implement {@link MyScheduler}.</dd> + * <dd>Override the default {@link IGASScheduler}. Class must + * implement {@link IGASSchedulerImpl}.</dd> * <dt>-namespace</dt> * <dd>The namespace of the default SPARQL endpoint (the * namespace wil... [truncated message content] |
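The key behavioral addition in r7343 is the IGASProgram.nextRound(IGASContext) handshake, invoked after each round once the new frontier has been compacted and IGASState.round() has been advanced. A rough sketch of how an analytic might use the hook appears below; the subclass, its round budget, the assumption that BFS has a default constructor, and the assumption that round() returns an int are mine, not part of the commit:

import com.bigdata.rdf.graph.IGASContext;
import com.bigdata.rdf.graph.analytics.BFS;

// Hypothetical analytic: a BFS that votes to halt after a fixed round
// budget, i.e. a bounded-depth traversal.
@SuppressWarnings("rawtypes")
public class BoundedBFS extends BFS {

    private final int maxRounds;

    public BoundedBFS(final int maxRounds) {
        this.maxRounds = maxRounds;
    }

    @Override
    public boolean nextRound(final IGASContext ctx) {
        // Vote to continue only while under the round budget. Note how
        // GASContext.doRound() (above) combines this vote with a test of
        // the new frontier before deciding whether another round runs.
        return ctx.getGASState().round() < maxRounds;
    }
}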
From: <tho...@us...> - 2013-08-26 19:14:07
Revision: 7342 http://bigdata.svn.sourceforge.net/bigdata/?rev=7342&view=rev Author: thompsonbry Date: 2013-08-26 19:13:58 +0000 (Mon, 26 Aug 2013) Log Message: ----------- I added an abstraction for obtaining access to the graph in preparation for creating a main memory implementation of the GAS API. See #629 Modified Paths: -------------- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGASEngine.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/analytics/BFS.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/analytics/SSSP.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASContext.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASEngine.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASRunner.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/graph/analytics/TestBFS.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/graph/analytics/TestSSSP.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/graph/impl/TestGather.java Added Paths: ----------- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGraphAccessor.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/analytics/package.html branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/mem/ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/mem/package.html branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/package.html branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/package.html Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGASEngine.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGASEngine.java 2013-08-26 16:16:14 UTC (rev 7341) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGASEngine.java 2013-08-26 19:13:58 UTC (rev 7342) @@ -10,11 +10,8 @@ /** * Obtain an execution context for the specified {@link IGASProgram}. * - * @param namespace - * The namespace of the graph (KB instance). - * @param timestamp - * The timestamp of the graph view (this should be a read-only - * view for non-blocking index reads). + * @param graphAccessor + * Indicates the graph to be processed. * @param program * The program to execute against that graph. * @@ -30,8 +27,8 @@ * always true. The SUM type is scoped to the GATHER + SUM * operation (NOT the computation). */ - <VS, ES, ST> IGASContext<VS, ES, ST> newGASContext(String namespace, - long timestamp, IGASProgram<VS, ES, ST> program); + <VS, ES, ST> IGASContext<VS, ES, ST> newGASContext( + IGraphAccessor graphAccessor, IGASProgram<VS, ES, ST> program); /** * Polite shutdown. Added: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGraphAccessor.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGraphAccessor.java (rev 0) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGraphAccessor.java 2013-08-26 19:13:58 UTC (rev 7342) @@ -0,0 +1,10 @@ +package com.bigdata.rdf.graph; + +/** + * Interface abstracts access to a backend graph implementation. 
+ * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + */ +public interface IGraphAccessor { + +} Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/analytics/BFS.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/analytics/BFS.java 2013-08-26 16:16:14 UTC (rev 7341) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/analytics/BFS.java 2013-08-26 19:13:58 UTC (rev 7342) @@ -7,7 +7,6 @@ import com.bigdata.rdf.graph.IGASProgram; import com.bigdata.rdf.graph.IGASState; import com.bigdata.rdf.graph.IScheduler; -import com.bigdata.rdf.graph.impl.GASRunner; import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.spo.ISPO; @@ -198,22 +197,22 @@ } - /** - * Performance testing harness. - */ - public static void main(final String[] args) throws Exception { +// /** +// * Performance testing harness. +// */ +// public static void main(final String[] args) throws Exception { +// +// new GASRunner<BFS.VS, BFS.ES, Void>(args) { +// +// @Override +// protected IGASProgram<BFS.VS, BFS.ES, Void> newGASProgram() { +// +// return new BFS(); +// +// } +// +// }.call(); +// +// } - new GASRunner<BFS.VS, BFS.ES, Void>(args) { - - @Override - protected IGASProgram<BFS.VS, BFS.ES, Void> newGASProgram() { - - return new BFS(); - - } - - }.call(); - - } - } Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/analytics/SSSP.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/analytics/SSSP.java 2013-08-26 16:16:14 UTC (rev 7341) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/analytics/SSSP.java 2013-08-26 19:13:58 UTC (rev 7342) @@ -8,7 +8,6 @@ import com.bigdata.rdf.graph.IGASProgram; import com.bigdata.rdf.graph.IGASState; import com.bigdata.rdf.graph.IScheduler; -import com.bigdata.rdf.graph.impl.GASRunner; import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.spo.ISPO; @@ -311,22 +310,22 @@ } - /** - * Performance test harness. - */ - public static void main(final String[] args) throws Exception { +// /** +// * Performance test harness. +// */ +// public static void main(final String[] args) throws Exception { +// +// new GASRunner<SSSP.VS, SSSP.ES, Integer>(args) { +// +// @Override +// protected IGASProgram<SSSP.VS, SSSP.ES, Integer> newGASProgram() { +// +// return new SSSP(); +// +// } +// +// }.call(); +// +// } - new GASRunner<SSSP.VS, SSSP.ES, Integer>(args) { - - @Override - protected IGASProgram<SSSP.VS, SSSP.ES, Integer> newGASProgram() { - - return new SSSP(); - - } - - }.call(); - - } - } Added: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/analytics/package.html =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/analytics/package.html (rev 0) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/analytics/package.html 2013-08-26 19:13:58 UTC (rev 7342) @@ -0,0 +1,11 @@ +<html> +<head> +<title>Graph Analytics</title> +</head> +<body> +<p> +This package provides implementations of various graph analytics using the +GAS API. 
+</p> +</body> +</html> \ No newline at end of file Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASContext.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASContext.java 2013-08-26 16:16:14 UTC (rev 7341) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASContext.java 2013-08-26 19:13:58 UTC (rev 7342) @@ -12,7 +12,6 @@ import com.bigdata.btree.ITuple; import com.bigdata.btree.keys.IKeyBuilder; import com.bigdata.btree.keys.SuccessorUtil; -import com.bigdata.journal.ITx; import com.bigdata.rdf.graph.EdgesEnum; import com.bigdata.rdf.graph.GASUtil; import com.bigdata.rdf.graph.IGASContext; @@ -21,6 +20,7 @@ import com.bigdata.rdf.graph.IGASStats; import com.bigdata.rdf.graph.IReducer; import com.bigdata.rdf.graph.IScheduler; +import com.bigdata.rdf.graph.impl.GASEngine.BigdataGraphAccessor; import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.internal.IVUtility; import com.bigdata.rdf.spo.ISPO; @@ -57,18 +57,11 @@ private final GASEngine gasEngine; /** - * The graph (a KB instance). + * Used to access the graph (a KB instance). */ - private final String namespace; + private final BigdataGraphAccessor graphAccessor; /** - * The timestamp of the view of that graph. This MAY be - * {@link ITx#READ_COMMITTED} to use the current committed view of the graph - * for each iteration (dynamic graph). - */ - private final long timestamp; - - /** * This {@link IGASState}. */ private final GASState<VS, ES, ST> state; @@ -88,24 +81,23 @@ * @param program * The program to execute against that graph. */ - public GASContext(final GASEngine gasEngine, final String namespace, - final long timestamp, final IGASProgram<VS, ES, ST> program) { + public GASContext(final GASEngine gasEngine, + final BigdataGraphAccessor graphAccessor, + final IGASProgram<VS, ES, ST> program) { if (gasEngine == null) throw new IllegalArgumentException(); + if (graphAccessor == null) + throw new IllegalArgumentException(); + if (program == null) throw new IllegalArgumentException(); - if (namespace == null) - throw new IllegalArgumentException(); - this.gasEngine = gasEngine; - this.namespace = namespace; + this.graphAccessor = graphAccessor; - this.timestamp = timestamp; - this.program = program; this.state = new GASState<VS, ES, ST>(gasEngine, this, program); @@ -140,7 +132,7 @@ if (log.isInfoEnabled()) log.info("Done: " + total); - state.traceState(gasEngine.getKB(namespace, timestamp)); + state.traceState(graphAccessor.getKB()); // Done return total; @@ -207,7 +199,7 @@ * Note: This will automatically advance if there has been an * intervening commit and the caller specified ITx.READ_COMMITTED. */ - final AbstractTripleStore kb = gasEngine.getKB(namespace, timestamp); + final AbstractTripleStore kb = graphAccessor.getKB(); // The fontier for this round. 
final IStaticFrontier f = state.frontier(); Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASEngine.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASEngine.java 2013-08-26 16:16:14 UTC (rev 7341) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASEngine.java 2013-08-26 19:13:58 UTC (rev 7342) @@ -15,6 +15,7 @@ import com.bigdata.rdf.graph.IGASContext; import com.bigdata.rdf.graph.IGASEngine; import com.bigdata.rdf.graph.IGASProgram; +import com.bigdata.rdf.graph.IGraphAccessor; import com.bigdata.rdf.graph.IScheduler; import com.bigdata.rdf.graph.impl.GASState.CHMScheduler; import com.bigdata.rdf.graph.impl.GASState.MyScheduler; @@ -142,12 +143,13 @@ */ @Override public <VS, ES, ST> IGASContext<VS, ES, ST> newGASContext( - final String namespace, final long timestamp, + final IGraphAccessor graphAccessor, final IGASProgram<VS, ES, ST> program) { - return new GASContext<VS, ES, ST>(this/* GASEngine */, namespace, - timestamp, program); + final BigdataGraphAccessor tmp = (BigdataGraphAccessor) graphAccessor; + return new GASContext<VS, ES, ST>(this/* GASEngine */, tmp, program); + } @Override @@ -173,48 +175,6 @@ } /** - * Return a view of the specified graph (aka KB) as of the specified - * timestamp. - * - * @param namespace - * The namespace of the graph. - * @param timestamp - * The timestamp of the view. - * @return The graph. - * - * @throws RuntimeException - * if the graph could not be resolved. - */ - protected AbstractTripleStore getKB(final String namespace, long timestamp) { - - if (timestamp == ITx.READ_COMMITTED) { - - /** - * Note: This code is request the view as of the the last commit - * time. If we use ITx.READ_COMMITTED here then it will cause the - * Journal to provide us with a ReadCommittedIndex and that has a - * synchronization hot spot! - */ - - timestamp = indexManager.getLastCommitTime(); - - } - - final AbstractTripleStore kb = (AbstractTripleStore) indexManager - .getResourceLocator().locate(namespace, timestamp); - - if (kb == null) { - - throw new RuntimeException("Not found: namespace=" + namespace - + ", timestamp=" + TimestampUtility.toString(timestamp)); - - } - - return kb; - - } - - /** * Factory for the parallelism strategy that is used to map a task across * the frontier. * @@ -409,5 +369,90 @@ } } + + public class BigdataGraphAccessor implements IGraphAccessor { + + private final String namespace; + private final long timestamp; + + /** + * + * @param namespace + * The namespace of the graph. + * @param timestamp + * The timestamp of the view. + */ + private BigdataGraphAccessor(final String namespace,final long timestamp) { + this.namespace = namespace; + this.timestamp = timestamp; + + } + + /** + * Return a view of the specified graph (aka KB) as of the specified + * timestamp. + * + * @return The graph. + * + * @throws RuntimeException + * if the graph could not be resolved. + */ + public AbstractTripleStore getKB() { + + long timestamp = this.timestamp; + + if (timestamp == ITx.READ_COMMITTED) { + + /** + * Note: This code is request the view as of the the last commit + * time. If we use ITx.READ_COMMITTED here then it will cause + * the Journal to provide us with a ReadCommittedIndex and that + * has a synchronization hot spot! 
+ */ + + timestamp = indexManager.getLastCommitTime(); + + } + + final AbstractTripleStore kb = (AbstractTripleStore) indexManager + .getResourceLocator().locate(namespace, timestamp); + + if (kb == null) { + + throw new RuntimeException("Not found: namespace=" + namespace + + ", timestamp=" + TimestampUtility.toString(timestamp)); + + } + + return kb; + + } + + public String getNamespace() { + return namespace; + } + + public Long getTimestamp() { + return timestamp; + } + + } + + /** + * + * + * @param namespace + * The namespace of the graph. + * @param timestamp + * The timestamp of the view. + * @return + */ + public BigdataGraphAccessor newGraphAccessor(final String namespace, + final long timestamp) { + + return new BigdataGraphAccessor(namespace, timestamp); + + } + } // GASEngine Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASRunner.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASRunner.java 2013-08-26 16:16:14 UTC (rev 7341) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASRunner.java 2013-08-26 19:13:58 UTC (rev 7342) @@ -7,6 +7,7 @@ import java.io.InputStream; import java.io.InputStreamReader; import java.io.Reader; +import java.lang.reflect.Constructor; import java.util.LinkedHashSet; import java.util.Properties; import java.util.Random; @@ -24,6 +25,7 @@ import com.bigdata.rdf.graph.IGASProgram; import com.bigdata.rdf.graph.IGASState; import com.bigdata.rdf.graph.IScheduler; +import com.bigdata.rdf.graph.impl.GASEngine.BigdataGraphAccessor; import com.bigdata.rdf.graph.impl.GASState.MyScheduler; import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.rio.LoadStats; @@ -39,7 +41,7 @@ * vertices? For such algorithms, we just run them once per graph * (unless the graph is dynamic). */ -abstract public class GASRunner<VS, ES, ST> implements Callable<GASStats> { +public class GASRunner<VS, ES, ST> implements Callable<GASStats> { private static final Logger log = Logger.getLogger(GASRunner.class); @@ -64,6 +66,11 @@ private final int nthreads; /** + * The analytic class to be executed. + */ + private final Class<IGASProgram<VS, ES, ST>> analyticClass; + + /** * The property file */ private final String propertyFile; @@ -107,7 +114,7 @@ } - System.err.println("[options] propertyFile"); + System.err.println("[options] analyticClass propertyFile"); System.exit(status); @@ -118,7 +125,7 @@ * * @param args * USAGE:<br/> - * <code>(options) propertyFile</code> + * <code>(options) analyticClass propertyFile</code> * <p> * <i>Where:</i> * <dl> @@ -217,7 +224,7 @@ * Check for the remaining (required) argument(s). */ final int nremaining = args.length - i; - if (nremaining != 1) { + if (nremaining != 2) { /* * There are either too many or too few arguments remaining. */ @@ -226,6 +233,18 @@ } /* + * The analytic to be executed. + */ + { + + final String s = args[i++]; + + this.analyticClass = (Class<IGASProgram<VS, ES, ST>>) Class + .forName(s); + + } + + /* * Property file. */ this.propertyFile = args[i++]; @@ -248,10 +267,30 @@ } /** - * Return the {@link IGASProgram} to be evaluated. + * Return an instance of the {@link IGASProgram} to be evaluated. 
*/ - abstract protected IGASProgram<VS, ES, ST> newGASProgram(); + protected IGASProgram<VS, ES, ST> newGASProgram() { + final Class<IGASProgram<VS, ES, ST>> cls = analyticClass; + + try { + + final Constructor<IGASProgram<VS, ES, ST>> ctor = cls + .getConstructor(new Class[] {}); + + final IGASProgram<VS, ES, ST> gasProgram = ctor + .newInstance(new Object[] {}); + + return gasProgram; + + } catch (Exception e) { + + throw new RuntimeException(e); + + } + + } + private Properties getProperties(final String resource) throws IOException { if (log.isInfoEnabled()) @@ -564,8 +603,11 @@ final IGASProgram<VS, ES, ST> gasProgram = newGASProgram(); + final BigdataGraphAccessor graphAccessor = ((GASEngine) gasEngine) + .newGraphAccessor(namespace, jnl.getLastCommitTime()); + final IGASContext<VS, ES, ST> gasContext = gasEngine.newGASContext( - namespace, jnl.getLastCommitTime(), gasProgram); + graphAccessor, gasProgram); final IGASState<VS, ES, ST> gasState = gasContext.getGASState(); @@ -613,4 +655,16 @@ } + /** + * Performance testing harness. + * + * @see #GASRunner(String[]) + */ + @SuppressWarnings("rawtypes") + public static void main(final String[] args) throws Exception { + + new GASRunner(args).call(); + + } + } Added: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/mem/package.html =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/mem/package.html (rev 0) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/mem/package.html 2013-08-26 19:13:58 UTC (rev 7342) @@ -0,0 +1,11 @@ +<html> +<head> +<title>GAS Engine for Main Memory</title> +</head> +<body> +<p> +This is an implementation of the GAS API for main memory. It is intended +to provide a fast, light-weight implementation with high concurrency. +</p> +</body> +</html> \ No newline at end of file Added: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/package.html =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/package.html (rev 0) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/package.html 2013-08-26 19:13:58 UTC (rev 7342) @@ -0,0 +1,12 @@ +<html> +<head> +<title>GAS Engine Implementation for Bigdata</title> +</head> +<body> +<p> +This is an implementation of the GAS API for the bigdata platform. It is +optimized to run over the RDF graphs as indexed in a bigdata triple store +or quad store. +</p> +</body> +</html> \ No newline at end of file Added: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/package.html =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/package.html (rev 0) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/package.html 2013-08-26 19:13:58 UTC (rev 7342) @@ -0,0 +1,11 @@ +<html> +<head> +<title>GAS Engine API</title> +</head> +<body> +<p> +The GAS (Gather Apply Scatter) API was developed for PowerGraph. This is +a port of that API to the Java platform. 
+</p> +</body> +</html> \ No newline at end of file Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/graph/analytics/TestBFS.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/graph/analytics/TestBFS.java 2013-08-26 16:16:14 UTC (rev 7341) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/graph/analytics/TestBFS.java 2013-08-26 19:13:58 UTC (rev 7342) @@ -23,7 +23,6 @@ */ package com.bigdata.rdf.graph.analytics; -import com.bigdata.journal.ITx; import com.bigdata.rdf.graph.AbstractGraphTestCase; import com.bigdata.rdf.graph.IGASContext; import com.bigdata.rdf.graph.IGASEngine; @@ -31,6 +30,7 @@ import com.bigdata.rdf.graph.analytics.BFS.ES; import com.bigdata.rdf.graph.analytics.BFS.VS; import com.bigdata.rdf.graph.impl.GASEngine; +import com.bigdata.rdf.graph.impl.GASEngine.BigdataGraphAccessor; /** * Test class for Breadth First Search (BFS) traversal. @@ -58,9 +58,13 @@ try { + final BigdataGraphAccessor graphAccessor = ((GASEngine) gasEngine) + .newGraphAccessor(sail.getDatabase().getNamespace(), sail + .getDatabase().getIndexManager() + .getLastCommitTime()); + final IGASContext<BFS.VS, BFS.ES, Void> gasContext = gasEngine - .newGASContext(sail.getDatabase().getNamespace(), - ITx.READ_COMMITTED, new BFS()); + .newGASContext(graphAccessor, new BFS()); final IGASState<VS, ES, Void> gasState = gasContext.getGASState(); Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/graph/analytics/TestSSSP.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/graph/analytics/TestSSSP.java 2013-08-26 16:16:14 UTC (rev 7341) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/graph/analytics/TestSSSP.java 2013-08-26 19:13:58 UTC (rev 7342) @@ -23,12 +23,12 @@ */ package com.bigdata.rdf.graph.analytics; -import com.bigdata.journal.ITx; import com.bigdata.rdf.graph.AbstractGraphTestCase; import com.bigdata.rdf.graph.IGASContext; import com.bigdata.rdf.graph.IGASEngine; import com.bigdata.rdf.graph.IGASState; import com.bigdata.rdf.graph.impl.GASEngine; +import com.bigdata.rdf.graph.impl.GASEngine.BigdataGraphAccessor; /** * Test class for SSP traversal. 
@@ -54,9 +54,13 @@ try { + final BigdataGraphAccessor graphAccessor = ((GASEngine) gasEngine) + .newGraphAccessor(sail.getDatabase().getNamespace(), sail + .getDatabase().getIndexManager() + .getLastCommitTime()); + final IGASContext<SSSP.VS, SSSP.ES, Integer> gasContext = gasEngine - .newGASContext(sail.getDatabase().getNamespace(), - ITx.READ_COMMITTED, new SSSP()); + .newGASContext(graphAccessor, new SSSP()); final IGASState<SSSP.VS, SSSP.ES, Integer> gasState = gasContext.getGASState(); Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/graph/impl/TestGather.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/graph/impl/TestGather.java 2013-08-26 16:16:14 UTC (rev 7341) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/graph/impl/TestGather.java 2013-08-26 19:13:58 UTC (rev 7342) @@ -27,7 +27,6 @@ import java.util.LinkedHashSet; import java.util.Set; -import com.bigdata.journal.ITx; import com.bigdata.rdf.graph.AbstractGraphTestCase; import com.bigdata.rdf.graph.EdgesEnum; import com.bigdata.rdf.graph.Factory; @@ -36,6 +35,7 @@ import com.bigdata.rdf.graph.IGASProgram; import com.bigdata.rdf.graph.IGASState; import com.bigdata.rdf.graph.IScheduler; +import com.bigdata.rdf.graph.impl.GASEngine.BigdataGraphAccessor; import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.model.StatementEnum; import com.bigdata.rdf.spo.ISPO; @@ -252,10 +252,15 @@ .getIndexManager(), 1/* nthreads */); try { - + + final BigdataGraphAccessor graphAccessor = ((GASEngine) gasEngine) + .newGraphAccessor(sail.getDatabase().getNamespace(), sail + .getDatabase().getIndexManager() + .getLastCommitTime()); + final IGASContext<Set<ISPO>, Set<ISPO>, Set<ISPO>> gasContext = gasEngine - .newGASContext(sail.getDatabase().getNamespace(), - ITx.READ_COMMITTED, new MockGASProgram(gatherEdges)); + .newGASContext(graphAccessor, new MockGASProgram( + gatherEdges)); final IGASState<Set<ISPO>, Set<ISPO>, Set<ISPO>> gasState = gasContext .getGASState(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
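For orientation, the calling pattern after this change can be condensed into a short sketch. This is a minimal illustration rather than code from the commit: the class name BFSAccessorExample, the runBFS method, and the choice of 4 threads are invented for the example, and engine shutdown is omitted since that API is not shown in the diff. The point is that the graph view (namespace plus commit point) now travels as a BigdataGraphAccessor instead of separate (namespace, timestamp) arguments:

import com.bigdata.journal.IIndexManager;
import com.bigdata.rdf.graph.IGASContext;
import com.bigdata.rdf.graph.IGASState;
import com.bigdata.rdf.graph.analytics.BFS;
import com.bigdata.rdf.graph.impl.GASEngine;
import com.bigdata.rdf.graph.impl.GASEngine.BigdataGraphAccessor;
import com.bigdata.rdf.internal.IV;

public class BFSAccessorExample {

    /**
     * Run BFS from one vertex using the accessor-based API introduced in
     * r7342. The indexManager, namespace and startingVertex parameters are
     * assumed to come from the caller's environment.
     */
    @SuppressWarnings("rawtypes")
    public static void runBFS(final IIndexManager indexManager,
            final String namespace, final IV startingVertex) throws Exception {

        final GASEngine gasEngine = new GASEngine(indexManager, 4/* nthreads */);

        // The view of the graph is encapsulated by the accessor rather than
        // passed to newGASContext() as (namespace, timestamp).
        final BigdataGraphAccessor graphAccessor = gasEngine.newGraphAccessor(
                namespace, indexManager.getLastCommitTime());

        final IGASContext<BFS.VS, BFS.ES, Void> gasContext = gasEngine
                .newGASContext(graphAccessor, new BFS());

        final IGASState<BFS.VS, BFS.ES, Void> gasState = gasContext.getGASState();

        gasState.init(startingVertex); // seed the frontier.

        gasContext.call(); // run rounds until the frontier is exhausted.

    }

}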
From: <jer...@us...> - 2013-08-26 16:16:25
Revision: 7341 http://bigdata.svn.sourceforge.net/bigdata/?rev=7341&view=rev Author: jeremy_carroll Date: 2013-08-26 16:16:14 +0000 (Mon, 26 Aug 2013) Log Message: ----------- new test case for trac 731 - using CBD describe mode, and an insert Modified Paths: -------------- branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/webapp/TestInsertFilterFalse727.java branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/webapp/TestNanoSparqlServerWithProxyIndexManager.java Added Paths: ----------- branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/webapp/AbstractSimpleInsertTest.java branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/webapp/TestCBD731.java Copied: branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/webapp/AbstractSimpleInsertTest.java (from rev 7340, branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/webapp/TestInsertFilterFalse727.java) =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/webapp/AbstractSimpleInsertTest.java (rev 0) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/webapp/AbstractSimpleInsertTest.java 2013-08-26 16:16:14 UTC (rev 7341) @@ -0,0 +1,149 @@ +/** +Copyright (C) SYSTAP, LLC 2006-2013. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* +Portions of this code are: + +Copyright Aduna (http://www.aduna-software.com/) � 2001-2007 + +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +package com.bigdata.rdf.sail.webapp; + +import org.openrdf.model.Resource; +import org.openrdf.model.URI; +import org.openrdf.model.Value; +import org.openrdf.model.ValueFactory; +import org.openrdf.model.impl.ValueFactoryImpl; +import org.openrdf.model.vocabulary.RDF; +import org.openrdf.model.vocabulary.RDFS; +import org.openrdf.model.vocabulary.XMLSchema; +import org.openrdf.query.parser.sparql.DC; +import org.openrdf.query.parser.sparql.FOAF; +import org.openrdf.repository.RepositoryException; + +import com.bigdata.journal.IIndexManager; + +public class AbstractSimpleInsertTest<S extends IIndexManager> extends + AbstractTestNanoSparqlClient<S> { + + private static final String EX_NS = "http://example.org/"; + private ValueFactory f = new ValueFactoryImpl(); + private URI bob; + + public AbstractSimpleInsertTest() { + } + + public AbstractSimpleInsertTest(String name) { + super(name); + } + + @Override + public void setUp() throws Exception { + + super.setUp(); + + bob = f.createURI(EX_NS, "bob"); + } + + public void tearDown() throws Exception { + + bob = null; + + f = null; + + super.tearDown(); + + } + + /** + * Get a set of useful namespace prefix declarations. + * + * @return namespace prefix declarations for rdf, rdfs, dc, foaf and ex. + */ + protected String getNamespaceDeclarations() { + final StringBuilder declarations = new StringBuilder(); + declarations.append("PREFIX rdf: <" + RDF.NAMESPACE + "> \n"); + declarations.append("PREFIX rdfs: <" + RDFS.NAMESPACE + "> \n"); + declarations.append("PREFIX dc: <" + DC.NAMESPACE + "> \n"); + declarations.append("PREFIX foaf: <" + FOAF.NAMESPACE + "> \n"); + declarations.append("PREFIX ex: <" + EX_NS + "> \n"); + declarations.append("PREFIX xsd: <" + XMLSchema.NAMESPACE + "> \n"); + declarations.append("\n"); + + return declarations.toString(); + } + + protected boolean hasStatement(final Resource subj, final URI pred, final Value obj, final boolean includeInferred, final Resource... contexts) + throws RepositoryException { + + try { + + return m_repo.getStatements(subj, pred, obj, includeInferred, + contexts).hasNext(); + + } catch (Exception e) { + + throw new RepositoryException(e); + + } + + } + + protected void executeInsert(String where, boolean expected) throws RepositoryException, Exception { + final StringBuilder update = new StringBuilder(); + update.append(getNamespaceDeclarations()); + update.append("INSERT { ex:bob rdfs:label \"Bob\" . 
} WHERE { " + where +" }"); + + assertFalse(hasStatement(bob, RDFS.LABEL, f.createLiteral("Bob"), true)); + + m_repo.prepareUpdate(update.toString()).evaluate(); + + assertEquals(expected, hasStatement(bob, RDFS.LABEL, f.createLiteral("Bob"), true)); + } + +} Added: branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/webapp/TestCBD731.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/webapp/TestCBD731.java (rev 0) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/webapp/TestCBD731.java 2013-08-26 16:16:14 UTC (rev 7341) @@ -0,0 +1,95 @@ +/** +Copyright (C) SYSTAP, LLC 2006-2013. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* +Portions of this code are: + +Copyright Aduna (http://www.aduna-software.com/) � 2001-2007 + +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +package com.bigdata.rdf.sail.webapp; + + +import junit.framework.Test; + + + +import com.bigdata.journal.IIndexManager; + +/** + * Proxied test suite. + * We test the behavior reported in trac 731 - the bug resport uses the repo properties to set up the describe mode, + * the behavior reproduced just using a query hint. 
+ */ +public class TestCBD731<S extends IIndexManager> extends AbstractSimpleInsertTest<S> { + + static public Test suite() { + return ProxySuiteHelper.suiteWhenStandalone(TestCBD731.class,"test.*", TestMode.quads,TestMode.sids,TestMode.triples); + } + public TestCBD731() { + + } + + public TestCBD731(final String name) { + + super(name); + + } + + public void testInsertSCBD() + throws Exception + { + executeInsert("hint:Query hint:describeMode \"SCBD\"", true); + } + public void testInsertCBD() + throws Exception + { + executeInsert("hint:Query hint:describeMode \"CBD\"", true); + } + +} Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/webapp/TestInsertFilterFalse727.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/webapp/TestInsertFilterFalse727.java 2013-08-26 00:50:57 UTC (rev 7340) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/webapp/TestInsertFilterFalse727.java 2013-08-26 16:16:14 UTC (rev 7341) @@ -1,5 +1,5 @@ /** -Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. +Copyright (C) SYSTAP, LLC 2006-2013. All rights reserved. Contact: SYSTAP, LLC @@ -20,54 +20,12 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -/* -Portions of this code are: -Copyright Aduna (http://www.aduna-software.com/) � 2001-2007 - -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - * Neither the name of the copyright holder nor the names of its contributors - may be used to endorse or promote products derived from this software - without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR -ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -*/ - package com.bigdata.rdf.sail.webapp; import junit.framework.Test; -import org.openrdf.model.Resource; -import org.openrdf.model.URI; -import org.openrdf.model.Value; -import org.openrdf.model.ValueFactory; -import org.openrdf.model.impl.ValueFactoryImpl; -import org.openrdf.model.vocabulary.RDF; -import org.openrdf.model.vocabulary.RDFS; -import org.openrdf.model.vocabulary.XMLSchema; -import org.openrdf.query.parser.sparql.DC; -import org.openrdf.query.parser.sparql.FOAF; -import org.openrdf.repository.RepositoryException; import com.bigdata.journal.IIndexManager; @@ -76,7 +34,7 @@ * We test the behavior reported in trac 727. */ public class TestInsertFilterFalse727<S extends IIndexManager> extends - AbstractTestNanoSparqlClient<S> { + AbstractSimpleInsertTest<S> { static public Test suite() { return ProxySuiteHelper.suiteWhenStandalone(TestInsertFilterFalse727.class,"test.*", TestMode.quads,TestMode.sids,TestMode.triples); @@ -91,86 +49,12 @@ } - private static final String EX_NS = "http://example.org/"; - - private ValueFactory f = new ValueFactoryImpl(); - private URI bob; -// protected RemoteRepository m_repo; - - - @Override - public void setUp() throws Exception { - - super.setUp(); - - bob = f.createURI(EX_NS, "bob"); - } - - public void tearDown() throws Exception { - - bob = null; - - f = null; - - super.tearDown(); - - } - - - - /** - * Get a set of useful namespace prefix declarations. - * - * @return namespace prefix declarations for rdf, rdfs, dc, foaf and ex. - */ - protected String getNamespaceDeclarations() { - final StringBuilder declarations = new StringBuilder(); - declarations.append("PREFIX rdf: <" + RDF.NAMESPACE + "> \n"); - declarations.append("PREFIX rdfs: <" + RDFS.NAMESPACE + "> \n"); - declarations.append("PREFIX dc: <" + DC.NAMESPACE + "> \n"); - declarations.append("PREFIX foaf: <" + FOAF.NAMESPACE + "> \n"); - declarations.append("PREFIX ex: <" + EX_NS + "> \n"); - declarations.append("PREFIX xsd: <" + XMLSchema.NAMESPACE + "> \n"); - declarations.append("\n"); - - return declarations.toString(); - } - - protected boolean hasStatement(final Resource subj, final URI pred, - final Value obj, final boolean includeInferred, - final Resource... contexts) throws RepositoryException { - - try { - - return m_repo.getStatements(subj, pred, obj, includeInferred, - contexts).hasNext(); - - } catch (Exception e) { - - throw new RepositoryException(e); - - } - - } - - public void testInsertWhereTrue() + public void testInsertWhereTrue() throws Exception { executeInsert("FILTER ( true )", true); } - private void executeInsert(String where, boolean expected) throws RepositoryException, Exception { - final StringBuilder update = new StringBuilder(); - update.append(getNamespaceDeclarations()); - update.append("INSERT { ex:bob rdfs:label \"Bob\" . 
} WHERE { " + where +" }"); - - assertFalse(hasStatement(bob, RDFS.LABEL, f.createLiteral("Bob"), true)); - - m_repo.prepareUpdate(update.toString()).evaluate(); - - assertEquals(expected, hasStatement(bob, RDFS.LABEL, f.createLiteral("Bob"), true)); - } - - public void testInsertWhereFalse() + public void testInsertWhereFalse() throws Exception { executeInsert("FILTER ( false )", false); Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/webapp/TestNanoSparqlServerWithProxyIndexManager.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/webapp/TestNanoSparqlServerWithProxyIndexManager.java 2013-08-26 00:50:57 UTC (rev 7340) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/webapp/TestNanoSparqlServerWithProxyIndexManager.java 2013-08-26 16:16:14 UTC (rev 7341) @@ -229,8 +229,9 @@ // BigdataSailRemoteRepository test (nano sparql server client-wrapper) suite.addTestSuite(TestBigdataSailRemoteRepository.class); - // TestInsertFilterFalse727 + // Insert tests from trac issues suite.addTestSuite(TestInsertFilterFalse727.class); + suite.addTestSuite(TestCBD731.class); // SPARQL UPDATE test suite. This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
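A sketch of how AbstractSimpleInsertTest is meant to be reused by further trac-style tests: the subclass supplies only the WHERE-clause fragment and the expected outcome. The test method name and the BIND fragment below are invented for illustration. The comment shows, with prefix declarations abbreviated, the update text that executeInsert() assembles for the new testInsertCBD case; the hint: prefix is understood to be bigdata's built-in query-hint namespace, resolved internally, so the helper does not declare it:

// Hypothetical further test for AbstractSimpleInsertTest. For the new
// testInsertCBD above, the update that executeInsert() assembles boils
// down to the following (prefix declarations abbreviated):
//
//   PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
//   PREFIX ex:   <http://example.org/>
//   INSERT { ex:bob rdfs:label "Bob" . }
//   WHERE  { hint:Query hint:describeMode "CBD" }
//
public void testInsertWithBind() throws Exception {
    // BIND yields exactly one solution, so the INSERT should take effect.
    executeInsert("BIND(42 AS ?x)", true/* expected */);
}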
From: <tho...@us...> - 2013-08-26 00:51:04
Revision: 7340 http://bigdata.svn.sourceforge.net/bigdata/?rev=7340&view=rev Author: thompsonbry Date: 2013-08-26 00:50:57 +0000 (Mon, 26 Aug 2013) Log Message: ----------- Some more fiddling. I have isolated 4 different scheduler implementations (single thread, concurrent hash set (jetty), concurrent hash map, and a thread-local version with a merge sort iterator). There is still overhead in the step to compact the frontier. A lot of costs are also showing up in the index read operations, mainly in decoding the index pages. I plan to provide a pure main memory object based implementation of the GAS API next. This will let me focus on the costs without the indices. It will also provide a light weight implementation and an ongoing basis for comparing the disk-based and pure main memory implementations with the GPU based implementations. See #629 Modified Paths: -------------- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/analytics/BFS.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASEngine.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASState.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/MergeSortIterator.java Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/analytics/BFS.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/analytics/BFS.java 2013-08-25 20:47:26 UTC (rev 7339) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/analytics/BFS.java 2013-08-26 00:50:57 UTC (rev 7340) @@ -1,5 +1,7 @@ package com.bigdata.rdf.graph.analytics; +import java.util.concurrent.atomic.AtomicInteger; + import com.bigdata.rdf.graph.EdgesEnum; import com.bigdata.rdf.graph.Factory; import com.bigdata.rdf.graph.IGASProgram; @@ -25,25 +27,29 @@ /** * <code>-1</code> until visited. When visited, set to the current round * in order to assign each vertex its traversal depth. + * <p> + * Note: It is possible that the same vertex may be visited multiple + * times in a given expansion (from one or more source vertices that all + * target the same destination vertex). However, in this case the same + * value will be assigned by each visitor. Thus, synchronization is only + * required for visibility of the update within the round. As long as + * one thread reports that it modified the depth, the vertex will be + * scheduled. */ - private int depth = -1; + private final AtomicInteger depth = new AtomicInteger(-1); /** * The depth at which this vertex was first visited (origin ZERO) and * <code>-1</code> if the vertex has not been visited. */ public int depth() { - synchronized (this) { - return depth; - } +// synchronized (this) { + return depth.get(); +// } } /** * Note: This marks the vertex at the current traversal depth. - * <p> - * Note: It is possible that the same vertex may be visited multiple - * times in a given expansion (from one or more source vertices that all - * target the same destination vertex). * * @return <code>true</code> if the vertex was visited for the first * time in this round and the calling thread is the thread that @@ -51,13 +57,18 @@ * scheduling of a vertex). 
*/ public boolean visit(final int depth) { - synchronized (this) { - if (this.depth == -1) { - this.depth = depth; - return true; - } - return false; + if (this.depth.compareAndSet(-1/* expect */, depth/* newValue */)) { + // Scheduled by this thread. + return true; } + return false; +// synchronized (this) { +// if (this.depth == -1) { +// this.depth = depth; +// return true; +// } +// return false; +// } } @Override Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASEngine.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASEngine.java 2013-08-25 20:47:26 UTC (rev 7339) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASEngine.java 2013-08-26 00:50:57 UTC (rev 7340) @@ -15,6 +15,7 @@ import com.bigdata.rdf.graph.IGASContext; import com.bigdata.rdf.graph.IGASEngine; import com.bigdata.rdf.graph.IGASProgram; +import com.bigdata.rdf.graph.IScheduler; import com.bigdata.rdf.graph.impl.GASState.CHMScheduler; import com.bigdata.rdf.graph.impl.GASState.MyScheduler; import com.bigdata.rdf.internal.IV; @@ -75,6 +76,11 @@ private final int nthreads; /** + * The factory for the {@link IScheduler}. + */ + private final AtomicReference<Class<MyScheduler>> schedulerClassRef; + + /** * The parallelism for the SCATTER and GATHER phases. */ public int getNThreads() { @@ -97,6 +103,7 @@ * scale-out we want to partition the work and the VS/ES so that * would imply a different {@link IGASEngine} design. */ + @SuppressWarnings("unchecked") public GASEngine(final IIndexManager indexManager, final int nthreads) { if (indexManager == null) @@ -115,8 +122,8 @@ this.schedulerClassRef = new AtomicReference<Class<MyScheduler>>(); - this.schedulerClassRef.set((Class)CHMScheduler.class); - + this.schedulerClassRef.set((Class) CHMScheduler.class); + } /** @@ -373,8 +380,6 @@ } - private final AtomicReference<Class<MyScheduler>> schedulerClassRef; - void setSchedulerClass(final Class<MyScheduler> newValue) { if(newValue == null) Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASState.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASState.java 2013-08-25 20:47:26 UTC (rev 7339) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASState.java 2013-08-26 00:50:57 UTC (rev 7340) @@ -32,7 +32,7 @@ @SuppressWarnings("rawtypes") public class GASState<VS, ES, ST> implements IGASState<VS, ES, ST> { - static final Logger log = Logger.getLogger(GASState.class); + private final Logger log = Logger.getLogger(GASState.class); // /** // * The {@link GASEngine} on which the {@link IGASProgram} will be run. @@ -446,11 +446,11 @@ * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> */ - static class SingleThreadScheduler implements MyScheduler { + static class STScheduler implements MyScheduler { private final Set<IV> vertices; - public SingleThreadScheduler() { + public STScheduler(final GASEngine gasEngine) { this.vertices = new LinkedHashSet<IV>(); @@ -484,13 +484,15 @@ * * @author <a href="mailto:tho...@us...">Bryan * Thompson</a> + * + * FIXME SCHEDULER: This is a Jetty class. Unbundle it! Use CHM + * instead. See {@link CHMScheduler}. 
*/ - static class CHMScheduler implements MyScheduler { + static class CHSScheduler implements MyScheduler { - // FIXME This is a Jetty class. Unbundle it! Use CHM instead. private final ConcurrentHashSet<IV> vertices; - public CHMScheduler(final GASEngine gasEngine) { + public CHSScheduler(final GASEngine gasEngine) { vertices = new ConcurrentHashSet<IV>(/* TODO nthreads (CHM) */); @@ -520,31 +522,70 @@ } // CHMScheduler /** - * This scheduler uses thread-local {@link LinkedHashSet}s to track - * the distinct vertices scheduled by each execution thread. After - * the computation round, those per-thread segments of the frontier - * are combined into a single global, compact, and ordered frontier. - * To maximize the parallel activity, the per-thread frontiers are - * sorted using N threads (one per segment). Finally, the frontier - * segments are combined using a {@link MergeSortIterator} - this is - * a sequential step with a linear cost in the size of the frontier. + * A simple scheduler based on a {@link ConcurrentHashMap}. * * @author <a href="mailto:tho...@us...">Bryan * Thompson</a> */ - static class ThreadLocalScheduler implements MyScheduler { + static class CHMScheduler implements MyScheduler { + private final ConcurrentHashMap<IV,IV> vertices; + + public CHMScheduler(final GASEngine gasEngine) { + + vertices = new ConcurrentHashMap<IV,IV>(gasEngine.getNThreads()); + + } + + @Override + public void schedule(final IV v) { + + vertices.putIfAbsent(v,v); + + } + + @Override + public void clear() { + + vertices.clear(); + + } + + @Override + public void compactFrontier(final StaticFrontier frontier) { + + frontier.resetFrontier(compactAndSort(vertices.keySet())); + + } + + } // CHMScheduler + + /** + * This scheduler uses thread-local buffers ({@link LinkedHashSet}) to track + * the distinct vertices scheduled by each execution thread. After the + * computation round, those per-thread segments of the frontier are combined + * into a single global, compact, and ordered frontier. To maximize the + * parallel activity, the per-thread frontiers are sorted using N threads + * (one per segment). Finally, the frontier segments are combined using a + * {@link MergeSortIterator} - this is a sequential step with a linear cost + * in the size of the frontier. + * + * @author <a href="mailto:tho...@us...">Bryan + * Thompson</a> + */ + static class TLScheduler implements MyScheduler { + private final GASEngine gasEngine; private final int nthreads; - private final ConcurrentHashMap<Long/*threadId*/,SingleThreadScheduler> map; + private final ConcurrentHashMap<Long/*threadId*/,STScheduler> map; - public ThreadLocalScheduler(final GASEngine gasEngine) { + public TLScheduler(final GASEngine gasEngine) { this.gasEngine = gasEngine; this.nthreads = gasEngine.getNThreads(); - this.map = new ConcurrentHashMap<Long, SingleThreadScheduler>( + this.map = new ConcurrentHashMap<Long, STScheduler>( nthreads/* initialCapacity */, .75f/* loadFactor */, nthreads); @@ -554,12 +595,12 @@ final Long id = Thread.currentThread().getId(); - SingleThreadScheduler s = map.get(id); + STScheduler s = map.get(id); if (s == null) { - final IScheduler old = map.putIfAbsent(id, - s = new SingleThreadScheduler()); + final IScheduler old = map.putIfAbsent(id, s = new STScheduler( + gasEngine)); if (old != null) { @@ -592,7 +633,7 @@ * Clear the per-thread maps, but do not discard. They will be * reused in the next round. 
*/ - for(SingleThreadScheduler s : map.values()) { + for(STScheduler s : map.values()) { s.clear(); @@ -614,8 +655,8 @@ { final List<Callable<IV[]>> tasks = new ArrayList<Callable<IV[]>>(nthreads); - for (SingleThreadScheduler s : map.values()) { - final SingleThreadScheduler t = s; + for (STScheduler s : map.values()) { + final STScheduler t = s; tasks.add(new Callable<IV[]>(){ @Override public IV[] call() throws Exception { @@ -672,9 +713,31 @@ } /* - * Now merge sort those arrays. + * Now merge sort those arrays and populate the new frontier. */ - + mergeSortSourcesAndSetFrontier(nsources, nvertices, frontiers, + frontier); + + } + + /** + * Now merge sort the ordered frontier segments and populate the new + * frontier. + * + * @param nsources + * The #of frontier segments. + * @param nvertices + * The total #of vertice across those segments (may + * double-count across segments). + * @param frontiers + * The ordered, compact frontier segments + * @param frontier + * The new frontier to be populated. + */ + private void mergeSortSourcesAndSetFrontier(final int nsources, + final int nvertices, final IV[][] frontiers, + final StaticFrontier frontier) { + // wrap IVs[] as Iterators. @SuppressWarnings("unchecked") final Iterator<IV>[] itrs = new Iterator[nsources]; @@ -687,7 +750,7 @@ // merge sort of those iterators. final Iterator<IV> itr = new MergeSortIterator(itrs); - + // ensure enough capacity for the new frontier. frontier.ensureCapacity(nvertices); Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/MergeSortIterator.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/MergeSortIterator.java 2013-08-25 20:47:26 UTC (rev 7339) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/MergeSortIterator.java 2013-08-26 00:50:57 UTC (rev 7340) @@ -14,6 +14,8 @@ @SuppressWarnings("rawtypes") class MergeSortIterator implements Iterator<IV> { +// private final static Logger log = Logger.getLogger(MergeSortIterator.class); + /** * The #of source iterators. */ @@ -75,8 +77,8 @@ if (current != -1) { - if (GASState.log.isTraceEnabled()) - GASState.log.trace("Already matched: source=" + current); +// if (log.isTraceEnabled()) +// log.trace("Already matched: source=" + current); return true; @@ -97,9 +99,9 @@ sourceTuple[i] = sourceIterator[i].next(); - if (GASState.log.isTraceEnabled()) - GASState.log.trace("read sourceTuple[" + i + "]=" - + sourceTuple[i]); +// if (log.isTraceEnabled()) +// log.trace("read sourceTuple[" + i + "]=" +// + sourceTuple[i]); } else { @@ -182,13 +184,10 @@ } - if (GASState.log.isDebugEnabled()) { +// if (log.isDebugEnabled()) +// log.debug("Will visit next: source=" + current + ", tuple: " +// + sourceTuple[current]); - GASState.log.debug("Will visit next: source=" + current - + ", tuple: " + sourceTuple[current]); - - } - return true; } @@ -274,4 +273,4 @@ } -} // MergeSortIterator \ No newline at end of file +} // MergeSortIterator This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
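The depth-marking change above is the heart of this commit: visit() moves from a synchronized block to a single compare-and-set, so exactly one thread wins the right to schedule the vertex. A self-contained sketch of the same idiom follows; VisitFlag is an invented name, and BFS.VS carries additional program state not shown here:

import java.util.concurrent.atomic.AtomicInteger;

// First-visitor-wins idiom as now used by BFS.VS: depth starts at -1, and
// exactly one thread can CAS it to the round number, so only that thread
// reports true and schedules the vertex.
class VisitFlag {

    private final AtomicInteger depth = new AtomicInteger(-1);

    // Returns true for exactly one caller; later callers (in this round or
    // any later one) observe the already-assigned depth and return false.
    boolean visit(final int round) {
        return depth.compareAndSet(-1/* expect */, round/* newValue */);
    }

    // The depth at which the vertex was first visited, or -1 if unvisited.
    int depth() {
        return depth.get();
    }

}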
From: <tho...@us...> - 2013-08-25 20:47:34
Revision: 7339 http://bigdata.svn.sourceforge.net/bigdata/?rev=7339&view=rev Author: thompsonbry Date: 2013-08-25 20:47:26 +0000 (Sun, 25 Aug 2013) Log Message: ----------- Made it possible to configure the scheduler (a bit of a hack). Modified Paths: -------------- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASContext.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASEngine.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASRunner.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASState.java Added Paths: ----------- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/MergeSortIterator.java Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASContext.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASContext.java 2013-08-25 16:33:39 UTC (rev 7338) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASContext.java 2013-08-25 20:47:26 UTC (rev 7339) @@ -108,7 +108,7 @@ this.program = program; - this.state = new GASState<VS, ES, ST>(gasEngine, program); + this.state = new GASState<VS, ES, ST>(gasEngine, this, program); } Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASEngine.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASEngine.java 2013-08-25 16:33:39 UTC (rev 7338) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASEngine.java 2013-08-25 20:47:26 UTC (rev 7339) @@ -1,11 +1,13 @@ package com.bigdata.rdf.graph.impl; +import java.lang.reflect.Constructor; import java.util.ArrayList; import java.util.List; import java.util.concurrent.Callable; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.FutureTask; +import java.util.concurrent.atomic.AtomicReference; import com.bigdata.journal.IIndexManager; import com.bigdata.journal.ITx; @@ -13,6 +15,8 @@ import com.bigdata.rdf.graph.IGASContext; import com.bigdata.rdf.graph.IGASEngine; import com.bigdata.rdf.graph.IGASProgram; +import com.bigdata.rdf.graph.impl.GASState.CHMScheduler; +import com.bigdata.rdf.graph.impl.GASState.MyScheduler; import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.store.AbstractTripleStore; import com.bigdata.service.IBigdataFederation; @@ -109,6 +113,10 @@ .newFixedThreadPool(nthreads, new DaemonThreadFactory( GASEngine.class.getSimpleName())); + this.schedulerClassRef = new AtomicReference<Class<MyScheduler>>(); + + this.schedulerClassRef.set((Class)CHMScheduler.class); + } /** @@ -365,4 +373,36 @@ } + private final AtomicReference<Class<MyScheduler>> schedulerClassRef; + + void setSchedulerClass(final Class<MyScheduler> newValue) { + + if(newValue == null) + throw new IllegalArgumentException(); + + schedulerClassRef.set(newValue); + + } + + MyScheduler newScheduler(final GASContext<?, ?, ?> gasContext) { + + final Class<MyScheduler> cls = schedulerClassRef.get(); + + try { + + final Constructor<MyScheduler> ctor = cls + .getConstructor(new Class[] { GASEngine.class }); + + final MyScheduler sch = ctor.newInstance(new Object[] { this }); + + return sch; + + } catch (Exception e) { + + throw new RuntimeException(e); + + } + 
+ } + } // GASEngine Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASRunner.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASRunner.java 2013-08-25 16:33:39 UTC (rev 7338) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASRunner.java 2013-08-25 20:47:26 UTC (rev 7339) @@ -22,6 +22,9 @@ import com.bigdata.rdf.graph.IGASContext; import com.bigdata.rdf.graph.IGASEngine; import com.bigdata.rdf.graph.IGASProgram; +import com.bigdata.rdf.graph.IGASState; +import com.bigdata.rdf.graph.IScheduler; +import com.bigdata.rdf.graph.impl.GASState.MyScheduler; import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.rio.LoadStats; import com.bigdata.rdf.sail.BigdataSail; @@ -76,6 +79,11 @@ private final String namespaceOverride; /** + * The {@link MyScheduler} class to use. + */ + private final Class<MyScheduler> schedulerClassOverride; + + /** * When non-<code>null</code>, a list of zero or more resources to be * loaded. The resources will be searched for as URLs, on the CLASSPATH, and * in the file system. @@ -129,6 +137,12 @@ * <dt>-seed</dt> * <dd>The seed for the random number generator (default is * <code>217L</code>).</dd> + * <dt>-bufferMode</dt> + * <dd>Overrides the {@link BufferMode} (if any) specified in the + * <code>propertyFile</code>.</dd> + * <dt>-schedulerClass</dt> + * <dd>Override the default {@link IScheduler}. Class must + * implement {@link MyScheduler}.</dd> * <dt>-namespace</dt> * <dd>The namespace of the default SPARQL endpoint (the * namespace will be <code>kb</code> if none was specified when @@ -138,12 +152,10 @@ * exist) at the time this utility is executed. This option may * appear multiple times. The resources will be searched for as * URLs, on the CLASSPATH, and in the file system.</dd> - * <dt>-bufferMode</dt> - * <dd>Overrides the {@link BufferMode} (if any) specified in the - * <code>propertyFile</code>.</dd> * </p> + * @throws ClassNotFoundException */ - public GASRunner(final String[] args) { + public GASRunner(final String[] args) throws ClassNotFoundException { Banner.banner(); @@ -151,6 +163,7 @@ int nsamples = 100; int nthreads = 4; BufferMode bufferMode = null; // override only. + Class<MyScheduler> schedulerClass = null; // override only. String namespace = "kb"; // Set of files to load (optional). LinkedHashSet<String> loadSet = new LinkedHashSet<String>(); @@ -182,6 +195,9 @@ } else if (arg.equals("-bufferMode")) { final String s = args[++i]; bufferMode = BufferMode.valueOf(s); + } else if (arg.equals("-schedulerClass")) { + final String s = args[++i]; + schedulerClass = (Class<MyScheduler>) Class.forName(s); } else if (arg.equals("-namespace")) { final String s = args[++i]; namespace = s; @@ -222,6 +238,7 @@ this.nthreads = nthreads; this.namespaceOverride = namespace; this.bufferModeOverride = bufferMode; + this.schedulerClassOverride = schedulerClass; this.loadSet = loadSet.isEmpty() ? 
null : loadSet .toArray(new String[loadSet.size()]); @@ -538,11 +555,20 @@ try { + if (schedulerClassOverride != null) { + + ((GASEngine) gasEngine) + .setSchedulerClass(schedulerClassOverride); + + } + final IGASProgram<VS, ES, ST> gasProgram = newGASProgram(); final IGASContext<VS, ES, ST> gasContext = gasEngine.newGASContext( namespace, jnl.getLastCommitTime(), gasProgram); + final IGASState<VS, ES, ST> gasState = gasContext.getGASState(); + final GASStats total = new GASStats(); for (int i = 0; i < samples.length; i++) { @@ -550,9 +576,9 @@ @SuppressWarnings("rawtypes") final IV startingVertex = samples[i]; - gasContext.getGASState().init(startingVertex); + gasState.init(startingVertex); - // TODO Pure interface for this. + // TODO STATS: Pure interface. final GASStats stats = (GASStats) gasContext.call(); total.add(stats); @@ -566,11 +592,16 @@ } // Total over all sampled vertices. - System.out.println("TOTAL: analytic=" - + gasProgram.getClass().getSimpleName() + ", nseed=" + seed - + ", nsamples=" + nsamples + ", nthreads=" + nthreads + System.out.println("TOTAL"// + +": analytic=" + gasProgram.getClass().getSimpleName() // + + ", nseed=" + seed + + ", nsamples=" + nsamples // + + ", nthreads=" + nthreads + ", bufferMode=" + jnl.getBufferStrategy().getBufferMode() - + ", edges(kb)=" + nedges + ", stats(total)=" + total); + + ", scheduler=" + ((GASState<VS, ES, ST>)gasState).getScheduler().getClass().getSimpleName() + + ", edges(kb)=" + nedges// + + ", stats(total)=" + total// + ); return total; Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASState.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASState.java 2013-08-25 16:33:39 UTC (rev 7338) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASState.java 2013-08-25 20:47:26 UTC (rev 7339) @@ -7,7 +7,6 @@ import java.util.LinkedHashSet; import java.util.List; import java.util.Map; -import java.util.NoSuchElementException; import java.util.Set; import java.util.concurrent.Callable; import java.util.concurrent.ConcurrentHashMap; @@ -24,7 +23,6 @@ import com.bigdata.rdf.graph.IGASState; import com.bigdata.rdf.graph.IScheduler; import com.bigdata.rdf.internal.IV; -import com.bigdata.rdf.internal.IVUtility; import com.bigdata.rdf.model.BigdataValue; import com.bigdata.rdf.spo.ISPO; import com.bigdata.rdf.store.AbstractTripleStore; @@ -34,12 +32,12 @@ @SuppressWarnings("rawtypes") public class GASState<VS, ES, ST> implements IGASState<VS, ES, ST> { - private static final Logger log = Logger.getLogger(GASState.class); + static final Logger log = Logger.getLogger(GASState.class); - /** - * The {@link GASEngine} on which the {@link IGASProgram} will be run. - */ - private final GASEngine gasEngine; +// /** +// * The {@link GASEngine} on which the {@link IGASProgram} will be run. +// */ +// private final GASEngine gasEngine; /** * The {@link IGASProgram} to be run. 
@@ -94,9 +92,11 @@ */ private final ConcurrentMap<ISPO, ES> edgeState = null; - GASState(final GASEngine gasEngine, final IGASProgram<VS, ES, ST> program) { + GASState(final GASEngine gasEngine, + final GASContext<VS, ES, ST> gasContext, + final IGASProgram<VS, ES, ST> program) { - this.gasEngine = gasEngine; +// this.gasEngine = gasEngine; this.gasProgram = program; @@ -106,17 +106,7 @@ this.frontier = new StaticFrontier(); - /* - * TODO FRONTIER: Choose thread-local versus CHM implementation using a - * GASEngine option and then echo in the GASRunner reports. - */ - if (false) { - this.scheduler = new SingleThreadScheduler(); - } else if (false) { - this.scheduler = new CHMScheduler(gasEngine.getNThreads()); - } else { - this.scheduler = new ThreadLocalScheduler(gasEngine); - } + this.scheduler = (MyScheduler) gasEngine.newScheduler(gasContext); } @@ -403,7 +393,7 @@ * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> */ - private interface MyScheduler extends IScheduler { + interface MyScheduler extends IScheduler { /** * Compact the schedule into the new frontier. @@ -456,7 +446,7 @@ * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> */ - static private class SingleThreadScheduler implements MyScheduler { + static class SingleThreadScheduler implements MyScheduler { private final Set<IV> vertices; @@ -495,14 +485,14 @@ * @author <a href="mailto:tho...@us...">Bryan * Thompson</a> */ - static private class CHMScheduler implements MyScheduler { + static class CHMScheduler implements MyScheduler { // FIXME This is a Jetty class. Unbundle it! Use CHM instead. private final ConcurrentHashSet<IV> vertices; - public CHMScheduler(final int nthreads) { + public CHMScheduler(final GASEngine gasEngine) { - vertices = new ConcurrentHashSet<IV>(); + vertices = new ConcurrentHashSet<IV>(/* TODO nthreads (CHM) */); } @@ -542,7 +532,7 @@ * @author <a href="mailto:tho...@us...">Bryan * Thompson</a> */ - static private class ThreadLocalScheduler implements MyScheduler { + static class ThreadLocalScheduler implements MyScheduler { private final GASEngine gasEngine; private final int nthreads; @@ -607,18 +597,7 @@ s.clear(); } - -// if (false) { -// /* -// * Note: This should not be required. It is a bit of a paranoid -// * step. It could reduce the efficiency by forcing us to -// * reallocate the backing data structures. We should keep those -// * on hand for the life of the Scheduler, which is linked to the -// * execution of the GASProgram. -// */ -// map.clear(); -// } - + } @Override @@ -725,278 +704,4 @@ } - /** - * An N-way merge sort of N source iterators. - * - * @author <a href="mailto:tho...@us...">Bryan - * Thompson</a> - */ - private static class MergeSortIterator implements Iterator<IV> { - - /** - * The #of source iterators. - */ - private final int n; - - /** - * The source iterators in the order given to the ctor. - */ - private final Iterator<IV>[] sourceIterator; - - /** - * The current value from each source and <code>null</code> if we need - * to get another value from that source. The value for a source - * iterator that has been exhausted will remain <code>null</code>. When - * all entries in this array are <code>null</code> there are no more - * values to be visited and we are done. - */ - private final IV[] sourceTuple; - - /** - * Index into {@link #sourceIterator} and {@link #sourceTuple} of the - * iterator whose value will be returned next -or- <code>-1</code> if we - * need to choose the next value to be visited. 
- */ - private int current = -1; - - /** - * - * @param sourceIterators - * Each source iterator MUST be in ascending {@link IV} - * order. - */ - public MergeSortIterator(final Iterator<IV>[] sourceIterators) { - - assert sourceIterators != null; - - assert sourceIterators.length > 0; - - this.n = sourceIterators.length; - - for (int i = 0; i < n; i++) { - - assert sourceIterators[i] != null; - - } - - this.sourceIterator = sourceIterators; - - sourceTuple = new IV[n]; - - } - - @Override - public boolean hasNext() { - - /* - * Until we find an undeleted tuple (or any tuple if DELETED is - * true). - */ - while (true) { - - if (current != -1) { - - if (log.isTraceEnabled()) - log.trace("Already matched: source=" + current); - - return true; - - } - - /* - * First, make sure that we have a tuple for each source - * iterator (unless that iterator is exhausted). - */ - - int nexhausted = 0; - - for (int i = 0; i < n; i++) { - - if (sourceTuple[i] == null) { - - if (sourceIterator[i].hasNext()) { - - sourceTuple[i] = sourceIterator[i].next(); - - if (log.isTraceEnabled()) - log.trace("read sourceTuple[" + i + "]=" - + sourceTuple[i]); - - } else { - - nexhausted++; - - } - - } - - } - - if (nexhausted == n) { - - // the aggregate iterator is exhausted. - - return false; - - } - - /* - * Now consider the current tuple for each source iterator in - * turn and choose the _first_ iterator having a tuple whose key - * orders LTE all the others (or GTE if [reverseScan == true]). - * This is the next tuple to be visited by the aggregate - * iterator. - */ - { - - // current is index of the smallest key so far. - assert current == -1; - - IV key = null; // smallest key so far. - - for (int i = 0; i < n; i++) { - - if (sourceTuple[i] == null) { - - // This source is exhausted. - - continue; - - } - - if (current == -1) { - - current = i; - - key = sourceTuple[i]; - - assert key != null; - - } else { - - final IV tmp = sourceTuple[i]; - - final int ret = IVUtility.compare(tmp, key); - - if (ret < 0) { - - /* - * This key orders LT the current key. - * - * Note: This test MUST be strictly LT since LTE - * would break the precedence in which we are - * processing the source iterators and give us - * the key from the last source by preference - * when we need the key from the first source by - * preference. - */ - - current = i; - - key = tmp; - - } - - } - - } - - assert current != -1; - - } - - if (log.isDebugEnabled()) { - - log.debug("Will visit next: source=" + current - + ", tuple: " + sourceTuple[current]); - - } - - return true; - - } - - } - - @Override - public IV next() { - - if (!hasNext()) - throw new NoSuchElementException(); - - return consumeLookaheadTuple(); - - } - - /** - * Consume the {@link #current} source value. - * - * @return The {@link #current} tuple. - */ - private IV consumeLookaheadTuple() { - - final IV t = sourceTuple[current]; - - // clear tuples from other sources having the same key as the - // current tuple. - clearCurrent(); - - return t; - - } - - /** - * <p> - * Clear tuples from other sources having the same key as the current - * tuple (eliminates duplicates). - * </p> - */ - protected void clearCurrent() { - - assert current != -1; - - final IV key = sourceTuple[current]; - - for (int i = current + 1; i < n; i++) { - - if (sourceTuple[i] == null) { - - // this iterator is exhausted. - - continue; - - } - - final IV tmp = sourceTuple[i]; - - final int ret = IVUtility.compare(key, tmp); - - if (ret == 0) { - - // discard tuple. 
- - sourceTuple[i] = null; - - } - - } - - // clear the tuple that we are returning so that we will read - // another from that source. - sourceTuple[current] = null; - - // clear so that we will look again. - current = -1; - - } - - @Override - public void remove() { - - throw new UnsupportedOperationException(); - - } - - } // MergeSortIterator - } Added: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/MergeSortIterator.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/MergeSortIterator.java (rev 0) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/MergeSortIterator.java 2013-08-25 20:47:26 UTC (rev 7339) @@ -0,0 +1,277 @@ +package com.bigdata.rdf.graph.impl; + +import java.util.Iterator; +import java.util.NoSuchElementException; + +import com.bigdata.rdf.internal.IV; +import com.bigdata.rdf.internal.IVUtility; + +/** + * An N-way merge sort of N source iterators. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + */ +@SuppressWarnings("rawtypes") +class MergeSortIterator implements Iterator<IV> { + + /** + * The #of source iterators. + */ + private final int n; + + /** + * The source iterators in the order given to the ctor. + */ + private final Iterator<IV>[] sourceIterator; + + /** + * The current value from each source and <code>null</code> if we need to + * get another value from that source. The value for a source iterator that + * has been exhausted will remain <code>null</code>. When all entries in + * this array are <code>null</code> there are no more values to be visited + * and we are done. + */ + private final IV[] sourceTuple; + + /** + * Index into {@link #sourceIterator} and {@link #sourceTuple} of the + * iterator whose value will be returned next -or- <code>-1</code> if we + * need to choose the next value to be visited. + */ + private int current = -1; + + /** + * + * @param sourceIterators + * Each source iterator MUST be in ascending {@link IV} order. + */ + public MergeSortIterator(final Iterator<IV>[] sourceIterators) { + + assert sourceIterators != null; + + assert sourceIterators.length > 0; + + this.n = sourceIterators.length; + + for (int i = 0; i < n; i++) { + + assert sourceIterators[i] != null; + + } + + this.sourceIterator = sourceIterators; + + sourceTuple = new IV[n]; + + } + + @Override + public boolean hasNext() { + + /* + * Until we find an undeleted tuple (or any tuple if DELETED is true). + */ + while (true) { + + if (current != -1) { + + if (GASState.log.isTraceEnabled()) + GASState.log.trace("Already matched: source=" + current); + + return true; + + } + + /* + * First, make sure that we have a tuple for each source iterator + * (unless that iterator is exhausted). + */ + + int nexhausted = 0; + + for (int i = 0; i < n; i++) { + + if (sourceTuple[i] == null) { + + if (sourceIterator[i].hasNext()) { + + sourceTuple[i] = sourceIterator[i].next(); + + if (GASState.log.isTraceEnabled()) + GASState.log.trace("read sourceTuple[" + i + "]=" + + sourceTuple[i]); + + } else { + + nexhausted++; + + } + + } + + } + + if (nexhausted == n) { + + // the aggregate iterator is exhausted. + + return false; + + } + + /* + * Now consider the current tuple for each source iterator in turn + * and choose the _first_ iterator having a tuple whose key orders + * LTE all the others (or GTE if [reverseScan == true]). This is the + * next tuple to be visited by the aggregate iterator. 
+ */ + { + + // current is index of the smallest key so far. + assert current == -1; + + IV key = null; // smallest key so far. + + for (int i = 0; i < n; i++) { + + if (sourceTuple[i] == null) { + + // This source is exhausted. + + continue; + + } + + if (current == -1) { + + current = i; + + key = sourceTuple[i]; + + assert key != null; + + } else { + + final IV tmp = sourceTuple[i]; + + final int ret = IVUtility.compare(tmp, key); + + if (ret < 0) { + + /* + * This key orders LT the current key. + * + * Note: This test MUST be strictly LT since LTE + * would break the precedence in which we are + * processing the source iterators and give us the + * key from the last source by preference when we + * need the key from the first source by preference. + */ + + current = i; + + key = tmp; + + } + + } + + } + + assert current != -1; + + } + + if (GASState.log.isDebugEnabled()) { + + GASState.log.debug("Will visit next: source=" + current + + ", tuple: " + sourceTuple[current]); + + } + + return true; + + } + + } + + @Override + public IV next() { + + if (!hasNext()) + throw new NoSuchElementException(); + + return consumeLookaheadTuple(); + + } + + /** + * Consume the {@link #current} source value. + * + * @return The {@link #current} tuple. + */ + private IV consumeLookaheadTuple() { + + final IV t = sourceTuple[current]; + + // clear tuples from other sources having the same key as the + // current tuple. + clearCurrent(); + + return t; + + } + + /** + * <p> + * Clear tuples from other sources having the same key as the current tuple + * (eliminates duplicates). + * </p> + */ + protected void clearCurrent() { + + assert current != -1; + + final IV key = sourceTuple[current]; + + for (int i = current + 1; i < n; i++) { + + if (sourceTuple[i] == null) { + + // this iterator is exhausted. + + continue; + + } + + final IV tmp = sourceTuple[i]; + + final int ret = IVUtility.compare(key, tmp); + + if (ret == 0) { + + // discard tuple. + + sourceTuple[i] = null; + + } + + } + + // clear the tuple that we are returning so that we will read + // another from that source. + sourceTuple[current] = null; + + // clear so that we will look again. + current = -1; + + } + + @Override + public void remove() { + + throw new UnsupportedOperationException(); + + } + +} // MergeSortIterator \ No newline at end of file This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
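The committed MergeSortIterator keeps one lookahead tuple per source, scans the lookaheads linearly for the smallest key via IVUtility.compare, and nulls out equal keys in the other sources to eliminate duplicates. Below is a minimal, self-contained sketch of the same contract (ascending inputs in, ascending duplicate-free output), assuming Comparable elements and a PriorityQueue in place of the linear scan; KWayMergeDemo, Entry, and merge are illustrative names, not bigdata classes.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.PriorityQueue;

public class KWayMergeDemo {

    /** One lookahead value plus the iterator it came from. */
    private static final class Entry<T extends Comparable<T>>
            implements Comparable<Entry<T>> {
        final T value;
        final Iterator<T> source;
        Entry(final T value, final Iterator<T> source) {
            this.value = value;
            this.source = source;
        }
        @Override
        public int compareTo(final Entry<T> o) {
            return value.compareTo(o.value);
        }
    }

    /** Merge N ascending iterators into one ascending, duplicate-free list. */
    static <T extends Comparable<T>> List<T> merge(final List<Iterator<T>> sources) {
        final PriorityQueue<Entry<T>> heap = new PriorityQueue<Entry<T>>();
        for (Iterator<T> it : sources) {
            if (it.hasNext())
                heap.add(new Entry<T>(it.next(), it)); // prime one lookahead per source
        }
        final List<T> out = new ArrayList<T>();
        T last = null; // last value emitted; used to drop duplicates
        while (!heap.isEmpty()) {
            final Entry<T> e = heap.poll();
            if (last == null || e.value.compareTo(last) != 0) {
                out.add(e.value); // first occurrence of this key
                last = e.value;
            }
            if (e.source.hasNext())
                heap.add(new Entry<T>(e.source.next(), e.source)); // refill lookahead
        }
        return out;
    }

    public static void main(final String[] args) {
        final List<Iterator<Integer>> srcs = new ArrayList<Iterator<Integer>>();
        srcs.add(Arrays.asList(1, 3, 5).iterator());
        srcs.add(Arrays.asList(2, 3, 6).iterator());
        System.out.println(merge(srcs)); // [1, 2, 3, 5, 6]
    }
}

A heap makes each step O(log N) rather than O(N) in the number of sources, which only matters for large N; with the handful of per-thread frontier segments produced by the scheduler, the linear scan in the committed class is just as reasonable.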
From: <tho...@us...> - 2013-08-25 16:33:45
Revision: 7338 http://bigdata.svn.sourceforge.net/bigdata/?rev=7338&view=rev Author: thompsonbry Date: 2013-08-25 16:33:39 +0000 (Sun, 25 Aug 2013) Log Message: ----------- bug fix to apply the buffer mode, Modified Paths: -------------- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASRunner.java Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASRunner.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASRunner.java 2013-08-25 14:50:41 UTC (rev 7337) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASRunner.java 2013-08-25 16:33:39 UTC (rev 7338) @@ -340,6 +340,8 @@ .valueOf(properties.getProperty(Journal.Options.BUFFER_MODE, Journal.Options.DEFAULT_BUFFER_MODE)) : this.bufferModeOverride; + properties.setProperty(Journal.Options.BUFFER_MODE, bufferMode.name()); + final boolean isTransient = !bufferMode.isStable(); final boolean isTemporary; @@ -380,6 +382,8 @@ .getProperty(BigdataSail.Options.NAMESPACE, BigdataSail.Options.DEFAULT_NAMESPACE) : this.namespaceOverride; + properties.setProperty(BigdataSail.Options.NAMESPACE, namespace); + /* * TODO Could start NSS and use SPARQL UPDATE "LOAD" to load the data. * That exposes the SPARQL end point for other purposes during the test. This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
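The substance of this patch is the two setProperty calls: before it, the resolved bufferMode and namespace overrides were used locally but never written back into the Properties handed to the Journal and the BigdataSail, so those components still saw the defaults. A hedged sketch of the resolve-then-write-back pattern follows, using a placeholder key and values rather than the real Journal.Options constants (OverrideDemo and resolve are illustrative names).

import java.util.Properties;

public class OverrideDemo {

    /**
     * Resolve an effective configuration value from an optional command-line
     * override, an existing property, and a default, then write it back so
     * that any code reading the Properties later sees the same decision.
     */
    static String resolve(final Properties p, final String key,
            final String def, final String override) {
        final String effective = (override == null) ? p.getProperty(key, def)
                : override;
        p.setProperty(key, effective); // the step this patch adds
        return effective;
    }

    public static void main(final String[] args) {
        final Properties p = new Properties();
        resolve(p, "com.example.bufferMode", "DiskRW", "MemStore");
        System.out.println(p.getProperty("com.example.bufferMode")); // MemStore
    }
}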
From: <tho...@us...> - 2013-08-25 14:50:51
Revision: 7337 http://bigdata.svn.sourceforge.net/bigdata/?rev=7337&view=rev Author: thompsonbry Date: 2013-08-25 14:50:41 +0000 (Sun, 25 Aug 2013) Log Message: ----------- Added a thread-local frontier scheduler. It maintains a per-thread scheduler. The per thread schedulrs are sorted in one thread each (but not in the scatter phase, which would take less latency since now we have two barriers instead of one). The per-thread ordered schedules are then combined using an n-way merge. Unit tests pass. Parallel runs agree with original behavior. See #629 Modified Paths: -------------- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGASOptions.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASContext.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASEngine.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASState.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/VertexTaskFactory.java Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGASOptions.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGASOptions.java 2013-08-24 19:55:18 UTC (rev 7336) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGASOptions.java 2013-08-25 14:50:41 UTC (rev 7337) @@ -7,6 +7,12 @@ * Interface for options that are understood by the {@link IGASEngine} and which * may be declared by the {@link IGASProgram}. * + * TODO Add option to order the vertices to provide a serializable execution + * plan (like GraphChi). I believe that this reduces to computing a DAG over the + * frontier before executing the GATHER and then executing the frontier such + * that the parallel execution is constrained by arcs in the DAG that do not + * have mutual dependencies. + * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> */ public interface IGASOptions<VS, ES> { Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASContext.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASContext.java 2013-08-24 19:55:18 UTC (rev 7336) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASContext.java 2013-08-25 14:50:41 UTC (rev 7337) @@ -169,6 +169,39 @@ ExecutionException, Exception { /* + * This is the new frontier. It is initially empty. All newly + * discovered vertices are inserted into this frontier. + * + * TODO This assumes that only SCATTER can schedule new vertices. If + * we also permit scheduling during GATHER (or APPLY), then that + * will require us to communicate about the new frontier during + * operations other than SCATTER. On a cluster, the communication + * overhead is real. On a single machine, it is completely + * artificial. (Some GAS programs visit all vertices in every round + * and thus do not use a scheduler at all and would not need to + * implement a SCATTER phase, at least, not to schedule vertices.) + */ + + final IScheduler sch = state.getScheduler(); + + try { + + return _doRound(stats, sch); + + } finally { + + // Ensure that thread-locals are released. 
+ state.resetScheduler(); + + } + + + } + + private boolean _doRound(final IGASStats stats, final IScheduler sch) + throws InterruptedException, ExecutionException, Exception { + + /* * Obtain a view on the graph. * * Note: This will automatically advance if there has been an @@ -279,22 +312,6 @@ } else { - /* - * This is the new frontier. It is initially empty. All newly - * discovered vertices are inserted into this frontier. - * - * TODO This assumes that only SCATTER can schedule new vertices. If - * we also permit scheduling during GATHER (or APPLY), then that - * will require us to communicate about the new frontier during - * operations other than SCATTER. On a cluster, the communication - * overhead is real. On a single machine, it is completely - * artificial. (Some GAS programs visit all vertices in every round - * and thus do not use a scheduler at all and would not need to - * implement a SCATTER phase, at least, not to schedule vertices.) - */ - - final IScheduler sch = state.getScheduler(); - scatterEdgeCount = scatterEdges(kb, f, sch, scatterEdges, pushDownApplyInScatter); Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASEngine.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASEngine.java 2013-08-24 19:55:18 UTC (rev 7336) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASEngine.java 2013-08-25 14:50:41 UTC (rev 7337) @@ -4,6 +4,7 @@ import java.util.List; import java.util.concurrent.Callable; import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; import java.util.concurrent.FutureTask; import com.bigdata.journal.IIndexManager; @@ -15,7 +16,7 @@ import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.store.AbstractTripleStore; import com.bigdata.service.IBigdataFederation; -import com.bigdata.util.concurrent.LatchedExecutor; +import com.bigdata.util.concurrent.DaemonThreadFactory; /** * {@link IGASEngine} for dynamic activation of vertices. This implementation @@ -59,9 +60,11 @@ private final IIndexManager indexManager; /** - * The {@link ExecutorService} used to parallelize tasks. + * The {@link ExecutorService} used to parallelize tasks (iff + * {@link #nthreads} GT ONE). */ private final ExecutorService executorService; + /** * The parallelism for the SCATTER and GATHER phases. */ @@ -101,9 +104,11 @@ this.indexManager = indexManager; this.nthreads = nthreads; - - this.executorService = indexManager.getExecutorService(); - + + this.executorService = nthreads == 0 ? null : Executors + .newFixedThreadPool(nthreads, new DaemonThreadFactory( + GASEngine.class.getSimpleName())); + } /** @@ -130,21 +135,26 @@ } - /* - * TODO If we use our own thread pool, then we need to shut it down here. We - * also need to terminate each IGASContext. 
- */ - @Override public void shutdown() { - // TODO Auto-generated method stub + + if (executorService != null) { + + executorService.shutdown(); + + } } @Override public void shutdownNow() { - // TODO Auto-generated method stub + + if (executorService != null) { + executorService.shutdownNow(); + + } + } /** @@ -204,8 +214,7 @@ if (nthreads == 1) return new RunInCallersThreadFrontierStrategy(taskFactory, f); - return new LatchedExecutorFrontierStrategy(taskFactory, - executorService, nthreads, f); + return new ParallelFrontierStrategy(taskFactory, f); } @@ -273,24 +282,15 @@ * @author <a href="mailto:tho...@us...">Bryan * Thompson</a> */ - private class LatchedExecutorFrontierStrategy extends - AbstractFrontierStrategy { + private class ParallelFrontierStrategy extends AbstractFrontierStrategy { - private final ExecutorService executorService; - private final int nparallel; private final IStaticFrontier f; - LatchedExecutorFrontierStrategy( - final VertexTaskFactory<Long> taskFactory, - final ExecutorService executorService, final int nparallel, + ParallelFrontierStrategy(final VertexTaskFactory<Long> taskFactory, final IStaticFrontier f) { super(taskFactory); - this.executorService = executorService; - - this.nparallel = nparallel; - this.f = f; } @@ -302,10 +302,7 @@ f.size()); long nedges = 0L; - - final LatchedExecutor e = new LatchedExecutor(executorService, - nparallel); - + try { // For all vertices in the frontier. @@ -319,7 +316,7 @@ tasks.add(ft); // Enqueue future for execution. - e.execute(ft); + executorService.execute(ft); } @@ -352,4 +349,20 @@ } + /** + * If there is an {@link ExecutorService} for the {@link GASEngine}, then + * return it (nthreads GT 1). + * + * @throws UnsupportedOperationException + * if nthreads==1. + */ + public ExecutorService getGASThreadPool() { + + if (executorService == null) + throw new UnsupportedOperationException(); + + return executorService; + + } + } // GASEngine Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASState.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASState.java 2013-08-24 19:55:18 UTC (rev 7336) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASState.java 2013-08-25 14:50:41 UTC (rev 7337) @@ -4,10 +4,16 @@ import java.util.Collection; import java.util.HashSet; import java.util.Iterator; +import java.util.LinkedHashSet; +import java.util.List; import java.util.Map; +import java.util.NoSuchElementException; import java.util.Set; +import java.util.concurrent.Callable; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.Future; import java.util.concurrent.atomic.AtomicInteger; import org.apache.log4j.Logger; @@ -18,16 +24,27 @@ import com.bigdata.rdf.graph.IGASState; import com.bigdata.rdf.graph.IScheduler; import com.bigdata.rdf.internal.IV; +import com.bigdata.rdf.internal.IVUtility; import com.bigdata.rdf.model.BigdataValue; import com.bigdata.rdf.spo.ISPO; import com.bigdata.rdf.store.AbstractTripleStore; +import cutthecrap.utils.striterators.ArrayIterator; + @SuppressWarnings("rawtypes") public class GASState<VS, ES, ST> implements IGASState<VS, ES, ST> { private static final Logger log = Logger.getLogger(GASState.class); - private final IGASProgram<VS, ES, ST> program; + /** + * The {@link GASEngine} on which the {@link 
IGASProgram} will be run. + */ + private final GASEngine gasEngine; + + /** + * The {@link IGASProgram} to be run. + */ + private final IGASProgram<VS, ES, ST> gasProgram; /** * Factory for the vertex state objects. @@ -45,8 +62,8 @@ * Note: This data structure is reused for each round. * * @see StaticFrontier - * @see NextFrontier - * @see #nextFrontier + * @see CHMScheduler + * @see #scheduler */ private final StaticFrontier frontier; @@ -54,7 +71,7 @@ * Used to schedule the new frontier and then compact it onto * {@link #frontier} at the end of the round. */ - private final NextFrontier nextFrontier; + private final MyScheduler scheduler; /** * The current evaluation round. @@ -79,15 +96,27 @@ GASState(final GASEngine gasEngine, final IGASProgram<VS, ES, ST> program) { - this.program = program; + this.gasEngine = gasEngine; + this.gasProgram = program; + this.vsf = program.getVertexStateFactory(); this.esf = program.getEdgeStateFactory(); this.frontier = new StaticFrontier(); - this.nextFrontier = new NextFrontier(gasEngine.getNThreads()); + /* + * TODO FRONTIER: Choose thread-local versus CHM implementation using a + * GASEngine option and then echo in the GASRunner reports. + */ + if (false) { + this.scheduler = new SingleThreadScheduler(); + } else if (false) { + this.scheduler = new CHMScheduler(gasEngine.getNThreads()); + } else { + this.scheduler = new ThreadLocalScheduler(gasEngine); + } } @@ -100,6 +129,15 @@ } + /** + * Return the {@link IScheduler}. + */ + IScheduler getScheduler() { + + return scheduler; + + } + @Override public VS getState(final IV v) { @@ -184,13 +222,13 @@ if (tmp.add(v)) { // Put into the current frontier. - frontier.add(v); + frontier.schedule(v); /* * Callback to initialize the vertex state before the first * iteration. */ - program.init(this, v); + gasProgram.init(this, v); } @@ -264,21 +302,23 @@ round.incrementAndGet(); - nextFrontier.compactFrontier(); + scheduler.compactFrontier(frontier); - nextFrontier.clear(); + scheduler.clear(); } /** - * Return the {@link IScheduler}. + * Reset the scheduler (this is used to ensure that thread locals are + * released if we are using a scheduler that uses per-thread data + * structures). */ - IScheduler getScheduler() { - - return nextFrontier; - + void resetScheduler() { + + scheduler.clear(); + } - + /** * Simple implementation of a "static" frontier. * <p> @@ -330,118 +370,633 @@ } - private void add(IV v) { + private void schedule(IV v) { vertices.add(v); } + + /** + * Setup the same static frontier object for the new compact fronter (it + * is reused in each round). + */ + void resetFrontier(final IV[] a) { + + // clear the old frontier. + clear(); + + // ensure enough capacity for the new frontier. + ensureCapacity(a.length); + + for (IV v : a) { + + vertices.add(v); + + } + + } + + } + + /** + * Extended interface so we can try different implementation strategies. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + */ + private interface MyScheduler extends IScheduler { + + /** + * Compact the schedule into the new frontier. + * <p> + * Note: Typical contracts ensure that the frontier is compact (no + * duplicates) and in ascending {@link IV} order (this provides cache + * locality for the index reads, even if those reads are against indices + * wired into RAM). + */ + void compactFrontier(StaticFrontier frontier); + + /** + * Reset all internal state (and get rid of any thread locals). 
+ */ + void clear(); } /** - * FIXME FRONTIER: Implement a variation on this that uses per-thread - * LinkedHashSets for the new frontier, sorts in each thread, and then does - * an N-way merge sort to produce the new compact frontier. This will - * require either thread locals (in which case we need to avoid leaking out - * those resources) or an explicit threadId concept that is coordinated with - * the GASEngine. + * Compact a collection of vertices into an ordered frontier. * - * TODO Add option to order the vertices to provide a serializable execution - * plan (like GraphChi). I believe that this reduces to computing a DAG over - * the frontier before executing the GATHER and then executing the frontier - * such that the parallel execution is constrained by arcs in the DAG that - * do not have mutual dependencies. + * @param vertices + * The collection of vertices for the new frontier. * + * @return The compact, ordered frontier. + */ + private static IV[] compactAndSort(final Set<IV> vertices) { + + final IV[] a; + + final int size = vertices.size(); + + // TODO FRONTIER: Could reuse this array for each round! + vertices.toArray(a = new IV[size]); + + /* + * Order for index access. An ordered scan on a B+Tree is 10X faster + * than random access lookups. + * + * Note: This uses natural V order, which is also the index order. + */ + java.util.Arrays.sort(a); + + return a; + + } + + /** + * A scheduler suitable for a single thread. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + */ + static private class SingleThreadScheduler implements MyScheduler { + + private final Set<IV> vertices; + + public SingleThreadScheduler() { + + this.vertices = new LinkedHashSet<IV>(); + + } + + @Override + public void schedule(final IV v) { + + vertices.add(v); + + } + + @Override + public void compactFrontier(final StaticFrontier frontier) { + + frontier.resetFrontier(compactAndSort(vertices)); + + } + + @Override + public void clear() { + + vertices.clear(); + + } + + } + + /** + * A simple scheduler based on a concurrent hash collection + * * @author <a href="mailto:tho...@us...">Bryan * Thompson</a> */ - class NextFrontier implements IScheduler { + static private class CHMScheduler implements MyScheduler { - private final ConcurrentHashSet<IV> f; + // FIXME This is a Jetty class. Unbundle it! Use CHM instead. + private final ConcurrentHashSet<IV> vertices; - public NextFrontier(final int nthreads) { + public CHMScheduler(final int nthreads) { - f = new ConcurrentHashSet<IV>(); + vertices = new ConcurrentHashSet<IV>(); } @Override public void schedule(final IV v) { - f.add(v); + vertices.add(v); } - private void clear() { + @Override + public void clear() { - f.clear(); + vertices.clear(); } + + @Override + public void compactFrontier(final StaticFrontier frontier) { + + frontier.resetFrontier(compactAndSort(vertices)); + + } + + } // CHMScheduler + + /** + * This scheduler uses thread-local {@link LinkedHashSet}s to track + * the distinct vertices scheduled by each execution thread. After + * the computation round, those per-thread segments of the frontier + * are combined into a single global, compact, and ordered frontier. + * To maximize the parallel activity, the per-thread frontiers are + * sorted using N threads (one per segment). Finally, the frontier + * segments are combined using a {@link MergeSortIterator} - this is + * a sequential step with a linear cost in the size of the frontier. 
+ * + * @author <a href="mailto:tho...@us...">Bryan + * Thompson</a> + */ + static private class ThreadLocalScheduler implements MyScheduler { + + private final GASEngine gasEngine; + private final int nthreads; + private final ConcurrentHashMap<Long/*threadId*/,SingleThreadScheduler> map; - /** - * Convert the frontier into a representation suitable for the next - * round of processing. - * <p> - * Note: Typical contracts ensure that the frontier is compact (no - * duplicates) and in ascending {@link IV} order (this provides cache - * locality for the index reads, even if those reads are against indices - * wired into RAM). - * <p> - * This implementation generates an ordered frontier to maximize the - * locality of reference within the indices. - * - * FIXME FRONTIER: The frontier should be compacted using parallel - * threads. For example, we can sort the new frontier within each thread - * that adds a vertex to be scheduled for the new frontier (in the - * SCATTER phase). Those per-thread frontiers could then be combined by - * a merge sort, either using multiple threads (pair-wise) or a single - * thread (N-way merge). - * - * 2/3rds of the time is CHM.toArray(). 1/3 is the sort. - * - * TODO FRONTIER: Find a parallel sort that we can use for java. This is - * not required if we do a sort within each SCATTER thread and then a - * merge sort across the per-thread compact, ordered frontiers. It is - * required if we defer the sort until we have combined those per-thread - * frontiers into a global frontier. - */ - public IStaticFrontier compactFrontier() { + public ThreadLocalScheduler(final GASEngine gasEngine) { - final IV[] a; + this.gasEngine = gasEngine; + + this.nthreads = gasEngine.getNThreads(); + + this.map = new ConcurrentHashMap<Long, SingleThreadScheduler>( + nthreads/* initialCapacity */, .75f/* loadFactor */, + nthreads); - final int size = this.f.size(); + } + + private IScheduler threadLocalScheduler() { - this.f.toArray(a = new IV[size]); + final Long id = Thread.currentThread().getId(); + + SingleThreadScheduler s = map.get(id); + + if (s == null) { + final IScheduler old = map.putIfAbsent(id, + s = new SingleThreadScheduler()); + + if (old != null) { + + /* + * We should not have a key collision since this is based on + * the threadId. + */ + + throw new AssertionError(); + + } + + } + + return s; + + } + + @Override + public void schedule(final IV v) { + + threadLocalScheduler().schedule(v); + + } + + @Override + public void clear() { + /* - * Order for index access. An ordered scan on a B+Tree is 10X faster - * than random access lookups. - * - * Note: This uses natural V order, which is also the index order. + * Clear the per-thread maps, but do not discard. They will be + * reused in the next round. */ - java.util.Arrays.sort(a); + for(SingleThreadScheduler s : map.values()) { + s.clear(); + + } + +// if (false) { +// /* +// * Note: This should not be required. It is a bit of a paranoid +// * step. It could reduce the efficiency by forcing us to +// * reallocate the backing data structures. We should keep those +// * on hand for the life of the Scheduler, which is linked to the +// * execution of the GASProgram. +// */ +// map.clear(); +// } + + } + + @Override + public void compactFrontier(final StaticFrontier frontier) { + /* - * Setup the same static frontier object for the new compact fronter - * (it is reused in each round). + * Extract a sorted, compact frontier from each thread local + * frontier. 
*/ + final IV[][] frontiers = new IV[nthreads][]; - // clear the old frontier. + int nsources = 0; + int nvertices = 0; + { + final List<Callable<IV[]>> tasks = new ArrayList<Callable<IV[]>>(nthreads); + + for (SingleThreadScheduler s : map.values()) { + final SingleThreadScheduler t = s; + tasks.add(new Callable<IV[]>(){ + @Override + public IV[] call() throws Exception { + return compactAndSort(t.vertices); + } + }); + + } + // invokeAll() - futures will be done() before it returns. + final List<Future<IV[]>> futures; + try { + futures = gasEngine.getGASThreadPool() + .invokeAll(tasks); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + + for (Future<IV[]> f : futures) { + + final IV[] b; + try { + b = frontiers[nsources] = f.get(); + nvertices += b.length; + nsources++; + } catch (InterruptedException e) { + throw new RuntimeException(e); + } catch (ExecutionException e) { + throw new RuntimeException(e); + } + + } + } + + // Clear the new frontier. frontier.clear(); + + if (nsources == 0) { + // The new frontier is empty. + return; + + } + + if (nsources > nthreads) { + + /* + * nsources could be LT nthreads if we have a very small + * frontier, but it should never be GTE nthreads. + */ + + throw new AssertionError("nsources=" + nsources + ", nthreads=" + + nthreads); + + } + + /* + * Now merge sort those arrays. + */ + + // wrap IVs[] as Iterators. + @SuppressWarnings("unchecked") + final Iterator<IV>[] itrs = new Iterator[nsources]; + + for (int i = 0; i < nsources; i++) { + + itrs[i] = new ArrayIterator<IV>(frontiers[i]); + + } + + // merge sort of those iterators. + final Iterator<IV> itr = new MergeSortIterator(itrs); + // ensure enough capacity for the new frontier. - frontier.ensureCapacity(a.length); + frontier.ensureCapacity(nvertices); - for (IV v : a) { + // and populate the new frontier. + while (itr.hasNext()) { + final IV v = itr.next(); + frontier.vertices.add(v); } - - return frontier; } - } // NextFrontier - + } + + /** + * An N-way merge sort of N source iterators. + * + * @author <a href="mailto:tho...@us...">Bryan + * Thompson</a> + */ + private static class MergeSortIterator implements Iterator<IV> { + + /** + * The #of source iterators. + */ + private final int n; + + /** + * The source iterators in the order given to the ctor. + */ + private final Iterator<IV>[] sourceIterator; + + /** + * The current value from each source and <code>null</code> if we need + * to get another value from that source. The value for a source + * iterator that has been exhausted will remain <code>null</code>. When + * all entries in this array are <code>null</code> there are no more + * values to be visited and we are done. + */ + private final IV[] sourceTuple; + + /** + * Index into {@link #sourceIterator} and {@link #sourceTuple} of the + * iterator whose value will be returned next -or- <code>-1</code> if we + * need to choose the next value to be visited. + */ + private int current = -1; + + /** + * + * @param sourceIterators + * Each source iterator MUST be in ascending {@link IV} + * order. + */ + public MergeSortIterator(final Iterator<IV>[] sourceIterators) { + + assert sourceIterators != null; + + assert sourceIterators.length > 0; + + this.n = sourceIterators.length; + + for (int i = 0; i < n; i++) { + + assert sourceIterators[i] != null; + + } + + this.sourceIterator = sourceIterators; + + sourceTuple = new IV[n]; + + } + + @Override + public boolean hasNext() { + + /* + * Until we find an undeleted tuple (or any tuple if DELETED is + * true). 
+ */ + while (true) { + + if (current != -1) { + + if (log.isTraceEnabled()) + log.trace("Already matched: source=" + current); + + return true; + + } + + /* + * First, make sure that we have a tuple for each source + * iterator (unless that iterator is exhausted). + */ + + int nexhausted = 0; + + for (int i = 0; i < n; i++) { + + if (sourceTuple[i] == null) { + + if (sourceIterator[i].hasNext()) { + + sourceTuple[i] = sourceIterator[i].next(); + + if (log.isTraceEnabled()) + log.trace("read sourceTuple[" + i + "]=" + + sourceTuple[i]); + + } else { + + nexhausted++; + + } + + } + + } + + if (nexhausted == n) { + + // the aggregate iterator is exhausted. + + return false; + + } + + /* + * Now consider the current tuple for each source iterator in + * turn and choose the _first_ iterator having a tuple whose key + * orders LTE all the others (or GTE if [reverseScan == true]). + * This is the next tuple to be visited by the aggregate + * iterator. + */ + { + + // current is index of the smallest key so far. + assert current == -1; + + IV key = null; // smallest key so far. + + for (int i = 0; i < n; i++) { + + if (sourceTuple[i] == null) { + + // This source is exhausted. + + continue; + + } + + if (current == -1) { + + current = i; + + key = sourceTuple[i]; + + assert key != null; + + } else { + + final IV tmp = sourceTuple[i]; + + final int ret = IVUtility.compare(tmp, key); + + if (ret < 0) { + + /* + * This key orders LT the current key. + * + * Note: This test MUST be strictly LT since LTE + * would break the precedence in which we are + * processing the source iterators and give us + * the key from the last source by preference + * when we need the key from the first source by + * preference. + */ + + current = i; + + key = tmp; + + } + + } + + } + + assert current != -1; + + } + + if (log.isDebugEnabled()) { + + log.debug("Will visit next: source=" + current + + ", tuple: " + sourceTuple[current]); + + } + + return true; + + } + + } + + @Override + public IV next() { + + if (!hasNext()) + throw new NoSuchElementException(); + + return consumeLookaheadTuple(); + + } + + /** + * Consume the {@link #current} source value. + * + * @return The {@link #current} tuple. + */ + private IV consumeLookaheadTuple() { + + final IV t = sourceTuple[current]; + + // clear tuples from other sources having the same key as the + // current tuple. + clearCurrent(); + + return t; + + } + + /** + * <p> + * Clear tuples from other sources having the same key as the current + * tuple (eliminates duplicates). + * </p> + */ + protected void clearCurrent() { + + assert current != -1; + + final IV key = sourceTuple[current]; + + for (int i = current + 1; i < n; i++) { + + if (sourceTuple[i] == null) { + + // this iterator is exhausted. + + continue; + + } + + final IV tmp = sourceTuple[i]; + + final int ret = IVUtility.compare(key, tmp); + + if (ret == 0) { + + // discard tuple. + + sourceTuple[i] = null; + + } + + } + + // clear the tuple that we are returning so that we will read + // another from that source. + sourceTuple[current] = null; + + // clear so that we will look again. 
+ current = -1; + + } + + @Override + public void remove() { + + throw new UnsupportedOperationException(); + + } + + } // MergeSortIterator + } Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/VertexTaskFactory.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/VertexTaskFactory.java 2013-08-24 19:55:18 UTC (rev 7336) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/VertexTaskFactory.java 2013-08-25 14:50:41 UTC (rev 7337) @@ -23,4 +23,3 @@ Callable<T> newVertexTask(@SuppressWarnings("rawtypes") IV u); } - This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
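The heart of this commit is the ThreadLocalScheduler above: each worker thread schedules into its own LinkedHashSet (registered under its thread id in a ConcurrentHashMap), the per-thread segments are sorted in parallel after the round, and a MergeSortIterator pass produces the compact, ordered frontier. A condensed sketch of the same pattern follows, assuming plain Integer vertices and folding the sort, merge, and de-dup steps into a single TreeSet rather than the parallel sort plus N-way merge used in the commit; ThreadLocalSchedulerDemo is an illustrative name, not a bigdata class.

import java.util.ArrayList;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.ConcurrentHashMap;

public class ThreadLocalSchedulerDemo {

    private final ConcurrentHashMap<Long, Set<Integer>> byThread =
            new ConcurrentHashMap<Long, Set<Integer>>();

    /** Called concurrently from worker threads; contention-free per thread. */
    public void schedule(final int v) {
        final long id = Thread.currentThread().getId();
        Set<Integer> s = byThread.get(id);
        if (s == null) {
            final Set<Integer> tmp = new LinkedHashSet<Integer>();
            s = byThread.putIfAbsent(id, tmp);
            if (s == null)
                s = tmp; // keyed by threadId, so no collision is expected
        }
        s.add(v);
    }

    /**
     * Compaction, run single-threaded between rounds once the workers are
     * quiescent: a TreeSet gives the sort and the de-dup in one pass, where
     * the committed code sorts each segment in parallel and then runs an
     * N-way merge instead.
     */
    public List<Integer> compactFrontier() {
        final Set<Integer> merged = new TreeSet<Integer>();
        for (Set<Integer> s : byThread.values()) {
            merged.addAll(s);
            s.clear(); // segments are reused in the next round
        }
        return new ArrayList<Integer>(merged);
    }
}

The design point is that schedule() sits on the hot path (it is invoked once per scattered edge), so it must be contention-free; the compaction cost is paid once per round and can itself be parallelized, which is what the committed ThreadLocalScheduler.compactFrontier() does via gasEngine.getGASThreadPool().invokeAll(...).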
From: <tho...@us...> - 2013-08-24 19:55:25
Revision: 7336 http://bigdata.svn.sourceforge.net/bigdata/?rev=7336&view=rev Author: thompsonbry Date: 2013-08-24 19:55:18 +0000 (Sat, 24 Aug 2013) Log Message: ----------- javadoc fix Modified Paths: -------------- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/journal/QueueStatsPlugIn.java Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/journal/QueueStatsPlugIn.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/journal/QueueStatsPlugIn.java 2013-08-24 18:42:40 UTC (rev 7335) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/journal/QueueStatsPlugIn.java 2013-08-24 19:55:18 UTC (rev 7336) @@ -61,7 +61,7 @@ } /** - * Collects interesting statistics on the {@link #executorService}. + * Collects interesting statistics on the {@link ExecutorService}. * <p> * Note: Guarded by synchronized(this). * This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2013-08-24 18:42:51
Revision: 7335 http://bigdata.svn.sourceforge.net/bigdata/?rev=7335&view=rev Author: thompsonbry Date: 2013-08-24 18:42:40 +0000 (Sat, 24 Aug 2013) Log Message: ----------- Major refactoring of the GAS implementation. This breaks apart the concept of the GASEngine (thread pool and resource management life cycle), the GASContext (execution context for a specific GAS program run), and the GASState (vertex state, edge state, the current frontier, and the scheduler for the new frontier). I have not yet reimplemented the frontier scheduler to provide a compact frontier efficiently. That is next. Tests pass. Local performance looks good. See #629 Modified Paths: -------------- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGASContext.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGASEngine.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGASOptions.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGASProgram.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IReducer.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/analytics/BFS.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/analytics/SSSP.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASEngine.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASRunner.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASStats.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/graph/analytics/TestBFS.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/graph/analytics/TestSSSP.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/graph/impl/TestGather.java Added Paths: ----------- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGASState.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IScheduler.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASContext.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASState.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/IStaticFrontier.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/VertexTaskFactory.java Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGASContext.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGASContext.java 2013-08-24 18:39:50 UTC (rev 7334) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGASContext.java 2013-08-24 18:42:40 UTC (rev 7335) @@ -1,15 +1,15 @@ package com.bigdata.rdf.graph; -import com.bigdata.rdf.internal.IV; -import com.bigdata.rdf.spo.ISPO; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; /** * Execution context for an {@link IGASProgram}. This is distinct from the - * {@link IGASEngine} so we can support distributed evaluation. + * {@link IGASEngine} so we can support distributed evaluation and concurrent + * evaluation of multiple {@link IGASProgram}s. 
* - * @author <a href="mailto:tho...@us...">Bryan - * Thompson</a> - * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * * @param <VS> * The generic type for the per-vertex state. This is scoped to the * computation of the {@link IGASProgram}. @@ -22,51 +22,31 @@ * true. The SUM type is scoped to the GATHER + SUM operation (NOT * the computation). */ -@SuppressWarnings("rawtypes") -public interface IGASContext<VS, ES, ST> { +public interface IGASContext<VS, ES, ST> extends Callable<IGASStats> { /** - * Schedule a vertex for execution. - * - * @param v - * The vertex. + * Return the program that is being evaluated. */ - void schedule(IV v); - + IGASProgram<VS, ES, ST> getGASProgram(); + /** - * Return the current evaluation round (origin ZERO). + * The computation state. */ - int round(); - + IGASState<VS, ES, ST> getGASState(); + /** - * Get the state for the vertex using the appropriate factory. If this is - * the first visit for that vertex, then the state is initialized using the - * factory. Otherwise the existing state is returned. + * Execute one iteration. * - * @param v - * The vertex. + * @param stats + * Used to report statistics about the execution of the + * algorithm. * - * @return The state for that vertex. - * - * @see IGASProgram#getVertexStateFactory() + * @return true iff the new frontier is empty. */ - VS getState(IV v); + boolean doRound(IGASStats stats) throws Exception, ExecutionException, + InterruptedException; /** - * Get the state for the edge using the appropriate factory. If this is the - * first visit for that edge, then the state is initialized using the - * factory. Otherwise the existing state is returned. - * - * @param v - * The vertex. - * - * @return The state for that vertex. - * - * @see IGASProgram#getEdgeStateFactory() - */ - ES getState(ISPO e); - - /** * Compute a reduction over the vertex state table (all vertices that have * had their vertex state materialized). * Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGASEngine.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGASEngine.java 2013-08-24 18:39:50 UTC (rev 7334) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGASEngine.java 2013-08-24 18:42:40 UTC (rev 7335) @@ -1,74 +1,46 @@ package com.bigdata.rdf.graph; -import java.util.concurrent.Callable; -import java.util.concurrent.ExecutionException; - -import com.bigdata.rdf.internal.IV; - /** + * The interface used to submit an {@link IGASProgram} for evaluation. * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @param <VS> - * The generic type for the per-vertex state. This is scoped to the - * computation of the {@link IGASProgram}. - * @param <ES> - * The generic type for the per-edge state. This is scoped to the - * computation of the {@link IGASProgram}. - * @param <ST> - * The generic type for the SUM. This is often directly related to - * the generic type for the per-edge state, but that is not always - * true. The SUM type is scoped to the GATHER + SUM operation (NOT - * the computation). - * - * FIXME This should be refactored to allow a singleton for the - * {@link IGASEngine} (for a server process, much like a QueryEngine) - * and then to create an {@link IGASContext} to execute an - * {@link IGASProgram}. This would allow us to reuse resources within - * the {@link IGASEngine}. 
*/ -public interface IGASEngine<VS, ES, ST> extends Callable<IGASStats> { +public interface IGASEngine { /** - * Return the program that is being evaluated. - */ - IGASProgram<VS, ES, ST> getGASProgram(); - - /** - * The execution context for the {@link IGASEngine}. - */ - IGASContext<VS, ES, ST> getGASContext(); - - /** - * {@link #reset()} the computation state and populate the initial frontier. + * Obtain an execution context for the specified {@link IGASProgram}. * - * @param v - * One or more vertices that will be included in the initial - * frontier. + * @param namespace + * The namespace of the graph (KB instance). + * @param timestamp + * The timestamp of the graph view (this should be a read-only + * view for non-blocking index reads). + * @param program + * The program to execute against that graph. * - * @throws IllegalArgumentException - * if no vertices are specified. + * @param <VS> + * The generic type for the per-vertex state. This is scoped to + * the computation of the {@link IGASProgram}. + * @param <ES> + * The generic type for the per-edge state. This is scoped to the + * computation of the {@link IGASProgram}. + * @param <ST> + * The generic type for the SUM. This is often directly related + * to the generic type for the per-edge state, but that is not + * always true. The SUM type is scoped to the GATHER + SUM + * operation (NOT the computation). */ - void init(@SuppressWarnings("rawtypes") IV... v); + <VS, ES, ST> IGASContext<VS, ES, ST> newGASContext(String namespace, + long timestamp, IGASProgram<VS, ES, ST> program); /** - * Discard computation state (the frontier, vertex state, and edge state) - * and reset the round counter. - * <p> - * Note: The graph is NOT part of the computation and is not discared by - * this method. + * Polite shutdown. */ - void reset(); - + void shutdown(); + /** - * Execute one iteration. - * - * @param stats - * Used to report statistics about the execution of the - * algorithm. - * - * @return true iff the new frontier is empty. + * Immediate shutdown. */ - boolean doRound(IGASStats stats) throws Exception, ExecutionException, - InterruptedException; - + void shutdownNow(); + } \ No newline at end of file Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGASOptions.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGASOptions.java 2013-08-24 18:39:50 UTC (rev 7334) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGASOptions.java 2013-08-24 18:42:40 UTC (rev 7335) @@ -8,18 +8,6 @@ * may be declared by the {@link IGASProgram}. * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * - * TODO Option to materialize Literals (or to declare the set of - * literals of interest). How do we do a gather of the attribute values - * for a vertex? That would be the SPO index for clustered access, so - * this should be done at the same time that we SCATTER over out-edges, - * which implies that the SCATTER gets pushed into the APPLY which makes - * sense. - * - * TODO Option for scalable state (HTree or BTree with buffered eviction - * as per the DISTINCT filter). - * - * TODO Option to materialize the VS for the target vertex in SCATTER. 
*/ public interface IGASOptions<VS, ES> { Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGASProgram.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGASProgram.java 2013-08-24 18:39:50 UTC (rev 7334) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGASProgram.java 2013-08-24 18:42:40 UTC (rev 7335) @@ -111,7 +111,7 @@ * @param u * The vertex. */ - void init(IGASContext<VS, ES, ST> ctx, IV u); + void init(IGASState<VS, ES, ST> ctx, IV u); /** * GATHER is a map/reduce over the edges of the vertex. The SUM provides @@ -133,28 +133,19 @@ * <p> * Note: by lazily resolving the vertex and/or edge state in the GAS * callback methods we avoid eagerly materializing data that we do - * not need. + * not need. [Lazy resolution does not work on a cluster. The only + * available semantics there are lazy resolution of state that was + * materialized in order to support a gather() or scatter() for a + * vertex.] * <p> - * Note: However, this might cause problems with a powergraph style - * decomposition onto a cluster since the state needs to be - * communicated up front if it will be required by the gather() for - * the edge. - * <p> * Note: The state associated with the source/target vertex and the * edge should all be immutable for the GATHER. The vertex state * should only be mutable for the APPLY(). The target vertex state * and/or edge state MAY be mutable for the SCATTER, but that * depends on the algorithm. How can we get these constraints into * the API? - * - * TODO If gather/scatter over ALL edges, then do we need to pass - * through a parameter so the caller can figure out what direction - * the edge points in (alternatively, pass in the vertex for which - * the gather is being performance and they can reference test both - * [s] and [o] to see which one is the vertex on which the gather is - * invoked and which one is the remote vertex. */ - ST gather(IGASContext<VS, ES, ST> ctx, IV u, ISPO e); + ST gather(IGASState<VS, ES, ST> ctx, IV u, ISPO e); /** * SUM is a pair-wise reduction that is applied during the GATHER. @@ -201,7 +192,7 @@ * when compared to either the frontier or the set of states that * have been in the frontier during the computation. */ - VS apply(IGASContext<VS, ES, ST> ctx, IV u, ST sum); + VS apply(IGASState<VS, ES, ST> ctx, IV u, ST sum); /** * Return <code>true</code> iff the vertex should run its SCATTER phase. @@ -214,7 +205,7 @@ * The vertex. * @return */ - boolean isChanged(IGASContext<VS, ES, ST> ctx, IV u); + boolean isChanged(IGASState<VS, ES, ST> ctx, IV u); /** * @@ -224,6 +215,6 @@ * @param e * The edge. */ - void scatter(IGASContext<VS, ES, ST> ctx, IV u, ISPO e); - + void scatter(IGASState<VS, ES, ST> ctx, IScheduler sch, IV u, ISPO e); + } \ No newline at end of file Added: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGASState.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGASState.java (rev 0) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IGASState.java 2013-08-24 18:42:40 UTC (rev 7335) @@ -0,0 +1,89 @@ +package com.bigdata.rdf.graph; + +import com.bigdata.rdf.internal.IV; +import com.bigdata.rdf.spo.ISPO; + +/** + * Interface exposes access to the VS and ES that is visible during a GATHER or + * SCATTER operation. 
+ * <p> + * This interface is intended to be restrictive in both its API and the state + * that the API will expose in order to facilitate scaling in multi-machine + * environments. + * <p> + * A concrete implementation of this interface for a cluster WILL ONLY provide + * O(1) access to vertices whose state has not been materialized on a given node + * by a GATHER or SCATTER. State for vertices that were not materialized will + * not be accessible. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * + * @param <VS> + * The generic type for the per-vertex state. This is scoped to the + * computation of the {@link IGASProgram}. + * @param <ES> + * The generic type for the per-edge state. This is scoped to the + * computation of the {@link IGASProgram}. + * @param <ST> + * The generic type for the SUM. This is often directly related to + * the generic type for the per-edge state, but that is not always + * true. The SUM type is scoped to the GATHER + SUM operation (NOT + * the computation). + */ +public interface IGASState<VS,ES, ST> { + + /** + * {@link #reset()} the computation state and populate the initial frontier. + * + * @param v + * One or more vertices that will be included in the initial + * frontier. + * + * @throws IllegalArgumentException + * if no vertices are specified. + */ + void init(@SuppressWarnings("rawtypes") IV... v); + + /** + * Discard computation state (the frontier, vertex state, and edge state) + * and reset the round counter. + * <p> + * Note: The graph is NOT part of the computation and is not discared by + * this method. + */ + void reset(); + + /** + * Return the current evaluation round (origin ZERO). + */ + int round(); + + /** + * Get the state for the vertex using the appropriate factory. If this is + * the first visit for that vertex, then the state is initialized using the + * factory. Otherwise the existing state is returned. + * + * @param v + * The vertex. + * + * @return The state for that vertex. + * + * @see IGASProgram#getVertexStateFactory() + */ + VS getState(@SuppressWarnings("rawtypes") IV v); + + /** + * Get the state for the edge using the appropriate factory. If this is the + * first visit for that edge, then the state is initialized using the + * factory. Otherwise the existing state is returned. + * + * @param v + * The vertex. + * + * @return The state for that vertex. + * + * @see IGASProgram#getEdgeStateFactory() + */ + ES getState(ISPO e); + +} Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IReducer.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IReducer.java 2013-08-24 18:39:50 UTC (rev 7334) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IReducer.java 2013-08-24 18:42:40 UTC (rev 7335) @@ -48,7 +48,7 @@ * The result from applying the procedure to a single index * partition. */ - public void visit(IGASContext<VS, ES, ST> ctx, @SuppressWarnings("rawtypes") IV u); + public void visit(IGASState<VS, ES, ST> ctx, @SuppressWarnings("rawtypes") IV u); /** * Return the aggregated results as an implementation dependent object. 
Added: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IScheduler.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IScheduler.java (rev 0) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/IScheduler.java 2013-08-24 18:42:40 UTC (rev 7335) @@ -0,0 +1,20 @@ +package com.bigdata.rdf.graph; + +import com.bigdata.rdf.internal.IV; + +/** + * Interface schedules a vertex for execution. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + */ +public interface IScheduler { + + /** + * Add the vertex to the schedule. + * + * @param v + * The vertex. + */ + void schedule(@SuppressWarnings("rawtypes") IV v); + +} Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/analytics/BFS.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/analytics/BFS.java 2013-08-24 18:39:50 UTC (rev 7334) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/analytics/BFS.java 2013-08-24 18:42:40 UTC (rev 7335) @@ -2,8 +2,9 @@ import com.bigdata.rdf.graph.EdgesEnum; import com.bigdata.rdf.graph.Factory; -import com.bigdata.rdf.graph.IGASContext; import com.bigdata.rdf.graph.IGASProgram; +import com.bigdata.rdf.graph.IGASState; +import com.bigdata.rdf.graph.IScheduler; import com.bigdata.rdf.graph.impl.GASRunner; import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.spo.ISPO; @@ -116,8 +117,8 @@ * Not used. */ @Override - public void init(IGASContext<BFS.VS, BFS.ES, Void> ctx, IV u) { - ctx.getState(u).visit(0); + public void init(IGASState<BFS.VS, BFS.ES, Void> state, IV u) { + state.getState(u).visit(0); } @@ -125,7 +126,7 @@ * Not used. */ @Override - public Void gather(IGASContext<BFS.VS, BFS.ES, Void> ctx, IV u, ISPO e) { + public Void gather(IGASState<BFS.VS, BFS.ES, Void> state, IV u, ISPO e) { throw new UnsupportedOperationException(); } @@ -141,7 +142,7 @@ * NOP */ @Override - public BFS.VS apply(final IGASContext<BFS.VS, BFS.ES, Void> ctx, final IV u, + public BFS.VS apply(final IGASState<BFS.VS, BFS.ES, Void> state, final IV u, final Void sum) { return null; @@ -152,7 +153,7 @@ * Returns <code>true</code>. */ @Override - public boolean isChanged(IGASContext<VS, ES, Void> ctx, IV u) { + public boolean isChanged(IGASState<VS, ES, Void> state, IV u) { return true; @@ -166,21 +167,21 @@ * {@link ISPO#s()}. The remote vertex is {@link ISPO#o()}. */ @Override - public void scatter(final IGASContext<BFS.VS, BFS.ES, Void> ctx, - final IV u, final ISPO e) { + public void scatter(final IGASState<BFS.VS, BFS.ES, Void> state, + final IScheduler sch, final IV u, final ISPO e) { // remote vertex state. - final VS otherState = ctx.getState(e.o()); + final VS otherState = state.getState(e.o()); // visit. - if (otherState.visit(ctx.round() + 1)) { + if (otherState.visit(state.round() + 1)) { /* * This is the first visit for the remote vertex. Add it to the * schedule for the next iteration. 
*/ - ctx.schedule(e.o()); + sch.schedule(e.o()); } Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/analytics/SSSP.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/analytics/SSSP.java 2013-08-24 18:39:50 UTC (rev 7334) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/analytics/SSSP.java 2013-08-24 18:42:40 UTC (rev 7335) @@ -5,8 +5,9 @@ import com.bigdata.rdf.graph.EdgesEnum; import com.bigdata.rdf.graph.Factory; import com.bigdata.rdf.graph.GASUtil; -import com.bigdata.rdf.graph.IGASContext; import com.bigdata.rdf.graph.IGASProgram; +import com.bigdata.rdf.graph.IGASState; +import com.bigdata.rdf.graph.IScheduler; import com.bigdata.rdf.graph.impl.GASRunner; import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.spo.ISPO; @@ -153,10 +154,10 @@ * {@inheritDoc} */ @Override - public void init(final IGASContext<SSSP.VS, SSSP.ES, Integer> ctx, + public void init(final IGASState<SSSP.VS, SSSP.ES, Integer> state, final IV u) { - final VS us = ctx.getState(u); + final VS us = state.getState(u); synchronized (us) { @@ -176,12 +177,12 @@ * {@inheritDoc} */ @Override - public Integer gather(final IGASContext<SSSP.VS, SSSP.ES, Integer> ctx, + public Integer gather(final IGASState<SSSP.VS, SSSP.ES, Integer> state, final IV u, final ISPO e) { // assert e.o().equals(u); - final VS src = ctx.getState(e.s()); + final VS src = state.getState(e.s()); final int d = src.dist(); @@ -213,7 +214,7 @@ * {@inheritDoc} */ @Override - public SSSP.VS apply(IGASContext<SSSP.VS, SSSP.ES, Integer> ctx, + public SSSP.VS apply(final IGASState<SSSP.VS, SSSP.ES, Integer> state, final IV u, final Integer sum) { if (sum != null) { @@ -221,7 +222,7 @@ // log.error("u=" + u + ", us=" + us + ", sum=" + sum); // Get the state for that vertex. - final SSSP.VS us = ctx.getState(u); + final SSSP.VS us = state.getState(u); final int minDist = sum; @@ -243,10 +244,10 @@ } @Override - public boolean isChanged(final IGASContext<SSSP.VS, SSSP.ES, Integer> ctx, + public boolean isChanged(final IGASState<SSSP.VS, SSSP.ES, Integer> state, final IV u) { - return ctx.getState(u).isChanged(); + return state.getState(u).isChanged(); } @@ -277,14 +278,14 @@ * </p> */ @Override - public void scatter(final IGASContext<SSSP.VS, SSSP.ES, Integer> ctx, - final IV u, final ISPO e) { + public void scatter(final IGASState<SSSP.VS, SSSP.ES, Integer> state, + final IScheduler sch, final IV u, final ISPO e) { final IV other = GASUtil.getOtherVertex(u, e); - final VS selfState = ctx.getState(u); + final VS selfState = state.getState(u); - final VS otherState = ctx.getState(other); + final VS otherState = state.getState(other); // last observed distance for the remote vertex. final int otherDist = otherState.dist(); @@ -304,7 +305,7 @@ + ", scheduling: " + other + " with newDist=" + newDist); // Then add the remote vertex to the next frontier. 
- ctx.schedule(e.o()); + sch.schedule(e.o()); } Added: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASContext.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASContext.java (rev 0) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASContext.java 2013-08-24 18:42:40 UTC (rev 7335) @@ -0,0 +1,834 @@ +package com.bigdata.rdf.graph.impl; + +import java.util.Iterator; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; + +import org.apache.log4j.Logger; + +import com.bigdata.btree.IIndex; +import com.bigdata.btree.IRangeQuery; +import com.bigdata.btree.ITuple; +import com.bigdata.btree.keys.IKeyBuilder; +import com.bigdata.btree.keys.SuccessorUtil; +import com.bigdata.journal.ITx; +import com.bigdata.rdf.graph.EdgesEnum; +import com.bigdata.rdf.graph.GASUtil; +import com.bigdata.rdf.graph.IGASContext; +import com.bigdata.rdf.graph.IGASProgram; +import com.bigdata.rdf.graph.IGASState; +import com.bigdata.rdf.graph.IGASStats; +import com.bigdata.rdf.graph.IReducer; +import com.bigdata.rdf.graph.IScheduler; +import com.bigdata.rdf.internal.IV; +import com.bigdata.rdf.internal.IVUtility; +import com.bigdata.rdf.spo.ISPO; +import com.bigdata.rdf.spo.SPOFilter; +import com.bigdata.rdf.spo.SPOKeyOrder; +import com.bigdata.rdf.store.AbstractTripleStore; +import com.bigdata.relation.accesspath.ElementFilter; +import com.bigdata.relation.accesspath.EmptyCloseableIterator; +import com.bigdata.relation.accesspath.IElementFilter; +import com.bigdata.striterator.ICloseableIterator; +import com.bigdata.striterator.Resolver; +import com.bigdata.striterator.Striterator; + +@SuppressWarnings("rawtypes") +public class GASContext<VS, ES, ST> implements IGASContext<VS, ES, ST> { + + private static final Logger log = Logger.getLogger(GASContext.class); + + /** + * Filter visits only edges (filters out attribute values). + * <p> + * Note: This filter is pushed down onto the AP and evaluated close to the + * data. + */ + static final IElementFilter<ISPO> edgeOnlyFilter = new SPOFilter<ISPO>() { + private static final long serialVersionUID = 1L; + + @Override + public boolean isValid(final Object e) { + return ((ISPO) e).o().isURI(); + } + }; + + private final GASEngine gasEngine; + + /** + * The graph (a KB instance). + */ + private final String namespace; + + /** + * The timestamp of the view of that graph. This MAY be + * {@link ITx#READ_COMMITTED} to use the current committed view of the graph + * for each iteration (dynamic graph). + */ + private final long timestamp; + + /** + * This {@link IGASState}. + */ + private final GASState<VS, ES, ST> state; + + /** + * The graph analytic to be executed. + */ + private final IGASProgram<VS, ES, ST> program; + + /** + * + * @param namespace + * The namespace of the graph (KB instance). + * @param timestamp + * The timestamp of the graph view (this should be a read-only + * view for non-blocking index reads). + * @param program + * The program to execute against that graph. 
+ */ + public GASContext(final GASEngine gasEngine, final String namespace, + final long timestamp, final IGASProgram<VS, ES, ST> program) { + + if (gasEngine == null) + throw new IllegalArgumentException(); + + if (program == null) + throw new IllegalArgumentException(); + + if (namespace == null) + throw new IllegalArgumentException(); + + this.gasEngine = gasEngine; + + this.namespace = namespace; + + this.timestamp = timestamp; + + this.program = program; + + this.state = new GASState<VS, ES, ST>(gasEngine, program); + + } + + @Override + public IGASState<VS, ES, ST> getGASState() { + return state; + } + + @Override + public IGASProgram<VS, ES, ST> getGASProgram() { + return program; + } + + @Override + public IGASStats call() throws Exception { + + final GASStats total = new GASStats(); + + while (!state.frontier().isEmpty()) { + + final GASStats roundStats = new GASStats(); + + doRound(roundStats); + + total.add(roundStats); + + } + + if (log.isInfoEnabled()) + log.info("Done: " + total); + + state.traceState(gasEngine.getKB(namespace, timestamp)); + + // Done + return total; + + } + + /** + * {@inheritDoc} + * + * TODO This is an Asynchronous implementation. Further, it does not attempt + * to race ahead to accelerate convergence (unlike an asynchronous neural + * network). + * + * TODO There should be an option for property value access during the APPLY + * (either no property values are required, or some (or all) are required + * and must optionally be materialized. Likewise, there could be an option + * to force the materialization of the URIs for the (s,p,o). + * <p> + * Property value access is on the SPO index. If we are doing a reverse + * gather (out-edges) then it will be right there and the Apply should be + * pushed into the Gather. If we are doing a forward gather (in-edges), then + * we are reading on OSP and will need to do a separate read on SPO. + */ + @Override + public boolean doRound(final IGASStats stats) throws InterruptedException, + ExecutionException, Exception { + + /* + * Obtain a view on the graph. + * + * Note: This will automatically advance if there has been an + * intervening commit and the caller specified ITx.READ_COMMITTED. + */ + final AbstractTripleStore kb = gasEngine.getKB(namespace, timestamp); + + // The fontier for this round. + final IStaticFrontier f = state.frontier(); + + state.traceState(kb); + + /* + * TODO This logic allows us to push down the APPLY into the GATHER or + * SCATTER depending on some characteristics of the algorithm. Is this + * worth while? + * + * TODO The ability to pushd down the APPLY for AllEdges for the GATHER + * depends on our using the union of the in-edges and out-edges + * iterators to visit those edges. That union means that we do not have + * to preserve the accumulant across the in-edges and out-edges aspects + * of the GATHER. If this UNION over the iterators causes problems with + * other optimizations, then it could be discarded. Note that this is + * not an issue for the SCATTER since we can scatter over the in-edges + * and out-edges for any given vertex independently (so long as the + * APPLY is done before the SCATTER - this would not work if we pushed + * down the APPLY into the SCATTER). + */ + final EdgesEnum gatherEdges = program.getGatherEdges(); + final EdgesEnum scatterEdges = program.getScatterEdges(); + final boolean pushDownApplyInGather; + final boolean pushDownApplyInScatter; + final boolean runApplyStage; + + if (scatterEdges == EdgesEnum.NoEdges) { + // Do APPLY() in GATHER. 
+ pushDownApplyInGather = true; + pushDownApplyInScatter = false; + runApplyStage = false; + } else if (gatherEdges == EdgesEnum.NoEdges) { + // APPLY() in SCATTER. + pushDownApplyInGather = false; + pushDownApplyInScatter = true; + runApplyStage = false; + } else { + /* + * Do not push down the APPLY. + * + * TODO We could still push down the apply into the GATHER if we are + * doing both stages. + */ + pushDownApplyInGather = false; + pushDownApplyInScatter = false; + runApplyStage = true; + } + + /* + * GATHER + */ + + final long beginGather = System.nanoTime(); + + final long gatherEdgeCount; + if (gatherEdges == EdgesEnum.NoEdges) { + + gatherEdgeCount = 0L; + + } else { + + gatherEdgeCount = gatherEdges(kb, f, gatherEdges, + pushDownApplyInGather); + + } + + final long elapsedGather = System.nanoTime() - beginGather; + + /* + * APPLY + */ + + final long elapsedApply; + + if (runApplyStage) { + + final long beginApply = System.nanoTime(); + + apply(f); + + elapsedApply = System.nanoTime() - beginApply; + + } else { + + elapsedApply = 0L; + + } + + /* + * SCATTER + */ + + final long beginScatter = System.nanoTime(); + + final long scatterEdgeCount; + + if (scatterEdges == EdgesEnum.NoEdges) { + + scatterEdgeCount = 0L; + + } else { + + /* + * This is the new frontier. It is initially empty. All newly + * discovered vertices are inserted into this frontier. + * + * TODO This assumes that only SCATTER can schedule new vertices. If + * we also permit scheduling during GATHER (or APPLY), then that + * will require us to communicate about the new frontier during + * operations other than SCATTER. On a cluster, the communication + * overhead is real. On a single machine, it is completely + * artificial. (Some GAS programs visit all vertices in every round + * and thus do not use a scheduler at all and would not need to + * implement a SCATTER phase, at least, not to schedule vertices.) + */ + + final IScheduler sch = state.getScheduler(); + + scatterEdgeCount = scatterEdges(kb, f, sch, scatterEdges, + pushDownApplyInScatter); + + } + + final long elapsedScatter = System.nanoTime() - beginScatter; + + /* + * Reporting. + */ + + final long totalElapsed = elapsedGather + elapsedApply + elapsedScatter; + + final long totalEdges = scatterEdgeCount + gatherEdgeCount; + + // TODO pure interface for this. + ((GASStats) stats).add(f.size(), totalEdges, totalElapsed); + + if (log.isInfoEnabled()) { + + log.info("\ntotal"// + + ": fontierSize=" + + f.size() // + + ", ms=" + + TimeUnit.NANOSECONDS.toMillis(totalElapsed)// + + ", edges=" + + totalEdges// + + ", teps=" + + GASUtil.getTEPS(totalEdges, totalElapsed)// + + "\ngather"// + + ": ms=" + + TimeUnit.NANOSECONDS.toMillis(elapsedGather)// + + ", nedges=" + + gatherEdgeCount// + + ", fanIn=" + + GASUtil.fanOut(f.size(), gatherEdgeCount)// + + ", teps=" + + GASUtil.getTEPS(gatherEdgeCount, elapsedGather) // + + (runApplyStage ? ", apply=" + + TimeUnit.NANOSECONDS.toMillis(elapsedApply) : "")// + + "\nscatter"// + + ": ms=" + + TimeUnit.NANOSECONDS.toMillis(elapsedScatter)// + + ", nedges=" + + scatterEdgeCount // + + ", fanOut=" + + GASUtil.fanOut(f.size(), scatterEdgeCount) // + + ", teps=" + + GASUtil.getTEPS(scatterEdgeCount, elapsedScatter)// + ); + + } + + // End the round, advance the counter, and compact new frontier. + state.endRound(); + + // True if the new frontier is empty. + return state.frontier().isEmpty(); + + } // doRound() + + /** + * Do APPLY. + * + * TODO The apply() should be parallelized. 
For some algorithms, there is a + * moderate amount of work per vertex in apply(). Use {@link #nthreads} to + * set the parallelism. + */ + private void apply(final IStaticFrontier f) { + + for (IV u : f) { + + program.apply(state, u, null/* sum */); + + } + + } + + static private final SPOKeyOrder getKeyOrder(final AbstractTripleStore kb, + final boolean inEdges) { + final SPOKeyOrder keyOrder; + if (inEdges) { + // in-edges: OSP / OCSP : [u] is the Object. + keyOrder = kb.isQuads() ? SPOKeyOrder.OCSP : SPOKeyOrder.OSP; + } else { + // out-edges: SPO / (SPOC|SOPC) : [u] is the Subject. + keyOrder = kb.isQuads() ? SPOKeyOrder.SPOC : SPOKeyOrder.SPO; + } + return keyOrder; + } + + @SuppressWarnings("unchecked") + static private Striterator<Iterator<ISPO>, ISPO> getEdges( + final AbstractTripleStore kb, final boolean inEdges, final IV u) { + + final SPOKeyOrder keyOrder = getKeyOrder(kb, inEdges); + + final IIndex ndx = kb.getSPORelation().getIndex(keyOrder); + + final IKeyBuilder keyBuilder = ndx.getIndexMetadata().getKeyBuilder(); + + keyBuilder.reset(); + + IVUtility.encode(keyBuilder, u); + + final byte[] fromKey = keyBuilder.getKey(); + + final byte[] toKey = SuccessorUtil.successor(fromKey.clone()); + + return (Striterator<Iterator<ISPO>, ISPO>) new Striterator( + ndx.rangeIterator(fromKey, toKey, 0/* capacity */, + IRangeQuery.DEFAULT, + ElementFilter.newInstance(edgeOnlyFilter))) + .addFilter(new Resolver() { + private static final long serialVersionUID = 1L; + + @Override + protected Object resolve(final Object e) { + final ITuple<ISPO> t = (ITuple<ISPO>) e; + return t.getObject(); + } + }); + + } + + /** + * Return the edges for the vertex. + * + * @param u + * The vertex. + * @param edges + * Typesafe enumeration indicating which edges should be visited. + * @return An iterator that will visit the edges for that vertex. + * + * TODO There should be a means to specify a filter on the possible + * predicates to be used for traversal. If there is a single + * predicate, then that gives us S+P bound. If there are multiple + * predicates, then we have an IElementFilter on P (in addition to + * the filter that is removing the Literals from the scan). + */ + static private ICloseableIterator<ISPO> getEdges( + final AbstractTripleStore kb, final IV u, final EdgesEnum edges) { + + switch (edges) { + case NoEdges: + return new EmptyCloseableIterator<ISPO>(); + case InEdges: + return (ICloseableIterator<ISPO>) getEdges(kb, true/* inEdges */, u); + case OutEdges: + return (ICloseableIterator<ISPO>) getEdges(kb, false/* inEdges */, + u); + case AllEdges: { + final Striterator<Iterator<ISPO>, ISPO> a = getEdges(kb, + true/* inEdges */, u); + final Striterator<Iterator<ISPO>, ISPO> b = getEdges(kb, + false/* outEdges */, u); + a.append(b); + return (ICloseableIterator<ISPO>) a; + } + default: + throw new UnsupportedOperationException(edges.name()); + } + + } + + // private IChunkedIterator<ISPO> getInEdges(final AbstractTripleStore kb, + // final IV u) { + // + // // in-edges: OSP / OCSP : [u] is the Object. + // return kb + // .getSPORelation() + // .getAccessPath(null/* s */, null/* p */, u/* o */, null/* c */, + // edgeOnlyFilter).iterator(); + // + // } + // + // private IChunkedIterator<ISPO> getOutEdges(final AbstractTripleStore kb, + // final IV u) { + // + // // out-edges: SPO / SPOC : [u] is the Subject. 
+ // return kb + // .getSPORelation() + // .getAccessPath(u/* s */, null/* p */, null/* o */, + // null/* c */, edgeOnlyFilter).iterator(); + // + // } + // + // /** + // * Return the edges for the vertex. + // * + // * @param u + // * The vertex. + // * @param edges + // * Typesafe enumeration indicating which edges should be visited. + // * @return An iterator that will visit the edges for that vertex. + // * + // * TODO There should be a means to specify a filter on the possible + // * predicates to be used for traversal. If there is a single + // * predicate, then that gives us S+P bound. If there are multiple + // * predicates, then we have an IElementFilter on P (in addition to + // * the filter that is removing the Literals from the scan). + // * + // * TODO Use the chunk parallelism? Explicit for(x : chunk)? This + // * could make it easier to collect the edges into an array (but that + // * is not required for powergraph). + // */ + // @SuppressWarnings("unchecked") + // private IChunkedIterator<ISPO> getEdges(final AbstractTripleStore kb, + // final IV u, final EdgesEnum edges) { + // + // switch (edges) { + // case NoEdges: + // return new EmptyChunkedIterator<ISPO>(null/* keyOrder */); + // case InEdges: + // return getInEdges(kb, u); + // case OutEdges: + // return getOutEdges(kb, u); + // case AllEdges:{ + // final IChunkedIterator<ISPO> a = getInEdges(kb, u); + // final IChunkedIterator<ISPO> b = getOutEdges(kb, u); + // final IChunkedIterator<ISPO> c = (IChunkedIterator<ISPO>) new + // ChunkedStriterator<IChunkedIterator<ISPO>, ISPO>( + // a).append(b); + // return c; + // } + // default: + // throw new UnsupportedOperationException(edges.name()); + // } + // + // } + + /** + * @param inEdges + * when <code>true</code> the GATHER is over the in-edges. + * Otherwise it is over the out-edges. + * @param pushDownApply + * When <code>true</code>, the APPLY() will be done during the + * GATHER. + * + * @throws ExecutionException + * @throws InterruptedException + */ + private long scatterEdges(final AbstractTripleStore kb, + final IStaticFrontier f, final IScheduler sch, + final EdgesEnum scatterEdges, final boolean pushDownApply) + throws InterruptedException, ExecutionException, Exception { + + if (scatterEdges == null) + throw new IllegalArgumentException(); + + class ScatterVertexTaskFactory implements VertexTaskFactory<Long> { + + public Callable<Long> newVertexTask(final IV u) { + + return new ScatterTask(kb, u) { + @Override + protected boolean pushDownApply() { + return pushDownApply; + } + + @Override + protected EdgesEnum getEdgesEnum() { + return scatterEdges; + } + + @Override + protected IScheduler scheduler() { + return sch; + } + }; + }; + } + + return gasEngine.newFrontierStrategy(new ScatterVertexTaskFactory(), f) + .call(); + + } + + /** + * @param gatherEdges + * The edges to be gathered. + * @param pushDownApply + * When <code>true</code>, the APPLY() will be done during the + * GATHER. 
+ * + * @throws ExecutionException + * @throws InterruptedException + */ + private long gatherEdges(final AbstractTripleStore kb, + final IStaticFrontier f, //final IScheduler sch, + final EdgesEnum gatherEdges, final boolean pushDownApply) + throws InterruptedException, ExecutionException, Exception { + + if (gatherEdges == null) + throw new IllegalArgumentException(); + + class GatherVertexTaskFactory implements VertexTaskFactory<Long> { + + public Callable<Long> newVertexTask(final IV u) { + + return new GatherTask(kb, u) { + @Override + protected boolean pushDownApply() { + return pushDownApply; + } + + @Override + protected EdgesEnum getEdgesEnum() { + return gatherEdges; + } + + /** + * Note: The API does not permit vertices to be scheduled + * for execution during the GATHER phase. + */ + @Override + protected IScheduler scheduler() { + throw new UnsupportedOperationException(); + } + }; + }; + } + + return gasEngine.newFrontierStrategy(new GatherVertexTaskFactory(), f) + .call(); + + } + + /** + * Base class for SCATTER or GATHER of edges for a vertex. + * <p> + * Note: An abstract task pattern is used to factor out parameters that are + * constants within the scope of the scatter for each vertex in the + * frontier. + * + * @author <a href="mailto:tho...@us...">Bryan + * Thompson</a> + */ + abstract private class VertexEdgesTask implements Callable<Long> { + + protected final AbstractTripleStore kb; + protected final IV u; + + public VertexEdgesTask(final AbstractTripleStore kb, final IV u) { + + this.kb = kb; + + this.u = u; + + } + + abstract protected boolean pushDownApply(); + + abstract protected EdgesEnum getEdgesEnum(); + + abstract protected IScheduler scheduler(); + + } + + /** + * Scatter for the edges of a single vertex. + * + * @author <a href="mailto:tho...@us...">Bryan + * Thompson</a> + */ + abstract private class ScatterTask extends VertexEdgesTask { + + public ScatterTask(final AbstractTripleStore kb, final IV u) { + + super(kb, u); + + } + + /** + * Execute the scatter for the vertex. + * + * @return The #of visited edges. + */ + public Long call() throws Exception { + + final boolean TRACE = log.isTraceEnabled(); + + if (pushDownApply()) { + + /* + * Run the APPLY as part of the SCATTER. + * + * TODO This can be done on a thread pool or fork/join pool + * since we know that there are no duplicates in the frontier. + */ + + program.apply(state, u, null/* sum */); + + } + + if (!program.isChanged(state, u)) { + + // Unchanged. Do not scatter. + return 0L; + + } + + /* + * Visit the (in|out)-edges of that vertex. + */ + long nedges = 0L; + + final IScheduler sch = scheduler(); + + final ICloseableIterator<ISPO> eitr = getEdges(kb, u, + getEdgesEnum()); + + try { + + while (eitr.hasNext()) { + + // edge + final ISPO e = eitr.next(); + + nedges++; + + if (TRACE) // TODO Batch resolve if @ TRACE + log.trace("e=" + kb.toString(e)); + + program.scatter(state, sch, u, e); + + } + + } finally { + + eitr.close(); + + } + + return nedges; + + } + + } // ScatterTask + + /** + * Gather for the edges of a single vertex. 
+ * + * @author <a href="mailto:tho...@us...">Bryan + * Thompson</a> + */ + abstract private class GatherTask extends VertexEdgesTask { + + public GatherTask(final AbstractTripleStore kb, final IV u) { + + super(kb, u); + + } + + @Override + public Long call() throws Exception { + + long nedges = 0; + + final ICloseableIterator<ISPO> eitr = getEdges(kb, u, + getEdgesEnum()); + + try { + + /* + * Note: since (left,right) may be null, we need to known if + * left is defined. + */ + boolean first = true; + + ST left = null; + + while (eitr.hasNext()) { + + final ISPO e = eitr.next(); + + if (log.isTraceEnabled()) // TODO Batch resolve if @ TRACE + log.trace("u=" + u + ", e=" + kb.toString(e) + ", sum=" + + left); + + final ST right = program.gather(state, u, e); + + if (first) { + + left = right; + + first = false; + + } else { + + left = program.sum(left, right); + + } + + } + + if (pushDownApply()) { + + /* + * Run the APPLY as part of the GATHER. + * + * TODO This can be done on a thread pool or fork/join pool + * since we know that there are no duplicates in the + * frontier. + */ + + program.apply(state, u, left/* sum */); + + } + + } finally { + + eitr.close(); + + } + + return nedges; + + } + + } // GatherTask + + // TODO REDUCE : parallelize with nthreads. + @Override + public <T> T reduce(final IReducer<VS, ES, ST, T> op) { + + for (IV v : state.getKnownVertices()) { + + op.visit(state, v); + + } + + return op.get(); + + } + +} // GASContext Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASEngine.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASEngine.java 2013-08-24 18:39:50 UTC (rev 7334) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASEngine.java 2013-08-24 18:42:40 UTC (rev 7335) @@ -1,54 +1,20 @@ package com.bigdata.rdf.graph.impl; import java.util.ArrayList; -import java.util.Collection; -import java.util.HashSet; -import java.util.Iterator; import java.util.List; -import java.util.Map; -import java.util.Set; import java.util.concurrent.Callable; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ConcurrentMap; -import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.FutureTask; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicInteger; -import org.apache.log4j.Logger; -import org.eclipse.jetty.util.ConcurrentHashSet; - -import com.bigdata.btree.IIndex; -import com.bigdata.btree.IRangeQuery; -import com.bigdata.btree.ITuple; -import com.bigdata.btree.keys.IKeyBuilder; -import com.bigdata.btree.keys.SuccessorUtil; import com.bigdata.journal.IIndexManager; import com.bigdata.journal.ITx; import com.bigdata.journal.TimestampUtility; -import com.bigdata.rdf.graph.EdgesEnum; -import com.bigdata.rdf.graph.Factory; -import com.bigdata.rdf.graph.GASUtil; import com.bigdata.rdf.graph.IGASContext; import com.bigdata.rdf.graph.IGASEngine; import com.bigdata.rdf.graph.IGASProgram; -import com.bigdata.rdf.graph.IGASStats; -import com.bigdata.rdf.graph.IReducer; import com.bigdata.rdf.internal.IV; -import com.bigdata.rdf.internal.IVUtility; -import com.bigdata.rdf.model.BigdataValue; -import com.bigdata.rdf.spo.ISPO; -import com.bigdata.rdf.spo.SPOFilter; -import com.bigdata.rdf.spo.SPOKeyOrder; import com.bigdata.rdf.store.AbstractTripleStore; -import 
com.bigdata.relation.accesspath.ElementFilter; -import com.bigdata.relation.accesspath.EmptyCloseableIterator; -import com.bigdata.relation.accesspath.IElementFilter; import com.bigdata.service.IBigdataFederation; -import com.bigdata.striterator.ICloseableIterator; -import com.bigdata.striterator.Resolver; -import com.bigdata.striterator.Striterator; import com.bigdata.util.concurrent.LatchedExecutor; /** @@ -74,851 +40,233 @@ * is Shortest Path (as per RDF3X). Reachability queries for a hierarchy can * also be maintained and accelerated (again, RDF3X using a ferrari index). * + * TODO Option to materialize Literals (or to declare the set of literals of + * interest) [Note: We can also require that people inline all URIs and Literals + * if they need to have them materialized, but a materialization filter for + * Gather and Scatter would be nice if it can be selective for just those + * attributes or vertex identifiers that matter). + * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> */ @SuppressWarnings("rawtypes") -public class GASEngine<VS, ES, ST> implements IGASEngine<VS, ES, ST>, - IGASContext<VS, ES, ST> { +public class GASEngine implements IGASEngine { - private static final Logger log = Logger.getLogger(GASEngine.class); +// private static final Logger log = Logger.getLogger(GASEngine.class); /** - * Filter visits only edges (filters out attribute values). - * <p> - * Note: This filter is pushed down onto the AP and evaluated close to the - * data. - */ - static final IElementFilter<ISPO> edgeOnlyFilter = new SPOFilter<ISPO>() { - private static final long serialVersionUID = 1L; - - @Override - public boolean isValid(final Object e) { - return ((ISPO) e).o().isURI(); - } - }; - - /** * The {@link IIndexManager} is used to access the graph. */ private final IIndexManager indexManager; - - /** - * The graph (a KB instance). - */ - private final String namespace; /** - * The timestamp of the view of that graph. This MAY be - * {@link ITx#READ_COMMITTED} to use the current committed view of the graph - * for each iteration (dynamic graph). - */ - private final long timestamp; - - /** * The {@link ExecutorService} used to parallelize tasks. */ private final ExecutorService executorService; - /** - * The graph analytic to be executed. + * The parallelism for the SCATTER and GATHER phases. */ - private final IGASProgram<VS, ES, ST> program; + private final int nthreads; /** - * The {@link IGASContext}. + * The parallelism for the SCATTER and GATHER phases. */ - private final IGASContext<VS, ES, ST> ctx = this; - - /** - * Factory for the vertex state objects. - */ - private final Factory<IV, VS> vsf; - - /** - * Factory for the edge state objects. - */ - private final Factory<ISPO, ES> esf; - - /** - * The state associated with each visited vertex. - * - * TODO Offer scalable backend with high throughput, e.g., using a batched - * striped lock as per DISTINCT. - */ - private final ConcurrentMap<IV, VS> vertexState = new ConcurrentHashMap<IV, VS>(); - - /** - * TODO Edge state needs to be configurable. When disabled, leave this as - * <code>null</code>. - */ - private final ConcurrentMap<ISPO, ES> edgeState = null; - - /** - * The set of vertices that were identified in the current iteration. - */ - @SuppressWarnings("unchecked") - private final ConcurrentHashSet<IV>[] frontier = new ConcurrentHashSet[2]; - - /** - * The current evaluation round. 
- */ - private final AtomicInteger round = new AtomicInteger(0); - - @Override - public VS getState(final IV v) { - - VS vs = vertexState.get(v); - - if (vs == null) { - - VS old = vertexState.putIfAbsent(v, vs = vsf.initialValue(v)); - - if (old != null) { - - // Lost data race. - vs = old; - - } - - } - - return vs; - - } - - @Override - public ES getState(final ISPO e) { - - if (edgeState == null) - return null; - - ES es = edgeState.get(e); - - if (es == null) { - - ES old = edgeState.putIfAbsent(e, es = esf.initialValue(e)); - - if (old != null) { - - // Lost data race. - es = old; - - } - - } - - return es; - - } - - /** - * The current frontier. - */ - protected Set<IV> frontier() { - - return frontier[round.get() % 2]; - - } - - /** - * The new frontier - this is populated during the round. At the end of the - * round, the new frontier replaces the current frontier (this happens when - * we increment the {@link #round()}). If the current frontier is empty - * after that replacement, then the traversal is done. - */ - protected Set<IV> newFrontier() { - - return frontier[(round.get() + 1) % 2]; - - } - - @Override - public int round() { - - return round.get(); - - } - -// @Override - protected AbstractTripleStore getKB() { - - long timestamp = th... [truncated message content] |
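Taken together, the changes in the commit above split the old IGASContext into an IGASState (access to vertex/edge state, the round counter, and the frontier) plus an explicit IScheduler handed to scatter(). To make the GATHER contract concrete, here is a minimal, self-contained sketch of the gather()/sum() fold that GatherTask performs; everything in it (the class name, Integer as the SUM type, the degree example) is illustrative rather than project code, and only the pair-wise reduction semantics documented in the diff are assumed. Note the 'first' flag: null can be a legitimate accumulant, so it cannot be used to mean "no value yet".

    // Hypothetical sketch of the GATHER/SUM fold performed by
    // GatherTask above: gather() maps each edge to a value of the
    // SUM type (Integer here) and sum() reduces pair-wise, so the
    // reduced value delivered to apply() is the vertex degree.
    public class GatherFoldSketch {

        static Integer gather() { return 1; } // one per visited edge.

        static Integer sum(final Integer left, final Integer right) {
            return left + right; // pair-wise reduction.
        }

        public static void main(final String[] args) {
            final int nedges = 3;
            Integer left = null;
            boolean first = true; // null may be a legitimate value.
            for (int i = 0; i < nedges; i++) {
                final Integer right = gather();
                if (first) {
                    left = right;
                    first = false;
                } else {
                    left = sum(left, right);
                }
            }
            System.out.println(left); // prints 3; null iff no edges.
        }

    }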
From: <tho...@us...> - 2013-08-24 18:40:02
Revision: 7334 http://bigdata.svn.sourceforge.net/bigdata/?rev=7334&view=rev Author: thompsonbry Date: 2013-08-24 18:39:50 +0000 (Sat, 24 Aug 2013) Log Message: ----------- test cases and generalization of ArrayIterator() (now accepts a slice of an array). Modified Paths: -------------- branches/BIGDATA_RELEASE_1_2_0/ctc-striterators/src/java/cutthecrap/utils/striterators/ArrayIterator.java Modified: branches/BIGDATA_RELEASE_1_2_0/ctc-striterators/src/java/cutthecrap/utils/striterators/ArrayIterator.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/ctc-striterators/src/java/cutthecrap/utils/striterators/ArrayIterator.java 2013-08-24 18:39:36 UTC (rev 7333) +++ branches/BIGDATA_RELEASE_1_2_0/ctc-striterators/src/java/cutthecrap/utils/striterators/ArrayIterator.java 2013-08-24 18:39:50 UTC (rev 7334) @@ -29,33 +29,54 @@ import java.util.NoSuchElementException; /** - * Supports standard iteration over an object Array, allowing this to - * be used as a source for a <code>Striterator</code>. + * Supports standard iteration over an object Array, allowing this to be used as + * a source for a <code>Striterator</code>. */ -public class ArrayIterator implements Iterator { +public class ArrayIterator<T> implements Iterator<T> { + + /** Source array. */ + private final T[] m_src; + + /** Current index (next to be visited). */ + private int m_index; - private Object[] m_src = null; - private int m_index = 0; + /** Exclusive upper bound. */ + private final int m_last; - /** Constructor takes source object array **/ - public ArrayIterator(final Object[] src) { - m_src = src; - } + /** Constructor takes source object array **/ + public ArrayIterator(final T[] src) { + this(src, 0, src.length); + } + /** Constructor takes source object array **/ + public ArrayIterator(final T[] src, final int off, final int len) { + if (src == null) + throw new NullPointerException(); + if (off < 0) + throw new IllegalArgumentException(); + if (len < 0) + throw new IllegalArgumentException(); + if (off + len > src.length) + throw new IllegalArgumentException(); + m_src = src; + m_index = off; + m_last = off + len; + } + /** checks with current index and array size **/ public boolean hasNext() { - return m_src != null && m_src.length > m_index; + return m_last > m_index; } - /** @return current index from array **/ - public Object next() { - if (m_index < m_src.length) - return m_src[m_index++]; - else - throw new NoSuchElementException(); - } + /** @return current index from array **/ + public T next() { + if (m_index < m_last) + return m_src[m_index++]; + else + throw new NoSuchElementException(); + } - /** void .. does nothing **/ - public void remove() { - } + /** void .. does nothing **/ + public void remove() { + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
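A short usage sketch of the generalized constructor above: it lets a Striterator source visit a slice of a backing array without copying it. The demo class is hypothetical; only the ArrayIterator code shown in the diff is assumed.

    import java.util.Iterator;

    import cutthecrap.utils.striterators.ArrayIterator;

    public class ArrayIteratorSliceDemo {

        public static void main(final String[] args) {

            final String[] a = { "a", "b", "c", "d" };

            // Visit only the elements at indices 1..2 ("b", "c").
            final Iterator<String> itr = new ArrayIterator<String>(a,
                    1/* off */, 2/* len */);

            while (itr.hasNext()) {
                System.out.println(itr.next()); // prints b, then c.
            }

        }

    }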
From: <tho...@us...> - 2013-08-24 18:39:49
Revision: 7333 http://bigdata.svn.sourceforge.net/bigdata/?rev=7333&view=rev Author: thompsonbry Date: 2013-08-24 18:39:36 +0000 (Sat, 24 Aug 2013) Log Message: ----------- test cases and generalization of ArrayIterator() (now accepts a slice of an array). Modified Paths: -------------- branches/BIGDATA_RELEASE_1_2_0/ctc-striterators/src/test/cutthecrap/utils/striterators/TestAll.java Added Paths: ----------- branches/BIGDATA_RELEASE_1_2_0/ctc-striterators/src/test/cutthecrap/utils/striterators/TestArrayIterator.java Modified: branches/BIGDATA_RELEASE_1_2_0/ctc-striterators/src/test/cutthecrap/utils/striterators/TestAll.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/ctc-striterators/src/test/cutthecrap/utils/striterators/TestAll.java 2013-08-24 14:15:19 UTC (rev 7332) +++ branches/BIGDATA_RELEASE_1_2_0/ctc-striterators/src/test/cutthecrap/utils/striterators/TestAll.java 2013-08-24 18:39:36 UTC (rev 7333) @@ -88,7 +88,9 @@ suite.addTestSuite(TestCloseable.class); // @todo test Striterator - + + suite.addTestSuite(TestArrayIterator.class); + return suite; } Added: branches/BIGDATA_RELEASE_1_2_0/ctc-striterators/src/test/cutthecrap/utils/striterators/TestArrayIterator.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/ctc-striterators/src/test/cutthecrap/utils/striterators/TestArrayIterator.java (rev 0) +++ branches/BIGDATA_RELEASE_1_2_0/ctc-striterators/src/test/cutthecrap/utils/striterators/TestArrayIterator.java 2013-08-24 18:39:36 UTC (rev 7333) @@ -0,0 +1,114 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Aug 24, 2013 + */ +package cutthecrap.utils.striterators; + +import junit.framework.TestCase2; + +public class TestArrayIterator extends TestCase2 { + + public TestArrayIterator() { + } + + public TestArrayIterator(String name) { + super(name); + } + + public void test_ctor() { + + try { + new ArrayIterator<String>(null); + fail(); + } catch (NullPointerException ex) { + // ignore + } + + try { + new ArrayIterator<String>(null, 0, 0); + fail(); + } catch (NullPointerException ex) { + // ignore + } + + try { + new ArrayIterator<String>(new String[] {}, 0, 1); + fail(); + } catch (IllegalArgumentException ex) { + // ignore + } + + try { + new ArrayIterator<String>(new String[] {}, 0, -1); + fail(); + } catch (IllegalArgumentException ex) { + // ignore + } + + try { + new ArrayIterator<String>(new String[] {}, -1, 0); + fail(); + } catch (IllegalArgumentException ex) { + // ignore + } + + new ArrayIterator<String>(new String[] {}, 0, 0); + + new ArrayIterator<String>(new String[] {"1"}, 0, 1); + + new ArrayIterator<String>(new String[] {"1"}, 1, 0); + + try { + new ArrayIterator<String>(new String[] {"1"}, 1, 1); + fail(); + } catch (IllegalArgumentException ex) { + // ignore + } + + } + + public void test_iterator() { + + assertSameIterator(new String[] {}, new ArrayIterator<String>( + new String[] {}, 0, 0)); + + assertSameIterator(new String[] {}, new ArrayIterator<String>( + new String[] { "1" }, 1, 0)); + + assertSameIterator(new String[] { "1" }, new ArrayIterator<String>( + new String[] { "1" }, 0, 1)); + + assertSameIterator(new String[] { "1" }, new ArrayIterator<String>( + new String[] { "1", "2" }, 0, 1)); + + assertSameIterator(new String[] { "1", "2" }, + new ArrayIterator<String>(new String[] { "1", "2" }, 0, 2)); + + assertSameIterator(new String[] { "2" }, new ArrayIterator<String>( + new String[] { "1", "2" }, 1, 1)); + + } + +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2013-08-24 14:15:26
Revision: 7332 http://bigdata.svn.sourceforge.net/bigdata/?rev=7332&view=rev Author: thompsonbry Date: 2013-08-24 14:15:19 +0000 (Sat, 24 Aug 2013) Log Message: ----------- Removed redundent frontier compaction when using nthreads:=1. Modified Paths: -------------- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASEngine.java Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASEngine.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASEngine.java 2013-08-24 11:46:38 UTC (rev 7331) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASEngine.java 2013-08-24 14:15:19 UTC (rev 7332) @@ -697,7 +697,9 @@ * example, we can sort the new frontier within each thread that adds a * vertex to be scheduled for the new frontier (in the SCATTER phase). Those * per-thread frontiers could then be combined by a merge sort, either using - * multiple threads (pair-wise) or a single thread (N-way merge). + * multiple threads (pair-wise) or a single thread (N-way merge). + * + * 2/3rds of the time is CHM.toArray(). 1/3 is the sort. */ private IV[] getCompactFrontier() { @@ -1012,18 +1014,19 @@ private class RunInCallersThreadFrontierStrategy extends AbstractFrontierStrategy { + final IV[] f; + RunInCallersThreadFrontierStrategy( - final VertexTaskFactory<Long> taskFactory) { + final VertexTaskFactory<Long> taskFactory, final IV[] f) { super(taskFactory); + this.f = f; + } public Long call() throws Exception { - // Compact, ordered frontier. No duplicates! - final IV[] f = getCompactFrontier(); - long nedges = 0L; // For all vertices in the frontier. @@ -1052,7 +1055,7 @@ final VertexTaskFactory<Long> taskFactory, final IV[] f) { if (nthreads == 1) - return new RunInCallersThreadFrontierStrategy(taskFactory); + return new RunInCallersThreadFrontierStrategy(taskFactory, f); return new LatchedExecutorFrontierStrategy(taskFactory, executorService, nthreads, f); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
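The profiling note added above ('2/3rds of the time is CHM.toArray(). 1/3 is the sort.') concerns compacting the frontier: snapshotting the concurrent set of scheduled vertices and ordering the result for locality of reference against the indices. A minimal, self-contained sketch of that two-step operation follows; it is illustrative only (Long stands in for the vertex identifier type, and the project's getCompactFrontier() is not reproduced here).

    import java.util.Arrays;
    import java.util.Collections;
    import java.util.Set;
    import java.util.concurrent.ConcurrentHashMap;

    public class CompactFrontierSketch {

        public static void main(final String[] args) {

            // Concurrent set: no duplicates by construction.
            final Set<Long> frontier = Collections
                    .newSetFromMap(new ConcurrentHashMap<Long, Boolean>());

            frontier.add(3L);
            frontier.add(1L);
            frontier.add(2L);

            // Snapshot (the toArray() cost noted above) ...
            final Long[] f = frontier.toArray(new Long[0]);

            // ... then order the frontier (the sort cost noted above).
            Arrays.sort(f);

            System.out.println(Arrays.toString(f)); // [1, 2, 3]

        }

    }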
From: <tho...@us...> - 2013-08-24 11:46:45
Revision: 7331 http://bigdata.svn.sourceforge.net/bigdata/?rev=7331&view=rev Author: thompsonbry Date: 2013-08-24 11:46:38 +0000 (Sat, 24 Aug 2013) Log Message: ----------- Removed redundant computation of the compact frontier. See #629 Modified Paths: -------------- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASEngine.java Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASEngine.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASEngine.java 2013-08-24 11:15:06 UTC (rev 7330) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASEngine.java 2013-08-24 11:46:38 UTC (rev 7331) @@ -539,6 +539,9 @@ */ newFrontier().clear(); + // Compact, ordered frontier. No duplicates! + final IV[] f = getCompactFrontier(); + /* * TODO This logic allows us to push down the APPLY into the GATHER or * SCATTER depending on some characteristics of the algorithm. Is this @@ -596,7 +599,7 @@ } else { - gatherEdgeCount = gatherEdges(kb, gatherEdges, + gatherEdgeCount = gatherEdges(kb, f, gatherEdges, pushDownApplyInGather); } @@ -613,7 +616,7 @@ final long beginApply = System.nanoTime(); - apply(); + apply(f); elapsedApply = System.nanoTime() - beginApply; @@ -637,7 +640,7 @@ } else { - scatterEdgeCount = scatterEdges(kb, scatterEdges, + scatterEdgeCount = scatterEdges(kb, f, scatterEdges, pushDownApplyInScatter); } @@ -689,6 +692,12 @@ /** * Generate an ordered frontier to maximize the locality of reference within * the indices. + * + * FIXME The frontier should be compacted using parallel threads. For + * example, we can sort the new frontier within each thread that adds a + * vertex to be scheduled for the new frontier (in the SCATTER phase). Those + * per-thread frontiers could then be combined by a merge sort, either using + * multiple threads (pair-wise) or a single thread (N-way merge). */ private IV[] getCompactFrontier() { @@ -713,11 +722,8 @@ /** * Do APPLY. */ - private void apply() { + private void apply(final IV[] f) { - // Compact, ordered frontier. No duplicates! - final IV[] f = getCompactFrontier(); - for (IV u : f) { program.apply(ctx, u, null/* sum */); @@ -922,25 +928,27 @@ private final ExecutorService executorService; private final int nparallel; - + /** Compact, ordered frontier. No duplicates! */ + private final IV[] f; + LatchedExecutorFrontierStrategy( final VertexTaskFactory<Long> taskFactory, - final ExecutorService executorService, final int nparallel) { + final ExecutorService executorService, final int nparallel, + final IV[] f) { super(taskFactory); this.executorService = executorService; this.nparallel = nparallel; + + this.f = f; } @Override public Long call() throws Exception { - // Compact, ordered frontier. No duplicates! - final IV[] f = getCompactFrontier(); - final List<FutureTask<Long>> tasks = new ArrayList<FutureTask<Long>>( f.length); @@ -1041,13 +1049,13 @@ * @return The strategy that will map that task across the frontier. 
*/ private Callable<Long> newFrontierStrategy( - final VertexTaskFactory<Long> taskFactory) { + final VertexTaskFactory<Long> taskFactory, final IV[] f) { if (nthreads == 1) return new RunInCallersThreadFrontierStrategy(taskFactory); return new LatchedExecutorFrontierStrategy(taskFactory, - executorService, nthreads); + executorService, nthreads, f); } @@ -1062,7 +1070,7 @@ * @throws ExecutionException * @throws InterruptedException */ - private long scatterEdges(final AbstractTripleStore kb, + private long scatterEdges(final AbstractTripleStore kb, final IV[] f, final EdgesEnum scatterEdges, final boolean pushDownApply) throws InterruptedException, ExecutionException, Exception { @@ -1087,7 +1095,7 @@ }; } - return newFrontierStrategy(new ScatterVertexTaskFactory()).call(); + return newFrontierStrategy(new ScatterVertexTaskFactory(), f).call(); } @@ -1101,7 +1109,7 @@ * @throws ExecutionException * @throws InterruptedException */ - private long gatherEdges(final AbstractTripleStore kb, + private long gatherEdges(final AbstractTripleStore kb, final IV[] f, final EdgesEnum gatherEdges, final boolean pushDownApply) throws InterruptedException, ExecutionException, Exception { @@ -1126,7 +1134,7 @@ }; } - return newFrontierStrategy(new GatherVertexTaskFactory()).call(); + return newFrontierStrategy(new GatherVertexTaskFactory(), f).call(); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
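The FIXME added in this revision suggests compacting the new frontier from per-thread sorted runs combined by a merge sort (pair-wise across threads, or an N-way merge in a single thread). Below is a minimal, self-contained sketch of one pair-wise merge step that also drops duplicates across the runs; it is illustrative only (long stands in for the vertex identifier type) and is not project code.

    import java.util.Arrays;

    public class MergeFrontiersSketch {

        /**
         * Merge two sorted per-thread frontiers into a single sorted,
         * duplicate-free frontier.
         */
        static long[] merge(final long[] a, final long[] b) {
            final long[] out = new long[a.length + b.length];
            int i = 0, j = 0, k = 0;
            while (i < a.length || j < b.length) {
                final long v;
                if (j == b.length || (i < a.length && a[i] <= b[j])) {
                    v = a[i++];
                } else {
                    v = b[j++];
                }
                if (k == 0 || out[k - 1] != v) // skip duplicates.
                    out[k++] = v;
            }
            return Arrays.copyOf(out, k);
        }

        public static void main(final String[] args) {
            final long[] a = { 1, 3, 5 }; // thread 1's sorted run.
            final long[] b = { 2, 3, 6 }; // thread 2's sorted run.
            System.out.println(Arrays.toString(merge(a, b))); // [1, 2, 3, 5, 6]
        }

    }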
From: <tho...@us...> - 2013-08-24 11:15:20
Revision: 7330 http://bigdata.svn.sourceforge.net/bigdata/?rev=7330&view=rev Author: thompsonbry Date: 2013-08-24 11:15:06 +0000 (Sat, 24 Aug 2013) Log Message: ----------- Optimized GASEngine to use the B+Tree iterator without requiring the materialization of the ISPO tuples into an array. This should avoid range counts, allocations of the arrays, and drive the index scan directly from the calling thread (rather than potentially using a producer/consumer pattern that is built into the AccessPath class for large key range scans). Optimized potential hot spot in AbstractTripleStore.getSPORelation() using AtomicReference and double-checked locking pattern (was potentially contended on synchronized(this)), which could have had an impact on the GAS parallelism. Made IStriterator implement ICloseableIterator. See #629 Modified Paths: -------------- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/striterator/ChunkedStriterator.java branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/striterator/IStriterator.java branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/striterator/Striterator.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASEngine.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASRunner.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/store/AbstractTripleStore.java Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/striterator/ChunkedStriterator.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/striterator/ChunkedStriterator.java 2013-08-24 00:40:01 UTC (rev 7329) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/striterator/ChunkedStriterator.java 2013-08-24 11:15:06 UTC (rev 7330) @@ -50,12 +50,12 @@ public class ChunkedStriterator<I extends IChunkedIterator<E>, E> extends Striterator<I, E> implements IChunkedStriterator<I, E> { - public ChunkedStriterator(I src) { - - super( src ); - + public ChunkedStriterator(final I src) { + + super(src); + this.src = src; - + } /** @@ -65,8 +65,7 @@ * @param src * The source iterator. */ - @SuppressWarnings("unchecked") - public ChunkedStriterator(Iterator<E> src) { + public ChunkedStriterator(final Iterator<E> src) { this(IChunkedIterator.DEFAULT_CHUNK_SIZE, src); @@ -81,27 +80,27 @@ * The source iterator. */ @SuppressWarnings("unchecked") - public ChunkedStriterator(int chunkSize, Iterator<E> src) { + public ChunkedStriterator(final int chunkSize, final Iterator<E> src) { this((I) new ChunkedWrappedIterator<E>(src, chunkSize, null/* keyOrder */, null/* filter */)); - + } @Override final public E[] nextChunk() { return src.nextChunk(); - - } - @Override - final public void close() { - - ((ICloseableIterator<?>) src).close(); - } +// @Override +// final public void close() { +// +// ((ICloseableIterator<?>) src).close(); +// +// } + /** * Strengthened return type. 
*/ @@ -109,7 +108,7 @@ public IChunkedStriterator<I, E> addFilter(final IFilter<I, ?, E> filter) { return (IChunkedStriterator<I, E>) super.addFilter(filter); - + } - + } Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/striterator/IStriterator.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/striterator/IStriterator.java 2013-08-24 00:40:01 UTC (rev 7329) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/striterator/IStriterator.java 2013-08-24 11:15:06 UTC (rev 7330) @@ -42,8 +42,9 @@ * @todo appender and excluder patterns. These are just filters so we only * really need a single {@link #addFilter(IFilter)} method. */ -public interface IStriterator<I extends Iterator<E>,E> extends Iterator<E>, Enumeration<E> { - +public interface IStriterator<I extends Iterator<E>, E> extends + ICloseableIterator<E>, Enumeration<E> { + /** * Stack a filter on the source iterator. * Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/striterator/Striterator.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/striterator/Striterator.java 2013-08-24 00:40:01 UTC (rev 7329) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/striterator/Striterator.java 2013-08-24 11:15:06 UTC (rev 7330) @@ -166,6 +166,17 @@ } @Override + final public void close() { + + if (src instanceof ICloseable) { + + ((ICloseable) src).close(); + + } + + } + + @Override final public boolean hasMoreElements() { return src.hasNext(); Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASEngine.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASEngine.java 2013-08-24 00:40:01 UTC (rev 7329) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASEngine.java 2013-08-24 11:15:06 UTC (rev 7330) @@ -3,6 +3,7 @@ import java.util.ArrayList; import java.util.Collection; import java.util.HashSet; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; @@ -18,6 +19,11 @@ import org.apache.log4j.Logger; import org.eclipse.jetty.util.ConcurrentHashSet; +import com.bigdata.btree.IIndex; +import com.bigdata.btree.IRangeQuery; +import com.bigdata.btree.ITuple; +import com.bigdata.btree.keys.IKeyBuilder; +import com.bigdata.btree.keys.SuccessorUtil; import com.bigdata.journal.IIndexManager; import com.bigdata.journal.ITx; import com.bigdata.journal.TimestampUtility; @@ -30,15 +36,19 @@ import com.bigdata.rdf.graph.IGASStats; import com.bigdata.rdf.graph.IReducer; import com.bigdata.rdf.internal.IV; +import com.bigdata.rdf.internal.IVUtility; import com.bigdata.rdf.model.BigdataValue; import com.bigdata.rdf.spo.ISPO; import com.bigdata.rdf.spo.SPOFilter; +import com.bigdata.rdf.spo.SPOKeyOrder; import com.bigdata.rdf.store.AbstractTripleStore; +import com.bigdata.relation.accesspath.ElementFilter; +import com.bigdata.relation.accesspath.EmptyCloseableIterator; import com.bigdata.relation.accesspath.IElementFilter; import com.bigdata.service.IBigdataFederation; -import com.bigdata.striterator.ChunkedStriterator; -import com.bigdata.striterator.EmptyChunkedIterator; -import com.bigdata.striterator.IChunkedIterator; +import com.bigdata.striterator.ICloseableIterator; +import com.bigdata.striterator.Resolver; 
+import com.bigdata.striterator.Striterator; import com.bigdata.util.concurrent.LatchedExecutor; /** @@ -716,26 +726,49 @@ } - private IChunkedIterator<ISPO> getInEdges(final AbstractTripleStore kb, - final IV u) { + private final SPOKeyOrder getKeyOrder(final AbstractTripleStore kb, + final boolean inEdges) { + final SPOKeyOrder keyOrder; + if (inEdges) { + // in-edges: OSP / OCSP : [u] is the Object. + keyOrder = kb.isQuads() ? SPOKeyOrder.OCSP : SPOKeyOrder.OSP; + } else { + // out-edges: SPO / (SPOC|SOPC) : [u] is the Subject. + keyOrder = kb.isQuads() ? SPOKeyOrder.SPOC : SPOKeyOrder.SPO; + } + return keyOrder; + } - // in-edges: OSP / OCSP : [u] is the Object. - return kb - .getSPORelation() - .getAccessPath(null/* s */, null/* p */, u/* o */, null/* c */, - edgeOnlyFilter).iterator(); + @SuppressWarnings("unchecked") + private Striterator<Iterator<ISPO>,ISPO> getEdges(final AbstractTripleStore kb, + final boolean inEdges, final IV u) { - } + final SPOKeyOrder keyOrder = getKeyOrder(kb, inEdges); + + final IIndex ndx = kb.getSPORelation().getIndex(keyOrder); - private IChunkedIterator<ISPO> getOutEdges(final AbstractTripleStore kb, - final IV u) { + final IKeyBuilder keyBuilder = ndx.getIndexMetadata().getKeyBuilder(); - // out-edges: SPO / SPOC : [u] is the Subject. - return kb - .getSPORelation() - .getAccessPath(u/* s */, null/* p */, null/* o */, - null/* c */, edgeOnlyFilter).iterator(); + keyBuilder.reset(); + IVUtility.encode(keyBuilder, u); + + final byte[] fromKey = keyBuilder.getKey(); + + final byte[] toKey = SuccessorUtil.successor(fromKey.clone()); + + return (Striterator<Iterator<ISPO>,ISPO>) new Striterator(ndx.rangeIterator( + fromKey, toKey, 0/* capacity */, IRangeQuery.DEFAULT, + ElementFilter.newInstance(edgeOnlyFilter))) + .addFilter(new Resolver() { + private static final long serialVersionUID = 1L; + @Override + protected Object resolve(final Object e) { + final ITuple<ISPO> t = (ITuple<ISPO>) e; + return t.getObject(); + } + }); + } /** @@ -752,34 +785,93 @@ * predicate, then that gives us S+P bound. If there are multiple * predicates, then we have an IElementFilter on P (in addition to * the filter that is removing the Literals from the scan). - * - * TODO Use the chunk parallelism? Explicit for(x : chunk)? This - * could make it easier to collect the edges into an array (but that - * is not required for powergraph). 
*/ - @SuppressWarnings("unchecked") - private IChunkedIterator<ISPO> getEdges(final AbstractTripleStore kb, + private ICloseableIterator<ISPO> getEdges(final AbstractTripleStore kb, final IV u, final EdgesEnum edges) { switch (edges) { case NoEdges: - return new EmptyChunkedIterator<ISPO>(null/* keyOrder */); + return new EmptyCloseableIterator<ISPO>(); case InEdges: - return getInEdges(kb, u); + return (ICloseableIterator<ISPO>) getEdges(kb, true/*inEdges*/, u); case OutEdges: - return getOutEdges(kb, u); + return (ICloseableIterator<ISPO>) getEdges(kb, false/*inEdges*/, u); case AllEdges:{ - final IChunkedIterator<ISPO> a = getInEdges(kb, u); - final IChunkedIterator<ISPO> b = getOutEdges(kb, u); - final IChunkedIterator<ISPO> c = (IChunkedIterator<ISPO>) new ChunkedStriterator<IChunkedIterator<ISPO>, ISPO>( - a).append(b); - return c; + final Striterator<Iterator<ISPO>,ISPO> a = getEdges(kb, true/*inEdges*/, u); + final Striterator<Iterator<ISPO>,ISPO> b = getEdges(kb, false/*outEdges*/, u); + a.append(b); + return (ICloseableIterator<ISPO>) a; } default: throw new UnsupportedOperationException(edges.name()); } } + +// private IChunkedIterator<ISPO> getInEdges(final AbstractTripleStore kb, +// final IV u) { +// +// // in-edges: OSP / OCSP : [u] is the Object. +// return kb +// .getSPORelation() +// .getAccessPath(null/* s */, null/* p */, u/* o */, null/* c */, +// edgeOnlyFilter).iterator(); +// +// } +// +// private IChunkedIterator<ISPO> getOutEdges(final AbstractTripleStore kb, +// final IV u) { +// +// // out-edges: SPO / SPOC : [u] is the Subject. +// return kb +// .getSPORelation() +// .getAccessPath(u/* s */, null/* p */, null/* o */, +// null/* c */, edgeOnlyFilter).iterator(); +// +// } +// +// /** +// * Return the edges for the vertex. +// * +// * @param u +// * The vertex. +// * @param edges +// * Typesafe enumeration indicating which edges should be visited. +// * @return An iterator that will visit the edges for that vertex. +// * +// * TODO There should be a means to specify a filter on the possible +// * predicates to be used for traversal. If there is a single +// * predicate, then that gives us S+P bound. If there are multiple +// * predicates, then we have an IElementFilter on P (in addition to +// * the filter that is removing the Literals from the scan). +// * +// * TODO Use the chunk parallelism? Explicit for(x : chunk)? This +// * could make it easier to collect the edges into an array (but that +// * is not required for powergraph). +// */ +// @SuppressWarnings("unchecked") +// private IChunkedIterator<ISPO> getEdges(final AbstractTripleStore kb, +// final IV u, final EdgesEnum edges) { +// +// switch (edges) { +// case NoEdges: +// return new EmptyChunkedIterator<ISPO>(null/* keyOrder */); +// case InEdges: +// return getInEdges(kb, u); +// case OutEdges: +// return getOutEdges(kb, u); +// case AllEdges:{ +// final IChunkedIterator<ISPO> a = getInEdges(kb, u); +// final IChunkedIterator<ISPO> b = getOutEdges(kb, u); +// final IChunkedIterator<ISPO> c = (IChunkedIterator<ISPO>) new ChunkedStriterator<IChunkedIterator<ISPO>, ISPO>( +// a).append(b); +// return c; +// } +// default: +// throw new UnsupportedOperationException(edges.name()); +// } +// +// } /** * A factory for tasks that are applied to each vertex in the frontier. 
@@ -1115,7 +1207,8 @@ */ long nedges = 0L; - final IChunkedIterator<ISPO> eitr = getEdges(kb, u, getEdgesEnum()); + final ICloseableIterator<ISPO> eitr = getEdges(kb, u, + getEdgesEnum()); try { @@ -1164,7 +1257,8 @@ long nedges = 0; - final IChunkedIterator<ISPO> eitr = getEdges(kb, u, getEdgesEnum()); + final ICloseableIterator<ISPO> eitr = getEdges(kb, u, + getEdgesEnum()); try { Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASRunner.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASRunner.java 2013-08-24 00:40:01 UTC (rev 7329) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASRunner.java 2013-08-24 11:15:06 UTC (rev 7330) @@ -131,9 +131,10 @@ * namespace will be <code>kb</code> if none was specified when * the triple/quad store was created).</dd> * <dt>-load</dt> - * <dd>Loads the named resource. This option may appear multiple - * times. The resources will be searched for as URLs, on the - * CLASSPATH, and in the file system.</dd> + * <dd>Loads the named resource IFF the KB is empty (or does not + * exist) at the time this utility is executed. This option may + * appear multiple times. The resources will be searched for as + * URLs, on the CLASSPATH, and in the file system.</dd> * <dt>-bufferMode</dt> * <dd>Overrides the {@link BufferMode} (if any) specified in the * <code>propertyFile</code>.</dd> @@ -419,7 +420,7 @@ } /* - * Load data sets. + * Load data sets. TODO Document that KB load is IFF empty!!! (Or change the code.) */ if (newKB && (loadSet != null && loadSet.length > 0)) { Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/store/AbstractTripleStore.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/store/AbstractTripleStore.java 2013-08-24 00:40:01 UTC (rev 7329) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/store/AbstractTripleStore.java 2013-08-24 11:15:06 UTC (rev 7330) @@ -1591,12 +1591,12 @@ } - spoRelation = new SPORelation(this/* container */, + spoRelationRef.set(new SPORelation(this/* container */, getIndexManager(), SPO_NAMESPACE, getTimestamp(), new Properties(tmp)// Note: must wrap properties! - ); + )); - spoRelation.create();//assignedSplits); + spoRelationRef.get().create(); /* * The axioms require the lexicon to pre-exist. The axioms also @@ -1775,7 +1775,7 @@ spo.destroy(); } - spoRelation = null; + spoRelationRef.set(null/* clearRef */); super.destroy(); @@ -1919,21 +1919,38 @@ /** * The {@link SPORelation} (triples and their access paths). */ - final synchronized public SPORelation getSPORelation() { - - if (spoRelation == null) { + final public SPORelation getSPORelation() { - spoRelation = (SPORelation) getIndexManager().getResourceLocator() - .locate(getNamespace() + "." + SPORelation.NAME_SPO_RELATION, - getTimestamp()); + if (spoRelationRef.get() == null) { + /* + * Note: double-checked locking pattern (mostly non-blocking). Only + * synchronized if not yet resolved. The AtomicReference is reused + * as the monitor to serialize the resolution of the SPORelation in + * order to have that operation not contend with any other part of + * the API. + */ + synchronized (this) { + + if (spoRelationRef.get() == null) { + + spoRelationRef.set((SPORelation) getIndexManager() + .getResourceLocator().locate( + getNamespace() + "." 
+                                            + SPORelation.NAME_SPO_RELATION,
+                                    getTimestamp()));
+
+                }
+
+            }
+
         }

-        return spoRelation;
+        return spoRelationRef.get();

     }

-    private SPORelation spoRelation;
+    private final AtomicReference<SPORelation> spoRelationRef = new AtomicReference<SPORelation>();

     /**
      * The {@link LexiconRelation} handles all things related to the indices
@@ -2103,14 +2120,14 @@

         }

-        if (spoRelation != null) {
+        final SPORelation tmp = spoRelationRef.getAndSet(null/* clearRef */);

-            locator.discard(spoRelation, false/*destroyed*/);
+        if (tmp != null) {

-            spoRelation = null;
+            locator.discard(tmp, false/* destroyed */);

         }
-
+
     }

     /**
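The AbstractTripleStore change above replaces a synchronized getSPORelation() accessor with a lazily resolved AtomicReference: steady-state callers pay a single volatile read, and the monitor is only entered until the first resolution wins. A minimal sketch of the same double-checked locking pattern, with hypothetical names (LazyRef, resolver) standing in for the bigdata types:

    import java.util.concurrent.Callable;
    import java.util.concurrent.atomic.AtomicReference;

    /**
     * Resolves a value at most once; once cached, concurrent readers
     * pay only a volatile read on the fast path.
     */
    public class LazyRef<T> {

        private final AtomicReference<T> ref = new AtomicReference<T>();

        public T get(final Callable<T> resolver) throws Exception {
            if (ref.get() == null) { // fast path: no lock taken.
                synchronized (this) {
                    if (ref.get() == null) { // re-check under the lock.
                        ref.set(resolver.call());
                    }
                }
            }
            return ref.get();
        }

        /** Clear the cached value (e.g., on destroy()), returning the old one. */
        public T clear() {
            return ref.getAndSet(null);
        }
    }

The re-check inside the synchronized block is what makes the pattern correct, and the AtomicReference supplies the volatile semantics that a plain field would lack under classic double-checked locking. Note the same idiom appears twice in the patch: lazy resolution in getSPORelation() and getAndSet(null) on discard.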
From: <tho...@us...> - 2013-08-24 00:40:07
Revision: 7329
          http://bigdata.svn.sourceforge.net/bigdata/?rev=7329&view=rev
Author:   thompsonbry
Date:     2013-08-24 00:40:01 +0000 (Sat, 24 Aug 2013)

Log Message:
-----------
Rolling back r7319 which broke UNION processing.

Revision Links:
--------------
    http://bigdata.svn.sourceforge.net/bigdata/?rev=7319&view=rev

Modified Paths:
--------------
    branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpContext.java
    branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUtility.java

Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpContext.java
===================================================================
--- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpContext.java	2013-08-24 00:20:25 UTC (rev 7328)
+++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpContext.java	2013-08-24 00:40:01 UTC (rev 7329)
@@ -50,6 +50,7 @@
 /**
  * Convenience class for passing around the various pieces of context necessary
  * to construct the bop pipeline.
+ * FIXME Rolling back r7319 which broke UNION processing.
  */
 public class AST2BOpContext implements IdFactory, IEvaluationContext {

@@ -66,13 +67,6 @@
     private final AtomicInteger idFactory;

     /**
-     * Temporary "next id" bypasses the idFactory when we want to be explicit
-     * about the next bop id. Used for Tees (Unions). nextId = -1 means use
-     * the idFactory.
-     */
-    private transient int nextId = -1;
-
-    /**
      * The KB instance.
      */
     protected final AbstractTripleStore db;

@@ -463,34 +457,10 @@

     }

-    /**
-     * Temporarily set the next bop Id to come out of the context.
-     */
-    public void setNextId(final int nextId) {
-
-        this.nextId = nextId;
-
-    }
-
-    /**
-     * Return the next id from the idFactory, unless there is a temporary
-     * bop id set, in which case return it and clear it.
-     */
+    /** FIXME Rolling back r7319 which broke UNION processing. */
     public int nextId() {

-        if (nextId == -1) {
-
-            return idFactory.incrementAndGet();
-
-        } else {
-
-            final int tmp = nextId;
-
-            nextId = -1;
-
-            return tmp;
-
-        }
+        return idFactory.incrementAndGet();

     }

Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUtility.java
===================================================================
--- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUtility.java	2013-08-24 00:20:25 UTC (rev 7328)
+++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUtility.java	2013-08-24 00:40:01 UTC (rev 7329)
@@ -164,6 +164,7 @@
  * @see <a href=
  *      "https://sourceforge.net/apps/mediawiki/bigdata/index.php?title=QueryEvaluation"
  *      >Query Evaluation</a>.
+ *
  */
 public class AST2BOpUtility extends AST2BOpJoins {

@@ -2242,12 +2243,13 @@
             /*
              * Need to make sure the first operator in the group has the right
              * Id.
+             *
+             * FIXME Rolling back r7319 which broke UNION processing.
              */
-//            left = new CopyOp(leftOrEmpty(left), NV.asMap(new NV[] {//
-//                    new NV(Predicate.Annotations.BOP_ID, subqueryIds[i++]),//
-//                    }));
-            ctx.setNextId(subqueryIds[i++]);
-
+            left = new CopyOp(leftOrEmpty(left), NV.asMap(new NV[] {//
+                    new NV(Predicate.Annotations.BOP_ID, subqueryIds[i++]),//
+                    }));
+
             // Start with everything already known to be materialized.
             final Set<IVariable<?>> tmp = new LinkedHashSet<IVariable<?>>(
                     doneSet);

@@ -2279,12 +2281,11 @@
         /*
          * All the subqueries get routed here when they are done.
          */
-//        left = applyQueryHints(new CopyOp(leftOrEmpty(left),//
-//                new NV(Predicate.Annotations.BOP_ID, downstreamId),//
-//                new NV(BOp.Annotations.EVALUATION_CONTEXT,
-//                        BOpEvaluationContext.CONTROLLER)//
-//                ), ctx.queryHints);
-        ctx.setNextId(downstreamId);
+        left = applyQueryHints(new CopyOp(leftOrEmpty(left),//
+                new NV(Predicate.Annotations.BOP_ID, downstreamId),//
+                new NV(BOp.Annotations.EVALUATION_CONTEXT,
+                        BOpEvaluationContext.CONTROLLER)//
+                ), ctx.queryHints);

         // Add in anything which was known materialized for all child groups.
         doneSet.addAll(doneSetsIntersection);
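For context on the rollback: the reverted r7319 scheme routed a "pinned" bop id through mutable state on the shared AST2BOpContext, while the restored code attaches BOP_ID to a CopyOp on the operator itself. The sketch below (PinnedIdFactory is a hypothetical stand-in, not the bigdata interface) shows why the pinned-id side channel is fragile: whichever caller happens to invoke nextId() first consumes the pin, whether or not it was the intended operator. That is presumably how the UNION (Tee) wiring ended up mis-numbered; the log only records that it broke UNION processing.

    import java.util.concurrent.atomic.AtomicInteger;

    /**
     * Sketch of the reverted "pinned next id" scheme. The pin is one slot
     * of hidden mutable state on a shared context object.
     */
    class PinnedIdFactory {

        private final AtomicInteger idFactory = new AtomicInteger();

        private int nextId = -1; // -1 means "no pinned id".

        void setNextId(final int id) {
            nextId = id;
        }

        int nextId() {
            if (nextId == -1)
                return idFactory.incrementAndGet();
            final int tmp = nextId;
            nextId = -1; // pin is consumed by the FIRST caller, intended or not.
            return tmp;
        }
    }

Pinning the id on the operator (the restored CopyOp with an explicit BOp.Annotations.BOP_ID) keeps the id attached to the operator that must receive it, at the cost of one extra pipeline operator per UNION arm.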
From: <jer...@us...> - 2013-08-24 00:20:34
Revision: 7328
          http://bigdata.svn.sourceforge.net/bigdata/?rev=7328&view=rev
Author:   jeremy_carroll
Date:     2013-08-24 00:20:25 +0000 (Sat, 24 Aug 2013)

Log Message:
-----------
avoid DESCRIBE processing when not doing a describe.

Modified Paths:
--------------
    branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/ASTEvalHelper.java

Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/ASTEvalHelper.java
===================================================================
--- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/ASTEvalHelper.java	2013-08-23 21:31:48 UTC (rev 7327)
+++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/ASTEvalHelper.java	2013-08-24 00:20:25 UTC (rev 7328)
@@ -566,34 +566,38 @@
         );

         final CloseableIteration<BigdataStatement, QueryEvaluationException> src2;

-        switch (describeMode) {
-        case SymmetricOneStep: // No expansion step.
-        case ForwardOneStep: // No expansion step.
-            src2 = src;
-            break;
-        case CBD:
-        case SCBD:
-//        case CBDNR:
-//        case SCBDNR:
-        {
-            /*
-             * Concise Bounded Description (of any flavor) requires a fixed
-             * point expansion.
-             *
-             * TODO CBD : The expansion should monitor a returned iterator so
-             * the query can be cancelled by the openrdf client. Right now the
-             * expansion is performed before the iteration is returned to the
-             * client, so there is no opportunity to cancel a running CBD
-             * DESCRIBE.
-             */
-            src2 = new CBD(store, describeMode, describeIterationLimit,
-                    describeStatementlimit, bnodes).computeClosure(src);
-            break;
+        if (isDescribe) {
+            switch (describeMode) {
+            case SymmetricOneStep: // No expansion step.
+            case ForwardOneStep: // No expansion step.
+                src2 = src;
+                break;
+            case CBD:
+            case SCBD:
+                // case CBDNR:
+                // case SCBDNR:
+            {
+                /*
+                 * Concise Bounded Description (of any flavor) requires a fixed
+                 * point expansion.
+                 *
+                 * TODO CBD : The expansion should monitor a returned iterator so
+                 * the query can be cancelled by the openrdf client. Right now the
+                 * expansion is performed before the iteration is returned to the
+                 * client, so there is no opportunity to cancel a running CBD
+                 * DESCRIBE.
+                 */
+                src2 = new CBD(store, describeMode, describeIterationLimit,
+                        describeStatementlimit, bnodes).computeClosure(src);
+                break;
+            }
+            default:
+                throw new UnsupportedOperationException("describeMode="
+                        + describeMode);
+            }
+        } else {
+            src2 = src;
         }
-        default:
-            throw new UnsupportedOperationException("describeMode="
-                    + describeMode);
-        }

         final CloseableIteration<BigdataStatement, QueryEvaluationException> src3;

         if (describeCache != null) {
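The patch simply wraps the existing switch in if (isDescribe), so non-DESCRIBE queries never consult describeMode or pay for a CBD fixed-point expansion. An equivalent, slightly flatter shape inverts the test so the common non-DESCRIBE path exits first; all identifiers below are taken from the patch itself, so this is a sketch of the same control flow rather than standalone code:

    final CloseableIteration<BigdataStatement, QueryEvaluationException> src2;
    if (!isDescribe) {
        // Non-DESCRIBE queries: pass the solutions through untouched.
        src2 = src;
    } else {
        switch (describeMode) {
        case SymmetricOneStep:
        case ForwardOneStep:
            src2 = src; // one-step DESCRIBE: no expansion needed.
            break;
        case CBD:
        case SCBD:
            // Concise Bounded Description requires a fixed point expansion.
            src2 = new CBD(store, describeMode, describeIterationLimit,
                    describeStatementlimit, bnodes).computeClosure(src);
            break;
        default:
            throw new UnsupportedOperationException("describeMode=" + describeMode);
        }
    }

Either shape keeps the switch exhaustive for actual DESCRIBE queries while making the guard explicit for everything else.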
From: <tho...@us...> - 2013-08-23 21:31:55
Revision: 7327
          http://bigdata.svn.sourceforge.net/bigdata/?rev=7327&view=rev
Author:   thompsonbry
Date:     2013-08-23 21:31:48 +0000 (Fri, 23 Aug 2013)

Log Message:
-----------
Modified to not reload the KB each time....

Modified Paths:
--------------
    branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASRunner.java

Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASRunner.java
===================================================================
--- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASRunner.java	2013-08-23 21:24:14 UTC (rev 7326)
+++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/graph/impl/GASRunner.java	2013-08-23 21:31:48 UTC (rev 7327)
@@ -388,11 +388,13 @@
         try {

             // Locate/create KB.
+            final boolean newKB;
             {
                 final AbstractTripleStore kb;

                 if (isTemporary) {

                     kb = BigdataSail.createLTS(jnl, properties);
+                    newKB = true;

                 } else {

@@ -404,10 +406,12 @@

                         // create.
                         kb = BigdataSail.createLTS(jnl, properties);
+                        newKB = true;

                     } else {

                         kb = tmp;
+                        newKB = kb.getStatementCount() == 0L;

                     }

@@ -417,7 +421,7 @@
             /*
              * Load data sets.
              */
-            if (isTemporary || (loadSet != null && loadSet.length > 0)) {
+            if (newKB && (loadSet != null && loadSet.length > 0)) {

                 loadFiles(jnl, namespace, loadSet);
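The r7327 logic reads as a single predicate: load only when the KB is new, where "new" means just created or empty. A sketch under that reading; shouldLoad is a hypothetical helper, while kb.getStatementCount() == 0L is the same emptiness test used in the patch:

    /**
     * Decide whether GASRunner should (re)load data. The KB counts as "new"
     * if it was just created or holds zero statements, so repeated benchmark
     * runs against a persistent journal skip the (expensive) load step.
     */
    static boolean shouldLoad(final com.bigdata.rdf.store.AbstractTripleStore kb,
            final boolean justCreated, final String[] loadSet) {

        final boolean newKB = justCreated || kb.getStatementCount() == 0L;

        return newKB && loadSet != null && loadSet.length > 0;
    }

One consequence worth noting (flagged by the TODO added to GASRunner in r7330 above): with this check, -load is silently ignored on a non-empty KB, so adding new files to an existing store requires either a fresh journal or a manual load.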