|
From: <tho...@us...> - 2010-08-06 15:46:16
|
Revision: 3423
http://bigdata.svn.sourceforge.net/bigdata/?rev=3423&view=rev
Author: thompsonbry
Date: 2010-08-06 15:46:07 +0000 (Fri, 06 Aug 2010)
Log Message:
-----------
Rationalized the bigdata cluster configuration files slightly and added one for a single node cluster (bigdataStandalone.config).
Modified build.xml to remove all the "standalone" targets and to stage the new "bigdataStandalone.config" file.
Modified Paths:
--------------
trunk/build.xml
trunk/src/resources/config/README
trunk/src/resources/config/bigdataCluster.config
trunk/src/resources/config/bigdataCluster16.config
Added Paths:
-----------
trunk/src/resources/config/bigdataStandalone.config
Modified: trunk/build.xml
===================================================================
--- trunk/build.xml 2010-08-06 15:14:23 UTC (rev 3422)
+++ trunk/build.xml 2010-08-06 15:46:07 UTC (rev 3423)
@@ -777,118 +777,8 @@
</java>
</target>
-
+
<!-- -->
-<!-- STANDALONE FEDERATION TARGETS -->
-<!-- (test/benchamarking) -->
-
-<target name="generateLookupStarterJar" unless="lookupStarterJarAvailable">
-<antcall target="testCompile" />
-</target>
-
-<target name="testLookupStarterJarAvailability">
-<property name="bigdata-test.lib" location="${bigdata.dir}/bigdata-test/lib" />
-<condition property="lookupStarterJarAvailable">
- <available file="${bigdata-test.lib}/lookupstarter.jar" />
-</condition>
-</target>
-
-<target name="standalone-setup" depends="testLookupStarterJarAvailability,generateLookupStarterJar" description="Setup properties used by standalone federation and LUS start/stop.">
-<property name="app.home" location="${bigdata.dir}" />
-<property name="test.codebase.port" value="23333" />
-<property name="test.codebase.dir" location="${bigdata.dir}/bigdata-jini/lib/jini/lib-dl" />
-<property name="dist.lib" location="${bigdata.dir}/bigdata-jini/lib/jini/lib" />
-<property name="dist.lib.dl" location="${bigdata.dir}/bigdata-jini/lib/jini/lib-dl" />
-<property name="test.codebase" value="http://${this.hostname}:${test.codebase.port}/jsk-dl.jar" />
-<property name="java.security.policy" value="${bigdata.dir}/policy.all" />
-<property name="log4j.configuration" value="resources/logging/log4j.properties" />
-<property name="java.net.preferIPv4Stack" value="true" />
-<property name="bigdata.fedname" value="${standalone.fed}" />
-</target>
-
-<!-- Note: You should 'nohup' this, e.g., "nohup ant standalone-start" to
- avoid taking down the ServicesManagerServer if you are disconnected
- from a terminal. -->
-<target name="standalone-start" depends="jar,standalone-setup" description="Start the standalone federation.">
-<!-- Start the lookup service. -->
-<antcall target="startHttpd" />
-<antcall target="startLookup" />
-<java classname="com.bigdata.jini.start.ServicesManagerServer" failonerror="true" fork="true" logerror="true">
- <classpath refid="runtime.classpath" />
- <jvmarg value="-Xmx200m" />
- <jvmarg value="-showversion" />
- <!-- The name of the federation instance. -->
- <jvmarg value="-Dbigdata.fedname=${standalone.fed}" />
- <jvmarg value="-Djava.security.policy=policy.all" />
- <jvmarg value="-Dcom.bigdata.jmx.log4j.disable=true" />
- <jvmarg value="-Dcom.bigdata.counters.linux.sysstat.path=${SYSSTAT_HOME}" />
- <jvmarg value="-Dlog4j.configuration=file:src/resources/config/standalone/log4j.properties" />
- <arg value="src/resources/config/standalone/bigdataStandalone.config" />
-</java>
-</target>
-
-<target name="standalone-stop" depends="jar,standalone-setup" description="Stop the standalone federation.">
-<java classname="com.bigdata.service.jini.util.ShutdownFederation" failonerror="true" fork="true" logerror="true">
- <classpath refid="runtime.classpath" />
- <jvmarg value="-Xmx200m" />
- <jvmarg value="-showversion" />
- <!-- The name of the federation instance. -->
- <jvmarg value="-Dbigdata.fedname=${standalone.fed}" />
- <jvmarg value="-Djava.security.policy=policy.all" />
- <jvmarg value="-Dcom.bigdata.jmx.log4j.disable=true" />
- <jvmarg value="-Dcom.bigdata.counters.linux.sysstat.path=${SYSSTAT_HOME}" />
- <jvmarg value="-Dlog4j.configuration=file:src/resources/config/standalone/log4j.properties" />
- <arg value="src/resources/config/standalone/bigdataStandalone.config" />
-</java>
-<!-- Then take down the lookup service as well. -->
-<antcall target="stopLookup" />
-<antcall target="stopHttpd" />
-</target>
-
-<target name="standalone-start-nano-server" depends="jar" description="Start a small http server fronting for a bigdata database instance.">
-<java classname="com.bigdata.rdf.sail.bench.NanoSparqlServer" fork="true" failonerror="true">
- <arg line="${standalone.nanoServerPort} ${standalone.namespace} src/resources/config/standalone/bigdataStandalone.config" />
- <jvmarg line="-server" />
- <jvmarg line="-Xmx200M" />
- <classpath refid="runtime.classpath" />
-</java>
-</target>
-
-<target name="standalone-stop-nano-server" depends="jar" description="Stop the small http server running at the configured port.">
-<java classname="com.bigdata.rdf.sail.bench.NanoSparqlServer" fork="true" failonerror="true">
- <arg line="${standalone.nanoServerPort} -stop" />
- <classpath refid="runtime.classpath" />
-</java>
-</target>
-
-<target name="standalone-bulk-load" depends="jar" description="Bulk load RDF data into the standalone federation.">
-<java classname="com.bigdata.rdf.load.MappedRDFDataLoadMaster" failonerror="true" fork="true" logerror="true">
- <classpath refid="runtime.classpath" />
- <jvmarg value="-Xmx200m" />
- <jvmarg value="-showversion" />
- <!-- The name of the federation instance. -->
- <jvmarg value="-Dbigdata.fedname=${standalone.fed}" />
- <jvmarg value="-Djava.security.policy=policy.all" />
- <jvmarg value="-Dcom.bigdata.jmx.log4j.disable=true" />
- <jvmarg value="-Dcom.bigdata.counters.linux.sysstat.path=${SYSSTAT_HOME}" />
- <jvmarg value="-Dlog4j.configuration=file:src/resources/config/standalone/log4j.properties" />
- <!-- -->
- <!-- Per job parameters -->
- <!-- -->
- <!-- The namespace of the target KB. -->
- <jvmarg value="-Dbigdata.rdf.namespace=${standalone.namespace}" />
- <!-- The job name (same as the KB namespace is a common default). -->
- <jvmarg value="-Dbigdata.rdf.job.name=bulk-load-kb-${standalone-namespace}" />
- <!-- The file or directory containing zero or more files to be loaded first. -->
- <jvmarg value="-Dbigdata.rdf.ontology=${standalone.bulkLoad.ontology}" />
- <!-- The file or directory containing RDF data to be loaded. -->
- <jvmarg value="-Dbigdata.rdf.data=${standalone.bulkLoad.data}" />
- <!-- The main configuration file. -->
- <arg value="src/resources/config/standalone/bigdataStandalone.config" />
-</java>
-</target>
-
-<!-- -->
<!-- MISC. UTILITY TARGETS -->
<!-- -->
@@ -1122,9 +1012,9 @@
<!-- Stage the bigdata Jini config files -->
+<copy file="${src.resources.config}/bigdataStandalone.config" todir="${dist.var.config.jini}" />
<copy file="${src.resources.config}/bigdataCluster.config" todir="${dist.var.config.jini}" />
<copy file="${src.resources.config}/bigdataCluster16.config" todir="${dist.var.config.jini}" />
-<copy file="${src.resources.config}/standalone/bigdataStandalone.config" todir="${dist.var.config.jini}" />
<!-- Stage the infrastructure service config files -->
Modified: trunk/src/resources/config/README
===================================================================
--- trunk/src/resources/config/README 2010-08-06 15:14:23 UTC (rev 3422)
+++ trunk/src/resources/config/README 2010-08-06 15:46:07 UTC (rev 3423)
@@ -3,8 +3,10 @@
bigdataStandalone.config - A sample configuration file for a workstation.
-bigdataCluster.config - A sample configuration file for a cluster.
+bigdataCluster.config - A sample configuration file for a 3-node cluster.
+bigdataCluster16.config - A sample configuration file for a 16-node cluster.
+
log4j.properties - A default log4j configuration file for use by the bigdata
clients and services.
Modified: trunk/src/resources/config/bigdataCluster.config
===================================================================
--- trunk/src/resources/config/bigdataCluster.config 2010-08-06 15:14:23 UTC (rev 3422)
+++ trunk/src/resources/config/bigdataCluster.config 2010-08-06 15:46:07 UTC (rev 3423)
@@ -1200,15 +1200,6 @@
static private awaitDataServicesTimeout = 8000;
/* Multiplier for the scatter effect.
- *
- * Note: TERM2ID tends to grow more slowly than the other indices for two
- * reasons. First, there are many more distinct RDF Statements than RDF
- * Values for nearly any data set (except if statement identifiers are enabled,
- * in which case there are more terms than statements). Second, the keys of
- * the TERM2ID index compress nicely since long prefixes are very common.
- * Therefore it makes sense to use a smaller scatter factor for this index
- * UNLESS you have only 2-3 data services, in which case you will see hot
- * spots develop with this index unless it is more widely distributed.
*/
static private scatterFactor = 2;
static private scatterFactor_term2id = 2; // use 1 @ 4DS and up.
Modified: trunk/src/resources/config/bigdataCluster16.config
===================================================================
--- trunk/src/resources/config/bigdataCluster16.config 2010-08-06 15:14:23 UTC (rev 3422)
+++ trunk/src/resources/config/bigdataCluster16.config 2010-08-06 15:46:07 UTC (rev 3423)
@@ -1305,13 +1305,6 @@
static private awaitDataServicesTimeout = 8000;
/* Multiplier for the scatter effect.
- *
- * Note: TERM2ID tends to grow more slowly than the other indices for two
- * reasons. First, there are many more distinct RDF Statements than RDF
- * Values for nearly any data set (except if statement identifiers are enabled,
- * in which case there are more terms than statements). Second, the keys of
- * the TERM2ID index compress nicely since long prefixes are very common.
- * Therefore it makes sense to use a smaller scatter factor for this index.
*/
static private scatterFactor = 2;
static private scatterFactor_term2id = 1;
Added: trunk/src/resources/config/bigdataStandalone.config
===================================================================
--- trunk/src/resources/config/bigdataStandalone.config (rev 0)
+++ trunk/src/resources/config/bigdataStandalone.config 2010-08-06 15:46:07 UTC (rev 3423)
@@ -0,0 +1,1886 @@
+import net.jini.jeri.BasicILFactory;
+import net.jini.jeri.BasicJeriExporter;
+import net.jini.jeri.tcp.TcpServerEndpoint;
+
+import net.jini.discovery.LookupDiscovery;
+import net.jini.core.discovery.LookupLocator;
+import net.jini.core.entry.Entry;
+import net.jini.lookup.entry.Name;
+import net.jini.lookup.entry.Comment;
+import net.jini.lookup.entry.Address;
+import net.jini.lookup.entry.Location;
+import net.jini.lookup.entry.ServiceInfo;
+import net.jini.core.lookup.ServiceTemplate;
+
+import java.io.File;
+
+import com.bigdata.util.NV;
+import com.bigdata.journal.BufferMode;
+import com.bigdata.jini.lookup.entry.*;
+import com.bigdata.service.IBigdataClient;
+import com.bigdata.service.jini.*;
+import com.bigdata.service.jini.lookup.DataServiceFilter;
+import com.bigdata.service.jini.master.ServicesTemplate;
+import com.bigdata.jini.start.config.*;
+import com.bigdata.jini.util.ConfigMath;
+
+import org.apache.zookeeper.ZooDefs;
+import org.apache.zookeeper.data.ACL;
+import org.apache.zookeeper.data.Id;
+
+// imports for various options.
+import com.bigdata.btree.IndexMetadata;
+import com.bigdata.btree.keys.KeyBuilder;
+import com.bigdata.rdf.sail.BigdataSail;
+import com.bigdata.rdf.spo.SPORelation;
+import com.bigdata.rdf.spo.SPOKeyOrder;
+import com.bigdata.rdf.lexicon.LexiconRelation;
+import com.bigdata.rdf.lexicon.LexiconKeyOrder;
+import com.bigdata.rawstore.Bytes;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeUnit.*;
+
+/*
+ * This is a sample configuration file for a bigdata federation.
+ *
+ * Note: The original file is a template. The template contains parameters
+ * of the form @XXX@. The values for those template parameters are specified
+ * in the build.properties file when you use ant to install bigdata.
+ *
+ * Note: This file uses the jini configuration mechanism. The syntax
+ * is a subset of Java. The properties for each component are grouped
+ * within the namespace for that component.
+ *
+ * See the net.jini.config.ConfigurationFile javadoc for more
+ * information.
+ */
+
+/*
+ * A namespace used for static entries referenced elsewhere in this
+ * ConfigurationFile.
+ */
+bigdata {
+
+ /**
+ * The name for this federation.
+ *
+ * Note: This is used to form the [zroot] (root node in zookeeper
+ * for the federation) and the [serviceDir] (path in the file
+ * system for persistent state for the federation).
+ *
+ * Note: If you will be running more than one federation, then you
+ * MUST use unicast discovery and specify the federation name in
+ * the [groups].
+ */
+ static private fedname = "@FED@";
+
+ /**
+ * Where to put all the persistent state.
+ */
+ static private serviceDir = new File("@LAS@");
+
+ /**
+ * Which JDK to use.
+ */
+ static private javaHome = new File("@JAVA_HOME@");
+
+ /**
+ * A common point to set the Zookeeper client's requested
+ * sessionTimeout and the jini lease timeout. The default lease
+ * renewal period for jini is 5 minutes while for zookeeper it is
+ * more like 5 seconds. This puts the two systems onto a similar
+ * timeout period so that a disconnected client is more likely to
+ * be noticed in roughly the same period of time for either
+ * system. A value larger than the zookeeper default helps to
+ * prevent client disconnects under sustained heavy load.
+ */
+ // jini
+ static private leaseTimeout = ConfigMath.m2ms(60);// 20s=20000; 5m=300000;
+ // zookeeper
+ static private sessionTimeout = (int)ConfigMath.m2ms(10);// was 5m 20s=20000; 5m=300000;
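For concreteness, the two timeouts above land on the same scale once converted; a minimal sketch of the arithmetic, assuming ConfigMath.m2ms simply converts minutes to milliseconds (consistent with the inline "5m=300000" notes; TimeoutSketch is a hypothetical illustration, not part of this configuration):

public class TimeoutSketch {
    // Hypothetical stand-in for ConfigMath.m2ms: minutes to milliseconds.
    static long m2ms(long minutes) { return minutes * 60L * 1000L; }

    public static void main(String[] args) {
        long leaseTimeout = m2ms(60);        // jini lease timeout: 3,600,000 ms
        int sessionTimeout = (int) m2ms(10); // zookeeper session timeout: 600,000 ms
        System.out.println("lease=" + leaseTimeout + "ms session=" + sessionTimeout + "ms");
    }
}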
+
+ /*
+ * Example cluster configuration.
+ *
+ * Data services are load balanced. Index partitions will be
+ * moved around as necessary to ensure hosts running data
+ * service(s) are neither under nor over utilized. Data services
+ * can be very resource intensive processes. They heavily buffer
+ * both reads and writes, and they use RAM to do so. They also
+ * support high concurrency and can use up to one thread per index
+ * partition. How many cores they will consume is very much a
+ * function of the application.
+ *
+ * Zookeeper services use a quorum model. Always allocate an odd
+ * number. 3 gives you one failure. 5 gives you two failures.
+ * Zookeeper will sync the disk almost continuously while it is
+ * running. It really deserves its own local disk. Zookeeper
+ * also runs in memory. Since all operations are serialized, if
+ * it starts swapping then performance will drop through the floor.
+ *
+ * Jini uses a peer model. Each service registers with each
+ * registrar that it discovers. Each client listeners to each
+ * registrar that it discovers. Each client listens to each
+ * installation runs entirely in memory (no disk operations, at
+ * least not for service registration). A second instance of the
+ * jini core services provides a safety net. If you are using
+ * multicast then you can always add another instance.
+ */
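The quorum sizing rule in the comment above ("3 gives you one failure, 5 gives you two") is plain majority arithmetic; a small illustrative sketch (QuorumSketch is hypothetical, not part of this configuration):

public class QuorumSketch {
    // A zookeeper ensemble stays available while a strict majority of its
    // servers is up, so an ensemble of N servers tolerates floor((N - 1) / 2)
    // simultaneous failures.
    static int toleratedFailures(int ensembleSize) {
        return (ensembleSize - 1) / 2;
    }

    public static void main(String[] args) {
        System.out.println(toleratedFailures(3)); // 1
        System.out.println(toleratedFailures(5)); // 2
    }
}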
+
+ /* Declare the hosts. This provides indirection for planning
+ * purposes.
+ *
+ * The summary notation is: cores@GHZ/cache x RAM x DISK
+ */
+ static private h0 = "192.168.1.50"; // 4@3ghz/1kb x 4GB x 263G
+ //static private h1 = "192.168.20.27"; // 4@3ghz/2kb x 4GB x 263G
+ //static private h2 = "192.168.20.28"; // 4@3ghz/1kb x 4GB x 64G
+
+ /* Note: this configuration puts things that are not disk intensive
+ * on the host with the least disk space and zookeeper.
+ */
+ static private lbs = h0; // specify as @LOAD_BALANCER_HOST@ ?
+ static private txs = h0;
+ static private mds = h0;
+
+ // 1+ jini servers
+ static private jini1 = h0;
+ //static private jini2 = h1;
+ static private jini = new String[]{ jini1 }; //,jini2};
+
+ // Either 1 or 3 zookeeper machines (one instance per).
+ // See the QuorumPeerMain and ZooKeeper configurations below.
+ static private zoo1 = h0;
+ //static private zoo2 = h1;
+ //static private zoo3 = h2;
+ static private zoo = new String[] { zoo1 }; // ,zoo2,zoo3};
+
+ // 1+ client service machines (1+ instance per host).
+ static private cs0 = h0;
+
+ // 1+ data service machines (1+ instance per host).
+ static private ds0 = h0;
+ //static private ds1 = h1; // h1 is commented out above; only ds0 is used in this standalone configuration.
+
+ // client servers
+ static private cs = new String[] {
+ cs0 //, ...
+ };
+
+ // The target #of client servers.
+ static private clientServiceCount = 1;
+ static private maxClientServicePerHost = 1;
+
+ // data servers
+ static private ds = new String[]{
+ ds0//, ds1 //, ...
+ };
+
+ // The target #of data services.
+ static private dataServiceCount = 1;
+
+ // Maximum #of data services per host.
+ static private maxDataServicesPerHost = 1;
+
+ // @todo also specify k (replicationCount)
+
+ // Sets the initial and maximum journal extents.
+ static private journalExtent = ConfigMath.multiply(200, Bytes.megabyte);
+
+ /**
+ * A String[] whose values are the group(s) to be used for discovery
+ * (no default). Note that multicast discovery is always used if
+ * LookupDiscovery.ALL_GROUPS (a <code>null</code>) is specified.
+ */
+
+ // one federation, multicast discovery.
+ //static private groups = LookupDiscovery.ALL_GROUPS;
+
+ // unicast discovery or multiple federations, MUST specify groups.
+ static private groups = new String[]{bigdata.fedname};
+
+ /**
+ * One or more unicast URIs of the form <code>jini://host/</code>
+ * or <code>jini://host:port/</code> (no default).
+ *
+ * This MAY be an empty array if you want to use multicast
+ * discovery <strong>and</strong> you have specified the groups as
+ * LookupDiscovery.ALL_GROUPS (a <code>null</code>).
+ */
+ static private locators = new LookupLocator[] {
+
+ // runs jini on the localhost using unicast locators.
+ //new LookupLocator("jini://localhost/")
+
+ // runs jini on two hosts using unicast locators.
+ new LookupLocator("jini://"+jini1),
+ //new LookupLocator("jini://"+jini2),
+
+ };
+
+ /**
+ * The policy file that will be used to start services.
+ */
+ private static policy = "@POLICY_FILE@";
+
+ /**
+ * log4j configuration file (applies to bigdata and zookeeper).
+ *
+ * Note: The value is a URI!
+ *
+ * Note: You should aggregate all of the log output to a single
+ * host. For example, using the log4j SocketAppender and the
+ * SimpleNodeServer.
+ */
+ log4j = "@LOG4J_CONFIG@";
+
+ /**
+ * java.util.logging configuration file (applies to jini as used
+ * within bigdata).
+ *
+ * Note: The value is a file path!
+ */
+ logging = "@LOGGING_CONFIG@";
+
+ /*
+ private static host = ConfigUtil.getHostName();
+ private static port = "8081";
+ private static jskdl = " http://" + host + ":" + port + "/jsk-dl.jar";
+ */
+
+ /**
+ * JVM argument may be used to enable the yourkit profiler agent on a
+ * service. Of course, yourkit must be installed at this location and
+ * you must have a licensed copy of the yourkit UI running either on a
+ * node of the cluster or on a machine routed to the cluster, e.g., via
+ * an ssh tunnel. The yourkit profiler uses ports in [10001:10010] by
+ * default on each node.
+ *
+ * See http://www.yourkit.com/docs/80/help/running_with_profiler.jsp
+ *
+ * See http://www.yourkit.com/docs/80/help/agent.jsp
+ *
+ * See http://www.yourkit.com/docs/80/help/additional_agent_options.jsp
+ *
+ * Note: Conditionally include ${profilerAgent} iff you want to enable
+ * profiling for some service class.
+ */
+
+ // linux-64 with all profiling options initially disabled.
+ profilerAgent="-agentpath:/usr/java/yjp-9.0.3/bin/linux-x86-64/libyjpagent.so=disableexceptiontelemetry,disablestacktelemetry";
+
+}
+
+/*
+ * Service configuration defaults. These can also be specified on a
+ * per service-type basis. When the property is an array type, the
+ * value here is concatenated with the optional array value on the per
+ * service-type configuration. Otherwise it is used iff no value is
+ * specified for the service-type configuration.
+ */
+com.bigdata.jini.start.config.ServiceConfiguration {
+
+ /*
+ * Default java command line arguments that will be used for all
+ * java-based services
+ *
+ * Note: [-Dcom.sun.jini.jeri.tcp.useNIO=true] enables NIO in
+ * combination with the [exporter] configured below.
+ */
+ defaultJavaArgs = new String[]{
+ "-server",
+ "-ea",
+ "-showversion",
+ //"-Xmx2G",
+ /* This is a workaround for a JVM bug which can result in a
+ * lost wakeup. This bug is fixed in JDK1.6.0_18. However,
+ * JDK1.6.0_18 has other problems which result in segfaults.
+ *
+ * http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6822370
+ */
+ "-XX:+UseMembar",
+ "-Dcom.sun.jini.jeri.tcp.useNIO=@USE_NIO@",
+ "-Djava.security.policy="+bigdata.policy,
+ "-Djava.util.logging.config.file="+bigdata.logging,
+ "-Dcom.bigdata.counters.linux.sysstat.path=@SYSSTAT_HOME@",
+ //bigdata.profilerAgent,
+ };
+
+ /* Default path for service instances and their persistent
+ * data. This may be overridden on a per service-type basis.
+ *
+ * Note: For logical services that support failover, the concrete
+ * service directory is assigned dynamically when a physical
+ * service instance is created.
+ */
+ serviceDir = bigdata.serviceDir;
+
+ // The JVM to use.
+ javaHome = bigdata.javaHome;
+
+ /* The bigdata services default logging configuration (a URI!)
+ */
+ log4j = bigdata.log4j;
+
+ /*
+ * Set up some default properties values that will be inherited
+ * (copy by value) by all clients and services started using this
+ * configuration file.
+ */
+ properties = new NV[] {
+
+ /*
+ * Each JiniClient (and hence all bigdata services) can run an
+ * httpd that will expose performance counters for the service and
+ * the host on which it is running. This property specifies the
+ * port for that httpd service. Valid values are port number,
+ * zero (0) for a random open port, MINUS ONE (-1) to disable the
+ * httpd service.
+ */
+ //new NV(IBigdataClient.Options.HTTPD_PORT, "-1"),
+
+ /*
+ * Option to disable collection of performance counters for the
+ * host on which the client or service is running.
+ *
+ * Note: The load balancer relies on this information!
+ */
+ //new NV(IBigdataClient.Options.COLLECT_PLATFORM_STATISTICS,"false"),
+
+ /* Option to disable collection of performance counters on the
+ * queues used internally by the client or service.
+ *
+ * Note: The load balancer relies on this information!
+ */
+ //new NV(IBigdataClient.Options.COLLECT_QUEUE_STATISTICS,"false"),
+
+ /* Option controls how many times a client request will be
+ * reissued on receiving notice that an index partition locator is
+ * stale. Stale locators arise when an index partition is split,
+ * moved, or joined.
+ *
+ * Note: This option needs to be larger if we are aggressively
+ * driving journal overflows and index partitions splits during
+ * the "young" phase of a data service or scale-out index since a
+ * LOT of redirects will result.
+ */
+ new NV(IBigdataClient.Options.CLIENT_MAX_STALE_LOCATOR_RETRIES,"1000"),
+
+ };
+
+}
+
+/**
+ * JoinManager options.
+ *
+ * Note: These options must be copied into the service.config (to
+ * specify the service lease timeout) as well as used by the client
+ * (which uses this file directly).
+ */
+net.jini.lookup.JoinManager {
+
+ // The lease timeout for jini joins.
+ maxLeaseDuration = bigdata.leaseTimeout;
+
+}
+
+/**
+ * Jini service configuration.
+ */
+jini {
+
+ /* This sets command line arguments for the ServiceStarter which
+ * is used to run the jini services.
+ */
+ args = new String[] {
+
+ "-Xmx400m",
+ "-Djava.security.policy="+bigdata.policy,
+ "-Djava.util.logging.config.file="+bigdata.logging,
+ "-Dlog4j.configuration="+bigdata.log4j,
+ "-Dlog4j.primary.configuration="+bigdata.log4j,
+ "-DinitialMemberGroups="+bigdata.fedname
+
+ };
+
+ /**
+ * The main jini configuration file. This file contains a
+ * NonActivatableServiceDescriptor[]. The elements of that array
+ * describe how to start each of the jini services.
+ */
+ configFile = new File("@JINI_CONFIG@");
+
+ /**
+ * The #of instances to run.
+ *
+ * Note: A jini service instance may be started on a host if it is
+ * declared in [locators]. If locators is empty, then you are
+ * using multicast discovery. In this case an instance may be
+ * started on any host, unless [constraints] are imposed. In any
+ * case, no more than [serviceCount] jini services will be started
+ * at any given time. This is checked against the #of discovered
+ * instances.
+ */
+ serviceCount = 1;
+
+}
+
+/**
+ * Zookeeper server configuration.
+ */
+org.apache.zookeeper.server.quorum.QuorumPeerMain {
+
+ /* Directory for zookeeper's persistent state. The [id] will be
+ * appended as another path component automatically to keep
+ * instances separate.
+ */
+ dataDir = new File(bigdata.serviceDir,"zookeeper");
+
+ /* Optional directory for the zookeeper log files. The [id] will
+ * be appended as another path component automatically to keep
+ * instances separate.
+ *
+ * Note: A dedicated local storage device is highly recommended
+ * for the zookeeper transaction logs!
+ */
+ //dataLogDir=new File("/var/zookeeper-log");
+
+ // required.
+ clientPort=2181;
+
+ tickTime=2000;
+
+ initLimit=5;
+
+ syncLimit=2;
+
+ /* A comma delimited list of the known zookeeper servers together
+ * with their assigned "myid": {myid=host:port(:port)}+
+ *
+ * Note: You SHOULD specify the full list of servers that are
+ * available to the federation. An instance of zookeeper will be
+ * started automatically on each host running ServicesManager that
+ * is present in the [servers] list IF no instance is found
+ * running on that host at the specified [clientPort].
+ *
+ * Note: zookeeper interprets NO entries as the localhost with
+ * default peer and leader ports. This will work as long as the
+ * localhost is already running zookeeper. However, the ServicesManager
+ * WILL NOT automatically start zookeeper if you do not specify
+ * the [servers] property. You can also explicitly specify
+ * "localhost" as the hostname, but that only works for a single
+ * machine.
+ */
+ // standalone
+ //servers="1=localhost:2888:3888";
+ // ensemble
+ /**/
+ servers = "1="+bigdata.zoo1+":2888:3888"
+// + ",2="+bigdata.zoo2+":2888:3888"
+// + ",3="+bigdata.zoo3+":2888:3888"
+ ;
+
+ // This is all you need to run zookeeper.
+ classpath = new String[] {
+ "@LIB_DIR@/apache/zookeeper-3.2.1.jar",
+ "@LIB_DIR@/apache/log4j-1.2.15.jar"
+ };
+
+ /* Optional command line arguments for the JVM used to execute
+ * zookeeper.
+ *
+ * Note: swapping for zookeeper is especially bad since the
+ * operations are serialized, so if anything hits then disk then
+ * all operations in the queue will have that latency as well.
+ * However, bigdata places a very light load on
+ * zookeeper so a modest heap should be Ok. For example, I have
+ * observed a process size of only 94m after 10h on a 15-node
+ * cluster.
+ */
+ args = new String[]{
+ "-Xmx200m",
+ /*
+ * Enable JMX remote management.
+ *
+ "-Dcom.sun.management.jmxremote.port=9997",
+ "-Dcom.sun.management.jmxremote.authenticate=false",
+ "-Dcom.sun.management.jmxremote.ssl=false",
+ */
+};
+
+ // zookeeper server logging configuration (value is a URI!)
+ log4j = bigdata.log4j;
+
+}
+
+/*
+ * Zookeeper client configuration.
+ */
+org.apache.zookeeper.ZooKeeper {
+
+ /* Root znode for the federation instance. */
+ zroot = "/"+bigdata.fedname;
+
+ /* A comma separated list of host:port pairs, where the port is
+ * the CLIENT port for the zookeeper server instance.
+ */
+ // standalone.
+ // servers = "localhost:2181";
+ // ensemble
+ servers = bigdata.zoo1+":2181" // @TODO enable other instances.
+// + ","+bigdata.zoo2+":2181"
+// + ","+bigdata.zoo3+":2181"
+ ;
+
+ /* Session timeout (optional). */
+ sessionTimeout = bigdata.sessionTimeout;
+
+ /*
+ * ACL for the zookeeper nodes created by the bigdata federation.
+ *
+ * Note: zookeeper ACLs are not transmitted over secure channels
+ * and are placed into plain text Configuration files by the
+ * ServicesManagerServer.
+ */
+ acl = new ACL[] {
+
+ new ACL(ZooDefs.Perms.ALL, new Id("world", "anyone"))
+
+ };
+
+}
+
+/*
+ * Jini client configuration
+ */
+com.bigdata.service.jini.JiniClient {
+
+ /* Default Entry[] for jini services. Also used by the
+ * ServicesManagerService as is.
+ *
+ * Note: A Name attribute will be added automatically using the
+ * service type and the znode of the service instance. That Name
+ * will be canonical. It is best if additional service names are
+ * NOT specified as that might confuse some things :-)
+ *
+ * Note: A Hostname attribute will be added dynamically.
+ */
+ entries = new Entry[] {
+ // Purely informative.
+ new Comment(bigdata.fedname),
+ };
+
+ groups = bigdata.groups;
+
+ locators = bigdata.locators;
+
+ // optional JiniClient properties.
+ // properties = new NV[] {};
+
+ /*
+ * Overrides for jini SERVICES (things which are started
+ * automatically) BUT NOT CLIENTs (things which you start by hand
+ * and which read this file directly).
+ *
+ * The difference here is whether or not a service.config file is
+ * being generated. When it is, the jiniOptions[] will be
+ * included in how that service is invoked and will operate as
+ * overrides for the parameters specified in the generated
+ * service.config file. However, normal clients directly consume
+ * this config file rather than the generated one and therefore
+ * you must either specify their overrides directly on the command
+ * line when you start the client or specify them explicitly in
+ * the appropriate component section within this configuration
+ * file.
+ *
+ * In practice, this means that you must specify some parameters
+ * both here and in the appropriate component configuration. E.g.,
+ * see the component section for "net.jini.lookup.JoinManager"
+ * elsewhere in this file.
+ */
+ jiniOptions = new String[] {
+
+ // The lease timeout for jini joins.
+ "net.jini.lookup.JoinManager.maxLeaseDuration="+bigdata.leaseTimeout,
+
+ };
+
+}
+
+/**
+ * Options for the bigdata services manager.
+ */
+com.bigdata.jini.start.ServicesManagerServer {
+
+ /*
+ * This object is used to export the service proxy. The choice
+ * here affects the protocol that will be used for communications
+ * between the clients and the service.
+ */
+ exporter = new BasicJeriExporter(TcpServerEndpoint.getInstance(0),
+ new BasicILFactory());
+
+ /*
+ * The data directory and the file on which the serviceID will be
+ * written.
+ *
+ * Note: These properties MUST be specified explicitly for the
+ * ServicesManager since it uses this as its Configuration file.
+ * For other services, it generates the Configuration file and
+ * will generate this property as well.
+ */
+
+ serviceDir = new File(bigdata.serviceDir,"ServicesManager");
+
+ serviceIdFile = new File(serviceDir,"service.id");
+
+ /* The services that will be started. For each service, there
+ * must be a corresponding component defined within this
+ * configuration file. For each "ManagedServiceConfiguration", an
+ * entry will be made in zookeeper and logical and physical
+ * service instances will be managed automatically. For unmanaged
+ * services, such as jini and zookeeper itself, instances will be
+ * started iff necessary by the services manager when it starts
+ * up.
+ */
+ services = new String[] {
+
+ "jini",
+ "org.apache.zookeeper.server.quorum.QuorumPeerMain",
+ "com.bigdata.service.jini.TransactionServer",
+ "com.bigdata.service.jini.MetadataServer",
+ "com.bigdata.service.jini.DataServer",
+ "com.bigdata.service.jini.LoadBalancerServer",
+ "com.bigdata.service.jini.ClientServer"
+
+ };
+
+ /*
+ * Additional properties passed through to the JiniClient or the
+ * service.
+ *
+ * Note: The services manager is used to collect statistics from the
+ * OS for each host so we have performance counters for hosts which
+ * are only running non-bigdata services, such as jini or zookeeper.
+ */
+ properties = new NV[]{
+
+ };
+
+ /* The services manager MUST be run on every host so that it may
+ * start both bigdata and non-bigdata services (jini, zookeeper).
+ * This is also used to report per-host performance counters to
+ * the load balancer for hosts that are not running bigdata
+ * services.
+ */
+ constraints = new IServiceConstraint[] {
+
+ };
+
+}
+
+com.bigdata.service.jini.TransactionServer {
+
+ constraints = new IServiceConstraint[] {
+
+ new JiniRunningConstraint(),
+ new ZookeeperRunningConstraint(),
+
+ new HostAllowConstraint(bigdata.txs)
+
+ };
+
+ args = new String[]{
+
+ // Does not need much RAM.
+ "-Xmx200m"
+
+ };
+
+ properties = new NV[] {
+
+ /* The #of milliseconds that the database will retain history that is
+ * no longer required to support the earliest active transaction.
+ *
+ * A value of ZERO means that only the last commit point will
+ * be retained. The larger the value the more history will be
+ * retained. You can use a really big number if you never want
+ * to release history and you have lots of disk space :-)
+ *
+ * Note: The most recent committed state of the database is
+ * NEVER released.
+ */
+ new NV(TransactionServer.Options.MIN_RELEASE_AGE, "0"),
+
+ };
+
+}
+
+com.bigdata.service.jini.MetadataServer {
+
+ constraints = new IServiceConstraint[] {
+
+ new JiniRunningConstraint(),
+ new ZookeeperRunningConstraint(),
+ //new TXRunningConstraint(),
+
+ new HostAllowConstraint(bigdata.mds),
+
+ };
+
+ args = new String[]{
+
+ // Does not need much RAM.
+ "-Xmx200m"
+
+ };
+
+ properties = new NV[]{
+
+ /*
+ * The MDS does not support overflow at this time so
+ * overflow MUST be disabled for this service.
+ */
+ new NV(MetadataServer.Options.OVERFLOW_ENABLED,"false")
+
+ };
+
+}
+
+com.bigdata.service.jini.DataServer {
+
+ args = new String[]{
+ //bigdata.profilerAgent,
+ /*
+ * Grant lots of memory, but read on.
+ *
+ * Note: 32-bit JVMs have a 2G limit on the heap, but the practical limit
+ * is often much less - maybe 1400m. 64-bit JVMs can use much more RAM.
+ * However, the heap which you grant to java DOES NOT determine the total
+ * process heap. I have seen 64-bit java processes using an additional
+ * 3-4GB of heap beyond what is specified here. So, you need to consider
+ * the total RAM, subtract out enough for the other processes and the OS
+ * buffers, divide by the #of client/data services you plan to run on that
+ * host (generally 1-2) and then subtract out some more space for the JVM
+ * itself.
+ *
+ * For example, if you have 32G RAM and a 64-bit JVM and plan to run two
+ * CS/DS on the host, I would recommend 10G for the Java heap. You can
+ * expect to see Java grab another 4G per process over time. That makes
+ * the per CS/DS heap 14G. With two processes you have taken 28G leaving
+ * 4G for everything else.
+ *
+ * Here is another example: 4G RAM, 32-bit JVM, and 2 CS/DS per host. I
+ * would stick to 800m for the Java heap. You don't have a problem unless
+ * you see an OOM (OutOfMemoryException) or a process killed because GC is
+ * taking too much time.
+ *
+ * See http://www.ibm.com/developerworks/linux/library/j-nativememory-linux/index.html?ca=dgr-lnxw07Linux-JVM&S_TACT=105AGX59&S_CMP=grlnxw07
+ *
+ * Note: for linux, "sysctl -w vm.swappiness=0" will keep the RAM you do
+ * have for your applications!
+ */
+ "-Xmx4g",// was 800
+ /* Optionally, grab all/most of the max heap at once. This makes sense for
+ * DS but is less necessary for other bigdata services.
+ */
+ "-Xms2G", // 1/2 of the max heap is a good value.
+ /*
+ * This option will keep the JVM "alive" even when it is memory starved
+ * but the performance of a memory-starved JVM is terrible.
+ */
+ //"-XX:-UseGCOverheadLimit",
+ /* Configure GC for higher throughput. Together these options
+ * request parallel old generation collection using N threads.
+ * The application will be paused when this occurs, but GC will
+ * be faster. Hence throughput will be higher. However, be
+ * sure to use JDK 6u10+ (6676016 : ParallelOldGC leaks memory).
+ *
+ * http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6676016
+ */
+ "-XX:+UseParallelOldGC",
+ //"-XX:ParallelGCThreads=8",
+ /*
+ * Enable JMX remote management for the data service.
+ *
+ * Note: This will not work if you have two data services on a host
+ * because it will assign the same port to each service. In order
+ * to work around that the argument would have to be specified by
+ * the service starter and then published in the Entry[] attributes.
+ *
+ * However, you can use ssh -X to open a tunnel with X
+ * forwarding and then run jconsole locally on the target host
+ * and bring up these data services without enabling remote
+ * JMX.
+ *
+ "-Dcom.sun.management.jmxremote.port=9999",
+ "-Dcom.sun.management.jmxremote.authenticate=false",
+ "-Dcom.sun.management.jmxremote.ssl=false",
+ */
+ /*
+ * Override the size of the default pool of direct (native) byte
+ * buffers. This was done to ensure that the nodes region for
+ * index segments remain fully buffered as the index partitions
+ * approach their maximum size before a split.
+ */
+ "-Dcom.bigdata.io.DirectBufferPool.bufferCapacity="+
+ ConfigMath.multiply(Bytes.kilobyte,1250),
+ };
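The heap-sizing guidance in the -Xmx comment above reduces to simple arithmetic; a minimal sketch of that rule (HeapSizingSketch and its parameters are hypothetical, with the ~4G native overhead figure taken from the observation in the comment):

public class HeapSizingSketch {
    // Recommended Java heap per CS/DS process: take the host RAM, subtract what
    // the OS and other processes need, split the remainder across the CS/DS
    // instances on the host, then subtract the JVM's own native overhead.
    static long recommendedHeapGB(long totalRamGB, long reservedForOsGB,
                                  int servicesPerHost, long nativeOverheadPerJvmGB) {
        return (totalRamGB - reservedForOsGB) / servicesPerHost - nativeOverheadPerJvmGB;
    }

    public static void main(String[] args) {
        // 32G host, ~4G held back, two CS/DS, ~4G native overhead each => 10G heap,
        // i.e. 2 x (10G + 4G) = 28G used with 4G left over, matching the example above.
        System.out.println(recommendedHeapGB(32, 4, 2, 4)); // 10
    }
}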
+
+ serviceCount = bigdata.dataServiceCount;
+
+ // restrict where the data services can run.
+ constraints = new IServiceConstraint[] {
+
+ new JiniRunningConstraint(),
+ new ZookeeperRunningConstraint(),
+ //new TXRunningConstraint(),
+
+ new HostAllowConstraint(bigdata.ds),
+
+ new MaxDataServicesPerHostConstraint(bigdata.maxDataServicesPerHost),
+
+ };
+
+ /*
+ * Note: the [dataDir] will be filled in when a new service
+ * instance is created based on the [servicesDir], so don't set it
+ * here yourself.
+ */
+ properties = new NV[]{
+
+ new NV(DataServer.Options.BUFFER_MODE,
+ //""+com.bigdata.journal.BufferMode.Direct
+ ""+com.bigdata.journal.BufferMode.DiskWORM
+ ),
+
+ /* Option disables synchronous overflow after N times and
+ * configures the offset bits for the journal for a scale-up
+ * configuration so we may use very large journals.
+ */
+ //new NV(DataServer.Options.OVERFLOW_MAX_COUNT,"5"),
+ //new NV(DataServer.Options.OFFSET_BITS,""+com.bigdata.rawstore.WormAddressManager.SCALE_UP_OFFSET_BITS),
+
+ /* Synchronous overflow is triggered when the live journal is
+ * this full (the value is a percentage, expressed as a
+ * floating point number in [0:1]).
+ */
+ //new NV(DataServer.Options.OVERFLOW_THRESHOLD,".9"),
+
+ /* Override the initial and maximum extent so that they are more
+ * more suited to large data sets. Overflow will be triggered as
+ * the size of the journal approaches the maximum extent. The
+ * initial and maximum extent are configured up above.
+ */
+
+ new NV(DataServer.Options.INITIAL_EXTENT, "" + bigdata.journalExtent),
+ new NV(DataServer.Options.MAXIMUM_EXTENT, "" + bigdata.journalExtent),
+
+ /* Specify the queue capacity for the write service (unisolated
+ * write operations).
+ *
+ * 0 := SynchronousQueue.
+ * N := bounded queue of capacity N
+ * Integer.MAX_VALUE := unbounded queue.
+ *
+ * Note: The corePoolSize will never increase for an unbounded
+ * queue so the value specified for maximumPoolSize will
+ * essentially be ignored in this case.
+ *
+ * Note: A SynchronousQueue is a good choice here since it allows
+ * the #of threads to change in response to demand. The pool
+ * size should be unbounded when using a SynchronousQueue.
+ */
+ new NV(DataServer.Options.WRITE_SERVICE_QUEUE_CAPACITY,"0"), // synchronous queue.
+ new NV(DataServer.Options.WRITE_SERVICE_CORE_POOL_SIZE,"50"), //
+ new NV(DataServer.Options.WRITE_SERVICE_MAXIMUM_POOL_SIZE,""+Integer.MAX_VALUE),
+ new NV(DataServer.Options.WRITE_SERVICE_PRESTART_ALL_CORE_THREADS,"true"),
+
+ /*
+ * Option turns off overflow processing (debugging only).
+ * All writes will go onto the live journal, no index segments
+ * will be built, and indices will not be split, moved,
+ * joined, etc.
+ */
+ //new NV(DataServer.Options.OVERFLOW_ENABLED,"false"),
+
+ /* Maximum #of index partition moves per overflow.
+ */
+ new NV(DataServer.Options.MAXIMUM_MOVES,"1"),
+
+ /* Option controls how many index partitions may be moved onto
+ * any given target data service in a single overflow cycle
+ * and may be used to disable index partition moves (for
+ * debugging purposes).
+ */
+ new NV(DataServer.Options.MAXIMUM_MOVES_PER_TARGET,"1"),
+
+ /* The minimum CPU activity on a host before it will consider moving an
+ * index partition to shed some load.
+ *
+ * @todo A high threshold was chosen for the 3-node cluster since there
+ * are only 2 machines running data services. A "feature" in the load
+ * balancer allows moves between two heavily loaded hosts even when they
+ * are very close in their load, which is typically the case if you have
+ * only 2 machines running data services. The high threshold here is a
+ * workaround until the load balancer is modified to take into account
+ * whether or not a significant difference exists in the load between
+ * the source and possible target data service hosts.
+ */
+ new NV(DataServer.Options.MOVE_PERCENT_CPU_TIME_THRESHOLD,".99"),//was .7
+
+ /* Option limits the #of index segments in a view before a
+ * compacting merge is forced.
+ */
+ new NV(DataServer.Options.MAXIMUM_SEGMENTS_PER_VIEW,"5"), // default 6
+
+ /* Option limits the #of optional merges that are performed in each
+ * overflow cycle.
+ */
+ new NV(DataServer.Options.MAXIMUM_OPTIONAL_MERGES_PER_OVERFLOW,"1"),
+
+ /* Option affects how strongly splits are emphasized for a young
+ * scale-out index. If the index has fewer than this many
+ * partitions, then there will be a linear reduction in the
+ * target index partition size which will increase the likelihood
+ * of an index split under heavy writes. This helps to distribute
+ * the index early in its life cycle.
+ */
+ new NV(DataServer.Options.ACCELERATE_SPLIT_THRESHOLD,"20"),//20//50
+
+ /* Option accelerates overflow for data services that have fewer than
+ * the threshold #of bytes under management. Acceleration is
+ * accomplished by reducing the maximum extent of the live journal
+ * linearly, but with a minimum of a 10M maximum extent. When the
+ * maximum extent is reduced by this option, the initial and the
+ * maximum extent will always be set to the same value for that
+ * journal.
+ */
+ new NV(DataServer.Options.ACCELERATE_OVERFLOW_THRESHOLD,
+ //"0"
+ //""+com.bigdata.rawstore.Bytes.gigabyte
+ "2147483648" // 2G
+ ),
+
+ // #of threads for index segment builds (default 3).
+ new NV(DataServer.Options.BUILD_SERVICE_CORE_POOL_SIZE,"5"),
+
+ // #of threads for compacting merges (default 1).
+ new NV(DataServer.Options.MERGE_SERVICE_CORE_POOL_SIZE,"1"),
+
+// // Zero is full parallelism; otherwise #of threads in the pool.
+// new NV(DataServer.Options.OVERFLOW_TASKS_CONCURRENT,"5"),
+
+ /* Use Long.MAX_VALUE to always run overflow processing to
+ * completion (until no more data remains on the old journal).
+ */
+ new NV(DataServer.Options.OVERFLOW_TIMEOUT,""+Long.MAX_VALUE),
+
+ new NV(DataServer.Options.OVERFLOW_CANCELLED_WHEN_JOURNAL_FULL,"false"),
+
+ new NV(DataServer.Options.LIVE_INDEX_CACHE_CAPACITY,"10"), // was 60
+
+ new NV(DataServer.Options.HISTORICAL_INDEX_CACHE_CAPACITY,"10"), // was 60
+
+ /* The maximum #of clean indices that will be retained on the
+ * hard reference queue (default 20).
+ */
+ new NV(DataServer.Options.INDEX_CACHE_CAPACITY,"10"), // was 50
+
+ /* The timeout for unused index references before they are
+ * cleared from the hard reference queue (default is 1m).
+ * After this timeout the index reference is cleared from the
+ * queue and the index will be closed unless a hard reference
+ * exists to the index.
+ */
+// new NV(DataServer.Options.INDEX_CACHE_TIMEOUT,"1200000"), // 20m vs 1m
+
+ /* The maximum #of clean index segments that will be retained
+ * on the hard reference queue (default 60). Note that ALL
+ * index segments are clean (they are read-only).
+ */
+ new NV(DataServer.Options.INDEX_SEGMENT_CACHE_CAPACITY,"20"), // was 100
+
+ /* The timeout for unused index segment references before they
+ * are cleared from the hard reference queue (default is 1m).
+ * After this timeout the index segment reference is cleared
+ * from the queue and the index segment will be closed unless
+ * a hard reference exists to the index segment.
+ */
+// new NV(DataServer.Options.INDEX_SEGMENT_CACHE_TIMEOUT,"60000000"), // 10m vs 1m
+
+ /* The #of store files (journals and index segment stores)
+ * whose hard references will be maintained on a queue. The
+ * value should be slightly more than the index segment cache
+ * capacity since some journals are also used by the views, but
+ * the same journals are shared by all views, so adding 3 is plenty.
+ */
+ new NV(DataServer.Options.STORE_CACHE_CAPACITY,"23"),// was 110
+
+// new NV(DataServer.Options.STORE_CACHE_TIMEOUT,"1200000"),//20m vs 1m.
+
+ };
+
+}
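The WRITE_SERVICE_* options above rely on standard java.util.concurrent behavior: a queue capacity of zero selects a SynchronousQueue, which lets the pool grow toward its maximum under demand, while an unbounded queue never grows the pool past corePoolSize. A small illustrative sketch of that distinction (not the bigdata write service itself):

import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.SynchronousQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

public class WriteServiceQueueSketch {
    public static void main(String[] args) {
        // Queue capacity "0": a SynchronousQueue hands each task directly to a
        // thread, so the pool adds threads (up to the maximum) as demand rises.
        ThreadPoolExecutor synchronousPool = new ThreadPoolExecutor(
                50, Integer.MAX_VALUE, 60L, TimeUnit.SECONDS,
                new SynchronousQueue<Runnable>());

        // Unbounded queue: once corePoolSize threads exist, tasks are always
        // queued, so maximumPoolSize is effectively ignored (as noted above).
        ThreadPoolExecutor unboundedQueuePool = new ThreadPoolExecutor(
                50, Integer.MAX_VALUE, 60L, TimeUnit.SECONDS,
                new LinkedBlockingQueue<Runnable>());

        synchronousPool.shutdown();
        unboundedQueuePool.shutdown();
    }
}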
+
+/**
+ * Configuration options for the containers used to distribute application tasks
+ * across a federation.
+ *
+ * @todo There should be a means to tag certain client servers for one purpose
+ * or another. This could be handled by subclassing, but it really should be
+ * declarative.
+ */
+com.bigdata.service.jini.ClientServer {
+
+ args = new String[]{
+ //bigdata.profilerAgent,
+ /*
+ * Grant lots of memory, but read on.
+ *
+ * Note: 32-bit JVMs have a 2G limit on the heap, but the practical limit
+ * is often much less - maybe 1400m. 64-bit JVMs can use much more RAM.
+ * However, the heap which you grant to java DOES NOT determine the total
+ * process heap. I have seen 64-bit java processes using an additional
+ * 3-4GB of heap beyond what is specified here. So, you need to consider
+ * the total RAM, subtract out enough for the other processes and the OS
+ * buffers, divide by the #of client/data services you plan to run on that
+ * host (generally 1-2) and then subtract out some more space for the JVM
+ * itself.
+ *
+ * For example, if you have 32G RAM and a 64-bit JVM and plan to run two
+ * CS/DS on the host, I would recommend 10G for the Java heap. You can
+ * expect to see Java grab another 4G per process over time. That makes
+ * the per CS/DS heap 14G. With two processes you have taken 28G leaving
+ * 4G for everything else.
+ *
+ * Here is another example: 4G RAM, 32-bit JVM, and 2 CS/DS per host. I
+ * would stick to 800m for the Java heap. You don't have a problem unless
+ * you see an OOM (OutOfMemoryException) or a process killed because GC is
+ * taking too much time.
+ *
+ * See http://www.ibm.com/developerworks/linux/library/j-nativememory-linux/index.html?ca=dgr-lnxw07Linux-JVM&S_TACT=105AGX59&S_CMP=grlnxw07
+ *
+ * Note: for linux, "sysctl -w vm.swappiness=0" will keep the RAM you do
+ * have for your applications!
+ */
+ "-Xmx2g", // was 800m
+ /*
+ * This option will keep the JVM "alive" even when it is memory starved
+ * but the performance of a memory-starved JVM is terrible.
+ */
+ //"-XX:-UseGCOverheadLimit",
+ /* Configure GC for higher throughput. Together these options
+ * request parallel old generation collection using N threads.
+ * The application will be paused when this occurs, but GC will
+ * be faster. Hence throughput will be higher.
+ */
+ "-XX:+UseParallelOldGC",
+ //"-XX:ParallelGCThreads=8",
+ /*
+ * Enable JMX remote management for this service.
+ *
+ * Note: This will not work if you have two such services on a host
+ * because it will assign the same port to each service. In order
+ * to work around that the argument would have to be specified by
+ * the service starter and then published in the Entry[] attributes.
+ *
+ * However, you can use ssh -X to open a tunnel with X
+ * forwarding and then run jconsole locally on the target host
+ * and bring up these data services without enabling remote
+ * JMX.
+ *
+ "-Dcom.sun.management.jmxremote.port=9996",
+ "-Dcom.sun.management.jmxremote.authenticate=false",
+ "-Dcom.sun.management.jmxremote.ssl=false",
+ */
+ };
+
+ serviceCount = bigdata.clientServiceCount;
+
+ constraints = new IServiceConstraint[] {
+
+ new JiniRunningConstraint(),
+ new ZookeeperRunningConstraint(),
+
+ new HostAllowConstraint(bigdata.cs),
+
+ new MaxClientServicesPerHostConstraint(bigdata.maxClientServicePerHost),
+
+ };
+
+ properties = new NV[] {
+
+ };
+
+}
+
+com.bigdata.service.jini.LoadBalancerServer {
+
+ constraints = new IServiceConstraint[] {
+
+ new JiniRunningConstraint(),
+ new ZookeeperRunningConstraint(),
+
+ new HostAllowConstraint(bigdata.lbs)
+
+ };
+
+ args = new String[]{
+ /*
+ * FIXME The load balancer is a bit piggish on long runs because it
+ * keeps the performance counter histories in RAM. While those histories
+ * are bounded, it still uses more RAM than it should.
+ */
+ "-Xmx1G",
+ /*
+ * Enable JMX remote management for this service.
+ *
+ * Note: This will not work if you have two data services on a host
+ * because it will assign the same port to each service. In order
+ * to work around that the argument would have to be specified by
+ * the service starter and then published in the Entry[] attributes.
+ *
+ "-Dcom.sun.management.jmxremote.port=9998",
+ "-Dcom.sun.management.jmxremote.authenticate=false",
+ "-Dcom.sun.management.jmxremote.ssl=false",
+ */
+ };
+
+ /*
+ * Override some properties.
+ */
+ properties = new NV[] {
+
+ /*
+ * Each JiniClient (and hence all bigdata services) can run an
+ * httpd that will expose performance counters for the service and
+ * the host on which it is running. This property specifies the
+ * port for that httpd service. Valid values are port number,
+ * zero (0) for a random open port, MINUS ONE (-1) to disable the
+ * httpd service.
+ *
+ * Note: The load balancer httpd normally uses a known port so
+ * that it is easy to find. This is where you will find all of
+ * the performance counters aggregated for the entire federation,
+ * including their history.
+ */
+ new NV(IBigdataClient.Options.HTTPD_PORT, "@LOAD_BALANCER_PORT@"),
+
+ /*
+ * Note: The load balancer SHOULD NOT collect platform statistics
+ * itself since that interferes with its ability to aggregate
+ * statistics about the host on which it is running. Instead it
+ * should rely on the presence of at least one other service
+ * running on the same host to report those statistics to the load
+ * balancer.
+ */
+ new NV(IBigdataClient.Options.COLLECT_PLATFORM_STATISTICS,"false"),
+
+ /*
+ * The directory where the aggregated statistics will be logged.
+ * The load balancer will write snapshots of the historical
+ * counters into this directory. See LoadBalancerService javadoc
+ * for configuration options which affect how frequently it will
+ * log its counters and how many snapshots will be preserved.
+ *
+ * Note: You only need to specify this option if you want to put
+ * the files into a well known location, e.g, on a shared volume.
+ */
+ //new NV(LoadBalancerServer.Options.LOG_DIR,"/opt2/var/log/bigdata"),
+
+ /* Option essentially turns off the load-based decision making for
+ * this many minutes and substitutes a round-robin policy for
+ * recommending the least utilized data services. The main reason
+ * to do this is to force the initial allocation to be distributed as
+ * evenly as possible across the data services in the cluster.
+ */
+ new NV(LoadBalancerServer.Options.INITIAL_ROUND_ROBIN_UPDATE_COUNT,"10"),
+
+ };
+
+}
+
+/**
+ * Configuration options for the KB instance.
+ */
+lubm {
+
+ // The #of universities to generate.
+ // U8000 is 1.2B told triples
+ // U25000 is 3.4B told triples.
+ // U50000 is 6.7B told triples.
+ // U100000 is ~12B told triples.
+ static private univNum = 1000;
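The university counts above scale roughly linearly with data volume; a rough estimator derived from the listed figures, which work out to roughly 120-150K told triples per university (LubmSizeSketch and the per-university constant are illustrative approximations, not official LUBM numbers):

public class LubmSizeSketch {
    // Approximate told triples per university, derived from the figures above
    // (1.2B/8000, 3.4B/25000 and 6.7B/50000 all land in the 120-150K range).
    static final long APPROX_TRIPLES_PER_UNIVERSITY = 135000L;

    static long estimatedToldTriples(int univNum) {
        return univNum * APPROX_TRIPLES_PER_UNIVERSITY;
    }

    public static void main(String[] args) {
        System.out.println(estimatedToldTriples(1000)); // roughly 135 million for U1000
    }
}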
+
+ // the KB namespace (based on the #of universities by default).
+ static private namespace = "U"+univNum+"";
+
+ // minimum #of data services to run.
+ static private minDataServices = bigdata.dataServiceCount;
+
+ // How long the master will wait to discover the minimum #of data
+ // services that you specified (ms).
+ static private awaitDataServicesTimeout = 8000;
+
+ /* Multiplier for the scatter effect.
+ */
+ static private scatterFactor = 1;
+ static private scatterFactor_term2id = 1;
+
+ /* The #of index partitions to allocate on a scatter split. ZERO
+ * (0) means that 2 index partitions will be allocated per
+ * data service which participates in the scatter split.
+ * Non-zero values directly give the #of index partitions to
+ * create.
+ */
+ static private scatterSplitIndexPartitionCount = ConfigMath.multiply
+ ( scatterFactor,
+ bigdata.dataServiceCount
+ );
+ static private scatterSplitIndexPartitionCount_term2id = ConfigMath.multiply
+ ( scatterFactor_term2id,
+ bigdata.dataServiceCount
+ );
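With the values in this standalone file the scatter-split arithmetic above is tiny; a short sketch of the computation (illustrative only, assuming ConfigMath.multiply is plain multiplication):

public class ScatterSplitSketch {
    public static void main(String[] args) {
        int scatterFactor = 1;        // lubm.scatterFactor above
        int scatterFactorTerm2Id = 1; // lubm.scatterFactor_term2id above
        int dataServiceCount = 1;     // bigdata.dataServiceCount above

        // Non-zero result: the #of index partitions allocated by a scatter split.
        int partitions = scatterFactor * dataServiceCount;               // 1
        int partitionsTerm2Id = scatterFactorTerm2Id * dataServiceCount; // 1

        // Per the comment above, a value of ZERO would instead mean two index
        // partitions per data service participating in the scatter split.
        System.out.println(partitions + " " + partitionsTerm2Id);
    }
}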
+
+ // Use all discovered data services when scattering an index.
+ static private scatterSplitDataServiceCount = 0;
+
+ /* Scatter split trigger point. The scatter split will not be
+ * triggered until the initial index partition has reached
+ * this percentage of a nominal index partition in size.
+ */
+ static private scatterSplitPercentOfSplitThreshold = 0.5;//was .5
+
+ /*
+ * Multipliers that compensate for the consumer/producer ratio for
+ * the asynchronous index write API. These are empirical factors
+ * based on observing the ratio (chunkWritingTime/chunkWaitingTime).
+ * Assuming a constant chunk writing time, if the chunk size for each
+ * index is adjusted by its multiplier then this ratio would be 1:1.
+ * In practice, the chunk writing time is not a linear function of
+ * the chunk size, which is one reason why we prefer larger chunks
+ * and why the asynchronous write API is a win.
+ *
+ * Note: These factors were set relative to TERM2ID. However, when
+ * I reduced the scatterFactor for TERM2ID by 1/2, I doubled its
+ * chunk size to keep up the same throughput so it is now at 2.00
+ * rather than 1.00.
+ */
+ static private chunkSizeFactor_id2term = 1.79;
+ static private chunkSizeFactor_term2id = 2.00;
+ static private chunkSizeFactor_spo = 8.00; // was 3.89
+ static private chunkSizeFactor_pos = 8.00; // was 13.37
+ static private chunkSizeFactor_osp = 8.00; // was 27.35
+
+ /* The nominal sink chunk size. For each index, this is adjusted
+ * by the factor specified above.
+ */
+// static private sinkChunkSize = 10000;
+ static private sinkChunkSize = 1000;
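The effective chunk size seen by each index sink is the nominal sinkChunkSize adjusted by its per-index factor; a small illustration using the values above (assuming the adjustment is a straight multiplication, as the comment describes):

public class SinkChunkSizeSketch {
    public static void main(String[] args) {
        int sinkChunkSize = 1000;     // nominal value above
        double factorTerm2Id = 2.00;  // chunkSizeFactor_term2id
        double factorId2Term = 1.79;  // chunkSizeFactor_id2term
        double factorSpo = 8.00;      // chunkSizeFactor_spo/_pos/_osp

        // Effective per-index sink chunk sizes.
        System.out.println((int) (sinkChunkSize * factorTerm2Id)); // 2000 for TERM2ID
        System.out.println((int) (sinkChunkSize * factorId2Term)); // 1790 for ID2TERM
        System.out.println((int) (sinkChunkSize * factorSpo));     // 8000 for SPO, POS, OSP
    }
}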
+
+ /*
+ * Specify / override some triple store properties.
+ *
+ * Note: You must reference this object in the section for the
+ * component which will actually create the KB instance, e.g.,
+ * either the RDFDataLoadMaster or the LubmGeneratorMaster.
+ */
+ static private properties = new NV[] {
+
+ /*
+ * When "true", the store will perform incremental closure as
+ * the data are loaded. When "false", the closure will be
+ * computed after all data are loaded. (Actually, since we are
+ * not loading through the SAIL, setting this to true does not
+ * cause incremental TM, but it does disable closure, so
+ * "false" is what you need here.)
+ */
+ new NV(BigdataSail.Options.TRUTH_MAINTENANCE, "false" ),
+
+ /*
+ * Enable rewrites of high-level queries into native rules (native JOIN
+ * execution). (Can be changed without re-loading the data to compare
+ * the performance of the Sesame query evaluation against using the
+ * native rules to perform query evaluation.)
+ */
+ new NV(BigdataSail.Options.NATIVE_JOINS, "true"),
+
+ /*
+ * May be used to turn off inference during query, but will
+ * cause ALL inferences to be filtered out when reading on the
+ * database.
+ */
+ // new NV(BigdataSail.Options.INCLUDE_INFERRED, "false"),
+
+ /*
+ * May be used to turn off query-time expansion of entailments such as
+ * (x rdf:type rdfs:Resource) and owl:sameAs even through those
+ * entailments were not materialized during forward closure (this
+ * disables the backchainer!)
+ */
+ new NV(BigdataSail.Options.QUERY_TIME_EXPANDER, "false"),
+
+ /*
+ * Option to restrict ourselves to RDFS only inference. This
+ * condition may be compared readily to many other stores.
+ *
+ * Note: While we can turn on some kinds of owl processing
+ * (e.g., TransitiveProperty, see below), we can not compute
+ * all the necessary entailments (only queries 11 and 13
+ * benefit).
+ *
+ * Note: There are no owl:sameAs assertions in LUBM.
+ *
+ * Note: lubm query does not benefit from owl:inverseOf.
+ *
+ * Note: lubm query does benefit from owl:TransitiveProperty
+ * (queries 11 and 13).
+ *
+ * Note: owl:Restriction (which we can not compute) plus
+ * owl:TransitiveProperty is required to get all the answers
+ * for LUBM.
+ */
+ new NV(BigdataSail.Options.AXIOMS_CLASS, "com.bigdata.rdf.axioms.RdfsAxioms"),
+ // new NV(BigdataSail.Options.AXIOMS_CLASS,"com.bigdata.rdf.axioms.NoAxioms"),
+
+ /*
+ * Produce a full closure (all entailments) so that the
+ * backward chainer is always a NOP. Note th...
[truncated message content] |