From: <tho...@us...> - 2010-08-06 15:46:16
Revision: 3423 http://bigdata.svn.sourceforge.net/bigdata/?rev=3423&view=rev Author: thompsonbry Date: 2010-08-06 15:46:07 +0000 (Fri, 06 Aug 2010) Log Message: ----------- Rationalized the bigdata cluster configuration files slightly and added one for a single node cluster (bigdataStandalone.config). Modified build.xml to remove all the "standalone" targets and to state the new bigdataStandalone.config" file. Modified Paths: -------------- trunk/build.xml trunk/src/resources/config/README trunk/src/resources/config/bigdataCluster.config trunk/src/resources/config/bigdataCluster16.config Added Paths: ----------- trunk/src/resources/config/bigdataStandalone.config Modified: trunk/build.xml =================================================================== --- trunk/build.xml 2010-08-06 15:14:23 UTC (rev 3422) +++ trunk/build.xml 2010-08-06 15:46:07 UTC (rev 3423) @@ -777,118 +777,8 @@ </java> </target> - + <!-- --> -<!-- STANDALONE FEDERATION TARGETS --> -<!-- (test/benchamarking) --> - -<target name="generateLookupStarterJar" unless="lookupStarterJarAvailable"> -<antcall target="testCompile" /> -</target> - -<target name="testLookupStarterJarAvailability"> -<property name="bigdata-test.lib" location="${bigdata.dir}/bigdata-test/lib" /> -<condition property="lookupStarterJarAvailable"> - <available file="${bigdata-test.lib}/lookupstarter.jar" /> -</condition> -</target> - -<target name="standalone-setup" depends="testLookupStarterJarAvailability,generateLookupStarterJar" description="Setup properties used by standalone federation and LUS start/stop."> -<property name="app.home" location="${bigdata.dir}" /> -<property name="test.codebase.port" value="23333" /> -<property name="test.codebase.dir" location="${bigdata.dir}/bigdata-jini/lib/jini/lib-dl" /> -<property name="dist.lib" location="${bigdata.dir}/bigdata-jini/lib/jini/lib" /> -<property name="dist.lib.dl" location="${bigdata.dir}/bigdata-jini/lib/jini/lib-dl" /> -<property name="test.codebase" value="http://${this.hostname}:${test.codebase.port}/jsk-dl.jar" /> -<property name="java.security.policy" value="${bigdata.dir}/policy.all" /> -<property name="log4j.configuration" value="resources/logging/log4j.properties" /> -<property name="java.net.preferIPv4Stack" value="true" /> -<property name="bigdata.fedname" value="${standalone.fed}" /> -</target> - -<!-- Note: You should 'nohup' this, e.g., "nohup ant standalone-start" to - avoid taking down the ServicesManagerServer if you are disconnected - from a terminal. --> -<target name="standalone-start" depends="jar,standalone-setup" description="Start the standalone federation."> -<!-- Start the lookup service. --> -<antcall target="startHttpd" /> -<antcall target="startLookup" /> -<java classname="com.bigdata.jini.start.ServicesManagerServer" failonerror="true" fork="true" logerror="true"> - <classpath refid="runtime.classpath" /> - <jvmarg value="-Xmx200m" /> - <jvmarg value="-showversion" /> - <!-- The name of the federation instance. 
--> - <jvmarg value="-Dbigdata.fedname=${standalone.fed}" /> - <jvmarg value="-Djava.security.policy=policy.all" /> - <jvmarg value="-Dcom.bigdata.jmx.log4j.disable=true" /> - <jvmarg value="-Dcom.bigdata.counters.linux.sysstat.path=${SYSSTAT_HOME}" /> - <jvmarg value="-Dlog4j.configuration=file:src/resources/config/standalone/log4j.properties" /> - <arg value="src/resources/config/standalone/bigdataStandalone.config" /> -</java> -</target> - -<target name="standalone-stop" depends="jar,standalone-setup" description="Stop the standalone federation."> -<java classname="com.bigdata.service.jini.util.ShutdownFederation" failonerror="true" fork="true" logerror="true"> - <classpath refid="runtime.classpath" /> - <jvmarg value="-Xmx200m" /> - <jvmarg value="-showversion" /> - <!-- The name of the federation instance. --> - <jvmarg value="-Dbigdata.fedname=${standalone.fed}" /> - <jvmarg value="-Djava.security.policy=policy.all" /> - <jvmarg value="-Dcom.bigdata.jmx.log4j.disable=true" /> - <jvmarg value="-Dcom.bigdata.counters.linux.sysstat.path=${SYSSTAT_HOME}" /> - <jvmarg value="-Dlog4j.configuration=file:src/resources/config/standalone/log4j.properties" /> - <arg value="src/resources/config/standalone/bigdataStandalone.config" /> -</java> -<!-- Then take down the lookup service as well. --> -<antcall target="stopLookup" /> -<antcall target="stopHttpd" /> -</target> - -<target name="standalone-start-nano-server" depends="jar" description="Start a small http server fronting for a bigdata database instance."> -<java classname="com.bigdata.rdf.sail.bench.NanoSparqlServer" fork="true" failonerror="true"> - <arg line="${standalone.nanoServerPort} ${standalone.namespace} src/resources/config/standalone/bigdataStandalone.config" /> - <jvmarg line="-server" /> - <jvmarg line="-Xmx200M" /> - <classpath refid="runtime.classpath" /> -</java> -</target> - -<target name="standalone-stop-nano-server" depends="jar" description="Stop the small http server running at the configured port."> -<java classname="com.bigdata.rdf.sail.bench.NanoSparqlServer" fork="true" failonerror="true"> - <arg line="${standalone.nanoServerPort} -stop" /> - <classpath refid="runtime.classpath" /> -</java> -</target> - -<target name="standalone-bulk-load" depends="jar" description="Bulk load RDF data into the standalone federation."> -<java classname="com.bigdata.rdf.load.MappedRDFDataLoadMaster" failonerror="true" fork="true" logerror="true"> - <classpath refid="runtime.classpath" /> - <jvmarg value="-Xmx200m" /> - <jvmarg value="-showversion" /> - <!-- The name of the federation instance. --> - <jvmarg value="-Dbigdata.fedname=${standalone.fed}" /> - <jvmarg value="-Djava.security.policy=policy.all" /> - <jvmarg value="-Dcom.bigdata.jmx.log4j.disable=true" /> - <jvmarg value="-Dcom.bigdata.counters.linux.sysstat.path=${SYSSTAT_HOME}" /> - <jvmarg value="-Dlog4j.configuration=file:src/resources/config/standalone/log4j.properties" /> - <!-- --> - <!-- Per job parameters --> - <!-- --> - <!-- The namespace of the target KB. --> - <jvmarg value="-Dbigdata.rdf.namespace=${standalone.namespace}" /> - <!-- The job name (same as the KB namespace is a common default). --> - <jvmarg value="-Dbigdata.rdf.job.name=bulk-load-kb-${standalone-namespace}" /> - <!-- The file or directory containing zero or more files to be loaded first. --> - <jvmarg value="-Dbigdata.rdf.ontology=${standalone.bulkLoad.ontology}" /> - <!-- The file or directory containing RDF data to be loaded. 
--> - <jvmarg value="-Dbigdata.rdf.data=${standalone.bulkLoad.data}" /> - <!-- The main configuration file. --> - <arg value="src/resources/config/standalone/bigdataStandalone.config" /> -</java> -</target> - -<!-- --> <!-- MISC. UTILITY TARGETS --> <!-- --> @@ -1122,9 +1012,9 @@ <!-- Stage the bigdata Jini config files --> +<copy file="${src.resources.config}/bigdataStandalone.config" todir="${dist.var.config.jini}" /> <copy file="${src.resources.config}/bigdataCluster.config" todir="${dist.var.config.jini}" /> <copy file="${src.resources.config}/bigdataCluster16.config" todir="${dist.var.config.jini}" /> -<copy file="${src.resources.config}/standalone/bigdataStandalone.config" todir="${dist.var.config.jini}" /> <!-- Stage the infrastructure service config files --> Modified: trunk/src/resources/config/README =================================================================== --- trunk/src/resources/config/README 2010-08-06 15:14:23 UTC (rev 3422) +++ trunk/src/resources/config/README 2010-08-06 15:46:07 UTC (rev 3423) @@ -3,8 +3,10 @@ bigdataStandalone.config - A sample configuration file for a workstation. -bigdataCluster.config - A sample configuration file for a cluster. +bigdataCluster.config - A sample configuration file for a 3-node cluster. +bigdataCluster.config - A sample configuration file for a 16-node cluster. + log4j.properties - A default log4j configuration file for use by the bigdata clients and services. Modified: trunk/src/resources/config/bigdataCluster.config =================================================================== --- trunk/src/resources/config/bigdataCluster.config 2010-08-06 15:14:23 UTC (rev 3422) +++ trunk/src/resources/config/bigdataCluster.config 2010-08-06 15:46:07 UTC (rev 3423) @@ -1200,15 +1200,6 @@ static private awaitDataServicesTimeout = 8000; /* Multiplier for the scatter effect. - * - * Note: TERM2ID tends to grow more slowly than the other indices for two - * reasons. First, there are many more distinct RDF Statements than RDF - * Values for nearly any data set (except if statement identifiers are enabled, - * in which case there are more terms than statements). Second, the keys of - * the TERM2ID index compress nicely since long prefixes are very common. - * Therefore it makes sense to use a smaller scatter factor for this index - * UNLESS you have only 2-3 data services, in which case you will see hot - * spots develop with this index unless it is more widely distributed. */ static private scatterFactor = 2; static private scatterFactor_term2id = 2; // use 1 @ 4DS and up. Modified: trunk/src/resources/config/bigdataCluster16.config =================================================================== --- trunk/src/resources/config/bigdataCluster16.config 2010-08-06 15:14:23 UTC (rev 3422) +++ trunk/src/resources/config/bigdataCluster16.config 2010-08-06 15:46:07 UTC (rev 3423) @@ -1305,13 +1305,6 @@ static private awaitDataServicesTimeout = 8000; /* Multiplier for the scatter effect. - * - * Note: TERM2ID tends to grow more slowly than the other indices for two - * reasons. First, there are many more distinct RDF Statements than RDF - * Values for nearly any data set (except if statement identifiers are enabled, - * in which case there are more terms than statements). Second, the keys of - * the TERM2ID index compress nicely since long prefixes are very common. - * Therefore it makes sense to use a smaller scatter factor for this index. 
*/ static private scatterFactor = 2; static private scatterFactor_term2id = 1; Added: trunk/src/resources/config/bigdataStandalone.config =================================================================== --- trunk/src/resources/config/bigdataStandalone.config (rev 0) +++ trunk/src/resources/config/bigdataStandalone.config 2010-08-06 15:46:07 UTC (rev 3423) @@ -0,0 +1,1886 @@ +import net.jini.jeri.BasicILFactory; +import net.jini.jeri.BasicJeriExporter; +import net.jini.jeri.tcp.TcpServerEndpoint; + +import net.jini.discovery.LookupDiscovery; +import net.jini.core.discovery.LookupLocator; +import net.jini.core.entry.Entry; +import net.jini.lookup.entry.Name; +import net.jini.lookup.entry.Comment; +import net.jini.lookup.entry.Address; +import net.jini.lookup.entry.Location; +import net.jini.lookup.entry.ServiceInfo; +import net.jini.core.lookup.ServiceTemplate; + +import java.io.File; + +import com.bigdata.util.NV; +import com.bigdata.journal.BufferMode; +import com.bigdata.jini.lookup.entry.*; +import com.bigdata.service.IBigdataClient; +import com.bigdata.service.jini.*; +import com.bigdata.service.jini.lookup.DataServiceFilter; +import com.bigdata.service.jini.master.ServicesTemplate; +import com.bigdata.jini.start.config.*; +import com.bigdata.jini.util.ConfigMath; + +import org.apache.zookeeper.ZooDefs; +import org.apache.zookeeper.data.ACL; +import org.apache.zookeeper.data.Id; + +// imports for various options. +import com.bigdata.btree.IndexMetadata; +import com.bigdata.btree.keys.KeyBuilder; +import com.bigdata.rdf.sail.BigdataSail; +import com.bigdata.rdf.spo.SPORelation; +import com.bigdata.rdf.spo.SPOKeyOrder; +import com.bigdata.rdf.lexicon.LexiconRelation; +import com.bigdata.rdf.lexicon.LexiconKeyOrder; +import com.bigdata.rawstore.Bytes; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeUnit.*; + +/* + * This is a sample configuration file for a bigdata federation. + * + * Note: The original file is a template. The template contains parameters + * of the form @XXX@. The values for those template parameters are specified + * in the build.properties file when you use ant to install bigdata. + * + * Note: This file uses the jini configuration mechanism. The syntax + * is a subset of Java. The properties for each component are grouped + * within the namespace for that component. + * + * See the net.jini.config.ConfigurationFile javadoc for more + * information. + */ + +/* + * A namespace use for static entries referenced elsewhere in this + * ConfigurationFile. + */ +bigdata { + + /** + * The name for this federation. + * + * Note: This is used to form the [zroot] (root node in zookeeper + * for the federation) and the [serviceDir] (path in the file + * system for persistent state for the federation). + * + * Note: If you will be running more than one federation, then you + * MUST use unicast discovery and specify the federation name in + * the [groups]. + */ + static private fedname = "@FED@"; + + /** + * Where to put all the persistent state. + */ + static private serviceDir = new File("@LAS@"); + + /** + * Which JDK to use. + */ + static private javaHome = new File("@JAVA_HOME@"); + + /** + * A common point to set the Zookeeper client's requested + * sessionTimeout and the jini lease timeout. The default lease + * renewal period for jini is 5 minutes while for zookeeper it is + * more like 5 seconds. 
This puts the two systems onto a similar + * timeout period so that a disconnected client is more likely to + * be noticed in roughly the same period of time for either + * system. A value larger than the zookeeper default helps to + * prevent client disconnects under sustained heavy load. + */ + // jini + static private leaseTimeout = ConfigMath.m2ms(60);// 20s=20000; 5m=300000; + // zookeeper + static private sessionTimeout = (int)ConfigMath.m2ms(10);// was 5m 20s=20000; 5m=300000; + + /* + * Example cluster configuration. + * + * Data services are load balanced. Index partitions will be + * moved around as necessary to ensure hosts running data + * service(s) are neither under nor over utilized. Data services + * can be very resource intensive processes. They heavily buffer + * both reads and writes, and they use RAM to do so. They also + * support high concurrency and can use up to one thread per index + * partition. How many cores they will consume is very much a + * function of the application. + * + * Zookeeper services use a quorum model. Always allocate an odd + * number. 3 gives you one failure. 5 gives you two failures. + * Zookeeper will sync the disk almost continuously while it is + * running. It really deserves its own local disk. Zookeeper + * also runs in memory. Since all operations are serialized, if + * it starts swapping then peformance will drop through the floor. + * + * Jini uses a peer model. Each service registers with each + * registrar that it discovers. Each client listeners to each + * registrar that it discovers. The default jini core services + * installation runs entirely in memory (no disk operations, at + * least not for service registration). A second instance of the + * jini core services provides a safety net. If you are using + * multicast then you can always add another instance. + */ + + /* Declare the hosts. This provides indirection for planning + * purposes. + * + * The summary notation is: cores@GHZ/cache x RAM x DISK + */ + static private h0 = "192.168.1.50"; // 4@3ghz/1kb x 4GB x 263G + //static private h1 = "192.168.20.27"; // 4@3ghz/2kb x 4GB x 263G + //static private h2 = "192.168.20.28"; // 4@3ghz/1kb x 4GB x 64G + + /* Note: this configuration puts things that are not disk intensive + * on the host with the least disk space and zookeeper. + */ + static private lbs = h0; // specify as @LOAD_BALANCER_HOST@ ? + static private txs = h0; + static private mds = h0; + + // 1+ jini servers + static private jini1 = h0; + //static private jini2 = h1; + static private jini = new String[]{ jini1 }; //,jini2}; + + // Either 1 or 3 zookeeper machines (one instance per). + // See the QuorumPeerMain and ZooKeeper configurations below. + static private zoo1 = h0; + //static private zoo2 = h1; + //static private zoo3 = h2; + static private zoo = new String[] { zoo1 }; // ,zoo2,zoo3}; + + // 1+ client service machines (1+ instance per host). + static private cs0 = h0; + + // 1+ data service machines (1+ instance per host). + static private ds0 = h0; + static private ds1 = h1; + + // client servers + static private cs = new String[] { + cs0 //, ... + }; + + // The target #of client servers. + static private clientServiceCount = 1; + static private maxClientServicePerHost = 1; + + // data servers + static private ds = new String[]{ + ds0//, ds1 //, ... + }; + + // The target #of data services. + static private dataServiceCount = 1; + + // Maximum #of data services per host. 
+ static private maxDataServicesPerHost = 1; + + // @todo also specify k (replicationCount) + + // Sets the initial and maximum journal extents. + static private journalExtent = ConfigMath.multiply(200, Bytes.megabyte); + + /** + * A String[] whose values are the group(s) to be used for discovery + * (no default). Note that multicast discovery is always used if + * LookupDiscovery.ALL_GROUPS (a <code>null</code>) is specified. + */ + + // one federation, multicast discovery. + //static private groups = LookupDiscovery.ALL_GROUPS; + + // unicast discovery or multiple federations, MUST specify groups. + static private groups = new String[]{bigdata.fedname}; + + /** + * One or more unicast URIs of the form <code>jini://host/</code> + * or <code>jini://host:port/</code> (no default). + * + * This MAY be an empty array if you want to use multicast + * discovery <strong>and</strong> you have specified the groups as + * LookupDiscovery.ALL_GROUPS (a <code>null</code>). + */ + static private locators = new LookupLocator[] { + + // runs jini on the localhost using unicast locators. + //new LookupLocator("jini://localhost/") + + // runs jini on two hosts using unicast locators. + new LookupLocator("jini://"+jini1), + //new LookupLocator("jini://"+jini2), + + }; + + /** + * The policy file that will be used to start services. + */ + private static policy = "@POLICY_FILE@"; + + /** + * log4j configuration file (applies to bigdata and zookeeper). + * + * Note: The value is URI! + * + * Note: You should aggregate all of the log output to a single + * host. For example, using the log4j SocketAppender and the + * SimpleNodeServer. + */ + log4j = "@LOG4J_CONFIG@"; + + /** + * java.util.logging configuration file (applies to jini as used + * within bigdata). + * + * Note: The value is a file path! + */ + logging = "@LOGGING_CONFIG@"; + + /* + private static host = ConfigUtil.getHostName(); + private static port = "8081"; + private static jskdl = " http://" + host + ":" + port + "/jsk-dl.jar"; + */ + + /** + * JVM argument may be used to enable the yourkit profiler agent on a + * service. Of course, yourkit must be installed at this location and + * you must have a licensed copy of the yourkit UI running either on a + * node of the cluster or on a machine routed to the cluster, e.g., via + * an ssh tunnel. The yourkit profiler uses ports in [10001:100010] by + * default on each node. + * + * See http://www.yourkit.com/docs/80/help/running_with_profiler.jsp + * + * See http://www.yourkit.com/docs/80/help/agent.jsp + * + * See http://www.yourkit.com/docs/80/help/additional_agent_options.jsp + * + * Note: Conditionally include ${profilerAgent} iff you want to enable + * profiling for some service class. + */ + + // linux-64 with all profiling options initially disabled. + profilerAgent="-agentpath:/usr/java/yjp-9.0.3/bin/linux-x86-64/libyjpagent.so=disableexceptiontelemetry,disablestacktelemetry"; + +} + +/* + * Service configuration defaults. These can also be specified on a + * per service-type basis. When the property is an array type, the + * value here is concatenated with the optional array value on the per + * service-type configuration. Otherwise it is used iff no value is + * specified for the service-type configuration. + */ +com.bigdata.jini.start.config.ServiceConfiguration { + + /* + * Default java command line arguments that will be used for all + * java-based services + * + * Note: [-Dcom.sun.jini.jeri.tcp.useNIO=true] enables NIO in + * combination with the [exporter] configured below. 
+ */ + defaultJavaArgs = new String[]{ + "-server", + "-ea", + "-showversion", + //"-Xmx2G", + /* This is a workaround for a JVM bug which can result in a + * lost wakeup. This bug is fixed in JDK1.6.0_18. However, + * JDK1.6.0_18 has other problems which result in segfaults. + * + * http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6822370 + */ + "-XX:+UseMembar", + "-Dcom.sun.jini.jeri.tcp.useNIO=@USE_NIO@", + "-Djava.security.policy="+bigdata.policy, + "-Djava.util.logging.config.file="+bigdata.logging, + "-Dcom.bigdata.counters.linux.sysstat.path=@SYSSTAT_HOME@", + //bigdata.profilerAgent, + }; + + /* Default path for service instances and their persistent + * data. This may be overriden on a per service-type basis. + * + * Note: For logical services that support failover, the concrete + * service directory is assigned dynamically when a physical + * service instance is created. + */ + serviceDir = bigdata.serviceDir; + + // The JVM to use. + javaHome = bigdata.javaHome; + + /* The bigdata services default logging configuration (a URI!) + */ + log4j = bigdata.log4j; + + /* + * Set up some default properties values that will be inherited + * (copy by value) by all clients and services started using this + * configuration file. + */ + properties = new NV[] { + + /* + * Each JiniClient (and hence all bigdata services) can run an + * httpd that will expose performance counters for the service and + * the host on which it is running. This property specifies the + * port for that httpd service. Valid values are port number, + * zero (0) for a random open port, MINUS ONE (-1) to disable the + * httpd service. + */ + //new NV(IBigdataClient.Options.HTTPD_PORT, "-1"), + + /* + * Option to disable collection of performance counters for the + * host on which the client or service is running. + * + * Note: The load balancer relies on this information! + */ + //new NV(IBigdataClient.Options.COLLECT_PLATFORM_STATISTICS,"false"), + + /* Option to disable collection of performance counters on the + * queues used internally by the client or service. + * + * Note: The load balancer relies on this information! + */ + //new NV(IBigdataClient.Options.COLLECT_QUEUE_STATISTICS,"false"), + + /* Option controls how many times a client request will be + * reissued on receiving notice that an index partition locator is + * stale. Stale locators arise when an index partition is split, + * moved, or joined. + * + * Note: This option needs to be larger if we are aggressively + * driving journal overflows and index partitions splits during + * the "young" phase of a data service or scale-out index since a + * LOT of redirects will result. + */ + new NV(IBigdataClient.Options.CLIENT_MAX_STALE_LOCATOR_RETRIES,"1000"), + + }; + +} + +/** + * JoinManager options. + * + * Note: These options must be copied into the service.config (to + * specify the service lease timeout) as well as used by the client + * (which uses this file directly). + */ +net.jini.lookup.JoinManager { + + // The lease timeout for jini joins. + maxLeaseDuration = bigdata.leaseTimeout; + +} + +/** + * Jini service configuration. + */ +jini { + + /* This sets command line arguments for the ServiceStarter which + * is used to run the jini services. 
+ */ + args = new String[] { + + "-Xmx400m", + "-Djava.security.policy="+bigdata.policy, + "-Djava.util.logging.config.file="+bigdata.logging, + "-Dlog4j.configuration="+bigdata.log4j, + "-Dlog4j.primary.configuration="+bigdata.log4j, + "-DinitialMemberGroups="+bigdata.fedname + + }; + + /** + * The main jini configuration file. This file contains a + * NonActivatableServiceDescriptor[]. The elements of that array + * describe how to start each of the jini services. + */ + configFile = new File("@JINI_CONFIG@"); + + /** + * The #of instances to run. + * + * Note: A jini service instance may be started on a host if it is + * declared in [locators]. If locators is empty, then you are + * using multicast discovery. In this case an instance may be + * started on any host, unless [constraints] are imposed. In any + * case, no more than [serviceCount] jini services will be started + * at any given time. This is checked against the #of discovered + * instances. + */ + serviceCount = 1; + +} + +/** + * Zookeeper server configuration. + */ +org.apache.zookeeper.server.quorum.QuorumPeerMain { + + /* Directory for zookeeper's persistent state. The [id] will be + * appended as another path component automatically to keep + * instances separate. + */ + dataDir = new File(bigdata.serviceDir,"zookeeper"); + + /* Optional directory for the zookeeper log files. The [id] will + * be appended as another path component automatically to keep + * instances separate. + * + * Note: A dedicated local storage device is highly recommended + * for the zookeeper transaction logs! + */ + //dataLogDir=new File("/var/zookeeper-log"); + + // required. + clientPort=2181; + + tickTime=2000; + + initLimit=5; + + syncLimit=2; + + /* A comma delimited list of the known zookeeper servers together + * with their assigned "myid": {myid=host:port(:port)}+ + * + * Note: You SHOULD specify the full list of servers that are + * available to the federation. An instance of zookeeper will be + * started automatically on each host running ServicesManager that + * is present in the [servers] list IF no instance is found + * running on that host at the specified [clientPort]. + * + * Note: zookeeper interprets NO entries as the localhost with + * default peer and leader ports. This will work as long as the + * localhost is already running zookeeper. However, zookeeper + * WILL NOT automatically start zookeeper if you do not specify + * the [servers] property. You can also explicitly specify + * "localhost" as the hostname, but that only works for a single + * machine. + */ + // standalone + //servers="1=localhost:2888:3888"; + // ensemble + /**/ + servers = "1="+bigdata.zoo1+":2888:3888" +// + ",2="+bigdata.zoo2+":2888:3888" +// + ",3="+bigdata.zoo3+":2888:3888" + ; + + // This is all you need to run zookeeper. + classpath = new String[] { + "@LIB_DIR@/apache/zookeeper-3.2.1.jar", + "@LIB_DIR@/apache/log4j-1.2.15.jar" + }; + + /* Optional command line arguments for the JVM used to execute + * zookeeper. + * + * Note: swapping for zookeeper is especially bad since the + * operations are serialized, so if anything hits then disk then + * all operations in the queue will have that latency as well. + * However, bigdata places a very light load on + * zookeeper so a modest heap should be Ok. For example, I have + * observed a process size of only 94m after 10h on a 15-node + * cluster. + */ + args = new String[]{ + "-Xmx200m", + /* + * Enable JXM remote management. 
+ * + "-Dcom.sun.management.jmxremote.port=9997", + "-Dcom.sun.management.jmxremote.authenticate=false", + "-Dcom.sun.management.jmxremote.ssl=false", + */ +}; + + // zookeeper server logging configuration (value is a URI!) + log4j = bigdata.log4j; + +} + +/* + * Zookeeper client configuration. + */ +org.apache.zookeeper.ZooKeeper { + + /* Root znode for the federation instance. */ + zroot = "/"+bigdata.fedname; + + /* A comma separated list of host:port pairs, where the port is + * the CLIENT port for the zookeeper server instance. + */ + // standalone. + // servers = "localhost:2181"; + // ensemble + servers = bigdata.zoo1+":2181" // @TODO enable other instances. +// + ","+bigdata.zoo2+":2181" +// + ","+bigdata.zoo3+":2181" + ; + + /* Session timeout (optional). */ + sessionTimeout = bigdata.sessionTimeout; + + /* + * ACL for the zookeeper nodes created by the bigdata federation. + * + * Note: zookeeper ACLs are not transmitted over secure channels + * and are placed into plain text Configuration files by the + * ServicesManagerServer. + */ + acl = new ACL[] { + + new ACL(ZooDefs.Perms.ALL, new Id("world", "anyone")) + + }; + +} + +/* + * Jini client configuration + */ +com.bigdata.service.jini.JiniClient { + + /* Default Entry[] for jini services. Also used by the + * ServicesManagerService as is. + * + * Note: A Name attribute will be added automatically using the + * service type and the znode of the service instance. That Name + * will be canonical. It is best if additional service names are + * NOT specified as that might confuse somethings :-) + * + * Note: A Hostname attribute will be added dynamically. + */ + entries = new Entry[] { + // Purely informative. + new Comment(bigdata.fedname), + }; + + groups = bigdata.groups; + + locators = bigdata.locators; + + // optional JiniClient properties. + // properties = new NV[] {}; + + /* + * Overrides for jini SERVICES (things which are started + * automatically) BUT NOT CLIENTs (things which you start by hand + * and which read this file directly). + * + * The difference here is whether or not a service.config file is + * being generated. When it is, the jiniOptions[] will be + * included in how that service is invoked and will operate as + * overrides for the parameters specified in the generated + * service.config file. However, normal clients directly consume + * this config file rather than the generated one and therefore + * you must either specify their overrides directly on the command + * line when you start the client or specify them explicitly in + * the appropriate component section within this configuration + * file. + * + * In practice, this means that you must specify some parameters + * both here and in the appropriate component configuration. E.g., + * see the component section for "net.jini.lookup.JoinManager" + * elsewhere in this file. + */ + jiniOptions = new String[] { + + // The lease timeout for jini joins. + "net.jini.lookup.JoinManager.maxLeaseDuration="+bigdata.leaseTimeout, + + }; + +} + +/** + * Options for the bigdata services manager. + */ +com.bigdata.jini.start.ServicesManagerServer { + + /* + * This object is used to export the service proxy. The choice + * here effects the protocol that will be used for communications + * between the clients and the service. + */ + exporter = new BasicJeriExporter(TcpServerEndpoint.getInstance(0), + new BasicILFactory()); + + /* + * The data directory and the file on which the serviceID will be + * written. 
+ * + * Note: These properties MUST be specified explicitly for the + * ServicesManager since it uses this as its Configuration file. + * For other services, it generates the Configuration file and + * will generate this property as well. + */ + + serviceDir = new File(bigdata.serviceDir,"ServicesManager"); + + serviceIdFile = new File(serviceDir,"service.id"); + + /* The services that will be started. For each service, there + * must be a corresponding component defined within this + * configuration file. For each "ManagedServiceConfiguration", an + * entry will be made in zookeeper and logical and physical + * service instances will be managed automatically. For unmanaged + * services, such as jini and zookeeper itself, instances will be + * started iff necessary by the services manager when it starts + * up. + */ + services = new String[] { + + "jini", + "org.apache.zookeeper.server.quorum.QuorumPeerMain", + "com.bigdata.service.jini.TransactionServer", + "com.bigdata.service.jini.MetadataServer", + "com.bigdata.service.jini.DataServer", + "com.bigdata.service.jini.LoadBalancerServer", + "com.bigdata.service.jini.ClientServer" + + }; + + /* + * Additional properties passed through to the JiniClient or the + * service. + * + * Note: The services manager is used to collect statistics from the + * OS for each host so we have performance counters for hosts which + * are only running non-bigdata services, such as jini or zookeeper. + */ + properties = new NV[]{ + + }; + + /* The services manager MUDT be run on every host so that it may + * start both bigdata and non-bigdata services (jini, zookeeper). + * This is also used to report per-host performance counters to + * the load balancer for hosts that are not running bigdata + * services. + */ + constraints = new IServiceConstraint[] { + + }; + +} + +com.bigdata.service.jini.TransactionServer { + + constraints = new IServiceConstraint[] { + + new JiniRunningConstraint(), + new ZookeeperRunningConstraint(), + + new HostAllowConstraint(bigdata.txs) + + }; + + args = new String[]{ + + // Does not need much RAM. + "-Xmx200m" + + }; + + properties = new NV[] { + + /* The #of milliseconds that the database will retain history no + * longer required to support the earliest active transaction. + * + * A value of ZERO means that only the last commit point will + * be retained. The larger the value the more history will be + * retained. You can use a really big number if you never want + * to release history and you have lots of disk space :-) + * + * Note: The most recent committed state of the database is + * NEVER released. + */ + new NV(TransactionServer.Options.MIN_RELEASE_AGE, "0"), + + }; + +} + +com.bigdata.service.jini.MetadataServer { + + constraints = new IServiceConstraint[] { + + new JiniRunningConstraint(), + new ZookeeperRunningConstraint(), + //new TXRunningConstraint(), + + new HostAllowConstraint(bigdata.mds), + + }; + + args = new String[]{ + + // Does not need much RAM. + "-Xmx200m" + + }; + + properties = new NV[]{ + + /* + * The MDS does not support overflow at this time so + * overflow MUST be disabled for this service. + */ + new NV(MetadataServer.Options.OVERFLOW_ENABLED,"false") + + }; + +} + +com.bigdata.service.jini.DataServer { + + args = new String[]{ + //bigdata.profilerAgent, + /* + * Grant lots of memory, but read on. + * + * Note: 32-bit JVMs have a 2G limit on the heap, but the practical limit + * is often much less - maybe 1400m. 64-bit JVMs can use much more RAM. 
+ * However, the heap which you grant to java DOES NOT determine the total + * process heap. I have seen 64-bit java processes using an additional + * 3-4GB of heap beyond what is specified here. So, you need to consider + * the total RAM, subtract out enough for the other processes and the OS + * buffers, divide by the #of client/data services you plan to run on that + * host (generally 1-2) and then subtract out some more space for the JVM + * itself. + * + * For example, if you have 32G RAM and a 64-bit JVM and plan to run two + * CS/DS on the host, I would recommend 10G for the Java heap. You can + * expect to see Java grab another 4G per process over time. That makes + * the per CS/DS heap 14G. With two processes you have taken 28G leaving + * 4G for everything else. + * + * Here is another example: 4G RAM, 32-bit JVM, and 2 CS/DS per host. I + * would stick to 800m for the Java heap. You don't have a problem unless + * you see an OOM (OutOfMemoryException) or a process killed because GC is + * taking too much time. + * + * See http://www.ibm.com/developerworks/linux/library/j-nativememory-linux/index.html?ca=dgr-lnxw07Linux-JVM&S_TACT=105AGX59&S_CMP=grlnxw07 + * + * Note: for linux, "sysctl -w vm.swappiness=0" will keep the RAM you do + * have for your applications! + */ + "-Xmx4g",// was 800 + /* Optionally, grab all/most of the max heap at once. This makes sense for + * DS but is less necessary for other bigdata services. + */ + "-Xms2G", // 1/2 of the max heap is a good value. + /* + * This option will keep the JVM "alive" even when it is memory starved + * but perform of a memory starved JVM is terrible. + */ + //"-XX:-UseGCOverheadLimit", + /* Configure GC for higher throughput. Together these options + * request parallel old generation collection using N threads. + * The application will be paused when this occurs, but GC will + * be faster. Hence throughput will be higher. However, be + * sure to use JDK 6u10+ (6676016 : ParallelOldGC leaks memory). + * + * http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6676016 + */ + "-XX:+UseParallelOldGC", + //"-XX:ParallelGCThreads=8", + /* + * Enable JXM remote management for the data service. + * + * Note: This will not work if you have two data services on a host + * because it will assign the same port to each service. In order + * to work around that the argument would have to be specified by + * the service starter and then published in the Entry[] attributes. + * + * However, you can use ssh -X to open a tunnel with X + * forwarding and then run jconsole locally on the target host + * and bring up these data services without enabling remote + * JMX. + * + "-Dcom.sun.management.jmxremote.port=9999", + "-Dcom.sun.management.jmxremote.authenticate=false", + "-Dcom.sun.management.jmxremote.ssl=false", + */ + /* + * Override the size of the default pool of direct (native) byte + * buffers. This was done to ensure that the nodes region for + * index segments remain fully buffered as the index partitions + * approach their maximum size before a split. + */ + "-Dcom.bigdata.io.DirectBufferPool.bufferCapacity="+ + ConfigMath.multiply(Bytes.kilobyte,1250), + }; + + serviceCount = bigdata.dataServiceCount; + + // restrict where the data services can run. 
+ constraints = new IServiceConstraint[] { + + new JiniRunningConstraint(), + new ZookeeperRunningConstraint(), + //new TXRunningConstraint(), + + new HostAllowConstraint(bigdata.ds), + + new MaxDataServicesPerHostConstraint(bigdata.maxDataServicesPerHost), + + }; + + /* + * Note: the [dataDir] will be filled in when a new service + * instance is created based on the [servicesDir], so don't set it + * here yourself. + */ + properties = new NV[]{ + + new NV(DataServer.Options.BUFFER_MODE, + //""+com.bigdata.journal.BufferMode.Direct + ""+com.bigdata.journal.BufferMode.DiskWORM + ), + + /* Option disables synchronous overflow after N times and + * configures the offset bits for the journal for a scale-up + * configuration so we may use very large journals. + */ + //new NV(DataServer.Options.OVERFLOW_MAX_COUNT,"5"), + //new NV(DataServer.Options.OFFSET_BITS,""+com.bigdata.rawstore.WormAddressManager.SCALE_UP_OFFSET_BITS), + + /* Synchronous overflow is triggered when the live journal is + * this full (the value is a percentage, expressed as a + * floating point number in [0:1]). + */ + //new NV(DataServer.Options.OVERFLOW_THRESHOLD,".9"), + + /* Override the initial and maximum extent so that they are more + * more suited to large data sets. Overflow will be triggered as + * the size of the journal approaches the maximum extent. The + * initial and maximum extent are configured up above. + */ + + new NV(DataServer.Options.INITIAL_EXTENT, "" + bigdata.journalExtent), + new NV(DataServer.Options.MAXIMUM_EXTENT, "" + bigdata.journalExtent), + + /* Specify the queue capacity for the write service (unisolated + * write operations). + * + * 0 := SynchronousQueue. + * N := bounded queue of capacity N + * Integer.MAX_VALUE := unbounded queue. + * + * Note: The corePoolSize will never increase for an unbounded + * queue so the value specified for maximumPoolSize will + * essentially be ignored in this case. + * + * Note: A SynchronousQueue is a good choice here since it allows + * the #of threads to change in response to demand. The pool + * size should be unbounded when using a SynchronousQueue. + */ + new NV(DataServer.Options.WRITE_SERVICE_QUEUE_CAPACITY,"0"), // synchronous queue. + new NV(DataServer.Options.WRITE_SERVICE_CORE_POOL_SIZE,"50"), // + new NV(DataServer.Options.WRITE_SERVICE_MAXIMUM_POOL_SIZE,""+Integer.MAX_VALUE), + new NV(DataServer.Options.WRITE_SERVICE_PRESTART_ALL_CORE_THREADS,"true"), + + /* + * Options turns off overflow processing (debugging only). + * All writes will go onto the live journal, no index segments + * will be built, and indices will not be split, moved, + * joined, etc. + */ + //new NV(DataServer.Options.OVERFLOW_ENABLED,"false"), + + /* Maximum #of index partition moves per overflow. + */ + new NV(DataServer.Options.MAXIMUM_MOVES,"1"), + + /* Option controls how many index partitions may be moved onto + * any given target data service in a single overflow cycle + * and may be used to disable index partition moves (for + * debugging purposes). + */ + new NV(DataServer.Options.MAXIMUM_MOVES_PER_TARGET,"1"), + + /* The minimum CPU activity on a host before it will consider moving an + * index partition to shed some load. + * + * @todo A high threshold was chosen for the 3-node cluster since there + * are only 2 machines running data services. A "feature" in the load + * balancer allows moves between two heavily loaded hosts even when they + * are very close in their load, which is typically the case if you have + * only 2 machines running data services. 
The high threshold here is a + * workaround until the load balancer is modified to take into account + * whether or not a significant difference exists in the load between + * the source and possible target data service hosts. + */ + new NV(DataServer.Options.MOVE_PERCENT_CPU_TIME_THRESHOLD,".99"),//was .7 + + /* Option limits the #of index segments in a view before a + * compacting merge is forced. + */ + new NV(DataServer.Options.MAXIMUM_SEGMENTS_PER_VIEW,"5"), // default 6 + + /* Option limits the #of optional merges that are performed in each + * overflow cycle. + */ + new NV(DataServer.Options.MAXIMUM_OPTIONAL_MERGES_PER_OVERFLOW,"1"), + + /* Option effects how much splits are emphasized for a young + * scale-out index. If the index has fewer than this many + * partitions, then there will be a linear reduction in the + * target index partition size which will increase the likelyhood + * of an index split under heavy writes. This helps to distribute + * the index early in its life cycle. + */ + new NV(DataServer.Options.ACCELERATE_SPLIT_THRESHOLD,"20"),//20//50 + + /* Options accelerates overflow for data services have fewer than + * the threshold #of bytes under management. Acceleration is + * accomplished by reducing the maximum extent of the live journal + * linearly, but with a minimum of a 10M maximum extent. When the + * maximum extent is reduced by this option, the initial and the + * maximum extent will always be set to the same value for that + * journal. + */ + new NV(DataServer.Options.ACCELERATE_OVERFLOW_THRESHOLD, + //"0" + //""+com.bigdata.rawstore.Bytes.gigabyte + "2147483648" // 2G + ), + + // #of threads for index segment builds (default 3). + new NV(DataServer.Options.BUILD_SERVICE_CORE_POOL_SIZE,"5"), + + // #of threads for compacting merges (default 1). + new NV(DataServer.Options.MERGE_SERVICE_CORE_POOL_SIZE,"1"), + +// // Zero is full parallelism; otherwise #of threads in the pool. +// new NV(DataServer.Options.OVERFLOW_TASKS_CONCURRENT,"5"), + + /* Use Long.MAX_VALUE to always run overflow processing to + * completion (until no more data remains on the old journal). + */ + new NV(DataServer.Options.OVERFLOW_TIMEOUT,""+Long.MAX_VALUE), + + new NV(DataServer.Options.OVERFLOW_CANCELLED_WHEN_JOURNAL_FULL,"false"), + + new NV(DataServer.Options.LIVE_INDEX_CACHE_CAPACITY,"10"), // was 60 + + new NV(DataServer.Options.HISTORICAL_INDEX_CACHE_CAPACITY,"10"), // was 60 + + /* The maximum #of clean indices that will be retained on the + * hard reference queue (default 20). + */ + new NV(DataServer.Options.INDEX_CACHE_CAPACITY,"10"), // was 50 + + /* The timeout for unused index references before they are + * cleared from the hard reference queue (default is 1m). + * After this timeout the index reference is cleared from the + * queue and the index will be closed unless a hard reference + * exists to the index. + */ +// new NV(DataServer.Options.INDEX_CACHE_TIMEOUT,"1200000"), // 20m vs 1m + + /* The maximum #of clean index segments that will be retained + * on the hard reference queue (default 60). Note that ALL + * index segments are clean (they are read-only). + */ + new NV(DataServer.Options.INDEX_SEGMENT_CACHE_CAPACITY,"20"), // was 100 + + /* The timeout for unused index segment references before they + * are cleared from the hard reference queue (default is 1m). + * After this timeout the index segment reference is cleared + * from the queue and the index segment will be closed unless + * a hard reference exists to the index segment. 
+ */ +// new NV(DataServer.Options.INDEX_SEGMENT_CACHE_TIMEOUT,"60000000"), // 10m vs 1m + + /* The #of store files (journals and index segment stores) + * whose hard references will be maintained on a queue. The + * value should be slightly more than the index segment cache + * capacity since some journals also used by the views, but + * same journals are shared by all views so adding 3 is plenty.. + */ + new NV(DataServer.Options.STORE_CACHE_CAPACITY,"23"),// was 110 + +// new NV(DataServer.Options.STORE_CACHE_TIMEOUT,"1200000"),//20m vs 1m. + + }; + +} + +/** + * Configuration options for the containers used to distribute application tasks + * across a federation. + * + * @todo There should be a means to tag certain client servers for one purpose + * or another. This could be handled by subclassing, but it really should be + * declarative. + */ +com.bigdata.service.jini.ClientServer { + + args = new String[]{ + //bigdata.profilerAgent, + /* + * Grant lots of memory, but read on. + * + * Note: 32-bit JVMs have a 2G limit on the heap, but the practical limit + * is often much less - maybe 1400m. 64-bit JVMs can use much more RAM. + * However, the heap which you grant to java DOES NOT determine the total + * process heap. I have seen 64-bit java processes using an additional + * 3-4GB of heap beyond what is specified here. So, you need to consider + * the total RAM, subtract out enough for the other processes and the OS + * buffers, divide by the #of client/data services you plan to run on that + * host (generally 1-2) and then subtract out some more space for the JVM + * itself. + * + * For example, if you have 32G RAM and a 64-bit JVM and plan to run two + * CS/DS on the host, I would recommend 10G for the Java heap. You can + * expect to see Java grab another 4G per process over time. That makes + * the per CS/DS heap 14G. With two processes you have taken 28G leaving + * 4G for everything else. + * + * Here is another example: 4G RAM, 32-bit JVM, and 2 CS/DS per host. I + * would stick to 800m for the Java heap. You don't have a problem unless + * you see an OOM (OutOfMemoryException) or a process killed because GC is + * taking too much time. + * + * See http://www.ibm.com/developerworks/linux/library/j-nativememory-linux/index.html?ca=dgr-lnxw07Linux-JVM&S_TACT=105AGX59&S_CMP=grlnxw07 + * + * Note: for linux, "sysctl -w vm.swappiness=0" will keep the RAM you do + * have for your applications! + */ + "-Xmx2g", // was 800m + /* + * This option will keep the JVM "alive" even when it is memory starved + * but perform of a memory starved JVM is terrible. + */ + //"-XX:-UseGCOverheadLimit", + /* Configure GC for higher throughput. Together these options + * request parallel old generation collection using N threads. + * The application will be paused when this occurs, but GC will + * be faster. Hence throughput will be higher. + */ + "-XX:+UseParallelOldGC", + //"-XX:ParallelGCThreads=8", + /* + * Enable JXM remote management for the data service. + * + * Note: This will not work if you have two such services on a host + * because it will assign the same port to each service. In order + * to work around that the argument would have to be specified by + * the service starter and then published in the Entry[] attributes. + * + * However, you can use ssh -X to open a tunnel with X + * forwarding and then run jconsole locally on the target host + * and bring up these data services without enabling remote + * JMX. 
+ * + "-Dcom.sun.management.jmxremote.port=9996", + "-Dcom.sun.management.jmxremote.authenticate=false", + "-Dcom.sun.management.jmxremote.ssl=false", + */ + }; + + serviceCount = bigdata.clientServiceCount; + + constraints = new IServiceConstraint[] { + + new JiniRunningConstraint(), + new ZookeeperRunningConstraint(), + + new HostAllowConstraint(bigdata.cs), + + new MaxClientServicesPerHostConstraint(bigdata.maxClientServicePerHost), + + }; + + properties = new NV[] { + + }; + +} + +com.bigdata.service.jini.LoadBalancerServer { + + constraints = new IServiceConstraint[] { + + new JiniRunningConstraint(), + new ZookeeperRunningConstraint(), + + new HostAllowConstraint(bigdata.lbs) + + }; + + args = new String[]{ + /* + * FIXME The load balancer is a big piggish on long runs because it + * keeps the performance counter histories in RAM. While those histories + * are bounded, it still uses more RAM than it should. + */ + "-Xmx1G", + /* + * Enable JXM remote management for the data service. + * + * Note: This will not work if you have two data services on a host + * because it will assign the same port to each service. In order + * to work around that the argument would have to be specified by + * the service starter and then published in the Entry[] attributes. + * + "-Dcom.sun.management.jmxremote.port=9998", + "-Dcom.sun.management.jmxremote.authenticate=false", + "-Dcom.sun.management.jmxremote.ssl=false", + */ + }; + + /* + * Override some properties. + */ + properties = new NV[] { + + /* + * Each JiniClient (and hence all bigdata services) can run an + * httpd that will expose performance counters for the service and + * the host on which it is running. This property specifies the + * port for that httpd service. Valid values are port number, + * zero (0) for a random open port, MINUS ONE (-1) to disable the + * httpd service. + * + * Note: The load balancer httpd normally uses a known port so + * that it is easy to find. This is where you will find all of + * the performance counters aggregated for the entire federation, + * including their history. + */ + new NV(IBigdataClient.Options.HTTPD_PORT, "@LOAD_BALANCER_PORT@"), + + /* + * Note: The load balancer SHOULD NOT collect platform statistics + * itself since that interfers with its ability to aggregate + * statistics about the host on which it is running. Instead it + * should rely on the presence of at least one other service + * running on the same host to report those statistics to the load + * balancer. + */ + new NV(IBigdataClient.Options.COLLECT_PLATFORM_STATISTICS,"false"), + + /* + * The directory where the aggregated statistics will be logged. + * The load balancer will write snapshots of the historical + * counters into this directory. See LoadBalancerService javadoc + * for configuration options which effect how frequently it will + * log its counters and how many snapshots will be preserved. + * + * Note: You only need to specify this option if you want to put + * the files into a well known location, e.g, on a shared volume. + */ + //new NV(LoadBalancerServer.Options.LOG_DIR,"/opt2/var/log/bigdata"), + + /* Option essentially turns off the load-based decision making for + * this many minutes and substitutes a round-robin policy for + * recommending the least utilized data services. The main reason + * to this is to force the initial allocation to be distributed as + * evenly as possible across the data services in the cluster. 
+ */ + new NV(LoadBalancerServer.Options.INITIAL_ROUND_ROBIN_UPDATE_COUNT,"10"), + + }; + +} + +/** + * Configuration options for the KB instance. + */ +lubm { + + // The #of universities to generate. + // U8000 is 1.2B told triples + // U25000 is 3.4B told triples. + // U50000 is 6.7B told triples. + // U100000 is ~12B told triples. + static private univNum = 1000; + + // the KB namespace (based on the #of universities by default). + static private namespace = "U"+univNum+""; + + // minimum #of data services to run. + static private minDataServices = bigdata.dataServiceCount; + + // How long the master will wait to discover the minimum #of data + // services that you specified (ms). + static private awaitDataServicesTimeout = 8000; + + /* Multiplier for the scatter effect. + */ + static private scatterFactor = 1; + static private scatterFactor_term2id = 1; + + /* The #of index partitions to allocate on a scatter split. ZERO + * (0) means that 2 index partitions will be allocated per + * data service which partiticpates in the scatter split. + * Non-zero values directly give the #of index partitions to + * create. + */ + static private scatterSplitIndexPartitionCount = ConfigMath.multiply + ( scatterFactor, + bigdata.dataServiceCount + ); + static private scatterSplitIndexPartitionCount_term2id = ConfigMath.multiply + ( scatterFactor_term2id, + bigdata.dataServiceCount + ); + + // Use all discovered data services when scattering an index. + static private scatterSplitDataServiceCount = 0; + + /* Scatter split trigger point. The scatter split will not be + * triggered until the initial index partition has reached + * this percentage of a nominal index partition in size. + */ + static private scatterSplitPercentOfSplitThreshold = 0.5;//was .5 + + /* + * Multipliers that compensate for the consumer/producer ratio for + * the asynchronous index write API. These are empirical factors + * based on observing the ratio (chunkWritingTime/chunkWaitingTime). + * Assuming a constant chunk writing time, if the chunk size for each + * index is adjusted by its multiplier then this ratio would be 1:1. + * In practice, the chunk writing time is not a linear function of + * the chunk size, which is one reason why we prefer larger chunks + * and why the asynchronous write API is a win. + * + * Note: These factors were set relative to TERM2ID. However, when + * I reduced the scatterFactor for TERM2ID by 1/2, I doubled its + * chunk size to keep up the same throughput so it is now at 2.00 + * rather than 1.00. + */ + static private chunkSizeFactor_id2term = 1.79; + static private chunkSizeFactor_term2id = 2.00; + static private chunkSizeFactor_spo = 8.00; // was 3.89 + static private chunkSizeFactor_pos = 8.00; // was 13.37 + static private chunkSizeFactor_osp = 8.00; // was 27.35 + + /* The nominal sink chunk size. For each index, this is adjusted + * by the factor specified above. + */ +// static private sinkChunkSize = 10000; + static private sinkChunkSize = 1000; + + /* + * Specify / override some triple store properties. + * + * Note: You must reference this object in the section for the + * component which will actually create the KB instance, e.g., + * either the RDFDataLoadMaster or the LubmGeneratorMaster. + */ + static private properties = new NV[] { + + /* + * When "true", the store will perform incremental closure as + * the data are loaded. When "false", the closure will be + * computed after all data are loaded. 
(Actually, since we are + * not loading through the SAIL making this true does not + * cause incremental TM but it does disable closure, so + * "false" is what you need here). + */ + new NV(BigdataSail.Options.TRUTH_MAINTENANCE, "false" ), + + /* + * Enable rewrites of high-level queries into native rules (native JOIN + * execution). (Can be changed without re-loading the data to compare + * the performance of the Sesame query evaluation against using the + * native rules to perform query evaluation.) + */ + new NV(BigdataSail.Options.NATIVE_JOINS, "true"), + + /* + * May be used to turn off inference during query, but will + * cause ALL inferences to be filtered out when reading on the + * database. + */ + // new NV(BigdataSail.Options.INCLUDE_INFERRED, "false"), + + /* + * May be used to turn off query-time expansion of entailments such as + * (x rdf:type rdfs:Resource) and owl:sameAs even through those + * entailments were not materialized during forward closure (this + * disables the backchainer!) + */ + new NV(BigdataSail.Options.QUERY_TIME_EXPANDER, "false"), + + /* + * Option to restrict ourselves to RDFS only inference. This + * condition may be compared readily to many other stores. + * + * Note: While we can turn on some kinds of owl processing + * (e.g., TransitiveProperty, see below), we can not compute + * all the necessary entailments (only queries 11 and 13 + * benefit). + * + * Note: There are no owl:sameAs assertions in LUBM. + * + * Note: lubm query does not benefit from owl:inverseOf. + * + * Note: lubm query does benefit from owl:TransitiveProperty + * (queries 11 and 13). + * + * Note: owl:Restriction (which we can not compute) plus + * owl:TransitiveProperty is required to get all the answers + * for LUBM. + */ + new NV(BigdataSail.Options.AXIOMS_CLASS, "com.bigdata.rdf.axioms.RdfsAxioms"), + // new NV(BigdataSail.Options.AXIOMS_CLASS,"com.bigdata.rdf.axioms.NoAxioms"), + + /* + * Produce a full closure (all entailments) so that the + * backward chainer is always a NOP. Note th... [truncated message content] |