From: <fko...@us...> - 2010-09-01 17:41:37
|
Revision: 3486
          http://bigdata.svn.sourceforge.net/bigdata/?rev=3486&view=rev
Author:   fkoliver
Date:     2010-09-01 17:41:31 +0000 (Wed, 01 Sep 2010)

Log Message:
-----------
Replace cluster config sections using FileSystemScanner for bulk loading
with alternatives (1) using FileServer and FileSystemScannerServer to serve
up files from a local non-shared file system to the bulk loader, and
(2) using URLListScanner to provide URLs for externally served up files.
Replace "clientsTemplate" with "clientServiceCount" as the code which
searches for IClientService instances moved from config to java.

Modified Paths:
--------------
    branches/maven_scaleout/src/main/deploy/var/config/jini/bigdataCluster.config
    branches/maven_scaleout/src/main/deploy/var/config/jini/bigdataCluster16.config
    branches/maven_scaleout/src/main/deploy/var/config/jini/bigdataStandalone.config

Modified: branches/maven_scaleout/src/main/deploy/var/config/jini/bigdataCluster.config
===================================================================
--- branches/maven_scaleout/src/main/deploy/var/config/jini/bigdataCluster.config 2010-09-01 16:59:34 UTC (rev 3485)
+++ branches/maven_scaleout/src/main/deploy/var/config/jini/bigdataCluster.config 2010-09-01 17:41:31 UTC (rev 3486)
@@ -37,6 +37,7 @@
 import com.bigdata.rdf.lexicon.LexiconRelation;
 import com.bigdata.rdf.lexicon.LexiconKeyOrder;
 import com.bigdata.rawstore.Bytes;
+import java.net.URL;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.TimeUnit.*;
 
@@ -1726,17 +1727,34 @@
     // When true, a pre-existing job with the same name is deleted first.
     deleteJob = true;
 
-    // Scanner identifies resources to be loaded.
-    resourceScannerFactory = com.bigdata.service.jini.master.FileSystemScanner.newFactory(
-        new File("@NAS@/lubm/U10"), // dataDir
-        //new File("/nas/metrics/lehigh/U10-compressed"), // dataDir
-        new com.bigdata.rdf.load.RDFFilenameFilter() // optional filename filter.
- ); + // ALTERNATIVE 1: Start http servers for the directories containing + // the ontology and the data files: - // The ontology to load (file or directory) when the KB is created. - ontology = new File("@install.lubm.config.dir@/univ-bench.owl"); - //ontology = new File("/nas/metrics/lehigh/univ-bench.owl"); + ontologyDir = new File("/tmp/lubm"); + dataDir = new File("/tmp/lubm/datafiles"); + static dataServer = new com.bigdata.service.jini.master.FileServer( + dataDir, 20, 8702, 20); + static ontologyServer = new com.bigdata.service.jini.master.FileServer( + ontologyDir, 5, 8703, 5); + resourceScannerFactory = + com.bigdata.service.jini.master.FileSystemScannerServer.newFactory( + dataDir, + new com.bigdata.rdf.load.RDFFilenameFilter(), dataServer); + ontology = com.bigdata.service.jini.master.FileServer.getURL( + ontologyServer, "/univ-bench.owl"); + // ALTERNATIVE 2: Supply the data files and ontology from an + // external web server. + +// ontology = new URL("http://stub/lubm/univ-bench.owl"); +// resourceScannerFactory = +// com.bigdata.service.jini.master.URLListScanner.newFactory( +// new URL[] { +// new URL("http://stub/lubm/datafiles/University0_0.owl"), +// new URL("http://stub/lubm/datafiles/University0_1.owl"), +// ... +// }); + // The maximum thread pool size for RDF parser tasks. //parserPoolSize = 5; @@ -1787,7 +1805,7 @@ forceOverflow = false; /* How long the master will wait in milliseconds to discover the services - * that you specify for [servicesTemplates] and [clientsTemplate]. + * that you specify for [servicesTemplates]. */ awaitServicesTimeout = 10000; @@ -1831,25 +1849,8 @@ }; - /* Template for matching the services to which the clients will be - * distributed for execution. Normally you will specify - * IClientService as the interface to be discovered. 
While it is - * possible to run tasks on an IDataService or even an - * IMetadataService since they both implement IRemoteExecutor, it - * is generally discouraged unless the tasks require explicit - * access to the local index partitions for their execution. - */ - clientsTemplate = new ServicesTemplate( - bigdata.clientServiceCount, // minMatches - new ServiceTemplate( - null, //serviceID - new Class[]{ - com.bigdata.service.IClientService.class - }, - null // attributes - ), - null // filter - ); + // Minimum number of client services for distributed execution. + clientServiceCount = bigdata.clientServiceCount; /* * RDF distributed data loader options. @@ -1890,7 +1891,7 @@ forceOverflow = true; /* How long the master will wait in milliseconds to discover the services - * that you specify for [servicesTemplates] and [clientsTemplate]. + * that you specify for [servicesTemplates]. */ awaitServicesTimeout = 10000; @@ -1934,25 +1935,8 @@ }; - /* Template for matching the services to which the clients will be - * distributed for execution. Normally you will specify - * IClientService as the interface to be discovered. While it is - * possible to run tasks on an IDataService or even an - * IMetadataService since they both implement IRemoteExecutor, it - * is generally discouraged unless the tasks require explicit - * access to the local index partitions for their execution. - */ - clientsTemplate = new ServicesTemplate( - bigdata.clientServiceCount, // minMatches - new ServiceTemplate( - null, //serviceID - new Class[]{ - com.bigdata.service.IClientService.class - }, - null // attributes - ), - null // filter - ); + // Minimum number of client services for distributed execution. + clientServiceCount = bigdata.clientServiceCount; /* * RDF distributed data loader options. 
@@ -2063,7 +2047,7 @@ } -com.bigdata.service.jini.BroadcastSighup { +com.bigdata.service.jini.util.BroadcastSighup { pushConfig = false; @@ -2127,24 +2111,8 @@ }; - /* Template for matching the services to which the clients will be - * distributed for execution. Normally you will specify - * IClientService as the interface to be discovered. While it is - * possible to run tasks on an IDataService or even an - * IMetadataService since they both implement IRemoteExecutor, it - * is generally discouraged unless the tasks require explicit - * access to the local index partitions for their execution. - */ - clientsTemplate = new ServicesTemplate( - bigdata.clientServiceCount, // minMatches - new ServiceTemplate( - null, //serviceID - new Class[]{ - com.bigdata.service.IClientService.class - }, - null), // attributes - null // filter - ); + // Minimum number of client services for distributed execution. + clientServiceCount = bigdata.clientServiceCount; /* The initial #of index partitions for the scale-out index * (computed as #partitions per data service). Choose at least Modified: branches/maven_scaleout/src/main/deploy/var/config/jini/bigdataCluster16.config =================================================================== --- branches/maven_scaleout/src/main/deploy/var/config/jini/bigdataCluster16.config 2010-09-01 16:59:34 UTC (rev 3485) +++ branches/maven_scaleout/src/main/deploy/var/config/jini/bigdataCluster16.config 2010-09-01 17:41:31 UTC (rev 3486) @@ -37,6 +37,7 @@ import com.bigdata.rdf.lexicon.LexiconRelation; import com.bigdata.rdf.lexicon.LexiconKeyOrder; import com.bigdata.rawstore.Bytes; +import java.net.URL; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit.*; @@ -1775,19 +1776,34 @@ // KB properties made visible to JiniFederation#getProperties() properties = lubm.properties; - // Scanner identifies resources to be loaded. 
- resourceScannerFactory = com.bigdata.service.jini.master.FileSystemScanner.newFactory( - new File("/nas/data/U8000"), // dataDir - //new File("@NAS@/lubm/U10"), // dataDir - new com.bigdata.rdf.load.RDFFilenameFilter() // optional filename filter. - ); + // ALTERNATIVE 1: Start http servers for the directories containing + // the ontology and the data files: - // The ontology to load (file or directory) when the KB is created. - // This is a directory containing the ontology and some pre-generated data sets. - ontology = new File("/nas/data/univ-bench.owl"); - // This is the directory into which the ontology is installed by 'ant lubm-install'. - //ontology = new File("@install.lubm.config.dir@/univ-bench.owl"); + ontologyDir = new File("/tmp/lubm"); + dataDir = new File("/tmp/lubm/datafiles"); + static dataServer = new com.bigdata.service.jini.master.FileServer( + dataDir, 20, 8702, 20); + static ontologyServer = new com.bigdata.service.jini.master.FileServer( + ontologyDir, 5, 8703, 5); + resourceScannerFactory = + com.bigdata.service.jini.master.FileSystemScannerServer.newFactory( + dataDir, + new com.bigdata.rdf.load.RDFFilenameFilter(), dataServer); + ontology = com.bigdata.service.jini.master.FileServer.getURL( + ontologyServer, "/univ-bench.owl"); + // ALTERNATIVE 2: Supply the data files and ontology from an + // external web server. + +// ontology = new URL("http://stub/lubm/univ-bench.owl"); +// resourceScannerFactory = +// com.bigdata.service.jini.master.URLListScanner.newFactory( +// new URL[] { +// new URL("http://stub/lubm/datafiles/University0_0.owl"), +// new URL("http://stub/lubm/datafiles/University0_1.owl"), +// ... +// }); + // The maximum thread pool size for RDF parser tasks. //parserPoolSize = 5; @@ -1838,7 +1854,7 @@ forceOverflow = false; /* How long the master will wait in milliseconds to discover the services - * that you specify for [servicesTemplates] and [clientsTemplate]. + * that you specify for [servicesTemplates]. 
*/ awaitServicesTimeout = 10000; @@ -1882,25 +1898,8 @@ }; - /* Template for matching the services to which the clients will be - * distributed for execution. Normally you will specify - * IClientService as the interface to be discovered. While it is - * possible to run tasks on an IDataService or even an - * IMetadataService since they both implement IRemoteExecutor, it - * is generally discouraged unless the tasks require explicit - * access to the local index partitions for their execution. - */ - clientsTemplate = new ServicesTemplate( - bigdata.clientServiceCount, // minMatches - new ServiceTemplate( - null, //serviceID - new Class[]{ - com.bigdata.service.IClientService.class - }, - null // attributes - ), - null // filter - ); + // Minimum number of client services for distributed execution. + clientServiceCount = bigdata.clientServiceCount; /* * RDF distributed data loader options. @@ -1941,7 +1940,7 @@ forceOverflow = true; /* How long the master will wait in milliseconds to discover the services - * that you specify for [servicesTemplates] and [clientsTemplate]. + * that you specify for [servicesTemplates]. */ awaitServicesTimeout = 10000; @@ -1985,25 +1984,8 @@ }; - /* Template for matching the services to which the clients will be - * distributed for execution. Normally you will specify - * IClientService as the interface to be discovered. While it is - * possible to run tasks on an IDataService or even an - * IMetadataService since they both implement IRemoteExecutor, it - * is generally discouraged unless the tasks require explicit - * access to the local index partitions for their execution. - */ - clientsTemplate = new ServicesTemplate( - bigdata.clientServiceCount, // minMatches - new ServiceTemplate( - null, //serviceID - new Class[]{ - com.bigdata.service.IClientService.class - }, - null // attributes - ), - null // filter - ); + // Minimum number of client services for distributed execution. 
+ clientServiceCount = bigdata.clientServiceCount; /* * RDF distributed data loader options. @@ -2181,24 +2163,8 @@ }; - /* Template for matching the services to which the clients will be - * distributed for execution. Normally you will specify - * IClientService as the interface to be discovered. While it is - * possible to run tasks on an IDataService or even an - * IMetadataService since they both implement IRemoteExecutor, it - * is generally discouraged unless the tasks require explicit - * access to the local index partitions for their execution. - */ - clientsTemplate = new ServicesTemplate( - bigdata.clientServiceCount, // minMatches - new ServiceTemplate( - null, //serviceID - new Class[]{ - com.bigdata.service.IClientService.class - }, - null), // attributes - null // filter - ); + // Minimum number of client services for distributed execution. + clientServiceCount = bigdata.clientServiceCount; /* The initial #of index partitions for the scale-out index * (computed as #partitions per data service). Choose at least Modified: branches/maven_scaleout/src/main/deploy/var/config/jini/bigdataStandalone.config =================================================================== --- branches/maven_scaleout/src/main/deploy/var/config/jini/bigdataStandalone.config 2010-09-01 16:59:34 UTC (rev 3485) +++ branches/maven_scaleout/src/main/deploy/var/config/jini/bigdataStandalone.config 2010-09-01 17:41:31 UTC (rev 3486) @@ -37,6 +37,7 @@ import com.bigdata.rdf.lexicon.LexiconRelation; import com.bigdata.rdf.lexicon.LexiconKeyOrder; import com.bigdata.rawstore.Bytes; +import java.net.URL; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit.*; @@ -1740,16 +1741,15 @@ // When true, a pre-existing job with the same name is deleted first. deleteJob = true; - // Scanner identifies resources to be loaded. 
- resourceScannerFactory = com.bigdata.service.jini.master.FileSystemScanner.newFactory( - new File("/nas/data/lubm/U1000"), // dataDir - //new File("/nas/metrics/lehigh/U10-compressed"), // dataDir - new com.bigdata.rdf.load.RDFFilenameFilter() // optional filename filter. + // Scanner identifies resources to be loaded. + resourceScannerFactory = com.bigdata.service.jini.master.URLListScanner.newFactory( + new File("/nas/data/lubm/U1000").toURI().toURL(), // dataDir + new com.bigdata.rdf.load.RDFFilenameFilter() // optional filename filter. ); // The ontology to load (file or directory) when the KB is created. //ontology = new File("@install.lubm.config.dir@/univ-bench.owl"); - ontology = new File("/nas/data/lubm/univ-bench.owl"); + ontology = new File("/nas/data/lubm/univ-bench.owl").toURI().toURL(); // The maximum thread pool size for RDF parser tasks. //parserPoolSize = 5; @@ -1801,7 +1801,7 @@ forceOverflow = false; /* How long the master will wait in milliseconds to discover the services - * that you specify for [servicesTemplates] and [clientsTemplate]. + * that you specify for [servicesTemplates]. */ awaitServicesTimeout = 10000; @@ -1845,25 +1845,8 @@ }; - /* Template for matching the services to which the clients will be - * distributed for execution. Normally you will specify - * IClientService as the interface to be discovered. While it is - * possible to run tasks on an IDataService or even an - * IMetadataService since they both implement IRemoteExecutor, it - * is generally discouraged unless the tasks require explicit - * access to the local index partitions for their execution. - */ - clientsTemplate = new ServicesTemplate( - bigdata.clientServiceCount, // minMatches - new ServiceTemplate( - null, //serviceID - new Class[]{ - com.bigdata.service.IClientService.class - }, - null // attributes - ), - null // filter - ); + // Minimum number of client services for distributed execution. 
+ clientServiceCount = bigdata.clientServiceCount; /* * RDF distributed data loader options. This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |