From: <bi...@us...> - 2008-12-09 02:22:09
|
Revision: 2650 http://archive-access.svn.sourceforge.net/archive-access/?rev=2650&view=rev Author: binzino Date: 2008-12-09 01:38:51 +0000 (Tue, 09 Dec 2008) Log Message: ----------- Initial revision. Added Paths: ----------- trunk/archive-access/projects/nutchwax/archive/src/java/org/archive/nutchwax/DistributedSearch.java Added: trunk/archive-access/projects/nutchwax/archive/src/java/org/archive/nutchwax/DistributedSearch.java =================================================================== --- trunk/archive-access/projects/nutchwax/archive/src/java/org/archive/nutchwax/DistributedSearch.java (rev 0) +++ trunk/archive-access/projects/nutchwax/archive/src/java/org/archive/nutchwax/DistributedSearch.java 2008-12-09 01:38:51 UTC (rev 2650) @@ -0,0 +1,90 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
package org.archive.nutchwax;

import java.io.IOException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.ipc.RPC;
import org.apache.nutch.util.NutchConfiguration;
import org.apache.nutch.searcher.NutchBean;

/**
 * <p>
 * A command-line wrapper for the Nutch DistributedSearch$Server class
 * which adds the NutchBeanModifier.modify() call to be able to handle
 * parallel indices as well as other NutchWAX enhancements.
 * </p>
 * <p>
 * Invoked the same as the regular Nutch DistributedSearch$Server, but
 * with the NutchWAX package prefix, i.e.
 * </p>
 * <code>
 *   $ nutch org.archive.nutchwax.DistributedSearch\$Server 9000 &lt;crawl-dir&gt;
 * </code>
 */
public class DistributedSearch
{
  public static final Log LOG = LogFactory.getLog(DistributedSearch.class);

  private DistributedSearch() {}                  // no public ctor

  /** The search server. */
  public static class Server
  {

    private Server()
    {
    }

    /**
     * Runs a search server.
     *
     * @param args exactly two arguments: the port to listen on and the
     *             directory handed to the {@link NutchBean}
     * @throws Exception if the server cannot be created or started
     */
    public static void main(String[] args) throws Exception
    {
      String usage = "DistributedSearch$Server <port> <index dir>";

      // Both arguments are mandatory: args[1] is read unconditionally
      // below, so anything other than exactly two arguments (including a
      // lone port) is a usage error rather than an
      // ArrayIndexOutOfBoundsException.
      if (args.length != 2)
      {
        System.err.println(usage);
        System.exit(-1);
      }

      int port;
      try
      {
        port = Integer.parseInt(args[0]);
      }
      catch (NumberFormatException e)
      {
        // Report a bad port the same way as any other usage error instead
        // of letting a stack trace escape from main().
        System.err.println("Invalid port: " + args[0]);
        System.err.println(usage);
        System.exit(-1);
        return; // not reached; lets the compiler see 'port' as assigned
      }

      Path directory = new Path(args[1]);

      Configuration conf = NutchConfiguration.create();

      org.apache.hadoop.ipc.Server server = getServer(conf, directory, port);
      server.start();
      server.join();
    }

    /**
     * Builds the RPC server that exposes a WAX-enhanced {@link NutchBean}.
     *
     * @param conf      configuration used to create the bean and the server;
     *                  <code>searcher.num.handlers</code> (default 10) sets
     *                  the number of handlers
     * @param directory directory passed to the {@link NutchBean} constructor
     * @param port      port the server listens on
     * @return the server, created but not yet started
     * @throws IOException if the bean or the server cannot be created
     */
    static org.apache.hadoop.ipc.Server getServer(Configuration conf, Path directory, int port) throws IOException
    {
      NutchBean bean = new NutchBean(conf, directory);

      // Modify the NutchBean, adding the WAX enhancements to it.
      NutchWaxBean.NutchBeanModifier.modify( bean );

      int numHandlers = conf.getInt("searcher.num.handlers", 10);

      // "0.0.0.0" is the wildcard address, so the server is not tied to a
      // single network interface.
      return RPC.getServer(bean, "0.0.0.0", port, numHandlers, true, conf);
    }

  }

}