[Archive-access-cvs] SF.net SVN: archive-access:[2960] trunk/archive-access/projects/nutchwax/ arch

SourceForge Headquarters 1320 Columbia Street Suite 310 San Diego, CA 92101 +1 (858) 422-6466

Revision: 2960
          http://archive-access.svn.sourceforge.net/archive-access/?rev=2960&view=rev
Author:   binzino
Date:     2010-02-22 05:17:20 +0000 (Mon, 22 Feb 2010)

Log Message:
-----------
Initial revision of OpenSearch master/slave system.  Work-in-progress.

Added Paths:
-----------
    trunk/archive-access/projects/nutchwax/archive/lib/jdom.LICENSE
    trunk/archive-access/projects/nutchwax/archive/lib/jdom.jar
    trunk/archive-access/projects/nutchwax/archive/src/java/org/archive/nutchwax/OpenSearchMaster.java
    trunk/archive-access/projects/nutchwax/archive/src/java/org/archive/nutchwax/OpenSearchMasterServlet.java
    trunk/archive-access/projects/nutchwax/archive/src/java/org/archive/nutchwax/OpenSearchSlave.java

Added: trunk/archive-access/projects/nutchwax/archive/lib/jdom.LICENSE
===================================================================

--- trunk/archive-access/projects/nutchwax/archive/lib/jdom.LICENSE	                        (rev 0)
+++ trunk/archive-access/projects/nutchwax/archive/lib/jdom.LICENSE	2010-02-22 05:17:20 UTC (rev 2960)
@@ -0,0 +1,56 @@
+/*-- 
+
+ $Id: LICENSE.txt,v 1.11 2004/02/06 09:32:57 jhunter Exp $
+
+ Copyright (C) 2000-2004 Jason Hunter & Brett McLaughlin.
+ All rights reserved.
+ 
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ 
+ 1. Redistributions of source code must retain the above copyright
+    notice, this list of conditions, and the following disclaimer.
+ 
+ 2. Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions, and the disclaimer that follows 
+    these conditions in the documentation and/or other materials 
+    provided with the distribution.
+
+ 3. The name "JDOM" must not be used to endorse or promote products
+    derived from this software without prior written permission.  For
+    written permission, please contact <request_AT_jdom_DOT_org>.
+ 
+ 4. Products derived from this software may not be called "JDOM", nor
+    may "JDOM" appear in their name, without prior written permission
+    from the JDOM Project Management <request_AT_jdom_DOT_org>.
+ 
+ In addition, we request (but do not require) that you include in the 
+ end-user documentation provided with the redistribution and/or in the 
+ software itself an acknowledgement equivalent to the following:
+     "This product includes software developed by the
+      JDOM Project (http://www.jdom.org/)."
+ Alternatively, the acknowledgment may be graphical using the logos 
+ available at http://www.jdom.org/images/logos.
+
+ THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED.  IN NO EVENT SHALL THE JDOM AUTHORS OR THE PROJECT
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ SUCH DAMAGE.
+
+ This software consists of voluntary contributions made by many 
+ individuals on behalf of the JDOM Project and was originally 
+ created by Jason Hunter <jhunter_AT_jdom_DOT_org> and
+ Brett McLaughlin <brett_AT_jdom_DOT_org>.  For more information
+ on the JDOM Project, please see <http://www.jdom.org/>. 
+
+ */
+

Added: trunk/archive-access/projects/nutchwax/archive/lib/jdom.jar
===================================================================
(Binary files differ)


Property changes on: trunk/archive-access/projects/nutchwax/archive/lib/jdom.jar
___________________________________________________________________
Added: svn:mime-type
   + application/octet-stream

Added: trunk/archive-access/projects/nutchwax/archive/src/java/org/archive/nutchwax/OpenSearchMaster.java
===================================================================
--- trunk/archive-access/projects/nutchwax/archive/src/java/org/archive/nutchwax/OpenSearchMaster.java	                        (rev 0)
+++ trunk/archive-access/projects/nutchwax/archive/src/java/org/archive/nutchwax/OpenSearchMaster.java	2010-02-22 05:17:20 UTC (rev 2960)
@@ -0,0 +1,364 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.archive.nutchwax;
+
+import java.io.IOException;
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
+import java.io.FileInputStream;
+import java.util.Comparator;
+import java.util.Collections;
+import java.util.List;
+import java.util.ArrayList;
+import java.util.LinkedList;
+
+import org.jdom.Document;
+import org.jdom.Element;
+import org.jdom.Namespace;
+import org.jdom.output.XMLOutputter;
+
+
+/** 
+ * 
+ */   
+public class OpenSearchMaster
+{
+  List<OpenSearchSlave> slaves = new ArrayList<OpenSearchSlave>( );
+  long timeout = 30 * 1000;
+
+  public OpenSearchMaster( String slavesFile, long timeout )
+    throws IOException
+  {
+    this( slavesFile );
+    this.timeout = timeout;
+  }
+
+  public OpenSearchMaster( String slavesFile )
+    throws IOException
+  {
+    BufferedReader r = null;
+    try
+      {
+        r = new BufferedReader( new InputStreamReader( new FileInputStream( slavesFile ), "utf-8" ) );
+
+        String line;
+        while ( (line = r.readLine()) != null )
+          {
+            line = line.trim();
+            if ( line.length() == 0 || line.charAt( 0 ) == '#' )
+              {
+                // Ignore it.
+                continue ;
+              }
+
+            OpenSearchSlave slave = new OpenSearchSlave( line );
+
+            this.slaves.add( slave );            
+          }
+      }
+    finally
+      {
+        try { if ( r != null ) r.close(); } catch ( IOException ioe ) { }
+      }
+    
+  }
+  /** Queries all slaves in parallel, merges (and, if hitsPerSite &gt; 0, site-collapses) their items, and returns the requested page as an OpenSearch RSS document. */
+  public Document query( String query, int startIndex, int numResults, int hitsPerSite )
+  {
+    long startTime = System.currentTimeMillis( );
+    // Ask each slave for its top (startIndex+numResults) hits: the merged page may come from any one slave.
+    List<SlaveQueryThread> slaveThreads = new ArrayList<SlaveQueryThread>( this.slaves.size() );
+
+    for ( OpenSearchSlave slave : this.slaves )
+      {
+        SlaveQueryThread sqt = new SlaveQueryThread( slave, query, 0, (startIndex+numResults), hitsPerSite );
+
+        sqt.start( );
+
+        slaveThreads.add( sqt );        
+      }
+
+    waitForThreads( slaveThreads, this.timeout, startTime );
+
+    LinkedList<Element> items = new LinkedList<Element>( );
+    long totalResults = 0;
+
+    for ( SlaveQueryThread sqt : slaveThreads )
+      {
+        if ( sqt.throwable != null || sqt.response == null )
+          {
+            // TODO: Handle problems with slaves (query failed, or no response yet because the timeout expired).
+            continue ;
+          }
+
+        // Dump all the results ("item" elements) into a single list.
+        Element channel = sqt.response.getRootElement( ).getChild( "channel" );
+        items.addAll( (List<Element>) channel.getChildren( "item" ) );
+        channel.removeChildren( "item" );
+
+        try
+          {
+            totalResults += Integer.parseInt( channel.getChild( "totalResults", Namespace.getNamespace( "http://a9.com/-/spec/opensearchrss/1.0/" ) ).getTextTrim( ) );
+          }
+        catch ( Exception e ) 
+          {
+            // TODO: Log error getting total.
+          }
+
+      }
+
+    if ( items.size( ) > 0 && hitsPerSite > 0 )
+      {
+        Collections.sort( items, new ElementSiteThenScoreComparator( ) );
+
+        LinkedList<Element> collapsed = new LinkedList<Element>( );
+        // After the sort above, items of the same site are adjacent, so comparing with the last kept item suffices.
+        collapsed.add( items.removeFirst( ) );
+
+        int count = 1;
+        for ( Element item : items )
+          {
+            String lastSite = collapsed.getLast( ).getChild( "site", Namespace.getNamespace( "http://www.nutch.org/opensearchrss/1.0/" ) ).getTextTrim( );
+
+            if ( lastSite.length( ) == 0 ||
+                 !lastSite.equals( item.getChild( "site", Namespace.getNamespace( "http://www.nutch.org/opensearchrss/1.0/" ) ).getTextTrim( ) ) )
+              {
+                collapsed.add( item );
+                count = 1;                
+              }
+            else if ( count < hitsPerSite )
+              {
+                collapsed.add( item );
+                count++;
+              }
+            else
+              {
+                // TODO: Log collapse of item.
+              }
+          }
+
+        // Replace the list of items with the collapsed list.
+        items = collapsed;
+      }
+
+    Collections.sort( items, new ElementScoreComparator( ) );
+
+    // Build the final results OpenSearch XML document.
+    Element channel = new Element( "channel" );
+    channel.addContent( new Element( "title"       ) );
+    channel.addContent( new Element( "description" ) );
+    channel.addContent( new Element( "link"        ) );
+
+    Element eTotalResults = new Element( "totalResults", Namespace.getNamespace( "http://a9.com/-/spec/opensearchrss/1.0/" ) );
+    Element eStartIndex   = new Element( "startIndex",   Namespace.getNamespace( "http://a9.com/-/spec/opensearchrss/1.0/" ) );
+    Element eItemsPerPage = new Element( "itemsPerPage", Namespace.getNamespace( "http://a9.com/-/spec/opensearchrss/1.0/" ) );
+
+    eTotalResults.setText( Long.toString( totalResults ) );
+    eStartIndex.  setText( Long.toString( startIndex   ) );
+    eItemsPerPage.setText( Long.toString( numResults   ) );
+
+    channel.addContent( eTotalResults );
+    channel.addContent( eStartIndex   );
+    channel.addContent( eItemsPerPage );
+
+    // Get a sub-list of only the items we want: [startIndex,(startIndex+numResults)), clamped to the list size.
+    List<Element> subList = items.subList( Math.min(  startIndex,             items.size( ) ),
+                                           Math.min( (startIndex+numResults), items.size( ) ) );
+    channel.addContent( subList );
+
+    Element rss = new Element( "rss" );
+    rss.addContent( channel );
+
+    return new Document( rss );
+  }
+
+
+  /**
+   * Waits for each thread in turn, but never past <code>timeout</code> ms after
+   * <code>startTime</code>; if interrupted, restores the interrupt flag and stops waiting.
+   */
+  private void waitForThreads( List<SlaveQueryThread> threads, long timeout, long startTime )
+  {
+    for ( Thread t : threads )
+      {
+        long timeRemaining = timeout - (System.currentTimeMillis( ) - startTime);
+
+        // If we are out of time, don't wait for any more threads (join( 0 ) would wait forever).
+        if ( timeRemaining <= 0 )
+          {
+            break; 
+          }
+        // Otherwise, wait for the next unfinished thread to finish.
+        try
+          {
+            t.join( timeRemaining );
+          }
+        catch ( InterruptedException ie ) 
+          {
+            Thread.currentThread( ).interrupt( );  // Keep the interrupt visible to our caller.
+            break;
+          }
+      }
+  }
+
+  
+  public static void main( String args[] )
+    throws Exception
+  {
+    String usage = "OpenSearchMaster [OPTIONS] SLAVES.txt query"
+      + "\n\t-h <n>    Hits per site"
+      + "\n\t-n <n>    Number of results"
+      + "\n\t-s <n>    Start index"
+      + "\n";
+    
+    if ( args.length < 2 )
+      {
+        System.err.println( usage );
+        System.exit( 1 );
+      }
+
+    String slavesFile = args[args.length - 2];
+    String query      = args[args.length - 1];
+    
+    int startIndex  = 0;
+    int hitsPerSite = 0;
+    int numHits     = 10;
+    for ( int i = 0 ; i < args.length - 2 ; i++ )
+      {
+        try
+          {
+            if ( "-h".equals( args[i] ) )
+              {
+                i++;
+                hitsPerSite = Integer.parseInt( args[i] );
+              }
+            if ( "-n".equals( args[i] ) )
+              {
+                i++;
+                numHits = Integer.parseInt( args[i] );
+              }
+            if ( "-s".equals( args[i] ) )
+              {
+                i++;
+                startIndex = Integer.parseInt( args[i] );
+              }
+          }
+        catch ( NumberFormatException nfe ) 
+          {
+            System.err.println( "Error: not a numeric value: " + args[i] );
+            System.err.println( usage );
+            System.exit( 1 );
+          }
+      }
+
+    OpenSearchMaster master = new OpenSearchMaster( slavesFile );
+
+    Document doc = master.query( query, startIndex, numHits, hitsPerSite );
+
+    (new XMLOutputter()).output( doc, System.out );
+  }
+
+}
+
+/** Thread that runs a single query against a single slave and records either the response document or the failure. */
+class SlaveQueryThread extends Thread
+{
+  OpenSearchSlave slave;
+
+  String query;
+  int    startIndex;
+  int    numResults;
+  int    hitsPerSite;
+  // Outcome of run( ).  Volatile: the master may read these after a timed-out join( ), i.e. while run( ) is still active.
+  volatile Document        response;
+  volatile Throwable       throwable;
+
+
+  SlaveQueryThread( OpenSearchSlave slave, String query, int startIndex, int numResults, int hitsPerSite )
+  {
+    this.slave       = slave;
+    this.query       = query;
+    this.startIndex  = startIndex;
+    this.numResults  = numResults;
+    this.hitsPerSite = hitsPerSite;
+  }
+
+  public void run( )
+  {
+    try
+      {
+        this.response = this.slave.query( this.query, this.startIndex, this.numResults, this.hitsPerSite );
+      }
+    catch ( Throwable t )
+      {
+        this.throwable = t;
+      }
+  }
+}
+
+/** Orders items by descending nutch "score"; null items and items without a score element sort last, unparseable scores count as 0. */
+class ElementScoreComparator implements Comparator<Element>
+{
+  public int compare( Element e1, Element e2 )
+  {
+    if ( e1 == e2 )   return 0;
+    if ( e1 == null ) return 1;
+    if ( e2 == null ) return -1;
+
+    Element score1 = e1.getChild( "score", Namespace.getNamespace( "http://www.nutch.org/opensearchrss/1.0/" ) );
+    Element score2 = e2.getChild( "score", Namespace.getNamespace( "http://www.nutch.org/opensearchrss/1.0/" ) );
+
+    if ( score1 == score2 ) return 0;
+    if ( score1 == null )   return 1;
+    if ( score2 == null )   return -1;
+
+    String text1 = score1.getText().trim();
+    String text2 = score2.getText().trim();
+
+    float value1 = 0.0f;
+    float value2 = 0.0f;
+    // An empty or malformed score text keeps the default of 0.
+    try { value1 = Float.parseFloat( text1 ); } catch ( NumberFormatException nfe ) { }
+    try { value2 = Float.parseFloat( text2 ); } catch ( NumberFormatException nfe ) { }
+
+    // Float.compare yields a consistent total order (even for NaN); the operands
+    // are swapped so that higher scores sort first.
+    return Float.compare( value2, value1 );
+  }
+}
+
+class ElementSiteThenScoreComparator extends ElementScoreComparator
+{
+  public int compare( Element e1, Element e2 )
+  {
+    if ( e1 == e2 )   return 0;
+    if ( e1 == null ) return 1;
+    if ( e2 == null ) return -1;
+
+    String site1 = e1.getChild( "site", Namespace.getNamespace( "http://www.nutch.org/opensearchrss/1.0/" ) ).getTextTrim();
+    String site2 = e2.getChild( "site", Namespace.getNamespace( "http://www.nutch.org/opensearchrss/1.0/" ) ).getTextTrim();
+    
+    if ( site1.equals( site2 ) )
+      {
+        // Sites are equal, then compare scores.
+        return super.compare( e1, e2 );
+      }
+
+    return site1.compareTo( site2 );
+  }
+}
\ No newline at end of file

Added: trunk/archive-access/projects/nutchwax/archive/src/java/org/archive/nutchwax/OpenSearchMasterServlet.java
===================================================================
--- trunk/archive-access/projects/nutchwax/archive/src/java/org/archive/nutchwax/OpenSearchMasterServlet.java	                        (rev 0)
+++ trunk/archive-access/projects/nutchwax/archive/src/java/org/archive/nutchwax/OpenSearchMasterServlet.java	2010-02-22 05:17:20 UTC (rev 2960)
@@ -0,0 +1,52 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.archive.nutchwax;
+
+import java.io.IOException;
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
+import java.io.FileInputStream;
+import java.util.List;
+import java.util.ArrayList;
+import javax.servlet.ServletException;
+import javax.servlet.ServletConfig;
+import javax.servlet.http.HttpServlet;
+import javax.servlet.http.HttpServletRequest;
+import javax.servlet.http.HttpServletResponse;
+
+
+/** 
+ * Placeholder servlet (work-in-progress), presumably the HTTP front-end for OpenSearchMaster; request handling is not implemented yet.
+ */   
+public class OpenSearchMasterServlet extends HttpServlet 
+{
+
+  public void init( ServletConfig config )
+    throws ServletException 
+  {
+    // Must be called when overriding init( ServletConfig ), otherwise getServletConfig( ) returns null.
+    super.init( config );
+  }
+
+  public void doGet( HttpServletRequest request, HttpServletResponse response )
+    throws ServletException, IOException 
+  {
+    // Not implemented yet.
+  }
+
+}

Added: trunk/archive-access/projects/nutchwax/archive/src/java/org/archive/nutchwax/OpenSearchSlave.java
===================================================================
--- trunk/archive-access/projects/nutchwax/archive/src/java/org/archive/nutchwax/OpenSearchSlave.java	                        (rev 0)
+++ trunk/archive-access/projects/nutchwax/archive/src/java/org/archive/nutchwax/OpenSearchSlave.java	2010-02-22 05:17:20 UTC (rev 2960)
@@ -0,0 +1,209 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.archive.nutchwax;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.UnsupportedEncodingException;
+import java.net.HttpURLConnection;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.net.URLConnection;
+import java.net.URLEncoder;
+import java.util.List;
+
+import org.jdom.Document;
+import org.jdom.Element;
+import org.jdom.Namespace;
+import org.jdom.input.SAXBuilder;
+import org.jdom.output.XMLOutputter;
+
+/** 
+ * 
+ */   
+public class OpenSearchSlave
+{
+  private String urlTemplate;
+
+  public OpenSearchSlave( String urlTemplate )
+  {
+    this.urlTemplate = urlTemplate;
+  }
+
+  public Document query( String query, int startIndex, int requestedNumResults, int hitsPerSite )
+    throws Exception
+  {
+    URL url = buildRequestUrl( query, startIndex, requestedNumResults, hitsPerSite );
+    
+    InputStream is = null;
+    try
+      {
+        is = getInputStream( url );
+        
+        Document doc = (new SAXBuilder()).build( is );
+
+        doc = validate( doc );
+
+        return doc;
+      }
+    finally
+      {
+        // Ensure the InputStream is closed, which should trigger the
+        // underlying HTTP connection to be cleaned-up.
+        try { if ( is != null ) is.close( ); } catch ( IOException ioe ) { } // Not much we can do
+      }
+  }
+
+  private Document validate( Document doc )
+    throws Exception
+  {
+    if ( doc.getRootElement( ) == null ) throw new Exception( "Invalid OpenSearch response: missing /rss" );
+    Element root = doc.getRootElement( );
+    
+    if ( ! "rss".equals( root.getName( ) ) ) throw new Exception( "Invalid OpenSearch response: missing /rss" );
+    Element channel = root.getChild( "channel" );
+    
+    if ( channel == null ) throw new Exception( "Invalid OpenSearch response: missing /rss/channel" );
+
+    for ( Element item : (List<Element>) channel.getChildren( "item" ) )
+      {
+        Element site = item.getChild( "site", Namespace.getNamespace( "http://www.nutch.org/opensearchrss/1.0/" ) );
+        if ( site == null )
+          {
+            item.addContent( new Element( "site", Namespace.getNamespace( "http://www.nutch.org/opensearchrss/1.0/" ) ) );
+          }
+        
+        Element score = item.getChild( "score", Namespace.getNamespace( "http://www.nutch.org/opensearchrss/1.0/" ) );
+        if ( score == null )
+          {
+            score = new Element( "score", Namespace.getNamespace( "http://www.nutch.org/opensearchrss/1.0/" ) );
+            score.setText( "" );
+
+            item.addContent( score );
+          }
+      }
+
+    return doc;
+  }
+
+  /**
+   * Expands the URL template: fills in {searchTerms} (URL-encoded as utf-8), {count}, {startIndex} and {hitsPerSite}, then removes any remaining optional {name?} placeholder.
+   */
+  public URL buildRequestUrl( String query, int startIndex, int requestedNumResults, int hitsPerSite )
+    throws MalformedURLException, UnsupportedEncodingException
+  {
+    String url = this.urlTemplate;
+    
+    // Note about replaceAll: In the Java regex library, the replacement string has a few
+    // special characters: \ and $.  Fortunately, since we URL-encode the replacement string,
+    // any occurrence of \ or $ is converted to %xy form.  So we don't have to worry about it. :)
+    url = url.replaceAll( "[{]searchTerms[}]", URLEncoder.encode( query, "utf-8" ) );
+    url = url.replaceAll( "[{]count[}]"      , String.valueOf( requestedNumResults ) );
+    url = url.replaceAll( "[{]startIndex[}]" , String.valueOf( startIndex ) );
+    url = url.replaceAll( "[{]hitsPerSite[}]", String.valueOf( hitsPerSite ) );
+
+    // We don't know about any optional parameters, so we remove them (per the OpenSearch spec.)
+    url = url.replaceAll( "[{][^}]+[?][}]", "" );
+    
+    return new URL( url );
+  }
+
+
+  public InputStream getInputStream( URL url )
+    throws IOException
+  {
+    URLConnection connection = url.openConnection( );
+    connection.setDoOutput( false );
+    connection.setRequestProperty( "User-Agent", "Mozilla/4.0 (compatible; NutchWAX OpenSearchMaster)" );
+    connection.connect( );
+
+    if ( connection instanceof HttpURLConnection )
+      {
+        HttpURLConnection hc = (HttpURLConnection) connection;
+
+        switch ( hc.getResponseCode( ) )
+          {
+          case 200:
+            // All good.
+            break;
+          default:
+            // Problems!  Bail out.
+            throw new IOException( "HTTP error from " + url + ": " + hc.getResponseMessage( ) );
+          }
+      }
+
+    InputStream is = connection.getInputStream( );
+
+    return is;
+  }
+
+  public String toString()
+  {
+    return this.urlTemplate;
+  }
+
+  public static void main( String args[] )
+    throws Exception
+  {
+    String usage = "OpenSearchSlave [OPTIONS] urlTemplate query"
+      + "\n\t-h <n>   Hits per site"
+      + "\n\t-n <n>   Number of results"
+      + "\n";
+
+    if ( args.length < 2 )
+      {
+        System.err.println( usage );
+        System.exit( 1 );
+      }
+
+    String urlTemplate = args[args.length - 2];
+    String query       = args[args.length - 1];
+
+    int hitsPerSite = 0;
+    int numHits     = 10;
+    for ( int i = 0 ; i < args.length - 2 ; i++ )
+      {
+        try
+          {
+            if ( "-h".equals( args[i] ) )
+              {
+                i++;
+                hitsPerSite = Integer.parseInt( args[i] );
+              }
+            if ( "-n".equals( args[i] ) )
+              {
+                i++;
+                numHits = Integer.parseInt( args[i] );
+              }
+          }
+        catch ( NumberFormatException nfe ) 
+          {
+            System.err.println( "Error: not a numeric value: " + args[i] );
+            System.err.println( usage );
+            System.exit( 1 );
+          }
+      }
+
+    OpenSearchSlave osl = new OpenSearchSlave( urlTemplate );
+    
+    Document doc = osl.query( query, 0, numHits, hitsPerSite );
+
+    (new XMLOutputter()).output( doc, System.out );
+  }
+
+}
\ No newline at end of file


This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.




[Archive-access-cvs] SF.net SVN: archive-access:[2960] trunk/archive-access/projects/nutchwax/ arch

[Archive-access-cvs] SF.net SVN: archive-access:[2960] trunk/archive-access/projects/nutchwax/ archive