[Archive-access-cvs] SF.net SVN: archive-access:[2732] trunk/archive-access/projects/nutchwax/ archive/src/nutch/src/java/org/apache/nutch/searcher/OpenSearchServlet.java

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 422-6466

Revision: 2732
          http://archive-access.svn.sourceforge.net/archive-access/?rev=2732&view=rev
Author:   binzino
Date:     2009-06-04 19:06:37 +0000 (Thu, 04 Jun 2009)

Log Message:
-----------
We have our own OpenSearchServlet in the org.archive.nutchwax package,
so we no longer need to keep a patched version.

Removed Paths:
-------------
    trunk/archive-access/projects/nutchwax/archive/src/nutch/src/java/org/apache/nutch/searcher/OpenSearchServlet.java

Deleted: trunk/archive-access/projects/nutchwax/archive/src/nutch/src/java/org/apache/nutch/searcher/OpenSearchServlet.java
===================================================================

--- trunk/archive-access/projects/nutchwax/archive/src/nutch/src/java/org/apache/nutch/searcher/OpenSearchServlet.java	2009-06-04 18:02:50 UTC (rev 2731)
+++ trunk/archive-access/projects/nutchwax/archive/src/nutch/src/java/org/apache/nutch/searcher/OpenSearchServlet.java	2009-06-04 19:06:37 UTC (rev 2732)
@@ -1,333 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.nutch.searcher;
-
-import java.io.IOException;
-import java.net.URLEncoder;
-import java.util.Map;
-import java.util.HashMap;
-import java.util.Set;
-import java.util.HashSet;
-
-import javax.servlet.ServletException;
-import javax.servlet.ServletConfig;
-import javax.servlet.http.HttpServlet;
-import javax.servlet.http.HttpServletRequest;
-import javax.servlet.http.HttpServletResponse;
-
-import javax.xml.parsers.*;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.nutch.util.NutchConfiguration;
-import org.w3c.dom.*;
-import javax.xml.transform.TransformerFactory;
-import javax.xml.transform.Transformer;
-import javax.xml.transform.dom.DOMSource;
-import javax.xml.transform.stream.StreamResult;
-
-
-/** Present search results using A9's OpenSearch extensions to RSS, plus a few
- * Nutch-specific extensions. */   
-public class OpenSearchServlet extends HttpServlet {
-  private static final Map NS_MAP = new HashMap();
-  private int MAX_HITS_PER_PAGE;
-
-  static {
-    NS_MAP.put("opensearch", "http://a9.com/-/spec/opensearchrss/1.0/");
-    NS_MAP.put("nutch", "http://www.nutch.org/opensearchrss/1.0/");
-  }
-
-  private static final Set SKIP_DETAILS = new HashSet();
-  static {
-    SKIP_DETAILS.add("url");                   // redundant with RSS link
-    SKIP_DETAILS.add("title");                 // redundant with RSS title
-  }
-
-  private NutchBean bean;
-  private Configuration conf;
-
-  public void init(ServletConfig config) throws ServletException {
-    try {
-      this.conf = NutchConfiguration.get(config.getServletContext());
-      bean = NutchBean.get(config.getServletContext(), this.conf);
-    } catch (IOException e) {
-      throw new ServletException(e);
-    }
-    MAX_HITS_PER_PAGE = conf.getInt("searcher.max.hits.per.page", -1);
-  }
-
-  public void doGet(HttpServletRequest request, HttpServletResponse response)
-    throws ServletException, IOException {
-
-    if (NutchBean.LOG.isInfoEnabled()) {
-      NutchBean.LOG.info("query request from " + request.getRemoteAddr());
-    }
-
-    // get parameters from request
-    request.setCharacterEncoding("UTF-8");
-    String queryString = request.getParameter("query");
-    if (queryString == null)
-      queryString = "";
-    String urlQuery = URLEncoder.encode(queryString, "UTF-8");
-    
-    // the query language
-    String queryLang = request.getParameter("lang");
-    
-    int start = 0;                                // first hit to display
-    String startString = request.getParameter("start");
-    if (startString != null)
-      start = Integer.parseInt(startString);
-    
-    int hitsPerPage = 10;                         // number of hits to display
-    String hitsString = request.getParameter("hitsPerPage");
-    if (hitsString != null)
-      hitsPerPage = Integer.parseInt(hitsString);
-    if(MAX_HITS_PER_PAGE > 0 && hitsPerPage > MAX_HITS_PER_PAGE)
-      hitsPerPage = MAX_HITS_PER_PAGE;
-
-    String sort = request.getParameter("sort");
-    boolean reverse =
-      sort!=null && "true".equals(request.getParameter("reverse"));
-
-    // De-Duplicate handling.  Look for duplicates field and for how many
-    // duplicates per results to return. Default duplicates field is 'site'
-    // and duplicates per results default is '2'.
-    String dedupField = request.getParameter("dedupField");
-    if (dedupField == null || dedupField.length() == 0) {
-        dedupField = "site";
-    }
-    int hitsPerDup = 2;
-    String hitsPerDupString = request.getParameter("hitsPerDup");
-    if (hitsPerDupString != null && hitsPerDupString.length() > 0) {
-        hitsPerDup = Integer.parseInt(hitsPerDupString);
-    } else {
-        // If 'hitsPerSite' present, use that value.
-        String hitsPerSiteString = request.getParameter("hitsPerSite");
-        if (hitsPerSiteString != null && hitsPerSiteString.length() > 0) {
-            hitsPerDup = Integer.parseInt(hitsPerSiteString);
-        }
-    }
-     
-    // Make up query string for use later drawing the 'rss' logo.
-    String params = "&hitsPerPage=" + hitsPerPage +
-        (queryLang == null ? "" : "&lang=" + queryLang) +
-        (sort == null ? "" : "&sort=" + sort + (reverse? "&reverse=true": "") +
-        (dedupField == null ? "" : "&dedupField=" + dedupField));
-
-    Query query = Query.parse(queryString, queryLang, this.conf);
-    if (NutchBean.LOG.isInfoEnabled()) {
-      NutchBean.LOG.info("query: " + queryString);
-      NutchBean.LOG.info("lang: " + queryLang);
-    }
-
-    // execute the query
-    Hits hits;
-    try {
-      hits = bean.search(query, start + hitsPerPage, hitsPerDup, dedupField,
-          sort, reverse);
-    } catch (IOException e) {
-      if (NutchBean.LOG.isWarnEnabled()) {
-        NutchBean.LOG.warn("Search Error", e);
-      }
-      hits = new Hits(0,new Hit[0]);	
-    }
-
-    if (NutchBean.LOG.isInfoEnabled()) {
-      NutchBean.LOG.info("total hits: " + hits.getTotal());
-    }
-
-    // generate xml results
-    int end = (int)Math.min(hits.getLength(), start + hitsPerPage);
-    int length = end-start;
-
-    Hit[] show = hits.getHits(start, end-start);
-    HitDetails[] details = bean.getDetails(show);
-    Summary[] summaries = bean.getSummary(details, query);
-
-    String requestUrl = request.getRequestURL().toString();
-    String base = requestUrl.substring(0, requestUrl.lastIndexOf('/'));
-      
-
-    try {
-      DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
-      factory.setNamespaceAware(true);
-      Document doc = factory.newDocumentBuilder().newDocument();
- 
-      Element rss = addNode(doc, doc, "rss");
-      addAttribute(doc, rss, "version", "2.0");
-      addAttribute(doc, rss, "xmlns:opensearch",
-                   (String)NS_MAP.get("opensearch"));
-      addAttribute(doc, rss, "xmlns:nutch", (String)NS_MAP.get("nutch"));
-
-      Element channel = addNode(doc, rss, "channel");
-    
-      addNode(doc, channel, "title", "Nutch: " + queryString);
-      addNode(doc, channel, "description", "Nutch search results for query: "
-              + queryString);
-      addNode(doc, channel, "link",
-              base+"/search.jsp"
-              +"?query="+urlQuery
-              +"&start="+start
-              +"&hitsPerDup="+hitsPerDup
-              +params);
-
-      addNode(doc, channel, "opensearch", "totalResults", ""+hits.getTotal());
-      addNode(doc, channel, "opensearch", "startIndex", ""+start);
-      addNode(doc, channel, "opensearch", "itemsPerPage", ""+hitsPerPage);
-
-      addNode(doc, channel, "nutch", "query", queryString);
-    
-
-      if ((hits.totalIsExact() && end < hits.getTotal()) // more hits to show
-          || (!hits.totalIsExact() && (hits.getLength() > start+hitsPerPage))){
-        addNode(doc, channel, "nutch", "nextPage", requestUrl
-                +"?query="+urlQuery
-                +"&start="+end
-                +"&hitsPerDup="+hitsPerDup
-                +params);
-      }
-
-      if ((!hits.totalIsExact() && (hits.getLength() <= start+hitsPerPage))) {
-        addNode(doc, channel, "nutch", "showAllHits", requestUrl
-                +"?query="+urlQuery
-                +"&hitsPerDup="+0
-                +params);
-      }
-
-      for (int i = 0; i < length; i++) {
-        Hit hit = show[i];
-        HitDetails detail = details[i];
-        String title = detail.getValue("title");
-        String url = detail.getValue("url");
-        String id = "idx=" + hit.getIndexNo() + "&id=" + hit.getIndexDocNo();
-      
-        if (title == null || title.equals("")) {   // use url for docs w/o title
-          title = url;
-        }
-        
-        Element item = addNode(doc, channel, "item");
-
-        addNode(doc, item, "title", title);
-        if (summaries[i] != null) {
-          addNode(doc, item, "description", summaries[i].toString() );
-        }
-        addNode(doc, item, "link", url);
-
-        addNode(doc, item, "nutch", "site", hit.getDedupValue());
-
-        addNode(doc, item, "nutch", "cache", base+"/cached.jsp?"+id);
-        addNode(doc, item, "nutch", "explain", base+"/explain.jsp?"+id
-                +"&query="+urlQuery+"&lang="+queryLang);
-
-        if (hit.moreFromDupExcluded()) {
-          addNode(doc, item, "nutch", "moreFromSite", requestUrl
-                  +"?query="
-                  +URLEncoder.encode("site:"+hit.getDedupValue()
-                                     +" "+queryString, "UTF-8")
-                  +"&hitsPerSite="+0
-                  +params);
-        }
-
-        for (int j = 0; j < detail.getLength(); j++) { // add all from detail
-          String field = detail.getField(j);
-          if (!SKIP_DETAILS.contains(field))
-            addNode(doc, item, "nutch", field, detail.getValue(j));
-        }
-      }
-
-      // dump DOM tree
-
-      DOMSource source = new DOMSource(doc);
-      TransformerFactory transFactory = TransformerFactory.newInstance();
-      Transformer transformer = transFactory.newTransformer();
-      transformer.setOutputProperty("indent", "yes");
-      StreamResult result = new StreamResult(response.getOutputStream());
-      response.setContentType("text/xml");
-      transformer.transform(source, result);
-
-    } catch (javax.xml.parsers.ParserConfigurationException e) {
-      throw new ServletException(e);
-    } catch (javax.xml.transform.TransformerException e) {
-      throw new ServletException(e);
-    }
-      
-  }
-
-  private static Element addNode(Document doc, Node parent, String name) {
-    Element child = doc.createElement(name);
-    parent.appendChild(child);
-    return child;
-  }
-
-  private static void addNode(Document doc, Node parent,
-                              String name, String text) {
-    Element child = doc.createElement(name);
-    child.appendChild(doc.createTextNode(getLegalXml(text)));
-    parent.appendChild(child);
-  }
-
-  private static void addNode(Document doc, Node parent,
-                              String ns, String name, String text) {
-    Element child = doc.createElementNS((String)NS_MAP.get(ns), ns+":"+name);
-    child.appendChild(doc.createTextNode(getLegalXml(text)));
-    parent.appendChild(child);
-  }
-
-  private static void addAttribute(Document doc, Element node,
-                                   String name, String value) {
-    Attr attribute = doc.createAttribute(name);
-    attribute.setValue(getLegalXml(value));
-    node.getAttributes().setNamedItem(attribute);
-  }
-
-  /*
-   * Ensure string is legal xml.
-   * @param text String to verify.
-   * @return Passed <code>text</code> or a new string with illegal
-   * characters removed if any found in <code>text</code>.
-   * @see http://www.w3.org/TR/2000/REC-xml-20001006#NT-Char
-   */
-  protected static String getLegalXml(final String text) {
-      if (text == null) {
-          return null;
-      }
-      StringBuffer buffer = null;
-      for (int i = 0; i < text.length(); i++) {
-        char c = text.charAt(i);
-        if (!isLegalXml(c)) {
-	  if (buffer == null) {
-              // Start up a buffer.  Copy characters here from now on
-              // now we've found at least one bad character in original.
-	      buffer = new StringBuffer(text.length());
-              buffer.append(text.substring(0, i));
-          }
-        } else {
-           if (buffer != null) {
-             buffer.append(c);
-           }
-        }
-      }
-      return (buffer != null)? buffer.toString(): text;
-  }
- 
-  private static boolean isLegalXml(final char c) {
-    return c == 0x9 || c == 0xa || c == 0xd || (c >= 0x20 && c <= 0xd7ff)
-        || (c >= 0xe000 && c <= 0xfffd) || (c >= 0x10000 && c <= 0x10ffff);
-  }
-
-}


This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.




[Archive-access-cvs] SF.net SVN: archive-access:[2732] trunk/archive-access/projects/nutchwax/ arc

[Archive-access-cvs] SF.net SVN: archive-access:[2732] trunk/archive-access/projects/nutchwax/ archive/src/nutch/src/java/org/apache/nutch/searcher/OpenSearchServlet.java