From: <bi...@us...> - 2009-03-03 18:20:21
|
Revision: 2687 http://archive-access.svn.sourceforge.net/archive-access/?rev=2687&view=rev Author: binzino Date: 2009-03-03 18:20:14 +0000 (Tue, 03 Mar 2009) Log Message: ----------- Fixed handling of start and end of search results so that we detect "paging off the end" and return an empty result set rather than an exception. Modified Paths: -------------- trunk/archive-access/projects/nutchwax/archive/src/java/org/archive/nutchwax/OpenSearchServlet.java Modified: trunk/archive-access/projects/nutchwax/archive/src/java/org/archive/nutchwax/OpenSearchServlet.java =================================================================== --- trunk/archive-access/projects/nutchwax/archive/src/java/org/archive/nutchwax/OpenSearchServlet.java 2009-02-28 01:26:25 UTC (rev 2686) +++ trunk/archive-access/projects/nutchwax/archive/src/java/org/archive/nutchwax/OpenSearchServlet.java 2009-03-03 18:20:14 UTC (rev 2687) @@ -162,18 +162,30 @@ responseTime = System.nanoTime( ) - responseTime; - // generate xml results - int end = (int)Math.min(hits.getLength(), start + hitsPerPage); - int length = end-start; + // The 'end' is usually just the end of the current page + // (start+hitsPerPage); but if we are on the last page + // of de-duped results, then the end is hits.getLength(). + int end = Math.min( hits.getLength( ), start + hitsPerPage ); - Hit[] show = hits.getHits(start, end-start); - HitDetails[] details = bean.getDetails(show); - Summary[] summaries = bean.getSummary(details, query); + // The length is usually just (end-start), unless the start + // position is past the end of the results -- which is common when + // de-duping. The user could easily jump past the true end of the + // de-dup'd results. If the start is past the end, we use a + // length of '0' to produce an empty results page. + int length = Math.max( end-start, 0 ); + // Usually, the total results is the total number of non-de-duped + // results. 
However, if we are on last page of de-duped results, + // then we know our de-dup'd total is hits.getLength(). + long totalResults = hits.getLength( ) < (start+hitsPerPage) ? hits.getLength( ) : hits.getTotal( ); + + Hit[] show = hits.getHits(start, length ); + HitDetails[] details = bean.getDetails(show); + Summary[] summaries = bean.getSummary(details, query); + String requestUrl = request.getRequestURL().toString(); String base = requestUrl.substring(0, requestUrl.lastIndexOf('/')); - try { DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); factory.setNamespaceAware(true); @@ -197,8 +209,8 @@ +"&hitsPerDup="+hitsPerDup +params); - addNode(doc, channel, "opensearch", "totalResults", ""+hits.getTotal()); - addNode(doc, channel, "opensearch", "startIndex", ""+start); + addNode(doc, channel, "opensearch", "totalResults", ""+totalResults); + addNode(doc, channel, "opensearch", "startIndex", ""+start); addNode(doc, channel, "opensearch", "itemsPerPage", ""+hitsPerPage); addNode(doc, channel, "nutch", "query", queryString); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bi...@us...> - 2009-10-27 23:17:30
|
Revision: 2844 http://archive-access.svn.sourceforge.net/archive-access/?rev=2844&view=rev Author: binzino Date: 2009-10-27 23:17:15 +0000 (Tue, 27 Oct 2009) Log Message: ----------- Updated to Nutch 1.0 API. Also added use of Java generics to avoid type-casts. Modified Paths: -------------- trunk/archive-access/projects/nutchwax/archive/src/java/org/archive/nutchwax/OpenSearchServlet.java Modified: trunk/archive-access/projects/nutchwax/archive/src/java/org/archive/nutchwax/OpenSearchServlet.java =================================================================== --- trunk/archive-access/projects/nutchwax/archive/src/java/org/archive/nutchwax/OpenSearchServlet.java 2009-10-27 23:00:46 UTC (rev 2843) +++ trunk/archive-access/projects/nutchwax/archive/src/java/org/archive/nutchwax/OpenSearchServlet.java 2009-10-27 23:17:15 UTC (rev 2844) @@ -53,7 +53,7 @@ */ public class OpenSearchServlet extends HttpServlet { - private static final Map NS_MAP = new HashMap(); + private static final Map<String,String> NS_MAP = new HashMap<String,String>(); private int MAX_HITS_PER_PAGE; static { @@ -61,7 +61,7 @@ NS_MAP.put("nutch", "http://www.nutch.org/opensearchrss/1.0/"); } - private static final Set SKIP_DETAILS = new HashSet(); + private static final Set<String> SKIP_DETAILS = new HashSet<String>(); static { SKIP_DETAILS.add("url"); // redundant with RSS link SKIP_DETAILS.add("title"); // redundant with RSS title @@ -92,9 +92,8 @@ // get parameters from request request.setCharacterEncoding("UTF-8"); String queryString = request.getParameter("query"); - if (queryString == null) - queryString = ""; - String urlQuery = URLEncoder.encode(queryString, "UTF-8"); + if (queryString == null) queryString = ""; + //String urlQuery = URLEncoder.encode(queryString, "UTF-8"); // the query language String queryLang = request.getParameter("lang"); @@ -133,12 +132,6 @@ } } - // Make up query string for use later drawing the 'rss' logo. 
- String params = "&hitsPerPage=" + hitsPerPage + - (queryLang == null ? "" : "&lang=" + queryLang) + - (sort == null ? "" : "&sort=" + sort + (reverse? "&reverse=true": "") + - (dedupField == null ? "" : "&dedupField=" + dedupField)); - Query query = Query.parse(queryString, queryLang, this.conf); if (NutchBean.LOG.isInfoEnabled()) { NutchBean.LOG.info("query: " + queryString); @@ -183,9 +176,6 @@ HitDetails[] details = bean.getDetails(show); Summary[] summaries = bean.getSummary(details, query); - String requestUrl = request.getRequestURL().toString(); - String base = requestUrl.substring(0, requestUrl.lastIndexOf('/')); - try { DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); factory.setNamespaceAware(true); @@ -194,20 +184,14 @@ Element rss = addNode(doc, doc, "rss"); addAttribute(doc, rss, "version", "2.0"); addAttribute(doc, rss, "xmlns:opensearch", - (String)NS_MAP.get("opensearch")); - addAttribute(doc, rss, "xmlns:nutch", (String)NS_MAP.get("nutch")); + NS_MAP.get("opensearch")); + addAttribute(doc, rss, "xmlns:nutch", NS_MAP.get("nutch")); Element channel = addNode(doc, rss, "channel"); addNode(doc, channel, "title", "Nutch: " + queryString); - addNode(doc, channel, "description", "Nutch search results for query: " - + queryString); - addNode(doc, channel, "link", - base+"/search.jsp" - +"?query="+urlQuery - +"&start="+start - +"&hitsPerDup="+hitsPerDup - +params); + addNode(doc, channel, "description", "Nutch search results for query: " + queryString); + addNode(doc, channel, "link", "" ); addNode(doc, channel, "opensearch", "totalResults", ""+totalResults); addNode(doc, channel, "opensearch", "startIndex", ""+start); @@ -217,7 +201,7 @@ addNode(doc, channel, "nutch", "responseTime", Double.toString( ((long) responseTime / 1000 / 1000 ) / 1000.0 ) ); // Add a <nutch:urlParams> element containing a list of all the URL parameters. 
- Element urlParams = doc.createElementNS((String)NS_MAP.get("nutch"), "nutch:urlParams" ); + Element urlParams = doc.createElementNS(NS_MAP.get("nutch"), "nutch:urlParams" ); channel.appendChild( urlParams ); for ( Map.Entry<String,String[]> e : ((Map<String,String[]>) request.getParameterMap( )).entrySet( ) ) @@ -225,43 +209,19 @@ String key = e.getKey( ); for ( String value : e.getValue( ) ) { - Element urlParam = doc.createElementNS((String)NS_MAP.get("nutch"), "nutch:param" ); + Element urlParam = doc.createElementNS(NS_MAP.get("nutch"), "nutch:param" ); addAttribute( doc, urlParam, "name", key ); addAttribute( doc, urlParam, "value", value ); urlParams.appendChild(urlParam); } } - // Hmm, we should indicate whether or not the "totalResults" - // number as being exact some other way; perhaps just have a - // <nutch:totalIsExact>true</nutch:totalIsExact> element. - /* - if ((hits.totalIsExact() && end < hits.getTotal()) // more hits to show - || (!hits.totalIsExact() && (hits.getLength() > start+hitsPerPage))){ - addNode(doc, channel, "nutch", "nextPage", requestUrl - +"?query="+urlQuery - +"&start="+end - +"&hitsPerDup="+hitsPerDup - +params); - } - */ - - // Same here, this seems odd. 
- /* - if ((!hits.totalIsExact() && (hits.getLength() <= start+hitsPerPage))) { - addNode(doc, channel, "nutch", "showAllHits", requestUrl - +"?query="+urlQuery - +"&hitsPerDup="+0 - +params); - } - */ - for (int i = 0; i < length; i++) { Hit hit = show[i]; HitDetails detail = details[i]; String title = detail.getValue("title"); String url = detail.getValue("url"); - String id = "idx=" + hit.getIndexNo() + "&id=" + hit.getIndexDocNo(); + String id = "idx=" + hit.getIndexNo() + "&id=" + hit.getUniqueKey(); if (title == null || title.equals("")) { // use url for docs w/o title title = url; @@ -274,24 +234,8 @@ addNode(doc, item, "description", summaries[i].toString() ); } addNode(doc, item, "link", url); - addNode(doc, item, "nutch", "site", hit.getDedupValue()); - addNode(doc, item, "nutch", "cache", base+"/cached.jsp?"+id); - addNode(doc, item, "nutch", "explain", base+"/explain.jsp?"+id - +"&query="+urlQuery+"&lang="+queryLang); - - // Probably don't need this as the XML processor/front-end can - // easily do this themselves. 
- if (hit.moreFromDupExcluded()) { - addNode(doc, item, "nutch", "moreFromSite", requestUrl - +"?query=" - +URLEncoder.encode("site:"+hit.getDedupValue() - +" "+queryString, "UTF-8") - +"&hitsPerSite="+0 - +params); - } - for (int j = 0; j < detail.getLength(); j++) { // add all from detail String field = detail.getField(j); if (!SKIP_DETAILS.contains(field)) @@ -304,9 +248,9 @@ DOMSource source = new DOMSource(doc); TransformerFactory transFactory = TransformerFactory.newInstance(); Transformer transformer = transFactory.newTransformer(); - transformer.setOutputProperty("indent", "yes"); + transformer.setOutputProperty( javax.xml.transform.OutputKeys.ENCODING, "UTF-8" ); StreamResult result = new StreamResult(response.getOutputStream()); - response.setContentType("text/xml"); + response.setContentType("application/rss+xml"); transformer.transform(source, result); } catch (javax.xml.parsers.ParserConfigurationException e) { @@ -334,7 +278,7 @@ private static void addNode(Document doc, Node parent, String ns, String name, String text) { if ( text == null ) text = ""; - Element child = doc.createElementNS((String)NS_MAP.get(ns), ns+":"+name); + Element child = doc.createElementNS(NS_MAP.get(ns), ns+":"+name); child.appendChild(doc.createTextNode(getLegalXml(text))); parent.appendChild(child); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bi...@us...> - 2010-02-22 05:19:04
|
Revision: 2962 http://archive-access.svn.sourceforge.net/archive-access/?rev=2962&view=rev Author: binzino Date: 2010-02-22 05:18:57 +0000 (Mon, 22 Feb 2010) Log Message: ----------- Added result score to OpenSearch output. Modified Paths: -------------- trunk/archive-access/projects/nutchwax/archive/src/java/org/archive/nutchwax/OpenSearchServlet.java Modified: trunk/archive-access/projects/nutchwax/archive/src/java/org/archive/nutchwax/OpenSearchServlet.java =================================================================== --- trunk/archive-access/projects/nutchwax/archive/src/java/org/archive/nutchwax/OpenSearchServlet.java 2010-02-22 05:18:00 UTC (rev 2961) +++ trunk/archive-access/projects/nutchwax/archive/src/java/org/archive/nutchwax/OpenSearchServlet.java 2010-02-22 05:18:57 UTC (rev 2962) @@ -46,6 +46,7 @@ import org.apache.nutch.searcher.NutchBean; import org.apache.nutch.searcher.Query; import org.apache.nutch.searcher.Summary; +import org.apache.hadoop.io.FloatWritable; /** * Present search results using A9's OpenSearch extensions to RSS, @@ -183,9 +184,8 @@ Element rss = addNode(doc, doc, "rss"); addAttribute(doc, rss, "version", "2.0"); - addAttribute(doc, rss, "xmlns:opensearch", - NS_MAP.get("opensearch")); - addAttribute(doc, rss, "xmlns:nutch", NS_MAP.get("nutch")); + addAttribute(doc, rss, "xmlns:opensearch", NS_MAP.get("opensearch")); + addAttribute(doc, rss, "xmlns:nutch", NS_MAP.get("nutch")); Element channel = addNode(doc, rss, "channel"); @@ -201,7 +201,7 @@ addNode(doc, channel, "nutch", "responseTime", Double.toString( ((long) responseTime / 1000 / 1000 ) / 1000.0 ) ); // Add a <nutch:urlParams> element containing a list of all the URL parameters. 
- Element urlParams = doc.createElementNS(NS_MAP.get("nutch"), "nutch:urlParams" ); + Element urlParams = doc.createElementNS( NS_MAP.get("nutch"), "nutch:urlParams" ); channel.appendChild( urlParams ); for ( Map.Entry<String,String[]> e : ((Map<String,String[]>) request.getParameterMap( )).entrySet( ) ) @@ -219,9 +219,9 @@ for (int i = 0; i < length; i++) { Hit hit = show[i]; HitDetails detail = details[i]; + String score = Float.toString( ((FloatWritable)hit.getSortValue( )).get() ); String title = detail.getValue("title"); - String url = detail.getValue("url"); - String id = "idx=" + hit.getIndexNo() + "&id=" + hit.getUniqueKey(); + String url = detail.getValue("url"); if (title == null || title.equals("")) { // use url for docs w/o title title = url; @@ -229,6 +229,7 @@ Element item = addNode(doc, channel, "item"); + addNode(doc, item, "nutch", "score", score ); addNode(doc, item, "title", title); if (summaries[i] != null) { addNode(doc, item, "description", summaries[i].toString() ); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bi...@us...> - 2010-02-22 22:19:48
|
Revision: 2963 http://archive-access.svn.sourceforge.net/archive-access/?rev=2963&view=rev Author: binzino Date: 2010-02-22 22:19:42 +0000 (Mon, 22 Feb 2010) Log Message: ----------- Removed extra 'nutch:' prefix from urlParams and param elements in output. Modified Paths: -------------- trunk/archive-access/projects/nutchwax/archive/src/java/org/archive/nutchwax/OpenSearchServlet.java Modified: trunk/archive-access/projects/nutchwax/archive/src/java/org/archive/nutchwax/OpenSearchServlet.java =================================================================== --- trunk/archive-access/projects/nutchwax/archive/src/java/org/archive/nutchwax/OpenSearchServlet.java 2010-02-22 05:18:57 UTC (rev 2962) +++ trunk/archive-access/projects/nutchwax/archive/src/java/org/archive/nutchwax/OpenSearchServlet.java 2010-02-22 22:19:42 UTC (rev 2963) @@ -201,7 +201,7 @@ addNode(doc, channel, "nutch", "responseTime", Double.toString( ((long) responseTime / 1000 / 1000 ) / 1000.0 ) ); // Add a <nutch:urlParams> element containing a list of all the URL parameters. - Element urlParams = doc.createElementNS( NS_MAP.get("nutch"), "nutch:urlParams" ); + Element urlParams = doc.createElementNS( NS_MAP.get("nutch"), "urlParams" ); channel.appendChild( urlParams ); for ( Map.Entry<String,String[]> e : ((Map<String,String[]>) request.getParameterMap( )).entrySet( ) ) @@ -209,7 +209,7 @@ String key = e.getKey( ); for ( String value : e.getValue( ) ) { - Element urlParam = doc.createElementNS(NS_MAP.get("nutch"), "nutch:param" ); + Element urlParam = doc.createElementNS(NS_MAP.get("nutch"), "param" ); addAttribute( doc, urlParam, "name", key ); addAttribute( doc, urlParam, "value", value ); urlParams.appendChild(urlParam); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |