From: Michael S. <sta...@us...> - 2005-11-04 21:31:30
|
Update of /cvsroot/archive-access/archive-access/projects/nutch/src/web In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv22778/src/web Modified Files: search.jsp Added Files: search.jsp.archiveit Log Message: * src/web/search.jsp Add carrying forward of collection name. * src/web/search.jsp.archiveit Add Dan's paging through results. * src/web/img/ia-logo.jpg Add another version of ia logo. --- NEW FILE: search.jsp.archiveit ---
<%@ page
  contentType="text/html; charset=UTF-8"
  pageEncoding="UTF-8"

  import="javax.servlet.*"
  import="javax.servlet.http.*"
  import="java.io.*"
  import="java.util.*"
  import="java.text.*"
  import="java.net.*"
  import="java.util.regex.Pattern"

  import="org.apache.nutch.html.Entities"
  import="org.apache.nutch.searcher.*"
  import="org.apache.nutch.plugin.*"
  import="org.apache.nutch.util.NutchConf"
  import="org.archive.access.nutch.NutchwaxQuery"
%><%!
  /*
   * Search results page with numbered paging.
   *
   * Request parameters read by this page:
   *   query        - the query text (defaults to the empty string)
   *   start        - zero-based index of the first hit to display (default 0)
   *   hitsPerPage  - number of hits per page (default 10)
   *   sort,reverse - optional sort field and direction
   *   dedupField   - field used for de-duplication (default 'site')
   *   hitsPerDup   - hits shown per duplicate value (default 1); the
   *                  'hitsPerSite' parameter is honoured as a fallback
   *   collection   - optional collection name; appended to the query as a
   *                  'collection:NAME' clause and carried forward by the
   *                  search form and the paging links
   */

  // NOTE: SimpleDateFormat is not thread-safe and these instances are shared
  // by all request threads.  They are kept for compatibility, but the page
  // body only ever formats with per-request clones of them.
  public static final DateFormat FORMAT =
    new SimpleDateFormat("yyyyMMddHHmmss");
  public static final DateFormat DISPLAY_FORMAT =
    new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
  private static final String COLLECTION_KEY = "collection";
  private static final String COLLECTION_QUERY_PARAM_KEY =
    COLLECTION_KEY + ":";
  private static final int DEFAULT_HITS_PER_PAGE = 10;
  private static final int MAX_PAGES_TO_SHOW = 20;

  /**
   * Parses an integer request parameter.
   * Returns defaultValue when the parameter is missing, empty or not a
   * number, so that a malformed URL does not produce a server error.
   */
  private static int parseIntParam(String value, int defaultValue) {
    if (value == null || value.length() == 0) {
      return defaultValue;
    }
    try {
      return Integer.parseInt(value.trim());
    } catch (NumberFormatException e) {
      return defaultValue;
    }
  }

  /** URL-encodes a value as UTF-8, the encoding this page reads requests in. */
  private static String urlEncode(String value)
      throws UnsupportedEncodingException {
    return URLEncoder.encode(value, "UTF-8");
  }
%><%
  NutchBean bean = NutchBean.get(application);
  // Set the character encoding to use when interpreting request values
  request.setCharacterEncoding("UTF-8");

  bean.LOG.info("query request from " + request.getRemoteAddr());

  // get query from request
  String queryString = request.getParameter("query");
  if (queryString == null) {
    queryString = "";
  }
  // Both encodings are taken from the query exactly as the user typed it,
  // i.e. before any 'collection:' clause is appended below.
  String htmlQueryString = Entities.encode(queryString);  // for HTML output
  String urlQueryString = urlEncode(queryString);         // for link URLs

  // first hit to display; never negative
  int start = Math.max(0, parseIntParam(request.getParameter("start"), 0));

  // number of hits to display; must be positive because it is used as a
  // divisor by the paging code
  int hitsPerPage = parseIntParam(request.getParameter("hitsPerPage"),
    DEFAULT_HITS_PER_PAGE);
  if (hitsPerPage < 1) {
    hitsPerPage = DEFAULT_HITS_PER_PAGE;
  }

  // Add in 'sort' parameter.
  String sort = request.getParameter("sort");
  boolean reverse =
    sort != null && "true".equals(request.getParameter("reverse"));

  // De-Duplicate handling.  Look for duplicates field and for how many
  // duplicates per results to return. Default duplicates field is 'site'
  // and duplicates per results default is '1' (Used to be '2' but now
  // '1' so can have an index with dups not show dups when used doing
  // straight searches).
  String dedupField = request.getParameter("dedupField");
  if (dedupField == null || dedupField.length() == 0) {
    dedupField = "site";
  }
  String hitsPerDupString = request.getParameter("hitsPerDup");
  if (hitsPerDupString == null || hitsPerDupString.length() == 0) {
    // If 'hitsPerSite' present, use that value.
    hitsPerDupString = request.getParameter("hitsPerSite");
  }
  int hitsPerDup = parseIntParam(hitsPerDupString, 1);

  // If a 'collection' parameter present, always add to query.
  // An empty value is treated the same as an absent one.
  String collection = request.getParameter(COLLECTION_KEY);
  if (collection != null && collection.length() == 0) {
    collection = null;
  }
  if (collection != null && queryString.length() > 0
      && queryString.indexOf(COLLECTION_QUERY_PARAM_KEY) < 0) {
    queryString = queryString + " " + COLLECTION_QUERY_PARAM_KEY + collection;
  }

  // Make up query string for use later drawing the 'rss' logo.
  String params = "&hitsPerPage=" + hitsPerPage
    + "&dedupField=" + urlEncode(dedupField);
  if (sort != null) {
    params = params + "&sort=" + urlEncode(sort)
      + (reverse ? "&reverse=true" : "");
  }

  // Pieces shared by every paging link: base + <start> + pagingParams.
  String pagingBase = request.getContextPath() + "/search.jsp?"
    + "query=" + urlQueryString + "&start=";
  String pagingParams = "&hitsPerPage=" + hitsPerPage
    + "&hitsPerDup=" + hitsPerDup
    + "&dedupField=" + urlEncode(dedupField);
  if (collection != null) {
    pagingParams = pagingParams + "&" + COLLECTION_KEY + "="
      + urlEncode(collection);
  }
  if (sort != null) {
    pagingParams = pagingParams + "&sort=" + urlEncode(sort)
      + "&reverse=" + reverse;
  }

  Query query = NutchwaxQuery.parse(queryString);
  bean.LOG.info("query: " + query.toString());

  String language =
    ResourceBundle.getBundle("org.nutch.jsp.search", request.getLocale())
    .getLocale().getLanguage();
  String requestURI = HttpUtils.getRequestURL(request).toString();
  String base = requestURI.substring(0, requestURI.lastIndexOf('/'));

  // URLEncoder.encode the queryString rather than just use htmlQueryString.
  // The former will take care of other than just html entities in case its
  // needed.
  String rss = request.getContextPath() + "/opensearch?query="
    + urlEncode(queryString)
    + "&hitsPerDup=" + hitsPerDup
    + ((start != 0) ? "&start=" + start : "")
    + params;
%><!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<%
  // To prevent the character encoding declared with 'contentType' page
  // directive from being overriden by JSTL (apache i18n), we freeze it
  // by flushing the output buffer.
  // see http://java.sun.com/developer/technicalArticles/Intl/MultilingualJSP/
  out.flush();
%>
<%@ taglib uri="http://jakarta.apache.org/taglibs/i18n" prefix="i18n" %>
<i18n:bundle baseName="org.nutch.jsp.search"/>
<html lang="<%= language %>">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<title>Internet Archive: <i18n:message key="title"/></title>
<link rel="shortcut icon" href="<%=request.getContextPath()%>/images/logo-16.jpg" type="image/x-icon"/>
<jsp:include page="/include/style.html"/>
<base href="<%= base + "/" + language %>/">
</head>

<body>
<jsp:include page="/header.jsp"/>

<form name="search" action="<%=request.getContextPath()%>/search.jsp" method="get">
<input name="query" size=44 value="<%=htmlQueryString%>">
<input type="hidden" name="hitsPerPage" value="<%=hitsPerPage%>">
<% if (collection != null) {
     // Only emitted when a collection was requested; printing a null here
     // would submit the literal string "null" as the collection name.
%>
<input type="hidden" name="<%=COLLECTION_KEY%>" value="<%=Entities.encode(collection)%>">
<% } %>
<input type="submit" value="<i18n:message key="search"/>">
<% if (sort != null) { %>
<input type="hidden" name="sort" value="<%=Entities.encode(sort)%>">
<input type="hidden" name="reverse" value="<%=reverse%>">
<% } %>
</form>

<%
  long startTime = System.currentTimeMillis();
  Hits hits = null;
  try {
    hits = bean.search(query, start + hitsPerPage, hitsPerDup, dedupField,
      sort, reverse);
  } catch (IOException e) {
    // Degrade to an empty result page, but leave a trace of the failure.
    bean.LOG.info("search failed for query '" + queryString + "': " + e);
    hits = new Hits(0, new Hit[0]);
  }
  long searchTime = System.currentTimeMillis() - startTime;
  int end = (int)Math.min(hits.getLength(), start + hitsPerPage);
%>

Search took <%= searchTime/1000.0 %> seconds.

<i18n:message key="hits">
  <i18n:messageArg value="<%=new Long((end==0)?0:(start+1))%>"/>
  <i18n:messageArg value="<%=new Long(end)%>"/>
  <i18n:messageArg value="<%=new Long(hits.getTotal())%>"/>
</i18n:message>

<%
  // be responsive
  out.flush();

  // 'start' may lie beyond the last hit (e.g. a hand-edited URL); never ask
  // for a negative number of hits.
  int length = Math.max(0, end - start);
  Hit[] show = (length > 0) ? hits.getHits(start, length) : new Hit[0];
  HitDetails[] details = bean.getDetails(show);
  String[] summaries = bean.getSummary(details, query);

  bean.LOG.info("total hits: " + hits.getTotal());

  String collectionsHost = NutchConf.get().get("collections.host");

  // Per-request copies of the shared formatters (see note at declaration).
  DateFormat archiveDateFormat = (DateFormat)FORMAT.clone();
  DateFormat displayDateFormat = (DateFormat)DISPLAY_FORMAT.clone();
%>

<br><br>

<%
  for (int i = 0; i < length; i++) {      // display the hits
    Hit hit = show[i];
    HitDetails detail = details[i];
    String title = detail.getValue("title");
    String summary = summaries[i];
    String id = "idx=" + hit.getIndexNo() + "&id=" + hit.getIndexDocNo();

    Date fetchDate = new Date(bean.getFetchDate(detail));
    String archiveDate = archiveDateFormat.format(fetchDate);
    String archiveDisplayDate = displayDateFormat.format(fetchDate);
    String archiveCollection = detail.getValue("collection");
    String url = detail.getValue("url");
    String target = "http://" + collectionsHost + "/"
      + archiveCollection + "/" + archiveDate + "/" + url;
    String allVersions = "http://" + collectionsHost + "/"
      + archiveCollection + "/*/" + url;

    if (title == null || title.equals("")) {     // use url for docs w/o title
      title = url;
    }
%>
    <b><a href="<%=Entities.encode(target)%>"><%=Entities.encode(title)%></a></b>
    <%@ include file="./more.jsp" %>
    <% if (!"".equals(summary)) { %>
    <br><%=summary%>
    <% } %>
    <br>
    <small>
    <span class="url"><%=Entities.encode(url)%></span>
    -
    <%=archiveDisplayDate%>
    -
    <a href="<%=Entities.encode(allVersions)%>">other versions</a>
    <% if (hit.moreFromDupExcluded()) {
         // Restrict to the de-duplication field actually in use ('site' by
         // default) and switch de-duplication off with hitsPerDup=0.
         String more = "query="
           + urlEncode(dedupField + ":" + hit.getDedupValue() + " " + queryString)
           + "&start=" + start + "&hitsPerPage=" + hitsPerPage
           + "&hitsPerDup=" + 0;
    %>
    -
    <a href="<%=request.getContextPath()%>/search.jsp?<%=more%>"><i18n:message key="moreFrom"/>
     <%=Entities.encode(hit.getDedupValue())%></a>
    <% } %>
    -
    <a href="<%=request.getContextPath()%>/explain.jsp?<%=id%>&query=<%=urlEncode(queryString)%>">explain</a>
    </small>
    <br><br>
<% } %>

<%
  if ((hits.totalIsExact() && end < hits.getTotal()) // more hits to show
      || (!hits.totalIsExact() && (hits.getLength() > start + hitsPerPage))) {

    // Pages are numbered from 1; page p starts at hit (p - 1) * hitsPerPage.
    long pagesAvailable = (hits.getTotal() + hitsPerPage - 1) / hitsPerPage;
    long currentPage = (long)(start / hitsPerPage) + 1;

    // Show a window of at most MAX_PAGES_TO_SHOW page numbers, roughly
    // centred on the current page.
    long displayMin = currentPage - (MAX_PAGES_TO_SHOW / 2);
    if (displayMin < 1) {
      displayMin = 1;
    }
    long displayMax = displayMin + MAX_PAGES_TO_SHOW - 1;
    if (displayMax > pagesAvailable) {
      displayMax = pagesAvailable;
    }
%>
<!-- Debugging info
<table border="1">
<tr>
<td>pagesAvailable:<%=pagesAvailable%></td>
<td>currentPage:<%=currentPage%></td>
<td>displayMin:<%=displayMin%></td>
<td>displayMax:<%=displayMax%></td>
</tr>
</table>
-->
<center>
<%
    if (currentPage > 1) {
      // The previous page is page (currentPage - 1), which starts at
      // (currentPage - 2) * hitsPerPage.
      long previousPageStart = (currentPage - 2) * hitsPerPage;
      String previousPageUrl = pagingBase + previousPageStart + pagingParams;
%>
<a href="<%=previousPageUrl%>"><b>Previous</b></a>&nbsp;
<%
    }

    for (long pageIndex = displayMin; pageIndex <= displayMax; pageIndex++) {
      long pageStart = (pageIndex - 1) * hitsPerPage;
      String pageUrl = pagingBase + pageStart + pagingParams;
      if (pageIndex != currentPage) {
%>
<a href="<%=pageUrl%>"><%=pageIndex%></a>
<%
      } else {
%>
<b><%=pageIndex%></b>
<%
      }
    }

    if (currentPage < pagesAvailable) {
      // The next page is page (currentPage + 1), which starts at
      // currentPage * hitsPerPage.
      long nextPageStart = currentPage * hitsPerPage;
      String nextPageUrl = pagingBase + nextPageStart + pagingParams;
%>
<a href="<%=nextPageUrl%>"><b>Next</b></a>&nbsp;
<%
    }
%>
</center>
<%
  }

  if ((!hits.totalIsExact() && (hits.getLength() <= start + hitsPerPage))) {
%>
    <form name="search" action="<%=request.getContextPath()%>/search.jsp" method="get">
    <input type="hidden" name="query" value="<%=htmlQueryString%>">
    <input type="hidden" name="hitsPerPage" value="<%=hitsPerPage%>">
    <input type="hidden" name="hitsPerDup" value="0">
    <% if (collection != null) { %>
    <input type="hidden" name="<%=COLLECTION_KEY%>" value="<%=Entities.encode(collection)%>">
    <% } %>
    <input type="submit" value="<i18n:message key="showAllHits"/>">
    <% if (sort != null) { %>
    <input type="hidden" name="sort" value="<%=Entities.encode(sort)%>">
    <input type="hidden" name="reverse" value="<%=reverse%>">
    <% } %>
    </form>
<%
  }
%>

<p>
<table bgcolor="3333ff" align="right">
<tr><td bgcolor="ff9900"><a href="<%=rss%>"><font color="ffffff"><b>RSS</b>
</font></a></td></tr>
</table>

<a href="http://www.archive.org">
<img border="0" src="<%=request.getContextPath()%>/img/ia-logo.jpg">
</a>
<a href="http://www.nutch.org/">
<img border="0" src="<%=request.getContextPath()%>/img/poweredbynutch_01.gif">
</a>
</p>

</body>
</html>
Index: search.jsp =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/nutch/src/web/search.jsp,v retrieving revision 1.23 retrieving revision 1.24 diff -C2 -d -r1.23 -r1.24 *** search.jsp 6 Oct 2005 17:35:02 -0000 1.23 --- search.jsp 4 Nov 2005 21:31:16 -0000 1.24 *************** *** 22,25 **** --- 22,27 ---- public static final DateFormat DISPLAY_FORMAT = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + private static final String COLLECTION_KEY = "collection"; + private static final String COLLECTION_QUERY_PARAM_KEY = COLLECTION_KEY + ":"; %><% NutchBean bean = NutchBean.get(application); *************** *** 32,36 **** // get query from request 
String queryString = request.getParameter("query"); - System.out.println("Untampered query: " + queryString); if (queryString == null) { queryString = ""; --- 34,37 ---- *************** *** 73,76 **** --- 74,87 ---- } } + + // If a 'collection' parameter present, always add to query. + String collection = request.getParameter(COLLECTION_KEY); + if (collection != null && queryString != null && queryString.length() > 0) { + int collectionIndex = queryString.indexOf(COLLECTION_QUERY_PARAM_KEY); + if (collectionIndex < 0) { + queryString = queryString + " " + COLLECTION_QUERY_PARAM_KEY + + collection; + } + } // Make up query string for use later drawing the 'rss' logo. *************** *** 120,123 **** --- 131,135 ---- <input name="query" size=44 value="<%=htmlQueryString%>"> <input type="hidden" name="hitsPerPage" value="<%=hitsPerPage%>"> + <input type="hidden" name="collection" value="<%=collection%>"> <input type="submit" value="<i18n:message key="search"/>"> <% if (sort != null) { %> |