Update of /cvsroot/archive-access/archive-access/projects/wayback/src/java/org/archive/wayback/query In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv30992/src/java/org/archive/wayback/query Added Files: Renderer.java UIQueryResults.java OpenSearchQueryParser.java QueryServlet.java Log Message: Massive overhaul decomposing into three main categories of changes: 1) All internal datatypes are now extensible (currently Properties, but should be Maps) including: a) WaybackRequest (was WBRequest) b) SearchResults (was ResourceResults) c) SearchResult (was ResourceResult) d) Resource so that there is no longer an assumption of Archival URL queries, or "CDX-style" index results. This will put more responsibility on the UI components to interrogate SearchResults to decide how to render, but should enable extension to data returned from Indexes, as well as allow far more flexibility in queries, predominantly geared towards free-text searching. This is still somewhat clunky, as there are no convenience accessor methods, so all users refer to constants when interacting with them. 2) Major cleanup of servlet and filter interaction with servlet container. ReplayUI and QueryUI are now just plain old servlets, and filters can be optionally added to allow non-CGI argument requests to be coerced into standard WaybackRequest objects. 3) Alternate "Proxy" Replay mode is now functional, and some work has been done towards an alternate Nutch ResourceIndex. Currently the web.xml contains example configurations for both Proxy and Archival Url replay modes, but the Proxy-related configurations are commented out. Proxy mode *requires* changing the servlet context to ROOT. ArchivalUrl replay mode works as ROOT context and as any (I think) other context. There are some cosmetic double-slash issues to work out. --- NEW FILE: UIQueryResults.java --- /* UIQueryResults * * $Id: UIQueryResults.java,v 1.1 2005/11/16 03:11:30 bradtofel Exp $ * * Created on 12:03:14 PM Nov 8, 2005. 
* * Copyright (C) 2005 Internet Archive. * * This file is part of wayback. * * wayback is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser Public License as published by * the Free Software Foundation; either version 2.1 of the License, or * any later version. * * wayback is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser Public License for more details. * * You should have received a copy of the GNU Lesser Public License * along with wayback; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ package org.archive.wayback.query; import java.text.ParseException; import java.util.Iterator; import org.archive.wayback.WaybackConstants; import org.archive.wayback.ReplayResultURIConverter; import org.archive.wayback.core.SearchResult; import org.archive.wayback.core.SearchResults; import org.archive.wayback.core.Timestamp; import org.archive.wayback.core.WaybackRequest; /** * * * @author brad * @version $Date: 2005/11/16 03:11:30 $, $Revision: 1.1 $ */ public class UIQueryResults { private String searchUrl; private Timestamp startTimestamp; private Timestamp endTimestamp; private Timestamp firstResultTimestamp; private Timestamp lastResultTimestamp; private int resultCount; private SearchResults results; private ReplayResultURIConverter uriConverter; /** * @param wmRequest * @param results * @param request * @param replayUI * @throws ParseException */ public UIQueryResults(WaybackRequest wbRequest, SearchResults results, ReplayResultURIConverter uriConverter) throws ParseException { this.searchUrl = wbRequest.get(WaybackConstants.RESULT_URL); this.startTimestamp = Timestamp.parseBefore(results. 
getFilter(WaybackConstants.REQUEST_START_DATE)); this.endTimestamp = Timestamp.parseBefore(results.getFilter( WaybackConstants.REQUEST_END_DATE)); this.firstResultTimestamp = Timestamp.parseBefore(results .getFirstResultDate()); this.lastResultTimestamp = Timestamp.parseBefore(results .getLastResultDate()); this.resultCount = results.getResultCount(); this.results = results; this.uriConverter = uriConverter; } /** * @return Timestamp end cutoff requested by user */ public Timestamp getEndTimestamp() { return endTimestamp; } /** * @return first Timestamp in returned ResourceResults */ public Timestamp getFirstResultTimestamp() { return firstResultTimestamp; } /** * @return last Timestamp in returned ResourceResults */ public Timestamp getLastResultTimestamp() { return lastResultTimestamp; } /** * @return number of SearchResult objects in response */ public int getResultCount() { return resultCount; } /** * @return URL or URL prefix requested by user */ public String getSearchUrl() { return searchUrl; } /** * @return Timestamp start cutoff requested by user */ public Timestamp getStartTimestamp() { return startTimestamp; } /** * @return Iterator of ResourceResults */ public Iterator resultsIterator() { return results.iterator(); } /** * @param result * @return URL string that will replay the specified Resource Result. */ public String resultToReplayUrl(SearchResult result) { return uriConverter.makeReplayURI(result); } public String prettySearchEndDate() { return endTimestamp.prettyDate(); } public String prettySearchStartDate() { return startTimestamp.prettyDate(); } } --- NEW FILE: QueryServlet.java --- /* QueryServlet * * $Id: QueryServlet.java,v 1.1 2005/11/16 03:11:30 bradtofel Exp $ * * Created on 2:42:50 PM Nov 7, 2005. * * Copyright (C) 2005 Internet Archive. * * This file is part of wayback. 
* * wayback is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser Public License as published by * the Free Software Foundation; either version 2.1 of the License, or * any later version. * * wayback is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser Public License for more details. * * You should have received a copy of the GNU Lesser Public License * along with wayback; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ package org.archive.wayback.query; import java.io.IOException; import java.util.Enumeration; import java.util.Iterator; import java.util.Map; import java.util.Properties; import java.util.Set; import javax.servlet.ServletConfig; import javax.servlet.ServletContext; import javax.servlet.ServletException; import javax.servlet.http.HttpServlet; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import org.archive.wayback.WaybackConstants; import org.archive.wayback.QueryRenderer; import org.archive.wayback.ReplayResultURIConverter; import org.archive.wayback.ResourceIndex; import org.archive.wayback.core.SearchResults; import org.archive.wayback.core.WaybackLogic; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.exception.BadQueryException; import org.archive.wayback.exception.WaybackException; /** * * * @author brad * @version $Date: 2005/11/16 03:11:30 $, $Revision: 1.1 $ */ public class QueryServlet extends HttpServlet { /** * */ private static final String WMREQUEST_ATTRIBUTE = "wmrequest.attribute"; private static final long serialVersionUID = 1L; private WaybackLogic wayback = new WaybackLogic(); /** * Constructor */ public QueryServlet() { super(); } public void init(ServletConfig c) throws ServletException { Properties p = new 
Properties(); for (Enumeration e = c.getInitParameterNames(); e.hasMoreElements();) { String key = (String) e.nextElement(); p.put(key, c.getInitParameter(key)); } ServletContext sc = c.getServletContext(); for (Enumeration e = sc.getInitParameterNames(); e.hasMoreElements();) { String key = (String) e.nextElement(); p.put(key, sc.getInitParameter(key)); } // TODO initialize renderer try { wayback.init(p); } catch (Exception e) { throw new ServletException(e.getMessage()); } } private String getMapParam(Map queryMap, String field) { String arr[] = (String[]) queryMap.get(field); if (arr == null || arr.length == 0) { return null; } return arr[0]; } public WaybackRequest parseCGIRequest(HttpServletRequest httpRequest) throws BadQueryException { WaybackRequest wbRequest = new WaybackRequest(); Map queryMap = httpRequest.getParameterMap(); Set keys = queryMap.keySet(); Iterator itr = keys.iterator(); while(itr.hasNext()) { String key = (String) itr.next(); String val = getMapParam(queryMap,key); wbRequest.put(key,val); } return wbRequest; } public void doGet(HttpServletRequest httpRequest, HttpServletResponse httpResponse) throws IOException, ServletException { WaybackRequest wbRequest = (WaybackRequest) httpRequest .getAttribute(WMREQUEST_ATTRIBUTE); ResourceIndex idx = wayback.getResourceIndex(); QueryRenderer renderer = wayback.getQueryRenderer(); ReplayResultURIConverter uriConverter = wayback.getURIConverter(); try { if (wbRequest == null) { wbRequest = parseCGIRequest(httpRequest); } SearchResults results; results = idx.query(wbRequest); if (wbRequest.get(WaybackConstants.REQUEST_TYPE).equals( WaybackConstants.REQUEST_URL_QUERY)) { renderer.renderUrlResults(httpRequest, httpResponse, wbRequest, results, uriConverter); } else if (wbRequest.get(WaybackConstants.REQUEST_TYPE).equals( WaybackConstants.REQUEST_URL_PREFIX_QUERY)) { renderer.renderUrlPrefixResults(httpRequest, httpResponse, wbRequest, results, uriConverter); } else { throw new BadQueryException("Unknown 
query " + WaybackConstants.REQUEST_TYPE); } } catch (WaybackException wbe) { renderer.renderException(httpRequest, httpResponse, wbRequest, wbe); } } } --- NEW FILE: OpenSearchQueryParser.java --- /* OpenSearchParser * * $Id: OpenSearchQueryParser.java,v 1.1 2005/11/16 03:11:30 bradtofel Exp $ * * Created on 1:37:19 PM Nov 14, 2005. * * Copyright (C) 2005 Internet Archive. * * This file is part of wayback. * * wayback is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser Public License as published by * the Free Software Foundation; either version 2.1 of the License, or * any later version. * * wayback is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser Public License for more details. * * You should have received a copy of the GNU Lesser Public License * along with wayback; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ package org.archive.wayback.query; import java.util.Map; import java.util.regex.Pattern; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.exception.BadQueryException; /** * * * @author brad * @version $Date: 2005/11/16 03:11:30 $, $Revision: 1.1 $ */ public class OpenSearchQueryParser { private final static String SEARCH_QUERY = "q"; private final static String SEARCH_RESULTS = "count"; private final static String START_PAGE = "start_page"; // private final static String START_INDEX = "start_index"; private final static Pattern WHITESPACE_PATTERN = Pattern.compile("\\s+"); // singles consume the next non-whitespace token following the term private String[] singleTokens = { "url", "site", "mimetype", "noredirect" }; // lines consume the entire rest of the query private String[] lineTokens = { "terms" }; private String getMapParam(Map queryMap, String field) { String arr[] = 
(String[]) queryMap.get(field); if (arr == null || arr.length == 0) { return null; } return arr[0]; } public WaybackRequest parseQuery(Map queryMap) throws BadQueryException { WaybackRequest wbRequest = new WaybackRequest(); String query = getMapParam(queryMap, SEARCH_QUERY); String numResults = getMapParam(queryMap, SEARCH_RESULTS); String startPage = getMapParam(queryMap, START_PAGE); if (numResults != null) { int nr = Integer.parseInt(numResults); wbRequest.setResultsPerPage(nr); } if (startPage != null) { int sp = Integer.parseInt(startPage); wbRequest.setPageNum(sp); } if (query == null) { throw new BadQueryException("No search query argument"); } parseTerms(wbRequest, query); return wbRequest; } private void parseTerms(WaybackRequest wbRequest, String query) throws BadQueryException { // first try the entire line_tokens: for (int i = 0; i < lineTokens.length; i++) { String token = lineTokens[i] + ":"; int index = query.indexOf(token); if (index > -1) { // found it, take value as the remainder of the query String value = query.substring(index + token.length()); // TODO: trim trailing whitespace? wbRequest.put(lineTokens[i], value); query = query.substring(0, index); } } // now split whatever is left on whitespace: String[] parts = WHITESPACE_PATTERN.split(query); for (int i = 0; i < parts.length; i++) { String token = parts[i]; int colonIndex = token.indexOf(":"); if (colonIndex == -1) { throw new BadQueryException("Bad search token(" + token + ")"); } String key = token.substring(0, colonIndex); String value = token.substring(colonIndex + 1); // TODO: make sure key is in singleTokens? // let's just let em all thru for now: wbRequest.put(key, value); } } } --- NEW FILE: Renderer.java --- /* QueryRenderer * * $Id: Renderer.java,v 1.1 2005/11/16 03:11:30 bradtofel Exp $ * * Created on 2:47:42 PM Nov 7, 2005. * * Copyright (C) 2005 Internet Archive. * * This file is part of wayback. 
* * wayback is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser Public License as published by * the Free Software Foundation; either version 2.1 of the License, or * any later version. * * wayback is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser Public License for more details. * * You should have received a copy of the GNU Lesser Public License * along with wayback; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ package org.archive.wayback.query; import java.io.IOException; import java.text.ParseException; import java.util.Properties; import javax.servlet.RequestDispatcher; import javax.servlet.ServletException; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import org.archive.wayback.QueryRenderer; import org.archive.wayback.ReplayResultURIConverter; import org.archive.wayback.core.SearchResults; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.exception.ConfigurationException; import org.archive.wayback.exception.WaybackException; /** * * * @author brad * @version $Date: 2005/11/16 03:11:30 $, $Revision: 1.1 $ */ public class Renderer implements QueryRenderer { private final static String JSP_PATH = "queryui.jsppath"; private String jspPath = null; private final String ERROR_JSP = "ErrorResult.jsp"; private final String QUERY_JSP = "QueryResults.jsp"; private final String PREFIX_QUERY_JSP = "PathQueryResults.jsp"; public void init(Properties p) throws ConfigurationException { this.jspPath = (String) p.get(JSP_PATH); if (this.jspPath == null || this.jspPath.length() <= 0) { throw new ConfigurationException("Failed to find " + JSP_PATH); } } public void renderException(HttpServletRequest httpRequest, HttpServletResponse httpResponse, 
WaybackRequest wbRequest, WaybackException exception) throws ServletException, IOException { httpRequest.setAttribute("exception", exception); String finalJspPath = jspPath + "/" + ERROR_JSP; RequestDispatcher dispatcher = httpRequest .getRequestDispatcher(finalJspPath); dispatcher.forward(httpRequest, httpResponse); } public void renderUrlResults(HttpServletRequest httpRequest, HttpServletResponse httpResponse, WaybackRequest wbRequest, SearchResults results, ReplayResultURIConverter uriConverter) throws ServletException, IOException { UIQueryResults uiResults; try { uiResults = new UIQueryResults(wbRequest, results, uriConverter); } catch (ParseException e) { // I don't think this should happen... e.printStackTrace(); throw new ServletException(e.getMessage()); } httpRequest.setAttribute("ui-results", uiResults); proxyRequest(httpRequest, httpResponse, QUERY_JSP); } public void renderUrlPrefixResults(HttpServletRequest httpRequest, HttpServletResponse httpResponse, WaybackRequest wbRequest, SearchResults results, ReplayResultURIConverter uriConverter) throws ServletException, IOException { UIQueryResults uiResults; try { uiResults = new UIQueryResults(wbRequest, results, uriConverter); } catch (ParseException e) { // I don't think this should happen... e.printStackTrace(); throw new ServletException(e.getMessage()); } httpRequest.setAttribute("ui-results", uiResults); proxyRequest(httpRequest, httpResponse, PREFIX_QUERY_JSP); } /** * @param request * @param response * @param jspName * @throws ServletException * @throws IOException */ private void proxyRequest(HttpServletRequest request, HttpServletResponse response, final String jspName) throws ServletException, IOException { String finalJspPath = jspPath + "/" + jspName; RequestDispatcher dispatcher = request .getRequestDispatcher(finalJspPath); dispatcher.forward(request, response); } } |