From: Brad <bra...@us...> - 2005-11-16 03:11:40
|
Update of /cvsroot/archive-access/archive-access/projects/wayback/src/java/org/archive/wayback/replay In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv30992/src/java/org/archive/wayback/replay Added Files: ReplayServlet.java Log Message: Massive overhaul decomposing into three main categories of changes: 1) All internal datatypes are now extensible (currently Properties, but should be Maps) including: a) WaybackRequest (was WBRequest) b) SearchResults (was ResourceResults) c) SearchResult (was ResourceResult) d) Resource so that there is no longer an assumption of Archival URL queries, or "CDX-style" index results. This will put more responsibility on the UI components to interrogate SearchResults to decide how to render, but should enable extension to data returned from Indexes, as well as allow far more flexibility in queries, predominantly geared towards free-text searching. This is still somewhat clunky, as there are no convenience accessor methods, so all users refer to constants when interacting with them. 2) Major cleanup of servlet and filter interaction with servlet container. ReplayUI and QueryUI are now just plain old servlets, and filters can be optionally added to allow non-CGI argument requests to be coerced into standard WaybackRequest objects. 3) Alternate "Proxy" Replay mode is now functional, and some work has been done towards an alternate Nutch ResourceIndex. Currently the web.xml contains example configurations for both Proxy and Archival Url replay modes, but the Proxy related configurations are commented out. Proxy mode *requires* changing the servlet context to ROOT. ArchivalUrl replay mode works as ROOT context and as any (I think) other context. There are some cosmetic double-slash issues to work out. --- NEW FILE: ReplayServlet.java --- /* WBReplayUIServlet * * Created on 2005/10/18 14:00:00 * * Copyright (C) 2005 Internet Archive. * * This file is part of the Wayback Machine (crawler.archive.org). 
* * Wayback Machine is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser Public License as published by * the Free Software Foundation; either version 2.1 of the License, or * any later version. * * Wayback Machine is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser Public License for more details. * * You should have received a copy of the GNU Lesser Public License * along with Wayback Machine; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ package org.archive.wayback.replay; import java.io.IOException; import java.text.ParseException; import java.util.Enumeration; import java.util.Iterator; import java.util.Map; import java.util.Properties; import java.util.Set; import javax.servlet.ServletConfig; import javax.servlet.ServletContext; import javax.servlet.ServletException; import javax.servlet.http.HttpServlet; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import org.archive.wayback.WaybackConstants; import org.archive.wayback.ReplayRenderer; import org.archive.wayback.ReplayResultURIConverter; import org.archive.wayback.ResourceIndex; import org.archive.wayback.ResourceStore; import org.archive.wayback.core.Resource; import org.archive.wayback.core.SearchResult; import org.archive.wayback.core.SearchResults; import org.archive.wayback.core.Timestamp; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.core.WaybackLogic; import org.archive.wayback.exception.BadQueryException; import org.archive.wayback.exception.WaybackException; /** * Servlet implementation for Wayback Replay requests. 
*
 * <p>For each GET request the servlet obtains a {@link WaybackRequest}
 * (either one already attached to the HTTP request as an attribute, or one
 * built from the CGI parameters), queries the configured
 * {@link ResourceIndex}, picks the search result whose capture date is
 * closest to the requested date, retrieves the matching {@link Resource}
 * from the {@link ResourceStore} and hands it to the {@link ReplayRenderer}.
 *
 * @author Brad Tofel
 * @version $Date: 2005/11/16 03:11:30 $, $Revision: 1.1 $
 */
public class ReplayServlet extends HttpServlet {

    /**
     * Name of the HTTP request attribute under which an already constructed
     * WaybackRequest is looked up before falling back to CGI parsing.
     */
    private static final String WMREQUEST_ATTRIBUTE = "wmrequest.attribute";

    private static final long serialVersionUID = 1L;

    /** Provides the index, store, URI converter and renderer used for replay. */
    private WaybackLogic wayback = new WaybackLogic();

    /**
     * Constructor
     */
    public ReplayServlet() {
        super();
    }

    /**
     * Initializes the servlet and the underlying WaybackLogic.
     *
     * <p>All servlet init parameters and then all servlet-context init
     * parameters are copied into a single Properties object; because the
     * context parameters are copied last, a context parameter replaces a
     * servlet parameter of the same name.
     *
     * @param c the servlet configuration supplied by the container
     * @throws ServletException if WaybackLogic initialization fails; the
     *         original exception is attached as the cause
     */
    public void init(ServletConfig c) throws ServletException {
        // GenericServlet requires this call when init(ServletConfig) is
        // overridden; without it getServletConfig(), getServletContext()
        // and log() are unusable on this servlet.
        super.init(c);

        Properties p = new Properties();
        for (Enumeration e = c.getInitParameterNames(); e.hasMoreElements();) {
            String key = (String) e.nextElement();
            p.put(key, c.getInitParameter(key));
        }
        ServletContext sc = c.getServletContext();
        for (Enumeration e = sc.getInitParameterNames(); e.hasMoreElements();) {
            String key = (String) e.nextElement();
            p.put(key, sc.getInitParameter(key));
        }

        try {
            wayback.init(p);
        } catch (Exception e) {
            // Chain the cause so the container can report the full trace.
            throw new ServletException(e.getMessage(), e);
        }
    }

    /**
     * Returns the first value stored for a field in a servlet parameter map.
     *
     * @param queryMap map of parameter name to String[] of values
     * @param field parameter name to look up
     * @return the first value, or null if the field is absent or has no
     *         values
     */
    private String getMapParam(Map queryMap, String field) {
        String arr[] = (String[]) queryMap.get(field);
        if (arr == null || arr.length == 0) {
            return null;
        }
        return arr[0];
    }

    /**
     * Builds a WaybackRequest from the CGI parameters of an HTTP request.
     *
     * <p>The first value of every request parameter is copied into the
     * WaybackRequest under the parameter's own name, and the value of the
     * "REFERER" header is stored under
     * {@link WaybackConstants#REQUEST_REFERER_URL}.
     *
     * @param httpRequest the incoming HTTP request
     * @return a WaybackRequest populated from the request
     * @throws BadQueryException declared for callers; not thrown by the
     *         code in this method
     */
    public WaybackRequest parseCGIRequest(HttpServletRequest httpRequest)
            throws BadQueryException {

        WaybackRequest wbRequest = new WaybackRequest();
        Map queryMap = httpRequest.getParameterMap();
        Set keys = queryMap.keySet();
        Iterator itr = keys.iterator();
        while (itr.hasNext()) {
            String key = (String) itr.next();
            String val = getMapParam(queryMap, key);
            wbRequest.put(key, val);
        }

        String referer = httpRequest.getHeader("REFERER");
        if (referer == null) {
            // The original assigned null here, which was a no-op. Use an
            // empty string so a missing header never stores a null value.
            // NOTE(review): WaybackRequest is described as Properties-based,
            // which would reject null values -- confirm against that class.
            referer = "";
        }
        wbRequest.put(WaybackConstants.REQUEST_REFERER_URL, referer);

        return wbRequest;
    }

    /**
     * Finds the search result whose capture date is nearest to the exact
     * date requested.
     *
     * <p>Results whose capture date cannot be parsed are skipped. When
     * several results are equally near, the first one encountered wins.
     *
     * @param results search results to scan
     * @param wbRequest request carrying the wanted exact date
     * @return the closest result, or null if there are no results with a
     *         parseable capture date
     * @throws ParseException if the requested exact date cannot be parsed
     */
    private SearchResult getClosest(SearchResults results,
            WaybackRequest wbRequest) throws ParseException {

        SearchResult closest = null;
        long closestDistance = 0;
        SearchResult cur = null;
        Timestamp wantTimestamp;
        wantTimestamp = Timestamp.parseBefore(
                wbRequest.get(WaybackConstants.REQUEST_EXACT_DATE));

        Iterator itr = results.iterator();
        while (itr.hasNext()) {
            cur = (SearchResult) itr.next();
            long curDistance;
            try {
                Timestamp curTimestamp = Timestamp.parseBefore(
                        cur.get(WaybackConstants.RESULT_CAPTURE_DATE));
                curDistance = curTimestamp.absDistanceFromTimestamp(
                        wantTimestamp);
            } catch (ParseException e) {
                // Unparseable capture date: ignore this result.
                continue;
            }
            if ((closest == null) || (curDistance < closestDistance)) {
                closest = cur;
                closestDistance = curDistance;
            }
        }
        return closest;
    }

    /**
     * Replays the archived resource closest to the requested date.
     *
     * <p>A WaybackException raised while querying, retrieving or rendering
     * is passed to the renderer's exception page. Any other exception is
     * rethrown as a ServletException with the original attached as cause.
     *
     * @param httpRequest the incoming HTTP request
     * @param httpResponse the response to render into
     * @throws IOException if rendering the exception page fails with an
     *         I/O error
     * @throws ServletException if an unexpected error occurs
     */
    public void doGet(HttpServletRequest httpRequest,
            HttpServletResponse httpResponse) throws IOException,
            ServletException {

        // A request object may already have been attached to the HTTP
        // request; CGI parsing below is only the fallback.
        WaybackRequest wbRequest = (WaybackRequest) httpRequest
                .getAttribute(WMREQUEST_ATTRIBUTE);

        ResourceIndex idx = wayback.getResourceIndex();
        ResourceStore store = wayback.getResourceStore();
        ReplayResultURIConverter uriConverter = wayback.getURIConverter();
        ReplayRenderer renderer = wayback.getReplayRenderer();

        try {

            if (wbRequest == null) {
                wbRequest = parseCGIRequest(httpRequest);
            }

            SearchResults results = idx.query(wbRequest);

            // NOTE(review): closest is null when no result has a parseable
            // capture date; how retrieveResource treats null is not visible
            // here -- confirm against the ResourceStore implementations.
            SearchResult closest = getClosest(results, wbRequest);

            // TODO loop here looking for closest online/available version?
            // OPTIMIZ maybe assume version is here and redirect now if not
            // exactly the date user requested, before retrieving it...
            Resource resource = store.retrieveResource(closest);

            renderer.renderResource(httpRequest, httpResponse, wbRequest,
                    closest, resource, uriConverter);

        } catch (WaybackException wbe) {

            renderer.renderException(httpRequest, httpResponse, wbRequest,
                    wbe);

        } catch (Exception e) {
            // TODO show something Wayback'ish to the user rather than letting
            // the container deal?
            // The cause is chained so the container reports the full stack
            // trace, replacing the earlier printStackTrace() call.
            throw new ServletException(e.getMessage(), e);
        }
    }
}
|