From: Brad <bra...@us...> - 2005-11-16 03:11:37
|
Update of /cvsroot/archive-access/archive-access/projects/wayback/src/java/org/archive/wayback/proxy In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv30992/src/java/org/archive/wayback/proxy Added Files: ReplayFilter.java ResultURIConverter.java RawReplayRenderer.java Log Message: Massive overhaul decomposing into three main categories of changes: 1) All internal datatypes are now extensible (currently Properties, but should be Maps) including: a) WaybackRequest (was WBRequest) b) SearchResults (was ResourceResults) c) SearchResult (was ResourceResult) d) Resource so that there is no longer an assumption of Archival URL queries, or "CDX-style" index results. This will put more responsibility on the UI components to interrogate SearchResults to decide how to render, but should enable extension to data returned from Indexes, as well as allow far more flexibility in queries, predominantly geared towards free-text searching. This is still somewhat clunky, as there are no convenience accessor methods, so all users refer to constants when interacting with them. 2) Major cleanup of servlet and filter interaction with servlet container. ReplayUI and QueryUI are now just plain old servlets, and filters can be optionally added to allow non-CGI argument requests to be coerced into standard WaybackRequest objects. 3) Alternate "Proxy" Replay mode is now functional, and some work has been done towards an alternate Nutch ResourceIndex. Currently the web.xml contains example configurations for both Proxy and Archival Url replay modes, but the Proxy-related configurations are commented out. Proxy mode *requires* changing the servlet context to ROOT. ArchivalUrl replay mode works as ROOT context and as any (I think) other context. There are some cosmetic double-slash issues to work out. --- NEW FILE: ResultURIConverter.java --- /* ProxyResultURIConverter * * $Id: ResultURIConverter.java,v 1.1 2005/11/16 03:11:29 bradtofel Exp $ * * Created on 4:19:21 PM Nov 15, 2005. 
* * Copyright (C) 2005 Internet Archive. * * This file is part of wayback. * * wayback is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser Public License as published by * the Free Software Foundation; either version 2.1 of the License, or * any later version. * * wayback is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser Public License for more details. * * You should have received a copy of the GNU Lesser Public License * along with wayback; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ package org.archive.wayback.proxy; import java.util.Properties; import org.apache.commons.httpclient.URIException; import org.archive.net.UURI; import org.archive.net.UURIFactory; import org.archive.wayback.ReplayResultURIConverter; import org.archive.wayback.WaybackConstants; import org.archive.wayback.core.SearchResult; import org.archive.wayback.exception.ConfigurationException; /** * * * @author brad * @version $Date: 2005/11/16 03:11:29 $, $Revision: 1.1 $ */ public class ResultURIConverter implements ReplayResultURIConverter { /* (non-Javadoc) * @see org.archive.wayback.ReplayResultURIConverter#init(java.util.Properties) */ public void init(Properties p) throws ConfigurationException { } /* (non-Javadoc) * @see org.archive.wayback.ReplayResultURIConverter#makeReplayURI(org.archive.wayback.core.ResourceResult) */ public String makeReplayURI(SearchResult result) { String finalUrl = result.get(WaybackConstants.RESULT_URL); if(!finalUrl.startsWith("http://")) { finalUrl = "http://" + finalUrl; } return finalUrl; } /** * @return Returns the replayUriPrefix. 
*/ public String getReplayUriPrefix() { return ""; } /* (non-Javadoc) * @see org.archive.wayback.ReplayResultURIConverter#makeRedirectReplayURI(org.archive.wayback.core.SearchResult, java.lang.String) */ public String makeRedirectReplayURI(SearchResult result, String url) { String finalUrl = url; try { UURI origURI = UURIFactory.getInstance(url); if(!origURI.isAbsoluteURI()) { String resultUrl = result.get(WaybackConstants.RESULT_URL); UURI absResultURI = UURIFactory.getInstance(resultUrl); UURI finalURI = absResultURI.resolve(url); finalUrl = finalURI.getEscapedURI(); } } catch (URIException e) { // TODO Auto-generated catch block e.printStackTrace(); } if(!finalUrl.startsWith("http://")) { finalUrl = "http://" + finalUrl; } return finalUrl; } } --- NEW FILE: RawReplayRenderer.java --- /* ReplayRenderer * * $Id: RawReplayRenderer.java,v 1.1 2005/11/16 03:11:29 bradtofel Exp $ * * Created on 5:50:38 PM Oct 31, 2005. * * Copyright (C) 2005 Internet Archive. * * This file is part of wayback. * * wayback is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser Public License as published by * the Free Software Foundation; either version 2.1 of the License, or * any later version. * * wayback is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser Public License for more details. 
* * You should have received a copy of the GNU Lesser Public License * along with wayback; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ package org.archive.wayback.proxy; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.util.Enumeration; import java.util.Properties; import java.util.regex.Matcher; import java.util.regex.Pattern; import javax.servlet.RequestDispatcher; import javax.servlet.ServletException; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import org.archive.wayback.ReplayRenderer; import org.archive.wayback.ReplayResultURIConverter; import org.archive.wayback.WaybackConstants; import org.archive.wayback.core.Resource; import org.archive.wayback.core.SearchResult; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.exception.ConfigurationException; import org.archive.wayback.exception.WaybackException; /** * * * @author brad * @version $Date: 2005/11/16 03:11:29 $, $Revision: 1.1 $ */ public class RawReplayRenderer implements ReplayRenderer { private final static String JSP_PATH = "replayui.jsppath"; private final static String HTTP_LENGTH_HEADER= "Content-Length"; private final static String HTTP_LOCATION_HEADER = "Location"; protected final Pattern IMAGE_REGEX = Pattern .compile(".*\\.(jpg|jpeg|gif|png|bmp|tiff|tif)$"); private String jspPath; private final String ERROR_JSP = "ErrorResult.jsp"; private final String ERROR_JAVASCRIPT = "ErrorJavascript.jsp"; private final String ERROR_IMAGE = "error_image.gif"; public void init(Properties p) throws ConfigurationException { this.jspPath = (String) p.get(JSP_PATH); if (this.jspPath == null || this.jspPath.length() <= 0) { throw new IllegalArgumentException("Failed to find " + JSP_PATH); } } private boolean requestIsEmbedded(HttpServletRequest httpRequest, WaybackRequest wbRequest) { String referer = 
wbRequest.get(WaybackConstants.REQUEST_REFERER_URL); return (referer != null && referer.length() > 0); } private boolean requestIsImage (HttpServletRequest httpRequest, WaybackRequest wbRequest) { String requestUrl = wbRequest.get(WaybackConstants.REQUEST_URL); Matcher matcher = IMAGE_REGEX.matcher(requestUrl); return (matcher != null && matcher.matches()); } private boolean requestIsJavascript (HttpServletRequest httpRequest, WaybackRequest wbRequest) { String requestUrl = wbRequest.get(WaybackConstants.REQUEST_URL); return requestUrl.endsWith(".js"); } // TODO special handling for Javascript and Images: send empty image // or empty text file to avoid client errors public void renderException(HttpServletRequest httpRequest, HttpServletResponse httpResponse, WaybackRequest wbRequest, WaybackException exception) throws ServletException, IOException { String finalJspPath = jspPath + "/" + ERROR_JSP; // is this object embedded? if(requestIsEmbedded(httpRequest,wbRequest)) { if(requestIsJavascript(httpRequest,wbRequest)) { finalJspPath = jspPath + "/" + ERROR_JAVASCRIPT; } else if(requestIsImage(httpRequest,wbRequest)) { finalJspPath = jspPath + "/" + ERROR_IMAGE; } } httpRequest.setAttribute("exception", exception); RequestDispatcher dispatcher = httpRequest .getRequestDispatcher(finalJspPath); dispatcher.forward(httpRequest, httpResponse); } public void renderResource(HttpServletRequest httpRequest, HttpServletResponse httpResponse, WaybackRequest wbRequest, SearchResult result, Resource resource, ReplayResultURIConverter uriConverter) throws ServletException, IOException { resource.parseHeaders(); copyRecordHttpHeader(httpResponse, resource, uriConverter, result, false); copy(resource, httpResponse.getOutputStream()); } protected void copyRecordHttpHeader(HttpServletResponse response, Resource resource, ReplayResultURIConverter uriConverter, SearchResult result, boolean noLength) throws IOException { Properties headers = resource.getHttpHeaders(); int code = 
resource.getStatusCode(); // Only return legit status codes -- don't return any minus // codes, etc. if (code <= HttpServletResponse.SC_CONTINUE) { String identifier = ""; response.sendError(HttpServletResponse.SC_INTERNAL_SERVER_ERROR, "Bad status code " + code + " (" + identifier + ")."); return; } response.setStatus(code); if (headers != null) { // Copy all headers to the response -- even date and // server, but don't copy Content-Length if arguments indicate for (Enumeration e = headers.keys(); e.hasMoreElements();) { String key = (String) e.nextElement(); String value = (String) headers.get(key); if (noLength) { if (-1 != key.indexOf(HTTP_LENGTH_HEADER)) { continue; } } if(0 == key.indexOf(HTTP_LOCATION_HEADER)) { value = uriConverter.makeRedirectReplayURI(result,value); } response.setHeader(key, (value == null) ? "" : value); } } } protected void copy(InputStream is, OutputStream os) throws IOException { // TODO: Don't allocate everytime. byte[] buffer = new byte[4 * 1024]; for (int r = -1; (r = is.read(buffer, 0, buffer.length)) != -1;) { os.write(buffer, 0, r); } } } --- NEW FILE: ReplayFilter.java --- /* ProxyReplayFilter * * $Id: ReplayFilter.java,v 1.1 2005/11/16 03:11:29 bradtofel Exp $ * * Created on 6:08:59 PM Nov 14, 2005. * * Copyright (C) 2005 Internet Archive. * * This file is part of wayback. * * wayback is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser Public License as published by * the Free Software Foundation; either version 2.1 of the License, or * any later version. * * wayback is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser Public License for more details. 
* * You should have received a copy of the GNU Lesser Public License * along with wayback; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
package org.archive.wayback.proxy;

import java.text.ParseException;
import java.util.List;

import javax.servlet.FilterConfig;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;

import org.archive.util.InetAddressUtil;
import org.archive.wayback.WaybackConstants;
import org.archive.wayback.core.Timestamp;
import org.archive.wayback.core.RequestFilter;
import org.archive.wayback.core.WaybackRequest;

/**
 * RequestFilter that turns every request not addressed to one of this
 * machine's own host names into a replay WaybackRequest for the requested
 * URL, dated with the current timestamp.
 *
 * @author brad
 * @version $Date: 2005/11/16 03:11:29 $, $Revision: 1.1 $
 */
public class ReplayFilter extends RequestFilter {

    /** Host names of this machine, collected in {@link #init(FilterConfig)}. */
    private List localhostNames = null;

    /**
     * Default constructor.
     */
    public ReplayFilter() {
        super();
    }

    /**
     * Collects the local host names, then delegates to the superclass
     * initialisation.
     *
     * @param c filter configuration handed on to the superclass
     * @throws ServletException if the superclass initialisation fails
     */
    public void init(final FilterConfig c) throws ServletException {
        localhostNames = InetAddressUtil.getAllLocalHostNames();
        super.init(c);
    }

    /**
     * Builds a replay request from the incoming HTTP request.
     *
     * @param httpRequest incoming servlet request
     * @return null when the request is addressed to a local host name,
     *         otherwise a WaybackRequest holding the reconstructed URL, the
     *         replay request type, the referer (empty string when absent)
     *         and the current date
     * @see org.archive.wayback.core.RequestFilter#parseRequest(javax.servlet.http.HttpServletRequest)
     */
    @Override
    protected WaybackRequest parseRequest(HttpServletRequest httpRequest) {
        if (isLocalRequest(httpRequest)) {
            return null;
        }

        final String host = httpRequest.getServerName();
        final String path = httpRequest.getRequestURI();
        final String query = httpRequest.getQueryString();
        final String scheme = httpRequest.getScheme();

        // Reassemble scheme://host/path[?query]; the server port is not
        // part of the reconstructed URL.
        StringBuilder url = new StringBuilder();
        url.append(scheme).append("://").append(host).append(path);
        if (query != null) {
            url.append("?").append(query);
        }

        WaybackRequest parsed = new WaybackRequest();
        parsed.put(WaybackConstants.REQUEST_URL, url.toString());
        parsed.put(WaybackConstants.REQUEST_TYPE,
                WaybackConstants.REQUEST_REPLAY_QUERY);

        String referer = httpRequest.getHeader("REFERER");
        parsed.put(WaybackConstants.REQUEST_REFERER_URL,
                (referer == null) ? "" : referer);

        try {
            String now = Timestamp.currentTimestamp().getDateStr();
            parsed.put(WaybackConstants.REQUEST_EXACT_DATE, now);
        } catch (ParseException e) {
            // Shouldn't happen...
            e.printStackTrace();
        }
        return parsed;
    }

    /**
     * @param httpRequest incoming servlet request
     * @return true when the request's server name is one of the local host
     *         names collected at initialisation
     */
    protected boolean isLocalRequest(HttpServletRequest httpRequest) {
        String host = httpRequest.getServerName();
        return localhostNames.contains(host);
    }
}
|