From: <bra...@us...> - 2007-08-23 21:09:35
|
Revision: 1913 http://archive-access.svn.sourceforge.net/archive-access/?rev=1913&view=rev Author: bradtofel Date: 2007-08-23 14:09:34 -0700 (Thu, 23 Aug 2007) Log Message: ----------- REFACTOR: entire ReplayUI refactoring, splitting the bulk of the code into several org.archive.wayback.replay.* utility classes, which make the actual ReplayRenderers quite small. Also introduces a new ReplayDispatcher interface, which inspects a Resource, and dispatches the render operation to the correct ReplayRenderer implementation. Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/ReplayDispatcher.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/BaseReplayDispatcher.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/DateRedirectReplayRenderer.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/HTMLPage.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/HttpHeaderOperation.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/HttpHeaderProcessor.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/StringHttpServletResponseWrapper.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TransparentReplayRenderer.java Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/ReplayDispatcher.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/ReplayDispatcher.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/ReplayDispatcher.java 2007-08-23 21:09:34 UTC (rev 1913) @@ -0,0 +1,59 @@ +/* ReplayDispatcher + * + * $Id$ + * + * 
Created on 6:10:18 PM Aug 9, 2007. + * + * Copyright (C) 2007 Internet Archive. + * + * This file is part of wayback-core. + * + * wayback-core is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback-core is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with wayback-core; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback; + +import java.io.IOException; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.archive.wayback.core.WaybackRequest; +import org.archive.wayback.exception.WaybackException; + +/** + * + * + * @author brad + * @version $Date$, $Revision$ + */ +public interface ReplayDispatcher extends ReplayRenderer { + /** + * Render the contents of a WaybackException in either html, javascript, or + * css format, depending on the guessed context, so errors in embedded + * documents do not cause unneeded errors in the embedding document. 
+ * + * @param httpRequest + * @param httpResponse + * @param wbRequest + * @param exception + * @throws ServletException + * @throws IOException + */ + public void renderException(HttpServletRequest httpRequest, + HttpServletResponse httpResponse, WaybackRequest wbRequest, + WaybackException exception) throws ServletException, IOException; + +} Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/BaseReplayDispatcher.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/BaseReplayDispatcher.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/BaseReplayDispatcher.java 2007-08-23 21:09:34 UTC (rev 1913) @@ -0,0 +1,176 @@ +/* ReplayRendererDispatcher + * + * $Id$ + * + * Created on 5:23:35 PM Aug 8, 2007. + * + * Copyright (C) 2007 Internet Archive. + * + * This file is part of wayback-core. + * + * wayback-core is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback-core is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with wayback-core; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.replay; + +import java.io.IOException; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import javax.servlet.RequestDispatcher; +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.archive.wayback.ReplayDispatcher; +import org.archive.wayback.ReplayRenderer; +import org.archive.wayback.ResultURIConverter; +import org.archive.wayback.WaybackConstants; +import org.archive.wayback.core.Resource; +import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.SearchResults; +import org.archive.wayback.core.UIResults; +import org.archive.wayback.core.WaybackRequest; +import org.archive.wayback.exception.WaybackException; + +/** + * + * + * @author brad + * @version $Date$, $Revision$ + */ +public abstract class BaseReplayDispatcher implements ReplayDispatcher { + + private String errorJsp = "/jsp/HTMLError.jsp"; + private String imageErrorJsp = "/jsp/HTMLError.jsp"; + private String javascriptErrorJsp = "/jsp/JavaScriptError.jsp"; + private String cssErrorJsp = "/jsp/CSSError.jsp"; + + protected final Pattern IMAGE_REGEX = Pattern + .compile(".*\\.(jpg|jpeg|gif|png|bmp|tiff|tif)$"); + + /* ERROR HANDLING RESPONSES: */ + + private boolean requestIsEmbedded(HttpServletRequest httpRequest, + WaybackRequest wbRequest) { + // without a wbRequest, assume it is not embedded: send back HTML + if (wbRequest == null) { + return false; + } + String referer = wbRequest.get(WaybackConstants.REQUEST_REFERER_URL); + return (referer != null && referer.length() > 0); + } + + private boolean requestIsImage(HttpServletRequest httpRequest, + WaybackRequest wbRequest) { + String requestUrl = 
wbRequest.get(WaybackConstants.REQUEST_URL); + if (requestUrl == null) + return false; + Matcher matcher = IMAGE_REGEX.matcher(requestUrl); + return (matcher != null && matcher.matches()); + } + + private boolean requestIsJavascript(HttpServletRequest httpRequest, + WaybackRequest wbRequest) { + + String requestUrl = wbRequest.get(WaybackConstants.REQUEST_URL); + return (requestUrl != null) && requestUrl.endsWith(".js"); + } + + private boolean requestIsCSS(HttpServletRequest httpRequest, + WaybackRequest wbRequest) { + + String requestUrl = wbRequest.get(WaybackConstants.REQUEST_URL); + return (requestUrl != null) && requestUrl.endsWith(".css"); + } + + /* + * (non-Javadoc) + * + * @see org.archive.wayback.ReplayRenderer#renderException(javax.servlet.http.HttpServletRequest, + * javax.servlet.http.HttpServletResponse, + * org.archive.wayback.core.WaybackRequest, + * org.archive.wayback.exception.WaybackException) + */ + public void renderException(HttpServletRequest httpRequest, + HttpServletResponse httpResponse, WaybackRequest wbRequest, + WaybackException exception) throws ServletException, IOException { + + // the "standard HTML" response handler: + String finalJspPath = errorJsp; + + // try to not cause client errors by sending the HTML response if + // this request is ebedded, and is obviously one of the special types: + if (requestIsEmbedded(httpRequest, wbRequest)) { + + if (requestIsJavascript(httpRequest, wbRequest)) { + + finalJspPath = javascriptErrorJsp; + + } else if (requestIsCSS(httpRequest, wbRequest)) { + + finalJspPath = cssErrorJsp; + + } else if (requestIsImage(httpRequest, wbRequest)) { + + finalJspPath = imageErrorJsp; + + } + } + + httpRequest.setAttribute("exception", exception); + UIResults uiResults = new UIResults(wbRequest); + uiResults.storeInRequest(httpRequest, finalJspPath); + + RequestDispatcher dispatcher = httpRequest + .getRequestDispatcher(finalJspPath); + + dispatcher.forward(httpRequest, httpResponse); + } + + /** + * @param 
wbRequest + * @param result + * @param resource + * @return the correct ReplayRenderer for the Resource + */ + public abstract ReplayRenderer getRenderer(WaybackRequest wbRequest, + SearchResult result, Resource resource); + + /* + * (non-Javadoc) + * + * @see org.archive.wayback.ReplayRenderer#renderResource(javax.servlet.http.HttpServletRequest, + * javax.servlet.http.HttpServletResponse, + * org.archive.wayback.core.WaybackRequest, + * org.archive.wayback.core.SearchResult, + * org.archive.wayback.core.Resource, + * org.archive.wayback.ResultURIConverter, + * org.archive.wayback.core.SearchResults) + */ + public void renderResource(HttpServletRequest httpRequest, + HttpServletResponse httpResponse, WaybackRequest wbRequest, + SearchResult result, Resource resource, + ResultURIConverter uriConverter, SearchResults results) + throws ServletException, IOException { + + ReplayRenderer renderer = getRenderer(wbRequest, result, resource); + try { + renderer.renderResource(httpRequest, httpResponse, wbRequest, result, + resource, uriConverter, results); + } catch (WaybackException e) { + renderException(httpRequest, httpResponse, wbRequest, e); + } + } +} Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/DateRedirectReplayRenderer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/DateRedirectReplayRenderer.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/DateRedirectReplayRenderer.java 2007-08-23 21:09:34 UTC (rev 1913) @@ -0,0 +1,63 @@ +/* DateRedirectReplayRenderer + * + * $Id$ + * + * Created on 11:42:50 AM Aug 9, 2007. + * + * Copyright (C) 2007 Internet Archive. + * + * This file is part of wayback-core. 
+ * + * wayback-core is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback-core is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with wayback-core; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.replay; + +import java.io.IOException; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.archive.wayback.ReplayRenderer; +import org.archive.wayback.ResultURIConverter; +import org.archive.wayback.core.Resource; +import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.SearchResults; +import org.archive.wayback.core.WaybackRequest; + +/** + * + * + * @author brad + * @version $Date$, $Revision$ + */ +public class DateRedirectReplayRenderer implements ReplayRenderer { + + /* (non-Javadoc) + * @see org.archive.wayback.ReplayRenderer#renderResource(javax.servlet.http.HttpServletRequest, javax.servlet.http.HttpServletResponse, org.archive.wayback.core.WaybackRequest, org.archive.wayback.core.SearchResult, org.archive.wayback.core.Resource, org.archive.wayback.ResultURIConverter, org.archive.wayback.core.SearchResults) + */ + public void renderResource(HttpServletRequest httpRequest, + HttpServletResponse httpResponse, WaybackRequest wbRequest, + SearchResult result, Resource resource, + ResultURIConverter uriConverter, SearchResults results) + throws ServletException, IOException { + + // redirect to the better version: 
+ String url = result.getAbsoluteUrl(); + String captureDate = result.getCaptureDate(); + String betterURI = uriConverter.makeReplayURI(captureDate,url); + httpResponse.sendRedirect(betterURI); + } +} Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/HTMLPage.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/HTMLPage.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/HTMLPage.java 2007-08-23 21:09:34 UTC (rev 1913) @@ -0,0 +1,376 @@ +/* HTMLPage + * + * $Id$ + * + * Created on 12:39:52 PM Aug 7, 2007. + * + * Copyright (C) 2007 Internet Archive. + * + * This file is part of wayback-core. + * + * wayback-core is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback-core is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with wayback-core; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.replay; + +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.OutputStream; +import java.io.UnsupportedEncodingException; +import java.text.ParseException; +import java.util.Map; + +import javax.servlet.RequestDispatcher; +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.archive.wayback.ResultURIConverter; +import org.archive.wayback.core.Resource; +import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.SearchResults; +import org.archive.wayback.core.WaybackRequest; +import org.archive.wayback.query.UIQueryResults; +import org.mozilla.universalchardet.UniversalDetector; + +/** + * Class which wraps functionality for converting a Resource(InputStream + + * HTTP headers) into a StringBuilder, performing several common URL + * resolution methods against that StringBuilder, inserting arbitrary Strings + * into the page, and then converting the page back to a byte array. + * + * @author brad + * @version $Date$, $Revision$ + */ +public class HTMLPage { + + // hand off this many bytes to the chardet library + private final static int MAX_CHARSET_READAHEAD = 65536; + // ...if it also includes "charset=" + private final static String CHARSET_TOKEN = "charset="; + // ...and if the chardet library fails, use the Content-Type header + private final static String HTTP_CONTENT_TYPE_HEADER = "Content-Type"; + // if documents are marked up before sending to clients, the data is + // decoded into a String in chunks. This is how big a chunk to decode with. 
+ private final static int C_BUFFER_SIZE = 4096; + + private Resource resource = null; + private SearchResult result = null; + private ResultURIConverter uriConverter = null; + /** + * the internal StringBuilder + */ + public StringBuilder sb = null; + private String charSet = null; + private byte[] resultBytes = null; + + /** + * @param resource + * @param result + * @param uriConverter + */ + public HTMLPage(Resource resource, SearchResult result, + ResultURIConverter uriConverter) { + this.resource = resource; + this.result = result; + this.uriConverter = uriConverter; + } + + private String contentTypeToCharset(final String contentType) { + int offset = contentType.indexOf(CHARSET_TOKEN); + if (offset != -1) { + return contentType.substring(offset + CHARSET_TOKEN.length()); + } + return null; + } + + /** + * Attempt to divine the character encoding of the document from the + * Content-Type HTTP header (with a "charset=") + * + * @param resource + * @return String character set found or null if the header was not present + * @throws IOException + */ + protected String getCharsetFromHeaders(Resource resource) + throws IOException { + + String charsetName = null; + + Map<String,String> httpHeaders = resource.getHttpHeaders(); + String ctype = httpHeaders.get(HTTP_CONTENT_TYPE_HEADER); + if (ctype != null) { + charsetName = contentTypeToCharset(ctype); + } + return charsetName; + } + + /** + * Attempt to find a META tag in the HTML that hints at the character set + * used to write the document. 
+ * + * @param resource + * @return String character set found from META tags in the HTML + * @throws IOException + */ + protected String getCharsetFromMeta(Resource resource) throws IOException { + String charsetName = null; + + byte[] bbuffer = new byte[MAX_CHARSET_READAHEAD]; + resource.mark(MAX_CHARSET_READAHEAD); + resource.read(bbuffer, 0, MAX_CHARSET_READAHEAD); + resource.reset(); + // convert to UTF-8 String -- which hopefully will not mess up the + // characters we're interested in... + StringBuilder sb = new StringBuilder(new String(bbuffer,"UTF-8")); + String metaContentType = TagMagix.getTagAttrWhere(sb, "META", + "content", "http-equiv", "Content-Type"); + if(metaContentType != null) { + charsetName = contentTypeToCharset(metaContentType); + } + return charsetName; + } + + /** + * Attempts to figure out the character set of the document using + * the excellent juniversalchardet library. + * + * @param resource + * @return String character encoding found, or null if nothing looked good. + * @throws IOException + */ + protected String getCharsetFromBytes(Resource resource) throws IOException { + String charsetName = null; + + byte[] bbuffer = new byte[MAX_CHARSET_READAHEAD]; + // (1) + UniversalDetector detector = new UniversalDetector(null); + + // (2) + resource.mark(MAX_CHARSET_READAHEAD); + int len = resource.read(bbuffer, 0, MAX_CHARSET_READAHEAD); + resource.reset(); + detector.handleData(bbuffer, 0, len); + // (3) + detector.dataEnd(); + // (4) + charsetName = detector.getDetectedCharset(); + + // (5) + detector.reset(); + + return charsetName; + } + + /** + * Use META tags, byte-character-detection, HTTP headers, hope, and prayer + * to figure out what character encoding is being used for the document. + * If nothing else works, assumes UTF-8 for now. 
+ * + * @param resource + * @return String charset for Resource + * @throws IOException + */ + protected String guessCharset() throws IOException { + + String charSet = getCharsetFromMeta(resource); + if(charSet == null) { + charSet = getCharsetFromBytes(resource); + if(charSet == null) { + charSet = getCharsetFromHeaders(resource); + if(charSet == null) { + charSet = "UTF-8"; + } + } + } + return charSet; + } + + /** + * Update URLs inside the page, so those URLs which must be correct at + * page load time resolve correctly to absolute URLs. + * + * This means ensuring there is a BASE HREF tag, adding one if missing, + * and then resolving: + * FRAME-SRC, META-URL, LINK-HREF, SCRIPT-SRC + * tag-attribute pairs against either the existing BASE-HREF, or the + * page's absolute URL if it was missing. + */ + public void resolvePageUrls() { + + // TODO: get url from Resource instead of SearchResult? + String pageUrl = result.getAbsoluteUrl(); + String captureDate = result.getCaptureDate(); + + String existingBaseHref = TagMagix.getBaseHref(sb); + if (existingBaseHref != null) { + pageUrl = existingBaseHref; + } + + TagMagix.markupTagREURIC(sb, uriConverter, captureDate, pageUrl, + "FRAME", "SRC"); +// TagMagix.markupTagREURIC(page, uriConverter, captureDate, pageUrl, +// "IFRAME", "SRC"); + TagMagix.markupTagREURIC(sb, uriConverter, captureDate, pageUrl, + "META", "URL"); + TagMagix.markupTagREURIC(sb, uriConverter, captureDate, pageUrl, + "LINK", "HREF"); + // TODO: The classic WM added a js_ to the datespec, so NotInArchives + // can return an valid javascript doc, and not cause Javascript errors. 
+ TagMagix.markupTagREURIC(sb, uriConverter, captureDate, pageUrl, + "SCRIPT", "SRC"); + + if (existingBaseHref == null) { + String baseTag = "<base href=\"" + pageUrl + "\" />"; + int insertPoint = sb.indexOf("<head>"); + if (-1 == insertPoint) { + insertPoint = sb.indexOf("<HEAD>"); + } + if (-1 == insertPoint) { + insertPoint = 0; + } else { + insertPoint += 6; // just after the tag + } + sb.insert(insertPoint, baseTag); + } + } + + + /** + * @param charSet + * @throws IOException + */ + public void readFully(String charSet) throws IOException { + if(charSet == null) { + charSet = guessCharset(); + } + this.charSet = charSet; + int recordLength = (int) resource.getRecordLength(); + + // convert bytes to characters for charset: + InputStreamReader isr = new InputStreamReader(resource, charSet); + + char[] cbuffer = new char[C_BUFFER_SIZE]; + + // slurp the whole thing into RAM: + sb = new StringBuilder(recordLength); + for (int r = -1; (r = isr.read(cbuffer, 0, C_BUFFER_SIZE)) != -1;) { + sb.append(cbuffer, 0, r); + } + } + + /** + * Read bytes from input stream, using best-guess for character encoding + * @throws IOException + */ + public void readFully() throws IOException { + readFully(null); + } + + /** + * @return raw bytes contained in internal StringBuilder + * @throws UnsupportedEncodingException + */ + public byte[] getBytes() throws UnsupportedEncodingException { + if(sb == null) { + throw new IllegalStateException("No interal StringBuffer"); + } + if(resultBytes == null) { + resultBytes = sb.toString().getBytes(charSet); + } + return resultBytes; + } + + /** + * Write the contents of the page to the client. 
+ * + * @param os + * @throws IOException + */ + public void writeToOutputStream(OutputStream os) throws IOException { + if(sb == null) { + throw new IllegalStateException("No interal StringBuffer"); + } + byte[] b; + try { + b = getBytes(); + } catch (UnsupportedEncodingException e) { + throw new RuntimeException(e); + } + os.write(b); + } + + /** + * @param toInsert + */ + public void insertAtEndOfBody(String toInsert) { + int insertPoint = sb.lastIndexOf("</body>"); + if (-1 == insertPoint) { + insertPoint = sb.lastIndexOf("</BODY>"); + } + if (-1 == insertPoint) { + insertPoint = sb.length(); + } + sb.insert(insertPoint,toInsert); + } + /** + * @param jspPath + * @param httpRequest + * @param httpResponse + * @param wbRequest + * @param results + * @return + * @throws IOException + * @throws ServletException + * @throws ParseException + */ + public String includeJspString(String jspPath, + HttpServletRequest httpRequest, HttpServletResponse httpResponse, + WaybackRequest wbRequest, SearchResults results) + throws ServletException, IOException { + + UIQueryResults uiResults = new UIQueryResults(httpRequest, wbRequest, + results, uriConverter); + + StringHttpServletResponseWrapper wrappedResponse = + new StringHttpServletResponseWrapper(httpResponse); + uiResults.storeInRequest(httpRequest,jspPath); + RequestDispatcher dispatcher = httpRequest.getRequestDispatcher(jspPath); + dispatcher.forward(httpRequest, wrappedResponse); + return wrappedResponse.getStringResponse(); + } + + /** + * @param jsUrl + * @return + */ + public String getJSIncludeString(final String jsUrl) { + return "<script type=\"text/javascript\" src=\"" + + jsUrl + "\" ></script>\n"; + } + + /** + * @return the charSet + */ + public String getCharSet() { + return charSet; + } + + /** + * @param charSet the charSet to set + */ + public void setCharSet(String charSet) { + this.charSet = charSet; + } +} Added: 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/HttpHeaderOperation.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/HttpHeaderOperation.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/HttpHeaderOperation.java 2007-08-23 21:09:34 UTC (rev 1913) @@ -0,0 +1,105 @@ +/* HttpHeaderProcessor + * + * $Id$ + * + * Created on 6:44:10 PM Aug 8, 2007. + * + * Copyright (C) 2007 Internet Archive. + * + * This file is part of wayback-core. + * + * wayback-core is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback-core is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with wayback-core; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.replay; + +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; + +import javax.servlet.http.HttpServletResponse; + +import org.archive.wayback.ResultURIConverter; +import org.archive.wayback.core.Resource; +import org.archive.wayback.core.SearchResult; +import org.archive.wayback.exception.BadContentException; + +/** + * + * + * @author brad + * @version $Date$, $Revision$ + */ +public class HttpHeaderOperation { + + /** + * @param resource + * @param httpResponse + * @throws BadContentException + */ + public static void copyHTTPMessageHeader(Resource resource, + HttpServletResponse httpResponse) throws BadContentException { + + // set status code from original resource (which will definitely confuse + // many clients...) + int code = resource.getStatusCode(); + // Only return legit status codes -- don't return any minus + // codes, etc. + if (code <= HttpServletResponse.SC_CONTINUE) { + throw new BadContentException("Bad status code " + code); + } + httpResponse.setStatus(code); + } + + /** + * @param resource + * @param result + * @param uriConverter + * @param filter + * @return + */ + public static Map<String,String> processHeaders(Resource resource, + SearchResult result, ResultURIConverter uriConverter, + HttpHeaderProcessor filter) { + HashMap<String,String> output = new HashMap<String,String>(); + + // copy all HTTP headers, as-is, sending "" instead of nulls. + Map<String,String> headers = resource.getHttpHeaders(); + if (headers != null) { + Iterator<String> itr = headers.keySet().iterator(); + while(itr.hasNext()) { + String key = itr.next(); + String value = headers.get(key); + value = (value == null) ? 
"" : value; + filter.filter(output, key, value, uriConverter, result); + } + } + return output; + } + + /** + * @param headers + * @param response + */ + public static void sendHeaders(Map<String,String> headers, + HttpServletResponse response) { + Iterator<String> itr = headers.keySet().iterator(); + while(itr.hasNext()) { + String key = itr.next(); + String value = headers.get(key); + value = (value == null) ? "" : value; + response.setHeader(key,value); + } + } +} Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/HttpHeaderProcessor.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/HttpHeaderProcessor.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/HttpHeaderProcessor.java 2007-08-23 21:09:34 UTC (rev 1913) @@ -0,0 +1,51 @@ +/* HeaderFilter + * + * $Id$ + * + * Created on 6:41:12 PM Aug 8, 2007. + * + * Copyright (C) 2007 Internet Archive. + * + * This file is part of wayback-core. + * + * wayback-core is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback-core is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with wayback-core; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.replay; + +import java.util.Map; + +import org.archive.wayback.ResultURIConverter; +import org.archive.wayback.core.SearchResult; + +/** + * + * + * @author brad + * @version $Date$, $Revision$ + */ +public interface HttpHeaderProcessor { + + /** + * optionally add header key:value to output for later returning to client + * + * @param output + * @param key + * @param value + * @param uriConverter + * @param result + */ + public void filter(Map<String,String> output, String key, String value, + final ResultURIConverter uriConverter, SearchResult result); +} Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/StringHttpServletResponseWrapper.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/StringHttpServletResponseWrapper.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/StringHttpServletResponseWrapper.java 2007-08-23 21:09:34 UTC (rev 1913) @@ -0,0 +1,58 @@ +/* StringHttpServletResponseWrapper + * + * $Id$ + * + * Created on 4:35:39 PM Aug 6, 2007. + * + * Copyright (C) 2007 Internet Archive. + * + * This file is part of wayback-core. + * + * wayback-core is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback-core is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with wayback-core; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.replay; + +import java.io.PrintWriter; +import java.io.StringWriter; + +import javax.servlet.http.HttpServletResponse; +import javax.servlet.http.HttpServletResponseWrapper; + +/** + * + * + * @author brad + * @version $Date$, $Revision$ + */ +public class StringHttpServletResponseWrapper extends HttpServletResponseWrapper { + + private StringWriter sw = new StringWriter(); + + /** + * @param response + */ + public StringHttpServletResponseWrapper(HttpServletResponse response) { + super(response); + } + public PrintWriter getWriter() { + return new PrintWriter(sw); + } + /** + * @return + */ + public String getStringResponse() { + return sw.toString(); + } +} Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TransparentReplayRenderer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TransparentReplayRenderer.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TransparentReplayRenderer.java 2007-08-23 21:09:34 UTC (rev 1913) @@ -0,0 +1,91 @@ +/* TransparentReplayRenderer + * + * $Id$ + * + * Created on 5:38:11 PM Aug 8, 2007. + * + * Copyright (C) 2007 Internet Archive. + * + * This file is part of wayback-core. + * + * wayback-core is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. 
+ * + * wayback-core is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with wayback-core; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.replay; + +import java.io.IOException; +import java.io.OutputStream; +import java.util.Map; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.archive.wayback.ReplayRenderer; +import org.archive.wayback.ResultURIConverter; +import org.archive.wayback.core.Resource; +import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.SearchResults; +import org.archive.wayback.core.WaybackRequest; +import org.archive.wayback.exception.BadContentException; + +/** + * ReplayRenderer implementation which returns the archive document as + * pristinely as possible -- no modifications to response code, HTTP headers, + * or original byte-stream. 
+ * + * @author brad + * @version $Date$, $Revision$ + */ +public class TransparentReplayRenderer implements ReplayRenderer, HttpHeaderProcessor { + + /** size, in bytes, of the buffer used to copy the resource to the response */ private final static int BUFFER_SIZE = 4096; + + /* (non-Javadoc) + * Parses the HTTP headers of the resource, hands the resource and + * httpResponse to HttpHeaderOperation.copyHTTPMessageHeader(), sends the + * headers produced by HttpHeaderOperation.processHeaders() (with this + * object as the HttpHeaderProcessor), then copies the bytes read from the + * resource to the response OutputStream. + * + * @see org.archive.wayback.ReplayRenderer#renderResource(javax.servlet.http.HttpServletRequest, javax.servlet.http.HttpServletResponse, org.archive.wayback.core.WaybackRequest, org.archive.wayback.core.SearchResult, org.archive.wayback.core.Resource, org.archive.wayback.ResultURIConverter, org.archive.wayback.core.SearchResults) + */ + public void renderResource(HttpServletRequest httpRequest, + HttpServletResponse httpResponse, WaybackRequest wbRequest, + SearchResult result, Resource resource, + ResultURIConverter uriConverter, SearchResults results) + throws ServletException, IOException, BadContentException { + + // cause underlying resource to read thru HTTP headers: + resource.parseHeaders(); + + HttpHeaderOperation.copyHTTPMessageHeader(resource, httpResponse); + + Map<String,String> headers = HttpHeaderOperation.processHeaders( + resource, result, uriConverter, this); + + HttpHeaderOperation.sendHeaders(headers, httpResponse); + + // and copy the raw byte-stream, up to BUFFER_SIZE bytes per read, + // stopping when resource.read() returns -1. + OutputStream os = httpResponse.getOutputStream(); + byte[] buffer = new byte[BUFFER_SIZE]; + for (int r = -1; (r = resource.read(buffer, 0, BUFFER_SIZE)) != -1;) { + os.write(buffer, 0, r); + } + } + + /* (non-Javadoc) + * @see org.archive.wayback.replay.HttpHeaderProcessor#filter(java.util.Map, java.lang.String, java.lang.String, org.archive.wayback.ResultURIConverter, org.archive.wayback.core.SearchResult) + */ + public void filter(Map<String, String> output, String key, String value, + ResultURIConverter uriConverter, SearchResult result) { + + // copy all HTTP headers, as-is: every key/value is put into output + // unchanged; uriConverter and result are not used here. + output.put(key, value); + } +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |