From: <bra...@us...> - 2008-07-22 02:42:48
|
Revision: 2479 http://archive-access.svn.sourceforge.net/archive-access/?rev=2479&view=rev Author: bradtofel Date: 2008-07-22 02:42:56 +0000 (Tue, 22 Jul 2008) Log Message: ----------- REFACTOR: old Dispatcher and ReplayRenderer now replaced with more functional SelectorReplayDispatcher, and DomainPrefixTextReplayRenderer. Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixTextReplayRenderer.java Removed Paths: ------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixReplayDispatcher.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixReplayRenderer.java Deleted: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixReplayDispatcher.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixReplayDispatcher.java 2008-07-22 02:31:44 UTC (rev 2478) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixReplayDispatcher.java 2008-07-22 02:42:56 UTC (rev 2479) @@ -1,83 +0,0 @@ -/* DomainPrefixReplayDispatcher - * - * $Id$ - * - * Created on 10:20:49 AM Aug 10, 2007. - * - * Copyright (C) 2007 Internet Archive. - * - * This file is part of wayback-core. - * - * wayback-core is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * any later version. - * - * wayback-core is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU Lesser Public License for more details. - * - * You should have received a copy of the GNU Lesser Public License - * along with wayback-core; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -package org.archive.wayback.domainprefix; - -import org.archive.wayback.ReplayDispatcher; -import org.archive.wayback.ReplayRenderer; -import org.archive.wayback.core.CaptureSearchResult; -import org.archive.wayback.core.Resource; -import org.archive.wayback.core.WaybackRequest; -import org.archive.wayback.replay.DateRedirectReplayRenderer; -import org.archive.wayback.replay.TransparentReplayRenderer; - -/** - * - * - * @author brad - * @version $Date$, $Revision$ - */ -public class DomainPrefixReplayDispatcher implements ReplayDispatcher { - - private final static String TEXT_HTML_MIME = "text/html"; - private final static String TEXT_XHTML_MIME = "application/xhtml"; - - // TODO: make this configurable - private final static long MAX_HTML_MARKUP_LENGTH = 1024 * 1024 * 5; - - private ReplayRenderer redirect = new DateRedirectReplayRenderer(); - - private ReplayRenderer transparent = new TransparentReplayRenderer(); - private DomainPrefixReplayRenderer html = new DomainPrefixReplayRenderer(); - - /* (non-Javadoc) - * @see org.archive.wayback.ReplayDispatcher#getRenderer(org.archive.wayback.core.WaybackRequest, org.archive.wayback.core.SearchResult, org.archive.wayback.core.Resource) - */ - public ReplayRenderer getRenderer(WaybackRequest wbRequest, - CaptureSearchResult result, Resource resource) { - // if the result is not for the exact date requested, redirect to the - // exact date. 
some capture dates are not 14 digits, only compare as - // many digits as are in the result date: - String reqDateStr = wbRequest.getReplayTimestamp(); - String resDateStr = result.getCaptureTimestamp(); - if((resDateStr.length() > reqDateStr.length()) || - !resDateStr.equals(reqDateStr.substring(0, resDateStr.length()))) { - return redirect; - } - - // HTML and XHTML docs smaller than some size get marked up as HTML - if (resource.getRecordLength() < MAX_HTML_MARKUP_LENGTH) { - - if (-1 != result.getMimeType().indexOf(TEXT_HTML_MIME)) { - return html; - } - if (-1 != result.getMimeType().indexOf(TEXT_XHTML_MIME)) { - return html; - } - } - - // everything else goes transparently: - return transparent; - } -} Deleted: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixReplayRenderer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixReplayRenderer.java 2008-07-22 02:31:44 UTC (rev 2478) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixReplayRenderer.java 2008-07-22 02:42:56 UTC (rev 2479) @@ -1,136 +0,0 @@ -/* DomainPrefixReplayRenderer - * - * $Id$ - * - * Created on 10:21:04 AM Aug 10, 2007. - * - * Copyright (C) 2007 Internet Archive. - * - * This file is part of wayback-core. - * - * wayback-core is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * any later version. - * - * wayback-core is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. 
- * - * You should have received a copy of the GNU Lesser Public License - * along with wayback-core; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -package org.archive.wayback.domainprefix; - -import java.io.IOException; -import java.util.Map; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import org.archive.wayback.ReplayRenderer; -import org.archive.wayback.ResultURIConverter; -import org.archive.wayback.core.Resource; -import org.archive.wayback.core.CaptureSearchResult; -import org.archive.wayback.core.CaptureSearchResults; -import org.archive.wayback.core.WaybackRequest; -import org.archive.wayback.exception.BadContentException; -import org.archive.wayback.replay.HTMLPage; -import org.archive.wayback.replay.HttpHeaderProcessor; -import org.archive.wayback.replay.HttpHeaderOperation; -import org.archive.wayback.util.Timestamp; -import org.archive.wayback.util.url.UrlOperations; - -/** - * - * - * @author brad - * @version $Date$, $Revision$ - */ -public class DomainPrefixReplayRenderer implements ReplayRenderer, HttpHeaderProcessor { - private final static String HTTP_LENGTH_HEADER = "Content-Length"; - private final static String HTTP_LENGTH_HEADER_UP = - HTTP_LENGTH_HEADER.toUpperCase(); - - private final static String HTTP_LOCATION_HEADER = "Location"; - private final static String HTTP_LOCATION_HEADER_UP = - HTTP_LOCATION_HEADER.toUpperCase(); - - private final static Pattern httpPattern = - Pattern.compile("(http://[^/]*/)"); - - /* (non-Javadoc) - * @see org.archive.wayback.ReplayRenderer#renderResource(javax.servlet.http.HttpServletRequest, javax.servlet.http.HttpServletResponse, org.archive.wayback.core.WaybackRequest, org.archive.wayback.core.SearchResult, org.archive.wayback.core.Resource, 
org.archive.wayback.ResultURIConverter, org.archive.wayback.core.SearchResults) - */ - public void renderResource(HttpServletRequest httpRequest, - HttpServletResponse httpResponse, WaybackRequest wbRequest, - CaptureSearchResult result, Resource resource, - ResultURIConverter uriConverter, CaptureSearchResults results) - throws ServletException, IOException, BadContentException { - - HttpHeaderOperation.copyHTTPMessageHeader(resource, httpResponse); - - Map<String,String> headers = HttpHeaderOperation.processHeaders( - resource, result, uriConverter, this); - - // Load content into an HTML page, and resolve load-time URLs: - HTMLPage page = new HTMLPage(resource,result,uriConverter); - page.readFully(); - - String resourceTS = result.getCaptureTimestamp(); - String captureTS = Timestamp.parseBefore(resourceTS).getDateStr(); - - - StringBuilder sb = page.sb; - StringBuffer replaced = new StringBuffer(sb.length()); - Matcher m = httpPattern.matcher(sb); - while(m.find()) { - String host = m.group(1); - String replacement = uriConverter.makeReplayURI(captureTS,host); - m.appendReplacement(replaced, replacement); - } - m.appendTail(replaced); - byte b[] = replaced.toString().getBytes(page.getCharSet()); - int bytes = b.length; - headers.put(HTTP_LENGTH_HEADER, String.valueOf(bytes)); - - HttpHeaderOperation.sendHeaders(headers, httpResponse); - httpResponse.getOutputStream().write(b); - - } - - /* (non-Javadoc) - * @see org.archive.wayback.replay.HeaderFilter#filter(java.util.Map, java.lang.String, java.lang.String, org.archive.wayback.ResultURIConverter, org.archive.wayback.core.SearchResult) - */ - public void filter(Map<String, String> output, String key, String value, - ResultURIConverter uriConverter, CaptureSearchResult result) { - String keyUp = key.toUpperCase(); - - // omit Content-Length header - if (keyUp.equals(HTTP_LENGTH_HEADER_UP)) { - return; - } - - // rewrite Location header URLs - if (keyUp.startsWith(HTTP_LOCATION_HEADER_UP)) { - - String baseUrl = 
result.getOriginalUrl(); - String resourceTS = result.getCaptureTimestamp(); - String captureTS = Timestamp.parseBefore(resourceTS).getDateStr(); - // by the spec, these should be absolute already, but just in case: - String u = UrlOperations.resolveUrl(baseUrl, value); - - output.put(key, uriConverter.makeReplayURI(captureTS,u)); - - } else { - // others go out as-is: - - output.put(key, value); - } - } -} Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixTextReplayRenderer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixTextReplayRenderer.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixTextReplayRenderer.java 2008-07-22 02:42:56 UTC (rev 2479) @@ -0,0 +1,87 @@ +/* DomainPrefixPageReplayRenderer + * + * $Id$ + * + * Created on 3:30:30 PM Jul 15, 2008. + * + * Copyright (C) 2008 Internet Archive. + * + * This file is part of wayback. + * + * wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.domainprefix; + +import java.io.IOException; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.archive.wayback.ResultURIConverter; +import org.archive.wayback.core.CaptureSearchResult; +import org.archive.wayback.core.CaptureSearchResults; +import org.archive.wayback.core.Resource; +import org.archive.wayback.core.WaybackRequest; +import org.archive.wayback.replay.TextDocument; +import org.archive.wayback.replay.TextReplayRenderer; +import org.archive.wayback.replay.HttpHeaderProcessor; +import org.archive.wayback.util.Timestamp; + +/** + * + * + * @author brad + * @version $Date$, $Revision$ + */ +public class DomainPrefixTextReplayRenderer extends TextReplayRenderer { + /** + * @param httpHeaderProcessor + */ + public DomainPrefixTextReplayRenderer( + HttpHeaderProcessor httpHeaderProcessor) { + super(httpHeaderProcessor); + } + + private final static Pattern httpPattern = + Pattern.compile("(http://[^/]*/)"); + + protected void updatePage(TextDocument page, + HttpServletRequest httpRequest, + HttpServletResponse httpResponse, WaybackRequest wbRequest, + CaptureSearchResult result, Resource resource, + ResultURIConverter uriConverter, CaptureSearchResults results) + throws ServletException, IOException { + String resourceTS = result.getCaptureTimestamp(); + String captureTS = Timestamp.parseBefore(resourceTS).getDateStr(); + + + StringBuilder sb = page.sb; + StringBuffer replaced = new StringBuffer(sb.length()); + Matcher m = httpPattern.matcher(sb); + while(m.find()) { + String host = m.group(1); + String replacement = 
uriConverter.makeReplayURI(captureTS,host); + m.appendReplacement(replaced, replacement); + } + m.appendTail(replaced); + // blasted StringBuilder/StringBuffer... gotta convert again... + page.sb.setLength(0); + page.sb.ensureCapacity(replaced.length()); + page.sb.append(replaced); + } +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |