From: <bra...@us...> - 2007-08-23 21:16:46
|
Revision: 1917 http://archive-access.svn.sourceforge.net/archive-access/?rev=1917&view=rev Author: bradtofel Date: 2007-08-23 14:16:48 -0700 (Thu, 23 Aug 2007) Log Message: ----------- INITIAL REV: new replay mode based on replay context stored in hostname, while paths remain static -- somewhat working, but not ready for primetime, yet. Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixCompositeRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixReplayDispatcher.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixReplayRenderer.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixResultURIConverter.java Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixCompositeRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixCompositeRequestParser.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixCompositeRequestParser.java 2007-08-23 21:16:48 UTC (rev 1917) @@ -0,0 +1,60 @@ +/* DomainPrefixCompositeRequestParser + * + * $Id$ + * + * Created on 11:20:17 AM Aug 10, 2007. + * + * Copyright (C) 2007 Internet Archive. + * + * This file is part of wayback-core. + * + * wayback-core is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback-core is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with wayback-core; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.domainprefix; + +import org.archive.wayback.RequestParser; +import org.archive.wayback.requestparser.CompositeRequestParser; +import org.archive.wayback.requestparser.FormRequestParser; +import org.archive.wayback.requestparser.OpenSearchRequestParser; + +/** + * + * + * @author brad + * @version $Date$, $Revision$ + */ +public class DomainPrefixCompositeRequestParser extends CompositeRequestParser { + DomainPrefixRequestParser dprp = new DomainPrefixRequestParser(); + protected RequestParser[] getRequestParsers() { + RequestParser[] theParsers = { + dprp, + new OpenSearchRequestParser(), + new FormRequestParser() + }; + return theParsers; + } + /** + * @param hostPort + */ + public void setHostPort(String hostPort) { + dprp.setHostPort(hostPort); + } + /** + * @return + */ + public String getHostPort() { + return dprp.getHostPort(); + } +} Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixReplayDispatcher.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixReplayDispatcher.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixReplayDispatcher.java 2007-08-23 21:16:48 UTC (rev 1917) @@ -0,0 +1,87 @@ +/* DomainPrefixReplayDispatcher + * + * $Id$ + * + * Created on 10:20:49 AM Aug 10, 2007. + * + * Copyright (C) 2007 Internet Archive. + * + * This file is part of wayback-core. + * + * wayback-core is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback-core is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with wayback-core; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.domainprefix; + +import org.archive.wayback.ReplayRenderer; +import org.archive.wayback.WaybackConstants; +import org.archive.wayback.core.Resource; +import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.WaybackRequest; +import org.archive.wayback.replay.BaseReplayDispatcher; +import org.archive.wayback.replay.DateRedirectReplayRenderer; +import org.archive.wayback.replay.TransparentReplayRenderer; + +/** + * + * + * @author brad + * @version $Date$, $Revision$ + */ +public class DomainPrefixReplayDispatcher extends BaseReplayDispatcher { + + private final static String TEXT_HTML_MIME = "text/html"; + private final static String TEXT_XHTML_MIME = "application/xhtml"; + + // TODO: make this configurable + private final static long MAX_HTML_MARKUP_LENGTH = 1024 * 1024 * 5; + + private ReplayRenderer redirect = new DateRedirectReplayRenderer(); + + private ReplayRenderer transparent = new TransparentReplayRenderer(); + private DomainPrefixReplayRenderer html = new DomainPrefixReplayRenderer(); + + /* (non-Javadoc) + * @see org.archive.wayback.replay.BaseReplayDispatcher#getRenderer(org.archive.wayback.core.WaybackRequest, org.archive.wayback.core.SearchResult, org.archive.wayback.core.Resource) + */ + @Override + public ReplayRenderer getRenderer(WaybackRequest wbRequest, + SearchResult result, Resource resource) { + // if the result is not for the exact date requested, redirect to the + // exact date. some capture dates are not 14 digits, only compare as + // many digits as are in the result date: + String reqDateStr = wbRequest.get(WaybackConstants.REQUEST_EXACT_DATE); + String resDateStr = result.get(WaybackConstants.RESULT_CAPTURE_DATE); + if((resDateStr.length() > reqDateStr.length()) || + !resDateStr.equals(reqDateStr.substring(0, resDateStr.length()))) { + return redirect; + } + + // HTML and XHTML docs smaller than some size get marked up as HTML + if (resource.getRecordLength() < MAX_HTML_MARKUP_LENGTH) { + + if (-1 != result.get(WaybackConstants.RESULT_MIME_TYPE).indexOf( + TEXT_HTML_MIME)) { + return html; + } + if (-1 != result.get(WaybackConstants.RESULT_MIME_TYPE).indexOf( + TEXT_XHTML_MIME)) { + return html; + } + } + + // everything else goes transparently: + return transparent; + } +} Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixReplayRenderer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixReplayRenderer.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixReplayRenderer.java 2007-08-23 21:16:48 UTC (rev 1917) @@ -0,0 +1,140 @@ +/* DomainPrefixReplayRenderer + * + * $Id$ + * + * Created on 10:21:04 AM Aug 10, 2007. + * + * Copyright (C) 2007 Internet Archive. + * + * This file is part of wayback-core. + * + * wayback-core is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback-core is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with wayback-core; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.domainprefix; + +import java.io.IOException; +//import java.util.Date; +//import java.util.Iterator; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.archive.wayback.ReplayRenderer; +import org.archive.wayback.ResultURIConverter; +//import org.archive.wayback.WaybackConstants; +import org.archive.wayback.core.Resource; +import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.SearchResults; +import org.archive.wayback.core.Timestamp; +import org.archive.wayback.core.WaybackRequest; +import org.archive.wayback.exception.BadContentException; +import org.archive.wayback.replay.HTMLPage; +import org.archive.wayback.replay.HttpHeaderProcessor; +import org.archive.wayback.replay.HttpHeaderOperation; +//import org.archive.wayback.util.StringFormatter; +import org.archive.wayback.util.UrlCanonicalizer; + +/** + * + * + * @author brad + * @version $Date$, $Revision$ + */ +public class DomainPrefixReplayRenderer implements ReplayRenderer, HttpHeaderProcessor { + private final static String HTTP_LENGTH_HEADER = "Content-Length"; + private final static String HTTP_LENGTH_HEADER_UP = + HTTP_LENGTH_HEADER.toUpperCase(); + + private final static String HTTP_LOCATION_HEADER = "Location"; + private final static String HTTP_LOCATION_HEADER_UP = + HTTP_LOCATION_HEADER.toUpperCase(); + + private final static Pattern httpPattern = + Pattern.compile("(http://[^/]*/)"); + + /* (non-Javadoc) + * @see org.archive.wayback.ReplayRenderer#renderResource(javax.servlet.http.HttpServletRequest, javax.servlet.http.HttpServletResponse, org.archive.wayback.core.WaybackRequest, org.archive.wayback.core.SearchResult, org.archive.wayback.core.Resource, org.archive.wayback.ResultURIConverter, org.archive.wayback.core.SearchResults) + */ + public void renderResource(HttpServletRequest httpRequest, + HttpServletResponse httpResponse, WaybackRequest wbRequest, + SearchResult result, Resource resource, + ResultURIConverter uriConverter, SearchResults results) + throws ServletException, IOException, BadContentException { + resource.parseHeaders(); + + HttpHeaderOperation.copyHTTPMessageHeader(resource, httpResponse); + + Map<String,String> headers = HttpHeaderOperation.processHeaders( + resource, result, uriConverter, this); + + // Load content into an HTML page, and resolve load-time URLs: + HTMLPage page = new HTMLPage(resource,result,uriConverter); + page.readFully(); + + String resourceTS = result.getCaptureDate(); + String captureTS = Timestamp.parseBefore(resourceTS).getDateStr(); + + + StringBuilder sb = page.sb; + StringBuffer replaced = new StringBuffer(sb.length()); + Matcher m = httpPattern.matcher(sb); + while(m.find()) { + String host = m.group(1); + String replacement = uriConverter.makeReplayURI(captureTS,host); + m.appendReplacement(replaced, replacement); + } + m.appendTail(replaced); + byte b[] = replaced.toString().getBytes(page.getCharSet()); + int bytes = b.length; + headers.put(HTTP_LENGTH_HEADER, String.valueOf(bytes)); + + HttpHeaderOperation.sendHeaders(headers, httpResponse); + httpResponse.getOutputStream().write(b); + + } + + /* (non-Javadoc) + * @see org.archive.wayback.replay.HeaderFilter#filter(java.util.Map, java.lang.String, java.lang.String, org.archive.wayback.ResultURIConverter, org.archive.wayback.core.SearchResult) + */ + public void filter(Map<String, String> output, String key, String value, + ResultURIConverter uriConverter, SearchResult result) { + String keyUp = key.toUpperCase(); + + // omit Content-Length header + if (keyUp.equals(HTTP_LENGTH_HEADER_UP)) { + return; + } + + // rewrite Location header URLs + if (keyUp.startsWith(HTTP_LOCATION_HEADER_UP)) { + + String baseUrl = result.getAbsoluteUrl(); + String cd = result.getCaptureDate(); + // by the spec, these should be absolute already, but just in case: + String u = UrlCanonicalizer.resolveUrl(baseUrl, value); + + output.put(key, uriConverter.makeReplayURI(cd,u)); + + } else { + // others go out as-is: + + output.put(key, value); + } + } +} Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixRequestParser.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixRequestParser.java 2007-08-23 21:16:48 UTC (rev 1917) @@ -0,0 +1,177 @@ +/* DomainPrefixRequestParser + * + * $Id$ + * + * Created on 10:20:21 AM Aug 10, 2007. + * + * Copyright (C) 2007 Internet Archive. + * + * This file is part of wayback-core. + * + * wayback-core is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback-core is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with wayback-core; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.domainprefix; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import javax.servlet.http.HttpServletRequest; + +import org.apache.commons.httpclient.URIException; +import org.archive.wayback.RequestParser; +import org.archive.wayback.WaybackConstants; +import org.archive.wayback.core.Timestamp; +import org.archive.wayback.core.WaybackRequest; +import org.archive.wayback.exception.BadQueryException; +import org.archive.wayback.webapp.WaybackContext; + +/** + * + * + * @author brad + * @version $Date$, $Revision$ + */ +public class DomainPrefixRequestParser implements RequestParser { + + String hostPort = "localhost:8081"; + String earliest = Timestamp.earliestTimestamp().getDateStr(); + int maxRecords = 1000; + + private final Pattern REPLAY_REGEX = + Pattern.compile("^(\\d{1,14})\\.(.*)$"); + private final Pattern QUERY_REGEX = + Pattern.compile("^(\\d{0,13})\\*\\.(.*)$"); + + private String getRequestString(final String host, + HttpServletRequest httpRequest) { + String path = httpRequest.getRequestURI(); + String query = httpRequest.getQueryString(); + + String r = ""; + if(path == null) { + path = "/"; + } + if(query != null && query.length() > 0) { + r = "http://" + host + path + "?" + query; + } else { + r = "http://" + host + path; + } + return r; + } + + /* (non-Javadoc) + * @see org.archive.wayback.RequestParser#parse(javax.servlet.http.HttpServletRequest, org.archive.wayback.webapp.WaybackContext) + */ + public WaybackRequest parse(HttpServletRequest httpRequest, + WaybackContext wbContext) throws BadQueryException { + + WaybackRequest wbRequest = null; + String server = httpRequest.getServerName() + + ":" + httpRequest.getServerPort(); + if(server.endsWith(hostPort)) { + int length = server.length() - hostPort.length(); + if(server.length() > hostPort.length()) { + String prefix = server.substring(0,length - 1); + Matcher replayMatcher = REPLAY_REGEX.matcher(prefix); + if (replayMatcher != null && replayMatcher.matches()) { + wbRequest = new WaybackRequest(); + String dateStr = replayMatcher.group(1); + String host = replayMatcher.group(2); + + String requestUrl = getRequestString(host,httpRequest); + + wbRequest.put(WaybackConstants.REQUEST_EXACT_DATE, dateStr); + wbRequest.put(WaybackConstants.REQUEST_TYPE, + WaybackConstants.REQUEST_REPLAY_QUERY); + try { + wbRequest.setRequestUrl(requestUrl); + } catch (URIException e) { + e.printStackTrace(); + wbRequest = null; + } + } else { + Matcher queryMatcher = QUERY_REGEX.matcher(prefix); + if(queryMatcher != null && queryMatcher.matches()) { + wbRequest = new WaybackRequest(); + String dateStr = queryMatcher.group(1); + String host = queryMatcher.group(2); + String startDate; + if(dateStr.length() == 0) { + startDate = earliest; + } else { + startDate = Timestamp.parseBefore(dateStr).getDateStr(); + } + String endDate = Timestamp.parseAfter(dateStr).getDateStr(); + wbRequest.put(WaybackConstants.REQUEST_START_DATE,startDate); + wbRequest.put(WaybackConstants.REQUEST_END_DATE,endDate); + wbRequest.put(WaybackConstants.REQUEST_TYPE, + WaybackConstants.REQUEST_URL_QUERY); + + String requestUrl = getRequestString(host,httpRequest); + + try { + wbRequest.setRequestUrl(requestUrl); + } catch (URIException e) { + e.printStackTrace(); + wbRequest = null; + } + } + } + } + } + return wbRequest; + } + + /* (non-Javadoc) + * @see org.archive.wayback.RequestParser#setEarliestTimestamp(java.lang.String) + */ + public void setEarliestTimestamp(String timestamp) { + earliest = timestamp; + } + + /** + * @return the earliest timestamp + */ + public String getEarliestTimestamp() { + return earliest; + } + + /* (non-Javadoc) + * @see org.archive.wayback.RequestParser#setMaxRecords(int) + */ + public void setMaxRecords(int maxRecords) { + this.maxRecords = maxRecords; + } + /** + * @return the maxRecords + */ + public int getMaxRecords() { + return maxRecords; + } + + /** + * @return the hostPort + */ + public String getHostPort() { + return hostPort; + } + + /** + * @param hostPort the hostPort to set + */ + public void setHostPort(String hostPort) { + this.hostPort = hostPort; + } +} Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixResultURIConverter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixResultURIConverter.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixResultURIConverter.java 2007-08-23 21:16:48 UTC (rev 1917) @@ -0,0 +1,81 @@ +/* DomainPrefixResultURIConverter + * + * $Id$ + * + * Created on 10:20:35 AM Aug 10, 2007. + * + * Copyright (C) 2007 Internet Archive. + * + * This file is part of wayback-core. + * + * wayback-core is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback-core is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with wayback-core; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.domainprefix; + +import java.net.URI; +import java.net.URISyntaxException; + +import org.archive.wayback.ResultURIConverter; + +/** + * + * + * @author brad + * @version $Date$, $Revision$ + */ +public class DomainPrefixResultURIConverter implements ResultURIConverter { + + private String hostPort = "localhost:8081"; + + /* (non-Javadoc) + * @see org.archive.wayback.ResultURIConverter#makeReplayURI(java.lang.String, java.lang.String) + */ + public String makeReplayURI(String datespec, String url) { + String replayURI = ""; + try { + URI uri = new URI(url); + StringBuilder sb = new StringBuilder(90); + sb.append("http://"); + sb.append(datespec).append("."); + sb.append(uri.getHost()).append("."); + sb.append(hostPort); + sb.append(uri.getPath()); + String query = uri.getQuery(); + if(query != null && query.length() > 0) { + sb.append("?").append(query); + } + replayURI = sb.toString(); + + } catch (URISyntaxException e) { + e.printStackTrace(); + } + return replayURI; + } + + /** + * @return the hostPort + */ + public String getHostPort() { + return hostPort; + } + + /** + * @param hostPort the hostPort to set + */ + public void setHostPort(String hostPort) { + this.hostPort = hostPort; + } + +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |