From: <bra...@us...> - 2007-07-16 22:22:17
|
Revision: 1767 http://archive-access.svn.sourceforge.net/archive-access/?rev=1767&view=rev Author: bradtofel Date: 2007-07-16 15:22:19 -0700 (Mon, 16 Jul 2007) Log Message: ----------- REFACTOR: new RequestParser classes for archival url Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathDatePrefixQueryRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathDateRangeQueryRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathPrefixDatePrefixQueryRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathPrefixDateRangeQueryRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/ReplayRequestParser.java Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathDatePrefixQueryRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathDatePrefixQueryRequestParser.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathDatePrefixQueryRequestParser.java 2007-07-16 22:22:19 UTC (rev 1767) @@ -0,0 +1,75 @@ +/* PathDatePrefixRequestParser + * + * $Id$ + * + * Created on 6:38:19 PM Apr 24, 2007. + * + * Copyright (C) 2007 Internet Archive. + * + * This file is part of wayback-core. + * + * wayback-core is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback-core is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with wayback-core; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.archivalurl.requestparser; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.commons.httpclient.URIException; +import org.archive.wayback.WaybackConstants; +import org.archive.wayback.core.Timestamp; +import org.archive.wayback.core.WaybackRequest; +import org.archive.wayback.requestparser.PathRequestParser; + +/** + * RequestParser implementation that extracts request info from an Archival Url + * representing an exact url and a date prefix. + * + * @author brad + * @version $Date$, $Revision$ + */ +public class PathDatePrefixQueryRequestParser extends PathRequestParser { + /** + * Regex which parses Archival URL queries into timestamp + url for an exact + * URL + */ + private final static Pattern WB_QUERY_REGEX = Pattern + .compile("^(\\d{0,13})\\*/(.*[^*])$"); + + public WaybackRequest parse(String requestPath) { + + WaybackRequest wbRequest = null; + Matcher matcher = WB_QUERY_REGEX.matcher(requestPath); + if (matcher != null && matcher.matches()) { + + wbRequest = new WaybackRequest(); + String dateStr = matcher.group(1); + String urlStr = matcher.group(2); + + String startDate = Timestamp.parseBefore(dateStr).getDateStr(); + String endDate = Timestamp.parseAfter(dateStr).getDateStr(); + wbRequest.put(WaybackConstants.REQUEST_START_DATE,startDate); + wbRequest.put(WaybackConstants.REQUEST_END_DATE,endDate); + wbRequest.put(WaybackConstants.REQUEST_TYPE, + WaybackConstants.REQUEST_URL_QUERY); + try { + wbRequest.setRequestUrl(urlStr); + } catch (URIException e) { + wbRequest = null; + } + } + return wbRequest; + } +} Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathDateRangeQueryRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathDateRangeQueryRequestParser.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathDateRangeQueryRequestParser.java 2007-07-16 22:22:19 UTC (rev 1767) @@ -0,0 +1,78 @@ +/* PathDateRangeQueryRequestParser + * + * $Id$ + * + * Created on 6:41:58 PM Apr 24, 2007. + * + * Copyright (C) 2007 Internet Archive. + * + * This file is part of wayback-core. + * + * wayback-core is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback-core is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with wayback-core; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.archivalurl.requestparser; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.commons.httpclient.URIException; +import org.archive.wayback.WaybackConstants; +import org.archive.wayback.core.Timestamp; +import org.archive.wayback.core.WaybackRequest; +import org.archive.wayback.requestparser.PathRequestParser; + +/** + * RequestParser implementation that extracts request info from an Archival Url + * representing an exact url and a date range. + * + * @author brad + * @version $Date$, $Revision$ + */ +public class PathDateRangeQueryRequestParser extends PathRequestParser { + + /** + * Regex which parses Archival URL queries into Start Timestamp + + * End Timestamp + URL for an exact URL + */ + private final static Pattern WB_QUERY2_REGEX = Pattern + .compile("^(\\d{1,14})-(\\d{1,14})\\*/(.*[^*])$"); + + + public WaybackRequest parse(String requestPath) { + WaybackRequest wbRequest = null; + Matcher matcher = WB_QUERY2_REGEX.matcher(requestPath); + if (matcher != null && matcher.matches()) { + + wbRequest = new WaybackRequest(); + String startDateStr = matcher.group(1); + String endDateStr = matcher.group(2); + String urlStr = matcher.group(3); + + String startDate = Timestamp.parseBefore(startDateStr).getDateStr(); + String endDate = Timestamp.parseAfter(endDateStr).getDateStr(); + wbRequest.put(WaybackConstants.REQUEST_START_DATE,startDate); + wbRequest.put(WaybackConstants.REQUEST_END_DATE,endDate); + wbRequest.put(WaybackConstants.REQUEST_TYPE, + WaybackConstants.REQUEST_URL_QUERY); + try { + wbRequest.setRequestUrl(urlStr); + } catch (URIException e) { + wbRequest = null; + } + } + return wbRequest; + } + +} Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathPrefixDatePrefixQueryRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathPrefixDatePrefixQueryRequestParser.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathPrefixDatePrefixQueryRequestParser.java 2007-07-16 22:22:19 UTC (rev 1767) @@ -0,0 +1,75 @@ +/* PathPrefixDatePrefixQueryRequestParser + * + * $Id$ + * + * Created on 6:42:18 PM Apr 24, 2007. + * + * Copyright (C) 2007 Internet Archive. + * + * This file is part of wayback-core. + * + * wayback-core is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback-core is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with wayback-core; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.archivalurl.requestparser; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.commons.httpclient.URIException; +import org.archive.wayback.WaybackConstants; +import org.archive.wayback.core.Timestamp; +import org.archive.wayback.core.WaybackRequest; +import org.archive.wayback.requestparser.PathRequestParser; + +/** + * RequestParser implementation that extracts request info from an Archival Url + * representing an url prefix and a date prefix. + * + * @author brad + * @version $Date$, $Revision$ + */ +public class PathPrefixDatePrefixQueryRequestParser extends PathRequestParser { + /** + * Regex which parses Archival URL queries into timestamp + URL for URLs + * beginning with the URL prefix + */ + private final static Pattern WB_PATH_QUERY_REGEX = Pattern + .compile("^(\\d{0,13})\\*/(.*)\\*$"); + + public WaybackRequest parse(String requestPath) { + WaybackRequest wbRequest = null; + Matcher matcher = WB_PATH_QUERY_REGEX.matcher(requestPath); + if (matcher != null && matcher.matches()) { + + wbRequest = new WaybackRequest(); + String dateStr = matcher.group(1); + String urlStr = matcher.group(2); + String startDate = Timestamp.parseBefore(dateStr).getDateStr(); + String endDate = Timestamp.parseAfter(dateStr).getDateStr(); + wbRequest.put(WaybackConstants.REQUEST_START_DATE, + startDate); + wbRequest.put(WaybackConstants.REQUEST_END_DATE,endDate); + + wbRequest.put(WaybackConstants.REQUEST_TYPE, + WaybackConstants.REQUEST_URL_PREFIX_QUERY); + try { + wbRequest.setRequestUrl(urlStr); + } catch (URIException e) { + wbRequest = null; + } + } + return wbRequest; + } +} Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathPrefixDateRangeQueryRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathPrefixDateRangeQueryRequestParser.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathPrefixDateRangeQueryRequestParser.java 2007-07-16 22:22:19 UTC (rev 1767) @@ -0,0 +1,76 @@ +/* PathPrefixDateRangeQueryRequestParser + * + * $Id$ + * + * Created on 6:42:38 PM Apr 24, 2007. + * + * Copyright (C) 2007 Internet Archive. + * + * This file is part of wayback-core. + * + * wayback-core is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback-core is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with wayback-core; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.archivalurl.requestparser; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.commons.httpclient.URIException; +import org.archive.wayback.WaybackConstants; +import org.archive.wayback.core.Timestamp; +import org.archive.wayback.core.WaybackRequest; +import org.archive.wayback.requestparser.PathRequestParser; + +/** + * RequestParser implementation that extracts request info from an Archival Url + * representing an url prefix and a date range. + * + * @author brad + * @version $Date$, $Revision$ + */ +public class PathPrefixDateRangeQueryRequestParser extends PathRequestParser { + /** + * Regex which parses Archival URL queries into Start Timestamp + + * End Timestamp + URL for URLs beginning with the URL prefix + */ + private final static Pattern WB_PATH_QUERY2_REGEX = Pattern + .compile("^(\\d{1,14})-(\\d{1,14})\\*/(.*)\\*$"); + + public WaybackRequest parse(String requestPath) { + WaybackRequest wbRequest = null; + Matcher matcher = WB_PATH_QUERY2_REGEX.matcher(requestPath); + if (matcher != null && matcher.matches()) { + + wbRequest = new WaybackRequest(); + String startDateStr = matcher.group(1); + String endDateStr = matcher.group(2); + String urlStr = matcher.group(3); + String startDate = Timestamp.parseBefore(startDateStr).getDateStr(); + String endDate = Timestamp.parseAfter(endDateStr).getDateStr(); + wbRequest.put(WaybackConstants.REQUEST_START_DATE, + startDate); + wbRequest.put(WaybackConstants.REQUEST_END_DATE,endDate); + + wbRequest.put(WaybackConstants.REQUEST_TYPE, + WaybackConstants.REQUEST_URL_PREFIX_QUERY); + try { + wbRequest.setRequestUrl(urlStr); + } catch (URIException e) { + wbRequest = null; + } + } + return wbRequest; + } +} Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/ReplayRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/ReplayRequestParser.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/ReplayRequestParser.java 2007-07-16 22:22:19 UTC (rev 1767) @@ -0,0 +1,110 @@ +/* ReplayRequestParser + * + * $Id$ + * + * Created on 6:39:51 PM Apr 24, 2007. + * + * Copyright (C) 2007 Internet Archive. + * + * This file is part of wayback-core. + * + * wayback-core is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback-core is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with wayback-core; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.archivalurl.requestparser; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + + +import org.apache.commons.httpclient.URIException; +import org.archive.wayback.WaybackConstants; +import org.archive.wayback.core.Timestamp; +import org.archive.wayback.core.WaybackRequest; +import org.archive.wayback.requestparser.PathRequestParser; + +/** + * RequestParser implementation that extracts request info from a Replay + * Archival Url path. + * + * @author brad + * @version $Date$, $Revision$ + */ +public class ReplayRequestParser extends PathRequestParser { + /** + * Regex which parses Archival URL replay requests into timestamp + url + */ + private final Pattern WB_REQUEST_REGEX = Pattern + .compile("^(\\d{1,14})/(.*)$"); + + public WaybackRequest parse(String requestPath) { + WaybackRequest wbRequest = null; + Matcher matcher = WB_REQUEST_REGEX.matcher(requestPath); + if (matcher != null && matcher.matches()) { + wbRequest = new WaybackRequest(); + String dateStr = matcher.group(1); + String urlStr = matcher.group(2); + if (!urlStr.startsWith("http://")) { + urlStr = "http://" + urlStr; + } + + // The logic of the classic WM wrt timestamp bounding: + // if 14-digits are specified, assume min-max range boundaries + // if less than 14 are specified, assume min-max range boundaries + // based upon amount given (2001 => 20010101... - 20011231...) + // AND assume the user asked for the LATEST possible date + // within that range... + // + // ...don't ask me, I just work here. + + String startDate = null; + String endDate = null; + if (dateStr.length() == 14) { + startDate = Timestamp.earliestTimestamp().getDateStr(); + endDate = Timestamp.currentTimestamp().getDateStr(); + } else { + + // classic behavior: + // startDate = Timestamp.parseBefore(dateStr).getDateStr(); + // endDate = Timestamp.parseAfter(dateStr).getDateStr(); + // dateStr = endDate; + + // "better" behavior: + startDate = Timestamp.earliestTimestamp().getDateStr(); + endDate = Timestamp.currentTimestamp().getDateStr(); + dateStr = Timestamp.parseAfter(dateStr).getDateStr(); + + } + wbRequest.put(WaybackConstants.REQUEST_EXACT_DATE, dateStr); + wbRequest.put(WaybackConstants.REQUEST_START_DATE, startDate); + wbRequest.put(WaybackConstants.REQUEST_END_DATE, endDate); + + wbRequest.put(WaybackConstants.REQUEST_TYPE, + WaybackConstants.REQUEST_REPLAY_QUERY); + + try { +// String wbPrefix = wbRequest.getDefaultWaybackPrefix(); +// if (urlStr.startsWith(wbPrefix)) { +// wbRequest.setBetterRequestURI(urlStr); +// } + wbRequest.setRequestUrl(urlStr); + } catch (URIException e) { + e.printStackTrace(); + wbRequest = null; + } + } + return wbRequest; + } + +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2007-07-25 00:28:30
|
Revision: 1867 http://archive-access.svn.sourceforge.net/archive-access/?rev=1867&view=rev Author: bradtofel Date: 2007-07-24 17:28:30 -0700 (Tue, 24 Jul 2007) Log Message: ----------- REFACTOR: removed all references to PropertyConfigurable interface Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlResultURIConverter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/JSReplayRenderer.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlResultURIConverter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlResultURIConverter.java 2007-07-25 00:27:51 UTC (rev 1866) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlResultURIConverter.java 2007-07-25 00:28:30 UTC (rev 1867) @@ -24,11 +24,7 @@ */ package org.archive.wayback.archivalurl; -import java.util.Properties; - import org.archive.wayback.ResultURIConverter; -import org.archive.wayback.core.PropertyConfiguration; -import org.archive.wayback.exception.ConfigurationException; /** * @@ -40,12 +36,7 @@ /** * configuration name for URL prefix of replay server */ - private final static String REPLAY_URI_PREFIX_PROPERTY = "replayuriprefix"; private String replayURIPrefix = null; - public void init(Properties p) throws ConfigurationException { - PropertyConfiguration pc = new PropertyConfiguration(p); - replayURIPrefix = pc.getString(REPLAY_URI_PREFIX_PROPERTY); - } /* (non-Javadoc) * @see org.archive.wayback.ResultURIConverter#makeReplayURI(java.lang.String, java.lang.String) Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/JSReplayRenderer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/JSReplayRenderer.java 2007-07-25 00:27:51 UTC (rev 1866) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/JSReplayRenderer.java 2007-07-25 00:28:30 UTC (rev 1867) @@ -27,7 +27,6 @@ import java.io.IOException; import java.util.Date; import java.util.List; -import java.util.Properties; import javax.servlet.ServletException; import javax.servlet.http.HttpServletRequest; @@ -39,7 +38,6 @@ import org.archive.wayback.core.SearchResult; import org.archive.wayback.core.Timestamp; import org.archive.wayback.core.WaybackRequest; -import org.archive.wayback.exception.ConfigurationException; import org.archive.wayback.replay.BaseReplayRenderer; import org.archive.wayback.util.StringFormatter; import org.archive.wayback.util.UrlCanonicalizer; @@ -53,8 +51,6 @@ */ public class JSReplayRenderer extends BaseReplayRenderer { - private final static String REPLAY_JS_URI = "jsuri"; - private final static String HTTP_LENGTH_HEADER = "Content-Length"; private final static String HTTP_XFER_ENCODING_HEADER = "Transfer-Encoding"; @@ -74,22 +70,6 @@ protected String scriptUrlInserts = null; - public void init(Properties p) throws ConfigurationException { - String javascriptURI = (String) p.get(REPLAY_JS_URI); - if (javascriptURI == null || javascriptURI.length() <= 0) { - throw new ConfigurationException("Failed to find " + REPLAY_JS_URI); - } - - scriptUrlInserts = ""; - String scriptUrls[] = javascriptURI.split(","); - for (int i = 0; i < scriptUrls.length; i++) { - scriptUrlInserts += "<script type=\"text/javascript\" src=\"" - + scriptUrls[i] + "\" ></script>\n"; - } - - super.init(p); - } - /** * @param list */ This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2007-08-23 21:14:30
|
Revision: 1916 http://archive-access.svn.sourceforge.net/archive-access/?rev=1916&view=rev Author: bradtofel Date: 2007-08-23 14:14:33 -0700 (Thu, 23 Aug 2007) Log Message: ----------- REFACTOR: complete refactor of Archival URL replayUI implementation using ReplayDispatcher and the replay.* utility classes. Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlReplayDispatcher.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlReplayRenderer.java Removed Paths: ------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/JSReplayRenderer.java Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlReplayDispatcher.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlReplayDispatcher.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlReplayDispatcher.java 2007-08-23 21:14:33 UTC (rev 1916) @@ -0,0 +1,126 @@ +/* ArchivalUrlReplayRendererDispatcher + * + * $Id$ + * + * Created on 11:38:02 AM Aug 9, 2007. + * + * Copyright (C) 2007 Internet Archive. + * + * This file is part of wayback-core. + * + * wayback-core is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback-core is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with wayback-core; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.archivalurl; + +import java.util.List; + +import org.archive.wayback.ReplayRenderer; +import org.archive.wayback.WaybackConstants; +import org.archive.wayback.core.Resource; +import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.WaybackRequest; +import org.archive.wayback.replay.BaseReplayDispatcher; +import org.archive.wayback.replay.DateRedirectReplayRenderer; +import org.archive.wayback.replay.TransparentReplayRenderer; + +/** + * + * + * @author brad + * @version $Date$, $Revision$ + */ +public class ArchivalUrlReplayDispatcher + extends BaseReplayDispatcher { + + /** + * MIME type of documents which should be marked up with javascript to + * rewrite URLs inside document + */ + private final static String TEXT_HTML_MIME = "text/html"; + private final static String TEXT_XHTML_MIME = "application/xhtml"; + + // TODO: make this configurable + private final static long MAX_HTML_MARKUP_LENGTH = 1024 * 1024 * 5; + + private ReplayRenderer transparent = new TransparentReplayRenderer(); + private ReplayRenderer redirect = new DateRedirectReplayRenderer(); + private ArchivalUrlReplayRenderer archivalHTML = + new ArchivalUrlReplayRenderer(); + + /* (non-Javadoc) + * @see org.archive.wayback.replay.ReplayRendererDispatcher#getRenderer(org.archive.wayback.core.WaybackRequest, org.archive.wayback.core.SearchResult, org.archive.wayback.core.Resource) + */ + @Override + public ReplayRenderer getRenderer(WaybackRequest wbRequest, + SearchResult result, Resource resource) { + + // if the result is not for the exact date requested, redirect to the + // exact date. some capture dates are not 14 digits, only compare as + // many digits as are in the result date: + String reqDateStr = wbRequest.get(WaybackConstants.REQUEST_EXACT_DATE); + String resDateStr = result.get(WaybackConstants.RESULT_CAPTURE_DATE); + if(!resDateStr.equals(reqDateStr.substring(0, resDateStr.length()))) { + return redirect; + } + + // HTML and XHTML docs smaller than some size get marked up as HTML + if (resource.getRecordLength() < MAX_HTML_MARKUP_LENGTH) { + + if (-1 != result.get(WaybackConstants.RESULT_MIME_TYPE).indexOf( + TEXT_HTML_MIME)) { + return archivalHTML; + } + if (-1 != result.get(WaybackConstants.RESULT_MIME_TYPE).indexOf( + TEXT_XHTML_MIME)) { + return archivalHTML; + } + } + + // everything else goes transparently: + return transparent; + } + + /** + * @return + * @see org.archive.wayback.archivalurl.ArchivalUrlReplayRenderer#getJsInserts() + */ + public List<String> getJsInserts() { + return archivalHTML.getJsInserts(); + } + + /** + * @return + * @see org.archive.wayback.archivalurl.ArchivalUrlReplayRenderer#getJspInserts() + */ + public List<String> getJspInserts() { + return archivalHTML.getJspInserts(); + } + + /** + * @param jsInserts + * @see org.archive.wayback.archivalurl.ArchivalUrlReplayRenderer#setJsInserts(java.util.List) + */ + public void setJsInserts(List<String> jsInserts) { + archivalHTML.setJsInserts(jsInserts); + } + + /** + * @param jspInserts + * @see org.archive.wayback.archivalurl.ArchivalUrlReplayRenderer#setJspInserts(java.util.List) + */ + public void setJspInserts(List<String> jspInserts) { + archivalHTML.setJspInserts(jspInserts); + } +} Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlReplayRenderer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlReplayRenderer.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlReplayRenderer.java 2007-08-23 21:14:33 UTC (rev 1916) @@ -0,0 +1,209 @@ +/* ArchivalUrlReplayRenderer + * + * $Id$ + * + * Created on 6:11:00 PM Aug 8, 2007. + * + * Copyright (C) 2007 Internet Archive. + * + * This file is part of wayback-core. + * + * wayback-core is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback-core is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with wayback-core; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.archivalurl; + +import java.io.IOException; +import java.util.Date; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.archive.wayback.ReplayRenderer; +import org.archive.wayback.ResultURIConverter; +import org.archive.wayback.WaybackConstants; +import org.archive.wayback.core.Resource; +import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.SearchResults; +import org.archive.wayback.core.Timestamp; +import org.archive.wayback.core.WaybackRequest; +import org.archive.wayback.exception.BadContentException; +import org.archive.wayback.replay.HTMLPage; +import org.archive.wayback.replay.HttpHeaderProcessor; +import org.archive.wayback.replay.HttpHeaderOperation; +import org.archive.wayback.util.StringFormatter; +import org.archive.wayback.util.UrlCanonicalizer; + +/** + * ReplayRenderer responsible for marking up HTML pages so they replay in + * ArchivalUrl context: + * resolve in page URLs + * add HTML comment and javascript to modify URLs client-side to point back + * to this context + * + * @author brad + * @version $Date$, $Revision$ + */ +public class ArchivalUrlReplayRenderer implements ReplayRenderer, HttpHeaderProcessor { + + private final static String HTTP_LENGTH_HEADER = "Content-Length"; + private final static String HTTP_LENGTH_HEADER_UP = + HTTP_LENGTH_HEADER.toUpperCase(); + + private final static String HTTP_LOCATION_HEADER = "Location"; + private final static String HTTP_LOCATION_HEADER_UP = + HTTP_LOCATION_HEADER.toUpperCase(); + + private final static String HTTP_CONTENT_BASE_HEADER = "Content-Length"; + private final static String HTTP_CONTENT_BASE_HEADER_UP = + HTTP_CONTENT_BASE_HEADER.toUpperCase(); + + protected String scriptUrlInserts = null; + private List<String> jsInserts = null; + private List<String> jspInserts = null; + + /* (non-Javadoc) + * @see org.archive.wayback.ReplayRenderer#renderResource(javax.servlet.http.HttpServletRequest, javax.servlet.http.HttpServletResponse, org.archive.wayback.core.WaybackRequest, org.archive.wayback.core.SearchResult, org.archive.wayback.core.Resource, org.archive.wayback.ResultURIConverter, org.archive.wayback.core.SearchResults) + */ + public void renderResource(HttpServletRequest httpRequest, + HttpServletResponse httpResponse, WaybackRequest wbRequest, + SearchResult result, Resource resource, + ResultURIConverter uriConverter, SearchResults results) + throws ServletException, IOException, BadContentException { + + resource.parseHeaders(); + + HttpHeaderOperation.copyHTTPMessageHeader(resource, httpResponse); + + Map<String,String> headers = HttpHeaderOperation.processHeaders( + resource, result, uriConverter, this); + + // Load content into an HTML page, and resolve load-time URLs: + HTMLPage page = new HTMLPage(resource,result,uriConverter); + page.readFully(); + page.resolvePageUrls(); + + // generate JS insert: + StringFormatter fmt = wbRequest.getFormatter(); + + String resourceTS = result.getCaptureDate(); + String resourceUrl = result.get(WaybackConstants.RESULT_URL); + Timestamp captureTS = Timestamp.parseBefore(resourceTS); + Date captureDate = captureTS.getDate(); + String contextPath = uriConverter.makeReplayURI(resourceTS, ""); + + + StringBuilder toInsert = new StringBuilder(300); + + toInsert.append("<script type=\"text/javascript\">\n\n"); + toInsert.append(fmt.format("ReplayView.javaScriptComment", captureDate, + new Date())); + String wmNotice = fmt.format("ReplayView.banner", resourceUrl, + captureDate); + String wmHideNotice = fmt.format("ReplayView.bannerHideLink"); + toInsert.append("var sWayBackCGI = \"" + contextPath + "\";\n"); + toInsert.append("var wmNotice = \"" + wmNotice + "\";\n"); + toInsert.append("var wmHideNotice = \"" + wmHideNotice + "\";\n"); + toInsert.append("</script>\n"); + + if(jsInserts != null) { + Iterator<String> itr = jsInserts.iterator(); + while(itr.hasNext()) { + toInsert.append(page.getJSIncludeString(itr.next())); + } + } + if(jspInserts != null) { + Iterator<String> itr = jspInserts.iterator(); + while(itr.hasNext()) { + toInsert.append(page.includeJspString(itr.next(), httpRequest, + httpResponse, wbRequest, results)); + } + } + + // add the javascript, and dump the result out to the client: + page.insertAtEndOfBody(toInsert.toString()); + + // send back the headers: + HttpHeaderOperation.sendHeaders(headers, httpResponse); + + // plus the corrected length: + int bytes = page.getBytes().length; + headers.put(HTTP_LENGTH_HEADER, String.valueOf(bytes)); + + page.writeToOutputStream(httpResponse.getOutputStream()); + } + + /* (non-Javadoc) + * @see org.archive.wayback.replay.HeaderFilter#filter(java.util.Map, java.lang.String, java.lang.String, org.archive.wayback.ResultURIConverter, org.archive.wayback.core.SearchResult) + */ + public void filter(Map<String, String> output, String key, String value, + ResultURIConverter uriConverter, SearchResult result) { + + String keyUp = key.toUpperCase(); + + // omit Content-Length header + if (keyUp.equals(HTTP_LENGTH_HEADER_UP)) { + return; + } + + // rewrite Location header URLs + if (keyUp.startsWith(HTTP_LOCATION_HEADER_UP) || + keyUp.startsWith(HTTP_CONTENT_BASE_HEADER_UP)) { + + String baseUrl = result.getAbsoluteUrl(); + String cd = result.getCaptureDate(); + // by the spec, these should be absolute already, but just in case: + String u = UrlCanonicalizer.resolveUrl(baseUrl, value); + + output.put(key, uriConverter.makeReplayURI(cd,u)); + + } else { + // others go out as-is: + + output.put(key, value); + } + } + + /** + * @return the jsInserts + */ + public List<String> getJsInserts() { + return jsInserts; + } + + /** + * @param jsInserts the jsInserts to set + */ + public void setJsInserts(List<String> jsInserts) { + this.jsInserts = jsInserts; + } + + /** + * @return the jspInserts + */ + public List<String> getJspInserts() { + return jspInserts; + } + + /** + * @param jspInserts the jspInserts to set + */ + public void setJspInserts(List<String> jspInserts) { + this.jspInserts = jspInserts; + } +} Deleted: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/JSReplayRenderer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/JSReplayRenderer.java 2007-08-23 21:12:12 UTC (rev 1915) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/JSReplayRenderer.java 2007-08-23 21:14:33 UTC (rev 1916) @@ -1,272 +0,0 @@ -/* JSRenderer - * - * $Id$ - * - * Created on 1:34:16 PM Nov 8, 2005. - * - * Copyright (C) 2005 Internet Archive. - * - * This file is part of wayback. - * - * wayback is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * any later version. - * - * wayback is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * - * You should have received a copy of the GNU Lesser Public License - * along with wayback; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -package org.archive.wayback.archivalurl; - -import java.io.IOException; -import java.util.Date; -import java.util.List; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import org.archive.wayback.WaybackConstants; -import org.archive.wayback.ResultURIConverter; -import org.archive.wayback.core.Resource; -import org.archive.wayback.core.SearchResult; -import org.archive.wayback.core.Timestamp; -import org.archive.wayback.core.WaybackRequest; -import org.archive.wayback.replay.BaseReplayRenderer; -import org.archive.wayback.replay.TagMagix; -import org.archive.wayback.util.StringFormatter; -import org.archive.wayback.util.UrlCanonicalizer; - -/** - * - * - * @author brad - * @version $Date$, $Revision: - * 1483 $ - */ -public class JSReplayRenderer extends BaseReplayRenderer { - - private final static String HTTP_LENGTH_HEADER = "Content-Length"; - - private final static String HTTP_XFER_ENCODING_HEADER = "Transfer-Encoding"; - - private final static String HTTP_LOCATION_HEADER = "Location"; - - /** - * MIME type of documents which should be marked up with javascript to - * rewrite URLs inside document - */ - private final static String TEXT_HTML_MIME = "text/html"; - - private final static String TEXT_XHTML_MIME = "application/xhtml"; - - // TODO: make this configurable - private final static long MAX_HTML_MARKUP_LENGTH = 1024 * 1024 * 5; - - protected String scriptUrlInserts = null; - - /** - * @param list - */ - public void setJSInserts(List<String> list) { - scriptUrlInserts = ""; - for (int i = 0; i < list.size(); i++) { - scriptUrlInserts += "<script type=\"text/javascript\" src=\"" - + list.get(i) + "\" ></script>\n"; - } - } - - /** - * test if the SearchResult should be replayed raw, without JS markup - * - * @param resource - * @param result - * @return boolean, true if the document should be returned raw. - */ - protected boolean isRawReplayResult(Resource resource, SearchResult result) { - - if (resource.getRecordLength() < MAX_HTML_MARKUP_LENGTH) { - // TODO: this needs to be configurable such that arbitrary filters - // can be applied to various mime-types... We'll just hard-code - // them for now. - if (-1 != result.get(WaybackConstants.RESULT_MIME_TYPE).indexOf( - TEXT_HTML_MIME)) { - return false; - } - if (-1 != result.get(WaybackConstants.RESULT_MIME_TYPE).indexOf( - TEXT_XHTML_MIME)) { - return false; - } - } - - return true; - } - - /** - * omit length and encoding HTTP headers. - * - * @param key - * @param value - * @param uriConverter - * @param result - * @return String - */ - protected String filterHeader(final String key, final String value, - final ResultURIConverter uriConverter, SearchResult result) { - String keyUp = key.toUpperCase(); - if (keyUp.equals(HTTP_LENGTH_HEADER.toUpperCase())) { - return null; - } - // TODO: I don't think that this is handled correctly: if the - // ARC document is chunked, we want to relay that, by NOT omitting the - // header, but we also need to tell the servlet container not to do - // any transfer ecoding of it's own "because we probably wanted it to." - if (keyUp.equals(HTTP_XFER_ENCODING_HEADER.toUpperCase())) { - return null; - } - if (0 == keyUp.indexOf(HTTP_LOCATION_HEADER.toUpperCase())) { - String baseUrl = result.getAbsoluteUrl(); - String captureDate = result.getCaptureDate(); - String url = UrlCanonicalizer.resolveUrl(baseUrl, value); - return uriConverter.makeReplayURI(captureDate,url); - } - return value; - } - - public void renderResource(HttpServletRequest httpRequest, - HttpServletResponse httpResponse, WaybackRequest wbRequest, - SearchResult result, Resource resource, - ResultURIConverter uriConverter) throws ServletException, - IOException { - - // if we are not returning the exact date they asked for, redirect them: - if (isExactVersionRequested(wbRequest, result)) { - super.renderResource(httpRequest, httpResponse, wbRequest, result, - resource, uriConverter); - } else { - String url = result.getAbsoluteUrl(); - String captureDate = result.getCaptureDate(); - String betterURI = uriConverter.makeReplayURI(captureDate,url); - httpResponse.sendRedirect(betterURI); - } - } - -// private void removeString(StringBuilder page, final String toZap) { -// int idx = page.indexOf(toZap); -// if (idx >= 0) { -// page.delete(idx, idx + toZap.length()); -// } -// } - - /** - * add BASE tag and javascript to a page that will rewrite embedded URLs to - * point back into the WM. Also attempt to fix up URL attributes in some - * tags that must be correct at page load (FRAME, META, LINK, SCRIPT) - * - * @param page - * @param httpRequest - * @param httpResponse - * @param wbRequest - * @param result - * @param resource - * @param uriConverter - */ - protected void markUpPage(StringBuilder page, - HttpServletRequest httpRequest, HttpServletResponse httpResponse, - WaybackRequest wbRequest, SearchResult result, Resource resource, - ResultURIConverter uriConverter) { - - String pageUrl = result.getAbsoluteUrl(); - String captureDate = result.getCaptureDate(); - - String existingBaseHref = TagMagix.getBaseHref(page); - if (existingBaseHref != null) { - pageUrl = existingBaseHref; - } - - TagMagix.markupTagREURIC(page, uriConverter, captureDate, pageUrl, - "FRAME", "SRC"); - TagMagix.markupTagREURIC(page, uriConverter, captureDate, pageUrl, - "META", "URL"); - TagMagix.markupTagREURIC(page, uriConverter, captureDate, pageUrl, - "LINK", "HREF"); - // TODO: The classic WM added a js_ to the datespec, so NotInArchives - // can return an valid javascript doc, and not cause Javascript errors. - TagMagix.markupTagREURIC(page, uriConverter, captureDate, pageUrl, - "SCRIPT", "SRC"); - - if (existingBaseHref == null) { - insertBaseTag(page, result); - } - insertJavascriptXHTML(page, httpRequest, httpResponse, wbRequest, - result, resource, uriConverter); - } - - /** - * add a BASE HTML tag to make all path relative URLs map to the right URL - * - * @param page - * @param result - */ - protected void insertBaseTag(StringBuilder page, SearchResult result) { - String resultUrl = result.getAbsoluteUrl(); - String baseTag = "<base href=\"" + resultUrl + "\" />"; - int insertPoint = page.indexOf("<head>"); - if (-1 == insertPoint) { - insertPoint = page.indexOf("<HEAD>"); - } - if (-1 == insertPoint) { - insertPoint = 0; - } else { - insertPoint += 6; // just after the tag - } - page.insert(insertPoint, baseTag); - } - - /** - * insert Javascript into a page to rewrite URLs - * - * @param page - * @param httpRequest - * @param httpResponse - * @param wbRequest - * @param result - * @param resource - * @param uriConverter - */ - protected void insertJavascriptXHTML(StringBuilder page, - HttpServletRequest httpRequest, HttpServletResponse httpResponse, - WaybackRequest wbRequest, SearchResult result, Resource resource, - ResultURIConverter uriConverter) { - - StringFormatter fmt = wbRequest.getFormatter(); - - String resourceTS = result.getCaptureDate(); - Timestamp captureTS = Timestamp.parseBefore(resourceTS); - Date captureDate = captureTS.getDate(); - String contextPath = uriConverter.makeReplayURI(resourceTS, ""); - - StringBuilder ins = new StringBuilder(300); - ins.append("<script type=\"text/javascript\">\n\n"); - ins.append(fmt.format("ReplayView.javaScriptComment", captureDate, - new Date())); - ins.append("var sWayBackCGI = \"" + contextPath + "\";\n"); - ins.append("</script>\n"); - ins.append(scriptUrlInserts); - - int insertPoint = page.lastIndexOf("</body>"); - if (-1 == insertPoint) { - insertPoint = page.lastIndexOf("</BODY>"); - } - if (-1 == insertPoint) { - insertPoint = page.length(); - } - page.insert(insertPoint, ins.toString()); - } -} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2007-12-12 03:06:08
|
Revision: 2111 http://archive-access.svn.sourceforge.net/archive-access/?rev=2111&view=rev Author: bradtofel Date: 2007-12-11 19:06:10 -0800 (Tue, 11 Dec 2007) Log Message: ----------- BUGFIX: now rewriting Location and Content-Base headers in non HTML documents FEATURE: Added Server-Side rendering capability to normal ArchivalUrl mode. Now JS inserts are all handled through .jsp inserts, which include page specific variables, and reference to the common .js file which uses those variables. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlReplayDispatcher.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlReplayRenderer.java Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlTransparentReplayRenderer.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlReplayDispatcher.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlReplayDispatcher.java 2007-12-12 02:19:12 UTC (rev 2110) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlReplayDispatcher.java 2007-12-12 03:06:10 UTC (rev 2111) @@ -33,7 +33,6 @@ import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.replay.BaseReplayDispatcher; import org.archive.wayback.replay.DateRedirectReplayRenderer; -import org.archive.wayback.replay.TransparentReplayRenderer; /** * @@ -54,7 +53,9 @@ // TODO: make this configurable private final static long MAX_HTML_MARKUP_LENGTH = 1024 * 1024 * 5; - private ReplayRenderer transparent = new TransparentReplayRenderer(); + private ReplayRenderer transparent = + new ArchivalUrlTransparentReplayRenderer(); + private ReplayRenderer redirect = new DateRedirectReplayRenderer(); private ArchivalUrlReplayRenderer archivalHTML = new ArchivalUrlReplayRenderer(); @@ -75,6 +76,8 @@ return redirect; } + // TODO: handle .css docs -- embedded URLs there need to be fixed + // HTML and XHTML docs smaller than some size get marked up as HTML if (resource.getRecordLength() < MAX_HTML_MARKUP_LENGTH) { @@ -123,4 +126,20 @@ public void setJspInserts(List<String> jspInserts) { archivalHTML.setJspInserts(jspInserts); } + + /** + * @return + * @see org.archive.wayback.archivalurl.ArchivalUrlReplayRenderer#isServerSideRendering() + */ + public boolean isServerSideRendering() { + return archivalHTML.isServerSideRendering(); + } + + /** + * @param isServerSideRendering + * @see org.archive.wayback.archivalurl.ArchivalUrlReplayRenderer#setServerSideRendering(boolean) + */ + public void setServerSideRendering(boolean isServerSideRendering) { + archivalHTML.setServerSideRendering(isServerSideRendering); + } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlReplayRenderer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlReplayRenderer.java 2007-12-12 02:19:12 UTC (rev 2110) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlReplayRenderer.java 2007-12-12 03:06:10 UTC (rev 2111) @@ -25,7 +25,6 @@ package org.archive.wayback.archivalurl; import java.io.IOException; -import java.util.Date; import java.util.Iterator; import java.util.List; import java.util.Map; @@ -36,17 +35,14 @@ import org.archive.wayback.ReplayRenderer; import org.archive.wayback.ResultURIConverter; -import org.archive.wayback.WaybackConstants; import org.archive.wayback.core.Resource; import org.archive.wayback.core.SearchResult; import org.archive.wayback.core.SearchResults; -import org.archive.wayback.core.Timestamp; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.exception.BadContentException; import org.archive.wayback.replay.HTMLPage; import org.archive.wayback.replay.HttpHeaderProcessor; import org.archive.wayback.replay.HttpHeaderOperation; -import org.archive.wayback.util.StringFormatter; import org.archive.wayback.util.UrlCanonicalizer; /** @@ -61,20 +57,10 @@ */ public class ArchivalUrlReplayRenderer implements ReplayRenderer, HttpHeaderProcessor { - private final static String HTTP_LENGTH_HEADER = "Content-Length"; - private final static String HTTP_LENGTH_HEADER_UP = - HTTP_LENGTH_HEADER.toUpperCase(); - private final static String HTTP_LOCATION_HEADER = "Location"; - private final static String HTTP_LOCATION_HEADER_UP = - HTTP_LOCATION_HEADER.toUpperCase(); - - private final static String HTTP_CONTENT_BASE_HEADER = "Content-Length"; - private final static String HTTP_CONTENT_BASE_HEADER_UP = - HTTP_CONTENT_BASE_HEADER.toUpperCase(); - private List<String> jsInserts = null; private List<String> jspInserts = null; + private boolean serverSideRendering = false; /* (non-Javadoc) * @see org.archive.wayback.ReplayRenderer#renderResource(javax.servlet.http.HttpServletRequest, javax.servlet.http.HttpServletResponse, org.archive.wayback.core.WaybackRequest, org.archive.wayback.core.SearchResult, org.archive.wayback.core.Resource, org.archive.wayback.ResultURIConverter, org.archive.wayback.core.SearchResults) @@ -85,41 +71,23 @@ ResultURIConverter uriConverter, SearchResults results) throws ServletException, IOException, BadContentException { - resource.parseHeaders(); - + StringBuilder toInsert = new StringBuilder(300); + HttpHeaderOperation.copyHTTPMessageHeader(resource, httpResponse); - + Map<String,String> headers = HttpHeaderOperation.processHeaders( resource, result, uriConverter, this); + // Load content into an HTML page, and resolve load-time URLs: HTMLPage page = new HTMLPage(resource,result,uriConverter); page.readFully(); - page.resolvePageUrls(); - // generate JS insert: - StringFormatter fmt = wbRequest.getFormatter(); - - String resourceTS = result.getCaptureDate(); - String resourceUrl = result.get(WaybackConstants.RESULT_URL); - Timestamp captureTS = Timestamp.parseBefore(resourceTS); - Date captureDate = captureTS.getDate(); - String contextPath = uriConverter.makeReplayURI(resourceTS, ""); - - - StringBuilder toInsert = new StringBuilder(300); - - toInsert.append("<script type=\"text/javascript\">\n\n"); - toInsert.append(fmt.format("ReplayView.javaScriptComment", captureDate, - new Date())); - String wmNotice = fmt.format("ReplayView.banner", resourceUrl, - captureDate); - String wmHideNotice = fmt.format("ReplayView.bannerHideLink"); - toInsert.append("var sWayBackCGI = \"" + contextPath + "\";\n"); - toInsert.append("var wmNotice = \"" + wmNotice + "\";\n"); - toInsert.append("var wmHideNotice = \"" + wmHideNotice + "\";\n"); - toInsert.append("</script>\n"); - + if(serverSideRendering) { + page.resolveAllPageUrls(); + } else { + page.resolvePageUrls(); + } if(jsInserts != null) { Iterator<String> itr = jsInserts.iterator(); while(itr.hasNext()) { @@ -134,16 +102,20 @@ } } - // add the javascript, and dump the result out to the client: - page.insertAtEndOfBody(toInsert.toString()); + // insert the new content: + if(serverSideRendering) { + page.insertAtStartOfBody(toInsert.toString()); + } else { + page.insertAtEndOfBody(toInsert.toString()); + } + + // set the corrected length: + int bytes = page.getBytes().length; + headers.put(HTTP_LENGTH_HEADER, String.valueOf(bytes)); // send back the headers: HttpHeaderOperation.sendHeaders(headers, httpResponse); - // plus the corrected length: - int bytes = page.getBytes().length; - headers.put(HTTP_LENGTH_HEADER, String.valueOf(bytes)); - page.writeToOutputStream(httpResponse.getOutputStream()); } @@ -205,4 +177,18 @@ public void setJspInserts(List<String> jspInserts) { this.jspInserts = jspInserts; } + + /** + * @return the isServerSideRendering + */ + public boolean isServerSideRendering() { + return serverSideRendering; + } + + /** + * @param isServerSideRendering the isServerSideRendering to set + */ + public void setServerSideRendering(boolean serverSideRendering) { + this.serverSideRendering = serverSideRendering; + } } Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlTransparentReplayRenderer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlTransparentReplayRenderer.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlTransparentReplayRenderer.java 2007-12-12 03:06:10 UTC (rev 2111) @@ -0,0 +1,45 @@ +package org.archive.wayback.archivalurl; + +import java.util.Map; + +import org.archive.wayback.ResultURIConverter; +import org.archive.wayback.core.SearchResult; +import org.archive.wayback.replay.TransparentReplayRenderer; +import org.archive.wayback.util.UrlCanonicalizer; + +/** + * Slight extension to TransparentReplayRenderer, which rewrites Location and + * Content-Base HTTP headers as they go out. + * + * @author brad + * @version $Date$, $Revision$ + */ +public class ArchivalUrlTransparentReplayRenderer +extends TransparentReplayRenderer { + + /* (non-Javadoc) + * @see org.archive.wayback.replay.HeaderFilter#filter(java.util.Map, java.lang.String, java.lang.String, org.archive.wayback.ResultURIConverter, org.archive.wayback.core.SearchResult) + */ + public void filter(Map<String, String> output, String key, String value, + ResultURIConverter uriConverter, SearchResult result) { + + String keyUp = key.toUpperCase(); + + // rewrite Location header URLs + if (keyUp.startsWith(HTTP_LOCATION_HEADER_UP) || + keyUp.startsWith(HTTP_CONTENT_BASE_HEADER_UP)) { + + String baseUrl = result.getAbsoluteUrl(); + String cd = result.getCaptureDate(); + // by the spec, these should be absolute already, but just in case: + String u = UrlCanonicalizer.resolveUrl(baseUrl, value); + + output.put(key, uriConverter.makeReplayURI(cd,u)); + + } else { + // others go out as-is: + + output.put(key, value); + } + } +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2008-07-01 23:40:40
|
Revision: 2369 http://archive-access.svn.sourceforge.net/archive-access/?rev=2369&view=rev Author: bradtofel Date: 2008-07-01 16:40:50 -0700 (Tue, 01 Jul 2008) Log Message: ----------- REFACTOR: SearchResult => (Url|Capture)SearchResult Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlASXReplayRenderer.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlCSSReplayRenderer.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlReplayDispatcher.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlReplayRenderer.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlTransparentReplayRenderer.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlASXReplayRenderer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlASXReplayRenderer.java 2008-07-01 23:40:18 UTC (rev 2368) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlASXReplayRenderer.java 2008-07-01 23:40:50 UTC (rev 2369) @@ -9,8 +9,8 @@ import org.archive.wayback.ResultURIConverter; import org.archive.wayback.core.Resource; -import org.archive.wayback.core.SearchResult; -import org.archive.wayback.core.SearchResults; +import org.archive.wayback.core.CaptureSearchResult; +import org.archive.wayback.core.CaptureSearchResults; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.exception.BadContentException; import org.archive.wayback.replay.HTMLPage; @@ -22,8 +22,8 @@ */ public void renderResource(HttpServletRequest httpRequest, HttpServletResponse httpResponse, WaybackRequest wbRequest, - SearchResult result, Resource resource, - ResultURIConverter uriConverter, SearchResults results) + CaptureSearchResult result, Resource resource, + ResultURIConverter uriConverter, CaptureSearchResults results) throws ServletException, IOException, BadContentException { Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlCSSReplayRenderer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlCSSReplayRenderer.java 2008-07-01 23:40:18 UTC (rev 2368) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlCSSReplayRenderer.java 2008-07-01 23:40:50 UTC (rev 2369) @@ -9,8 +9,8 @@ import org.archive.wayback.ResultURIConverter; import org.archive.wayback.core.Resource; -import org.archive.wayback.core.SearchResult; -import org.archive.wayback.core.SearchResults; +import org.archive.wayback.core.CaptureSearchResult; +import org.archive.wayback.core.CaptureSearchResults; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.exception.BadContentException; import org.archive.wayback.replay.HTMLPage; @@ -22,8 +22,8 @@ */ public void renderResource(HttpServletRequest httpRequest, HttpServletResponse httpResponse, WaybackRequest wbRequest, - SearchResult result, Resource resource, - ResultURIConverter uriConverter, SearchResults results) + CaptureSearchResult result, Resource resource, + ResultURIConverter uriConverter, CaptureSearchResults results) throws ServletException, IOException, BadContentException { HttpHeaderOperation.copyHTTPMessageHeader(resource, httpResponse); Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlReplayDispatcher.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlReplayDispatcher.java 2008-07-01 23:40:18 UTC (rev 2368) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlReplayDispatcher.java 2008-07-01 23:40:50 UTC (rev 2369) @@ -30,7 +30,7 @@ import org.archive.wayback.ReplayRenderer; import org.archive.wayback.WaybackConstants; import org.archive.wayback.core.Resource; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.replay.DateRedirectReplayRenderer; @@ -71,13 +71,13 @@ * @see org.archive.wayback.ReplayDispatcher#getRenderer(org.archive.wayback.core.WaybackRequest, org.archive.wayback.core.SearchResult, org.archive.wayback.core.Resource) */ public ReplayRenderer getRenderer(WaybackRequest wbRequest, - SearchResult result, Resource resource) { + CaptureSearchResult result, Resource resource) { // if the result is not for the exact date requested, redirect to the // exact date. some capture dates are not 14 digits, only compare as // many digits as are in the result date: - String reqDateStr = wbRequest.get(WaybackConstants.REQUEST_EXACT_DATE); - String resDateStr = result.get(WaybackConstants.RESULT_CAPTURE_DATE); + String reqDateStr = wbRequest.get(WaybackConstants.REQUEST_DATE); + String resDateStr = result.getCaptureTimestamp(); if(!resDateStr.equals(reqDateStr.substring(0, resDateStr.length()))) { return redirect; } @@ -85,7 +85,7 @@ // only bother attempting markup on pages smaller than some size: if (resource.getRecordLength() < MAX_HTML_MARKUP_LENGTH) { - String resultMime = result.get(WaybackConstants.RESULT_MIME_TYPE); + String resultMime = result.getMimeType(); // HTML and XHTML docs get marked up as HTML if (-1 != resultMime.indexOf(TEXT_HTML_MIME)) { return archivalHTML; @@ -100,7 +100,7 @@ if (-1 != resultMime.indexOf(ASX_MIME)) { return archivalASX; } - String resultPath = result.get(WaybackConstants.RESULT_URL_KEY); + String resultPath = result.getUrlKey(); resultPath = resultPath.substring(resultPath.indexOf('/')); int queryIdx = resultPath.indexOf('?'); if(queryIdx > 0) { Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlReplayRenderer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlReplayRenderer.java 2008-07-01 23:40:18 UTC (rev 2368) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlReplayRenderer.java 2008-07-01 23:40:50 UTC (rev 2369) @@ -36,8 +36,8 @@ import org.archive.wayback.ReplayRenderer; import org.archive.wayback.ResultURIConverter; import org.archive.wayback.core.Resource; -import org.archive.wayback.core.SearchResult; -import org.archive.wayback.core.SearchResults; +import org.archive.wayback.core.CaptureSearchResult; +import org.archive.wayback.core.CaptureSearchResults; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.exception.BadContentException; import org.archive.wayback.replay.HTMLPage; @@ -67,8 +67,8 @@ */ public void renderResource(HttpServletRequest httpRequest, HttpServletResponse httpResponse, WaybackRequest wbRequest, - SearchResult result, Resource resource, - ResultURIConverter uriConverter, SearchResults results) + CaptureSearchResult result, Resource resource, + ResultURIConverter uriConverter, CaptureSearchResults results) throws ServletException, IOException, BadContentException { StringBuilder toInsert = new StringBuilder(300); @@ -98,7 +98,7 @@ Iterator<String> itr = jspInserts.iterator(); while(itr.hasNext()) { toInsert.append(page.includeJspString(itr.next(), httpRequest, - httpResponse, wbRequest, results, result)); + httpResponse, wbRequest, results, result, resource)); } } @@ -129,7 +129,7 @@ * @see org.archive.wayback.replay.HttpHeaderProcessor#filter(java.util.Map, java.lang.String, java.lang.String, org.archive.wayback.ResultURIConverter, org.archive.wayback.core.SearchResult) */ public void filter(Map<String, String> output, String key, String value, - ResultURIConverter uriConverter, SearchResult result) { + ResultURIConverter uriConverter, CaptureSearchResult result) { String keyUp = key.toUpperCase(); @@ -142,8 +142,8 @@ if (keyUp.startsWith(HTTP_LOCATION_HEADER_UP) || keyUp.startsWith(HTTP_CONTENT_BASE_HEADER_UP)) { - String baseUrl = result.getAbsoluteUrl(); - String cd = result.getCaptureDate(); + String baseUrl = result.getOriginalUrl(); + String cd = result.getCaptureTimestamp(); // by the spec, these should be absolute already, but just in case: String u = UrlOperations.resolveUrl(baseUrl, value); Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlTransparentReplayRenderer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlTransparentReplayRenderer.java 2008-07-01 23:40:18 UTC (rev 2368) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlTransparentReplayRenderer.java 2008-07-01 23:40:50 UTC (rev 2369) @@ -3,7 +3,7 @@ import java.util.Map; import org.archive.wayback.ResultURIConverter; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.replay.TransparentReplayRenderer; import org.archive.wayback.util.url.UrlOperations; @@ -21,7 +21,7 @@ * @see org.archive.wayback.replay.HeaderFilter#filter(java.util.Map, java.lang.String, java.lang.String, org.archive.wayback.ResultURIConverter, org.archive.wayback.core.SearchResult) */ public void filter(Map<String, String> output, String key, String value, - ResultURIConverter uriConverter, SearchResult result) { + ResultURIConverter uriConverter, CaptureSearchResult result) { String keyUp = key.toUpperCase(); @@ -29,8 +29,8 @@ if (keyUp.startsWith(HTTP_LOCATION_HEADER_UP) || keyUp.startsWith(HTTP_CONTENT_BASE_HEADER_UP)) { - String baseUrl = result.getAbsoluteUrl(); - String cd = result.getCaptureDate(); + String baseUrl = result.getOriginalUrl(); + String cd = result.getCaptureTimestamp(); // by the spec, these should be absolute already, but just in case: String u = UrlOperations.resolveUrl(baseUrl, value); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |