From: <bra...@us...> - 2007-04-03 02:44:39
|
Revision: 1685 http://archive-access.svn.sourceforge.net/archive-access/?rev=1685&view=rev Author: bradtofel Date: 2007-04-02 19:44:39 -0700 (Mon, 02 Apr 2007) Log Message: ----------- REFACTOR: moved configuration-reading code into PropertyConfiguration class. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/AdministrativeExclusionAuthority.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/remote/RemoteExclusionFilterFactory.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilterFactory.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilterFactory.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/RequestFilter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCCacheDirectory.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLCacher.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/OpenSearchQueryParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/Renderer.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/BaseReplayRenderer.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/NutchResourceIndex.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/RemoteResourceIndex.java 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/distributed/AlphaPartitionedIndex.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/HttpARCResourceStore.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/LocalARCResourceStore.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/http/FileLocationDB.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/AdministrativeExclusionAuthority.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/AdministrativeExclusionAuthority.java 2007-04-03 02:02:34 UTC (rev 1684) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/AdministrativeExclusionAuthority.java 2007-04-03 02:44:39 UTC (rev 1685) @@ -24,6 +24,7 @@ */ package org.archive.wayback.accesscontrol; +import java.io.File; import java.util.ArrayList; import java.util.Properties; @@ -31,6 +32,7 @@ import org.archive.wayback.bdb.BDBRecord; import org.archive.wayback.bdb.BDBRecordSet; import org.archive.wayback.bdb.BDBRecordIterator; +import org.archive.wayback.core.PropertyConfiguration; import org.archive.wayback.exception.ConfigurationException; import org.archive.wayback.surt.SURTTokenizer; @@ -135,20 +137,13 @@ * @see org.archive.wayback.PropertyConfigurable#init(java.util.Properties) */ public void init(Properties p) throws ConfigurationException { - - String dbPath = (String) p.get(INDEX_PATH); - if (dbPath == null || (dbPath.length() <= 0)) { - throw new IllegalArgumentException("Failed to find " + INDEX_PATH); - } - String dbName = (String) p.get(DB_NAME); - if (dbName == null || (dbName.length() <= 0)) { - throw new 
IllegalArgumentException("Failed to find " + DB_NAME); - } - + PropertyConfiguration pc = new PropertyConfiguration(p); + File dbDir = pc.getDir(INDEX_PATH,true); + String dbName = pc.getString(DB_NAME); try { db = new BDBRecordSet(); - db.initializeDB(dbPath,dbName); + db.initializeDB(dbDir.getAbsolutePath(),dbName); } catch (DatabaseException e) { e.printStackTrace(); Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/remote/RemoteExclusionFilterFactory.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/remote/RemoteExclusionFilterFactory.java 2007-04-03 02:02:34 UTC (rev 1684) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/remote/RemoteExclusionFilterFactory.java 2007-04-03 02:44:39 UTC (rev 1685) @@ -26,6 +26,7 @@ import java.util.Properties; +import org.archive.wayback.core.PropertyConfiguration; import org.archive.wayback.exception.ConfigurationException; import org.archive.wayback.resourceindex.ExclusionFilterFactory; import org.archive.wayback.resourceindex.SearchResultFilter; @@ -58,10 +59,9 @@ * @see org.archive.wayback.PropertyConfigurable#init(java.util.Properties) */ public void init(Properties p) throws ConfigurationException { - // TODO Auto-generated method stub - exclusionUrlPrefix = (String) p.get(EXCLUSION_PREFIX); - - exclusionUserAgent = (String) p.get(EXCLUSION_UA); + PropertyConfiguration pc = new PropertyConfiguration(p); + exclusionUrlPrefix = pc.getString(EXCLUSION_PREFIX); + exclusionUserAgent = pc.getString(EXCLUSION_UA); } /* (non-Javadoc) Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilterFactory.java =================================================================== --- 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilterFactory.java 2007-04-03 02:02:34 UTC (rev 1684) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilterFactory.java 2007-04-03 02:44:39 UTC (rev 1685) @@ -26,7 +26,7 @@ import java.util.Properties; -import org.archive.wayback.PropertyConfigurable; +import org.archive.wayback.core.PropertyConfiguration; import org.archive.wayback.exception.ConfigurationException; import org.archive.wayback.liveweb.LiveWebCache; import org.archive.wayback.resourceindex.ExclusionFilterFactory; @@ -38,8 +38,7 @@ * @author brad * @version $Date$, $Revision$ */ -public class RobotExclusionFilterFactory implements PropertyConfigurable, - ExclusionFilterFactory { +public class RobotExclusionFilterFactory implements ExclusionFilterFactory { private final static String ROBOT_USER_AGENT = "robotexclusion.useragent"; private final static String ROBOT_CACHE_AGE = "robotexclusion.cacheagems"; @@ -49,26 +48,13 @@ private String userAgent = null; private long maxCacheMS = 0; - private String getProp(Properties p, String key) - throws ConfigurationException { - - if(p.containsKey(key)) { - String v = p.getProperty(key); - if(v == null || v.length() < 1) { - throw new ConfigurationException("Empty configuration " + key); - } - return v; - } else { - throw new ConfigurationException("Missing configuration " + key); - } - } - /* (non-Javadoc) * @see org.archive.wayback.PropertyConfigurable#init(java.util.Properties) */ public void init(Properties p) throws ConfigurationException { - userAgent = getProp(p,ROBOT_USER_AGENT); - maxCacheMS = Long.parseLong(getProp(p,ROBOT_CACHE_AGE)); + PropertyConfiguration pc = new PropertyConfiguration(p); + userAgent = pc.getString(ROBOT_USER_AGENT); + maxCacheMS = pc.getLong(ROBOT_CACHE_AGE); webCache = new LiveWebCache(); webCache.init(p); } Modified: 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilterFactory.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilterFactory.java 2007-04-03 02:02:34 UTC (rev 1684) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilterFactory.java 2007-04-03 02:44:39 UTC (rev 1685) @@ -31,7 +31,7 @@ import java.util.Properties; import java.util.logging.Logger; -import org.archive.wayback.PropertyConfigurable; +import org.archive.wayback.core.PropertyConfiguration; import org.archive.wayback.exception.ConfigurationException; import org.archive.wayback.resourceindex.ExclusionFilterFactory; import org.archive.wayback.resourceindex.SearchResultFilter; @@ -45,8 +45,7 @@ * @author brad * @version $Date$, $Revision$ */ -public class StaticMapExclusionFilterFactory implements PropertyConfigurable, -ExclusionFilterFactory { +public class StaticMapExclusionFilterFactory implements ExclusionFilterFactory { private static final Logger LOGGER = Logger.getLogger(StaticMapExclusionFilterFactory.class.getName()); @@ -68,20 +67,16 @@ * @see org.archive.wayback.PropertyConfigurable#init(java.util.Properties) */ public void init(Properties p) throws ConfigurationException { - // TODO Auto-generated method stub - String path = (String) p.get(EXCLUSION_PATH); - if((path == null) || path.length() == 0) { - throw new ConfigurationException("Invalid/missing " + - EXCLUSION_PATH + "configuration"); - } - file = new File(path); + PropertyConfiguration pc = new PropertyConfiguration(p); + file = pc.getFile(EXCLUSION_PATH); try { reloadFile(); } catch (IOException e) { e.printStackTrace(); throw new ConfigurationException(e.getLocalizedMessage()); } - LOGGER.info("starting CachedMapExclusion with 
file " + path); + LOGGER.info("starting CachedMapExclusion with file " + + file.getAbsolutePath()); startup(); } private void reloadFile() throws IOException { Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/RequestFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/RequestFilter.java 2007-04-03 02:02:34 UTC (rev 1684) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/RequestFilter.java 2007-04-03 02:44:39 UTC (rev 1685) @@ -71,20 +71,11 @@ public RequestFilter() { super(); } - - /** - * initialize this RequestFilter based on a Properties assembled from - * Context and Filter init-params. - * @param p - * @throws ConfigurationException + /* (non-Javadoc) + * @see javax.servlet.Filter#destroy() */ - public void init(Properties p) throws ConfigurationException { - handlerUrl = (String) p.get(HANDLER_URL); - if ((handlerUrl == null) || (handlerUrl.length() <= 0)) { - throw new ConfigurationException("No config (" + HANDLER_URL + ")"); - } - } - + public void destroy() {} + /* (non-Javadoc) * @see javax.servlet.Filter#init(javax.servlet.FilterConfig) */ @@ -119,6 +110,17 @@ } /** + * initialize this RequestFilter based on a Properties assembled from + * Context and Filter init-params. 
+ * @param p + * @throws ConfigurationException + */ + public void init(Properties p) throws ConfigurationException { + PropertyConfiguration pc = new PropertyConfiguration(p); + handlerUrl = pc.getString(HANDLER_URL); + } + + /** * @param request * @param response * @return boolean, true if a WaybackRequest was parsed from the URL @@ -157,10 +159,4 @@ protected abstract WaybackRequest parseRequest( HttpServletRequest httpRequest); - /* (non-Javadoc) - * @see javax.servlet.Filter#destroy() - */ - public void destroy() { - - } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCCacheDirectory.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCCacheDirectory.java 2007-04-03 02:02:34 UTC (rev 1684) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCCacheDirectory.java 2007-04-03 02:44:39 UTC (rev 1685) @@ -40,6 +40,7 @@ import org.archive.io.arc.ARCWriter; import org.archive.io.arc.ARCWriterPool; import org.archive.wayback.PropertyConfigurable; +import org.archive.wayback.core.PropertyConfiguration; import org.archive.wayback.core.Resource; import org.archive.wayback.exception.ConfigurationException; @@ -75,35 +76,10 @@ private ARCWriterPool pool = null; private File arcDir = null; - private String getProp(Properties p, String key) - throws ConfigurationException { - - if(p.containsKey(key)) { - String v = p.getProperty(key); - if(v == null || v.length() < 1) { - throw new ConfigurationException("Empty configuration " + key); - } - return v; - } else { - throw new ConfigurationException("Missing configuration " + key); - } - - } - public void init(Properties p) throws ConfigurationException { - String arcDirS = getProp(p,LIVE_WEB_ARC_DIR); - arcDir = new File(arcDirS); - if (!arcDir.isDirectory()) { - if (arcDir.exists()) { - throw new 
ConfigurationException("Path(" + arcDirS + ") " + - "exists but is not a directory"); - } - if(!arcDir.mkdirs()) { - throw new ConfigurationException("Unable to mkdir(" + arcDirS + - ")"); - } - } - String arcPrefix = getProp(p,LIVE_WEB_ARC_PREFIX); + PropertyConfiguration pc = new PropertyConfiguration(p); + arcDir = pc.getDir(LIVE_WEB_ARC_DIR,true); + String arcPrefix = pc.getString(LIVE_WEB_ARC_PREFIX); File[] files = { arcDir }; WriterPoolSettings settings = getSettings(true, arcPrefix, files); pool = new ARCWriterPool(settings, MAX_POOL_WRITERS, MAX_POOL_WAIT); Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLCacher.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLCacher.java 2007-04-03 02:02:34 UTC (rev 1684) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLCacher.java 2007-04-03 02:44:39 UTC (rev 1685) @@ -53,6 +53,7 @@ import org.archive.io.arc.ARCWriter; import org.archive.net.LaxURI; import org.archive.wayback.PropertyConfigurable; +import org.archive.wayback.core.PropertyConfiguration; import org.archive.wayback.exception.ConfigurationException; import org.archive.wayback.exception.LiveDocumentNotAvailableException; @@ -91,23 +92,10 @@ * @see org.archive.wayback.PropertyConfigurable#init(java.util.Properties) */ public void init(Properties p) throws ConfigurationException { - String cachePath = (String) p.get(CACHE_PATH); - if (cachePath == null || cachePath.length() <= 0) { - throw new ConfigurationException("Failed to find " + CACHE_PATH); - } - tmpDir = new File(cachePath); - if(!tmpDir.exists()) { - if(!tmpDir.mkdirs()) { - throw new ConfigurationException("Unable to mkdirs(" + - tmpDir.getAbsolutePath() + ")"); - } - } else { - if(!tmpDir.isDirectory()) { - throw new ConfigurationException("Something non-dir-ish at(" + - 
tmpDir.getAbsolutePath() + ")"); - } - } - LOGGER.info("URLCacher storing temp files in " + cachePath); + PropertyConfiguration pc = new PropertyConfiguration(p); + tmpDir = pc.getDir(CACHE_PATH,true); + LOGGER.info("URLCacher storing temp files in " + + tmpDir.getAbsolutePath()); } private File getTmpFile() { Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/OpenSearchQueryParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/OpenSearchQueryParser.java 2007-04-03 02:02:34 UTC (rev 1684) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/OpenSearchQueryParser.java 2007-04-03 02:44:39 UTC (rev 1685) @@ -33,6 +33,7 @@ import org.archive.wayback.PropertyConfigurable; import org.archive.wayback.WaybackConstants; +import org.archive.wayback.core.PropertyConfiguration; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.exception.BadQueryException; import org.archive.wayback.exception.ConfigurationException; @@ -83,10 +84,9 @@ * @see org.archive.wayback.PropertyConfigurable#init(java.util.Properties) */ public void init(Properties p) throws ConfigurationException { - String max = p.getProperty(WaybackConstants.RESULTS_PER_PAGE_CONFIG_NAME); - if(max != null) { - maxRecords = Integer.parseInt(max); - } + PropertyConfiguration pc = new PropertyConfiguration(p); + maxRecords = pc.getInt(WaybackConstants.RESULTS_PER_PAGE_CONFIG_NAME, + DEFAULT_MAX_RECORDS); } private String getMapParam(Map queryMap, String field) { Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/Renderer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/Renderer.java 2007-04-03 02:02:34 UTC (rev 1684) +++ 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/Renderer.java 2007-04-03 02:44:39 UTC (rev 1685) @@ -35,6 +35,7 @@ import org.archive.wayback.QueryRenderer; import org.archive.wayback.ResultURIConverter; +import org.archive.wayback.core.PropertyConfiguration; import org.archive.wayback.core.SearchResults; import org.archive.wayback.core.UIResults; import org.archive.wayback.core.WaybackRequest; @@ -59,10 +60,8 @@ private final String PREFIX_QUERY_JSP = "PathQueryResults.jsp"; public void init(Properties p) throws ConfigurationException { - this.jspPath = (String) p.get(JSP_PATH); - if (this.jspPath == null || this.jspPath.length() <= 0) { - throw new ConfigurationException("Failed to find " + JSP_PATH); - } + PropertyConfiguration pc = new PropertyConfiguration(p); + jspPath = pc.getString(JSP_PATH); } public void renderException(HttpServletRequest httpRequest, Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/BaseReplayRenderer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/BaseReplayRenderer.java 2007-04-03 02:02:34 UTC (rev 1684) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/BaseReplayRenderer.java 2007-04-03 02:44:39 UTC (rev 1685) @@ -42,6 +42,7 @@ import org.archive.wayback.ReplayRenderer; import org.archive.wayback.ResultURIConverter; import org.archive.wayback.WaybackConstants; +import org.archive.wayback.core.PropertyConfiguration; import org.archive.wayback.core.Resource; import org.archive.wayback.core.SearchResult; import org.archive.wayback.core.UIResults; @@ -104,10 +105,8 @@ /* INITIALIZATION: */ public void init(Properties p) throws ConfigurationException { - this.jspPath = (String) p.get(JSP_PATH); - if (this.jspPath == null || this.jspPath.length() <= 0) { - throw new 
ConfigurationException("Failed to find " + JSP_PATH); - } + PropertyConfiguration pc = new PropertyConfiguration(p); + jspPath = pc.getString(JSP_PATH); } /* ERROR HANDLING RESPONSES: */ Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java 2007-04-03 02:02:34 UTC (rev 1684) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java 2007-04-03 02:44:39 UTC (rev 1685) @@ -43,6 +43,7 @@ import org.archive.wayback.resourceindex.filters.UrlPrefixMatchFilter; import org.archive.wayback.resourceindex.filters.WindowEndFilter; import org.archive.wayback.resourceindex.filters.WindowStartFilter; +import org.archive.wayback.core.PropertyConfiguration; import org.archive.wayback.core.SearchResult; import org.archive.wayback.core.SearchResults; import org.archive.wayback.core.Timestamp; @@ -84,14 +85,12 @@ * @see org.archive.wayback.PropertyConfigurable#init(java.util.Properties) */ public void init(Properties p) throws ConfigurationException { + PropertyConfiguration pc = new PropertyConfiguration(p); source = SearchResultSourceFactory.get(p); exclusionFactory = ExclusionFilterFactoryFactory.get(p); - String maxRecordsConfig = (String) p.get( - WaybackConstants.MAX_RESULTS_CONFIG_NAME); - if(maxRecordsConfig != null) { - maxRecords = Integer.parseInt(maxRecordsConfig); - } + maxRecords = pc.getInt(WaybackConstants.MAX_RESULTS_CONFIG_NAME, + MAX_RECORDS); } private SearchResultFilter getExclusionFilter() Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/NutchResourceIndex.java =================================================================== --- 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/NutchResourceIndex.java 2007-04-03 02:02:34 UTC (rev 1684) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/NutchResourceIndex.java 2007-04-03 02:44:39 UTC (rev 1685) @@ -37,6 +37,7 @@ import org.archive.wayback.ResourceIndex; import org.archive.wayback.WaybackConstants; +import org.archive.wayback.core.PropertyConfiguration; import org.archive.wayback.core.SearchResult; import org.archive.wayback.core.SearchResults; import org.archive.wayback.core.Timestamp; @@ -97,31 +98,24 @@ */ public void init(Properties p) throws ConfigurationException { LOGGER.info("initializing NutchResourceIndex..."); + PropertyConfiguration pc = new PropertyConfiguration(p); + searchUrlBase = pc.getString(SEARCH_BASE_URL); + LOGGER.info("Using base search url " + this.searchUrlBase); + maxRecords = pc.getInt(WaybackConstants.MAX_RESULTS_CONFIG_NAME, + MAX_RECORDS); - this.searchUrlBase = (String)p.get(SEARCH_BASE_URL); - if (this.searchUrlBase == null || this.searchUrlBase.length() <= 0) { - throw new IllegalArgumentException("Failed to find " + - SEARCH_BASE_URL); - } - this.factory = DocumentBuilderFactory.newInstance(); - this.factory.setNamespaceAware(true); - try { + this.factory = DocumentBuilderFactory.newInstance(); + this.factory.setNamespaceAware(true); + try { this.builder = this.factory.newDocumentBuilder(); } catch (ParserConfigurationException e) { // TODO: quiet extra stacktrace.. 
e.printStackTrace(); throw new ConfigurationException(e.getMessage()); } - if (!this.builder.isNamespaceAware()) { - LOGGER.severe("Builder is not namespace aware."); - } - LOGGER.info("Using base search url " + this.searchUrlBase); - String maxRecordsConfig = (String) p.get( - WaybackConstants.MAX_RESULTS_CONFIG_NAME); - if(maxRecordsConfig != null) { - maxRecords = Integer.parseInt(maxRecordsConfig); + if (!this.builder.isNamespaceAware()) { + LOGGER.severe("Builder is not namespace aware."); } - } /* (non-Javadoc) Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/RemoteResourceIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/RemoteResourceIndex.java 2007-04-03 02:02:34 UTC (rev 1684) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/RemoteResourceIndex.java 2007-04-03 02:44:39 UTC (rev 1685) @@ -34,6 +34,7 @@ import javax.xml.parsers.ParserConfigurationException; import org.archive.wayback.ResourceIndex; +import org.archive.wayback.core.PropertyConfiguration; import org.archive.wayback.core.SearchResult; import org.archive.wayback.core.SearchResults; import org.archive.wayback.core.WaybackRequest; @@ -108,11 +109,8 @@ public void init(Properties p) throws ConfigurationException { LOGGER.info("initializing RemoteCDXIndex..."); - this.searchUrlBase = (String) p.get(SEARCH_BASE_URL); - if (this.searchUrlBase == null || this.searchUrlBase.length() <= 0) { - throw new IllegalArgumentException("Failed to find " - + SEARCH_BASE_URL); - } + PropertyConfiguration pc = new PropertyConfiguration(p); + searchUrlBase = pc.getString(SEARCH_BASE_URL); this.factory = DocumentBuilderFactory.newInstance(); this.factory.setNamespaceAware(false); LOGGER.info("Using base search url " + this.searchUrlBase); Modified: 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/distributed/AlphaPartitionedIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/distributed/AlphaPartitionedIndex.java 2007-04-03 02:02:34 UTC (rev 1684) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/distributed/AlphaPartitionedIndex.java 2007-04-03 02:44:39 UTC (rev 1685) @@ -37,6 +37,7 @@ import org.apache.commons.httpclient.URIException; import org.archive.wayback.ResourceIndex; import org.archive.wayback.WaybackConstants; +import org.archive.wayback.core.PropertyConfiguration; import org.archive.wayback.core.SearchResults; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.exception.AccessControlException; @@ -68,6 +69,7 @@ public static String RANGE_CHECK_INTERVAL = "resourceindex.distributed.checkinterval"; private static long MS_PER_SEC = 1000; + private static long DEFAULT_CHECK_INTERVAL = 100; private long lastLoadStat = 0; private long nextCheck = 0; @@ -81,22 +83,9 @@ * @see org.archive.wayback.PropertyConfigurable#init(java.util.Properties) */ public void init(Properties p) throws ConfigurationException { - mapPath = (String) p.get(RANGE_MAP_PATH); - if ((mapPath == null) || (mapPath.length() < 1)) { - throw new ConfigurationException("Failed to find " - + RANGE_MAP_PATH); - } - String checkIntervalStr = (String) p.get(RANGE_CHECK_INTERVAL); - if ((checkIntervalStr == null) || (checkIntervalStr.length() < 1)) { - throw new ConfigurationException("Failed to find " - + RANGE_CHECK_INTERVAL); - } - try { - checkInterval = Long.parseLong(checkIntervalStr); - } catch (NumberFormatException e) { - throw new ConfigurationException("Non numeric " - + RANGE_CHECK_INTERVAL); - } + PropertyConfiguration pc = new PropertyConfiguration(p); + mapPath = 
pc.getString(RANGE_MAP_PATH); + checkInterval = pc.getLong(RANGE_CHECK_INTERVAL,DEFAULT_CHECK_INTERVAL); LOGGER.info("Initialized AlphaPartitionedIndex on file (" + mapPath + ") checking every " + checkInterval + " seconds"); } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/HttpARCResourceStore.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/HttpARCResourceStore.java 2007-04-03 02:02:34 UTC (rev 1684) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/HttpARCResourceStore.java 2007-04-03 02:44:39 UTC (rev 1685) @@ -34,6 +34,7 @@ import org.archive.io.arc.ARCRecord; import org.archive.wayback.ResourceStore; import org.archive.wayback.WaybackConstants; +import org.archive.wayback.core.PropertyConfiguration; import org.archive.wayback.core.Resource; import org.archive.wayback.core.SearchResult; import org.archive.wayback.exception.ConfigurationException; @@ -55,10 +56,8 @@ private String urlPrefix = null; public void init(Properties p) throws ConfigurationException { - urlPrefix = (String) p.get(ARC_URL_PREFIX); - if ((urlPrefix == null) || (urlPrefix.length() < 1)) { - throw new ConfigurationException("Failed to find " + ARC_URL_PREFIX); - } + PropertyConfiguration pc = new PropertyConfiguration(p); + urlPrefix = pc.getString(ARC_URL_PREFIX); if(!urlPrefix.endsWith("/")) { urlPrefix = urlPrefix + "/"; } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/LocalARCResourceStore.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/LocalARCResourceStore.java 2007-04-03 02:02:34 UTC (rev 1684) +++ 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/LocalARCResourceStore.java 2007-04-03 02:44:39 UTC (rev 1685) @@ -38,6 +38,7 @@ import org.archive.io.arc.ARCRecord; import org.archive.wayback.ResourceStore; import org.archive.wayback.WaybackConstants; +import org.archive.wayback.core.PropertyConfiguration; import org.archive.wayback.core.Resource; import org.archive.wayback.core.SearchResult; import org.archive.wayback.core.SearchResults; @@ -65,7 +66,6 @@ private static final String INDEX_TARGET = "resourcestore.indextarget"; private static final String INDEX_INTERVAL = "resourcestore.indexinterval"; - private String path = null; private File arcDir = null; private File tmpDir = null; private File workDir = null; @@ -81,49 +81,20 @@ */ private static Thread indexThread = null; - private String getRequired(Properties p,String key) - throws ConfigurationException { - String val = p.getProperty(key); - if((val == null) || val.length() < 1) { - throw new ConfigurationException("No configuration " + key); - } - return val; - } - - private void ensureDir(File dir) throws ConfigurationException { - if(!dir.isDirectory()) { - if(dir.exists()) { - throw new ConfigurationException("directory (" + - dir.getAbsolutePath() + - ") exists but is not a directory."); - } - if(!dir.mkdirs()) { - throw new ConfigurationException("unable to create directory(" + - dir.getAbsolutePath() + ")"); - } - } - } - public void init(Properties p) throws ConfigurationException { - path = getRequired(p,RESOURCE_PATH); - arcDir = new File(path); + PropertyConfiguration pc = new PropertyConfiguration(p); + arcDir = pc.getDir(RESOURCE_PATH, true); String autoIndex = p.getProperty(AUTO_INDEX); if((autoIndex != null) && (autoIndex.compareTo("1") == 0)) { - tmpDir = new File(getRequired(p,TMP_PATH)); - workDir = new File(getRequired(p,WORK_PATH)); - queuedDir = new File(getRequired(p,QUEUED_PATH)); - indexTarget = getRequired(p,INDEX_TARGET); - 
ensureDir(tmpDir); - ensureDir(workDir); - ensureDir(queuedDir); + tmpDir = pc.getDir(TMP_PATH,true); + workDir = pc.getDir(WORK_PATH,true); + queuedDir = pc.getDir(QUEUED_PATH,true); + indexTarget = pc.getString(INDEX_TARGET); if(indexTarget.startsWith("http://")) { indexClient = new IndexClient(indexTarget); } - String interval = p.getProperty(INDEX_INTERVAL); - if((interval != null ) && (interval.length() > 0)) { - runInterval = Integer.parseInt(interval); - } + runInterval = pc.getInt(INDEX_INTERVAL,DEFAULT_RUN_INTERVAL_MS); startAutoIndexThread(); } } @@ -138,7 +109,7 @@ } File arcFile = new File(arcName); if (!arcFile.isAbsolute()) { - arcFile = new File(this.path, arcName); + arcFile = new File(arcDir, arcName); } if (!arcFile.exists() || !arcFile.canRead()) { Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/http/FileLocationDB.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/http/FileLocationDB.java 2007-04-03 02:02:34 UTC (rev 1684) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/http/FileLocationDB.java 2007-04-03 02:44:39 UTC (rev 1685) @@ -31,6 +31,7 @@ import org.archive.wayback.PropertyConfigurable; import org.archive.wayback.bdb.BDBRecordSet; +import org.archive.wayback.core.PropertyConfiguration; import org.archive.wayback.exception.ConfigurationException; import org.archive.wayback.util.CloseableIterator; @@ -71,32 +72,15 @@ } public void init(Properties p) throws ConfigurationException { - String dbPath = (String) p.get(ARC_DB_PATH); - if ((dbPath == null) || (dbPath.length() < 1)) { - throw new ConfigurationException("Failed to find " + ARC_DB_PATH); - } - String dbName = (String) p.get(ARC_DB_NAME); - if ((dbName == null) || (dbName.length() < 1)) { - throw new ConfigurationException("Failed to find " + 
ARC_DB_NAME); - } - - String logPath = (String) p.get(ARC_DB_LOG); - if ((logPath == null) || (logPath.length() < 1)) { - throw new ConfigurationException("Failed to find " + ARC_DB_LOG); - } - + PropertyConfiguration pc = new PropertyConfiguration(p); + File dbDir = pc.getDir(ARC_DB_PATH, true); + String dbName = pc.getString(ARC_DB_NAME); + String logPath = pc.getString(ARC_DB_LOG); try { - File dbDir = new File(dbPath); - if(!dbDir.exists()) { - if(!dbDir.mkdirs()) { - throw new ConfigurationException("Failed to create " + dbPath); - } - } - - LOGGER.info("Initializing FileLocationDB at(" + dbPath + - ") named(" + dbName + ")"); - initializeDB(dbPath, dbName); + LOGGER.info("Initializing FileLocationDB at(" + + dbDir.getAbsolutePath() + ") named(" + dbName + ")"); + initializeDB(dbDir.getAbsolutePath(), dbName); log = new FileLocationDBLog(logPath); } catch (DatabaseException e) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2007-07-16 23:04:10
|
Revision: 1782 http://archive-access.svn.sourceforge.net/archive-access/?rev=1782&view=rev Author: bradtofel Date: 2007-07-16 16:04:12 -0700 (Mon, 16 Jul 2007) Log Message: ----------- REFACTOR: new core Wayback class which represents a combination of index, replay, query, store, etc, as well as the access mechanism for using those objects. Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/WaybackContext.java Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/WaybackContext.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/WaybackContext.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/WaybackContext.java 2007-07-16 23:04:12 UTC (rev 1782) @@ -0,0 +1,381 @@ +/* WaybackContext + * + * $Id$ + * + * Created on 5:37:31 PM Apr 20, 2007. + * + * Copyright (C) 2007 Internet Archive. + * + * This file is part of wayback-webapp. + * + * wayback-webapp is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback-webapp is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with wayback-webapp; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.webapp; + +import java.io.IOException; + +import javax.servlet.RequestDispatcher; +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.archive.wayback.QueryRenderer; +import org.archive.wayback.ReplayRenderer; +import org.archive.wayback.RequestParser; +import org.archive.wayback.ResourceIndex; +import org.archive.wayback.ResourceStore; +import org.archive.wayback.ResultURIConverter; +import org.archive.wayback.WaybackConstants; +import org.archive.wayback.core.CaptureSearchResults; +import org.archive.wayback.core.Resource; +import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.SearchResults; +import org.archive.wayback.core.UIResults; +import org.archive.wayback.core.WaybackRequest; +import org.archive.wayback.exception.AccessControlException; +import org.archive.wayback.exception.BadQueryException; +import org.archive.wayback.exception.BetterRequestException; +import org.archive.wayback.exception.ResourceIndexNotAvailableException; +import org.archive.wayback.exception.ResourceNotAvailableException; +import org.archive.wayback.exception.ResourceNotInArchiveException; +import org.archive.wayback.exception.WaybackException; +import org.springframework.beans.factory.BeanNameAware; + +/** + * Retains all information about a particular Wayback configuration + * within a ServletContext, including holding references to the + * implementation instances of the primary Wayback classes: + * + * ResourceIndex + * ResourceStore + * QueryUI + * ReplayUI + * + * @author brad + * @version $Date$, $Revision$ + */ +public class WaybackContext implements BeanNameAware { + + private int contextPort = 0; + private 
String contextName = null; + private ResourceIndex index = null; + private ResourceStore store = null; + private ReplayRenderer replay = null; + private QueryRenderer query = null; + private RequestParser parser = null; + private ResultURIConverter uriConverter = null; + + /** + * + */ + public WaybackContext() { + + } + + /* (non-Javadoc) + * @see org.springframework.beans.factory.BeanNameAware#setBeanName(java.lang.String) + */ + public void setBeanName(String beanName) { + // TODO Auto-generated method stub + this.contextName = ""; + int idx = beanName.indexOf(":"); + if(idx > -1) { + contextPort = Integer.valueOf(beanName.substring(0,idx)); + contextName = beanName.substring(idx + 1); + } else { + try { + this.contextPort = Integer.valueOf(beanName); + } catch(NumberFormatException e) { + e.printStackTrace(); + } + } + } + /** + * @param httpRequest + * @return the prefix of paths received by this server that are handled by + * this WaybackContext, including the trailing '/' + */ + public String getContextPath(HttpServletRequest httpRequest) { +// if(contextPort != 0) { +// return httpRequest.getContextPath(); +// } + String httpContextPath = httpRequest.getContextPath(); + if(contextName.length() == 0) { + return httpContextPath + "/"; + } + return httpContextPath + "/" + contextName + "/"; + } + + /** + * @param httpRequest + * @param includeQuery + * @return the portion of the request following the path to this context + * without leading '/' + */ + private String translateRequest(HttpServletRequest httpRequest, + boolean includeQuery) { + + String origRequestPath = httpRequest.getRequestURI(); + if(includeQuery) { + String queryString = httpRequest.getQueryString(); + if (queryString != null) { + origRequestPath += "?" 
+ queryString; + } + } + String contextPath = getContextPath(httpRequest); + if (!origRequestPath.startsWith(contextPath)) { + return null; + } + return origRequestPath.substring(contextPath.length()); + } + + /** + * @param httpRequest + * @return the portion of the request following the path to this context, + * including any query information, without leading '/' + */ + public String translateRequestPathQuery(HttpServletRequest httpRequest) { + return translateRequest(httpRequest,true); + } + + /** + * @param httpRequest + * @return the portion of the request following the path to this context, + * excluding any query information, without leading '/' + */ + public String translateRequestPath(HttpServletRequest httpRequest) { + return translateRequest(httpRequest,false); + } + + /** + * Construct an absolute URL that points to the root of the context that + * received the request, including a trailing "/". + * + * @return String absolute URL pointing to the Context root where the + * request was received. 
+ */ + private String getAbsoluteContextPrefix(HttpServletRequest httpRequest, + boolean useRequestServer) { + + StringBuilder prefix = new StringBuilder(); + prefix.append(WaybackConstants.HTTP_URL_PREFIX); + String waybackPort = null; + if(useRequestServer) { + prefix.append(httpRequest.getLocalName()); + waybackPort = String.valueOf(httpRequest.getLocalPort()); + } else { + prefix.append(httpRequest.getServerName()); + waybackPort = String.valueOf(httpRequest.getServerPort()); + } + if (!waybackPort.equals(WaybackConstants.HTTP_DEFAULT_PORT)) { + prefix.append(":").append(waybackPort); + } + String contextPath = getContextPath(httpRequest); +// if(contextPath.length() > 1) { +// prefix.append(contextPath); +// } else { +// prefix.append(contextPath); +// } + prefix.append(contextPath); + return prefix.toString(); + } + + /** + * @param httpRequest + * @return absolute URL pointing to the base of this WaybackContext, using + * Server and port information from the HttpServletRequest argument. + */ + public String getAbsoluteServerPrefix(HttpServletRequest httpRequest) { + return getAbsoluteContextPrefix(httpRequest, true); + } + + /** + * @param httpRequest + * @return absolute URL pointing to the base of this WaybackContext, using + * Canonical server and port information. 
+ */ + public String getAbsoluteLocalPrefix(HttpServletRequest httpRequest) { + return getAbsoluteContextPrefix(httpRequest, false); + } + + private boolean dispatchLocal(HttpServletRequest httpRequest, + HttpServletResponse httpResponse) + throws ServletException, IOException { + + WaybackRequest wbRequest = new WaybackRequest(); + wbRequest.setContextPrefix(getAbsoluteLocalPrefix(httpRequest)); + UIResults uiResults = new UIResults(wbRequest); + uiResults.storeInRequest(httpRequest); + RequestDispatcher dispatcher = null; + String translated = "/" + translateRequestPathQuery(httpRequest); +// // special case for the front '/' page: +// if(translated.length() == 0) { +// translated = "/"; +// } else { +// translated = "/" + translated; +// } + dispatcher = httpRequest.getRequestDispatcher(translated); + if(dispatcher != null) { + dispatcher.forward(httpRequest, httpResponse); + return true; + } + return false; + } + + /** + * @param httpRequest + * @param httpResponse + * @return true if the request was actually handled + * @throws ServletException + * @throws IOException + */ + public boolean handleRequest(HttpServletRequest httpRequest, + HttpServletResponse httpResponse) + throws ServletException, IOException { + + WaybackRequest wbRequest = null; + boolean handled = false; + try { + + wbRequest = parser.parse(httpRequest, this); + + if(wbRequest != null) { + + handled = true; + wbRequest.setContextPrefix(getAbsoluteLocalPrefix(httpRequest)); +// wbRequest.setWbContext(this); + + if(wbRequest.isReplayRequest()) { + + // maybe redirect to a better URI for the request given: + wbRequest.checkBetterRequest(); + + handleReplay(wbRequest,httpRequest,httpResponse); + + } else { + + handleQuery(wbRequest,httpRequest,httpResponse); + } + } else { + handled = dispatchLocal(httpRequest,httpResponse); +// throw new BadQueryException("Unable to understand request"); + } + + } catch (BetterRequestException bre) { + + httpResponse.sendRedirect(bre.getBetterURI()); + + } 
catch (WaybackException e) { + query.renderException(httpRequest, httpResponse, wbRequest, e); + } + return handled; + } + + private void handleReplay(WaybackRequest wbRequest, + HttpServletRequest httpRequest, HttpServletResponse httpResponse) + throws IOException, ServletException, ResourceIndexNotAvailableException, + ResourceNotInArchiveException, BadQueryException, AccessControlException, + ResourceNotAvailableException { + + + SearchResults results = index.query(wbRequest); + if(!(results instanceof CaptureSearchResults)) { + throw new ResourceNotAvailableException("Bad results..."); + } + CaptureSearchResults captureResults = (CaptureSearchResults) results; + + // TODO: check which versions are actually accessible right now? + SearchResult closest = captureResults.getClosest(wbRequest); + Resource resource = store.retrieveResource(closest); + + replay.renderResource(httpRequest, httpResponse, wbRequest, + closest, resource, uriConverter); + } + + private void handleQuery(WaybackRequest wbRequest, + HttpServletRequest httpRequest, HttpServletResponse httpResponse) + throws ServletException, IOException, ResourceIndexNotAvailableException, + ResourceNotInArchiveException, BadQueryException, AccessControlException { + + SearchResults results = index.query(wbRequest); + + query.renderUrlResults(httpRequest,httpResponse,wbRequest, + results,uriConverter); + } + + /** + * @param contextPort the contextPort to set + */ + public void setContextPort(int contextPort) { + this.contextPort = contextPort; + } + + /** + * @param contextName the contextName to set + */ + public void setContextName(String contextName) { + this.contextName = contextName; + } + + /** + * @param index the index to set + */ + public void setIndex(ResourceIndex index) { + this.index = index; + } + + /** + * @param store the store to set + */ + public void setStore(ResourceStore store) { + this.store = store; + } + + /** + * @param replay the replay to set + */ + public void 
setReplay(ReplayRenderer replay) { + this.replay = replay; + } + + /** + * @param query the query to set + */ + public void setQuery(QueryRenderer query) { + this.query = query; + } + + /** + * @param parser the parser to set + */ + public void setParser(RequestParser parser) { + this.parser = parser; + } + + /** + * @param uriConverter the uriConverter to set + */ + public void setUriConverter(ResultURIConverter uriConverter) { + this.uriConverter = uriConverter; + } + + + /** + * @return the contextPort + */ + public int getContextPort() { + return contextPort; + } +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2007-07-16 23:34:06
|
Revision: 1795 http://archive-access.svn.sourceforge.net/archive-access/?rev=1795&view=rev Author: bradtofel Date: 2007-07-16 16:33:58 -0700 (Mon, 16 Jul 2007) Log Message: ----------- REFACTOR: most of the request parsing code was moved into this package, including some abstract classes for consolidating common code from implementation classes for various replay/query modes. Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/BaseRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/CompositeRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/FormRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/OpenSearchRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/PathRequestParser.java Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/BaseRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/BaseRequestParser.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/BaseRequestParser.java 2007-07-16 23:33:58 UTC (rev 1795) @@ -0,0 +1,154 @@ +/* BaseRequestParser + * + * $Id$ + * + * Created on 3:15:12 PM Apr 24, 2007. + * + * Copyright (C) 2007 Internet Archive. + * + * This file is part of wayback-core. 
+ * + * wayback-core is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback-core is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with wayback-core; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.requestparser; + +import java.util.Map; +import java.util.Properties; + +import javax.servlet.http.HttpServletRequest; + +import org.archive.wayback.RequestParser; +import org.archive.wayback.WaybackConstants; +import org.archive.wayback.core.PropertyConfiguration; +import org.archive.wayback.core.WaybackRequest; +import org.archive.wayback.exception.BadQueryException; +import org.archive.wayback.exception.ConfigurationException; +import org.archive.wayback.webapp.WaybackContext; + +/** + * Class that implements the RequestParser interface, and also understands how + * to: + * + * + * This class will attempt to use the overridable parseCustom() method to + * create the WaybackRequest object, but if that fails (returns null), it will + * fall back to: + + * A) attempting to parse out an incoming OpenSearch format query + * B) attempting to parse out any and all incoming form elements submitted as + * either GET or POST arguments + * + * This class also contains the functionality to extract HTTP header + * information into WaybackRequest objects, including Http auth info, referer, + * remote IPs, etc. 
+ * + * @author brad + * @version $Date$, $Revision$ + */ +public abstract class BaseRequestParser implements RequestParser { + + protected final static String QUERY_BASE = "query"; + + protected final static String REPLAY_BASE = "replay"; + + protected final static int DEFAULT_MAX_RECORDS = 10; + + protected int maxRecords = DEFAULT_MAX_RECORDS; + + public void init(final Properties p) throws ConfigurationException { + PropertyConfiguration pc = new PropertyConfiguration(p); + maxRecords = pc.getInt(WaybackConstants.RESULTS_PER_PAGE_CONFIG_NAME, + DEFAULT_MAX_RECORDS); + } + + protected static String getMapParam(Map<String,String[]> queryMap, + String field) { + String arr[] = queryMap.get(field); + if (arr == null || arr.length == 0) { + return null; + } + return arr[0]; + } + + protected static String getRequiredMapParam(Map<String,String[]> queryMap, + String field) + throws BadQueryException { + String value = getMapParam(queryMap,field); + if(value == null) { + throw new BadQueryException("missing field " + field); + } + if(value.length() == 0) { + throw new BadQueryException("empty field " + field); + } + return value; + } + + protected static String getMapParamOrEmpty(Map<String,String[]> map, + String param) { + String val = getMapParam(map,param); + return (val == null) ? "" : val; + } + + + private void putUnlessNull(WaybackRequest request, String key, String val) { + if(val != null) { + request.put(key, val); + } + } + + protected void addHttpHeaderFields(WaybackRequest wbRequest, + HttpServletRequest httpRequest) { + + // attempt to get the HTTP referer if present.. 
+ putUnlessNull(wbRequest,WaybackConstants.REQUEST_REFERER_URL, + httpRequest.getHeader("REFERER")); + putUnlessNull(wbRequest,WaybackConstants.REQUEST_REMOTE_ADDRESS, + httpRequest.getRemoteAddr()); + putUnlessNull(wbRequest,WaybackConstants.REQUEST_WAYBACK_HOSTNAME, + httpRequest.getLocalName()); + putUnlessNull(wbRequest,WaybackConstants.REQUEST_WAYBACK_PORT, + String.valueOf(httpRequest.getLocalPort())); + putUnlessNull(wbRequest,WaybackConstants.REQUEST_WAYBACK_CONTEXT, + httpRequest.getContextPath()); + putUnlessNull(wbRequest,WaybackConstants.REQUEST_AUTH_TYPE, + httpRequest.getAuthType()); + putUnlessNull(wbRequest,WaybackConstants.REQUEST_REMOTE_USER, + httpRequest.getRemoteUser()); + putUnlessNull(wbRequest,WaybackConstants.REQUEST_LOCALE_LANG, + httpRequest.getLocale().getDisplayLanguage()); + + wbRequest.setLocale(httpRequest.getLocale()); + } + + /* (non-Javadoc) + * @see org.archive.wayback.RequestParser#parse(javax.servlet.http.HttpServletRequest) + */ + public abstract WaybackRequest parse(HttpServletRequest httpRequest, + WaybackContext wbContext) throws BadQueryException; + + /** + * @return the maxRecords + */ + public int getMaxRecords() { + return maxRecords; + } + + /** + * @param maxRecords the maxRecords to set + */ + public void setMaxRecords(int maxRecords) { + this.maxRecords = maxRecords; + } +} Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/CompositeRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/CompositeRequestParser.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/CompositeRequestParser.java 2007-07-16 23:33:58 UTC (rev 1795) @@ -0,0 +1,89 @@ +/* CompositeRequestParser + * + * $Id$ + * + * Created on 4:52:13 PM Apr 24, 2007. + * + * Copyright (C) 2007 Internet Archive. 
+ * + * This file is part of wayback-core. + * + * wayback-core is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback-core is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with wayback-core; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.requestparser; + +import java.util.Properties; + +import javax.servlet.http.HttpServletRequest; + +import org.archive.wayback.RequestParser; +import org.archive.wayback.WaybackConstants; +import org.archive.wayback.core.PropertyConfiguration; +import org.archive.wayback.core.WaybackRequest; +import org.archive.wayback.exception.BadQueryException; +import org.archive.wayback.exception.ConfigurationException; +import org.archive.wayback.webapp.WaybackContext; + +/** + * + * + * @author brad + * @version $Date$, $Revision$ + */ +public class CompositeRequestParser extends BaseRequestParser { + private RequestParser[] parsers = null; + + public void init(final Properties p) throws ConfigurationException { + parsers = getRequestParsers(); + for(int i = 0; i < parsers.length; i++) { + parsers[i].init(p); + } + PropertyConfiguration pc = new PropertyConfiguration(p); + maxRecords = pc.getInt(WaybackConstants.RESULTS_PER_PAGE_CONFIG_NAME, + DEFAULT_MAX_RECORDS); + } + /** + * + */ + public void init() { + parsers = getRequestParsers(); + } + + + protected RequestParser[] getRequestParsers() { + RequestParser[] theParsers = { + new OpenSearchRequestParser(), + new FormRequestParser() + }; + return 
theParsers; + } + + /* (non-Javadoc) + * @see org.archive.wayback.RequestParser#parse(javax.servlet.http.HttpServletRequest) + */ + public WaybackRequest parse(HttpServletRequest httpRequest, + WaybackContext wbContext) throws BadQueryException { + + WaybackRequest wbRequest = null; + + for(int i = 0; i < parsers.length; i++) { + wbRequest = parsers[i].parse(httpRequest, wbContext); + if(wbRequest != null) { + break; + } + } + return wbRequest; + } +} Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/FormRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/FormRequestParser.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/FormRequestParser.java 2007-07-16 23:33:58 UTC (rev 1795) @@ -0,0 +1,91 @@ +/* FormRequestParser + * + * $Id$ + * + * Created on 4:45:06 PM Apr 24, 2007. + * + * Copyright (C) 2007 Internet Archive. + * + * This file is part of wayback-core. + * + * wayback-core is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback-core is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with wayback-core; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.requestparser; + +import java.util.Iterator; +import java.util.Map; +import java.util.Set; + +import javax.servlet.http.HttpServletRequest; + +import org.archive.wayback.WaybackConstants; +import org.archive.wayback.core.WaybackRequest; +import org.archive.wayback.webapp.WaybackContext; + +/** + * + * + * @author brad + * @version $Date$, $Revision$ + */ +public class FormRequestParser extends BaseRequestParser { + /** + * CGI argument name for Submit button... + */ + private final static String SUBMIT_BUTTON = "Submit"; + + /* + * Stuff whatever GET/POST arguments are sent up into the returned + * WaybackRequest object, except the Submit button argument. + */ + public WaybackRequest parse(HttpServletRequest httpRequest, + WaybackContext wbContext) { + + WaybackRequest wbRequest = null; + @SuppressWarnings("unchecked") + Map<String,String[]> queryMap = httpRequest.getParameterMap(); + if(queryMap.size() > 0) { + wbRequest = new WaybackRequest(); + + String base = wbContext.translateRequestPath(httpRequest); + if(base.startsWith(REPLAY_BASE)) { + wbRequest.put(WaybackConstants.REQUEST_TYPE, + WaybackConstants.REQUEST_REPLAY_QUERY); + } else if(base.startsWith(QUERY_BASE)) { + wbRequest.put(WaybackConstants.REQUEST_TYPE, + WaybackConstants.REQUEST_URL_QUERY); + } else { + return null; + } + + Set<String> keys = queryMap.keySet(); + Iterator<String> itr = keys.iterator(); + while(itr.hasNext()) { + String key = itr.next(); + if(key.equals(SUBMIT_BUTTON)) { + continue; + } + // just jam everything else in: + String val = getMapParam(queryMap,key); + wbRequest.put(key,val); + } + } + if(wbRequest != null) { + addHttpHeaderFields(wbRequest, httpRequest); + } + + return wbRequest; + } +} Added: 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/OpenSearchRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/OpenSearchRequestParser.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/OpenSearchRequestParser.java 2007-07-16 23:33:58 UTC (rev 1795) @@ -0,0 +1,147 @@ +/* OpenSearchRequestParser + * + * $Id$ + * + * Created on 4:47:03 PM Apr 24, 2007. + * + * Copyright (C) 2007 Internet Archive. + * + * This file is part of wayback-core. + * + * wayback-core is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback-core is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with wayback-core; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.requestparser; + +import java.util.Map; +import java.util.regex.Pattern; + +import javax.servlet.http.HttpServletRequest; + +import org.archive.wayback.WaybackConstants; +import org.archive.wayback.core.WaybackRequest; +import org.archive.wayback.exception.BadQueryException; +import org.archive.wayback.webapp.WaybackContext; + +/** + * + * + * @author brad + * @version $Date$, $Revision$ + */ +public class OpenSearchRequestParser extends BaseRequestParser { + + /** + * CGI argument name for query arguments + */ + public final static String SEARCH_QUERY = "q"; + + /** + * CGI argument name for number of results per page, 1 based + */ + public final static String SEARCH_RESULTS = "count"; + + /** + * CGI argument name for page number of results, 1 based + */ + public final static String START_PAGE = "start_page"; + + + // private final static String START_INDEX = "start_index"; + + private final static Pattern WHITESPACE_PATTERN = Pattern.compile("\\s+"); + + // singles consume the next non-whitespace token following the term + // private String[] singleTokens = { "url", "site", "mimetype", "noredirect" }; + + // lines consume the entire rest of the query + private String[] lineTokens = { "terms" }; + + /* + * If the request includes a 'q' (query) argument, treat the request + * as an OpenSearch query, and extract all query terms, plus pagination + * info from the httpRequest object. 
+ */ + public WaybackRequest parse(HttpServletRequest httpRequest, + WaybackContext wbContext) throws BadQueryException { + + WaybackRequest wbRequest = null; + @SuppressWarnings("unchecked") + Map<String,String[]> queryMap = httpRequest.getParameterMap(); + String query = getMapParam(queryMap, SEARCH_QUERY); + if(query == null) { + return null; + } + wbRequest = new WaybackRequest(); + + String base = wbContext.translateRequestPath(httpRequest); + if(base.startsWith(REPLAY_BASE)) { + wbRequest.put(WaybackConstants.REQUEST_TYPE, + WaybackConstants.REQUEST_REPLAY_QUERY); + } else if(base.startsWith(QUERY_BASE)){ + wbRequest.put(WaybackConstants.REQUEST_TYPE, + WaybackConstants.REQUEST_URL_QUERY); + } else { + return null; + } + + String numResults = getMapParam(queryMap, SEARCH_RESULTS); + String startPage = getMapParam(queryMap, START_PAGE); + + if (numResults != null) { + int nr = Integer.parseInt(numResults); + wbRequest.setResultsPerPage(nr); + } else { + wbRequest.setResultsPerPage(maxRecords); + } + if (startPage != null) { + int sp = Integer.parseInt(startPage); + wbRequest.setPageNum(sp); + } else { + wbRequest.setPageNum(1); + } + + // first try the entire line_tokens: + for (int i = 0; i < lineTokens.length; i++) { + String token = lineTokens[i] + ":"; + int index = query.indexOf(token); + if (index > -1) { + // found it, take value as the remainder of the query + String value = query.substring(index + token.length()); + // TODO: trim trailing whitespace? 
+ wbRequest.put(lineTokens[i], value); + query = query.substring(0, index); + } + } + + // now split whatever is left on whitespace: + String[] parts = WHITESPACE_PATTERN.split(query); + for (int i = 0; i < parts.length; i++) { + String token = parts[i]; + int colonIndex = token.indexOf(":"); + if (colonIndex == -1) { + throw new BadQueryException("Bad search token(" + token + ")"); + } + String key = token.substring(0, colonIndex); + String value = token.substring(colonIndex + 1); + // TODO: make sure key is in singleTokens? + // let's just let em all thru for now: + wbRequest.put(key, value); + } + + addHttpHeaderFields(wbRequest, httpRequest); + + return wbRequest; + } +} Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/PathRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/PathRequestParser.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/PathRequestParser.java 2007-07-16 23:33:58 UTC (rev 1795) @@ -0,0 +1,75 @@ +/* PathRequestParser + * + * $Id$ + * + * Created on 6:47:21 PM Apr 24, 2007. + * + * Copyright (C) 2007 Internet Archive. + * + * This file is part of wayback-core. + * + * wayback-core is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback-core is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with wayback-core; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.requestparser; + +import javax.servlet.http.HttpServletRequest; + +import org.archive.wayback.core.WaybackRequest; +import org.archive.wayback.exception.BadQueryException; +import org.archive.wayback.webapp.WaybackContext; + +/** + * Subclass of RequestParser that acquires key request information from the + * path component following the wayback context. + * + * @author brad + * @version $Date$, $Revision$ + */ +public abstract class PathRequestParser extends BaseRequestParser { + + /** + * @param requestPath + * @return WaybackRequest with information parsed from the requestPath, or + * null if information could not be extracted. + */ + public abstract WaybackRequest parse(String requestPath); + + /* (non-Javadoc) + * @see org.archive.wayback.requestparser.BaseRequestParser#parse(javax.servlet.http.HttpServletRequest, org.archive.wayback.webapp.WaybackContext) + */ + @Override + public WaybackRequest parse(HttpServletRequest httpRequest, + WaybackContext wbContext) throws BadQueryException { + + String queryString = httpRequest.getQueryString(); + String origRequestPath = httpRequest.getRequestURI(); + + if (queryString != null) { + origRequestPath += "?" + queryString; + } + String contextPath = wbContext.getContextPath(httpRequest); + if (!origRequestPath.startsWith(contextPath)) { + return null; + } + String requestPath = origRequestPath.substring(contextPath.length()); + + WaybackRequest wbRequest = parse(requestPath); + if(wbRequest != null) { + addHttpHeaderFields(wbRequest, httpRequest); + } + + return wbRequest; + } +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2007-07-19 22:01:59
|
Revision: 1808 http://archive-access.svn.sourceforge.net/archive-access/?rev=1808&view=rev Author: bradtofel Date: 2007-07-19 13:36:58 -0700 (Thu, 19 Jul 2007) Log Message: ----------- FEATURE: added setMaxRecords() to RequestParser interface. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/RequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/CompositeRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/FormRequestParser.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/RequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/RequestParser.java 2007-07-16 23:58:41 UTC (rev 1807) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/RequestParser.java 2007-07-19 20:36:58 UTC (rev 1808) @@ -48,4 +48,8 @@ */ public abstract WaybackRequest parse(HttpServletRequest httpRequest, WaybackContext wbContext) throws BadQueryException; + /** + * @param maxRecords + */ + public void setMaxRecords(int maxRecords); } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/CompositeRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/CompositeRequestParser.java 2007-07-16 23:58:41 UTC (rev 1807) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/CompositeRequestParser.java 2007-07-19 20:36:58 UTC (rev 1808) @@ -59,6 +59,9 @@ */ public void init() { parsers = getRequestParsers(); + for(int i = 0; i < parsers.length; i++) { + 
parsers[i].setMaxRecords(maxRecords); + } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/FormRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/FormRequestParser.java 2007-07-16 23:58:41 UTC (rev 1807) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/FormRequestParser.java 2007-07-19 20:36:58 UTC (rev 1808) @@ -69,7 +69,7 @@ } else { return null; } - + wbRequest.setResultsPerPage(maxRecords); Set<String> keys = queryMap.keySet(); Iterator<String> itr = keys.iterator(); while(itr.hasNext()) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2007-07-19 23:30:48
|
Revision: 1842 http://archive-access.svn.sourceforge.net/archive-access/?rev=1842&view=rev Author: bradtofel Date: 2007-07-19 16:30:47 -0700 (Thu, 19 Jul 2007) Log Message: ----------- REMOVED: superseded by WaybackContext and RequestParser refactor Removed Paths: ------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/QueryFilter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ReplayFilter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/parser/ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/RequestFilter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/WaybackContextListener.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/proxy/ReplayFilter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/QueryServlet.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/ReplayServlet.java Deleted: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlParser.java 2007-07-19 21:37:00 UTC (rev 1841) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlParser.java 2007-07-19 23:30:47 UTC (rev 1842) @@ -1,43 +0,0 @@ -/* ArchivalUrlParser - * - * $Id$ - * - * Created on 3:47:00 PM Oct 24, 2006. - * - * Copyright (C) 2006 Internet Archive. - * - * This file is part of Wayback. 
- * - * Wayback is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * any later version. - * - * Wayback is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * - * You should have received a copy of the GNU Lesser Public License - * along with Wayback; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -package org.archive.wayback.archivalurl; - -import org.archive.wayback.core.WaybackRequest; - -/** - * - * - * @author brad - * @version $Date$, $Revision$ - */ -public interface ArchivalUrlParser { - /** - * attempt to transform a string into a WaybackRequest - * - * @param requestPath - * @return WaybackRequest or null if the String was not parseable - */ - public abstract WaybackRequest parse(String requestPath); -} Deleted: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/QueryFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/QueryFilter.java 2007-07-19 21:37:00 UTC (rev 1841) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/QueryFilter.java 2007-07-19 23:30:47 UTC (rev 1842) @@ -1,90 +0,0 @@ -/* QueryFilter - * - * $Id$ - * - * Created on 1:22:14 PM Nov 8, 2005. - * - * Copyright (C) 2005 Internet Archive. - * - * This file is part of wayback. 
- * - * wayback is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * any later version. - * - * wayback is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * - * You should have received a copy of the GNU Lesser Public License - * along with wayback; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -package org.archive.wayback.archivalurl; - -import java.util.Properties; - -import javax.servlet.http.HttpServletRequest; - -import org.archive.wayback.WaybackConstants; -import org.archive.wayback.archivalurl.parser.PathDatePrefixParser; -import org.archive.wayback.archivalurl.parser.PathDateRangeParser; -import org.archive.wayback.archivalurl.parser.PathPrefixDatePrefixParser; -import org.archive.wayback.archivalurl.parser.PathPrefixDateRangeParser; -import org.archive.wayback.core.RequestFilter; -import org.archive.wayback.core.WaybackRequest; -import org.archive.wayback.exception.ConfigurationException; - -/** - * - * - * @author brad - * @version $Date$, $Revision$ - */ -public class QueryFilter extends RequestFilter { - - private int defaultResultsPerPage = 10; - - private ArchivalUrlParser parsers[] = { - new PathDatePrefixParser(), - new PathDateRangeParser(), - new PathPrefixDatePrefixParser(), - new PathPrefixDateRangeParser() - }; - - public void init(Properties p) throws ConfigurationException { - super.init(p); - String resultsPerPage = (String) p.get( - WaybackConstants.RESULTS_PER_PAGE_CONFIG_NAME); - if(resultsPerPage != null) { - defaultResultsPerPage = Integer.parseInt(resultsPerPage); - } - } - - public WaybackRequest parseRequest(HttpServletRequest 
httpRequest) { - String queryString = httpRequest.getQueryString(); - String origRequestPath = httpRequest.getRequestURI(); - if (queryString != null) { - origRequestPath = httpRequest.getRequestURI() + "?" + queryString; - } - String contextPath = httpRequest.getContextPath(); - if (!origRequestPath.startsWith(contextPath)) { - return null; - } - String requestPath = origRequestPath.substring(contextPath.length()); - - WaybackRequest wbRequest = null; - for(int i=0; i< parsers.length; i++) { - wbRequest = parsers[i].parse(requestPath); - if(wbRequest != null) break; - } - - if(wbRequest != null) { - wbRequest.setResultsPerPage(defaultResultsPerPage); - wbRequest.fixup(httpRequest); - } - return wbRequest; - } -} Deleted: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ReplayFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ReplayFilter.java 2007-07-19 21:37:00 UTC (rev 1841) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ReplayFilter.java 2007-07-19 23:30:47 UTC (rev 1842) @@ -1,133 +0,0 @@ -/* ReplayFilter - * - * $Id$ - * - * Created on 1:08:38 PM Nov 8, 2005. - * - * Copyright (C) 2005 Internet Archive. - * - * This file is part of wayback. - * - * wayback is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * any later version. - * - * wayback is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. 
- * - * You should have received a copy of the GNU Lesser Public License - * along with wayback; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -package org.archive.wayback.archivalurl; - -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -import javax.servlet.http.HttpServletRequest; - -import org.apache.commons.httpclient.URIException; -import org.archive.wayback.WaybackConstants; -import org.archive.wayback.core.RequestFilter; -import org.archive.wayback.core.Timestamp; -import org.archive.wayback.core.WaybackRequest; - - -/** - * - * - * @author brad - * @version $Date$, $Revision$ - */ -public class ReplayFilter extends RequestFilter { - - /** - * Regex which parses Archival URL replay requests into timestamp + url - */ - private final Pattern WB_REQUEST_REGEX = Pattern - .compile("^/(\\d{1,14})/(.*)$"); - - - /** - * Constructor - */ - public ReplayFilter() { - super(); - } - public WaybackRequest parseRequest(HttpServletRequest httpRequest) { - WaybackRequest wbRequest = null; - Matcher matcher = null; - - String queryString = httpRequest.getQueryString(); - String origRequestPath = httpRequest.getRequestURI(); - if (queryString != null && queryString.length() > 0) { - origRequestPath = httpRequest.getRequestURI() + "?" 
+ queryString; - } - String contextPath = httpRequest.getContextPath(); - if (!origRequestPath.startsWith(contextPath)) { - return null; - } - String requestPath = origRequestPath.substring(contextPath.length()); - - matcher = WB_REQUEST_REGEX.matcher(requestPath); - if (matcher != null && matcher.matches()) { - wbRequest = new WaybackRequest(); - String dateStr = matcher.group(1); - String urlStr = matcher.group(2); - if (!urlStr.startsWith("http://")) { - urlStr = "http://" + urlStr; - } - - // The logic of the classic WM wrt timestamp bounding: - // if 14-digits are specified, assume min-max range boundaries - // if less than 14 are specified, assume min-max range boundaries - // based upon amount given (2001 => 20010101... - 20011231...) - // AND assume the user asked for the LATEST possible date - // within that range... - // - // ...don't ask me, I just work here. - - String startDate = null; - String endDate = null; - if(dateStr.length() == 14) { - startDate = Timestamp.earliestTimestamp().getDateStr(); - endDate = Timestamp.currentTimestamp().getDateStr(); - } else { - - // classic behavior: -// startDate = Timestamp.parseBefore(dateStr).getDateStr(); -// endDate = Timestamp.parseAfter(dateStr).getDateStr(); -// dateStr = endDate; - - // "better" behavior: - startDate = Timestamp.earliestTimestamp().getDateStr(); - endDate = Timestamp.currentTimestamp().getDateStr(); - dateStr = Timestamp.parseAfter(dateStr).getDateStr(); - - } - wbRequest.put(WaybackConstants.REQUEST_EXACT_DATE,dateStr); - wbRequest.put(WaybackConstants.REQUEST_START_DATE,startDate); - wbRequest.put(WaybackConstants.REQUEST_END_DATE,endDate); - - wbRequest.put(WaybackConstants.REQUEST_TYPE, - WaybackConstants.REQUEST_REPLAY_QUERY); - - try { - wbRequest.fixup(httpRequest); - String wbPrefix = wbRequest.getContextPrefix(); - if(urlStr.startsWith(wbPrefix)) { - wbRequest.setBetterRequestURI(urlStr); - } - wbRequest.setRequestUrl(urlStr); - - } catch (URIException e) { - e.printStackTrace(); 
- wbRequest = null; - } - } - return wbRequest; - } -} \ No newline at end of file Deleted: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/RequestFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/RequestFilter.java 2007-07-19 21:37:00 UTC (rev 1841) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/RequestFilter.java 2007-07-19 23:30:47 UTC (rev 1842) @@ -1,162 +0,0 @@ -/* RequestFilter - * - * $Id$ - * - * Created on 1:17:08 PM Nov 8, 2005. - * - * Copyright (C) 2005 Internet Archive. - * - * This file is part of wayback. - * - * wayback is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * any later version. - * - * wayback is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. 
- * - * You should have received a copy of the GNU Lesser Public License - * along with wayback; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -package org.archive.wayback.core; - -import java.io.IOException; -import java.util.Enumeration; -import java.util.Properties; - -import javax.servlet.Filter; -import javax.servlet.FilterChain; -import javax.servlet.FilterConfig; -import javax.servlet.RequestDispatcher; -import javax.servlet.ServletContext; -import javax.servlet.ServletException; -import javax.servlet.ServletRequest; -import javax.servlet.ServletResponse; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import org.archive.wayback.exception.ConfigurationException; - -/** - * Common Servlet Filter functionality for parsing incoming URL requests - * into WaybackRequest objects. - * - * @author brad - * @version $Date$, $Revision$ - */ -public abstract class RequestFilter implements Filter { - /** - * name of attribute on Request Object to store filtered WaybackRequest - */ - private static final String WMREQUEST_ATTRIBUTE = "wmrequest.attribute"; - - /** - * name of configuration for context-relative path to servlet that - * can handle the request, if a WaybackRequest is found in the request URL - */ - private static final String HANDLER_URL = "handler.url"; - - /** - * context-relative URL to servlet that handles requests - */ - private String handlerUrl = null; - - /** - * Constructor - */ - public RequestFilter() { - super(); - } - /* (non-Javadoc) - * @see javax.servlet.Filter#destroy() - */ - public void destroy() {} - - /* (non-Javadoc) - * @see javax.servlet.Filter#init(javax.servlet.FilterConfig) - */ - public void init(FilterConfig c) throws ServletException { - - Properties p = new Properties(); - ServletContext sc = c.getServletContext(); - for (Enumeration e = sc.getInitParameterNames(); e.hasMoreElements();) { - String key = 
(String) e.nextElement(); - p.put(key, sc.getInitParameter(key)); - } - for (Enumeration e = c.getInitParameterNames(); e.hasMoreElements();) { - String key = (String) e.nextElement(); - p.put(key, c.getInitParameter(key)); - } - try { - this.init(p); - } catch (ConfigurationException e) { - //e.printStackTrace(); - throw new ServletException(e); - } - } - - /* (non-Javadoc) - * @see javax.servlet.Filter#doFilter(javax.servlet.ServletRequest, javax.servlet.ServletResponse, javax.servlet.FilterChain) - */ - public void doFilter(ServletRequest request, ServletResponse response, - FilterChain chain) throws IOException, ServletException { - if (!handle(request, response)) { - chain.doFilter(request, response); - } - } - - /** - * initialize this RequestFilter based on a Properties assembled from - * Context and Filter init-params. - * @param p - * @throws ConfigurationException - */ - public void init(Properties p) throws ConfigurationException { - PropertyConfiguration pc = new PropertyConfiguration(p); - handlerUrl = pc.getString(HANDLER_URL); - } - - /** - * @param request - * @param response - * @return boolean, true if a WaybackRequest was parsed from the URL - * @throws IOException - * @throws ServletException - */ - protected boolean handle(final ServletRequest request, - final ServletResponse response) throws IOException, - ServletException { - if (!(request instanceof HttpServletRequest)) { - return false; - } - if (!(response instanceof HttpServletResponse)) { - return false; - } - HttpServletRequest httpRequest = (HttpServletRequest) request; - //HttpServletResponse httpResponse = (HttpServletResponse) response; - WaybackRequest wbRequest = parseRequest(httpRequest); - - if (wbRequest == null) { - return false; - } - - request.setAttribute(WMREQUEST_ATTRIBUTE, wbRequest); - RequestDispatcher dispatcher = request.getRequestDispatcher(handlerUrl); - - dispatcher.forward(request, response); - - return true; - } - - /** attempt to extract a WaybackRequest from a 
request URL - * @param httpRequest - * @return WaybackRequest if successful, null otherwise - */ - protected abstract WaybackRequest parseRequest( - HttpServletRequest httpRequest); - -} Deleted: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/WaybackContextListener.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/WaybackContextListener.java 2007-07-19 21:37:00 UTC (rev 1841) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/WaybackContextListener.java 2007-07-19 23:30:47 UTC (rev 1842) @@ -1,79 +0,0 @@ -/* WaybackContextListener - * - * $Id$ - * - * Created on 5:17:59 PM Oct 16, 2006. - * - * Copyright (C) 2006 Internet Archive. - * - * This file is part of Wayback. - * - * Wayback is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * any later version. - * - * Wayback is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. 
- * - * You should have received a copy of the GNU Lesser Public License - * along with Wayback; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -package org.archive.wayback.core; - -import java.util.Enumeration; -import java.util.Properties; - -import javax.servlet.ServletContext; -import javax.servlet.ServletContextEvent; -import javax.servlet.ServletContextListener; -import org.archive.wayback.exception.ConfigurationException; - -/** - * - * - * @author brad - * @version $Date$, $Revision$ - */ -public class WaybackContextListener implements ServletContextListener { - - public void contextInitialized(ServletContextEvent sce) { - // attempt to initialize a ResourceStore and a ResourceIndex, - // so their worker threads can start up... - - - // throwaway WaybackLogic.. - WaybackLogic wayback = new WaybackLogic(); - - Properties p = new Properties(); - ServletContext sc = sce.getServletContext(); - for (Enumeration e = sc.getInitParameterNames(); e.hasMoreElements();) { - String key = (String) e.nextElement(); - p.put(key, sc.getInitParameter(key)); - } - wayback.init(p); - try { - wayback.getResourceStore(); - } catch (ConfigurationException e) { - // Just dump the error and try to barrel on... - e.printStackTrace(); - } - try { - wayback.getResourceIndex(); - } catch (ConfigurationException e) { - // Just dump the error and try to barrel on... - e.printStackTrace(); - } - } - - public void contextDestroyed(ServletContextEvent sce) { - // TODO: kill threads somehow? - - // I think this implies some interface for interogating implementations - // for threads, which seems like more than we need, if the daemon - // threads are just gonna get killed anyways.. 
- } -} Deleted: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/proxy/ReplayFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/proxy/ReplayFilter.java 2007-07-19 21:37:00 UTC (rev 1841) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/proxy/ReplayFilter.java 2007-07-19 23:30:47 UTC (rev 1842) @@ -1,116 +0,0 @@ -/* ProxyReplayFilter - * - * $Id$ - * - * Created on 6:08:59 PM Nov 14, 2005. - * - * Copyright (C) 2005 Internet Archive. - * - * This file is part of wayback. - * - * wayback is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * any later version. - * - * wayback is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. 
- * - * You should have received a copy of the GNU Lesser Public License - * along with wayback; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -package org.archive.wayback.proxy; - -import java.util.List; - -import javax.servlet.FilterConfig; -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; - -import org.apache.commons.httpclient.URIException; -import org.archive.util.InetAddressUtil; -import org.archive.wayback.WaybackConstants; -import org.archive.wayback.core.Timestamp; -import org.archive.wayback.core.RequestFilter; -import org.archive.wayback.core.WaybackRequest; - -/** - * - * - * @author brad - * @version $Date$, $Revision$ - */ -public class ReplayFilter extends RequestFilter { - - /** - * name of attribute in web.xml for specifying an additional hostname that - * should be considered "local" for discriminating between Replays and - * Queries - */ - private static final String LOCAL_HOSTNAME = "query.localhostname"; - - private List<String> localhostNames = null; - - /** - * Constructor - */ - public ReplayFilter() { - super(); - } - public void init(final FilterConfig c) throws ServletException { - this.localhostNames = InetAddressUtil.getAllLocalHostNames(); - String extraLocalHostname = c.getInitParameter(LOCAL_HOSTNAME); - if ((extraLocalHostname != null) && (extraLocalHostname.length() > 0)) { - localhostNames.add(extraLocalHostname); - } - super.init(c); - } - /* (non-Javadoc) - * @see org.archive.wayback.core.RequestFilter#parseRequest(javax.servlet.http.HttpServletRequest) - */ - protected WaybackRequest parseRequest(HttpServletRequest httpRequest) { - WaybackRequest wbRequest = null; - if(isLocalRequest(httpRequest)) { - return wbRequest; - } - String requestServer = httpRequest.getServerName(); - String requestPath = httpRequest.getRequestURI(); - //int port = httpRequest.getServerPort(); - String requestQuery = httpRequest.getQueryString(); 
- String requestScheme = httpRequest.getScheme(); - if (requestQuery != null) { - requestPath = requestPath + "?" + requestQuery; - } - - String requestUrl = requestScheme + "://" + requestServer + requestPath; - - wbRequest = new WaybackRequest(); - try { - wbRequest.setRequestUrl(requestUrl); - } catch (URIException e) { - e.printStackTrace(); - return null; - } - wbRequest.put(WaybackConstants.REQUEST_TYPE, - WaybackConstants.REQUEST_REPLAY_QUERY); - - - // Get the id from the request. If no id, use the ip-address instead. - // Then get the timestamp (or rather datestr) matching this id. - String id = httpRequest.getHeader("Proxy-Id"); - if(id == null) id = httpRequest.getRemoteAddr(); - wbRequest.put(WaybackConstants.REQUEST_EXACT_DATE, - Timestamp.getTimestampForId(httpRequest.getContextPath(),id)); - - wbRequest.fixup(httpRequest); - - return wbRequest; - } - protected boolean isLocalRequest(HttpServletRequest httpRequest) { - return this.localhostNames.contains(httpRequest.getServerName()); - } - -} Deleted: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/QueryServlet.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/QueryServlet.java 2007-07-19 21:37:00 UTC (rev 1841) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/QueryServlet.java 2007-07-19 23:30:47 UTC (rev 1842) @@ -1,176 +0,0 @@ -/* QueryServlet - * - * $Id$ - * - * Created on 2:42:50 PM Nov 7, 2005. - * - * Copyright (C) 2005 Internet Archive. - * - * This file is part of wayback. - * - * wayback is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * any later version. 
- * - * wayback is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * - * You should have received a copy of the GNU Lesser Public License - * along with wayback; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -package org.archive.wayback.query; - -import java.io.IOException; -import java.text.ParseException; -import java.util.Iterator; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import org.archive.wayback.WaybackConstants; -import org.archive.wayback.QueryRenderer; -import org.archive.wayback.ResultURIConverter; -import org.archive.wayback.ResourceIndex; -import org.archive.wayback.core.SearchResult; -import org.archive.wayback.core.SearchResults; -import org.archive.wayback.core.Timestamp; -import org.archive.wayback.core.WaybackRequest; -import org.archive.wayback.core.WaybackServlet; -import org.archive.wayback.exception.BadQueryException; -import org.archive.wayback.exception.ConfigurationException; -import org.archive.wayback.exception.WaybackException; - -/** - * - * - * @author brad - * @version $Date$, $Revision$ - */ -public class QueryServlet extends WaybackServlet { - /** - * - */ - private static final String WMREQUEST_ATTRIBUTE = "wmrequest.attribute"; - - private static final long serialVersionUID = 1L; - - /** - * Constructor - */ - public QueryServlet() { - super(); - } - - public void doGet(HttpServletRequest httpRequest, - HttpServletResponse httpResponse) throws IOException, - ServletException { - - WaybackRequest wbRequest = (WaybackRequest) httpRequest - .getAttribute(WMREQUEST_ATTRIBUTE); - QueryRenderer renderer; - try { - renderer = wayback.getQueryRenderer(); - } catch (ConfigurationException e) { - 
e.printStackTrace(); - throw new ServletException(e.getMessage()); - } - - try { - - if (wbRequest == null) { - wbRequest = wayback.getQueryParser().parseQuery(httpRequest); - } - - ResourceIndex idx = wayback.getResourceIndex(); - ResultURIConverter uriConverter = wayback.getURIConverter(); - - SearchResults results; - - results = idx.query(wbRequest); - - if (wbRequest.get(WaybackConstants.REQUEST_TYPE).equals( - WaybackConstants.REQUEST_URL_QUERY)) { - - // Annotate the closest matching hit so that it can - // be retrieved later from the xml. - try { - annotateClosest(results, wbRequest, httpRequest); - } catch (ParseException e) { - e.printStackTrace(); - } - - renderer.renderUrlResults(httpRequest, httpResponse, - wbRequest, results, uriConverter); - - } else if (wbRequest.get(WaybackConstants.REQUEST_TYPE).equals( - WaybackConstants.REQUEST_REPLAY_QUERY)) { - - // You ask what a REPLAY request is doing in a QueryServlet? - // In one mode, with a remote BDB JE index, and HTTP-XML as - // transport, the whole request is wrapped up as an OpenSearch - // query, and the results are marshalled/unmarshalled to/from - // XML. In this case, a REPLAY request may be recieved and - // handled by this class. 
- renderer.renderUrlResults(httpRequest, httpResponse, - wbRequest, results, uriConverter); - - } else if (wbRequest.get(WaybackConstants.REQUEST_TYPE).equals( - WaybackConstants.REQUEST_CLOSEST_QUERY)) { - - renderer.renderUrlResults(httpRequest, httpResponse, - wbRequest, results, uriConverter); - - } else if (wbRequest.get(WaybackConstants.REQUEST_TYPE).equals( - WaybackConstants.REQUEST_URL_PREFIX_QUERY)) { - - renderer.renderUrlPrefixResults(httpRequest, httpResponse, - wbRequest, results, uriConverter); - } else { - throw new BadQueryException("Unknown query " + - wbRequest.get(WaybackConstants.REQUEST_TYPE)); - } - - } catch (WaybackException wbe) { - - renderer.renderException(httpRequest, httpResponse, wbRequest, wbe); - - } - } - - // Method annotating the searchresult closest in time to the timestamp - // belonging to this request. - private void annotateClosest(SearchResults results, - WaybackRequest wbRequest, HttpServletRequest request) throws ParseException { - - SearchResult closest = null; - long closestDistance = 0; - SearchResult cur = null; - String id = request.getHeader("Proxy-Id"); - if(id == null) id = request.getRemoteAddr(); - String requestsDate = Timestamp.getTimestampForId(request.getContextPath(),id); - Timestamp wantTimestamp; - wantTimestamp = Timestamp.parseBefore(requestsDate); - - Iterator<SearchResult> itr = results.iterator(); - while (itr.hasNext()) { - cur = itr.next(); - long curDistance; - Timestamp curTimestamp = Timestamp.parseBefore(cur - .get(WaybackConstants.RESULT_CAPTURE_DATE)); - curDistance = curTimestamp.absDistanceFromTimestamp(wantTimestamp); - - if ((closest == null) || (curDistance < closestDistance)) { - closest = cur; - closestDistance = curDistance; - } - } - if(closest != null) { - closest.put("closest", "true"); - } - } -} Deleted: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/ReplayServlet.java 
=================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/ReplayServlet.java 2007-07-19 21:37:00 UTC (rev 1841) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/ReplayServlet.java 2007-07-19 23:30:47 UTC (rev 1842) @@ -1,134 +0,0 @@ -/* WBReplayUIServlet - * - * Created on 2005/10/18 14:00:00 - * - * Copyright (C) 2005 Internet Archive. - * - * This file is part of the Wayback Machine (crawler.archive.org). - * - * Wayback Machine is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * any later version. - * - * Wayback Machine is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. 
- * - * You should have received a copy of the GNU Lesser Public License - * along with Wayback Machine; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -package org.archive.wayback.replay; - -import java.io.IOException; -import java.util.logging.Logger; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import org.archive.wayback.WaybackConstants; -import org.archive.wayback.ReplayRenderer; -import org.archive.wayback.ResultURIConverter; -import org.archive.wayback.ResourceIndex; -import org.archive.wayback.ResourceStore; -import org.archive.wayback.core.CaptureSearchResults; -import org.archive.wayback.core.Resource; -import org.archive.wayback.core.SearchResult; -import org.archive.wayback.core.SearchResults; -import org.archive.wayback.core.WaybackRequest; -import org.archive.wayback.core.WaybackServlet; -import org.archive.wayback.exception.BetterRequestException; -import org.archive.wayback.exception.ConfigurationException; -import org.archive.wayback.exception.ResourceNotInArchiveException; -import org.archive.wayback.exception.WaybackException; - -/** - * Servlet implementation for Wayback Replay requests. 
- * - * @author Brad Tofel - * @version $Date$, $Revision$ - */ -public class ReplayServlet extends WaybackServlet { - private static final Logger LOGGER = Logger.getLogger(ReplayServlet.class - .getName()); - - private static final String WMREQUEST_ATTRIBUTE = "wmrequest.attribute"; - - private static final long serialVersionUID = 1L; - - /** - * Constructor - */ - public ReplayServlet() { - super(); - } - - public void doGet(HttpServletRequest httpRequest, - HttpServletResponse httpResponse) throws IOException, - ServletException { - - WaybackRequest wbRequest = (WaybackRequest) httpRequest - .getAttribute(WMREQUEST_ATTRIBUTE); - - ReplayRenderer renderer; - try { - renderer = wayback.getReplayRenderer(); - } catch (ConfigurationException e1) { - throw new ServletException(e1); - } - Resource resource = null; - try { - - if (wbRequest == null) { - wbRequest = wayback.getQueryParser().parseQuery(httpRequest); - } - // maybe redirect to a better URI for the request given: - wbRequest.checkBetterRequest(); - - ResourceIndex idx = wayback.getResourceIndex(); - ResourceStore store = wayback.getResourceStore(); - ResultURIConverter uriConverter = wayback.getURIConverter(); -// uriConverter.setWbRequest(wbRequest); - - SearchResults results = idx.query(wbRequest); - if(!(results instanceof CaptureSearchResults)) { - throw new ConfigurationException("Bad results..."); - } - CaptureSearchResults captureResults = (CaptureSearchResults) results; - - // TODO: check which versions are actually accessible right now? 
- SearchResult closest = captureResults.getClosest(wbRequest); - resource = store.retrieveResource(closest); - - renderer.renderResource(httpRequest, httpResponse, wbRequest, - closest, resource, uriConverter); - - } catch (ResourceNotInArchiveException nia) { - - LOGGER.info("NotInArchive\t" - + wbRequest.get(WaybackConstants.REQUEST_URL)); - renderer.renderException(httpRequest, httpResponse, wbRequest, nia); - - } catch (BetterRequestException bre) { - - httpResponse.sendRedirect(bre.getBetterURI()); - - } catch (WaybackException wbe) { - - renderer.renderException(httpRequest, httpResponse, wbRequest, wbe); - - } catch (Exception e) { - // TODO show something Wayback'ish to the user rather than letting - // the container deal? - throw new ServletException(e); - } finally { - if (resource != null) { - resource.close(); - } - } - } -} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2007-07-25 00:15:10
|
Revision: 1854 http://archive-access.svn.sourceforge.net/archive-access/?rev=1854&view=rev Author: bradtofel Date: 2007-07-24 17:15:12 -0700 (Tue, 24 Jul 2007) Log Message: ----------- INTERFACE: HTTP headers are now stored in a Map instead of a Properties. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/Resource.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/UIReplayResult.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/Resource.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/Resource.java 2007-07-20 01:27:27 UTC (rev 1853) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/Resource.java 2007-07-25 00:15:12 UTC (rev 1854) @@ -27,9 +27,10 @@ import java.io.IOException; import java.io.InputStream; import java.util.Enumeration; +import java.util.HashMap; +import java.util.Hashtable; import java.util.Iterator; import java.util.Map; -import java.util.Properties; import java.util.Set; import java.util.logging.Logger; @@ -81,7 +82,7 @@ * Expandable property bag for holding metadata associated with this * resource */ - Properties metaData = new Properties(); + Hashtable<String,String> metaData = new Hashtable<String,String>(); private BufferedInputStream bis; @@ -117,16 +118,17 @@ // copy all ARC record header fields to metaData, prefixing with // ARC_META_PREFIX - Map headerMetaMap = arcRecord.getMetaData().getHeaderFields(); - Set keys = headerMetaMap.keySet(); - Iterator itr = keys.iterator(); + @SuppressWarnings("unchecked") + Map<String,String> headerMetaMap = arcRecord.getMetaData().getHeaderFields(); + Set<String> keys = headerMetaMap.keySet(); + Iterator<String> itr = keys.iterator(); while(itr.hasNext()) { - Object metaKey = 
itr.next(); - Object metaValue = headerMetaMap.get(metaKey); - String metaStringValue = (metaValue == null) ? "" : - metaValue.toString(); - metaData.put(ARC_META_PREFIX + metaKey.toString(), - metaStringValue); + String metaKey = itr.next(); + String metaValue = headerMetaMap.get(metaKey); + if(metaValue == null) { + metaValue = ""; + } + metaData.put(ARC_META_PREFIX + metaKey,metaValue); } parsedHeader = true; @@ -138,13 +140,13 @@ * @return a Properties of all elements in metaData starting with 'prefix'. * keys in the returned Properties have 'prefix' removed. */ - public Properties filterMeta(String prefix) { - Properties matching = new Properties(); - for (Enumeration e = metaData.keys(); e.hasMoreElements();) { - String key = (String) e.nextElement(); + public Map<String,String> filterMeta(String prefix) { + HashMap<String,String> matching = new HashMap<String,String>(); + for (Enumeration<String> e = metaData.keys(); e.hasMoreElements();) { + String key = e.nextElement(); if (key.startsWith(prefix)) { String finalKey = key.substring(prefix.length()); - String value = (String) metaData.get(key); + String value = metaData.get(key); matching.put(finalKey, value); } } @@ -154,14 +156,14 @@ /** * @return a Properties containing all HTTP header fields for this record */ - public Properties getHttpHeaders() { + public Map<String,String> getHttpHeaders() { return filterMeta(HTTP_HEADER_PREFIX); } /** * @return a Properties containing all ARC Meta fields for this record */ - public Properties getARCMetadata() { + public Map<String,String> getARCMetadata() { return filterMeta(ARC_META_PREFIX); } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/UIReplayResult.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/UIReplayResult.java 2007-07-20 01:27:27 UTC (rev 1853) +++ 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/UIReplayResult.java 2007-07-25 00:15:12 UTC (rev 1854) @@ -25,7 +25,7 @@ package org.archive.wayback.replay; import java.io.IOException; -import java.util.Properties; +import java.util.Map; import javax.servlet.http.HttpServletRequest; @@ -152,7 +152,7 @@ /** * @return the HTTP Headers as Properties */ - public Properties getHttpHeaders() { + public Map<String,String> getHttpHeaders() { return resource.getHttpHeaders(); } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2007-07-25 00:20:51
|
Revision: 1858 http://archive-access.svn.sourceforge.net/archive-access/?rev=1858&view=rev Author: bradtofel Date: 2007-07-24 17:20:53 -0700 (Tue, 24 Jul 2007) Log Message: ----------- REFACTOR: moved directory ensure code to a util class REFACTOR: removed all references to PropertyConfigurable interface Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCCacheDirectory.java Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/DirMaker.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCCacheDirectory.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCCacheDirectory.java 2007-07-25 00:19:26 UTC (rev 1857) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCCacheDirectory.java 2007-07-25 00:20:53 UTC (rev 1858) @@ -28,7 +28,6 @@ import java.io.IOException; import java.util.Arrays; import java.util.List; -import java.util.Properties; import java.util.logging.Logger; import org.archive.io.ArchiveRecord; @@ -39,10 +38,8 @@ import org.archive.io.arc.ARCRecord; import org.archive.io.arc.ARCWriter; import org.archive.io.arc.ARCWriterPool; -import org.archive.wayback.PropertyConfigurable; -import org.archive.wayback.core.PropertyConfiguration; import org.archive.wayback.core.Resource; -import org.archive.wayback.exception.ConfigurationException; +import org.archive.wayback.util.DirMaker; /** * Class which manages a growing set of ARC files, managed by an ARCWriterPool. 
@@ -55,7 +52,7 @@ * @author brad * @version $Date$, $Revision$ */ -public class ARCCacheDirectory implements PropertyConfigurable { +public class ARCCacheDirectory { private static final Logger LOGGER = Logger.getLogger( ARCCacheDirectory.class.getName()); @@ -74,12 +71,16 @@ */ public static final String LIVE_WEB_ARC_PREFIX = "liveweb.arc.prefix"; private ARCWriterPool pool = null; + private String arcPath = null; + private String arcPrefix = "wayback-live"; private File arcDir = null; - public void init(Properties p) throws ConfigurationException { - PropertyConfiguration pc = new PropertyConfiguration(p); - arcDir = pc.getDir(LIVE_WEB_ARC_DIR,true); - String arcPrefix = pc.getString(LIVE_WEB_ARC_PREFIX); + /** + * @throws IOException + */ + public void init() throws IOException { + // TODO: check that all props have been set + arcDir = DirMaker.ensureDir(arcPath,"arcPath"); File[] files = { arcDir }; WriterPoolSettings settings = getSettings(true, arcPrefix, files); pool = new ARCWriterPool(settings, MAX_POOL_WRITERS, MAX_POOL_WAIT); @@ -166,6 +167,7 @@ return isCompressed; } + @SuppressWarnings("unchecked") public List getMetadata() { return null; } Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/DirMaker.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/DirMaker.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/DirMaker.java 2007-07-25 00:20:53 UTC (rev 1858) @@ -0,0 +1,62 @@ +/* DirMaker + * + * $Id$ + * + * Created on 7:02:37 PM Jul 19, 2007. + * + * Copyright (C) 2007 Internet Archive. + * + * This file is part of wayback-core. 
+ * + * wayback-core is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback-core is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with wayback-core; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.util; + +import java.io.File; +import java.io.IOException; + +/** + * + * + * @author brad + * @version $Date$, $Revision$ + */ +public class DirMaker { + /** + * Ensure that the path pointed to by 'path' is a directory, if possible + * @param path + * @param name + * @return File that is a created directory + * @throws IOException + */ + public static File ensureDir(String path, String name) throws IOException { + if((path == null) || (path.length() == 0)) { + throw new IOException("No configuration for (" + name + ")"); + } + File dir = new File(path); + if(dir.exists()) { + if(!dir.isDirectory()) { + throw new IOException("Dir(" + name + ") at (" + path + + ") exists but is not a directory!"); + } + } else { + if(!dir.mkdirs()) { + throw new IOException("Unable to create dir(" + name +") at (" + + path + ")"); + } + } + return dir; + } +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2007-07-25 00:29:16
|
Revision: 1868 http://archive-access.svn.sourceforge.net/archive-access/?rev=1868&view=rev Author: bradtofel Date: 2007-07-24 17:29:19 -0700 (Tue, 24 Jul 2007) Log Message: ----------- REFACTOR: removed all references to PropertyConfigurable interface Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/QueryRenderer.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/ReplayRenderer.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/RequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/ResourceIndex.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/ResourceStore.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/ResultURIConverter.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/QueryRenderer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/QueryRenderer.java 2007-07-25 00:28:30 UTC (rev 1867) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/QueryRenderer.java 2007-07-25 00:29:19 UTC (rev 1868) @@ -40,7 +40,7 @@ * @author brad * @version $Date$, $Revision$ */ -public interface QueryRenderer extends PropertyConfigurable { +public interface QueryRenderer { /** Show an output page indicating that a request to the Wayback Machine * failed for some reason, as determined by the WaybackException argument. 
Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/ReplayRenderer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/ReplayRenderer.java 2007-07-25 00:28:30 UTC (rev 1867) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/ReplayRenderer.java 2007-07-25 00:29:19 UTC (rev 1868) @@ -41,7 +41,7 @@ * @author brad * @version $Date$, $Revision$ */ -public interface ReplayRenderer extends PropertyConfigurable { +public interface ReplayRenderer { /** * return a page to the client indicating that something went wrong, and Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/RequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/RequestParser.java 2007-07-25 00:28:30 UTC (rev 1867) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/RequestParser.java 2007-07-25 00:29:19 UTC (rev 1868) @@ -36,7 +36,7 @@ * @author brad * @version $Date$, $Revision$ */ -public interface RequestParser extends PropertyConfigurable { +public interface RequestParser { /** * attempt to transform an incoming HttpServletRequest into a * WaybackRequest object. returns null if there is missing information. 
Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/ResourceIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/ResourceIndex.java 2007-07-25 00:28:30 UTC (rev 1867) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/ResourceIndex.java 2007-07-25 00:29:19 UTC (rev 1868) @@ -36,7 +36,7 @@ * @author Brad Tofel * @version $Date$, $Revision$ */ -public interface ResourceIndex extends PropertyConfigurable { +public interface ResourceIndex { /** * Transform a WMRequest into a ResourceResults. * Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/ResourceStore.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/ResourceStore.java 2007-07-25 00:28:30 UTC (rev 1867) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/ResourceStore.java 2007-07-25 00:29:19 UTC (rev 1868) @@ -35,7 +35,7 @@ * @author Brad Tofel * @version $Date$, $Revision$ */ -public interface ResourceStore extends PropertyConfigurable { +public interface ResourceStore { /** * Transform a SearchResult into a Resource * Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/ResultURIConverter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/ResultURIConverter.java 2007-07-25 00:28:30 UTC (rev 1867) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/ResultURIConverter.java 2007-07-25 00:29:19 UTC (rev 1868) @@ -30,7 +30,7 @@ * @author brad * @version $Date$, $Revision$ */ -public interface ResultURIConverter extends PropertyConfigurable { 
+public interface ResultURIConverter { /** * return an absolute URL that will replay URL url at time datespec. * This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2007-07-25 00:52:41
|
Revision: 1890 http://archive-access.svn.sourceforge.net/archive-access/?rev=1890&view=rev Author: bradtofel Date: 2007-07-24 17:52:41 -0700 (Tue, 24 Jul 2007) Log Message: ----------- REFACTOR: removed all references to PropertyConfigurable interface Removed Paths: ------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/PropertyConfigurable.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/PropertyConfiguration.java Deleted: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/PropertyConfigurable.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/PropertyConfigurable.java 2007-07-25 00:51:38 UTC (rev 1889) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/PropertyConfigurable.java 2007-07-25 00:52:41 UTC (rev 1890) @@ -1,48 +0,0 @@ -/* PropertyConfigurable - * - * $Id$ - * - * Created on 3:46:34 PM Nov 7, 2005. - * - * Copyright (C) 2005 Internet Archive. - * - * This file is part of wayback. - * - * wayback is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * any later version. - * - * wayback is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. 
- * - * You should have received a copy of the GNU Lesser Public License - * along with wayback; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -package org.archive.wayback; - -import java.util.Properties; - -import org.archive.wayback.exception.ConfigurationException; - -/** - * - * - * @author brad - * @version $Date$, $Revision$ - */ -public interface PropertyConfigurable { - /** - * Initialize this Object. Pass in the specific - * configurations via Properties. - * - * @param p - * Generic properties bag for configurations - * @throws ConfigurationException - */ - public void init(final Properties p) throws ConfigurationException; - -} Deleted: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/PropertyConfiguration.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/PropertyConfiguration.java 2007-07-25 00:51:38 UTC (rev 1889) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/PropertyConfiguration.java 2007-07-25 00:52:41 UTC (rev 1890) @@ -1,240 +0,0 @@ -/* PropertyConfiguration - * - * $Id$ - * - * Created on 5:40:23 PM Apr 2, 2007. - * - * Copyright (C) 2007 Internet Archive. - * - * This file is part of wayback-core. - * - * wayback-core is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * any later version. - * - * wayback-core is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. 
- * - * You should have received a copy of the GNU Lesser Public License - * along with wayback-core; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -package org.archive.wayback.core; - -import java.io.File; -import java.util.Properties; - -import org.archive.wayback.exception.ConfigurationException; - -/** - * - * - * @author brad - * @version $Date$, $Revision$ - */ -public class PropertyConfiguration { - Properties p = null; - /** - * @param p - */ - public PropertyConfiguration(Properties p) { - this.p = p; - } - - private String invalidConfig(String propName) { - return "Invalid " + propName + " configuration."; - } - private String missingConfig(String propName) { - return "No " + propName + " configuration."; - } - /** - * @param propName - * @param defaultValue - * @return int value from the Properties, or defaultValue if - * no value is present, or the value is not parseable - */ - public int getInt(final String propName, int defaultValue) { - String intV = (String) p.get(propName); - if(intV == null) { - return defaultValue; - } - try { - return Integer.parseInt(intV); - } catch(NumberFormatException e) { - return defaultValue; - } - } - /** - * @param propName - * @return int value from the Properties - * @throws ConfigurationException if not present, or unparseable. 
- */ - public int getInt(final String propName) throws ConfigurationException { - String intV = (String) p.get(propName); - if(intV == null) { - throw new ConfigurationException(missingConfig(propName)); - } - int i = 0; - try { - i = Integer.parseInt(intV); - } catch(NumberFormatException e) { - throw new ConfigurationException(invalidConfig(propName)); - } - return i; - } - - /** - * @param propName - * @param defaultValue - * @return int value from the Properties, or defaultValue if - * no value is present, or the value is not parseable - */ - public long getLong(final String propName, long defaultValue) { - String longV = (String) p.get(propName); - if(longV == null) { - return defaultValue; - } - try { - return Long.parseLong(longV); - } catch(NumberFormatException e) { - return defaultValue; - } - } - /** - * @param propName - * @return long value from the Properties - * @throws ConfigurationException if not present, or unparseable. - */ - public long getLong(final String propName) throws ConfigurationException { - String longV = (String) p.get(propName); - if(longV == null) { - throw new ConfigurationException(missingConfig(propName)); - } - long l = 0; - try { - l = Long.parseLong(longV); - } catch(NumberFormatException e) { - throw new ConfigurationException(invalidConfig(propName)); - } - return l; - } - /** - * @param propName - * @param defaultValue - * @return String value for propName from the Properties, or defaultValue - * if no value is found, or the value is empty. 
- * @throws ConfigurationException - */ - public String getString(final String propName, final String defaultValue) - throws ConfigurationException { - - String stringV = (String) p.get(propName); - if((stringV == null) || (stringV.length() == 0)) { - if(defaultValue == null) { - throw new ConfigurationException(missingConfig(propName)); - } - return defaultValue; - } - return stringV; - } - /** - * @param propName - * @return String value for propName from the Properties, - * @throws ConfigurationException - */ - public String getString(final String propName) throws ConfigurationException { - return getString(propName,null); - } - /** - * @param propName - * @param defaultValue - * @param createIfMissing - * @return File for directory specified in Properties by propName - * @throws ConfigurationException if defaultValue is null and property - * is missing, or if a configuration is found, but no directory exists (and - * createIfMissing is false) or if a configuration is found, but the - * directory cannot be created. - */ - public File getDir(final String propName, final String defaultValue, - boolean createIfMissing) throws ConfigurationException { - - String stringPath = getString(propName,defaultValue); - File dir = new File(stringPath); - if(!dir.exists()) { - if(createIfMissing) { - if(!dir.mkdirs()) { - throw new ConfigurationException("Unable to mkdirs(" + - stringPath + ")"); - } - } else { - throw new ConfigurationException("Missing directory(" + - stringPath + ")"); - } - } - return dir; - } - /** - * @param propName - * @param defaultValue - * @return File for directory specified in Properties by propName - * @throws ConfigurationException if the directory cannot be created. 
- */ - public File getDir(final String propName, final String defaultValue) - throws ConfigurationException { - - return getDir(propName,defaultValue,false); - } - /** - * @param propName - * @param createIfMissing - * @return File for directory specified in Properties by propName - * @throws ConfigurationException if no configuration is found, or if - * createIfMissing is true, but the directory cannot be created. - */ - public File getDir(final String propName, boolean createIfMissing) - throws ConfigurationException { - - return getDir(propName,null,createIfMissing); - } - /** - * @param propName - * @return File for directory specified in Properties by propName - * @throws ConfigurationException if no property is found, or if the directory - * does not exist. - */ - public File getDir(final String propName) throws ConfigurationException { - return getDir(propName,null,false); - } - - /** - * @param propName - * @param defaultValue - * @return File pointing to configuration for propName, or defaultValue if - * no configuration is found - * @throws ConfigurationException if the File pointed to by configuration - * does not exist. - */ - public File getFile(final String propName, final String defaultValue) - throws ConfigurationException { - - String stringPath = getString(propName,defaultValue); - File file = new File(stringPath); - if(!file.exists()) { - throw new ConfigurationException("No file at " + stringPath + - " for configuration " + propName); - } - return file; - } - /** - * @param propName - * @return File pointed to by configuration propName - * @throws ConfigurationException if there is no configuration, or the file - * does not exist. - */ - public File getFile(final String propName) throws ConfigurationException { - return getFile(propName,null); - } -} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2007-07-26 21:47:19
|
Revision: 1894 http://archive-access.svn.sourceforge.net/archive-access/?rev=1894&view=rev Author: bradtofel Date: 2007-07-26 14:47:22 -0700 (Thu, 26 Jul 2007) Log Message: ----------- REFACTOR: moved ExclusionFilterFactory classes under org.archive.wayback.accesscontrol Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/remote/RemoteExclusionFilterFactory.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilterFactory.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilterFactory.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/CompositeExclusionFilterFactory.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/ExclusionFilterFactory.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/ExclusionFilterFactoryFactory.java Removed Paths: ------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/CompositeExclusionFilterFactory.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ExclusionFilterFactory.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ExclusionFilterFactoryFactory.java Copied: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/CompositeExclusionFilterFactory.java (from rev 1883, trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/CompositeExclusionFilterFactory.java) 
=================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/CompositeExclusionFilterFactory.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/CompositeExclusionFilterFactory.java 2007-07-26 21:47:22 UTC (rev 1894) @@ -0,0 +1,81 @@ +/* CompositeExclusionFilterFactory + * + * $Id$ + * + * Created on 4:53:58 PM Mar 19, 2007. + * + * Copyright (C) 2007 Internet Archive. + * + * This file is part of wayback. + * + * wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with wayback-svn; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.accesscontrol; + +import java.util.ArrayList; +import java.util.Iterator; + +import org.archive.wayback.core.SearchResult; +import org.archive.wayback.resourceindex.filters.CompositeExclusionFilter; +import org.archive.wayback.util.ObjectFilter; + +/** + * Class that provides SearchResult Filtering based on multiple + * ExclusionFilterFactory instances by returning a single composite + * SearchResultFilter based on the results of each ExclusionFilter. 
+ * + * @author brad + * @version $Date$, $Revision$ + */ +public class CompositeExclusionFilterFactory implements ExclusionFilterFactory { + + private ArrayList<ExclusionFilterFactory> factories = + new ArrayList<ExclusionFilterFactory>(); + + /** + * @param factory to be added to the composite + */ + public void addFactory(ExclusionFilterFactory factory) { + factories.add(factory); + } + + /* (non-Javadoc) + * @see org.archive.wayback.resourceindex.ExclusionFilterFactory#get() + */ + public ObjectFilter<SearchResult> get() { + Iterator<ExclusionFilterFactory> itr = factories.iterator(); + CompositeExclusionFilter filter = new CompositeExclusionFilter(); + while(itr.hasNext()) { + filter.addComponent(itr.next().get()); + } + return filter; + } + + + /** + * @return the factories + */ + public ArrayList<ExclusionFilterFactory> getFactories() { + return factories; + } + + + /** + * @param factories the factories to set + */ + public void setFactories(ArrayList<ExclusionFilterFactory> factories) { + this.factories = factories; + } +} Copied: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/ExclusionFilterFactory.java (from rev 1883, trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ExclusionFilterFactory.java) =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/ExclusionFilterFactory.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/ExclusionFilterFactory.java 2007-07-26 21:47:22 UTC (rev 1894) @@ -0,0 +1,41 @@ +/* ExclusionFilterFactory + * + * $Id$ + * + * Created on 8:14:58 PM Mar 5, 2007. + * + * Copyright (C) 2007 Internet Archive. + * + * This file is part of wayback-svn. 
+ * + * wayback-svn is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback-svn is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with wayback-svn; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.accesscontrol; + +import org.archive.wayback.core.SearchResult; +import org.archive.wayback.util.ObjectFilter; +/** + * + * + * @author brad + * @version $Date$, $Revision$ + */ +public interface ExclusionFilterFactory { + /** + * @return an ObjectFilter object that filters records based on + * some set of exclusion rules. + */ + public ObjectFilter<SearchResult> get(); +} Copied: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/ExclusionFilterFactoryFactory.java (from rev 1884, trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ExclusionFilterFactoryFactory.java) =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/ExclusionFilterFactoryFactory.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/ExclusionFilterFactoryFactory.java 2007-07-26 21:47:22 UTC (rev 1894) @@ -0,0 +1,77 @@ +/* ExclusionFilterFactoryFactory + * + * $Id$ + * + * Created on 8:17:48 PM Mar 5, 2007. + * + * Copyright (C) 2007 Internet Archive. + * + * This file is part of wayback-svn. 
+ * + * wayback-svn is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback-svn is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with wayback-svn; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.accesscontrol; + +import java.util.Properties; + +import org.archive.wayback.accesscontrol.robotstxt.RobotExclusionFilterFactory; +import org.archive.wayback.accesscontrol.staticmap.StaticMapExclusionFilterFactory; +import org.archive.wayback.exception.ConfigurationException; + +/** + * + * + * @author brad + * @version $Date$, $Revision$ + */ +public class ExclusionFilterFactoryFactory { + private static final String FACTORY_TYPE = "exclusion.factorytype"; + private static final String FACTORY_TYPE_STATIC_MAP = "static-map"; + private static final String FACTORY_TYPE_ROBOT_EXCLUSION = "robot-cache"; + private static final String FACTORY_TYPE_ROBOT_PLUS_MAP = "robot-plus-map"; + + /** + * @param p + * @return the ExclusionFilterFactory, or null if none is configured. 
+ * @throws ConfigurationException + */ + public static ExclusionFilterFactory get(Properties p) + throws ConfigurationException { + + ExclusionFilterFactory factory = null; + String type = (String) p.getProperty(FACTORY_TYPE); + if(type == null) { + return factory; + } + if(type.equals(FACTORY_TYPE_STATIC_MAP)) { + factory = new StaticMapExclusionFilterFactory(); +// factory.init(p); + } else if(type.equals(FACTORY_TYPE_ROBOT_EXCLUSION)){ + factory = new RobotExclusionFilterFactory(); +// factory.init(p); + } else if(type.equals(FACTORY_TYPE_ROBOT_PLUS_MAP)) { + CompositeExclusionFilterFactory composite = null; + composite = new CompositeExclusionFilterFactory(); + ExclusionFilterFactory robot = new RobotExclusionFilterFactory(); + ExclusionFilterFactory staticMap = new StaticMapExclusionFilterFactory(); +// robot.init(p); +// staticMap.init(p); + composite.addFactory(staticMap); + composite.addFactory(robot); + factory = composite; + } + return factory; + } +} Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/remote/RemoteExclusionFilterFactory.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/remote/RemoteExclusionFilterFactory.java 2007-07-26 21:42:35 UTC (rev 1893) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/remote/RemoteExclusionFilterFactory.java 2007-07-26 21:47:22 UTC (rev 1894) @@ -24,8 +24,8 @@ */ package org.archive.wayback.accesscontrol.remote; +import org.archive.wayback.accesscontrol.ExclusionFilterFactory; import org.archive.wayback.core.SearchResult; -import org.archive.wayback.resourceindex.ExclusionFilterFactory; import org.archive.wayback.util.ObjectFilter; /** Modified: 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilterFactory.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilterFactory.java 2007-07-26 21:42:35 UTC (rev 1893) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilterFactory.java 2007-07-26 21:47:22 UTC (rev 1894) @@ -24,9 +24,9 @@ */ package org.archive.wayback.accesscontrol.robotstxt; +import org.archive.wayback.accesscontrol.ExclusionFilterFactory; import org.archive.wayback.core.SearchResult; import org.archive.wayback.liveweb.LiveWebCache; -import org.archive.wayback.resourceindex.ExclusionFilterFactory; import org.archive.wayback.util.ObjectFilter; /** Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilterFactory.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilterFactory.java 2007-07-26 21:42:35 UTC (rev 1893) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilterFactory.java 2007-07-26 21:47:22 UTC (rev 1894) @@ -30,8 +30,8 @@ import java.util.Map; import java.util.logging.Logger; +import org.archive.wayback.accesscontrol.ExclusionFilterFactory; import org.archive.wayback.core.SearchResult; -import org.archive.wayback.resourceindex.ExclusionFilterFactory; import org.archive.wayback.surt.SURTTokenizer; import org.archive.wayback.util.CloseableIterator; import org.archive.wayback.util.ObjectFilter; Deleted: 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/CompositeExclusionFilterFactory.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/CompositeExclusionFilterFactory.java 2007-07-26 21:42:35 UTC (rev 1893) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/CompositeExclusionFilterFactory.java 2007-07-26 21:47:22 UTC (rev 1894) @@ -1,81 +0,0 @@ -/* CompositeExclusionFilterFactory - * - * $Id$ - * - * Created on 4:53:58 PM Mar 19, 2007. - * - * Copyright (C) 2007 Internet Archive. - * - * This file is part of wayback. - * - * wayback is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * any later version. - * - * wayback is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * - * You should have received a copy of the GNU Lesser Public License - * along with wayback-svn; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -package org.archive.wayback.resourceindex; - -import java.util.ArrayList; -import java.util.Iterator; - -import org.archive.wayback.core.SearchResult; -import org.archive.wayback.resourceindex.filters.CompositeExclusionFilter; -import org.archive.wayback.util.ObjectFilter; - -/** - * Class that provides SearchResult Filtering based on multiple - * ExclusionFilterFactory instances by returning a single composite - * SearchResultFilter based on the results of each ExclusionFilter. 
- * - * @author brad - * @version $Date$, $Revision$ - */ -public class CompositeExclusionFilterFactory implements ExclusionFilterFactory { - - private ArrayList<ExclusionFilterFactory> factories = - new ArrayList<ExclusionFilterFactory>(); - - /** - * @param factory to be added to the composite - */ - public void addFactory(ExclusionFilterFactory factory) { - factories.add(factory); - } - - /* (non-Javadoc) - * @see org.archive.wayback.resourceindex.ExclusionFilterFactory#get() - */ - public ObjectFilter<SearchResult> get() { - Iterator<ExclusionFilterFactory> itr = factories.iterator(); - CompositeExclusionFilter filter = new CompositeExclusionFilter(); - while(itr.hasNext()) { - filter.addComponent(itr.next().get()); - } - return filter; - } - - - /** - * @return the factories - */ - public ArrayList<ExclusionFilterFactory> getFactories() { - return factories; - } - - - /** - * @param factories the factories to set - */ - public void setFactories(ArrayList<ExclusionFilterFactory> factories) { - this.factories = factories; - } -} Deleted: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ExclusionFilterFactory.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ExclusionFilterFactory.java 2007-07-26 21:42:35 UTC (rev 1893) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ExclusionFilterFactory.java 2007-07-26 21:47:22 UTC (rev 1894) @@ -1,41 +0,0 @@ -/* ExclusionFilterFactory - * - * $Id$ - * - * Created on 8:14:58 PM Mar 5, 2007. - * - * Copyright (C) 2007 Internet Archive. - * - * This file is part of wayback-svn. 
- * - * wayback-svn is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * any later version. - * - * wayback-svn is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * - * You should have received a copy of the GNU Lesser Public License - * along with wayback-svn; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -package org.archive.wayback.resourceindex; - -import org.archive.wayback.core.SearchResult; -import org.archive.wayback.util.ObjectFilter; -/** - * - * - * @author brad - * @version $Date$, $Revision$ - */ -public interface ExclusionFilterFactory { - /** - * @return an ObjectFilter object that filters records based on - * some set of exclusion rules. - */ - public ObjectFilter<SearchResult> get(); -} Deleted: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ExclusionFilterFactoryFactory.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ExclusionFilterFactoryFactory.java 2007-07-26 21:42:35 UTC (rev 1893) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ExclusionFilterFactoryFactory.java 2007-07-26 21:47:22 UTC (rev 1894) @@ -1,77 +0,0 @@ -/* ExclusionFilterFactoryFactory - * - * $Id$ - * - * Created on 8:17:48 PM Mar 5, 2007. - * - * Copyright (C) 2007 Internet Archive. - * - * This file is part of wayback-svn. 
- * - * wayback-svn is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * any later version. - * - * wayback-svn is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * - * You should have received a copy of the GNU Lesser Public License - * along with wayback-svn; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -package org.archive.wayback.resourceindex; - -import java.util.Properties; - -import org.archive.wayback.accesscontrol.robotstxt.RobotExclusionFilterFactory; -import org.archive.wayback.accesscontrol.staticmap.StaticMapExclusionFilterFactory; -import org.archive.wayback.exception.ConfigurationException; - -/** - * - * - * @author brad - * @version $Date$, $Revision$ - */ -public class ExclusionFilterFactoryFactory { - private static final String FACTORY_TYPE = "exclusion.factorytype"; - private static final String FACTORY_TYPE_STATIC_MAP = "static-map"; - private static final String FACTORY_TYPE_ROBOT_EXCLUSION = "robot-cache"; - private static final String FACTORY_TYPE_ROBOT_PLUS_MAP = "robot-plus-map"; - - /** - * @param p - * @return the ExclusionFilterFactory, or null if none is configured. 
- * @throws ConfigurationException - */ - public static ExclusionFilterFactory get(Properties p) - throws ConfigurationException { - - ExclusionFilterFactory factory = null; - String type = (String) p.getProperty(FACTORY_TYPE); - if(type == null) { - return factory; - } - if(type.equals(FACTORY_TYPE_STATIC_MAP)) { - factory = new StaticMapExclusionFilterFactory(); -// factory.init(p); - } else if(type.equals(FACTORY_TYPE_ROBOT_EXCLUSION)){ - factory = new RobotExclusionFilterFactory(); -// factory.init(p); - } else if(type.equals(FACTORY_TYPE_ROBOT_PLUS_MAP)) { - CompositeExclusionFilterFactory composite = null; - composite = new CompositeExclusionFilterFactory(); - ExclusionFilterFactory robot = new RobotExclusionFilterFactory(); - ExclusionFilterFactory staticMap = new StaticMapExclusionFilterFactory(); -// robot.init(p); -// staticMap.init(p); - composite.addFactory(staticMap); - composite.addFactory(robot); - factory = composite; - } - return factory; - } -} Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java 2007-07-26 21:42:35 UTC (rev 1893) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java 2007-07-26 21:47:22 UTC (rev 1894) @@ -43,6 +43,7 @@ import org.archive.wayback.resourceindex.filters.UrlPrefixMatchFilter; import org.archive.wayback.resourceindex.filters.WindowEndFilter; import org.archive.wayback.resourceindex.filters.WindowStartFilter; +import org.archive.wayback.accesscontrol.ExclusionFilterFactory; import org.archive.wayback.core.CaptureSearchResults; import org.archive.wayback.core.SearchResult; import org.archive.wayback.core.SearchResults; This was sent by the SourceForge.net collaborative 
development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2007-08-07 01:15:24
|
Revision: 1898 http://archive-access.svn.sourceforge.net/archive-access/?rev=1898&view=rev Author: bradtofel Date: 2007-08-06 18:15:26 -0700 (Mon, 06 Aug 2007) Log Message: ----------- REFACTOR: nearly complete rework of ArcIndexer, BDBIndexUpdater to stream everything using AdaptedIterators. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/LiveWebCache.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/LiveWebLocalResourceIndex.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBIndex.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBIndexUpdater.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/indexer/IndexClient.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/LocalARCResourceStore.java Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/SearchResultToBDBRecordAdapter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/SearchResultToCDXLineAdapter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/ARCRecordToSearchResultAdapter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/ArcIndexer.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/LiveWebCache.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/LiveWebCache.java 2007-08-02 03:02:45 UTC (rev 1897) +++ 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/LiveWebCache.java 2007-08-07 01:15:26 UTC (rev 1898) @@ -26,7 +26,6 @@ import java.io.IOException; import java.net.URL; -import java.text.ParseException; import java.util.Date; import java.util.logging.Logger; @@ -43,7 +42,8 @@ import org.archive.wayback.exception.LiveDocumentNotAvailableException; import org.archive.wayback.exception.ResourceNotInArchiveException; import org.archive.wayback.exception.WaybackException; -import org.archive.wayback.resourceindex.indexer.ArcIndexer; +import org.archive.wayback.resourcestore.ARCRecordToSearchResultAdapter; +import org.archive.wayback.util.Adapter; import org.archive.wayback.util.UrlCanonicalizer; /** @@ -61,6 +61,8 @@ private URLCacher cacher = null; private LiveWebLocalResourceIndex index = null; static UrlCanonicalizer canonicalizer = new UrlCanonicalizer(); + private static Adapter<ARCRecord,SearchResult> adapter = + new ARCRecordToSearchResultAdapter(); /** * closes all resources (currently unused...) @@ -202,23 +204,15 @@ resource = arcCacheDir.getResource(name, offset); // add the result to the index: ARCRecord record = (ARCRecord) resource.getArcRecord(); - try { - SearchResult result = ArcIndexer.arcRecordToSearchResult(record); - index.addSearchResult(result); - LOGGER.info("Added URL(" + url.toString() + ") in " + - "ARC(" + name + ") at (" + offset + ") to LiveIndex"); + SearchResult result = adapter.adapt(record); + index.addSearchResult(result); + LOGGER.info("Added URL(" + url.toString() + ") in " + + "ARC(" + name + ") at (" + offset + ") to LiveIndex"); - // we just read thru the doc in order to index it. Reset: - resource = arcCacheDir.getResource(name, offset); - - } catch (ParseException e) { - // TODO: This case could be a big problem -- we might be unable - // to store the fact that we have a local copy. That means we - // could be slamming somebody else's site. 
- e.printStackTrace(); - throw new IOException(e.getLocalizedMessage()); - } + // we just read thru the doc in order to index it. Reset: + resource = arcCacheDir.getResource(name, offset); + } return resource; Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/LiveWebLocalResourceIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/LiveWebLocalResourceIndex.java 2007-08-02 03:02:45 UTC (rev 1897) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/LiveWebLocalResourceIndex.java 2007-08-07 01:15:26 UTC (rev 1898) @@ -29,7 +29,7 @@ import org.archive.wayback.core.SearchResult; import org.archive.wayback.resourceindex.LocalResourceIndex; import org.archive.wayback.resourceindex.bdb.BDBIndex; -import org.archive.wayback.resourceindex.indexer.SearchResultToBDBRecordAdapter; +import org.archive.wayback.resourceindex.bdb.SearchResultToBDBRecordAdapter; import org.archive.wayback.util.AdaptedIterator; /** Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBIndex.java 2007-08-02 03:02:45 UTC (rev 1897) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBIndex.java 2007-08-07 01:15:26 UTC (rev 1898) @@ -25,23 +25,23 @@ package org.archive.wayback.resourceindex.bdb; import java.io.BufferedReader; -import java.io.File; import java.io.IOException; import java.io.InputStreamReader; import java.io.PrintWriter; import java.util.Iterator; -import org.archive.wayback.WaybackConstants; import org.archive.wayback.bdb.BDBRecord; import 
org.archive.wayback.bdb.BDBRecordSet; -import org.archive.wayback.core.CaptureSearchResults; import org.archive.wayback.core.SearchResult; import org.archive.wayback.exception.ConfigurationException; import org.archive.wayback.exception.ResourceIndexNotAvailableException; import org.archive.wayback.resourceindex.SearchResultSource; -import org.archive.wayback.resourceindex.indexer.ArcIndexer; +import org.archive.wayback.resourceindex.cdx.CDXLineToSearchResultAdapter; +import org.archive.wayback.resourceindex.cdx.SearchResultToCDXLineAdapter; import org.archive.wayback.util.AdaptedIterator; +import org.archive.wayback.util.Adapter; import org.archive.wayback.util.CloseableIterator; +import org.archive.wayback.util.flatfile.RecordIterator; import com.sleepycat.je.DatabaseException; @@ -133,8 +133,6 @@ String name = args[1]; String op = args[2]; BDBIndex index = new BDBIndex(); - int BATCH_SIZE = 1000; - ArcIndexer indexer = new ArcIndexer(); try { index.initializeDB(path,name); @@ -145,118 +143,75 @@ if(op.compareTo("-r") == 0) { PrintWriter pw = new PrintWriter(System.out); - CaptureSearchResults results = new CaptureSearchResults(); + + CloseableIterator<SearchResult> itrSR = null; + Adapter<SearchResult,String> adapter = + new SearchResultToCDXLineAdapter(); + CloseableIterator<String> itrS; + if(args.length == 4) { String prefix = args[3]; - CloseableIterator<SearchResult> itr = null; try { - itr = index.getPrefixIterator(prefix); + itrSR = index.getPrefixIterator(prefix); } catch (ResourceIndexNotAvailableException e) { e.printStackTrace(); System.exit(1); } - while(itr.hasNext()) { - SearchResult result = (SearchResult) itr.next(); - String urlS = result.get(WaybackConstants.RESULT_URL_KEY); - if(!urlS.startsWith(prefix)) { + itrS = new AdaptedIterator<SearchResult,String>(itrSR,adapter); + while(itrS.hasNext()) { + String line = itrS.next(); + if(!line.startsWith(prefix)) { break; } - results.addSearchResult(result); - if(results.getResultCount() > BATCH_SIZE) 
{ - try { - indexer.serializeResults(results,pw,false); - } catch (IOException e) { - e.printStackTrace(); - System.exit(2); - } - results = new CaptureSearchResults(); - } + pw.println(line); } - if(results.getResultCount() > 0) { - try { - indexer.serializeResults(results,pw,false); - } catch (IOException e) { - e.printStackTrace(); - System.exit(2); - } - } + } else { - CloseableIterator<SearchResult> itr = null; try { - itr = index.getPrefixIterator(" "); + itrSR = index.getPrefixIterator(" "); } catch (ResourceIndexNotAvailableException e) { e.printStackTrace(); System.exit(1); } - while(itr.hasNext()) { - SearchResult result = (SearchResult) itr.next(); - results.addSearchResult(result); - if(results.getResultCount() > BATCH_SIZE) { - try { - indexer.serializeResults(results,pw,false); - } catch (IOException e) { - e.printStackTrace(); - System.exit(2); - } - results = new CaptureSearchResults(); - } + itrS = new AdaptedIterator<SearchResult,String>(itrSR,adapter); + + while(itrS.hasNext()) { + pw.println(itrS.next()); } - if(results.getResultCount() > 0) { - try { - indexer.serializeResults(results,pw,false); - } catch (IOException e) { - e.printStackTrace(); - System.exit(2); - } - } - pw.flush(); - pw.close(); - } - - } else if(op.compareTo("-w") == 0) { - File tmpCDX = null; - int total = 0; - int numInTmp = 0; + try { - tmpCDX = File.createTempFile("reader",".cdx"); - PrintWriter pw = new PrintWriter(tmpCDX); - // need to break the results from STDIN into chunks -- each chunk - // is written to a file, then added to the index. 
- BufferedReader br = new BufferedReader( - new InputStreamReader(System.in)); - - while(true) { - String line = br.readLine(); - if(line == null) { - break; - } - pw.println(line); - numInTmp++; - total++; - if(numInTmp > BATCH_SIZE) { - pw.flush(); - pw.close(); - index.insertRecords( - indexer.getCDXFileBDBRecordIterator(tmpCDX)); - System.err.println("Wrote " + numInTmp + " to index.."); - pw = new PrintWriter(tmpCDX); - numInTmp = 0; - } - } - if(numInTmp > 0) { - pw.flush(); - pw.close(); - index.insertRecords( - indexer.getCDXFileBDBRecordIterator(tmpCDX)); - System.err.println("Wrote last " + numInTmp + " to index."); - } - tmpCDX.delete(); - System.out.println("Total of " + total + " docs inserted."); + itrS.close(); + itrSR.close(); } catch (IOException e) { + // TODO Auto-generated catch block e.printStackTrace(); - System.exit(1); + System.exit(2); } + pw.flush(); + pw.close(); + + } else if(op.compareTo("-w") == 0) { + + BufferedReader br = new BufferedReader( + new InputStreamReader(System.in)); + + RecordIterator itrS = new RecordIterator(br); + + Adapter<String,SearchResult> adapterStoSR = + new CDXLineToSearchResultAdapter(); + + Iterator<SearchResult> itrSR = + new AdaptedIterator<String,SearchResult>(itrS,adapterStoSR); + + Adapter<SearchResult,BDBRecord> adapterSRtoBDB = + new SearchResultToBDBRecordAdapter(); + + Iterator<BDBRecord> itrBDB = + new AdaptedIterator<SearchResult,BDBRecord>(itrSR, + adapterSRtoBDB); + + index.insertRecords(itrBDB); } else { USAGE(); } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBIndexUpdater.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBIndexUpdater.java 2007-08-02 03:02:45 UTC (rev 1897) +++ 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBIndexUpdater.java 2007-08-07 01:15:26 UTC (rev 1898) @@ -30,8 +30,12 @@ import java.util.logging.Logger; import org.archive.wayback.bdb.BDBRecord; +import org.archive.wayback.core.SearchResult; import org.archive.wayback.exception.ConfigurationException; -import org.archive.wayback.resourceindex.indexer.ArcIndexer; +import org.archive.wayback.resourceindex.cdx.CDXLineToSearchResultAdapter; +//import org.archive.wayback.resourcestore.ArcIndexer; +import org.archive.wayback.util.AdaptedIterator; +import org.archive.wayback.util.flatfile.FlatFile; /** * Class which starts a background thread that repeatedly scans an incoming @@ -64,7 +68,7 @@ private int runInterval = DEFAULT_RUN_INTERVAL_MS; - private ArcIndexer indexer = new ArcIndexer(); +// private ArcIndexer indexer = new ArcIndexer(); /** * Thread object of update thread -- also is flag indicating if the thread @@ -147,7 +151,14 @@ private boolean mergeFile(File cdxFile) { boolean added = false; try { - Iterator<BDBRecord> it = indexer.getCDXFileBDBRecordIterator(cdxFile); + FlatFile ffile = new FlatFile(cdxFile.getAbsolutePath()); + AdaptedIterator<String,SearchResult> searchResultItr = + new AdaptedIterator<String,SearchResult>( + ffile.getSequentialIterator(), + new CDXLineToSearchResultAdapter()); + Iterator<BDBRecord> it = new AdaptedIterator<SearchResult,BDBRecord> + (searchResultItr,new SearchResultToBDBRecordAdapter()); + index.insertRecords(it); added = true; } catch (IOException e) { Copied: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/SearchResultToBDBRecordAdapter.java (from rev 1889, trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/indexer/SearchResultToBDBRecordAdapter.java) =================================================================== --- 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/SearchResultToBDBRecordAdapter.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/SearchResultToBDBRecordAdapter.java 2007-08-07 01:15:26 UTC (rev 1898) @@ -0,0 +1,86 @@ +/* SearchResultToBDBRecordAdapter + * + * $Id$ + * + * Created on 5:58:22 PM Mar 13, 2007. + * + * Copyright (C) 2007 Internet Archive. + * + * This file is part of wayback. + * + * wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with wayback-svn; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.resourceindex.bdb; + +import org.archive.wayback.WaybackConstants; +import org.archive.wayback.bdb.BDBRecord; +import org.archive.wayback.bdb.BDBRecordSet; +import org.archive.wayback.core.SearchResult; +import org.archive.wayback.util.Adapter; + +import com.sleepycat.je.DatabaseEntry; + +/** + * + * + * @author brad + * @version $Date$, $Revision$ + */ +public class SearchResultToBDBRecordAdapter implements + Adapter<SearchResult,BDBRecord> { + + DatabaseEntry key = new DatabaseEntry(); + + DatabaseEntry value = new DatabaseEntry(); + + BDBRecord record = new BDBRecord(key, value); + + private final static String DELIMITER = " "; + + /* + * (non-Javadoc) + * + * @see org.archive.wayback.util.Adapter#adapt(java.lang.Object) + */ + public BDBRecord adapt(SearchResult result) { + StringBuilder keySB = new StringBuilder(40); + StringBuilder valSB = new StringBuilder(100); + + + keySB.append(result.get(WaybackConstants.RESULT_URL_KEY)); + keySB.append(DELIMITER); + keySB.append(result.get(WaybackConstants.RESULT_CAPTURE_DATE)); + keySB.append(DELIMITER); + keySB.append(result.get(WaybackConstants.RESULT_OFFSET)); + keySB.append(DELIMITER); + keySB.append(result.get(WaybackConstants.RESULT_ARC_FILE)); + + + valSB.append(result.get(WaybackConstants.RESULT_ORIG_HOST)); + valSB.append(DELIMITER); + valSB.append(result.get(WaybackConstants.RESULT_MIME_TYPE)); + valSB.append(DELIMITER); + valSB.append(result.get(WaybackConstants.RESULT_HTTP_CODE)); + valSB.append(DELIMITER); + valSB.append(result.get(WaybackConstants.RESULT_MD5_DIGEST)); + valSB.append(DELIMITER); + valSB.append(result.get(WaybackConstants.RESULT_REDIRECT_URL)); + + key.setData(BDBRecordSet.stringToBytes(keySB.toString())); + 
value.setData(BDBRecordSet.stringToBytes(valSB.toString())); + + return record; + } +} Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/SearchResultToCDXLineAdapter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/SearchResultToCDXLineAdapter.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/SearchResultToCDXLineAdapter.java 2007-08-07 01:15:26 UTC (rev 1898) @@ -0,0 +1,71 @@ +/* SearchResultToCDXLineAdapter + * + * $Id$ + * + * Created on 3:22:15 PM Jul 26, 2007. + * + * Copyright (C) 2007 Internet Archive. + * + * This file is part of wayback-core. + * + * wayback-core is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback-core is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with wayback-core; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.resourceindex.cdx; + +import org.archive.wayback.WaybackConstants; +import org.archive.wayback.core.SearchResult; +import org.archive.wayback.util.Adapter; + +/** + * + * + * @author brad + * @version $Date$, $Revision$ + */ +public class SearchResultToCDXLineAdapter implements +Adapter<SearchResult,String>{ + + private static int DEFAULT_CAPACITY = 120; + private final static String DELIMITER = " "; + + /* (non-Javadoc) + * @see org.archive.wayback.util.Adapter#adapt(java.lang.Object) + */ + public String adapt(SearchResult result) { + + StringBuilder sb = new StringBuilder(DEFAULT_CAPACITY); + + sb.append(result.get(WaybackConstants.RESULT_URL_KEY)); + sb.append(DELIMITER); + sb.append(result.get(WaybackConstants.RESULT_CAPTURE_DATE)); + sb.append(DELIMITER); + sb.append(result.get(WaybackConstants.RESULT_ORIG_HOST)); + sb.append(DELIMITER); + sb.append(result.get(WaybackConstants.RESULT_MIME_TYPE)); + sb.append(DELIMITER); + sb.append(result.get(WaybackConstants.RESULT_HTTP_CODE)); + sb.append(DELIMITER); + sb.append(result.get(WaybackConstants.RESULT_MD5_DIGEST)); + sb.append(DELIMITER); + sb.append(result.get(WaybackConstants.RESULT_REDIRECT_URL)); + sb.append(DELIMITER); + sb.append(result.get(WaybackConstants.RESULT_OFFSET)); + sb.append(DELIMITER); + sb.append(result.get(WaybackConstants.RESULT_ARC_FILE)); + + return sb.toString(); + } + +} Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/indexer/IndexClient.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/indexer/IndexClient.java 2007-08-02 03:02:45 UTC (rev 1897) +++ 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/indexer/IndexClient.java 2007-08-07 01:15:26 UTC (rev 1898) @@ -24,13 +24,17 @@ */ package org.archive.wayback.resourceindex.indexer; +//import java.io.BufferedOutputStream; +import java.io.File; +//import java.io.FileFilter; import java.io.BufferedOutputStream; -import java.io.File; -import java.io.FileFilter; import java.io.FileInputStream; +import java.io.FileOutputStream; import java.io.IOException; -import java.io.OutputStream; import java.io.PrintWriter; +//import java.io.OutputStream; +//import java.io.PrintWriter; +import java.util.Iterator; import java.util.logging.Logger; import org.apache.commons.httpclient.HttpClient; @@ -38,8 +42,13 @@ import org.apache.commons.httpclient.HttpStatus; import org.apache.commons.httpclient.methods.InputStreamRequestEntity; import org.apache.commons.httpclient.methods.PutMethod; -import org.archive.wayback.core.SearchResults; -import org.archive.wayback.resourcestore.http.FileLocationDBClient; +//import org.archive.wayback.core.SearchResults; +//import org.archive.wayback.resourcestore.ArcIndexer; +//import org.archive.wayback.resourcestore.http.FileLocationDBClient; +import org.archive.wayback.core.SearchResult; +import org.archive.wayback.resourceindex.cdx.SearchResultToCDXLineAdapter; +import org.archive.wayback.util.AdaptedIterator; +import org.archive.wayback.util.Adapter; /** * @@ -51,175 +60,305 @@ private static final Logger LOGGER = Logger.getLogger(IndexClient .class.getName()); - private final static String ARC_SUFFIX = ".arc.gz"; - private final static String CDX_SUFFIX = ".cdx"; +// private final static String ARC_SUFFIX = ".arc"; +// private final static String ARC_GZ_SUFFIX = ".arc.gz"; +// private final static String CDX_SUFFIX = ".cdx"; - private String submitUrl = null; - private HttpClient client = null; - private ArcIndexer indexer = null; - /** - * Create an IndexPipelineClient for adding ARC index 
information to a - * remote index pipeline. Attempts to create workDir if it does not already - * exist. - * - * @param submitUrl - */ - public IndexClient(final String submitUrl) { - super(); - this.submitUrl = submitUrl; - this.client = new HttpClient(); - this.indexer = new ArcIndexer(); - } + private String target = null; + private File tmpDir = null; +// private String submitUrl = null; + private HttpClient client = new HttpClient(); +// private ArcIndexer indexer = null; + /** - * Inject File argument into the index pipeline specified for this client - * using HTTP PUT - * * @param cdx + * @return true if CDX was added to local or remote index * @throws HttpException * @throws IOException */ - public void uploadCDX(File cdx) throws HttpException, IOException { - String basename = cdx.getName(); - String finalUrl = submitUrl + "/" + basename; - PutMethod method = new PutMethod(finalUrl); - method.setRequestEntity(new InputStreamRequestEntity( - new FileInputStream(cdx))); + public boolean addCDX(File cdx) throws HttpException, IOException { + boolean added = false; + if(target == null) { + throw new IOException("No target set"); + } + String base = cdx.getName(); + if(target.startsWith("http://")) { + String finalUrl = target; + if(target.endsWith("/")) { + finalUrl = target + base; + } else { + finalUrl = target + "/" + base; + } + PutMethod method = new PutMethod(finalUrl); + method.setRequestEntity(new InputStreamRequestEntity( + new FileInputStream(cdx))); - int statusCode = client.executeMethod(method); - if (statusCode != HttpStatus.SC_OK) { - throw new IOException("Method failed: " + method.getStatusLine() - + " for URL " + finalUrl + " on file " - + cdx.getAbsolutePath()); - } - LOGGER.info("Uploaded cdx " + cdx.getAbsolutePath()); + int statusCode = client.executeMethod(method); + if (statusCode == HttpStatus.SC_OK) { + LOGGER.info("Uploaded cdx " + cdx.getAbsolutePath() + " to " + + finalUrl); + if(!cdx.delete()) { + throw new IOException("FAILED 
delete " + + cdx.getAbsolutePath()); + } + + added = true; + } else { + throw new IOException("Method failed: " + method.getStatusLine() + + " for URL " + finalUrl + " on file " + + cdx.getAbsolutePath()); + } + + } else { + // assume a local directory: + File toBeMergedDir = new File(target); + if(!toBeMergedDir.exists()) { + throw new IOException("Target " + target + " does not exist"); + } + if(!toBeMergedDir.isDirectory()) { + throw new IOException("Target " + target + " is not a dir"); + } + if(!toBeMergedDir.canWrite()) { + throw new IOException("Target " + target + " is not writable"); + } + File toBeMergedFile = new File(toBeMergedDir,base); + if(toBeMergedFile.exists()) { + LOGGER.severe("WARNING: "+toBeMergedFile.getAbsolutePath() + + "already exists!"); + } else { + if(cdx.renameTo(toBeMergedFile)) { + LOGGER.info("Queued " + toBeMergedFile.getAbsolutePath() + + " for merging."); + added = true; + } else { + LOGGER.severe("FAILED rename("+cdx.getAbsolutePath()+ + ") to ("+toBeMergedFile.getAbsolutePath()+")"); + } + } + } + return added; } /** - * Create a CDX file for the arc argument, and add it to the remote - * index pipeline for this client. 
- * - * @param arc - * @param workDir - * @throws IOException + * @param base + * @param itr + * @return true if data was added to local or remote index + * @throws HttpException + * @throws IOException */ - public void addArcToIndex(File arc,File workDir) throws IOException { - String arcBase = arc.getName(); - if(arcBase.endsWith(ARC_SUFFIX)) { - arcBase = arcBase.substring(0,arcBase.length() - - ARC_SUFFIX.length()); + public boolean addSearchResults(String base, Iterator<SearchResult> itr) + throws HttpException, IOException { + + if(tmpDir == null) { + throw new IOException("No tmpDir argument"); } - String cdxBase = arcBase + CDX_SUFFIX; - File tmpCDX = new File(workDir,cdxBase); - LOGGER.info("Indexing arc " + arc.getAbsolutePath()); - SearchResults results = indexer.indexArc(arc); - indexer.serializeResults(results, tmpCDX); - uploadCDX(tmpCDX); - if(!tmpCDX.delete()) { - throw new IOException("Unable to unlink " + - tmpCDX.getAbsolutePath()); + File tmpFile = new File(tmpDir,base); + if(tmpFile.exists()) { + // TODO: is this safe? 
+ if(!tmpFile.delete()) { + throw new IOException("Unable to remove tmp " + + tmpFile.getAbsolutePath()); + } } + FileOutputStream os = new FileOutputStream(tmpFile); + BufferedOutputStream bos = new BufferedOutputStream(os); + PrintWriter pw = new PrintWriter(bos); + + Adapter<SearchResult,String> adapterSRtoS = + new SearchResultToCDXLineAdapter(); + Iterator<String> itrS = + new AdaptedIterator<SearchResult,String>(itr,adapterSRtoS); + + while(itrS.hasNext()) { + pw.println(itrS.next()); + } + pw.close(); + boolean added = addCDX(tmpFile); + return added; } +// +// /** +// * Inject File argument into the index pipeline specified for this client +// * using HTTP PUT +// * +// * @param cdx +// * @throws HttpException +// * @throws IOException +// */ +// public void uploadCDX(File cdx) throws HttpException, IOException { +// String basename = cdx.getName(); +// String finalUrl = submitUrl + "/" + basename; +// PutMethod method = new PutMethod(finalUrl); +// method.setRequestEntity(new InputStreamRequestEntity( +// new FileInputStream(cdx))); +// +// int statusCode = client.executeMethod(method); +// if (statusCode != HttpStatus.SC_OK) { +// throw new IOException("Method failed: " + method.getStatusLine() +// + " for URL " + finalUrl + " on file " +// + cdx.getAbsolutePath()); +// } +// LOGGER.info("Uploaded cdx " + cdx.getAbsolutePath()); +// } +// +// /** +// * Create a CDX file for the arc argument, and add it to the remote +// * index pipeline for this client. 
+// * +// * @param arc +// * @param workDir +// * @throws IOException +// */ +// public void addArcToIndex(File arc,File workDir) throws IOException { +// String arcBase = arc.getName(); +// if(arcBase.endsWith(ARC_SUFFIX)) { +// arcBase = arcBase.substring(0,arcBase.length() - +// ARC_SUFFIX.length()); +// } +// String cdxBase = arcBase + CDX_SUFFIX; +// File tmpCDX = new File(workDir,cdxBase); +// LOGGER.info("Indexing arc " + arc.getAbsolutePath()); +// SearchResults results = indexer.indexArc(arc); +// indexer.serializeResults(results, tmpCDX); +// uploadCDX(tmpCDX); +// if(!tmpCDX.delete()) { +// throw new IOException("Unable to unlink " + +// tmpCDX.getAbsolutePath()); +// } +// } +// +// /** +// * @param arc +// * @param os +// * @throws IOException +// */ +// public void dumpArcIndex(File arc, OutputStream os) throws IOException { +// BufferedOutputStream bos = new BufferedOutputStream(os); +// PrintWriter pw = new PrintWriter(bos); +// SearchResults results = indexer.indexArc(arc); +// indexer.serializeResults(results,pw); +// } +// +// /** +// * Index each ARC in directory, upload CDX to the remote pipeline, and +// * poke the remote locationDB to let it know where this ARC can be found. 
+// * +// * @param directory +// * @param httpPrefix +// * @param locationClient +// * @param workDir +// * @throws IOException +// */ +// public void indexDirectory(File directory, String httpPrefix, +// FileLocationDBClient locationClient, File workDir) +// throws IOException { +// if(!workDir.isDirectory()) { +// if(workDir.exists()) { +// throw new IOException("workDir path " + +// workDir.getAbsolutePath() + " exists but is not a " + +// "directory"); +// } +// if(!workDir.mkdirs()) { +// throw new IOException("Failed to mkdir(" + +// workDir.getAbsolutePath() + ")"); +// } +// } +// +// if(!httpPrefix.endsWith("/")) { +// httpPrefix += "/"; +// } +// +// FileFilter filter = new FileFilter() { +// public boolean accept(File daFile) { +// return daFile.getName().endsWith(ARC_SUFFIX); +// } +// }; +// +// File[] arcs = directory.listFiles(filter); +// if(arcs == null) { +// throw new IOException("Directory " + directory.getAbsolutePath() + +// " is not a directory or had an IO error"); +// } +// for(int i = 0; i < arcs.length; i++) { +// File arc = arcs[i]; +// String arcName = arc.getName(); +// String arcUrl = httpPrefix + arcName; +// addArcToIndex(arc,workDir); +// LOGGER.info("Adding location " + arcUrl + " for arc " + arcName); +// locationClient.addArcUrl(arcName,arcUrl); +// } +// } +// +// /** +// * @param args +// */ +// public static void main(String[] args) { +// if(args.length == 1) { +// File arc = new File(args[0]); +// ArcIndexer indexer = new ArcIndexer(); +// +// BufferedOutputStream bos = new BufferedOutputStream(System.out); +// PrintWriter pw = new PrintWriter(bos); +// SearchResults results; +// try { +// results = indexer.indexArc(arc); +// indexer.serializeResults(results,pw); +// } catch (IOException e) { +// e.printStackTrace(); +// System.exit(1); +// } +// return; +// } else if(args.length != 5) { +// System.err.println("Usage: workDir pipelineUrl locationUrl arcDir arcUrlPrefix"); +// System.err.println("Usage: arcPath"); +// return; 
+// } +// File workDir = new File(args[0]); +// String pipelineUrl = args[1]; +// String locationUrl = args[2]; +// File arcDir = new File(args[3]); +// String arcDirPrefix = args[4]; +// IndexClient pipeClient; +// FileLocationDBClient locClient = new FileLocationDBClient(locationUrl); +// try { +// pipeClient = new IndexClient(pipelineUrl); +// pipeClient.indexDirectory(arcDir,arcDirPrefix,locClient,workDir); +// } catch (IOException e) { +// e.printStackTrace(); +// System.exit(1); +// } +// } + /** - * @param arc - * @param os - * @throws IOException + * @return the target */ - public void dumpArcIndex(File arc, OutputStream os) throws IOException { - BufferedOutputStream bos = new BufferedOutputStream(os); - PrintWriter pw = new PrintWriter(bos); - SearchResults results = indexer.indexArc(arc); - indexer.serializeResults(results,pw); + public String getTarget() { + return target; } - + /** - * Index each ARC in directory, upload CDX to the remote pipeline, and - * poke the remote locationDB to let it know where this ARC can be found. 
- * - * @param directory - * @param httpPrefix - * @param locationClient - * @param workDir - * @throws IOException + * @param target the target to set */ - public void indexDirectory(File directory, String httpPrefix, - FileLocationDBClient locationClient, File workDir) - throws IOException { - if(!workDir.isDirectory()) { - if(workDir.exists()) { - throw new IOException("workDir path " + - workDir.getAbsolutePath() + " exists but is not a " + - "directory"); - } - if(!workDir.mkdirs()) { - throw new IOException("Failed to mkdir(" + - workDir.getAbsolutePath() + ")"); - } - } - - if(!httpPrefix.endsWith("/")) { - httpPrefix += "/"; - } - - FileFilter filter = new FileFilter() { - public boolean accept(File daFile) { - return daFile.getName().endsWith(ARC_SUFFIX); - } - }; + public void setTarget(String target) { + this.target = target; + } - File[] arcs = directory.listFiles(filter); - if(arcs == null) { - throw new IOException("Directory " + directory.getAbsolutePath() + - " is not a directory or had an IO error"); + /** + * @return the tmpDir + */ + public String getTmpDir() { + if(tmpDir == null) { + return null; } - for(int i = 0; i < arcs.length; i++) { - File arc = arcs[i]; - String arcName = arc.getName(); - String arcUrl = httpPrefix + arcName; - addArcToIndex(arc,workDir); - LOGGER.info("Adding location " + arcUrl + " for arc " + arcName); - locationClient.addArcUrl(arcName,arcUrl); - } + return tmpDir.getAbsolutePath(); } - + /** - * @param args + * @param tmpDir the tmpDir to set */ - public static void main(String[] args) { - if(args.length == 1) { - File arc = new File(args[0]); - ArcIndexer indexer = new ArcIndexer(); - - BufferedOutputStream bos = new BufferedOutputStream(System.out); - PrintWriter pw = new PrintWriter(bos); - SearchResults results; - try { - results = indexer.indexArc(arc); - indexer.serializeResults(results,pw); - } catch (IOException e) { - e.printStackTrace(); - System.exit(1); - } - return; - } else if(args.length != 5) { - 
System.err.println("Usage: workDir pipelineUrl locationUrl arcDir arcUrlPrefix"); - System.err.println("Usage: arcPath"); - return; - } - File workDir = new File(args[0]); - String pipelineUrl = args[1]; - String locationUrl = args[2]; - File arcDir = new File(args[3]); - String arcDirPrefix = args[4]; - IndexClient pipeClient; - FileLocationDBClient locClient = new FileLocationDBClient(locationUrl); - try { - pipeClient = new IndexClient(pipelineUrl); - pipeClient.indexDirectory(arcDir,arcDirPrefix,locClient,workDir); - } catch (IOException e) { - e.printStackTrace(); - System.exit(1); - } + public void setTmpDir(String tmpDir) { + this.tmpDir = new File(tmpDir); } } Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/ARCRecordToSearchResultAdapter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/ARCRecordToSearchResultAdapter.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/ARCRecordToSearchResultAdapter.java 2007-08-07 01:15:26 UTC (rev 1898) @@ -0,0 +1,164 @@ +/* ArcRecordToSearchResultAdapter + * + * $Id$ + * + * Created on 3:27:03 PM Jul 26, 2007. + * + * Copyright (C) 2007 Internet Archive. + * + * This file is part of wayback-core. + * + * wayback-core is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback-core is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with wayback-core; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.resourcestore; + +import java.io.File; +import java.io.IOException; +import java.util.logging.Logger; + +import org.apache.commons.httpclient.Header; +import org.apache.commons.httpclient.URIException; +import org.archive.io.arc.ARCRecord; +import org.archive.io.arc.ARCRecordMetaData; +import org.archive.net.UURI; +import org.archive.net.UURIFactory; +import org.archive.wayback.WaybackConstants; +import org.archive.wayback.core.SearchResult; +import org.archive.wayback.util.Adapter; +import org.archive.wayback.util.UrlCanonicalizer; + +/** + * + * + * @author brad + * @version $Date$, $Revision$ + */ +public class ARCRecordToSearchResultAdapter +implements Adapter<ARCRecord,SearchResult>{ + + private static final Logger LOGGER = Logger.getLogger( + ARCRecordToSearchResultAdapter.class.getName()); + + // TODO: make this configurable based on the ResourceIndex + private static UrlCanonicalizer canonicalizer = new UrlCanonicalizer(); + +// public static SearchResult arcRecordToSearchResult(final ARCRecord rec) +// throws IOException, ParseException { + /* (non-Javadoc) + * @see org.archive.wayback.util.Adapter#adapt(java.lang.Object) + */ + public SearchResult adapt(ARCRecord rec) { + try { + return adaptInner(rec); + } catch (IOException e) { + e.printStackTrace(); + return null; + } + } + + private SearchResult adaptInner(ARCRecord rec) throws IOException { + rec.close(); + ARCRecordMetaData meta = rec.getMetaData(); + + SearchResult result = new SearchResult(); + String arcName = meta.getArc(); + int index = arcName.lastIndexOf(File.separator); + if (index > 0 && (index + 1) < arcName.length()) { + arcName = arcName.substring(index + 1); + } + result.put(WaybackConstants.RESULT_ARC_FILE, arcName); + 
result.put(WaybackConstants.RESULT_OFFSET, String.valueOf(meta + .getOffset())); + + // initialize with default HTTP code... + result.put(WaybackConstants.RESULT_HTTP_CODE, "-"); + + result.put(WaybackConstants.RESULT_MD5_DIGEST, rec.getDigestStr()); + result.put(WaybackConstants.RESULT_MIME_TYPE, meta.getMimetype()); + result.put(WaybackConstants.RESULT_CAPTURE_DATE, meta.getDate()); + + String uriStr = meta.getUrl(); + if (uriStr.startsWith(ARCRecord.ARC_MAGIC_NUMBER)) { + // skip filedesc record altogether... + return null; + } + if (uriStr.startsWith(WaybackConstants.DNS_URL_PREFIX)) { + // skip URL + HTTP header processing for dns records... + + String origHost = uriStr.substring(WaybackConstants.DNS_URL_PREFIX + .length()); + result.put(WaybackConstants.RESULT_ORIG_HOST, origHost); + result.put(WaybackConstants.RESULT_REDIRECT_URL, "-"); + result.put(WaybackConstants.RESULT_URL, uriStr); + result.put(WaybackConstants.RESULT_URL_KEY, uriStr); + + } else { + + UURI uri = UURIFactory.getInstance(uriStr); + result.put(WaybackConstants.RESULT_URL, uriStr); + + String uriHost = uri.getHost(); + if (uriHost == null) { + LOGGER.info("No host in " + uriStr + " in " + meta.getArc()); + } else { + result.put(WaybackConstants.RESULT_ORIG_HOST, uriHost); + + String statusCode = (meta.getStatusCode() == null) ? "-" : meta + .getStatusCode(); + result.put(WaybackConstants.RESULT_HTTP_CODE, statusCode); + + String redirectUrl = "-"; + Header[] headers = rec.getHttpHeaders(); + if (headers != null) { + + for (int i = 0; i < headers.length; i++) { + if (headers[i].getName().equals( + WaybackConstants.LOCATION_HTTP_HEADER)) { + + String locationStr = headers[i].getValue(); + // TODO: "Location" is supposed to be absolute: + // (http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html) + // (section 14.30) but Content-Location can be + // relative. + // is it correct to resolve a relative Location, as + // we are? + // it's also possible to have both in the HTTP + // headers... 
+ // should we prefer one over the other? + // right now, we're ignoring "Content-Location" + try { + UURI uriRedirect = UURIFactory.getInstance(uri, + locationStr); + redirectUrl = uriRedirect.getEscapedURI(); + + } catch (URIException e) { + LOGGER.info("Bad Location: " + locationStr + + " for " + uriStr + " in " + + meta.getArc() + " Skipped"); + } + break; + } + } + } + result.put(WaybackConstants.RESULT_REDIRECT_URL, redirectUrl); + + String indexUrl = canonicalizer.urlStringToKey(meta.getUrl()); + result.put(WaybackConstants.RESULT_URL_KEY, indexUrl); + } + + } + return result; + } +} Copied: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/ArcIndexer.java (from rev 1889, trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/indexer/ArcIndexer.java) =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/ArcIndexer.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/ArcIndexer.java 2007-08-07 01:15:26 UTC (rev 1898) @@ -0,0 +1,432 @@ +/* ArcIndexer + * + * $Id$ + * + * Created on 2:33:29 PM Oct 11, 2006. + * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of Wayback. + * + * Wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.resourcestore; + +//import java.io.BufferedOutputStream; +import java.io.File; +//import java.io.FileOutputStream; +import java.io.IOException; +//import java.io.PrintWriter; +//import java.text.ParseException; +import java.util.Iterator; +//import java.util.logging.Logger; + +//import org.apache.commons.httpclient.Header; +//import org.apache.commons.httpclient.URIException; +import org.archive.io.ArchiveRecord; +import org.archive.io.arc.ARCReader; +import org.archive.io.arc.ARCReaderFactory; +import org.archive.io.arc.ARCRecord; +//import org.archive.io.arc.ARCRecordMetaData; +//import org.archive.net.UURI; +//import org.archive.net.UURIFactory; +//import org.archive.wayback.WaybackConstants; +//import org.archive.wayback.bdb.BDBRecord; +//import org.archive.wayback.core.CaptureSearchResults; +import org.archive.wayback.core.SearchResult; +//import org.archive.wayback.core.SearchResults; +//import org.archive.wayback.resourceindex.bdb.SearchResultToBDBRecordAdapter; +//import org.archive.wayback.resourceindex.cdx.CDXLineToSearchResultAdapter; +//import org.archive.wayback.resourceindex.cdx.SearchResultToCDXLineAdapter; +import org.archive.wayback.util.AdaptedIterator; +import org.archive.wayback.util.Adapter; +import org.archive.wayback.util.CloseableIterator; +//import org.archive.wayback.util.UrlCanonicalizer; +//import org.archive.wayback.util.flatfile.FlatFile; + +/** + * Transforms an ARC file into SearchResults, or a serialized SearchResults + * file(CDX). + * + * @author brad + * @version $Date$, $Revision$ + */ +public class ArcIndexer { + + /** + * CDX Header line for these fields. not very configurable.. 
+ */ + public final static String CDX_HEADER_MAGIC = " CDX N b h m s k r V g"; + +// /** +// * Logger for this class +// */ +// private static final Logger LOGGER = Logger.getLogger(ArcIndexer.class +// .getName()); + +// /** +// * Constant indicating entire CDX line +// */ +// protected final static int TYPE_CDX_LINE = 0; +// +// /** +// * Constant indicating entire url + timestamp only +// */ +// protected final static int TYPE_CDX_KEY = 1; +// +// /** +// * Constant indicating trailing data fields from CDX line following url + +// * timestamp +// */ +// protected final static int TYPE_CDX_VALUE = 2; + +// static UrlCanonicalizer canonicalizer = new UrlCanonicalizer(); + +// private final static int DEFAULT_CAPACITY = 120; + + /** + * @param arc + * @return Iterator of SearchResults for input arc File + * @throws IOException + */ + public CloseableIterator<SearchResult> iterator(File arc) + throws IOException { + ARCReader arcReader = ARCReaderFactory.get(arc); + arcReader.setParseHttpHeaders(true); + + Adapter<ArchiveRecord,ARCRecord> adapter1 = + new ArchiveRecordToARCRecordAdapter(); + + Adapter<ARCRecord,SearchResult> adapter2 = + new ARCRecordToSearchResultAdapter(); + + Iterator<ArchiveRecord> itr1 = arcReader.iterator(); + + CloseableIterator<ARCRecord> itr2 = + new AdaptedIterator<ArchiveRecord,ARCRecord>(itr1,adapter1); + + return new AdaptedIterator<ARCRecord,SearchResult>(itr2,adapter2); + } + + + private class ArchiveRecordToARCRecordAdapter + implements Adapter<ArchiveRecord,ARCRecord> { + + /* (non-Javadoc) + * @see org.archive.wayback.util.Adapter#adapt(java.lang.Object) + */ + public ARCRecord adapt(ArchiveRecord o) { + ARCRecord rec = null; + if(o instanceof ARCRecord) { + rec = (ARCRecord) o; + } + return rec; + } + + } + +// /** +// * Create a ResourceResults representing the records in ARC file at arcPath. +// * +// * @param arc +// * @return ResourceResults in arcPath. 
+// * @throws IOException +// */ +// public SearchResults indexArc(File arc) throws IOException { +// CaptureSearchResults results = new CaptureSearchResults(); +// ARCReader arcReader = ARCReaderFactory.get(arc); +// try { +// arcReader.setParseHttpHeaders(true); +// // doh. this does not generate quite the columns we need: +// // arcReader.createCDXIndexFile(arcPath); +// Iterator<ArchiveRecord> itr = arcReader.iterator(); +// while (itr.hasNext()) { +// ARCRecord rec = (ARCRecord) itr.next(); +// SearchResult result; +// try { +// result = arcRecordToSearchResult(rec); +// } catch (NullPointerException e) { +// e.printStackTrace(); +// continue; +// } catch (ParseException e) { +// e.printStackTrace(); +// continue; +// } +// if (result != null) { +// results.addSearchResult(result); +// } +// } +// } finally { +// arcReader.close(); +// } +// return results; +// } + +// /** +// * transform an ARCRecord into a SearchResult +// * +// * @param rec +// * @param arc +// * @return SearchResult for this document +// * @throws IOException +// * @throws ParseException +// */ +// public static SearchResult arcRecordToSearchResult(final ARCRecord rec) +// throws IOException, ParseException { +// rec.close(); +// ARCRecordMetaData meta = rec.getMetaData(); +// +// SearchResult result = new SearchResult(); +// String arcName = meta.getArc(); +// int index = arcName.lastIndexOf(File.separator); +// if (index > 0 && (index + 1) < arcName.length()) { +// arcName = arcName.substring(index + 1); +// } +// result.put(WaybackConstants.RESULT_ARC_FILE, arcName); +// result.put(WaybackConstants.RESULT_OFFSET, String.valueOf(meta +// .getOffset())); +// +// // initialize with default HTTP code... 
+// result.put(WaybackConstants.RESULT_HTTP_CODE, "-"); +// +// result.put(WaybackConstants.RESULT_MD5_DIGEST, rec.getDigestStr()); +// result.put(WaybackConstants.RESULT_MIME_TYPE, meta.getMimetype()); +// result.put(WaybackConstants.RESULT_CAPTURE_DATE, meta.getDate()); +// +// String uriStr = meta.getUrl(); +// if (uriStr.startsWith(ARCRecord.ARC_MAGIC_NUMBER)) { +// // skip filedesc record altogether... +// return null; +// } +// if (uriStr.startsWith(WaybackConstants.DNS_URL_PREFIX)) { +// // skip URL + HTTP header processing for dns records... +// +// String origHost = uriStr.substring(WaybackConstants.DNS_URL_PREFIX +// .length()); +// result.put(WaybackConstants.RESULT_ORIG_HOST, origHost); +// result.put(WaybackConstants.RESULT_REDIRECT_URL, "-"); +// result.put(WaybackConstants.RESULT_URL, uriStr); +// result.put(WaybackConstants.RESULT_URL_KEY, uriStr); +// +// } else { +// +// UURI uri = UURIFactory.getInstance(uriStr); +// result.put(WaybackConstants.RESULT_URL, uriStr); +// +// String uriHost = uri.getHost(); +// if (uriHost == null) { +// LOGGER.info("No host in " + uriStr + " in " + meta.getArc()); +// } else { +// result.put(WaybackConstants.RESULT_ORIG_HOST, uriHost); +// +// String statusCode = (meta.getStatusCode() == null) ? "-" : meta +// .getStatusCode(); +// result.put(WaybackConstants.RESULT_HTTP_CODE, statusCode); +// +// String redirectUrl = "-"; +// Header[] headers = rec.getHttpHeaders(); +// if (headers != null) { +// +// for (int i = 0; i < headers.length; i++) { +// if (headers[i].getName().equals(LOCATION_HTTP_HEADER)) { +// String locationStr = headers[i].getValue(); +// // TODO: "Location" is supposed to be absolute: +// // (http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html) +// // (section 14.30) but Content-Location can be +// // relative. +// // is it correct to resolve a relative Location, as +// // we are? +// // it's also possible to have both in the HTTP +// // headers... +// // should we prefer one over the other? 
+// // right now, we're ignoring "Content-Location" +// try { +// UURI uriRedirect = UURIFactory.getInstance(uri, +// locationStr); +// redirectUrl = uriRedirect.getEscapedURI(); +// +// } catch (URIException e) { +// LOGGER.info("Bad Location: " + locationStr +// + " for " + uriStr + " in " +// + meta.getArc() + " Skipped"); +// } +// break; +// } +// } +// } +// result.put(WaybackConstants.RESULT_REDIRECT_URL, redirectUrl); +// +// String indexUrl = canonicalizer.urlStringToKey(meta.getUrl()); +// result.put(WaybackConstants.RESULT_URL_KEY, indexUrl); +// } +// +// } +// return result; +// } +// +// /** +// * Write out ResourceResults into CDX file at cdxPath +// * +// * @param results +// * @param target +// * @throws IOException +// */ +// public void serializeResults(final SearchResults results, File target) +// throws IOException { +// +// FileOutputStream os = new FileOutputStream(target); +// BufferedOutputStream bos = new BufferedOutputStream(os); +// PrintWriter pw = new PrintWriter(bos); +// try { +// serializeResults(results, pw); +// } finally { +// pw.close(); +// } +// } +// +// /** +// * @param results +// * @param pw +// * @param addHeader +// * @throws IOException +// */ +// public void serializeResults(final SearchResults results, PrintWriter pw, +// final boolean addHeader) +// throws IOException { +// if(addHeader) { +// pw.println(CDX_HEADER_MAGIC); +// } +// Iterator<SearchResult> itrR = results.iterator(); +// Iterator<String> itrS = new AdaptedIterator<SearchResult,String>(itrR, +// new SearchResultToCDXLineAdapter()); +// while (itrS.hasNext()) { +// pw.println(itrS.next()); +// } +// pw.flush(); +// } +// +// +// /** +// * @param results +// * @param pw +// * @throws IOException +// */ +// public void serializeResults(final SearchResults results, PrintWriter pw) +// throws IOException { +// serializeResults(results,pw,true); +// } + +// /** +// * @param rec +// * @return String in "CDX format" for rec argument +// * @throws IOException 
+// * @throws ParseException +// */ +// public static String arcRecordToCDXLine(ARCRecord rec) +// throws IOException, ParseException { +// return searchResultToString(arcRecordToSearchResult(rec),TYPE_CDX_LINE); +// } + +// /** +// * Transform a SearchResult into a String representation. +// * +// * @param result +// * @param type +// * @return String value of either line, key or value for the SearchResult +// */ +// protected static String searchResultToString(final SearchResult result, +// int type) { +// +// StringBuilder sb = new StringBuilder(DEFAULT_CAPACITY); +// +// if (type == TYPE_CDX_LINE) { +// +// sb.append(result.get(WaybackConstants.RESULT_URL_KEY)); +// sb.append(" "); +// sb.append(result.get(WaybackConstants.RESULT_CAPTURE_DATE)); +// sb.append(" "); +// sb.append(result.get(WaybackConstants.RESULT_ORIG_HOST)); +// sb.append(" "); +// sb.append(result.get(WaybackConstants.RESULT_MIME_TYPE)); +// sb.append(" "); +// sb.append(result.get(WaybackConstants.RESULT_HTTP_CODE)); +// sb.append(" "); +// sb.append(result.get(WaybackConstants.RESULT_MD5_DIGEST)); +// sb.append(" "); +// sb.append(result.get(WaybackConstants.RESULT_REDIRECT_URL)); +// sb.append(" "); +// sb.append(result.get(WaybackConstants.RESULT_OFFSET)); +// sb.append(" "); +// sb.append(result.get(WaybackConstants.RESULT_ARC_FILE)); +// +// } else if (type == TYPE_CDX_KEY) { +// +// sb.append(result.get(WaybackConstants.RESULT_URL_KEY)); +// sb.append(" "); +// sb.append(result.get(WaybackConstants.RESULT_CAPTURE_DATE)); +// sb.append(" "); +// sb.append(result.get(WaybackConstants.RESULT_OFFSET)); +// sb.append(" "); +// sb.append(result.get(WaybackConstants.RESULT_ARC_FILE)); +// +// } else if (type == TYPE_CDX_VALUE) { +// +// sb.append(result.get(WaybackConstants.RESULT_ORIG_HOST)); +// sb.append(" "); +// sb.append(result.get(WaybackConstants.RESULT_MIME_TYPE)); +// sb.append(" "); +// sb.append(result.get(WaybackConstants.RESULT_HTTP_CODE)); +// sb.append(" "); +// 
sb.append(result.get(WaybackConstants.RESULT_MD5_DIGEST)); +// sb.append(" "); +// sb.append(result.get(WaybackConstants.RESULT_REDIRECT_URL)); +// +// } else { +// throw new IllegalArgumentException("Unknown transformation type"); +// } +// return sb.toString(); +// } + +// /** +// * @param cdxFile +// * @return Iterator that will return BDBRecords, one for each line in +// * cdxFile argument +// * @throws IOException +// */ +// public Iterator<BDBRecord> getCDXFileBDBRecordIterator(File cdxFile) throws IOException { +// FlatFile ffile = new FlatFile(cdxFile.getAbsolutePath()); +// AdaptedIterator<String,SearchResult> searchResultItr = +// new AdaptedIterator<String,SearchResult>( +// ffile.getSequentialIterator(), +// new CDXLineToSearchResultAdapter()); +// return new AdaptedIterator<SearchResult,BDBRecord>(searchResultItr, +// new SearchResultToBDBRecordAdapter()); +// } + +// /** +// * @param args +// */ +// public static void main(String[] args) { +// ArcIndexer indexer = new ArcIndexer(); +// File arc = new File(args[0]); +// File cdx = new File(args[1]); +// try { +// SearchResults results = indexer.indexArc(arc); +// indexer.serializeResults(results, cdx); +// } catch (Exception e) { +// e.printStackTrace(); +// } +// } +} Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/LocalARCResourceStore.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/LocalARCResourceStore.java 2007-08-02 03:02:45 UTC (rev 1897) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/LocalARCResourceStore.java 2007-08-07 01:15:26 UTC (rev 1898) @@ -28,9 +28,10 @@ import java.io.IOException; import java.net.MalformedURLException; import java.util.HashMap; +import java.util.Iterator; import java.util.logging.Logger; -import 
org.apache.commons.httpclient.HttpException; +//import org.apache.commons.httpclient.HttpException; import org.archive.io.ArchiveRecord; import org.archive.io.arc.ARCReader; import org.archive.io.arc.ARCReaderFactory; @@ -39,11 +40,11 @@ import org.archive.wayback.WaybackConstants; import org.archive.wayback.core.Resource; import org.archive.wayback.core.SearchResult; -import org.archive.wayback.core.SearchResults; +//import org.archive.wayback.core.SearchResults; import org.archive.wayback.exception.ConfigurationException; import org.archive.wayback.exception.ResourceNotAvailableException; -import org.archive.wayback.resourceindex.indexer.ArcIndexer; import org.archive.wayback.resourceindex.indexer.IndexClient; +//import org.archive.wayback.util.CloseableIterator; /** * Implements ResourceStore using a local directory of ARC files. @@ -58,10 +59,10 @@ private final static int DEFAULT_RUN_INTERVAL_MS = 10000; private File arcDir = null; - private File tmpDir = null; +// private File tmpDir = null; private File workDir = null; private File queuedDir = null; - private String indexTarget = null; +// private String indexTarget = null; private int runInterval = DEFAULT_RUN_INTERVAL_MS; private IndexClient indexClient = null; private ArcIndexer indexer = new ArcIndexer(); @@ -79,7 +80,7 @@ if(arcDir == null) { throw new ConfigurationException("No arcDir set"); } - if(indexTarget != null) { + if(indexClient != null) { startAutoIndexThread(); } } @@ -158,40 +159,40 @@ } } - private boolean uploadCDX(File cdxFile) { - boolean uploaded = false; - if(indexClient == null) { - // assume we just need to move it to a local directory: - File toBeMergedDir = new File(indexTarget); - File toBeMergedFile = new File(toBeMergedDir,cdxFile.getName()); - if(toBeMergedFile.exists()) { - LOGGER.severe("WARNING: "+toBeMergedFile.getAbsolutePath() + - "already exists!"); - } else { - if(cdxFile.renameTo(toBeMergedFile)) { - LOGGER.info("Queued " + toBeMergedFile.getAbsolutePath() + - " for 
merging."); - uploaded = true; - } else { - LOGGER.severe("FAILED rename("+cdxFile.getAbsolutePath()+ - ") to ("+toBeMergedFile.getAbsolutePath()+")"); - } - } - } else { - // use indexClient to upload: - try { - indexClient.uploadCDX(cdxFile); - LOGGER.info("Uploaded " + cdxFile.getAbsolutePath()); - uploaded = true; - } catch (HttpException e) { - e.printStackTrace(); - } catch (IOException e) { - e.printStackTrace(); - } - } - return uploaded; - } - +// private boolean uploadCDX(File cdxFile) { +// boolean uploaded = false; +// if(indexClient == null) { +// // assume we just need to move it to a local directory: +// File toBeMergedDir = new File(indexTarget); +// File toBeMergedFile = new File(toBeMergedDir,cdxFile.getName()); +// if(toBeMergedFile.exists()) { +// LOGGER.severe("WARNING: "+toBeMergedFile.getAbsolutePath() + +/... [truncated message content] |
From: <bra...@us...> - 2007-08-18 00:48:11
|
Revision: 1901 http://archive-access.svn.sourceforge.net/archive-access/?rev=1901&view=rev Author: bradtofel Date: 2007-08-17 17:48:14 -0700 (Fri, 17 Aug 2007) Log Message: ----------- FEATURE: form and open search parsers now accept 'xmlquery' requests and annotate the WaybackRequest object indicating the user wants XML data returned. FEATURE: all request parsers now accept a default "earliestTimestamp" property which is used if the user does not specify a start date in their request. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathDatePrefixQueryRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathPrefixDatePrefixQueryRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/BaseRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/CompositeRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/FormRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/OpenSearchRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/PathRequestParser.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathDatePrefixQueryRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathDatePrefixQueryRequestParser.java 2007-08-07 01:19:24 UTC (rev 1900) +++ 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathDatePrefixQueryRequestParser.java 2007-08-18 00:48:14 UTC (rev 1901) @@ -58,7 +58,12 @@ String dateStr = matcher.group(1); String urlStr = matcher.group(2); - String startDate = Timestamp.parseBefore(dateStr).getDateStr(); + String startDate; + if(dateStr.length() == 0) { + startDate = earliestTimestamp; + } else { + startDate = Timestamp.parseBefore(dateStr).getDateStr(); + } String endDate = Timestamp.parseAfter(dateStr).getDateStr(); wbRequest.put(WaybackConstants.REQUEST_START_DATE,startDate); wbRequest.put(WaybackConstants.REQUEST_END_DATE,endDate); Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathPrefixDatePrefixQueryRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathPrefixDatePrefixQueryRequestParser.java 2007-08-07 01:19:24 UTC (rev 1900) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathPrefixDatePrefixQueryRequestParser.java 2007-08-18 00:48:14 UTC (rev 1901) @@ -56,7 +56,14 @@ wbRequest = new WaybackRequest(); String dateStr = matcher.group(1); String urlStr = matcher.group(2); - String startDate = Timestamp.parseBefore(dateStr).getDateStr(); + + String startDate; + if(dateStr.length() == 0) { + startDate = earliestTimestamp; + } else { + startDate = Timestamp.parseBefore(dateStr).getDateStr(); + } + String endDate = Timestamp.parseAfter(dateStr).getDateStr(); wbRequest.put(WaybackConstants.REQUEST_START_DATE, startDate); Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/BaseRequestParser.java =================================================================== --- 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/BaseRequestParser.java 2007-08-07 01:19:24 UTC (rev 1900) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/BaseRequestParser.java 2007-08-18 00:48:14 UTC (rev 1901) @@ -30,6 +30,7 @@ import org.archive.wayback.RequestParser; import org.archive.wayback.WaybackConstants; +import org.archive.wayback.core.Timestamp; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.exception.BadQueryException; import org.archive.wayback.webapp.WaybackContext; @@ -58,11 +59,16 @@ protected final static String QUERY_BASE = "query"; + protected final static String XQUERY_BASE = "xmlquery"; + protected final static String REPLAY_BASE = "replay"; protected final static int DEFAULT_MAX_RECORDS = 10; protected int maxRecords = DEFAULT_MAX_RECORDS; + + protected String earliestTimestamp = + Timestamp.earliestTimestamp().getDateStr(); protected static String getMapParam(Map<String,String[]> queryMap, String field) { @@ -142,4 +148,17 @@ public void setMaxRecords(int maxRecords) { this.maxRecords = maxRecords; } + /** + * @param timestamp + */ + public void setEarliestTimestamp(String timestamp) { + earliestTimestamp = Timestamp.parseBefore(timestamp).getDateStr(); + } + /** + * @return + */ + public String getEarliestTimestamp() { + return earliestTimestamp; + } + } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/CompositeRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/CompositeRequestParser.java 2007-08-07 01:19:24 UTC (rev 1900) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/CompositeRequestParser.java 2007-08-18 00:48:14 UTC (rev 1901) @@ -47,6 +47,7 @@ parsers = 
getRequestParsers(); for(int i = 0; i < parsers.length; i++) { parsers[i].setMaxRecords(maxRecords); + parsers[i].setEarliestTimestamp(earliestTimestamp); } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/FormRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/FormRequestParser.java 2007-08-07 01:19:24 UTC (rev 1900) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/FormRequestParser.java 2007-08-18 00:48:14 UTC (rev 1901) @@ -66,6 +66,11 @@ } else if(base.startsWith(QUERY_BASE)) { wbRequest.put(WaybackConstants.REQUEST_TYPE, WaybackConstants.REQUEST_URL_QUERY); + } else if(base.startsWith(XQUERY_BASE)){ + wbRequest.put(WaybackConstants.REQUEST_TYPE, + WaybackConstants.REQUEST_URL_QUERY); + wbRequest.put(WaybackConstants.REQUEST_XML_DATA,"1"); + } else { return null; } @@ -81,6 +86,10 @@ String val = getMapParam(queryMap,key); wbRequest.put(key,val); } + if(wbRequest.get(WaybackConstants.REQUEST_START_DATE) == null) { + wbRequest.put(WaybackConstants.REQUEST_START_DATE, + earliestTimestamp); + } } if(wbRequest != null) { addHttpHeaderFields(wbRequest, httpRequest); Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/OpenSearchRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/OpenSearchRequestParser.java 2007-08-07 01:19:24 UTC (rev 1900) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/OpenSearchRequestParser.java 2007-08-18 00:48:14 UTC (rev 1901) @@ -92,6 +92,11 @@ } else if(base.startsWith(QUERY_BASE)){ wbRequest.put(WaybackConstants.REQUEST_TYPE, 
WaybackConstants.REQUEST_URL_QUERY); + } else if(base.startsWith(XQUERY_BASE)){ + wbRequest.put(WaybackConstants.REQUEST_TYPE, + WaybackConstants.REQUEST_URL_QUERY); + wbRequest.put(WaybackConstants.REQUEST_XML_DATA,"1"); + } else { return null; } @@ -139,7 +144,10 @@ // let's just let em all thru for now: wbRequest.put(key, value); } - + if(wbRequest.get(WaybackConstants.REQUEST_START_DATE) == null) { + wbRequest.put(WaybackConstants.REQUEST_START_DATE, + earliestTimestamp); + } addHttpHeaderFields(wbRequest, httpRequest); return wbRequest; Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/PathRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/PathRequestParser.java 2007-08-07 01:19:24 UTC (rev 1900) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/PathRequestParser.java 2007-08-18 00:48:14 UTC (rev 1901) @@ -68,6 +68,7 @@ WaybackRequest wbRequest = parse(requestPath); if(wbRequest != null) { addHttpHeaderFields(wbRequest, httpRequest); + wbRequest.setResultsPerPage(maxRecords); } return wbRequest; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2007-08-23 21:09:35
|
Revision: 1913 http://archive-access.svn.sourceforge.net/archive-access/?rev=1913&view=rev Author: bradtofel Date: 2007-08-23 14:09:34 -0700 (Thu, 23 Aug 2007) Log Message: ----------- REFACTOR: entire ReplayUI refactoring, splitting the bulk of the code into several org.archive.wayback.replay.* utility classes, which make the actual ReplayRenderers quite small. Also introduces a new ReplayDispatcher interface, which inspects a Resource, and dispatches the render operation to the correct ReplayRenderer implementation. Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/ReplayDispatcher.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/BaseReplayDispatcher.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/DateRedirectReplayRenderer.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/HTMLPage.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/HttpHeaderOperation.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/HttpHeaderProcessor.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/StringHttpServletResponseWrapper.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TransparentReplayRenderer.java Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/ReplayDispatcher.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/ReplayDispatcher.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/ReplayDispatcher.java 2007-08-23 21:09:34 UTC (rev 1913) @@ -0,0 +1,59 @@ +/* ReplayDispatcher + * + * $Id$ + * + * 
Created on 6:10:18 PM Aug 9, 2007. + * + * Copyright (C) 2007 Internet Archive. + * + * This file is part of wayback-core. + * + * wayback-core is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback-core is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with wayback-core; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback; + +import java.io.IOException; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.archive.wayback.core.WaybackRequest; +import org.archive.wayback.exception.WaybackException; + +/** + * + * + * @author brad + * @version $Date$, $Revision$ + */ +public interface ReplayDispatcher extends ReplayRenderer { + /** + * Render the contents of a WaybackException in either html, javascript, or + * css format, depending on the guessed context, so errors in embedded + * documents do not cause unneeded errors in the embedding document. 
+ * + * @param httpRequest + * @param httpResponse + * @param wbRequest + * @param exception + * @throws ServletException + * @throws IOException + */ + public void renderException(HttpServletRequest httpRequest, + HttpServletResponse httpResponse, WaybackRequest wbRequest, + WaybackException exception) throws ServletException, IOException; + +} Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/BaseReplayDispatcher.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/BaseReplayDispatcher.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/BaseReplayDispatcher.java 2007-08-23 21:09:34 UTC (rev 1913) @@ -0,0 +1,176 @@ +/* ReplayRendererDispatcher + * + * $Id$ + * + * Created on 5:23:35 PM Aug 8, 2007. + * + * Copyright (C) 2007 Internet Archive. + * + * This file is part of wayback-core. + * + * wayback-core is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback-core is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with wayback-core; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.replay; + +import java.io.IOException; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import javax.servlet.RequestDispatcher; +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.archive.wayback.ReplayDispatcher; +import org.archive.wayback.ReplayRenderer; +import org.archive.wayback.ResultURIConverter; +import org.archive.wayback.WaybackConstants; +import org.archive.wayback.core.Resource; +import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.SearchResults; +import org.archive.wayback.core.UIResults; +import org.archive.wayback.core.WaybackRequest; +import org.archive.wayback.exception.WaybackException; + +/** + * + * + * @author brad + * @version $Date$, $Revision$ + */ +public abstract class BaseReplayDispatcher implements ReplayDispatcher { + + private String errorJsp = "/jsp/HTMLError.jsp"; + private String imageErrorJsp = "/jsp/HTMLError.jsp"; + private String javascriptErrorJsp = "/jsp/JavaScriptError.jsp"; + private String cssErrorJsp = "/jsp/CSSError.jsp"; + + protected final Pattern IMAGE_REGEX = Pattern + .compile(".*\\.(jpg|jpeg|gif|png|bmp|tiff|tif)$"); + + /* ERROR HANDLING RESPONSES: */ + + private boolean requestIsEmbedded(HttpServletRequest httpRequest, + WaybackRequest wbRequest) { + // without a wbRequest, assume it is not embedded: send back HTML + if (wbRequest == null) { + return false; + } + String referer = wbRequest.get(WaybackConstants.REQUEST_REFERER_URL); + return (referer != null && referer.length() > 0); + } + + private boolean requestIsImage(HttpServletRequest httpRequest, + WaybackRequest wbRequest) { + String requestUrl = 
wbRequest.get(WaybackConstants.REQUEST_URL); + if (requestUrl == null) + return false; + Matcher matcher = IMAGE_REGEX.matcher(requestUrl); + return (matcher != null && matcher.matches()); + } + + private boolean requestIsJavascript(HttpServletRequest httpRequest, + WaybackRequest wbRequest) { + + String requestUrl = wbRequest.get(WaybackConstants.REQUEST_URL); + return (requestUrl != null) && requestUrl.endsWith(".js"); + } + + private boolean requestIsCSS(HttpServletRequest httpRequest, + WaybackRequest wbRequest) { + + String requestUrl = wbRequest.get(WaybackConstants.REQUEST_URL); + return (requestUrl != null) && requestUrl.endsWith(".css"); + } + + /* + * (non-Javadoc) + * + * @see org.archive.wayback.ReplayRenderer#renderException(javax.servlet.http.HttpServletRequest, + * javax.servlet.http.HttpServletResponse, + * org.archive.wayback.core.WaybackRequest, + * org.archive.wayback.exception.WaybackException) + */ + public void renderException(HttpServletRequest httpRequest, + HttpServletResponse httpResponse, WaybackRequest wbRequest, + WaybackException exception) throws ServletException, IOException { + + // the "standard HTML" response handler: + String finalJspPath = errorJsp; + + // try to not cause client errors by sending the HTML response if + // this request is ebedded, and is obviously one of the special types: + if (requestIsEmbedded(httpRequest, wbRequest)) { + + if (requestIsJavascript(httpRequest, wbRequest)) { + + finalJspPath = javascriptErrorJsp; + + } else if (requestIsCSS(httpRequest, wbRequest)) { + + finalJspPath = cssErrorJsp; + + } else if (requestIsImage(httpRequest, wbRequest)) { + + finalJspPath = imageErrorJsp; + + } + } + + httpRequest.setAttribute("exception", exception); + UIResults uiResults = new UIResults(wbRequest); + uiResults.storeInRequest(httpRequest, finalJspPath); + + RequestDispatcher dispatcher = httpRequest + .getRequestDispatcher(finalJspPath); + + dispatcher.forward(httpRequest, httpResponse); + } + + /** + * @param 
wbRequest + * @param result + * @param resource + * @return the correct ReplayRenderer for the Resource + */ + public abstract ReplayRenderer getRenderer(WaybackRequest wbRequest, + SearchResult result, Resource resource); + + /* + * (non-Javadoc) + * + * @see org.archive.wayback.ReplayRenderer#renderResource(javax.servlet.http.HttpServletRequest, + * javax.servlet.http.HttpServletResponse, + * org.archive.wayback.core.WaybackRequest, + * org.archive.wayback.core.SearchResult, + * org.archive.wayback.core.Resource, + * org.archive.wayback.ResultURIConverter, + * org.archive.wayback.core.SearchResults) + */ + public void renderResource(HttpServletRequest httpRequest, + HttpServletResponse httpResponse, WaybackRequest wbRequest, + SearchResult result, Resource resource, + ResultURIConverter uriConverter, SearchResults results) + throws ServletException, IOException { + + ReplayRenderer renderer = getRenderer(wbRequest, result, resource); + try { + renderer.renderResource(httpRequest, httpResponse, wbRequest, result, + resource, uriConverter, results); + } catch (WaybackException e) { + renderException(httpRequest, httpResponse, wbRequest, e); + } + } +} Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/DateRedirectReplayRenderer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/DateRedirectReplayRenderer.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/DateRedirectReplayRenderer.java 2007-08-23 21:09:34 UTC (rev 1913) @@ -0,0 +1,63 @@ +/* DateRedirectReplayRenderer + * + * $Id$ + * + * Created on 11:42:50 AM Aug 9, 2007. + * + * Copyright (C) 2007 Internet Archive. + * + * This file is part of wayback-core. 
+ * + * wayback-core is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback-core is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with wayback-core; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.replay; + +import java.io.IOException; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.archive.wayback.ReplayRenderer; +import org.archive.wayback.ResultURIConverter; +import org.archive.wayback.core.Resource; +import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.SearchResults; +import org.archive.wayback.core.WaybackRequest; + +/** + * + * + * @author brad + * @version $Date$, $Revision$ + */ +public class DateRedirectReplayRenderer implements ReplayRenderer { + + /* (non-Javadoc) + * @see org.archive.wayback.ReplayRenderer#renderResource(javax.servlet.http.HttpServletRequest, javax.servlet.http.HttpServletResponse, org.archive.wayback.core.WaybackRequest, org.archive.wayback.core.SearchResult, org.archive.wayback.core.Resource, org.archive.wayback.ResultURIConverter, org.archive.wayback.core.SearchResults) + */ + public void renderResource(HttpServletRequest httpRequest, + HttpServletResponse httpResponse, WaybackRequest wbRequest, + SearchResult result, Resource resource, + ResultURIConverter uriConverter, SearchResults results) + throws ServletException, IOException { + + // redirect to the better version: 
+ String url = result.getAbsoluteUrl(); + String captureDate = result.getCaptureDate(); + String betterURI = uriConverter.makeReplayURI(captureDate,url); + httpResponse.sendRedirect(betterURI); + } +} Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/HTMLPage.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/HTMLPage.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/HTMLPage.java 2007-08-23 21:09:34 UTC (rev 1913) @@ -0,0 +1,376 @@ +/* HTMLPage + * + * $Id$ + * + * Created on 12:39:52 PM Aug 7, 2007. + * + * Copyright (C) 2007 Internet Archive. + * + * This file is part of wayback-core. + * + * wayback-core is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback-core is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with wayback-core; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.replay; + +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.OutputStream; +import java.io.UnsupportedEncodingException; +import java.text.ParseException; +import java.util.Map; + +import javax.servlet.RequestDispatcher; +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.archive.wayback.ResultURIConverter; +import org.archive.wayback.core.Resource; +import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.SearchResults; +import org.archive.wayback.core.WaybackRequest; +import org.archive.wayback.query.UIQueryResults; +import org.mozilla.universalchardet.UniversalDetector; + +/** + * Class which wraps functionality for converting a Resource(InputStream + + * HTTP headers) into a StringBuilder, performing several common URL + * resolution methods against that StringBuilder, inserting arbitrary Strings + * into the page, and then converting the page back to a byte array. + * + * @author brad + * @version $Date$, $Revision$ + */ +public class HTMLPage { + + // hand off this many bytes to the chardet library + private final static int MAX_CHARSET_READAHEAD = 65536; + // ...if it also includes "charset=" + private final static String CHARSET_TOKEN = "charset="; + // ...and if the chardet library fails, use the Content-Type header + private final static String HTTP_CONTENT_TYPE_HEADER = "Content-Type"; + // if documents are marked up before sending to clients, the data is + // decoded into a String in chunks. This is how big a chunk to decode with. 
+ private final static int C_BUFFER_SIZE = 4096; + + private Resource resource = null; + private SearchResult result = null; + private ResultURIConverter uriConverter = null; + /** + * the internal StringBuilder + */ + public StringBuilder sb = null; + private String charSet = null; + private byte[] resultBytes = null; + + /** + * @param resource + * @param result + * @param uriConverter + */ + public HTMLPage(Resource resource, SearchResult result, + ResultURIConverter uriConverter) { + this.resource = resource; + this.result = result; + this.uriConverter = uriConverter; + } + + private String contentTypeToCharset(final String contentType) { + int offset = contentType.indexOf(CHARSET_TOKEN); + if (offset != -1) { + return contentType.substring(offset + CHARSET_TOKEN.length()); + } + return null; + } + + /** + * Attempt to divine the character encoding of the document from the + * Content-Type HTTP header (with a "charset=") + * + * @param resource + * @return String character set found or null if the header was not present + * @throws IOException + */ + protected String getCharsetFromHeaders(Resource resource) + throws IOException { + + String charsetName = null; + + Map<String,String> httpHeaders = resource.getHttpHeaders(); + String ctype = httpHeaders.get(HTTP_CONTENT_TYPE_HEADER); + if (ctype != null) { + charsetName = contentTypeToCharset(ctype); + } + return charsetName; + } + + /** + * Attempt to find a META tag in the HTML that hints at the character set + * used to write the document. 
+ * + * @param resource + * @return String character set found from META tags in the HTML + * @throws IOException + */ + protected String getCharsetFromMeta(Resource resource) throws IOException { + String charsetName = null; + + byte[] bbuffer = new byte[MAX_CHARSET_READAHEAD]; + resource.mark(MAX_CHARSET_READAHEAD); + resource.read(bbuffer, 0, MAX_CHARSET_READAHEAD); + resource.reset(); + // convert to UTF-8 String -- which hopefully will not mess up the + // characters we're interested in... + StringBuilder sb = new StringBuilder(new String(bbuffer,"UTF-8")); + String metaContentType = TagMagix.getTagAttrWhere(sb, "META", + "content", "http-equiv", "Content-Type"); + if(metaContentType != null) { + charsetName = contentTypeToCharset(metaContentType); + } + return charsetName; + } + + /** + * Attempts to figure out the character set of the document using + * the excellent juniversalchardet library. + * + * @param resource + * @return String character encoding found, or null if nothing looked good. + * @throws IOException + */ + protected String getCharsetFromBytes(Resource resource) throws IOException { + String charsetName = null; + + byte[] bbuffer = new byte[MAX_CHARSET_READAHEAD]; + // (1) + UniversalDetector detector = new UniversalDetector(null); + + // (2) + resource.mark(MAX_CHARSET_READAHEAD); + int len = resource.read(bbuffer, 0, MAX_CHARSET_READAHEAD); + resource.reset(); + detector.handleData(bbuffer, 0, len); + // (3) + detector.dataEnd(); + // (4) + charsetName = detector.getDetectedCharset(); + + // (5) + detector.reset(); + + return charsetName; + } + + /** + * Use META tags, byte-character-detection, HTTP headers, hope, and prayer + * to figure out what character encoding is being used for the document. + * If nothing else works, assumes UTF-8 for now. 
+ * + * @param resource + * @return String charset for Resource + * @throws IOException + */ + protected String guessCharset() throws IOException { + + String charSet = getCharsetFromMeta(resource); + if(charSet == null) { + charSet = getCharsetFromBytes(resource); + if(charSet == null) { + charSet = getCharsetFromHeaders(resource); + if(charSet == null) { + charSet = "UTF-8"; + } + } + } + return charSet; + } + + /** + * Update URLs inside the page, so those URLs which must be correct at + * page load time resolve correctly to absolute URLs. + * + * This means ensuring there is a BASE HREF tag, adding one if missing, + * and then resolving: + * FRAME-SRC, META-URL, LINK-HREF, SCRIPT-SRC + * tag-attribute pairs against either the existing BASE-HREF, or the + * page's absolute URL if it was missing. + */ + public void resolvePageUrls() { + + // TODO: get url from Resource instead of SearchResult? + String pageUrl = result.getAbsoluteUrl(); + String captureDate = result.getCaptureDate(); + + String existingBaseHref = TagMagix.getBaseHref(sb); + if (existingBaseHref != null) { + pageUrl = existingBaseHref; + } + + TagMagix.markupTagREURIC(sb, uriConverter, captureDate, pageUrl, + "FRAME", "SRC"); +// TagMagix.markupTagREURIC(page, uriConverter, captureDate, pageUrl, +// "IFRAME", "SRC"); + TagMagix.markupTagREURIC(sb, uriConverter, captureDate, pageUrl, + "META", "URL"); + TagMagix.markupTagREURIC(sb, uriConverter, captureDate, pageUrl, + "LINK", "HREF"); + // TODO: The classic WM added a js_ to the datespec, so NotInArchives + // can return an valid javascript doc, and not cause Javascript errors. 
+ TagMagix.markupTagREURIC(sb, uriConverter, captureDate, pageUrl, + "SCRIPT", "SRC"); + + if (existingBaseHref == null) { + String baseTag = "<base href=\"" + pageUrl + "\" />"; + int insertPoint = sb.indexOf("<head>"); + if (-1 == insertPoint) { + insertPoint = sb.indexOf("<HEAD>"); + } + if (-1 == insertPoint) { + insertPoint = 0; + } else { + insertPoint += 6; // just after the tag + } + sb.insert(insertPoint, baseTag); + } + } + + + /** + * @param charSet + * @throws IOException + */ + public void readFully(String charSet) throws IOException { + if(charSet == null) { + charSet = guessCharset(); + } + this.charSet = charSet; + int recordLength = (int) resource.getRecordLength(); + + // convert bytes to characters for charset: + InputStreamReader isr = new InputStreamReader(resource, charSet); + + char[] cbuffer = new char[C_BUFFER_SIZE]; + + // slurp the whole thing into RAM: + sb = new StringBuilder(recordLength); + for (int r = -1; (r = isr.read(cbuffer, 0, C_BUFFER_SIZE)) != -1;) { + sb.append(cbuffer, 0, r); + } + } + + /** + * Read bytes from input stream, using best-guess for character encoding + * @throws IOException + */ + public void readFully() throws IOException { + readFully(null); + } + + /** + * @return raw bytes contained in internal StringBuilder + * @throws UnsupportedEncodingException + */ + public byte[] getBytes() throws UnsupportedEncodingException { + if(sb == null) { + throw new IllegalStateException("No interal StringBuffer"); + } + if(resultBytes == null) { + resultBytes = sb.toString().getBytes(charSet); + } + return resultBytes; + } + + /** + * Write the contents of the page to the client. 
+ * + * @param os + * @throws IOException + */ + public void writeToOutputStream(OutputStream os) throws IOException { + if(sb == null) { + throw new IllegalStateException("No interal StringBuffer"); + } + byte[] b; + try { + b = getBytes(); + } catch (UnsupportedEncodingException e) { + throw new RuntimeException(e); + } + os.write(b); + } + + /** + * @param toInsert + */ + public void insertAtEndOfBody(String toInsert) { + int insertPoint = sb.lastIndexOf("</body>"); + if (-1 == insertPoint) { + insertPoint = sb.lastIndexOf("</BODY>"); + } + if (-1 == insertPoint) { + insertPoint = sb.length(); + } + sb.insert(insertPoint,toInsert); + } + /** + * @param jspPath + * @param httpRequest + * @param httpResponse + * @param wbRequest + * @param results + * @return + * @throws IOException + * @throws ServletException + * @throws ParseException + */ + public String includeJspString(String jspPath, + HttpServletRequest httpRequest, HttpServletResponse httpResponse, + WaybackRequest wbRequest, SearchResults results) + throws ServletException, IOException { + + UIQueryResults uiResults = new UIQueryResults(httpRequest, wbRequest, + results, uriConverter); + + StringHttpServletResponseWrapper wrappedResponse = + new StringHttpServletResponseWrapper(httpResponse); + uiResults.storeInRequest(httpRequest,jspPath); + RequestDispatcher dispatcher = httpRequest.getRequestDispatcher(jspPath); + dispatcher.forward(httpRequest, wrappedResponse); + return wrappedResponse.getStringResponse(); + } + + /** + * @param jsUrl + * @return + */ + public String getJSIncludeString(final String jsUrl) { + return "<script type=\"text/javascript\" src=\"" + + jsUrl + "\" ></script>\n"; + } + + /** + * @return the charSet + */ + public String getCharSet() { + return charSet; + } + + /** + * @param charSet the charSet to set + */ + public void setCharSet(String charSet) { + this.charSet = charSet; + } +} Added: 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/HttpHeaderOperation.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/HttpHeaderOperation.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/HttpHeaderOperation.java 2007-08-23 21:09:34 UTC (rev 1913) @@ -0,0 +1,105 @@ +/* HttpHeaderProcessor + * + * $Id$ + * + * Created on 6:44:10 PM Aug 8, 2007. + * + * Copyright (C) 2007 Internet Archive. + * + * This file is part of wayback-core. + * + * wayback-core is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback-core is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with wayback-core; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.replay; + +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; + +import javax.servlet.http.HttpServletResponse; + +import org.archive.wayback.ResultURIConverter; +import org.archive.wayback.core.Resource; +import org.archive.wayback.core.SearchResult; +import org.archive.wayback.exception.BadContentException; + +/** + * + * + * @author brad + * @version $Date$, $Revision$ + */ +public class HttpHeaderOperation { + + /** + * @param resource + * @param httpResponse + * @throws BadContentException + */ + public static void copyHTTPMessageHeader(Resource resource, + HttpServletResponse httpResponse) throws BadContentException { + + // set status code from original resource (which will definitely confuse + // many clients...) + int code = resource.getStatusCode(); + // Only return legit status codes -- don't return any minus + // codes, etc. + if (code <= HttpServletResponse.SC_CONTINUE) { + throw new BadContentException("Bad status code " + code); + } + httpResponse.setStatus(code); + } + + /** + * @param resource + * @param result + * @param uriConverter + * @param filter + * @return + */ + public static Map<String,String> processHeaders(Resource resource, + SearchResult result, ResultURIConverter uriConverter, + HttpHeaderProcessor filter) { + HashMap<String,String> output = new HashMap<String,String>(); + + // copy all HTTP headers, as-is, sending "" instead of nulls. + Map<String,String> headers = resource.getHttpHeaders(); + if (headers != null) { + Iterator<String> itr = headers.keySet().iterator(); + while(itr.hasNext()) { + String key = itr.next(); + String value = headers.get(key); + value = (value == null) ? 
"" : value; + filter.filter(output, key, value, uriConverter, result); + } + } + return output; + } + + /** + * @param headers + * @param response + */ + public static void sendHeaders(Map<String,String> headers, + HttpServletResponse response) { + Iterator<String> itr = headers.keySet().iterator(); + while(itr.hasNext()) { + String key = itr.next(); + String value = headers.get(key); + value = (value == null) ? "" : value; + response.setHeader(key,value); + } + } +} Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/HttpHeaderProcessor.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/HttpHeaderProcessor.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/HttpHeaderProcessor.java 2007-08-23 21:09:34 UTC (rev 1913) @@ -0,0 +1,51 @@ +/* HeaderFilter + * + * $Id$ + * + * Created on 6:41:12 PM Aug 8, 2007. + * + * Copyright (C) 2007 Internet Archive. + * + * This file is part of wayback-core. + * + * wayback-core is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback-core is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with wayback-core; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.replay; + +import java.util.Map; + +import org.archive.wayback.ResultURIConverter; +import org.archive.wayback.core.SearchResult; + +/** + * + * + * @author brad + * @version $Date$, $Revision$ + */ +public interface HttpHeaderProcessor { + + /** + * optionally add header key:value to output for later returning to client + * + * @param output + * @param key + * @param value + * @param uriConverter + * @param result + */ + public void filter(Map<String,String> output, String key, String value, + final ResultURIConverter uriConverter, SearchResult result); +} Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/StringHttpServletResponseWrapper.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/StringHttpServletResponseWrapper.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/StringHttpServletResponseWrapper.java 2007-08-23 21:09:34 UTC (rev 1913) @@ -0,0 +1,58 @@ +/* StringHttpServletResponseWrapper + * + * $Id$ + * + * Created on 4:35:39 PM Aug 6, 2007. + * + * Copyright (C) 2007 Internet Archive. + * + * This file is part of wayback-core. + * + * wayback-core is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback-core is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with wayback-core; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.replay; + +import java.io.PrintWriter; +import java.io.StringWriter; + +import javax.servlet.http.HttpServletResponse; +import javax.servlet.http.HttpServletResponseWrapper; + +/** + * + * + * @author brad + * @version $Date$, $Revision$ + */ +public class StringHttpServletResponseWrapper extends HttpServletResponseWrapper { + + private StringWriter sw = new StringWriter(); + + /** + * @param response + */ + public StringHttpServletResponseWrapper(HttpServletResponse response) { + super(response); + } + public PrintWriter getWriter() { + return new PrintWriter(sw); + } + /** + * @return + */ + public String getStringResponse() { + return sw.toString(); + } +} Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TransparentReplayRenderer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TransparentReplayRenderer.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TransparentReplayRenderer.java 2007-08-23 21:09:34 UTC (rev 1913) @@ -0,0 +1,91 @@ +/* TransparentReplayRenderer + * + * $Id$ + * + * Created on 5:38:11 PM Aug 8, 2007. + * + * Copyright (C) 2007 Internet Archive. + * + * This file is part of wayback-core. + * + * wayback-core is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. 
+ * + * wayback-core is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with wayback-core; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.replay; + +import java.io.IOException; +import java.io.OutputStream; +import java.util.Map; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.archive.wayback.ReplayRenderer; +import org.archive.wayback.ResultURIConverter; +import org.archive.wayback.core.Resource; +import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.SearchResults; +import org.archive.wayback.core.WaybackRequest; +import org.archive.wayback.exception.BadContentException; + +/** + * ReplayRenderer implementation which returns the archive document as + * pristinely as possible -- no modifications to response code, HTTP headers, + * or original byte-stream. 
+ * + * @author brad + * @version $Date$, $Revision$ + */ +public class TransparentReplayRenderer implements ReplayRenderer, HttpHeaderProcessor { + + private final static int BUFFER_SIZE = 4096; + + /* (non-Javadoc) + * @see org.archive.wayback.ReplayRenderer#renderResource(javax.servlet.http.HttpServletRequest, javax.servlet.http.HttpServletResponse, org.archive.wayback.core.WaybackRequest, org.archive.wayback.core.SearchResult, org.archive.wayback.core.Resource, org.archive.wayback.ResultURIConverter, org.archive.wayback.core.SearchResults) + */ + public void renderResource(HttpServletRequest httpRequest, + HttpServletResponse httpResponse, WaybackRequest wbRequest, + SearchResult result, Resource resource, + ResultURIConverter uriConverter, SearchResults results) + throws ServletException, IOException, BadContentException { + + // cause underlying resource to read thru HTTP headers: + resource.parseHeaders(); + + HttpHeaderOperation.copyHTTPMessageHeader(resource, httpResponse); + + Map<String,String> headers = HttpHeaderOperation.processHeaders( + resource, result, uriConverter, this); + + HttpHeaderOperation.sendHeaders(headers, httpResponse); + + // and copy the raw byte-stream. + OutputStream os = httpResponse.getOutputStream(); + byte[] buffer = new byte[BUFFER_SIZE]; + for (int r = -1; (r = resource.read(buffer, 0, BUFFER_SIZE)) != -1;) { + os.write(buffer, 0, r); + } + } + + /* (non-Javadoc) + * @see org.archive.wayback.replay.HeaderFilter#filter(java.util.Map, java.lang.String, java.lang.String, org.archive.wayback.ResultURIConverter, org.archive.wayback.core.SearchResult) + */ + public void filter(Map<String, String> output, String key, String value, + ResultURIConverter uriConverter, SearchResult result) { + + // copy all HTTP headers, as-is. + output.put(key, value); + } +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2007-08-23 21:16:46
|
Revision: 1917 http://archive-access.svn.sourceforge.net/archive-access/?rev=1917&view=rev Author: bradtofel Date: 2007-08-23 14:16:48 -0700 (Thu, 23 Aug 2007) Log Message: ----------- INITIAL REV: new replay mode based on replay context stored in hostname, while paths remain static -- somewhat working, but not ready for primetime, yet. Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixCompositeRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixReplayDispatcher.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixReplayRenderer.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixResultURIConverter.java Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixCompositeRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixCompositeRequestParser.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixCompositeRequestParser.java 2007-08-23 21:16:48 UTC (rev 1917) @@ -0,0 +1,60 @@ +/* DomainPrefixCompositeRequestParser + * + * $Id$ + * + * Created on 11:20:17 AM Aug 10, 2007. + * + * Copyright (C) 2007 Internet Archive. + * + * This file is part of wayback-core. 
+ * + * wayback-core is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback-core is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with wayback-core; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.domainprefix; + +import org.archive.wayback.RequestParser; +import org.archive.wayback.requestparser.CompositeRequestParser; +import org.archive.wayback.requestparser.FormRequestParser; +import org.archive.wayback.requestparser.OpenSearchRequestParser; + +/** + * + * + * @author brad + * @version $Date$, $Revision$ + */ +public class DomainPrefixCompositeRequestParser extends CompositeRequestParser { + DomainPrefixRequestParser dprp = new DomainPrefixRequestParser(); + protected RequestParser[] getRequestParsers() { + RequestParser[] theParsers = { + dprp, + new OpenSearchRequestParser(), + new FormRequestParser() + }; + return theParsers; + } + /** + * @param hostPort + */ + public void setHostPort(String hostPort) { + dprp.setHostPort(hostPort); + } + /** + * @return + */ + public String getHostPort() { + return dprp.getHostPort(); + } +} Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixReplayDispatcher.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixReplayDispatcher.java (rev 0) +++ 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixReplayDispatcher.java 2007-08-23 21:16:48 UTC (rev 1917) @@ -0,0 +1,87 @@ +/* DomainPrefixReplayDispatcher + * + * $Id$ + * + * Created on 10:20:49 AM Aug 10, 2007. + * + * Copyright (C) 2007 Internet Archive. + * + * This file is part of wayback-core. + * + * wayback-core is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback-core is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with wayback-core; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.domainprefix; + +import org.archive.wayback.ReplayRenderer; +import org.archive.wayback.WaybackConstants; +import org.archive.wayback.core.Resource; +import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.WaybackRequest; +import org.archive.wayback.replay.BaseReplayDispatcher; +import org.archive.wayback.replay.DateRedirectReplayRenderer; +import org.archive.wayback.replay.TransparentReplayRenderer; + +/** + * + * + * @author brad + * @version $Date$, $Revision$ + */ +public class DomainPrefixReplayDispatcher extends BaseReplayDispatcher { + + private final static String TEXT_HTML_MIME = "text/html"; + private final static String TEXT_XHTML_MIME = "application/xhtml"; + + // TODO: make this configurable + private final static long MAX_HTML_MARKUP_LENGTH = 1024 * 1024 * 5; + + private ReplayRenderer redirect = new DateRedirectReplayRenderer(); + + private 
ReplayRenderer transparent = new TransparentReplayRenderer(); + private DomainPrefixReplayRenderer html = new DomainPrefixReplayRenderer(); + + /* (non-Javadoc) + * @see org.archive.wayback.replay.BaseReplayDispatcher#getRenderer(org.archive.wayback.core.WaybackRequest, org.archive.wayback.core.SearchResult, org.archive.wayback.core.Resource) + */ + @Override + public ReplayRenderer getRenderer(WaybackRequest wbRequest, + SearchResult result, Resource resource) { + // if the result is not for the exact date requested, redirect to the + // exact date. some capture dates are not 14 digits, only compare as + // many digits as are in the result date: + String reqDateStr = wbRequest.get(WaybackConstants.REQUEST_EXACT_DATE); + String resDateStr = result.get(WaybackConstants.RESULT_CAPTURE_DATE); + if((resDateStr.length() > reqDateStr.length()) || + !resDateStr.equals(reqDateStr.substring(0, resDateStr.length()))) { + return redirect; + } + + // HTML and XHTML docs smaller than some size get marked up as HTML + if (resource.getRecordLength() < MAX_HTML_MARKUP_LENGTH) { + + if (-1 != result.get(WaybackConstants.RESULT_MIME_TYPE).indexOf( + TEXT_HTML_MIME)) { + return html; + } + if (-1 != result.get(WaybackConstants.RESULT_MIME_TYPE).indexOf( + TEXT_XHTML_MIME)) { + return html; + } + } + + // everything else goes transparently: + return transparent; + } +} Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixReplayRenderer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixReplayRenderer.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixReplayRenderer.java 2007-08-23 21:16:48 UTC (rev 1917) @@ -0,0 +1,140 @@ +/* DomainPrefixReplayRenderer + * + * $Id$ + * + * Created on 10:21:04 AM Aug 10, 2007. 
+ * + * Copyright (C) 2007 Internet Archive. + * + * This file is part of wayback-core. + * + * wayback-core is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback-core is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with wayback-core; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.domainprefix; + +import java.io.IOException; +//import java.util.Date; +//import java.util.Iterator; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.archive.wayback.ReplayRenderer; +import org.archive.wayback.ResultURIConverter; +//import org.archive.wayback.WaybackConstants; +import org.archive.wayback.core.Resource; +import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.SearchResults; +import org.archive.wayback.core.Timestamp; +import org.archive.wayback.core.WaybackRequest; +import org.archive.wayback.exception.BadContentException; +import org.archive.wayback.replay.HTMLPage; +import org.archive.wayback.replay.HttpHeaderProcessor; +import org.archive.wayback.replay.HttpHeaderOperation; +//import org.archive.wayback.util.StringFormatter; +import org.archive.wayback.util.UrlCanonicalizer; + +/** + * + * + * @author brad + * @version $Date$, $Revision$ + */ +public class DomainPrefixReplayRenderer implements ReplayRenderer, 
HttpHeaderProcessor { + private final static String HTTP_LENGTH_HEADER = "Content-Length"; + private final static String HTTP_LENGTH_HEADER_UP = + HTTP_LENGTH_HEADER.toUpperCase(); + + private final static String HTTP_LOCATION_HEADER = "Location"; + private final static String HTTP_LOCATION_HEADER_UP = + HTTP_LOCATION_HEADER.toUpperCase(); + + private final static Pattern httpPattern = + Pattern.compile("(http://[^/]*/)"); + + /* (non-Javadoc) + * @see org.archive.wayback.ReplayRenderer#renderResource(javax.servlet.http.HttpServletRequest, javax.servlet.http.HttpServletResponse, org.archive.wayback.core.WaybackRequest, org.archive.wayback.core.SearchResult, org.archive.wayback.core.Resource, org.archive.wayback.ResultURIConverter, org.archive.wayback.core.SearchResults) + */ + public void renderResource(HttpServletRequest httpRequest, + HttpServletResponse httpResponse, WaybackRequest wbRequest, + SearchResult result, Resource resource, + ResultURIConverter uriConverter, SearchResults results) + throws ServletException, IOException, BadContentException { + resource.parseHeaders(); + + HttpHeaderOperation.copyHTTPMessageHeader(resource, httpResponse); + + Map<String,String> headers = HttpHeaderOperation.processHeaders( + resource, result, uriConverter, this); + + // Load content into an HTML page, and resolve load-time URLs: + HTMLPage page = new HTMLPage(resource,result,uriConverter); + page.readFully(); + + String resourceTS = result.getCaptureDate(); + String captureTS = Timestamp.parseBefore(resourceTS).getDateStr(); + + + StringBuilder sb = page.sb; + StringBuffer replaced = new StringBuffer(sb.length()); + Matcher m = httpPattern.matcher(sb); + while(m.find()) { + String host = m.group(1); + String replacement = uriConverter.makeReplayURI(captureTS,host); + m.appendReplacement(replaced, replacement); + } + m.appendTail(replaced); + byte b[] = replaced.toString().getBytes(page.getCharSet()); + int bytes = b.length; + headers.put(HTTP_LENGTH_HEADER, 
String.valueOf(bytes)); + + HttpHeaderOperation.sendHeaders(headers, httpResponse); + httpResponse.getOutputStream().write(b); + + } + + /* (non-Javadoc) + * @see org.archive.wayback.replay.HeaderFilter#filter(java.util.Map, java.lang.String, java.lang.String, org.archive.wayback.ResultURIConverter, org.archive.wayback.core.SearchResult) + */ + public void filter(Map<String, String> output, String key, String value, + ResultURIConverter uriConverter, SearchResult result) { + String keyUp = key.toUpperCase(); + + // omit Content-Length header + if (keyUp.equals(HTTP_LENGTH_HEADER_UP)) { + return; + } + + // rewrite Location header URLs + if (keyUp.startsWith(HTTP_LOCATION_HEADER_UP)) { + + String baseUrl = result.getAbsoluteUrl(); + String cd = result.getCaptureDate(); + // by the spec, these should be absolute already, but just in case: + String u = UrlCanonicalizer.resolveUrl(baseUrl, value); + + output.put(key, uriConverter.makeReplayURI(cd,u)); + + } else { + // others go out as-is: + + output.put(key, value); + } + } +} Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixRequestParser.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixRequestParser.java 2007-08-23 21:16:48 UTC (rev 1917) @@ -0,0 +1,177 @@ +/* DomainPrefixRequestParser + * + * $Id$ + * + * Created on 10:20:21 AM Aug 10, 2007. + * + * Copyright (C) 2007 Internet Archive. + * + * This file is part of wayback-core. + * + * wayback-core is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. 
+ * + * wayback-core is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with wayback-core; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.domainprefix; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import javax.servlet.http.HttpServletRequest; + +import org.apache.commons.httpclient.URIException; +import org.archive.wayback.RequestParser; +import org.archive.wayback.WaybackConstants; +import org.archive.wayback.core.Timestamp; +import org.archive.wayback.core.WaybackRequest; +import org.archive.wayback.exception.BadQueryException; +import org.archive.wayback.webapp.WaybackContext; + +/** + * + * + * @author brad + * @version $Date$, $Revision$ + */ +public class DomainPrefixRequestParser implements RequestParser { + + String hostPort = "localhost:8081"; + String earliest = Timestamp.earliestTimestamp().getDateStr(); + int maxRecords = 1000; + + private final Pattern REPLAY_REGEX = + Pattern.compile("^(\\d{1,14})\\.(.*)$"); + private final Pattern QUERY_REGEX = + Pattern.compile("^(\\d{0,13})\\*\\.(.*)$"); + + private String getRequestString(final String host, + HttpServletRequest httpRequest) { + String path = httpRequest.getRequestURI(); + String query = httpRequest.getQueryString(); + + String r = ""; + if(path == null) { + path = "/"; + } + if(query != null && query.length() > 0) { + r = "http://" + host + path + "?" 
+ query; + } else { + r = "http://" + host + path; + } + return r; + } + + /* (non-Javadoc) + * @see org.archive.wayback.RequestParser#parse(javax.servlet.http.HttpServletRequest, org.archive.wayback.webapp.WaybackContext) + */ + public WaybackRequest parse(HttpServletRequest httpRequest, + WaybackContext wbContext) throws BadQueryException { + + WaybackRequest wbRequest = null; + String server = httpRequest.getServerName() + + ":" + httpRequest.getServerPort(); + if(server.endsWith(hostPort)) { + int length = server.length() - hostPort.length(); + if(server.length() > hostPort.length()) { + String prefix = server.substring(0,length - 1); + Matcher replayMatcher = REPLAY_REGEX.matcher(prefix); + if (replayMatcher != null && replayMatcher.matches()) { + wbRequest = new WaybackRequest(); + String dateStr = replayMatcher.group(1); + String host = replayMatcher.group(2); + + String requestUrl = getRequestString(host,httpRequest); + + wbRequest.put(WaybackConstants.REQUEST_EXACT_DATE, dateStr); + wbRequest.put(WaybackConstants.REQUEST_TYPE, + WaybackConstants.REQUEST_REPLAY_QUERY); + try { + wbRequest.setRequestUrl(requestUrl); + } catch (URIException e) { + e.printStackTrace(); + wbRequest = null; + } + } else { + Matcher queryMatcher = QUERY_REGEX.matcher(prefix); + if(queryMatcher != null && queryMatcher.matches()) { + wbRequest = new WaybackRequest(); + String dateStr = queryMatcher.group(1); + String host = queryMatcher.group(2); + String startDate; + if(dateStr.length() == 0) { + startDate = earliest; + } else { + startDate = Timestamp.parseBefore(dateStr).getDateStr(); + } + String endDate = Timestamp.parseAfter(dateStr).getDateStr(); + wbRequest.put(WaybackConstants.REQUEST_START_DATE,startDate); + wbRequest.put(WaybackConstants.REQUEST_END_DATE,endDate); + wbRequest.put(WaybackConstants.REQUEST_TYPE, + WaybackConstants.REQUEST_URL_QUERY); + + String requestUrl = getRequestString(host,httpRequest); + + try { + wbRequest.setRequestUrl(requestUrl); + } catch 
(URIException e) { + e.printStackTrace(); + wbRequest = null; + } + } + } + } + } + return wbRequest; + } + + /* (non-Javadoc) + * @see org.archive.wayback.RequestParser#setEarliestTimestamp(java.lang.String) + */ + public void setEarliestTimestamp(String timestamp) { + earliest = timestamp; + } + + /** + * @return the earliest timestamp + */ + public String getEarliestTimestamp() { + return earliest; + } + + /* (non-Javadoc) + * @see org.archive.wayback.RequestParser#setMaxRecords(int) + */ + public void setMaxRecords(int maxRecords) { + this.maxRecords = maxRecords; + } + /** + * @return the maxRecords + */ + public int getMaxRecords() { + return maxRecords; + } + + /** + * @return the hostPort + */ + public String getHostPort() { + return hostPort; + } + + /** + * @param hostPort the hostPort to set + */ + public void setHostPort(String hostPort) { + this.hostPort = hostPort; + } +} Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixResultURIConverter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixResultURIConverter.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixResultURIConverter.java 2007-08-23 21:16:48 UTC (rev 1917) @@ -0,0 +1,81 @@ +/* DomainPrefixResultURIConverter + * + * $Id$ + * + * Created on 10:20:35 AM Aug 10, 2007. + * + * Copyright (C) 2007 Internet Archive. + * + * This file is part of wayback-core. + * + * wayback-core is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. 
+ * + * wayback-core is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with wayback-core; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.domainprefix; + +import java.net.URI; +import java.net.URISyntaxException; + +import org.archive.wayback.ResultURIConverter; + +/** + * + * + * @author brad + * @version $Date$, $Revision$ + */ +public class DomainPrefixResultURIConverter implements ResultURIConverter { + + private String hostPort = "localhost:8081"; + + /* (non-Javadoc) + * @see org.archive.wayback.ResultURIConverter#makeReplayURI(java.lang.String, java.lang.String) + */ + public String makeReplayURI(String datespec, String url) { + String replayURI = ""; + try { + URI uri = new URI(url); + StringBuilder sb = new StringBuilder(90); + sb.append("http://"); + sb.append(datespec).append("."); + sb.append(uri.getHost()).append("."); + sb.append(hostPort); + sb.append(uri.getPath()); + String query = uri.getQuery(); + if(query != null && query.length() > 0) { + sb.append("?").append(query); + } + replayURI = sb.toString(); + + } catch (URISyntaxException e) { + e.printStackTrace(); + } + return replayURI; + } + + /** + * @return the hostPort + */ + public String getHostPort() { + return hostPort; + } + + /** + * @param hostPort the hostPort to set + */ + public void setHostPort(String hostPort) { + this.hostPort = hostPort; + } + +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2007-08-23 21:33:51
|
Revision: 1926 http://archive-access.svn.sourceforge.net/archive-access/?rev=1926&view=rev Author: bradtofel Date: 2007-08-23 14:33:51 -0700 (Thu, 23 Aug 2007) Log Message: ----------- INTERFACE: get() now accepts a WaybackRequest object, so the particular filters returned can depend on user request info: source IP, for example. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/CompositeExclusionFilterFactory.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/ExclusionFilterFactory.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/remote/RemoteExclusionFilterFactory.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilterFactory.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilterFactory.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/CompositeExclusionFilterFactory.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/CompositeExclusionFilterFactory.java 2007-08-23 21:32:24 UTC (rev 1925) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/CompositeExclusionFilterFactory.java 2007-08-23 21:33:51 UTC (rev 1926) @@ -28,6 +28,7 @@ import java.util.Iterator; import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.resourceindex.filters.CompositeExclusionFilter; import 
org.archive.wayback.util.ObjectFilter; @@ -54,11 +55,11 @@ /* (non-Javadoc) * @see org.archive.wayback.resourceindex.ExclusionFilterFactory#get() */ - public ObjectFilter<SearchResult> get() { + public ObjectFilter<SearchResult> get(WaybackRequest wbRequest) { Iterator<ExclusionFilterFactory> itr = factories.iterator(); CompositeExclusionFilter filter = new CompositeExclusionFilter(); while(itr.hasNext()) { - filter.addComponent(itr.next().get()); + filter.addComponent(itr.next().get(wbRequest)); } return filter; } @@ -78,4 +79,15 @@ public void setFactories(ArrayList<ExclusionFilterFactory> factories) { this.factories = factories; } + + /* (non-Javadoc) + * @see org.archive.wayback.accesscontrol.ExclusionFilterFactory#shutdown() + */ + public void shutdown() { + Iterator<ExclusionFilterFactory> itr = factories.iterator(); + while(itr.hasNext()) { + ExclusionFilterFactory i = itr.next(); + i.shutdown(); + } + } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/ExclusionFilterFactory.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/ExclusionFilterFactory.java 2007-08-23 21:32:24 UTC (rev 1925) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/ExclusionFilterFactory.java 2007-08-23 21:33:51 UTC (rev 1926) @@ -25,6 +25,7 @@ package org.archive.wayback.accesscontrol; import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.util.ObjectFilter; /** * @@ -34,8 +35,13 @@ */ public interface ExclusionFilterFactory { /** + * @param wbRequest * @return an ObjectFilter object that filters records based on * some set of exclusion rules. 
*/ - public ObjectFilter<SearchResult> get(); + public ObjectFilter<SearchResult> get(WaybackRequest wbRequest); + /** + * close any resources used by this ExclusionFilter system. + */ + public void shutdown(); } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/remote/RemoteExclusionFilterFactory.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/remote/RemoteExclusionFilterFactory.java 2007-08-23 21:32:24 UTC (rev 1925) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/remote/RemoteExclusionFilterFactory.java 2007-08-23 21:33:51 UTC (rev 1926) @@ -26,6 +26,7 @@ import org.archive.wayback.accesscontrol.ExclusionFilterFactory; import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.util.ObjectFilter; /** @@ -43,7 +44,7 @@ /* (non-Javadoc) * @see org.archive.wayback.resourceindex.ExclusionFilterFactory#get() */ - public ObjectFilter<SearchResult> get() { + public ObjectFilter<SearchResult> get(WaybackRequest wbRequest) { return new RemoteExclusionFilter(exclusionUrlPrefix, exclusionUserAgent); } @@ -75,5 +76,12 @@ this.exclusionUserAgent = exclusionUserAgent; } + /* (non-Javadoc) + * @see org.archive.wayback.accesscontrol.ExclusionFilterFactory#shutdown() + */ + public void shutdown() { + // nothing to do.. 
+ } + } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilterFactory.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilterFactory.java 2007-08-23 21:32:24 UTC (rev 1925) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilterFactory.java 2007-08-23 21:33:51 UTC (rev 1926) @@ -26,6 +26,7 @@ import org.archive.wayback.accesscontrol.ExclusionFilterFactory; import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.liveweb.LiveWebCache; import org.archive.wayback.util.ObjectFilter; @@ -44,7 +45,7 @@ /* (non-Javadoc) * @see org.archive.wayback.resourceindex.ExclusionFilterFactory#get() */ - public ObjectFilter<SearchResult> get() { + public ObjectFilter<SearchResult> get(WaybackRequest wbRequest) { return new RobotExclusionFilter(webCache,userAgent,maxCacheMS); } @@ -89,4 +90,11 @@ public void setMaxCacheMS(long maxCacheMS) { this.maxCacheMS = maxCacheMS; } + + /* (non-Javadoc) + * @see org.archive.wayback.accesscontrol.ExclusionFilterFactory#shutdown() + */ + public void shutdown() { + webCache.shutdown(); + } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilterFactory.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilterFactory.java 2007-08-23 21:32:24 UTC (rev 1925) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilterFactory.java 2007-08-23 21:33:51 UTC (rev 1926) 
@@ -32,6 +32,7 @@ import org.archive.wayback.accesscontrol.ExclusionFilterFactory; import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.surt.SURTTokenizer; import org.archive.wayback.util.CloseableIterator; import org.archive.wayback.util.ObjectFilter; @@ -47,7 +48,7 @@ private static final Logger LOGGER = Logger.getLogger(StaticMapExclusionFilterFactory.class.getName()); - private final static int checkInterval = 10; + private int checkInterval = 10; private Map<String,Object> currentMap = null; private File file = null; long lastUpdated = 0; @@ -88,6 +89,10 @@ CloseableIterator<String> itr = ff.getSequentialIterator(); while(itr.hasNext()) { String line = (String) itr.next(); + line = line.trim(); + if(line.length() == 0) { + continue; + } String surt = line.startsWith("(") ? line : SURTTokenizer.prefixKey(line); newMap.put(surt, null); @@ -97,9 +102,10 @@ } /** + * @param wbRequest * @return SearchResultFilter */ - public ObjectFilter<SearchResult> get() { + public ObjectFilter<SearchResult> get(WaybackRequest wbRequest) { if(currentMap == null) { return null; } @@ -113,6 +119,12 @@ updateThread = new CacheUpdaterThread(this,checkInterval); updateThread.start(); } + private synchronized void stopUpdateThread() { + if (updateThread == null) { + return; + } + updateThread.interrupt(); + } private class CacheUpdaterThread extends Thread { /** @@ -147,8 +159,45 @@ Thread.sleep(sleepInterval * 1000); } catch (InterruptedException e) { e.printStackTrace(); + return; } } } } + + /** + * @return the checkInterval + */ + public int getCheckInterval() { + return checkInterval; + } + + /** + * @param checkInterval the checkInterval to set + */ + public void setCheckInterval(int checkInterval) { + this.checkInterval = checkInterval; + } + + /** + * @return the path + */ + public String getFile() { + return file.getAbsolutePath(); + } + + /** + * @param path the file to set + */ + public void setFile(String 
path) { + this.file = new File(path); + } + + /* (non-Javadoc) + * @see org.archive.wayback.accesscontrol.ExclusionFilterFactory#shutdown() + */ + public void shutdown() { + // TODO Auto-generated method stub + stopUpdateThread(); + } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java 2007-08-23 21:32:24 UTC (rev 1925) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java 2007-08-23 21:33:51 UTC (rev 1926) @@ -80,11 +80,12 @@ private UrlCanonicalizer canonicalizer = new UrlCanonicalizer(); - private ObjectFilter<SearchResult> getExclusionFilter() + private ObjectFilter<SearchResult> getExclusionFilter( + WaybackRequest wbRequest) throws ResourceIndexNotAvailableException { ObjectFilter<SearchResult> filter = null; if(exclusionFactory != null) { - filter = exclusionFactory.get(); + filter = exclusionFactory.get(wbRequest); if(filter == null) { throw new ResourceIndexNotAvailableException("Exclusion " + "Service Unavailable"); @@ -210,7 +211,7 @@ GuardRailFilter guardrail = new GuardRailFilter(maxRecords); // checks an exclusion service for every matching record - ObjectFilter<SearchResult> exclusion = getExclusionFilter(); + ObjectFilter<SearchResult> exclusion = getExclusionFilter(wbRequest); // count how many results got to the ExclusionFilter: CounterFilter preExCounter = new CounterFilter(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2007-09-18 23:39:42
|
Revision: 1993 http://archive-access.svn.sourceforge.net/archive-access/?rev=1993&view=rev Author: bradtofel Date: 2007-09-18 16:39:45 -0700 (Tue, 18 Sep 2007) Log Message: ----------- INITIAL REV: two BooleanOperator classes for determining if a user is logged in as a user defined in a set, or if they are accessing the wayback from an IPRange. Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/authenticationcontrol/ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/authenticationcontrol/HTTPAuthBooleanOperator.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/authenticationcontrol/IPMatchesBooleanOperator.java Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/authenticationcontrol/HTTPAuthBooleanOperator.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/authenticationcontrol/HTTPAuthBooleanOperator.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/authenticationcontrol/HTTPAuthBooleanOperator.java 2007-09-18 23:39:45 UTC (rev 1993) @@ -0,0 +1,27 @@ +package org.archive.wayback.authenticationcontrol; + +import java.util.List; + +import org.archive.wayback.WaybackConstants; +import org.archive.wayback.core.WaybackRequest; +import org.archive.wayback.util.operator.BooleanOperator; + +public class HTTPAuthBooleanOperator implements BooleanOperator<WaybackRequest> { + private List<String> allowedUsers = null; + public boolean isTrue(WaybackRequest value) { + if(allowedUsers == null) { + return false; + } + String currentUser = value.get(WaybackConstants.REQUEST_REMOTE_USER); + if(currentUser == null) { + return false; + } + return allowedUsers.contains(currentUser); + } + public List<String> getAllowedUsers() { + return allowedUsers; + 
} + public void setAllowedUsers(List<String> allowedUsers) { + this.allowedUsers = allowedUsers; + } +} Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/authenticationcontrol/IPMatchesBooleanOperator.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/authenticationcontrol/IPMatchesBooleanOperator.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/authenticationcontrol/IPMatchesBooleanOperator.java 2007-09-18 23:39:45 UTC (rev 1993) @@ -0,0 +1,47 @@ +package org.archive.wayback.authenticationcontrol; + +import java.util.List; + +import org.archive.wayback.WaybackConstants; +import org.archive.wayback.core.WaybackRequest; +import org.archive.wayback.util.IPRange; +import org.archive.wayback.util.operator.BooleanOperator; + +public class IPMatchesBooleanOperator implements BooleanOperator<WaybackRequest> { + private List<IPRange> allowedRanges = null; + private IPRange range = null; + + public List<IPRange> getAllowedRanges() { + return allowedRanges; + } + + public void setAllowedRanges(List<IPRange> allowedRanges) { + this.allowedRanges = allowedRanges; + } + + public boolean isTrue(WaybackRequest value) { + if(allowedRanges == null) { + return false; + } + String ipString = value.get(WaybackConstants.REQUEST_REMOTE_ADDRESS); + if(ipString == null) { + return false; + } + byte[] ip = IPRange.matchIP(ipString); + return range.contains(ip); +// for(IPRange range : allowedRanges) { +// if(range.contains(ip)) { +// return true; +// } +// } +// return false; + } + + public IPRange getRange() { + return range; + } + + public void setRange(IPRange range) { + this.range = range; + } +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2007-09-18 23:46:10
|
Revision: 1996 http://archive-access.svn.sourceforge.net/archive-access/?rev=1996&view=rev Author: bradtofel Date: 2007-09-18 16:46:12 -0700 (Tue, 18 Sep 2007) Log Message: ----------- REFACTOR: moved exclusion mechanism from resourceIndex to WaybackContext, which is clearer conceptually, and will allow a common resource index to be shared across multiple WaybackContexts which may have different exclusion policies. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/CompositeExclusionFilterFactory.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/ExclusionFilterFactory.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/ExternalExcluder.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/remote/RemoteExclusionFilterFactory.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilterFactory.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilterFactory.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/WaybackRequest.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/WaybackContext.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/CompositeExclusionFilterFactory.java =================================================================== --- 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/CompositeExclusionFilterFactory.java 2007-09-18 23:44:09 UTC (rev 1995) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/CompositeExclusionFilterFactory.java 2007-09-18 23:46:12 UTC (rev 1996) @@ -28,7 +28,6 @@ import java.util.Iterator; import org.archive.wayback.core.SearchResult; -import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.resourceindex.filters.CompositeExclusionFilter; import org.archive.wayback.util.ObjectFilter; @@ -55,11 +54,11 @@ /* (non-Javadoc) * @see org.archive.wayback.resourceindex.ExclusionFilterFactory#get() */ - public ObjectFilter<SearchResult> get(WaybackRequest wbRequest) { + public ObjectFilter<SearchResult> get() { Iterator<ExclusionFilterFactory> itr = factories.iterator(); CompositeExclusionFilter filter = new CompositeExclusionFilter(); while(itr.hasNext()) { - filter.addComponent(itr.next().get(wbRequest)); + filter.addComponent(itr.next().get()); } return filter; } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/ExclusionFilterFactory.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/ExclusionFilterFactory.java 2007-09-18 23:44:09 UTC (rev 1995) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/ExclusionFilterFactory.java 2007-09-18 23:46:12 UTC (rev 1996) @@ -25,7 +25,6 @@ package org.archive.wayback.accesscontrol; import org.archive.wayback.core.SearchResult; -import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.util.ObjectFilter; /** * @@ -35,11 +34,10 @@ */ public interface ExclusionFilterFactory { /** - * @param wbRequest * @return an ObjectFilter object that filters records based on - * some set 
of exclusion rules appropriate to the wbRequest. + * some set of exclusion rules */ - public ObjectFilter<SearchResult> get(WaybackRequest wbRequest); + public ObjectFilter<SearchResult> get(); /** * close any resources used by this ExclusionFilter system. */ Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/ExternalExcluder.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/ExternalExcluder.java 2007-09-18 23:44:09 UTC (rev 1995) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/ExternalExcluder.java 2007-09-18 23:46:12 UTC (rev 1996) @@ -28,7 +28,6 @@ import org.archive.net.LaxURI; import org.archive.wayback.WaybackConstants; import org.archive.wayback.core.SearchResult; -import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.util.ObjectFilter; import org.springframework.beans.factory.xml.XmlBeanFactory; import org.springframework.core.io.FileSystemResource; @@ -94,8 +93,7 @@ * at configPath */ public static ExternalExcluder getExcluder(String configPath) { - WaybackRequest wbRequest = null; - return new ExternalExcluder(getFactory(configPath).get(wbRequest)); + return new ExternalExcluder(getFactory(configPath).get()); } /** * shutdown underlying resources. 
Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/remote/RemoteExclusionFilterFactory.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/remote/RemoteExclusionFilterFactory.java 2007-09-18 23:44:09 UTC (rev 1995) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/remote/RemoteExclusionFilterFactory.java 2007-09-18 23:46:12 UTC (rev 1996) @@ -26,7 +26,6 @@ import org.archive.wayback.accesscontrol.ExclusionFilterFactory; import org.archive.wayback.core.SearchResult; -import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.util.ObjectFilter; /** @@ -44,7 +43,7 @@ /* (non-Javadoc) * @see org.archive.wayback.resourceindex.ExclusionFilterFactory#get() */ - public ObjectFilter<SearchResult> get(WaybackRequest wbRequest) { + public ObjectFilter<SearchResult> get() { return new RemoteExclusionFilter(exclusionUrlPrefix, exclusionUserAgent); } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilterFactory.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilterFactory.java 2007-09-18 23:44:09 UTC (rev 1995) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilterFactory.java 2007-09-18 23:46:12 UTC (rev 1996) @@ -26,7 +26,6 @@ import org.archive.wayback.accesscontrol.ExclusionFilterFactory; import org.archive.wayback.core.SearchResult; -import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.liveweb.LiveWebCache; import org.archive.wayback.util.ObjectFilter; @@ -45,7 +44,7 @@ /* (non-Javadoc) * @see 
org.archive.wayback.resourceindex.ExclusionFilterFactory#get() */ - public ObjectFilter<SearchResult> get(WaybackRequest wbRequest) { + public ObjectFilter<SearchResult> get() { return new RobotExclusionFilter(webCache,userAgent,maxCacheMS); } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilter.java 2007-09-18 23:44:09 UTC (rev 1995) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilter.java 2007-09-18 23:46:12 UTC (rev 1996) @@ -58,7 +58,9 @@ if(nextSearch == null) { break; } + System.err.println("EXCLUSION-MAP:Checking " + nextSearch); if(exclusionMap.containsKey(nextSearch)) { + System.err.println("EXCLUSION-MAP: EXCLUDED: \"" + nextSearch + "\" (" + url +")"); return true; } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilterFactory.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilterFactory.java 2007-09-18 23:44:09 UTC (rev 1995) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilterFactory.java 2007-09-18 23:46:12 UTC (rev 1996) @@ -32,7 +32,6 @@ import org.archive.wayback.accesscontrol.ExclusionFilterFactory; import org.archive.wayback.core.SearchResult; -import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.surt.SURTTokenizer; import org.archive.wayback.util.CloseableIterator; import org.archive.wayback.util.ObjectFilter; @@ 
-48,7 +47,7 @@ private static final Logger LOGGER = Logger.getLogger(StaticMapExclusionFilterFactory.class.getName()); - private int checkInterval = 10; + private int checkInterval = 0; private Map<String,Object> currentMap = null; private File file = null; long lastUpdated = 0; @@ -64,7 +63,9 @@ */ public void init() throws IOException { reloadFile(); - startUpdateThread(); + if(checkInterval > 0) { + startUpdateThread(); + } } protected void reloadFile() throws IOException { @@ -95,6 +96,7 @@ } String surt = line.startsWith("(") ? line : SURTTokenizer.prefixKey(line); + System.err.println("EXCLUSION-MAP: adding " + surt); newMap.put(surt, null); } itr.close(); @@ -105,7 +107,7 @@ * @param wbRequest * @return SearchResultFilter */ - public ObjectFilter<SearchResult> get(WaybackRequest wbRequest) { + public ObjectFilter<SearchResult> get() { if(currentMap == null) { return null; } @@ -166,14 +168,14 @@ } /** - * @return the checkInterval + * @return the checkInterval in seconds */ public int getCheckInterval() { return checkInterval; } /** - * @param checkInterval the checkInterval to set + * @param checkInterval the checkInterval in seconds to set */ public void setCheckInterval(int checkInterval) { this.checkInterval = checkInterval; @@ -197,7 +199,6 @@ * @see org.archive.wayback.accesscontrol.ExclusionFilterFactory#shutdown() */ public void shutdown() { - // TODO Auto-generated method stub stopUpdateThread(); } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/WaybackRequest.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/WaybackRequest.java 2007-09-18 23:44:09 UTC (rev 1995) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/WaybackRequest.java 2007-09-18 23:46:12 UTC (rev 1996) @@ -37,6 +37,7 @@ import org.archive.net.UURIFactory; import 
org.archive.wayback.WaybackConstants; import org.archive.wayback.requestparser.OpenSearchRequestParser; +import org.archive.wayback.util.ObjectFilter; import org.archive.wayback.util.StringFormatter; import org.archive.wayback.webapp.WaybackContext; @@ -56,6 +57,7 @@ private String contextPrefix = null; private String serverPrefix = null; private WaybackContext context = null; + private ObjectFilter<SearchResult> exclusionFilter = null; private HashMap<String,String> filters = new HashMap<String,String>(); @@ -391,4 +393,12 @@ public void setContext(WaybackContext context) { this.context = context; } + + public ObjectFilter<SearchResult> getExclusionFilter() { + return exclusionFilter; + } + + public void setExclusionFilter(ObjectFilter<SearchResult> exclusionFilter) { + this.exclusionFilter = exclusionFilter; + } } \ No newline at end of file Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java 2007-09-18 23:44:09 UTC (rev 1995) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java 2007-09-18 23:46:12 UTC (rev 1996) @@ -43,7 +43,6 @@ import org.archive.wayback.resourceindex.filters.UrlPrefixMatchFilter; import org.archive.wayback.resourceindex.filters.WindowEndFilter; import org.archive.wayback.resourceindex.filters.WindowStartFilter; -import org.archive.wayback.accesscontrol.ExclusionFilterFactory; import org.archive.wayback.core.CaptureSearchResults; import org.archive.wayback.core.SearchResult; import org.archive.wayback.core.SearchResults; @@ -76,24 +75,8 @@ protected SearchResultSource source; - private ExclusionFilterFactory exclusionFactory = null; - private UrlCanonicalizer canonicalizer = new UrlCanonicalizer(); - 
private ObjectFilter<SearchResult> getExclusionFilter( - WaybackRequest wbRequest) - throws ResourceIndexNotAvailableException { - ObjectFilter<SearchResult> filter = null; - if(exclusionFactory != null) { - filter = exclusionFactory.get(wbRequest); - if(filter == null) { - throw new ResourceIndexNotAvailableException("Exclusion " + - "Service Unavailable"); - } - } - return filter; - } - private void filterRecords(CloseableIterator<SearchResult> itr, ObjectFilter<SearchResult> filter, SearchResults results, boolean forwards) throws IOException { @@ -211,7 +194,7 @@ GuardRailFilter guardrail = new GuardRailFilter(maxRecords); // checks an exclusion service for every matching record - ObjectFilter<SearchResult> exclusion = getExclusionFilter(wbRequest); + ObjectFilter<SearchResult> exclusion = wbRequest.getExclusionFilter(); // count how many results got to the ExclusionFilter: CounterFilter preExCounter = new CounterFilter(); @@ -267,18 +250,15 @@ reverseFilters.addFilter(selfRedirectFilter); // possibly filter via exclusions: - if(exclusion == null) { - forwardFilters.addFilter(finalCounter); - reverseFilters.addFilter(finalCounter); - } else { + if(exclusion != null) { forwardFilters.addFilter(preExCounter); forwardFilters.addFilter(exclusion); - forwardFilters.addFilter(finalCounter); reverseFilters.addFilter(preExCounter); reverseFilters.addFilter(exclusion); - reverseFilters.addFilter(finalCounter); } + forwardFilters.addFilter(finalCounter); + reverseFilters.addFilter(finalCounter); int resultsPerDirection = (int) Math.floor(resultsPerPage / 2); if (resultsPerDirection * 2 == resultsPerPage) { @@ -318,13 +298,11 @@ } filters.addFilter(new EndDateFilter(endDate)); // possibly filter via exclusions: - if (exclusion == null) { - filters.addFilter(finalCounter); - } else { + if (exclusion != null) { filters.addFilter(preExCounter); filters.addFilter(exclusion); - filters.addFilter(finalCounter); } + filters.addFilter(finalCounter); startKey = keyUrl + " " + 
startDate; // add the start and end windowing filters: @@ -355,13 +333,12 @@ // possibly filter via exclusions: if (exclusion == null) { filters.addFilter(new CaptureToUrlResultFilter()); - filters.addFilter(finalCounter); } else { filters.addFilter(preExCounter); filters.addFilter(exclusion); filters.addFilter(new CaptureToUrlResultFilter()); - filters.addFilter(finalCounter); } + filters.addFilter(finalCounter); startKey = keyUrl; // add the start and end windowing filters: @@ -431,12 +408,4 @@ public void setSource(SearchResultSource source) { this.source = source; } - - /** - * @param exclusionFactory the exclusionFactory to set - */ - public void setExclusionFactory(ExclusionFilterFactory exclusionFactory) { - this.exclusionFactory = exclusionFactory; - } - } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/WaybackContext.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/WaybackContext.java 2007-09-18 23:44:09 UTC (rev 1995) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/WaybackContext.java 2007-09-18 23:46:12 UTC (rev 1996) @@ -39,20 +39,23 @@ import org.archive.wayback.ResourceStore; import org.archive.wayback.ResultURIConverter; import org.archive.wayback.WaybackConstants; +import org.archive.wayback.accesscontrol.ExclusionFilterFactory; import org.archive.wayback.core.CaptureSearchResults; import org.archive.wayback.core.Resource; import org.archive.wayback.core.SearchResult; import org.archive.wayback.core.SearchResults; import org.archive.wayback.core.UIResults; import org.archive.wayback.core.WaybackRequest; +import org.archive.wayback.exception.AuthenticationControlException; import org.archive.wayback.exception.BadQueryException; import org.archive.wayback.exception.ResourceNotAvailableException; import 
org.archive.wayback.exception.WaybackException; +import org.archive.wayback.util.operator.BooleanOperator; import org.springframework.beans.factory.BeanNameAware; /** * Retains all information about a particular Wayback configuration - * withing a ServletContext, including holding references to the + * within a ServletContext, including holding references to the * implementation instances of the primary Wayback classes: * * ResourceIndex @@ -75,6 +78,8 @@ private RequestParser parser = null; private ResultURIConverter uriConverter = null; private Properties configs = null; + private ExclusionFilterFactory exclusionFactory = null; + private BooleanOperator<WaybackRequest> authentication = null; /** * @@ -255,7 +260,15 @@ wbRequest.setContext(this); handled = true; wbRequest.setContextPrefix(getAbsoluteLocalPrefix(httpRequest)); - + if(authentication != null) { + if(!authentication.isTrue(wbRequest)) { + throw new AuthenticationControlException("Not authorized"); + } + } + + if(exclusionFactory != null) { + wbRequest.setExclusionFilter(exclusionFactory.get()); + } if(wbRequest.isReplayRequest()) { handleReplay(wbRequest,httpRequest,httpResponse); @@ -270,9 +283,10 @@ } catch (BadQueryException e) { query.renderException(httpRequest, httpResponse, wbRequest, e); + } catch (AuthenticationControlException e) { + query.renderException(httpRequest, httpResponse, wbRequest, e); } - return handled; } @@ -414,4 +428,20 @@ public void setUseServerName(boolean useServerName) { this.useServerName = useServerName; } + + public ExclusionFilterFactory getExclusionFactory() { + return exclusionFactory; + } + + public void setExclusionFactory(ExclusionFilterFactory exclusionFactory) { + this.exclusionFactory = exclusionFactory; + } + + public BooleanOperator<WaybackRequest> getAuthentication() { + return authentication; + } + + public void setAuthentication(BooleanOperator<WaybackRequest> authentication) { + this.authentication = authentication; + } } This was sent by the 
SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2007-09-28 18:43:29
|
Revision: 2014 http://archive-access.svn.sourceforge.net/archive-access/?rev=2014&view=rev Author: bradtofel Date: 2007-09-28 11:43:31 -0700 (Fri, 28 Sep 2007) Log Message: ----------- REFACTOR: changed name of WaybackContext >> AccessPoint Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/RequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/UIResults.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/WaybackRequest.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/proxy/ProxyReplayRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/BaseRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/CompositeRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/FormRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/OpenSearchRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/PathRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/NutchResourceIndex.java Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java Removed Paths: ------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/WaybackContext.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/RequestParser.java 
=================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/RequestParser.java 2007-09-28 00:55:20 UTC (rev 2013) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/RequestParser.java 2007-09-28 18:43:31 UTC (rev 2014) @@ -28,7 +28,7 @@ import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.exception.BadQueryException; -import org.archive.wayback.webapp.WaybackContext; +import org.archive.wayback.webapp.AccessPoint; /** * @@ -47,7 +47,7 @@ * @throws BadQueryException */ public abstract WaybackRequest parse(HttpServletRequest httpRequest, - WaybackContext wbContext) throws BadQueryException; + AccessPoint wbContext) throws BadQueryException; /** * @param maxRecords */ Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/UIResults.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/UIResults.java 2007-09-28 00:55:20 UTC (rev 2013) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/UIResults.java 2007-09-28 18:43:31 UTC (rev 2014) @@ -29,7 +29,7 @@ import javax.servlet.http.HttpServletRequest; import org.archive.wayback.util.StringFormatter; -import org.archive.wayback.webapp.WaybackContext; +import org.archive.wayback.webapp.AccessPoint; /** * @@ -189,7 +189,7 @@ */ public String getContextConfig(final String configName) { String configValue = null; - WaybackContext context = getWbRequest().getContext(); + AccessPoint context = getWbRequest().getContext(); if(context != null) { Properties configs = context.getConfigs(); if(configs != null) { Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/WaybackRequest.java 
=================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/WaybackRequest.java 2007-09-28 00:55:20 UTC (rev 2013) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/WaybackRequest.java 2007-09-28 18:43:31 UTC (rev 2014) @@ -39,7 +39,7 @@ import org.archive.wayback.requestparser.OpenSearchRequestParser; import org.archive.wayback.util.ObjectFilter; import org.archive.wayback.util.StringFormatter; -import org.archive.wayback.webapp.WaybackContext; +import org.archive.wayback.webapp.AccessPoint; /** * Abstraction of all the data associated with a users request to the Wayback @@ -56,7 +56,7 @@ private String contextPrefix = null; private String serverPrefix = null; - private WaybackContext context = null; + private AccessPoint context = null; private ObjectFilter<SearchResult> exclusionFilter = null; private HashMap<String,String> filters = new HashMap<String,String>(); @@ -383,14 +383,14 @@ /** * @return the context */ - public WaybackContext getContext() { + public AccessPoint getContext() { return context; } /** * @param context the context to set */ - public void setContext(WaybackContext context) { + public void setContext(AccessPoint context) { this.context = context; } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixRequestParser.java 2007-09-28 00:55:20 UTC (rev 2013) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixRequestParser.java 2007-09-28 18:43:31 UTC (rev 2014) @@ -35,7 +35,7 @@ import org.archive.wayback.core.Timestamp; import org.archive.wayback.core.WaybackRequest; import 
org.archive.wayback.exception.BadQueryException; -import org.archive.wayback.webapp.WaybackContext; +import org.archive.wayback.webapp.AccessPoint; /** * @@ -75,7 +75,7 @@ * @see org.archive.wayback.RequestParser#parse(javax.servlet.http.HttpServletRequest, org.archive.wayback.webapp.WaybackContext) */ public WaybackRequest parse(HttpServletRequest httpRequest, - WaybackContext wbContext) throws BadQueryException { + AccessPoint wbContext) throws BadQueryException { WaybackRequest wbRequest = null; String server = httpRequest.getServerName() + Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/proxy/ProxyReplayRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/proxy/ProxyReplayRequestParser.java 2007-09-28 00:55:20 UTC (rev 2013) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/proxy/ProxyReplayRequestParser.java 2007-09-28 18:43:31 UTC (rev 2014) @@ -35,7 +35,7 @@ import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.exception.BadQueryException; import org.archive.wayback.requestparser.BaseRequestParser; -import org.archive.wayback.webapp.WaybackContext; +import org.archive.wayback.webapp.AccessPoint; /** * @@ -66,7 +66,7 @@ */ @Override public WaybackRequest parse(HttpServletRequest httpRequest, - WaybackContext wbContext) throws BadQueryException { + AccessPoint wbContext) throws BadQueryException { if (isLocalRequest(httpRequest)) { // local means query: let the following RequestParsers have a go Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/BaseRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/BaseRequestParser.java 2007-09-28 00:55:20 
UTC (rev 2013) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/BaseRequestParser.java 2007-09-28 18:43:31 UTC (rev 2014) @@ -33,7 +33,7 @@ import org.archive.wayback.core.Timestamp; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.exception.BadQueryException; -import org.archive.wayback.webapp.WaybackContext; +import org.archive.wayback.webapp.AccessPoint; /** * Class that implements the RequestParser interface, and also understands how @@ -133,7 +133,7 @@ * @see org.archive.wayback.RequestParser#parse(javax.servlet.http.HttpServletRequest) */ public abstract WaybackRequest parse(HttpServletRequest httpRequest, - WaybackContext wbContext) throws BadQueryException; + AccessPoint wbContext) throws BadQueryException; /** * @return the maxRecords Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/CompositeRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/CompositeRequestParser.java 2007-09-28 00:55:20 UTC (rev 2013) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/CompositeRequestParser.java 2007-09-28 18:43:31 UTC (rev 2014) @@ -29,7 +29,7 @@ import org.archive.wayback.RequestParser; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.exception.BadQueryException; -import org.archive.wayback.webapp.WaybackContext; +import org.archive.wayback.webapp.AccessPoint; /** * @@ -64,7 +64,7 @@ * @see org.archive.wayback.RequestParser#parse(javax.servlet.http.HttpServletRequest) */ public WaybackRequest parse(HttpServletRequest httpRequest, - WaybackContext wbContext) throws BadQueryException { + AccessPoint wbContext) throws BadQueryException { WaybackRequest wbRequest = null; Modified: 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/FormRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/FormRequestParser.java 2007-09-28 00:55:20 UTC (rev 2013) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/FormRequestParser.java 2007-09-28 18:43:31 UTC (rev 2014) @@ -32,7 +32,7 @@ import org.archive.wayback.WaybackConstants; import org.archive.wayback.core.WaybackRequest; -import org.archive.wayback.webapp.WaybackContext; +import org.archive.wayback.webapp.AccessPoint; /** * @@ -51,7 +51,7 @@ * WaybackRequest object, except the Submit button argument. */ public WaybackRequest parse(HttpServletRequest httpRequest, - WaybackContext wbContext) { + AccessPoint wbContext) { WaybackRequest wbRequest = null; @SuppressWarnings("unchecked") Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/OpenSearchRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/OpenSearchRequestParser.java 2007-09-28 00:55:20 UTC (rev 2013) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/OpenSearchRequestParser.java 2007-09-28 18:43:31 UTC (rev 2014) @@ -32,7 +32,7 @@ import org.archive.wayback.WaybackConstants; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.exception.BadQueryException; -import org.archive.wayback.webapp.WaybackContext; +import org.archive.wayback.webapp.AccessPoint; /** * @@ -74,7 +74,7 @@ * info from the httpRequest object. 
*/ public WaybackRequest parse(HttpServletRequest httpRequest, - WaybackContext wbContext) throws BadQueryException { + AccessPoint wbContext) throws BadQueryException { WaybackRequest wbRequest = null; @SuppressWarnings("unchecked") Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/PathRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/PathRequestParser.java 2007-09-28 00:55:20 UTC (rev 2013) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/PathRequestParser.java 2007-09-28 18:43:31 UTC (rev 2014) @@ -28,7 +28,7 @@ import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.exception.BadQueryException; -import org.archive.wayback.webapp.WaybackContext; +import org.archive.wayback.webapp.AccessPoint; /** * Subclass of RequestParser that acquires key request information from the @@ -51,7 +51,7 @@ */ @Override public WaybackRequest parse(HttpServletRequest httpRequest, - WaybackContext wbContext) throws BadQueryException { + AccessPoint wbContext) throws BadQueryException { String queryString = httpRequest.getQueryString(); String origRequestPath = httpRequest.getRequestURI(); Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/NutchResourceIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/NutchResourceIndex.java 2007-09-28 00:55:20 UTC (rev 2013) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/NutchResourceIndex.java 2007-09-28 18:43:31 UTC (rev 2014) @@ -36,10 +36,10 @@ import org.archive.wayback.ResourceIndex; import org.archive.wayback.WaybackConstants; +import 
org.archive.wayback.core.CaptureSearchResults; import org.archive.wayback.core.SearchResult; import org.archive.wayback.core.SearchResults; import org.archive.wayback.core.Timestamp; -import org.archive.wayback.core.UrlSearchResults; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.exception.AccessControlException; import org.archive.wayback.exception.BadQueryException; @@ -68,11 +68,12 @@ private static final String NUTCH_NS = "http://www.nutch.org/opensearchrss/1.0/"; private String searchUrlBase; - private DocumentBuilderFactory factory; + private DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); private DocumentBuilder builder; private static final String NUTCH_ARCNAME = "arcname"; private static final String NUTCH_ARCOFFSET = "arcoffset"; private static final String NUTCH_ARCDATE = "tstamp"; + private static final String NUTCH_ARCDATE_ALT = "arcdate"; private static final String NUTCH_DIGEST = "digest"; private static final String NUTCH_PRIMARY_TYPE = "primaryType"; private static final String NUTCH_SUB_TYPE = "subType"; @@ -95,7 +96,7 @@ LOGGER.info("initializing NutchResourceIndex..."); LOGGER.info("Using base search url " + this.searchUrlBase); - this.factory = DocumentBuilderFactory.newInstance(); +// this.factory = DocumentBuilderFactory.newInstance(); this.factory.setNamespaceAware(true); try { this.builder = this.factory.newDocumentBuilder(); @@ -129,7 +130,15 @@ e.getMessage()); } - SearchResults results = new UrlSearchResults(); + SearchResults results; + String type = wbRequest.get(WaybackConstants.REQUEST_TYPE); + if(type.equals(WaybackConstants.REQUEST_REPLAY_QUERY) || + type.equals(WaybackConstants.REQUEST_URL_QUERY)) { + results = new CaptureSearchResults(); + } else { + // TODO: this is wrong, but needs exploration into what NutchWax can actually do. 
+ throw new BadQueryException("Unable to perform path prefix requests with this index type"); + } NodeList channel = getSearchChannel(document); NodeList nodes = getSearchItems(document); @@ -174,7 +183,8 @@ return results; } - private SearchResult elementToSearchResult(Element e) { + private SearchResult elementToSearchResult(Element e) + throws ResourceIndexNotAvailableException { SearchResult result = new SearchResult(); @@ -184,6 +194,12 @@ // The date in nutchwax is now named 'tstamp' and its // 17 characters rather than 14. Pass first 14 only. String d = getNodeNutchContent(e,NUTCH_ARCDATE); + if(d == null) { + d = getNodeNutchContent(e,NUTCH_ARCDATE_ALT); + } + if(d == null) { + throw new ResourceIndexNotAvailableException("Missing arcdate field in search results"); + } if (d.length() == 17) { d = d.substring(0, 14); } @@ -289,8 +305,8 @@ } // when searching for exacturl, we are mostly // interested in the different versions over the time - ms.append("&sort=date"); - ms.append("&reverse=true"); +// ms.append("&sort=date"); +// ms.append("&reverse=true"); } ms.append("&hitsPerPage=").append(hitsPerPage); ms.append("&start=").append(start); Copied: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java (from rev 1996, trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/WaybackContext.java) =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java 2007-09-28 18:43:31 UTC (rev 2014) @@ -0,0 +1,447 @@ +/* WaybackContext + * + * $Id$ + * + * Created on 5:37:31 PM Apr 20, 2007. + * + * Copyright (C) 2007 Internet Archive. + * + * This file is part of wayback-webapp. 
+ * + * wayback-webapp is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback-webapp is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with wayback-webapp; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.webapp; + +import java.io.IOException; +import java.util.Properties; + +import javax.servlet.RequestDispatcher; +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.archive.wayback.QueryRenderer; +import org.archive.wayback.ReplayDispatcher; +import org.archive.wayback.RequestParser; +import org.archive.wayback.ResourceIndex; +import org.archive.wayback.ResourceStore; +import org.archive.wayback.ResultURIConverter; +import org.archive.wayback.WaybackConstants; +import org.archive.wayback.accesscontrol.ExclusionFilterFactory; +import org.archive.wayback.core.CaptureSearchResults; +import org.archive.wayback.core.Resource; +import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.SearchResults; +import org.archive.wayback.core.UIResults; +import org.archive.wayback.core.WaybackRequest; +import org.archive.wayback.exception.AuthenticationControlException; +import org.archive.wayback.exception.BadQueryException; +import org.archive.wayback.exception.ResourceNotAvailableException; +import org.archive.wayback.exception.WaybackException; +import org.archive.wayback.util.operator.BooleanOperator; +import 
org.springframework.beans.factory.BeanNameAware; + +/** + * Retains all information about a particular Wayback configuration + * within a ServletContext, including holding references to the + * implementation instances of the primary Wayback classes: + * + * ResourceIndex + * ResourceStore + * QueryUI + * ReplayUI + * + * @author brad + * @version $Date$, $Revision$ + */ +public class AccessPoint implements RequestContext, BeanNameAware { + + private boolean useServerName = false; + private int contextPort = 0; + private String contextName = null; + private ResourceIndex index = null; + private ResourceStore store = null; + private ReplayDispatcher replay = null; + private QueryRenderer query = null; + private RequestParser parser = null; + private ResultURIConverter uriConverter = null; + private Properties configs = null; + private ExclusionFilterFactory exclusionFactory = null; + private BooleanOperator<WaybackRequest> authentication = null; + + /** + * + */ + public AccessPoint() { + + } + + /* (non-Javadoc) + * @see org.springframework.beans.factory.BeanNameAware#setBeanName(java.lang.String) + */ + public void setBeanName(String beanName) { + // TODO Auto-generated method stub + this.contextName = ""; + int idx = beanName.indexOf(":"); + if(idx > -1) { + contextPort = Integer.valueOf(beanName.substring(0,idx)); + contextName = beanName.substring(idx + 1); + } else { + try { + this.contextPort = Integer.valueOf(beanName); + } catch(NumberFormatException e) { + e.printStackTrace(); + } + } + } + /** + * @param httpRequest + * @return the prefix of paths recieved by this server that are handled by + * this WaybackContext, including the trailing '/' + */ + public String getContextPath(HttpServletRequest httpRequest) { +// if(contextPort != 0) { +// return httpRequest.getContextPath(); +// } + String httpContextPath = httpRequest.getContextPath(); + if(contextName.length() == 0) { + return httpContextPath + "/"; + } + return httpContextPath + "/" + contextName + 
"/"; + } + + /** + * @param httpRequest + * @param includeQuery + * @return the portion of the request following the path to this context + * without leading '/' + */ + private String translateRequest(HttpServletRequest httpRequest, + boolean includeQuery) { + + String origRequestPath = httpRequest.getRequestURI(); + if(includeQuery) { + String queryString = httpRequest.getQueryString(); + if (queryString != null) { + origRequestPath += "?" + queryString; + } + } + String contextPath = getContextPath(httpRequest); + if (!origRequestPath.startsWith(contextPath)) { + return null; + } + return origRequestPath.substring(contextPath.length()); + } + + /** + * @param httpRequest + * @return the portion of the request following the path to this context, + * including any query information,without leading '/' + */ + public String translateRequestPathQuery(HttpServletRequest httpRequest) { + return translateRequest(httpRequest,true); + } + + /** + * @param httpRequest + * @return the portion of the request following the path to this context, + * excluding any query information, without leading '/' + */ + public String translateRequestPath(HttpServletRequest httpRequest) { + return translateRequest(httpRequest,false); + } + + /** + * Construct an absolute URL that points to the root of the context that + * recieved the request, including a trailing "/". + * + * @return String absolute URL pointing to the Context root where the + * request was revieved. 
+ */ + private String getAbsoluteContextPrefix(HttpServletRequest httpRequest, + boolean useRequestServer) { + + StringBuilder prefix = new StringBuilder(); + prefix.append(WaybackConstants.HTTP_URL_PREFIX); + String waybackPort = null; + if(useRequestServer) { + prefix.append(httpRequest.getLocalName()); + waybackPort = String.valueOf(httpRequest.getLocalPort()); + } else { + prefix.append(httpRequest.getServerName()); + waybackPort = String.valueOf(httpRequest.getServerPort()); + } + if (!waybackPort.equals(WaybackConstants.HTTP_DEFAULT_PORT)) { + prefix.append(":").append(waybackPort); + } + String contextPath = getContextPath(httpRequest); +// if(contextPath.length() > 1) { +// prefix.append(contextPath); +// } else { +// prefix.append(contextPath); +// } + prefix.append(contextPath); + return prefix.toString(); + } + + /** + * @param httpRequest + * @return absolute URL pointing to the base of this WaybackContext, using + * Server and port information from the HttpServletRequest argument. + */ + public String getAbsoluteServerPrefix(HttpServletRequest httpRequest) { + return getAbsoluteContextPrefix(httpRequest, true); + } + + /** + * @param httpRequest + * @return absolute URL pointing to the base of this WaybackContext, using + * Canonical server and port information. 
+ */ + public String getAbsoluteLocalPrefix(HttpServletRequest httpRequest) { + return getAbsoluteContextPrefix(httpRequest, useServerName); + } + + private boolean dispatchLocal(HttpServletRequest httpRequest, + HttpServletResponse httpResponse) + throws ServletException, IOException { + + WaybackRequest wbRequest = new WaybackRequest(); + wbRequest.setContextPrefix(getAbsoluteLocalPrefix(httpRequest)); + wbRequest.setContext(this); + UIResults uiResults = new UIResults(wbRequest); + String translated = "/" + translateRequestPathQuery(httpRequest); + uiResults.storeInRequest(httpRequest,translated); + RequestDispatcher dispatcher = null; +// // special case for the front '/' page: +// if(translated.length() == 0) { +// translated = "/"; +// } else { +// translated = "/" + translated; +// } + dispatcher = httpRequest.getRequestDispatcher(translated); + if(dispatcher != null) { + dispatcher.forward(httpRequest, httpResponse); + return true; + } + return false; + } + + /** + * @param httpRequest + * @param httpResponse + * @return true if the request was actually handled + * @throws ServletException + * @throws IOException + */ + public boolean handleRequest(HttpServletRequest httpRequest, + HttpServletResponse httpResponse) + throws ServletException, IOException { + + WaybackRequest wbRequest = null; + boolean handled = false; + + try { + wbRequest = parser.parse(httpRequest, this); + + if(wbRequest != null) { + wbRequest.setContext(this); + handled = true; + wbRequest.setContextPrefix(getAbsoluteLocalPrefix(httpRequest)); + if(authentication != null) { + if(!authentication.isTrue(wbRequest)) { + throw new AuthenticationControlException("Not authorized"); + } + } + + if(exclusionFactory != null) { + wbRequest.setExclusionFilter(exclusionFactory.get()); + } + if(wbRequest.isReplayRequest()) { + + handleReplay(wbRequest,httpRequest,httpResponse); + + } else { + + handleQuery(wbRequest,httpRequest,httpResponse); + } + } else { + handled = 
dispatchLocal(httpRequest,httpResponse); + } + + } catch (BadQueryException e) { + query.renderException(httpRequest, httpResponse, wbRequest, e); + } catch (AuthenticationControlException e) { + query.renderException(httpRequest, httpResponse, wbRequest, e); + } + + return handled; + } + + private void handleReplay(WaybackRequest wbRequest, + HttpServletRequest httpRequest, HttpServletResponse httpResponse) + throws IOException, ServletException { + Resource resource = null; + try { + SearchResults results = index.query(wbRequest); + if(!(results instanceof CaptureSearchResults)) { + throw new ResourceNotAvailableException("Bad results..."); + } + CaptureSearchResults captureResults = (CaptureSearchResults) results; + + // TODO: check which versions are actually accessible right now? + SearchResult closest = captureResults.getClosest(wbRequest); + resource = store.retrieveResource(closest); + + replay.renderResource(httpRequest, httpResponse, wbRequest, + closest, resource, uriConverter, captureResults); + } catch(WaybackException e) { + replay.renderException(httpRequest, httpResponse, wbRequest, e); + } finally { + if(resource != null) { + resource.close(); + } + } + } + + private void handleQuery(WaybackRequest wbRequest, + HttpServletRequest httpRequest, HttpServletResponse httpResponse) + throws ServletException, IOException { + + try { + SearchResults results = index.query(wbRequest); + if(results.getResultsType().equals( + WaybackConstants.RESULTS_TYPE_CAPTURE)) { + + query.renderUrlResults(httpRequest,httpResponse,wbRequest, + results,uriConverter); + + } else { + query.renderUrlPrefixResults(httpRequest,httpResponse,wbRequest, + results,uriConverter); + } + } catch(WaybackException e) { + query.renderException(httpRequest, httpResponse, wbRequest, e); + } + } + + /** + * @param contextPort the contextPort to set + */ + public void setContextPort(int contextPort) { + this.contextPort = contextPort; + } + + /** + * @param contextName the contextName to set 
+ */ + public void setContextName(String contextName) { + this.contextName = contextName; + } + + /** + * @param index the index to set + */ + public void setIndex(ResourceIndex index) { + this.index = index; + } + + /** + * @param store the store to set + */ + public void setStore(ResourceStore store) { + this.store = store; + } + + /** + * @param replay the replay to set + */ + public void setReplay(ReplayDispatcher replay) { + this.replay = replay; + } + + /** + * @param query the query to set + */ + public void setQuery(QueryRenderer query) { + this.query = query; + } + + /** + * @param parser the parser to set + */ + public void setParser(RequestParser parser) { + this.parser = parser; + } + + /** + * @param uriConverter the uriConverter to set + */ + public void setUriConverter(ResultURIConverter uriConverter) { + this.uriConverter = uriConverter; + } + + + /** + * @return the contextPort + */ + public int getContextPort() { + return contextPort; + } + + /** + * @return the configs + */ + public Properties getConfigs() { + return configs; + } + + /** + * @param configs the configs to set + */ + public void setConfigs(Properties configs) { + this.configs = configs; + } + + /** + * @return the useServerName + */ + public boolean isUseServerName() { + return useServerName; + } + + /** + * @param useServerName the useServerName to set + */ + public void setUseServerName(boolean useServerName) { + this.useServerName = useServerName; + } + + public ExclusionFilterFactory getExclusionFactory() { + return exclusionFactory; + } + + public void setExclusionFactory(ExclusionFilterFactory exclusionFactory) { + this.exclusionFactory = exclusionFactory; + } + + public BooleanOperator<WaybackRequest> getAuthentication() { + return authentication; + } + + public void setAuthentication(BooleanOperator<WaybackRequest> authentication) { + this.authentication = authentication; + } +} Deleted: 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/WaybackContext.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/WaybackContext.java 2007-09-28 00:55:20 UTC (rev 2013) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/WaybackContext.java 2007-09-28 18:43:31 UTC (rev 2014) @@ -1,447 +0,0 @@ -/* WaybackContext - * - * $Id$ - * - * Created on 5:37:31 PM Apr 20, 2007. - * - * Copyright (C) 2007 Internet Archive. - * - * This file is part of wayback-webapp. - * - * wayback-webapp is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * any later version. - * - * wayback-webapp is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. 
- * - * You should have received a copy of the GNU Lesser Public License - * along with wayback-webapp; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -package org.archive.wayback.webapp; - -import java.io.IOException; -import java.util.Properties; - -import javax.servlet.RequestDispatcher; -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import org.archive.wayback.QueryRenderer; -import org.archive.wayback.ReplayDispatcher; -import org.archive.wayback.RequestParser; -import org.archive.wayback.ResourceIndex; -import org.archive.wayback.ResourceStore; -import org.archive.wayback.ResultURIConverter; -import org.archive.wayback.WaybackConstants; -import org.archive.wayback.accesscontrol.ExclusionFilterFactory; -import org.archive.wayback.core.CaptureSearchResults; -import org.archive.wayback.core.Resource; -import org.archive.wayback.core.SearchResult; -import org.archive.wayback.core.SearchResults; -import org.archive.wayback.core.UIResults; -import org.archive.wayback.core.WaybackRequest; -import org.archive.wayback.exception.AuthenticationControlException; -import org.archive.wayback.exception.BadQueryException; -import org.archive.wayback.exception.ResourceNotAvailableException; -import org.archive.wayback.exception.WaybackException; -import org.archive.wayback.util.operator.BooleanOperator; -import org.springframework.beans.factory.BeanNameAware; - -/** - * Retains all information about a particular Wayback configuration - * within a ServletContext, including holding references to the - * implementation instances of the primary Wayback classes: - * - * ResourceIndex - * ResourceStore - * QueryUI - * ReplayUI - * - * @author brad - * @version $Date$, $Revision$ - */ -public class WaybackContext implements RequestContext, BeanNameAware { - - private boolean useServerName = false; - private int contextPort = 
0; - private String contextName = null; - private ResourceIndex index = null; - private ResourceStore store = null; - private ReplayDispatcher replay = null; - private QueryRenderer query = null; - private RequestParser parser = null; - private ResultURIConverter uriConverter = null; - private Properties configs = null; - private ExclusionFilterFactory exclusionFactory = null; - private BooleanOperator<WaybackRequest> authentication = null; - - /** - * - */ - public WaybackContext() { - - } - - /* (non-Javadoc) - * @see org.springframework.beans.factory.BeanNameAware#setBeanName(java.lang.String) - */ - public void setBeanName(String beanName) { - // TODO Auto-generated method stub - this.contextName = ""; - int idx = beanName.indexOf(":"); - if(idx > -1) { - contextPort = Integer.valueOf(beanName.substring(0,idx)); - contextName = beanName.substring(idx + 1); - } else { - try { - this.contextPort = Integer.valueOf(beanName); - } catch(NumberFormatException e) { - e.printStackTrace(); - } - } - } - /** - * @param httpRequest - * @return the prefix of paths recieved by this server that are handled by - * this WaybackContext, including the trailing '/' - */ - public String getContextPath(HttpServletRequest httpRequest) { -// if(contextPort != 0) { -// return httpRequest.getContextPath(); -// } - String httpContextPath = httpRequest.getContextPath(); - if(contextName.length() == 0) { - return httpContextPath + "/"; - } - return httpContextPath + "/" + contextName + "/"; - } - - /** - * @param httpRequest - * @param includeQuery - * @return the portion of the request following the path to this context - * without leading '/' - */ - private String translateRequest(HttpServletRequest httpRequest, - boolean includeQuery) { - - String origRequestPath = httpRequest.getRequestURI(); - if(includeQuery) { - String queryString = httpRequest.getQueryString(); - if (queryString != null) { - origRequestPath += "?" 
+ queryString; - } - } - String contextPath = getContextPath(httpRequest); - if (!origRequestPath.startsWith(contextPath)) { - return null; - } - return origRequestPath.substring(contextPath.length()); - } - - /** - * @param httpRequest - * @return the portion of the request following the path to this context, - * including any query information,without leading '/' - */ - public String translateRequestPathQuery(HttpServletRequest httpRequest) { - return translateRequest(httpRequest,true); - } - - /** - * @param httpRequest - * @return the portion of the request following the path to this context, - * excluding any query information, without leading '/' - */ - public String translateRequestPath(HttpServletRequest httpRequest) { - return translateRequest(httpRequest,false); - } - - /** - * Construct an absolute URL that points to the root of the context that - * recieved the request, including a trailing "/". - * - * @return String absolute URL pointing to the Context root where the - * request was revieved. 
- */ - private String getAbsoluteContextPrefix(HttpServletRequest httpRequest, - boolean useRequestServer) { - - StringBuilder prefix = new StringBuilder(); - prefix.append(WaybackConstants.HTTP_URL_PREFIX); - String waybackPort = null; - if(useRequestServer) { - prefix.append(httpRequest.getLocalName()); - waybackPort = String.valueOf(httpRequest.getLocalPort()); - } else { - prefix.append(httpRequest.getServerName()); - waybackPort = String.valueOf(httpRequest.getServerPort()); - } - if (!waybackPort.equals(WaybackConstants.HTTP_DEFAULT_PORT)) { - prefix.append(":").append(waybackPort); - } - String contextPath = getContextPath(httpRequest); -// if(contextPath.length() > 1) { -// prefix.append(contextPath); -// } else { -// prefix.append(contextPath); -// } - prefix.append(contextPath); - return prefix.toString(); - } - - /** - * @param httpRequest - * @return absolute URL pointing to the base of this WaybackContext, using - * Server and port information from the HttpServletRequest argument. - */ - public String getAbsoluteServerPrefix(HttpServletRequest httpRequest) { - return getAbsoluteContextPrefix(httpRequest, true); - } - - /** - * @param httpRequest - * @return absolute URL pointing to the base of this WaybackContext, using - * Canonical server and port information. 
- */ - public String getAbsoluteLocalPrefix(HttpServletRequest httpRequest) { - return getAbsoluteContextPrefix(httpRequest, useServerName); - } - - private boolean dispatchLocal(HttpServletRequest httpRequest, - HttpServletResponse httpResponse) - throws ServletException, IOException { - - WaybackRequest wbRequest = new WaybackRequest(); - wbRequest.setContextPrefix(getAbsoluteLocalPrefix(httpRequest)); - wbRequest.setContext(this); - UIResults uiResults = new UIResults(wbRequest); - String translated = "/" + translateRequestPathQuery(httpRequest); - uiResults.storeInRequest(httpRequest,translated); - RequestDispatcher dispatcher = null; -// // special case for the front '/' page: -// if(translated.length() == 0) { -// translated = "/"; -// } else { -// translated = "/" + translated; -// } - dispatcher = httpRequest.getRequestDispatcher(translated); - if(dispatcher != null) { - dispatcher.forward(httpRequest, httpResponse); - return true; - } - return false; - } - - /** - * @param httpRequest - * @param httpResponse - * @return true if the request was actually handled - * @throws ServletException - * @throws IOException - */ - public boolean handleRequest(HttpServletRequest httpRequest, - HttpServletResponse httpResponse) - throws ServletException, IOException { - - WaybackRequest wbRequest = null; - boolean handled = false; - - try { - wbRequest = parser.parse(httpRequest, this); - - if(wbRequest != null) { - wbRequest.setContext(this); - handled = true; - wbRequest.setContextPrefix(getAbsoluteLocalPrefix(httpRequest)); - if(authentication != null) { - if(!authentication.isTrue(wbRequest)) { - throw new AuthenticationControlException("Not authorized"); - } - } - - if(exclusionFactory != null) { - wbRequest.setExclusionFilter(exclusionFactory.get()); - } - if(wbRequest.isReplayRequest()) { - - handleReplay(wbRequest,httpRequest,httpResponse); - - } else { - - handleQuery(wbRequest,httpRequest,httpResponse); - } - } else { - handled = 
dispatchLocal(httpRequest,httpResponse); - } - - } catch (BadQueryException e) { - query.renderException(httpRequest, httpResponse, wbRequest, e); - } catch (AuthenticationControlException e) { - query.renderException(httpRequest, httpResponse, wbRequest, e); - } - - return handled; - } - - private void handleReplay(WaybackRequest wbRequest, - HttpServletRequest httpRequest, HttpServletResponse httpResponse) - throws IOException, ServletException { - Resource resource = null; - try { - SearchResults results = index.query(wbRequest); - if(!(results instanceof CaptureSearchResults)) { - throw new ResourceNotAvailableException("Bad results..."); - } - CaptureSearchResults captureResults = (CaptureSearchResults) results; - - // TODO: check which versions are actually accessible right now? - SearchResult closest = captureResults.getClosest(wbRequest); - resource = store.retrieveResource(closest); - - replay.renderResource(httpRequest, httpResponse, wbRequest, - closest, resource, uriConverter, captureResults); - } catch(WaybackException e) { - replay.renderException(httpRequest, httpResponse, wbRequest, e); - } finally { - if(resource != null) { - resource.close(); - } - } - } - - private void handleQuery(WaybackRequest wbRequest, - HttpServletRequest httpRequest, HttpServletResponse httpResponse) - throws ServletException, IOException { - - try { - SearchResults results = index.query(wbRequest); - if(results.getResultsType().equals( - WaybackConstants.RESULTS_TYPE_CAPTURE)) { - - query.renderUrlResults(httpRequest,httpResponse,wbRequest, - results,uriConverter); - - } else { - query.renderUrlPrefixResults(httpRequest,httpResponse,wbRequest, - results,uriConverter); - } - } catch(WaybackException e) { - query.renderException(httpRequest, httpResponse, wbRequest, e); - } - } - - /** - * @param contextPort the contextPort to set - */ - public void setContextPort(int contextPort) { - this.contextPort = contextPort; - } - - /** - * @param contextName the contextName to set 
- */ - public void setContextName(String contextName) { - this.contextName = contextName; - } - - /** - * @param index the index to set - */ - public void setIndex(ResourceIndex index) { - this.index = index; - } - - /** - * @param store the store to set - */ - public void setStore(ResourceStore store) { - this.store = store; - } - - /** - * @param replay the replay to set - */ - public void setReplay(ReplayDispatcher replay) { - this.replay = replay; - } - - /** - * @param query the query to set - */ - public void setQuery(QueryRenderer query) { - this.query = query; - } - - /** - * @param parser the parser to set - */ - public void setParser(RequestParser parser) { - this.parser = parser; - } - - /** - * @param uriConverter the uriConverter to set - */ - public void setUriConverter(ResultURIConverter uriConverter) { - this.uriConverter = uriConverter; - } - - - /** - * @return the contextPort - */ - public int getContextPort() { - return contextPort; - } - - /** - * @return the configs - */ - public Properties getConfigs() { - return configs; - } - - /** - * @param configs the configs to set - */ - public void setConfigs(Properties configs) { - this.configs = configs; - } - - /** - * @return the useServerName - */ - public boolean isUseServerName() { - return useServerName; - } - - /** - * @param useServerName the useServerName to set - */ - public void setUseServerName(boolean useServerName) { - this.useServerName = useServerName; - } - - public ExclusionFilterFactory getExclusionFactory() { - return exclusionFactory; - } - - public void setExclusionFactory(ExclusionFilterFactory exclusionFactory) { - this.exclusionFactory = exclusionFactory; - } - - public BooleanOperator<WaybackRequest> getAuthentication() { - return authentication; - } - - public void setAuthentication(BooleanOperator<WaybackRequest> authentication) { - this.authentication = authentication; - } -} This was sent by the SourceForge.net collaborative development platform, the world's largest 
Open Source development site. |
From: <bra...@us...> - 2007-11-06 03:48:51
|
Revision: 2073 http://archive-access.svn.sourceforge.net/archive-access/?rev=2073&view=rev Author: bradtofel Date: 2007-11-05 19:48:56 -0800 (Mon, 05 Nov 2007) Log Message: ----------- FEATURE: added latestTimestamp property and access methods Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/RequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathDatePrefixQueryRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathPrefixDatePrefixQueryRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/ReplayRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/BaseRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/CompositeRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/FormRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/OpenSearchRequestParser.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/RequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/RequestParser.java 2007-11-06 03:44:31 UTC (rev 2072) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/RequestParser.java 2007-11-06 03:48:56 UTC (rev 2073) @@ -56,4 +56,8 @@ * @param timestamp */ public void setEarliestTimestamp(String 
timestamp); + /** + * @param timestamp + */ + public void setLatestTimestamp(String timestamp); } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathDatePrefixQueryRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathDatePrefixQueryRequestParser.java 2007-11-06 03:44:31 UTC (rev 2072) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathDatePrefixQueryRequestParser.java 2007-11-06 03:48:56 UTC (rev 2073) @@ -59,12 +59,14 @@ String urlStr = matcher.group(2); String startDate; + String endDate; if(dateStr.length() == 0) { startDate = earliestTimestamp; + endDate = latestTimestamp; } else { startDate = Timestamp.parseBefore(dateStr).getDateStr(); + endDate = Timestamp.parseAfter(dateStr).getDateStr(); } - String endDate = Timestamp.parseAfter(dateStr).getDateStr(); wbRequest.put(WaybackConstants.REQUEST_START_DATE,startDate); wbRequest.put(WaybackConstants.REQUEST_END_DATE,endDate); wbRequest.put(WaybackConstants.REQUEST_TYPE, Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathPrefixDatePrefixQueryRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathPrefixDatePrefixQueryRequestParser.java 2007-11-06 03:44:31 UTC (rev 2072) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathPrefixDatePrefixQueryRequestParser.java 2007-11-06 03:48:56 UTC (rev 2073) @@ -58,13 +58,15 @@ String urlStr = matcher.group(2); String startDate; + String endDate; if(dateStr.length() == 0) { startDate = 
earliestTimestamp; + endDate = latestTimestamp; } else { startDate = Timestamp.parseBefore(dateStr).getDateStr(); + endDate = Timestamp.parseAfter(dateStr).getDateStr(); } - String endDate = Timestamp.parseAfter(dateStr).getDateStr(); wbRequest.put(WaybackConstants.REQUEST_START_DATE, startDate); wbRequest.put(WaybackConstants.REQUEST_END_DATE,endDate); Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/ReplayRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/ReplayRequestParser.java 2007-11-06 03:44:31 UTC (rev 2072) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/ReplayRequestParser.java 2007-11-06 03:48:56 UTC (rev 2073) @@ -24,6 +24,7 @@ */ package org.archive.wayback.archivalurl.requestparser; +import java.util.logging.Logger; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -42,6 +43,8 @@ * @version $Date$, $Revision$ */ public class ReplayRequestParser extends PathRequestParser { + private static final Logger LOGGER = Logger.getLogger( + ReplayRequestParser.class.getName()); /** * Regex which parses Archival URL replay requests into timestamp + url */ @@ -51,10 +54,11 @@ public WaybackRequest parse(String requestPath) { WaybackRequest wbRequest = null; Matcher matcher = WB_REQUEST_REGEX.matcher(requestPath); + String urlStr = null; if (matcher != null && matcher.matches()) { wbRequest = new WaybackRequest(); String dateStr = matcher.group(1); - String urlStr = matcher.group(2); + urlStr = matcher.group(2); if (!urlStr.startsWith("http://")) { urlStr = "http://" + urlStr; } @@ -71,8 +75,8 @@ String startDate = null; String endDate = null; if (dateStr.length() == 14) { - startDate = Timestamp.earliestTimestamp().getDateStr(); - endDate = 
Timestamp.currentTimestamp().getDateStr(); + startDate = earliestTimestamp; + endDate = latestTimestamp; } else { // classic behavior: @@ -81,8 +85,8 @@ // dateStr = endDate; // "better" behavior: - startDate = Timestamp.earliestTimestamp().getDateStr(); - endDate = Timestamp.currentTimestamp().getDateStr(); + startDate = earliestTimestamp; + endDate = latestTimestamp; dateStr = Timestamp.parseAfter(dateStr).getDateStr(); } @@ -100,6 +104,9 @@ // } wbRequest.setRequestUrl(urlStr); } catch (URIException e) { + if(urlStr != null) { + LOGGER.severe("Failed parse of url(" + urlStr + ")"); + } e.printStackTrace(); wbRequest = null; } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixRequestParser.java 2007-11-06 03:44:31 UTC (rev 2072) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixRequestParser.java 2007-11-06 03:48:56 UTC (rev 2073) @@ -30,11 +30,11 @@ import javax.servlet.http.HttpServletRequest; import org.apache.commons.httpclient.URIException; -import org.archive.wayback.RequestParser; import org.archive.wayback.WaybackConstants; import org.archive.wayback.core.Timestamp; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.exception.BadQueryException; +import org.archive.wayback.requestparser.BaseRequestParser; import org.archive.wayback.webapp.AccessPoint; /** @@ -43,11 +43,9 @@ * @author brad * @version $Date$, $Revision$ */ -public class DomainPrefixRequestParser implements RequestParser { +public class DomainPrefixRequestParser extends BaseRequestParser { String hostPort = "localhost:8081"; - String earliest = Timestamp.earliestTimestamp().getDateStr(); - int maxRecords = 1000; private final Pattern 
REPLAY_REGEX = Pattern.compile("^(\\d{1,14})\\.(.*)$"); @@ -108,12 +106,14 @@ String dateStr = queryMatcher.group(1); String host = queryMatcher.group(2); String startDate; + String endDate; if(dateStr.length() == 0) { - startDate = earliest; + startDate = earliestTimestamp; + endDate = latestTimestamp; } else { startDate = Timestamp.parseBefore(dateStr).getDateStr(); + endDate = Timestamp.parseAfter(dateStr).getDateStr(); } - String endDate = Timestamp.parseAfter(dateStr).getDateStr(); wbRequest.put(WaybackConstants.REQUEST_START_DATE,startDate); wbRequest.put(WaybackConstants.REQUEST_END_DATE,endDate); wbRequest.put(WaybackConstants.REQUEST_TYPE, @@ -134,34 +134,7 @@ return wbRequest; } - /* (non-Javadoc) - * @see org.archive.wayback.RequestParser#setEarliestTimestamp(java.lang.String) - */ - public void setEarliestTimestamp(String timestamp) { - earliest = timestamp; - } - /** - * @return the earliest timestamp - */ - public String getEarliestTimestamp() { - return earliest; - } - - /* (non-Javadoc) - * @see org.archive.wayback.RequestParser#setMaxRecords(int) - */ - public void setMaxRecords(int maxRecords) { - this.maxRecords = maxRecords; - } - /** - * @return the maxRecords - */ - public int getMaxRecords() { - return maxRecords; - } - - /** * @return the hostPort */ public String getHostPort() { Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/BaseRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/BaseRequestParser.java 2007-11-06 03:44:31 UTC (rev 2072) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/BaseRequestParser.java 2007-11-06 03:48:56 UTC (rev 2073) @@ -69,6 +69,8 @@ protected String earliestTimestamp = Timestamp.earliestTimestamp().getDateStr(); + protected String latestTimestamp = + 
Timestamp.latestTimestamp().getDateStr(); protected static String getMapParam(Map<String,String[]> queryMap, String field) { @@ -160,5 +162,13 @@ public String getEarliestTimestamp() { return earliestTimestamp; } + + public String getLatestTimestamp() { + return latestTimestamp; + } + + public void setLatestTimestamp(String timestamp) { + this.latestTimestamp = Timestamp.parseAfter(timestamp).getDateStr(); + } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/CompositeRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/CompositeRequestParser.java 2007-11-06 03:44:31 UTC (rev 2072) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/CompositeRequestParser.java 2007-11-06 03:48:56 UTC (rev 2073) @@ -48,6 +48,7 @@ for(int i = 0; i < parsers.length; i++) { parsers[i].setMaxRecords(maxRecords); parsers[i].setEarliestTimestamp(earliestTimestamp); + parsers[i].setLatestTimestamp(latestTimestamp); } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/FormRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/FormRequestParser.java 2007-11-06 03:44:31 UTC (rev 2072) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/FormRequestParser.java 2007-11-06 03:48:56 UTC (rev 2073) @@ -90,6 +90,10 @@ wbRequest.put(WaybackConstants.REQUEST_START_DATE, earliestTimestamp); } + if(wbRequest.get(WaybackConstants.REQUEST_END_DATE) == null) { + wbRequest.put(WaybackConstants.REQUEST_END_DATE, + latestTimestamp); + } } if(wbRequest != null) { addHttpHeaderFields(wbRequest, httpRequest); Modified: 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/OpenSearchRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/OpenSearchRequestParser.java 2007-11-06 03:44:31 UTC (rev 2072) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/OpenSearchRequestParser.java 2007-11-06 03:48:56 UTC (rev 2073) @@ -148,6 +148,10 @@ wbRequest.put(WaybackConstants.REQUEST_START_DATE, earliestTimestamp); } + if(wbRequest.get(WaybackConstants.REQUEST_END_DATE) == null) { + wbRequest.put(WaybackConstants.REQUEST_END_DATE, + latestTimestamp); + } addHttpHeaderFields(wbRequest, httpRequest); return wbRequest; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2007-11-28 00:40:28
|
Revision: 2077 http://archive-access.svn.sourceforge.net/archive-access/?rev=2077&view=rev Author: bradtofel Date: 2007-11-27 16:39:53 -0800 (Tue, 27 Nov 2007) Log Message: ----------- REFACTOR: common, almost duplicate code BaseRequestParser.addHttpHeaderFields() replaced with WaybackRequest.fixup() Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/proxy/ProxyReplayRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/BaseRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/FormRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/OpenSearchRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/PathRequestParser.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/proxy/ProxyReplayRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/proxy/ProxyReplayRequestParser.java 2007-11-06 03:52:49 UTC (rev 2076) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/proxy/ProxyReplayRequestParser.java 2007-11-28 00:39:53 UTC (rev 2077) @@ -103,7 +103,7 @@ id = httpRequest.getRemoteAddr(); wbRequest.put(WaybackConstants.REQUEST_EXACT_DATE, Timestamp .getTimestampForId(httpRequest.getContextPath(), id)); - addHttpHeaderFields(wbRequest, httpRequest); + wbRequest.fixup(httpRequest); return wbRequest; } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/BaseRequestParser.java =================================================================== --- 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/BaseRequestParser.java 2007-11-06 03:52:49 UTC (rev 2076) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/BaseRequestParser.java 2007-11-28 00:39:53 UTC (rev 2077) @@ -29,7 +29,6 @@ import javax.servlet.http.HttpServletRequest; import org.archive.wayback.RequestParser; -import org.archive.wayback.WaybackConstants; import org.archive.wayback.core.Timestamp; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.exception.BadQueryException; @@ -100,37 +99,6 @@ return (val == null) ? "" : val; } - - private void putUnlessNull(WaybackRequest request, String key, String val) { - if(val != null) { - request.put(key, val); - } - } - - protected void addHttpHeaderFields(WaybackRequest wbRequest, - HttpServletRequest httpRequest) { - - // attempt to get the HTTP referer if present.. - putUnlessNull(wbRequest,WaybackConstants.REQUEST_REFERER_URL, - httpRequest.getHeader("REFERER")); - putUnlessNull(wbRequest,WaybackConstants.REQUEST_REMOTE_ADDRESS, - httpRequest.getRemoteAddr()); - putUnlessNull(wbRequest,WaybackConstants.REQUEST_WAYBACK_HOSTNAME, - httpRequest.getLocalName()); - putUnlessNull(wbRequest,WaybackConstants.REQUEST_WAYBACK_PORT, - String.valueOf(httpRequest.getLocalPort())); - putUnlessNull(wbRequest,WaybackConstants.REQUEST_WAYBACK_CONTEXT, - httpRequest.getContextPath()); - putUnlessNull(wbRequest,WaybackConstants.REQUEST_AUTH_TYPE, - httpRequest.getAuthType()); - putUnlessNull(wbRequest,WaybackConstants.REQUEST_REMOTE_USER, - httpRequest.getRemoteUser()); - putUnlessNull(wbRequest,WaybackConstants.REQUEST_LOCALE_LANG, - httpRequest.getLocale().getDisplayLanguage()); - - wbRequest.setLocale(httpRequest.getLocale()); - } - /* (non-Javadoc) * @see org.archive.wayback.RequestParser#parse(javax.servlet.http.HttpServletRequest) */ Modified: 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/FormRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/FormRequestParser.java 2007-11-06 03:52:49 UTC (rev 2076) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/FormRequestParser.java 2007-11-28 00:39:53 UTC (rev 2077) @@ -96,7 +96,7 @@ } } if(wbRequest != null) { - addHttpHeaderFields(wbRequest, httpRequest); + wbRequest.fixup(httpRequest); } return wbRequest; Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/OpenSearchRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/OpenSearchRequestParser.java 2007-11-06 03:52:49 UTC (rev 2076) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/OpenSearchRequestParser.java 2007-11-28 00:39:53 UTC (rev 2077) @@ -152,7 +152,7 @@ wbRequest.put(WaybackConstants.REQUEST_END_DATE, latestTimestamp); } - addHttpHeaderFields(wbRequest, httpRequest); + wbRequest.fixup(httpRequest); return wbRequest; } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/PathRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/PathRequestParser.java 2007-11-06 03:52:49 UTC (rev 2076) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/PathRequestParser.java 2007-11-28 00:39:53 UTC (rev 2077) @@ -67,7 +67,7 @@ WaybackRequest wbRequest = parse(requestPath); if(wbRequest != null) { - 
addHttpHeaderFields(wbRequest, httpRequest); + wbRequest.fixup(httpRequest); wbRequest.setResultsPerPage(maxRecords); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2007-11-28 02:00:27
|
Revision: 2082 http://archive-access.svn.sourceforge.net/archive-access/?rev=2082&view=rev Author: bradtofel Date: 2007-11-27 18:00:31 -0800 (Tue, 27 Nov 2007) Log Message: ----------- REFACTOR/FEATURE: made Resource abstract, moved ARC-specific code to ArcResource, added WARC-specific coercion code to WarcResource. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/Resource.java Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/ArcResource.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/WarcResource.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/Resource.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/Resource.java 2007-11-28 00:59:27 UTC (rev 2081) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/Resource.java 2007-11-28 02:00:31 UTC (rev 2082) @@ -26,236 +26,118 @@ import java.io.BufferedInputStream; import java.io.IOException; import java.io.InputStream; -import java.util.Enumeration; -import java.util.HashMap; -import java.util.Hashtable; -import java.util.Iterator; import java.util.Map; -import java.util.Set; -import java.util.logging.Logger; -import org.apache.commons.httpclient.Header; -import org.archive.io.ArchiveRecord; -import org.archive.io.arc.ARCReader; -import org.archive.io.arc.ARCRecord; - /** - * Slightly more than an ARCRecord. This class is designed to be an abstraction - * to allow the Wayback to operator with non-ARC file format resources. Probably - * the interface required will end up looking very much like ARCRecord, but can - * be reimplemented to handle new ARC formats or non-ARC formats. 
+ * Abstraction on top of a document stored in a WaybackCollection. Currently + * implemented subclasses include ArcResource and WarcResource. * * @author Brad Tofel * @version $Date$, $Revision$ */ -public class Resource extends InputStream { - /** - * Logger for this class - */ - private static final Logger LOGGER = Logger.getLogger(Resource.class - .getName()); - - /** - * String prefix for ARC file related metadata namespace of keys within - * metaData Properties bag. - */ - private static String ARC_META_PREFIX = "arcmeta."; - /** - * String prefix for HTTP Header related metadata namespace of keys within - * metaData Properties bag. - */ - private static String HTTP_HEADER_PREFIX = "httpheader."; - /** - * object for ARCRecord - */ - ARCRecord arcRecord = null; - /** - * object for ARCReader -- need to hold on to this in order to call close() - * to release filehandle after completing access to this record. optional - */ - ARCReader arcReader = null; - /** - * flag to indicate if the ARCRecord skipHTTPHeader() has been called - */ - boolean parsedHeader = false; - /** - * Expandable property bag for holding metadata associated with this - * resource - */ - Hashtable<String,String> metaData = new Hashtable<String,String>(); +public abstract class Resource extends InputStream { - private BufferedInputStream bis; - - /** - * Constructor - * - * @param rec - * @param reader - */ - public Resource(final ARCRecord rec,final ARCReader reader) { - super(); - arcRecord = rec; - arcReader = reader; - bis = new BufferedInputStream(rec); - } + private InputStream is; - /** parse the headers on the underlying ARC record, and extract all - * @throws IOException - */ - public void parseHeaders () throws IOException { - if(!parsedHeader) { - arcRecord.skipHttpHeader(); - // copy all HTTP headers to metaData, prefixing with - // HTTP_HEADER_PREFIX - Header[] headers = arcRecord.getHttpHeaders(); - if (headers != null) { - for (int i = 0; i < headers.length; i++) { - String 
value = headers[i].getValue(); - String name = headers[i].getName(); - metaData.put(HTTP_HEADER_PREFIX + name,value); - } - } + public abstract void close() throws IOException; + public abstract int getStatusCode(); + public abstract long getRecordLength(); + public abstract Map<String,String> getHttpHeaders(); - // copy all ARC record header fields to metaData, prefixing with - // ARC_META_PREFIX - @SuppressWarnings("unchecked") - Map<String,Object> headerMetaMap = arcRecord.getMetaData().getHeaderFields(); - Set<String> keys = headerMetaMap.keySet(); - Iterator<String> itr = keys.iterator(); - while(itr.hasNext()) { - String metaKey = itr.next(); - Object value = headerMetaMap.get(metaKey); - String metaValue = ""; - if(value != null) { - metaValue = value.toString(); - } - metaData.put(ARC_META_PREFIX + metaKey,metaValue); - } - - parsedHeader = true; + protected void setInputStream(InputStream is) { + if(is.markSupported()) { + this.is = is; + } else { + this.is = new BufferedInputStream(is); } } - /** - * @param prefix - * @return a Properties of all elements in metaData starting with 'prefix'. - * keys in the returned Properties have 'prefix' removed. 
+ * @return + * @throws IOException + * @see java.io.BufferedInputStream#available() */ - public Map<String,String> filterMeta(String prefix) { - HashMap<String,String> matching = new HashMap<String,String>(); - for (Enumeration<String> e = metaData.keys(); e.hasMoreElements();) { - String key = e.nextElement(); - if (key.startsWith(prefix)) { - String finalKey = key.substring(prefix.length()); - String value = metaData.get(key); - matching.put(finalKey, value); - } + public int available() throws IOException { + if(is == null) { + throw new IOException("No InputStream"); } - return matching; + return is.available(); } - /** - * @return a Properties containing all HTTP header fields for this record + * @param readlimit + * @see java.io.BufferedInputStream#mark(int) */ - public Map<String,String> getHttpHeaders() { - return filterMeta(HTTP_HEADER_PREFIX); + public void mark(int readlimit) { + if(is != null) { + is.mark(readlimit); + } } - /** - * @return a Properties containing all ARC Meta fields for this record + * @return + * @see java.io.BufferedInputStream#markSupported() */ - public Map<String,String> getARCMetadata() { - return filterMeta(ARC_META_PREFIX); + public boolean markSupported() { + if(is == null) { + return false; + } + return is.markSupported(); } - /** - * (non-Javadoc) - * @see org.archive.io.arc.ARCRecord#getStatusCode() - * @return int HTTP status code returned with this document. + * @return + * @throws IOException + * @see java.io.BufferedInputStream#read() */ - public int getStatusCode() { - return arcRecord.getStatusCode(); + public int read() throws IOException { + if(is == null) { + throw new IOException("No InputStream"); + } + return is.read(); } - /** - * @return the ARCRecord underlying this Resource. 
+ * @param b + * @param off + * @param len + * @return + * @throws IOException + * @see java.io.BufferedInputStream#read(byte[], int, int) */ - public ArchiveRecord getArcRecord() { - return arcRecord; + public int read(byte[] b, int off, int len) throws IOException { + if(is == null) { + throw new IOException("No InputStream"); + } + return is.read(b, off, len); } - - /* (non-Javadoc) - * @see org.archive.io.arc.ARCRecord#read() + /** + * @param b + * @return + * @throws IOException + * @see java.io.FilterInputStream#read(byte[]) */ - public int read() throws IOException { - return bis.read(); - } - - /* (non-Javadoc) - * @see org.archive.io.arc.ARCRecord#read(byte[], int, int) - */ - public int read(byte[] arg0, int arg1, int arg2) throws IOException { - return bis.read(arg0, arg1, arg2); - } - - /* (non-Javadoc) - * @see java.io.InputStream#read(byte[]) - */ public int read(byte[] b) throws IOException { - return bis.read(b); + if(is == null) { + throw new IOException("No InputStream"); + } + return is.read(b); } - - /* (non-Javadoc) - * @see org.archive.io.arc.ARCRecord#skip(long) - */ - public long skip(long arg0) throws IOException { - return bis.skip(arg0); - } - - /* (non-Javadoc) - * @see java.io.BufferedInputStream#available() - */ - public int available() throws IOException { - return bis.available(); - } - - /* (non-Javadoc) - * @see java.io.BufferedInputStream#mark(int) - */ - public void mark(int readlimit) { - bis.mark(readlimit); - } - - /* (non-Javadoc) - * @see java.io.BufferedInputStream#markSupported() - */ - public boolean markSupported() { - return bis.markSupported(); - } - - /* (non-Javadoc) + /** + * @throws IOException * @see java.io.BufferedInputStream#reset() */ public void reset() throws IOException { - bis.reset(); - } - - /* (non-Javadoc) - * @see org.archive.io.arc.ARCRecord#close() - */ - public void close() throws IOException { - //LOGGER.info("About to close..("+arcReader+")"); - arcRecord.close(); - if(arcReader != null) { - 
arcReader.close(); - LOGGER.info("closed..("+arcReader+")"); + if(is == null) { + throw new IOException("No InputStream"); } + is.reset(); } - /** - * @return byte length claimed in ARC record metadata line. + * @param n + * @return + * @throws IOException + * @see java.io.BufferedInputStream#skip(long) */ - public long getRecordLength() { - return arcRecord.getMetaData().getLength(); + public long skip(long n) throws IOException { + if(is == null) { + throw new IOException("No InputStream"); + } + return is.skip(n); } } Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/ArcResource.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/ArcResource.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/ArcResource.java 2007-11-28 02:00:31 UTC (rev 2082) @@ -0,0 +1,170 @@ +package org.archive.wayback.resourcestore; + +import java.io.IOException; +import java.util.Enumeration; +import java.util.HashMap; +import java.util.Hashtable; +import java.util.Iterator; +import java.util.Map; +import java.util.Set; +import java.util.logging.Logger; + +import org.apache.commons.httpclient.Header; +import org.archive.io.ArchiveRecord; +import org.archive.io.arc.ARCReader; +import org.archive.io.arc.ARCRecord; +import org.archive.wayback.core.Resource; + +public class ArcResource extends Resource { + /** + * Logger for this class + */ + private static final Logger LOGGER = Logger.getLogger(ArcResource.class + .getName()); + + /** + * String prefix for ARC file related metadata namespace of keys within + * metaData Properties bag. + */ + private static String ARC_META_PREFIX = "arcmeta."; + /** + * String prefix for HTTP Header related metadata namespace of keys within + * metaData Properties bag. 
+ */ + private static String HTTP_HEADER_PREFIX = "httpheader."; + /** + * object for ARCRecord + */ + ARCRecord arcRecord = null; + /** + * object for ARCReader -- need to hold on to this in order to call close() + * to release filehandle after completing access to this record. optional + */ + ARCReader arcReader = null; + /** + * flag to indicate if the ARCRecord skipHTTPHeader() has been called + */ + boolean parsedHeader = false; + /** + * Expandable property bag for holding metadata associated with this + * resource + */ + Hashtable<String,String> metaData = new Hashtable<String,String>(); + + /** + * Constructor + * + * @param rec + * @param reader + */ + public ArcResource(final ARCRecord rec,final ARCReader reader) { + super(); + arcRecord = rec; + arcReader = reader; + setInputStream(rec); + } + + /** parse the headers on the underlying ARC record, and extract all + * @throws IOException + */ + public void parseHeaders () throws IOException { + if(!parsedHeader) { + arcRecord.skipHttpHeader(); + // copy all HTTP headers to metaData, prefixing with + // HTTP_HEADER_PREFIX + Header[] headers = arcRecord.getHttpHeaders(); + if (headers != null) { + for (int i = 0; i < headers.length; i++) { + String value = headers[i].getValue(); + String name = headers[i].getName(); + metaData.put(HTTP_HEADER_PREFIX + name,value); + } + } + + // copy all ARC record header fields to metaData, prefixing with + // ARC_META_PREFIX + @SuppressWarnings("unchecked") + Map<String,Object> headerMetaMap = arcRecord.getMetaData().getHeaderFields(); + Set<String> keys = headerMetaMap.keySet(); + Iterator<String> itr = keys.iterator(); + while(itr.hasNext()) { + String metaKey = itr.next(); + Object value = headerMetaMap.get(metaKey); + String metaValue = ""; + if(value != null) { + metaValue = value.toString(); + } + metaData.put(ARC_META_PREFIX + metaKey,metaValue); + } + + parsedHeader = true; + } + } + + /** + * @param prefix + * @return a Properties of all elements in metaData 
starting with 'prefix'. + * keys in the returned Properties have 'prefix' removed. + */ + public Map<String,String> filterMeta(String prefix) { + HashMap<String,String> matching = new HashMap<String,String>(); + for (Enumeration<String> e = metaData.keys(); e.hasMoreElements();) { + String key = e.nextElement(); + if (key.startsWith(prefix)) { + String finalKey = key.substring(prefix.length()); + String value = metaData.get(key); + matching.put(finalKey, value); + } + } + return matching; + } + + /** + * @return a Properties containing all HTTP header fields for this record + */ + public Map<String,String> getHttpHeaders() { + return filterMeta(HTTP_HEADER_PREFIX); + } + + /** + * @return a Properties containing all ARC Meta fields for this record + */ + public Map<String,String> getARCMetadata() { + return filterMeta(ARC_META_PREFIX); + } + + /** + * (non-Javadoc) + * @see org.archive.io.arc.ARCRecord#getStatusCode() + * @return int HTTP status code returned with this document. + */ + public int getStatusCode() { + return arcRecord.getStatusCode(); + } + + /** + * @return the ARCRecord underlying this Resource. + */ + public ArchiveRecord getArcRecord() { + return arcRecord; + } + + /* (non-Javadoc) + * @see org.archive.io.arc.ARCRecord#close() + */ + public void close() throws IOException { + //LOGGER.info("About to close..("+arcReader+")"); + arcRecord.close(); + if(arcReader != null) { + arcReader.close(); + LOGGER.info("closed..("+arcReader+")"); + } + } + + /** + * @return byte length claimed in ARC record metadata line. 
+ */ + public long getRecordLength() { + return arcRecord.getMetaData().getLength(); + } +} Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/WarcResource.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/WarcResource.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/WarcResource.java 2007-11-28 02:00:31 UTC (rev 2082) @@ -0,0 +1,98 @@ +package org.archive.wayback.resourcestore; + +import java.io.IOException; +import java.util.Hashtable; +import java.util.Map; + +import org.apache.commons.httpclient.Header; +import org.apache.commons.httpclient.HttpParser; +import org.apache.commons.httpclient.StatusLine; +import org.apache.commons.httpclient.util.EncodingUtil; +import org.archive.io.RecoverableIOException; +import org.archive.io.arc.ARCConstants; +import org.archive.io.warc.WARCReader; +import org.archive.io.warc.WARCRecord; +import org.archive.wayback.core.Resource; + +public class WarcResource extends Resource { + private WARCRecord rec = null; + private WARCReader reader = null; + private Map<String, String> headers = null; + private long length = 0; + private int status = 0; + private boolean parsedHeaders = false; + public WarcResource(WARCRecord rec, WARCReader reader) { + this.rec = rec; + this.reader = reader; + } + + /** + * @param bytes Array of bytes to examine for an EOL. + * @return Count of end-of-line characters or zero if none. 
+ */ + private int getEolCharsCount(byte [] bytes) { + int count = 0; + if (bytes != null && bytes.length >=1 && + bytes[bytes.length - 1] == '\n') { + count++; + if (bytes.length >=2 && bytes[bytes.length -2] == '\r') { + count++; + } + } + return count; + } + + public void parseHeaders() throws IOException { + if(parsedHeaders) { + return; + } + + byte [] statusBytes = HttpParser.readRawLine(rec); + int eolCharCount = getEolCharsCount(statusBytes); + if (eolCharCount <= 0) { + throw new RecoverableIOException("Failed to read http status where one " + + " was expected: " + new String(statusBytes)); + } + String statusLineStr = EncodingUtil.getString(statusBytes, 0, + statusBytes.length - eolCharCount, ARCConstants.DEFAULT_ENCODING); + if ((statusLineStr == null) || + !StatusLine.startsWithHTTP(statusLineStr)) { + throw new RecoverableIOException("Failed parse of http status line."); + } + StatusLine statusLine = new StatusLine(statusLineStr); + + this.status = statusLine.getStatusCode(); + + Header[] tmpHeaders = HttpParser.parseHeaders(rec, + ARCConstants.DEFAULT_ENCODING); + headers = new Hashtable<String,String>(); + for(Header header: tmpHeaders) { + headers.put(header.getName(), header.getValue()); + } + this.setInputStream(rec); + parsedHeaders = true; + } + + + @Override + public Map<String, String> getHttpHeaders() { + return headers; + } + + @Override + public long getRecordLength() { + // TODO Auto-generated method stub + return length; + } + + @Override + public int getStatusCode() { + return status; + } + + @Override + public void close() throws IOException { + rec.close(); + reader.close(); + } +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2007-12-22 02:19:30
|
Revision: 2119 http://archive-access.svn.sourceforge.net/archive-access/?rev=2119&view=rev Author: bradtofel Date: 2007-12-21 18:19:35 -0800 (Fri, 21 Dec 2007) Log Message: ----------- FEATURE: added get/setResult() to UIQueryResults. This is set by HTMLPage when including JSP files in replayed documents via ArchivalURL mode Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlReplayRenderer.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/UIQueryResults.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/HTMLPage.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlReplayRenderer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlReplayRenderer.java 2007-12-22 00:59:25 UTC (rev 2118) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlReplayRenderer.java 2007-12-22 02:19:35 UTC (rev 2119) @@ -98,7 +98,7 @@ Iterator<String> itr = jspInserts.iterator(); while(itr.hasNext()) { toInsert.append(page.includeJspString(itr.next(), httpRequest, - httpResponse, wbRequest, results)); + httpResponse, wbRequest, results, result)); } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/UIQueryResults.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/UIQueryResults.java 2007-12-22 00:59:25 UTC (rev 2118) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/UIQueryResults.java 2007-12-22 02:19:35 UTC (rev 2119) @@ -74,6 +74,7 @@ private int 
curPage; private SearchResults results; + private SearchResult result; private ResultURIConverter uriConverter; /** @@ -312,4 +313,12 @@ public Timestamp getExactRequestedTimestamp() { return exactRequestedTimestamp; } + + public SearchResult getResult() { + return result; + } + + public void setResult(SearchResult result) { + this.result = result; + } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/HTMLPage.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/HTMLPage.java 2007-12-22 00:59:25 UTC (rev 2118) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/HTMLPage.java 2007-12-22 02:19:35 UTC (rev 2119) @@ -383,11 +383,12 @@ */ public String includeJspString(String jspPath, HttpServletRequest httpRequest, HttpServletResponse httpResponse, - WaybackRequest wbRequest, SearchResults results) + WaybackRequest wbRequest, SearchResults results, SearchResult result) throws ServletException, IOException { UIQueryResults uiResults = new UIQueryResults(httpRequest, wbRequest, results, uriConverter); + uiResults.setResult(result); StringHttpServletResponseWrapper wrappedResponse = new StringHttpServletResponseWrapper(httpResponse); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2008-01-15 02:58:25
|
Revision: 2136 http://archive-access.svn.sourceforge.net/archive-access/?rev=2136&view=rev Author: bradtofel Date: 2008-01-14 18:58:30 -0800 (Mon, 14 Jan 2008) Log Message: ----------- REFACTOR: moved resolveUrl() from UrlCanonicalizer to UrlOperations Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlReplayRenderer.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlTransparentReplayRenderer.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixReplayRenderer.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlReplayRenderer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlReplayRenderer.java 2008-01-15 02:57:30 UTC (rev 2135) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlReplayRenderer.java 2008-01-15 02:58:30 UTC (rev 2136) @@ -43,7 +43,7 @@ import org.archive.wayback.replay.HTMLPage; import org.archive.wayback.replay.HttpHeaderProcessor; import org.archive.wayback.replay.HttpHeaderOperation; -import org.archive.wayback.util.UrlCanonicalizer; +import org.archive.wayback.util.url.UrlOperations; /** * ReplayRenderer responsible for marking up HTML pages so they replay in @@ -139,7 +139,7 @@ String baseUrl = result.getAbsoluteUrl(); String cd = result.getCaptureDate(); // by the spec, these should be absolute already, but just in case: - String u = UrlCanonicalizer.resolveUrl(baseUrl, value); + String u = UrlOperations.resolveUrl(baseUrl, value); output.put(key, uriConverter.makeReplayURI(cd,u)); Modified: 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlTransparentReplayRenderer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlTransparentReplayRenderer.java 2008-01-15 02:57:30 UTC (rev 2135) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlTransparentReplayRenderer.java 2008-01-15 02:58:30 UTC (rev 2136) @@ -5,7 +5,7 @@ import org.archive.wayback.ResultURIConverter; import org.archive.wayback.core.SearchResult; import org.archive.wayback.replay.TransparentReplayRenderer; -import org.archive.wayback.util.UrlCanonicalizer; +import org.archive.wayback.util.url.UrlOperations; /** * Slight extension to TransparentReplayRenderer, which rewrites Location and @@ -32,7 +32,7 @@ String baseUrl = result.getAbsoluteUrl(); String cd = result.getCaptureDate(); // by the spec, these should be absolute already, but just in case: - String u = UrlCanonicalizer.resolveUrl(baseUrl, value); + String u = UrlOperations.resolveUrl(baseUrl, value); output.put(key, uriConverter.makeReplayURI(cd,u)); Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixReplayRenderer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixReplayRenderer.java 2008-01-15 02:57:30 UTC (rev 2135) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixReplayRenderer.java 2008-01-15 02:58:30 UTC (rev 2136) @@ -25,8 +25,6 @@ package org.archive.wayback.domainprefix; import java.io.IOException; -//import java.util.Date; -//import java.util.Iterator; import java.util.Map; import java.util.regex.Matcher; import 
java.util.regex.Pattern; @@ -37,7 +35,6 @@ import org.archive.wayback.ReplayRenderer; import org.archive.wayback.ResultURIConverter; -//import org.archive.wayback.WaybackConstants; import org.archive.wayback.core.Resource; import org.archive.wayback.core.SearchResult; import org.archive.wayback.core.SearchResults; @@ -47,8 +44,7 @@ import org.archive.wayback.replay.HTMLPage; import org.archive.wayback.replay.HttpHeaderProcessor; import org.archive.wayback.replay.HttpHeaderOperation; -//import org.archive.wayback.util.StringFormatter; -import org.archive.wayback.util.UrlCanonicalizer; +import org.archive.wayback.util.url.UrlOperations; /** * @@ -126,7 +122,7 @@ String baseUrl = result.getAbsoluteUrl(); String cd = result.getCaptureDate(); // by the spec, these should be absolute already, but just in case: - String u = UrlCanonicalizer.resolveUrl(baseUrl, value); + String u = UrlOperations.resolveUrl(baseUrl, value); output.put(key, uriConverter.makeReplayURI(cd,u)); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2008-01-15 03:03:16
|
Revision: 2138 http://archive-access.svn.sourceforge.net/archive-access/?rev=2138&view=rev Author: bradtofel Date: 2008-01-14 19:03:21 -0800 (Mon, 14 Jan 2008) Log Message: ----------- FEATURE: UrlCanonicalizer customization capabilities. Previous hard-coded UrlCanonicalizer is now AggressiveUrlCanonicalizer, which is the default, but now it can be overridden with another UrlCanonicalizer implementation. main() of WarcIndexer and ArcIndexer now include parsing of "-identity" option, which causes the IdentityUrlCanonicalizer to be used -- passing through URLs to the CDX as they appear in the ARC file. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/LiveWebCache.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/distributed/AlphaPartitionedIndex.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/SelfRedirectFilter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/ARCRecordToSearchResultAdapter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/ArcIndexer.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/WARCRecordToSearchResultAdapter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/WarcIndexer.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/LiveWebCache.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/LiveWebCache.java 2008-01-15 03:00:16 UTC (rev 2137) +++ 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/LiveWebCache.java 2008-01-15 03:03:21 UTC (rev 2138) @@ -32,6 +32,7 @@ import org.apache.commons.httpclient.URIException; import org.archive.io.arc.ARCLocation; import org.archive.io.arc.ARCRecord; +import org.archive.wayback.UrlCanonicalizer; import org.archive.wayback.WaybackConstants; import org.archive.wayback.core.CaptureSearchResults; import org.archive.wayback.core.Resource; @@ -44,8 +45,7 @@ import org.archive.wayback.exception.WaybackException; import org.archive.wayback.resourcestore.ARCRecordToSearchResultAdapter; import org.archive.wayback.resourcestore.ArcResource; -import org.archive.wayback.util.Adapter; -import org.archive.wayback.util.UrlCanonicalizer; +import org.archive.wayback.util.url.AggressiveUrlCanonicalizer; /** * @@ -61,10 +61,15 @@ private ARCCacheDirectory arcCacheDir = null; private URLCacher cacher = null; private LiveWebLocalResourceIndex index = null; - static UrlCanonicalizer canonicalizer = new UrlCanonicalizer(); - private static Adapter<ARCRecord,SearchResult> adapter = - new ARCRecordToSearchResultAdapter(); + private UrlCanonicalizer canonicalizer = null; + private ARCRecordToSearchResultAdapter adapter = null; + public LiveWebCache() { + canonicalizer = new AggressiveUrlCanonicalizer(); + adapter = new ARCRecordToSearchResultAdapter(); + adapter.setCanonicalizer(canonicalizer); + } + /** * closes all resources */ @@ -330,4 +335,13 @@ public void setIndex(LiveWebLocalResourceIndex index) { this.index = index; } + + public UrlCanonicalizer getCanonicalizer() { + return canonicalizer; + } + + public void setCanonicalizer(UrlCanonicalizer canonicalizer) { + this.canonicalizer = canonicalizer; + adapter.setCanonicalizer(canonicalizer); + } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java =================================================================== 
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java 2008-01-15 03:00:16 UTC (rev 2137) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java 2008-01-15 03:03:21 UTC (rev 2138) @@ -25,11 +25,13 @@ package org.archive.wayback.resourceindex; import java.io.IOException; +import java.util.Iterator; import org.apache.commons.httpclient.URIException; import org.archive.net.UURI; import org.archive.net.UURIFactory; import org.archive.wayback.ResourceIndex; +import org.archive.wayback.UrlCanonicalizer; import org.archive.wayback.WaybackConstants; import org.archive.wayback.resourceindex.filters.CaptureToUrlResultFilter; import org.archive.wayback.resourceindex.filters.CounterFilter; @@ -39,7 +41,6 @@ import org.archive.wayback.resourceindex.filters.GuardRailFilter; import org.archive.wayback.resourceindex.filters.HostMatchFilter; import org.archive.wayback.resourceindex.filters.SelfRedirectFilter; -import org.archive.wayback.resourceindex.filters.StartDateFilter; import org.archive.wayback.resourceindex.filters.UrlMatchFilter; import org.archive.wayback.resourceindex.filters.UrlPrefixMatchFilter; import org.archive.wayback.resourceindex.filters.WindowEndFilter; @@ -58,7 +59,7 @@ import org.archive.wayback.util.CloseableIterator; import org.archive.wayback.util.ObjectFilter; import org.archive.wayback.util.ObjectFilterChain; -import org.archive.wayback.util.UrlCanonicalizer; +import org.archive.wayback.util.url.AggressiveUrlCanonicalizer; /** * @@ -77,11 +78,15 @@ protected SearchResultSource source; - private UrlCanonicalizer canonicalizer = new UrlCanonicalizer(); + private UrlCanonicalizer canonicalizer = null; private boolean dedupeRecords = false; - private void filterRecords(CloseableIterator<SearchResult> itr, + public LocalResourceIndex() { + canonicalizer = new AggressiveUrlCanonicalizer(); + } + + private void 
filterRecords(Iterator<SearchResult> itr, ObjectFilter<SearchResult> filter, SearchResults results, boolean forwards) throws IOException { @@ -98,7 +103,11 @@ results.addSearchResult(result, forwards); } } - source.cleanup(itr); + if(itr instanceof CloseableIterator) { + CloseableIterator<SearchResult> citr = + (CloseableIterator<SearchResult>) itr; + source.cleanup(citr); + } } private String getRequired(WaybackRequest wbRequest, String field, @@ -216,27 +225,27 @@ if (searchType.equals(WaybackConstants.REQUEST_REPLAY_QUERY) || searchType.equals(WaybackConstants.REQUEST_CLOSEST_QUERY)) { - results = new CaptureSearchResults(); + results = new CaptureSearchResults(); + ObjectFilterChain<SearchResult> forwardFilters = new ObjectFilterChain<SearchResult>(); - ObjectFilterChain<SearchResult> reverseFilters = - new ObjectFilterChain<SearchResult>(); +// ObjectFilterChain<SearchResult> reverseFilters = +// new ObjectFilterChain<SearchResult>(); + // use the same guardrail for both: forwardFilters.addFilter(guardrail); - reverseFilters.addFilter(guardrail); +// reverseFilters.addFilter(guardrail); - // BUGBUG: won't work when closest is a dupe! forwardFilters.addFilter(new DuplicateRecordFilter()); - reverseFilters.addFilter(new DuplicateRecordFilter()); // match URL key: forwardFilters.addFilter(new UrlMatchFilter(keyUrl)); - reverseFilters.addFilter(new UrlMatchFilter(keyUrl)); +// reverseFilters.addFilter(new UrlMatchFilter(keyUrl)); if(hostMatchFilter != null) { forwardFilters.addFilter(hostMatchFilter); - reverseFilters.addFilter(hostMatchFilter); +// reverseFilters.addFilter(hostMatchFilter); } // be sure to only include records within the date range we want: @@ -246,11 +255,11 @@ // requested range. 
DateRangeFilter drFilter = new DateRangeFilter(startDate,endDate); forwardFilters.addFilter(drFilter); - reverseFilters.addFilter(drFilter); +// reverseFilters.addFilter(drFilter); // abort processing if we hit a date outside the search range: forwardFilters.addFilter(new EndDateFilter(endDate)); - reverseFilters.addFilter(new StartDateFilter(startDate)); +// reverseFilters.addFilter(new StartDateFilter(startDate)); // for replay, do not include records that redirect to // themselves.. We'll leave this for both closest and replays, @@ -258,39 +267,54 @@ // timeline in which case, we don't want to show captures that // redirect to themselves in the timeline if they are not viewable. SelfRedirectFilter selfRedirectFilter = new SelfRedirectFilter(); + selfRedirectFilter.setCanonicalizer(canonicalizer); forwardFilters.addFilter(selfRedirectFilter); - reverseFilters.addFilter(selfRedirectFilter); +// reverseFilters.addFilter(selfRedirectFilter); // possibly filter via exclusions: if(exclusion != null) { forwardFilters.addFilter(preExCounter); forwardFilters.addFilter(exclusion); - reverseFilters.addFilter(preExCounter); - reverseFilters.addFilter(exclusion); +// reverseFilters.addFilter(preExCounter); +// reverseFilters.addFilter(exclusion); } forwardFilters.addFilter(finalCounter); - reverseFilters.addFilter(finalCounter); +// reverseFilters.addFilter(finalCounter); - int resultsPerDirection = (int) Math.floor(resultsPerPage / 2); - if (resultsPerDirection * 2 == resultsPerPage) { - forwardFilters.addFilter(new WindowEndFilter( - resultsPerDirection)); - } else { - forwardFilters.addFilter(new WindowEndFilter( - resultsPerDirection + 1)); - } - reverseFilters.addFilter(new WindowEndFilter(resultsPerDirection)); + forwardFilters.addFilter(new WindowEndFilter(resultsPerPage)); +// int resultsPerDirection = (int) Math.floor(resultsPerPage / 2); +// reverseFilters.addFilter(new WindowEndFilter(resultsPerDirection)); - startKey = keyUrl + " " + exactDate; + startKey = 
keyUrl; - // first the reverse search: try { - filterRecords(source.getPrefixIterator(startKey), reverseFilters, - results, true); - // then the forwards: - filterRecords(source.getPrefixReverseIterator(startKey), - forwardFilters, results, false); +// CloseableIterator<SearchResult> reverse = +// new AdaptedObjectFilterIterator<SearchResult>( +// source.getPrefixReverseIterator(startKey), +// reverseFilters); + +// // reverse the reverseResults: +// ArrayList<SearchResult> reverseResults = +// new ArrayList<SearchResult>(); +// while(reverse.hasNext()) { +// reverseResults.add(0, reverse.next()); +// } + + // now make a composite of the reverse and forwards: + + CloseableIterator<SearchResult> forward = + source.getPrefixIterator(startKey); +// +// CompositeIterator<SearchResult> resultsItr = +// new CompositeIterator<SearchResult>(); +// resultsItr.addComponent(reverseResults.iterator()); +// resultsItr.addComponent(forward); + + // and filter: +// filterRecords(resultsItr, forwardFilters, results, true); + filterRecords(forward, forwardFilters, results, true); + } catch (IOException e) { throw new ResourceIndexNotAvailableException( e.getLocalizedMessage()); @@ -345,13 +369,11 @@ } filters.addFilter(new DateRangeFilter(startDate, endDate)); // possibly filter via exclusions: - if (exclusion == null) { - filters.addFilter(new CaptureToUrlResultFilter()); - } else { + if (exclusion != null) { filters.addFilter(preExCounter); filters.addFilter(exclusion); - filters.addFilter(new CaptureToUrlResultFilter()); } + filters.addFilter(new CaptureToUrlResultFilter()); filters.addFilter(finalCounter); startKey = keyUrl; @@ -430,4 +452,12 @@ public void setDedupeRecords(boolean dedupeRecords) { this.dedupeRecords = dedupeRecords; } + + public UrlCanonicalizer getCanonicalizer() { + return canonicalizer; + } + + public void setCanonicalizer(UrlCanonicalizer canonicalizer) { + this.canonicalizer = canonicalizer; + } } Modified: 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/distributed/AlphaPartitionedIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/distributed/AlphaPartitionedIndex.java 2008-01-15 03:00:16 UTC (rev 2137) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/distributed/AlphaPartitionedIndex.java 2008-01-15 03:03:21 UTC (rev 2138) @@ -35,6 +35,7 @@ import org.apache.commons.httpclient.URIException; import org.archive.wayback.ResourceIndex; +import org.archive.wayback.UrlCanonicalizer; import org.archive.wayback.WaybackConstants; import org.archive.wayback.core.SearchResults; import org.archive.wayback.core.WaybackRequest; @@ -42,8 +43,8 @@ import org.archive.wayback.exception.BadQueryException; import org.archive.wayback.exception.ResourceIndexNotAvailableException; import org.archive.wayback.exception.ResourceNotInArchiveException; -import org.archive.wayback.util.UrlCanonicalizer; import org.archive.wayback.util.flatfile.FlatFile; +import org.archive.wayback.util.url.AggressiveUrlCanonicalizer; /** * @@ -75,8 +76,12 @@ private String mapPath; private static Comparator<RangeGroup> comparator = RangeGroup.getComparator(); - private UrlCanonicalizer canonicalizer = new UrlCanonicalizer(); + private UrlCanonicalizer canonicalizer = null; + public AlphaPartitionedIndex() { + canonicalizer = new AggressiveUrlCanonicalizer(); + } + @SuppressWarnings("unchecked") private void reloadMapFile() throws IOException { FlatFile ff = new FlatFile(mapPath); @@ -235,4 +240,12 @@ public void setMapPath(String mapPath) { this.mapPath = mapPath; } + + public UrlCanonicalizer getCanonicalizer() { + return canonicalizer; + } + + public void setCanonicalizer(UrlCanonicalizer canonicalizer) { + this.canonicalizer = canonicalizer; + } } Modified: 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/SelfRedirectFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/SelfRedirectFilter.java 2008-01-15 03:00:16 UTC (rev 2137) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/SelfRedirectFilter.java 2008-01-15 03:03:21 UTC (rev 2138) @@ -25,10 +25,11 @@ package org.archive.wayback.resourceindex.filters; import org.apache.commons.httpclient.URIException; +import org.archive.wayback.UrlCanonicalizer; import org.archive.wayback.WaybackConstants; import org.archive.wayback.core.SearchResult; import org.archive.wayback.util.ObjectFilter; -import org.archive.wayback.util.UrlCanonicalizer; +import org.archive.wayback.util.url.AggressiveUrlCanonicalizer; /** * SearchResultFilter which INCLUDEs all records, unless they redirect to @@ -39,7 +40,10 @@ */ public class SelfRedirectFilter implements ObjectFilter<SearchResult> { - private UrlCanonicalizer canonicalizer = new UrlCanonicalizer(); + private UrlCanonicalizer canonicalizer = new AggressiveUrlCanonicalizer(); + public SelfRedirectFilter() { + canonicalizer = new AggressiveUrlCanonicalizer(); + } /* (non-Javadoc) * @see org.archive.wayback.resourceindex.SearchResultFilter#filterSearchResult(org.archive.wayback.core.SearchResult) */ @@ -63,4 +67,10 @@ } return FILTER_INCLUDE; } + public UrlCanonicalizer getCanonicalizer() { + return canonicalizer; + } + public void setCanonicalizer(UrlCanonicalizer canonicalizer) { + this.canonicalizer = canonicalizer; + } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/ARCRecordToSearchResultAdapter.java =================================================================== --- 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/ARCRecordToSearchResultAdapter.java 2008-01-15 03:00:16 UTC (rev 2137) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/ARCRecordToSearchResultAdapter.java 2008-01-15 03:03:21 UTC (rev 2138) @@ -34,10 +34,11 @@ import org.archive.io.arc.ARCRecordMetaData; import org.archive.net.UURI; import org.archive.net.UURIFactory; +import org.archive.wayback.UrlCanonicalizer; import org.archive.wayback.WaybackConstants; import org.archive.wayback.core.SearchResult; import org.archive.wayback.util.Adapter; -import org.archive.wayback.util.UrlCanonicalizer; +import org.archive.wayback.util.url.AggressiveUrlCanonicalizer; /** * @@ -51,9 +52,11 @@ private static final Logger LOGGER = Logger.getLogger( ARCRecordToSearchResultAdapter.class.getName()); - // TODO: make this configurable based on the ResourceIndex - private static UrlCanonicalizer canonicalizer = new UrlCanonicalizer(); - + private UrlCanonicalizer canonicalizer = null; + + public ARCRecordToSearchResultAdapter() { + canonicalizer = new AggressiveUrlCanonicalizer(); + } // public static SearchResult arcRecordToSearchResult(final ARCRecord rec) // throws IOException, ParseException { /* (non-Javadoc) @@ -161,4 +164,10 @@ } return result; } + public UrlCanonicalizer getCanonicalizer() { + return canonicalizer; + } + public void setCanonicalizer(UrlCanonicalizer canonicalizer) { + this.canonicalizer = canonicalizer; + } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/ArcIndexer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/ArcIndexer.java 2008-01-15 03:00:16 UTC (rev 2137) +++ 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/ArcIndexer.java 2008-01-15 03:03:21 UTC (rev 2138) @@ -33,11 +33,14 @@ import org.archive.io.arc.ARCReader; import org.archive.io.arc.ARCReaderFactory; import org.archive.io.arc.ARCRecord; +import org.archive.wayback.UrlCanonicalizer; import org.archive.wayback.core.SearchResult; import org.archive.wayback.resourceindex.cdx.SearchResultToCDXLineAdapter; import org.archive.wayback.util.AdaptedIterator; import org.archive.wayback.util.Adapter; import org.archive.wayback.util.CloseableIterator; +import org.archive.wayback.util.url.AggressiveUrlCanonicalizer; +import org.archive.wayback.util.url.IdentityUrlCanonicalizer; /** * Transforms an ARC file into Iterator<SearchResult>. @@ -51,7 +54,12 @@ * CDX Header line for these fields. not very configurable.. */ public final static String CDX_HEADER_MAGIC = " CDX N b h m s k r V g"; - + private UrlCanonicalizer canonicalizer = null; + + public ArcIndexer() { + canonicalizer = new AggressiveUrlCanonicalizer(); + } + /** * @param arc * @return Iterator of SearchResults for input arc File @@ -65,8 +73,9 @@ Adapter<ArchiveRecord,ARCRecord> adapter1 = new ArchiveRecordToARCRecordAdapter(); - Adapter<ARCRecord,SearchResult> adapter2 = + ARCRecordToSearchResultAdapter adapter2 = new ARCRecordToSearchResultAdapter(); + adapter2.setCanonicalizer(canonicalizer); Iterator<ArchiveRecord> itr1 = arcReader.iterator(); @@ -76,29 +85,22 @@ return new AdaptedIterator<ARCRecord,SearchResult>(itr2,adapter2); } - - private class ArchiveRecordToARCRecordAdapter - implements Adapter<ArchiveRecord,ARCRecord> { + public UrlCanonicalizer getCanonicalizer() { + return canonicalizer; + } - /* (non-Javadoc) - * @see org.archive.wayback.util.Adapter#adapt(java.lang.Object) - */ - public ARCRecord adapt(ArchiveRecord o) { - ARCRecord rec = null; - if(o instanceof ARCRecord) { - rec = (ARCRecord) o; - } - return rec; - } + public void 
setCanonicalizer(UrlCanonicalizer canonicalizer) { + this.canonicalizer = canonicalizer; } private static void USAGE() { System.err.println("USAGE:"); System.err.println(""); - System.err.println("arc-indexer ARCFILE"); - System.err.println("arc-indexer ARCFILE CDXFILE"); + System.err.println("arc-indexer [-identity] ARCFILE"); + System.err.println("arc-indexer [-identity] ARCFILE CDXFILE"); System.err.println(""); - System.err.println("Create a CDX format index at CDXFILE or to STDOUT"); + System.err.println("Create a CDX format index at CDXFILE or to STDOUT."); + System.err.println("With -identity, perform no url canonicalization."); System.exit(1); } @@ -107,14 +109,20 @@ */ public static void main(String[] args) { ArcIndexer indexer = new ArcIndexer(); - File arc = new File(args[0]); + int idx = 0; + if(args[0] != null && args[0].equals("-identity")) { + indexer.setCanonicalizer(new IdentityUrlCanonicalizer()); + idx++; + } + File arc = new File(args[idx]); + idx++; PrintWriter pw = null; try { - if(args.length == 1) { + if(args.length == idx) { // dump to STDOUT: pw = new PrintWriter(System.out); - } else if(args.length == 2) { - pw = new PrintWriter(args[1]); + } else if(args.length == (idx + 1)) { + pw = new PrintWriter(args[idx]); } else { USAGE(); } @@ -126,6 +134,22 @@ pw.close(); } catch (Exception e) { e.printStackTrace(); + System.exit(1); } } + + private class ArchiveRecordToARCRecordAdapter + implements Adapter<ArchiveRecord,ARCRecord> { + + /* (non-Javadoc) + * @see org.archive.wayback.util.Adapter#adapt(java.lang.Object) + */ + public ARCRecord adapt(ArchiveRecord o) { + ARCRecord rec = null; + if(o instanceof ARCRecord) { + rec = (ARCRecord) o; + } + return rec; + } + } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/WARCRecordToSearchResultAdapter.java =================================================================== --- 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/WARCRecordToSearchResultAdapter.java 2008-01-15 03:00:16 UTC (rev 2137) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/WARCRecordToSearchResultAdapter.java 2008-01-15 03:03:21 UTC (rev 2138) @@ -16,10 +16,11 @@ import org.archive.io.warc.WARCRecord; import org.archive.net.UURI; import org.archive.net.UURIFactory; +import org.archive.wayback.UrlCanonicalizer; import org.archive.wayback.WaybackConstants; import org.archive.wayback.core.SearchResult; import org.archive.wayback.util.Adapter; -import org.archive.wayback.util.UrlCanonicalizer; +import org.archive.wayback.util.url.AggressiveUrlCanonicalizer; /** * Adapts certain WARCRecords into SearchResults. DNS and response records are @@ -52,9 +53,12 @@ private static final Logger LOGGER = Logger.getLogger( WARCRecordToSearchResultAdapter.class.getName()); - // TODO: make this configurable based on the ResourceIndex - private static UrlCanonicalizer canonicalizer = new UrlCanonicalizer(); + private UrlCanonicalizer canonicalizer = null; + public WARCRecordToSearchResultAdapter() { + canonicalizer = new AggressiveUrlCanonicalizer(); + } + /* (non-Javadoc) * @see org.archive.wayback.util.Adapter#adapt(java.lang.Object) */ @@ -303,4 +307,12 @@ return result; } + + public UrlCanonicalizer getCanonicalizer() { + return canonicalizer; + } + + public void setCanonicalizer(UrlCanonicalizer canonicalizer) { + this.canonicalizer = canonicalizer; + } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/WarcIndexer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/WarcIndexer.java 2008-01-15 03:00:16 UTC (rev 2137) +++ 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/WarcIndexer.java 2008-01-15 03:03:21 UTC (rev 2138) @@ -9,11 +9,14 @@ import org.archive.io.warc.WARCReader; import org.archive.io.warc.WARCReaderFactory; import org.archive.io.warc.WARCRecord; +import org.archive.wayback.UrlCanonicalizer; import org.archive.wayback.core.SearchResult; import org.archive.wayback.resourceindex.cdx.SearchResultToCDXLineAdapter; import org.archive.wayback.util.AdaptedIterator; import org.archive.wayback.util.Adapter; import org.archive.wayback.util.CloseableIterator; +import org.archive.wayback.util.url.AggressiveUrlCanonicalizer; +import org.archive.wayback.util.url.IdentityUrlCanonicalizer; public class WarcIndexer { @@ -22,6 +25,11 @@ */ public final static String CDX_HEADER_MAGIC = " CDX N b h m s k r V g"; + private UrlCanonicalizer canonicalizer = null; + public WarcIndexer() { + canonicalizer = new AggressiveUrlCanonicalizer(); + } + /** * @param arc * @return Iterator of SearchResults for input arc File @@ -32,7 +40,10 @@ Adapter<ArchiveRecord, WARCRecord> adapter1 = new ArchiveRecordToWARCRecordAdapter(); - Adapter<WARCRecord, SearchResult> adapter2 = new WARCRecordToSearchResultAdapter(); + WARCRecordToSearchResultAdapter adapter2 = + new WARCRecordToSearchResultAdapter(); + adapter2.setCanonicalizer(canonicalizer); + WARCReader reader = WARCReaderFactory.get(warc); Iterator<ArchiveRecord> itr1 = reader.iterator(); @@ -43,28 +54,22 @@ return new AdaptedIterator<WARCRecord, SearchResult>(itr2, adapter2); } - private class ArchiveRecordToWARCRecordAdapter implements - Adapter<ArchiveRecord, WARCRecord> { + public UrlCanonicalizer getCanonicalizer() { + return canonicalizer; + } - /* (non-Javadoc) - * @see org.archive.wayback.util.Adapter#adapt(java.lang.Object) - */ - public WARCRecord adapt(ArchiveRecord o) { - WARCRecord rec = null; - if (o instanceof WARCRecord) { - rec = (WARCRecord) o; - } - return rec; - } + public void 
setCanonicalizer(UrlCanonicalizer canonicalizer) { + this.canonicalizer = canonicalizer; } - + private static void USAGE() { System.err.println("USAGE:"); System.err.println(""); - System.err.println("warc-indexer WARCFILE"); - System.err.println("warc-indexer WARCFILE CDXFILE"); + System.err.println("warc-indexer [-identity] WARCFILE"); + System.err.println("warc-indexer [-identity] WARCFILE CDXFILE"); System.err.println(""); System.err.println("Create a CDX format index at CDXFILE or to STDOUT"); + System.err.println("With -identity, perform no url canonicalization."); System.exit(1); } @@ -73,13 +78,19 @@ */ public static void main(String[] args) { WarcIndexer indexer = new WarcIndexer(); - File arc = new File(args[0]); + int idx = 0; + if(args[0] != null && args[0].equals("-identity")) { + indexer.setCanonicalizer(new IdentityUrlCanonicalizer()); + idx++; + } + File arc = new File(args[idx]); + idx++; PrintWriter pw = null; try { - if (args.length == 1) { + if (args.length == idx) { // dump to STDOUT: pw = new PrintWriter(System.out); - } else if (args.length == 2) { + } else if (args.length == (idx+1)) { pw = new PrintWriter(args[1]); } else { USAGE(); @@ -94,4 +105,19 @@ e.printStackTrace(); } } + + private class ArchiveRecordToWARCRecordAdapter implements + Adapter<ArchiveRecord, WARCRecord> { + + /* (non-Javadoc) + * @see org.archive.wayback.util.Adapter#adapt(java.lang.Object) + */ + public WARCRecord adapt(ArchiveRecord o) { + WARCRecord rec = null; + if (o instanceof WARCRecord) { + rec = (WARCRecord) o; + } + return rec; + } + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2008-03-10 22:00:54
|
Revision: 2216 http://archive-access.svn.sourceforge.net/archive-access/?rev=2216&view=rev Author: bradtofel Date: 2008-03-10 15:00:46 -0700 (Mon, 10 Mar 2008) Log Message: ----------- BUGFIX: (ACC-14) now generate a fresh latest timestamp (if one is not configured) each time one is needed, instead of using the "default" one we generated when the JVM starts up. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathDatePrefixQueryRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathPrefixDatePrefixQueryRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/ReplayRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/BaseRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/CompositeRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/FormRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/OpenSearchRequestParser.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathDatePrefixQueryRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathDatePrefixQueryRequestParser.java 2008-03-04 05:01:52 UTC (rev 2215) +++ 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathDatePrefixQueryRequestParser.java 2008-03-10 22:00:46 UTC (rev 2216) @@ -61,8 +61,8 @@ String startDate; String endDate; if(dateStr.length() == 0) { - startDate = earliestTimestamp; - endDate = latestTimestamp; + startDate = getEarliestTimestamp(); + endDate = getLatestTimestamp(); } else { startDate = Timestamp.parseBefore(dateStr).getDateStr(); endDate = Timestamp.parseAfter(dateStr).getDateStr(); Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathPrefixDatePrefixQueryRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathPrefixDatePrefixQueryRequestParser.java 2008-03-04 05:01:52 UTC (rev 2215) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathPrefixDatePrefixQueryRequestParser.java 2008-03-10 22:00:46 UTC (rev 2216) @@ -60,8 +60,8 @@ String startDate; String endDate; if(dateStr.length() == 0) { - startDate = earliestTimestamp; - endDate = latestTimestamp; + startDate = getEarliestTimestamp(); + endDate = getLatestTimestamp(); } else { startDate = Timestamp.parseBefore(dateStr).getDateStr(); endDate = Timestamp.parseAfter(dateStr).getDateStr(); Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/ReplayRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/ReplayRequestParser.java 2008-03-04 05:01:52 UTC (rev 2215) +++ 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/ReplayRequestParser.java 2008-03-10 22:00:46 UTC (rev 2216) @@ -75,8 +75,8 @@ String startDate = null; String endDate = null; if (dateStr.length() == 14) { - startDate = earliestTimestamp; - endDate = latestTimestamp; + startDate = getEarliestTimestamp(); + endDate = getLatestTimestamp(); } else { // classic behavior: @@ -85,8 +85,8 @@ // dateStr = endDate; // "better" behavior: - startDate = earliestTimestamp; - endDate = latestTimestamp; + startDate = getEarliestTimestamp(); + endDate = getLatestTimestamp(); dateStr = Timestamp.parseAfter(dateStr).getDateStr(); } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixRequestParser.java 2008-03-04 05:01:52 UTC (rev 2215) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixRequestParser.java 2008-03-10 22:00:46 UTC (rev 2216) @@ -108,8 +108,8 @@ String startDate; String endDate; if(dateStr.length() == 0) { - startDate = earliestTimestamp; - endDate = latestTimestamp; + startDate = getEarliestTimestamp(); + endDate = getLatestTimestamp(); } else { startDate = Timestamp.parseBefore(dateStr).getDateStr(); endDate = Timestamp.parseAfter(dateStr).getDateStr(); Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/BaseRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/BaseRequestParser.java 2008-03-04 05:01:52 UTC (rev 2215) +++ 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/BaseRequestParser.java 2008-03-10 22:00:46 UTC (rev 2216) @@ -66,10 +66,8 @@ protected int maxRecords = DEFAULT_MAX_RECORDS; - protected String earliestTimestamp = - Timestamp.earliestTimestamp().getDateStr(); - protected String latestTimestamp = - Timestamp.latestTimestamp().getDateStr(); + protected String earliestTimestamp = null; + protected String latestTimestamp = null; protected static String getMapParam(Map<String,String[]> queryMap, String field) { @@ -128,15 +126,21 @@ * @return */ public String getEarliestTimestamp() { - return earliestTimestamp; + if(earliestTimestamp != null) { + return earliestTimestamp; + + } + return Timestamp.earliestTimestamp().getDateStr(); } public String getLatestTimestamp() { - return latestTimestamp; + if(latestTimestamp != null) { + return latestTimestamp; + } + return Timestamp.latestTimestamp().getDateStr(); } public void setLatestTimestamp(String timestamp) { this.latestTimestamp = Timestamp.parseAfter(timestamp).getDateStr(); } - } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/CompositeRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/CompositeRequestParser.java 2008-03-04 05:01:52 UTC (rev 2215) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/CompositeRequestParser.java 2008-03-10 22:00:46 UTC (rev 2216) @@ -47,8 +47,12 @@ parsers = getRequestParsers(); for(int i = 0; i < parsers.length; i++) { parsers[i].setMaxRecords(maxRecords); - parsers[i].setEarliestTimestamp(earliestTimestamp); - parsers[i].setLatestTimestamp(latestTimestamp); + if(earliestTimestamp != null) { + parsers[i].setEarliestTimestamp(earliestTimestamp); + } + if(latestTimestamp != null) { + 
parsers[i].setLatestTimestamp(latestTimestamp); + } } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/FormRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/FormRequestParser.java 2008-03-04 05:01:52 UTC (rev 2215) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/FormRequestParser.java 2008-03-10 22:00:46 UTC (rev 2216) @@ -88,11 +88,11 @@ } if(wbRequest.get(WaybackConstants.REQUEST_START_DATE) == null) { wbRequest.put(WaybackConstants.REQUEST_START_DATE, - earliestTimestamp); + getEarliestTimestamp()); } if(wbRequest.get(WaybackConstants.REQUEST_END_DATE) == null) { wbRequest.put(WaybackConstants.REQUEST_END_DATE, - latestTimestamp); + getLatestTimestamp()); } } if(wbRequest != null) { Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/OpenSearchRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/OpenSearchRequestParser.java 2008-03-04 05:01:52 UTC (rev 2215) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/OpenSearchRequestParser.java 2008-03-10 22:00:46 UTC (rev 2216) @@ -146,11 +146,11 @@ } if(wbRequest.get(WaybackConstants.REQUEST_START_DATE) == null) { wbRequest.put(WaybackConstants.REQUEST_START_DATE, - earliestTimestamp); + getEarliestTimestamp()); } if(wbRequest.get(WaybackConstants.REQUEST_END_DATE) == null) { wbRequest.put(WaybackConstants.REQUEST_END_DATE, - latestTimestamp); + getLatestTimestamp()); } wbRequest.fixup(httpRequest); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |