From: Brad <bra...@us...> - 2005-11-16 03:11:42
|
Update of /cvsroot/archive-access/archive-access/projects/wayback/src/java/org/archive/wayback/core In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv30992/src/java/org/archive/wayback/core Modified Files: WaybackLogic.java Timestamp.java Resource.java Added Files: RequestFilter.java WaybackRequest.java SearchResult.java SearchResults.java Removed Files: ResourceResult.java ResourceResults.java WMRequest.java Log Message: Massive overhaul decomposing into three main categories of changes: 1) All internal datatypes are now extensible (currently Properties, but should be Maps) including: a) WaybackRequest (was WMRequest) b) SearchResults (was ResourceResults) c) SearchResult (was ResourceResult) d) Resource so that there is no longer an assumption of Archival URL queries, or "CDX-style" index results. This will put more responsibility on the UI components to interrogate SearchResults to decide how to render, but should enable extension to data returned from Indexes, as well as allow far more flexibility in queries, predominantly geared towards free-text searching. This is still somewhat clunky, as there are no convenience accessor methods, so all users refer to constants when interacting with them. 2) Major cleanup of servlet and filter interaction with servlet container. ReplayUI and QueryUI are now just plain old servlets, and filters can be optionally added to allow non-CGI argument requests to be coerced into standard WaybackRequest objects. 3) Alternate "Proxy" Replay mode is now functional, and some work has been done towards an alternate Nutch ResourceIndex. Currently the web.xml contains example configurations for both Proxy and Archival Url replay modes, but the Proxy related configurations are commented out. Proxy mode *requires* changing the servlet context to ROOT. ArchivalUrl replay mode works as ROOT context and as any (I think) other context. There are some cosmetic double-slash issues to work out. 
--- NEW FILE: SearchResult.java --- /* SearchResult * * $Id: SearchResult.java,v 1.1 2005/11/16 03:11:29 bradtofel Exp $ * * Created on 12:45:18 PM Nov 9, 2005. * * Copyright (C) 2005 Internet Archive. * * This file is part of wayback. * * wayback is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser Public License as published by * the Free Software Foundation; either version 2.1 of the License, or * any later version. * * wayback is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser Public License for more details. * * You should have received a copy of the GNU Lesser Public License * along with wayback; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ package org.archive.wayback.core; import java.util.Properties; /** * * * @author brad * @version $Date: 2005/11/16 03:11:29 $, $Revision: 1.1 $ */ public class SearchResult { private Properties data = null; public SearchResult() { super(); data = new Properties(); } public boolean containsKey(String key) { return data.containsKey(key); } public String get(String key) { return (String) data.get(key); } public String put(String key, String value) { return (String) data.put(key, value); } } --- NEW FILE: SearchResults.java --- /* SearchResults * * $Id: SearchResults.java,v 1.1 2005/11/16 03:11:29 bradtofel Exp $ * * Created on 12:52:13 PM Nov 9, 2005. * * Copyright (C) 2005 Internet Archive. * * This file is part of wayback. * * wayback is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser Public License as published by * the Free Software Foundation; either version 2.1 of the License, or * any later version. 
* * wayback is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser Public License for more details. * * You should have received a copy of the GNU Lesser Public License * along with wayback; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ package org.archive.wayback.core; import java.util.ArrayList; import java.util.Iterator; import java.util.Properties; import org.archive.wayback.WaybackConstants; /** * * * @author brad * @version $Date: 2005/11/16 03:11:29 $, $Revision: 1.1 $ */ public class SearchResults { private ArrayList results = null; private String firstResultDate; private String lastResultDate; private Properties filters = new Properties(); public SearchResults() { super(); results = new ArrayList(); } /** * @return true if no SearchResult objects, false otherwise. */ public boolean isEmpty() { return results.isEmpty(); } /** * @param result * SearchResult to add to this set */ public void addSearchResult(final SearchResult result) { String resultDate = result.get(WaybackConstants.RESULT_CAPTURE_DATE); if((firstResultDate == null) || (firstResultDate.compareTo(resultDate) < 0)) { firstResultDate = resultDate; } if((lastResultDate == null) || (lastResultDate.compareTo(resultDate) > 0)) { lastResultDate = resultDate; } results.add(result); } /** * @return number of SearchResult objects contained in these SearchResults */ public int getResultCount() { return results.size(); } /** * @return an Iterator that contains the ResourceResult objects */ public Iterator iterator() { return results.iterator(); } /** * @return Returns the firstResultDate. */ public String getFirstResultDate() { return firstResultDate; } /** * @return Returns the lastResultDate. 
*/ public String getLastResultDate() { return lastResultDate; } public boolean containsFilter(String key) { return filters.containsKey(key); } public String getFilter(String key) { return (String) filters.get(key); } public String putFilter(String key, String value) { return (String) filters.put(key, value); } } --- ResourceResult.java DELETED --- --- WMRequest.java DELETED --- Index: Resource.java =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/wayback/src/java/org/archive/wayback/core/Resource.java,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** Resource.java 19 Oct 2005 01:22:36 -0000 1.2 --- Resource.java 16 Nov 2005 03:11:29 -0000 1.3 *************** *** 24,27 **** --- 24,36 ---- package org.archive.wayback.core; + import java.io.IOException; + import java.io.InputStream; + import java.util.Enumeration; + import java.util.Iterator; + import java.util.Map; + import java.util.Properties; + import java.util.Set; + + import org.apache.commons.httpclient.Header; import org.archive.io.arc.ARCRecord; *************** *** 30,43 **** * to allow the Wayback to operator with non-ARC file format resources. Probably * the interface required will end up looking very much like ARCRecord, but can ! * be reimplemented to handle new ARC formats or non-ARC formats. At the moment, ! * users of this class just grab the ARCRecord out and use it directly. * * @author Brad Tofel * @version $Date$, $Revision$ */ ! public class Resource { ARCRecord arcRecord = null; ! /** * Constructor --- 39,55 ---- * to allow the Wayback to operator with non-ARC file format resources. Probably * the interface required will end up looking very much like ARCRecord, but can ! * be reimplemented to handle new ARC formats or non-ARC formats. * * @author Brad Tofel * @version $Date$, $Revision$ */ ! 
public class Resource extends InputStream { + private static String ARC_META_PREFIX = "arcmeta."; + private static String HTTP_HEADER_PREFIX = "httpheader."; ARCRecord arcRecord = null; ! boolean parsedHeader = false; ! Properties metaData = new Properties(); ! /** * Constructor *************** *** 50,53 **** --- 62,126 ---- } + public void parseHeaders () throws IOException { + if(!parsedHeader) { + arcRecord.skipHttpHeader(); + + // copy all HTTP headers to metaData, prefixing with + // HTTP_HEADER_PREFIX + Header[] headers = arcRecord.getHttpHeaders(); + if (headers != null) { + for (int i = 0; i < headers.length; i++) { + String value = headers[i].getValue(); + String name = headers[i].getName(); + metaData.put(HTTP_HEADER_PREFIX + name,value); + } + } + + // copy all ARC record header fields to metaData, prefixing with + // ARC_META_PREFIX + Map headerMetaMap = arcRecord.getMetaData().getHeaderFields(); + Set keys = headerMetaMap.keySet(); + Iterator itr = keys.iterator(); + while(itr.hasNext()) { + Object metaKey = itr.next(); + Object metaValue = headerMetaMap.get(metaKey); + String metaStringValue = (metaValue == null) ? 
"" : + metaValue.toString(); + metaData.put(ARC_META_PREFIX + metaKey.toString(), + metaStringValue); + } + + parsedHeader = true; + } + } + + public Properties filterMeta(String prefix) { + Properties matching = new Properties(); + for (Enumeration e = metaData.keys(); e.hasMoreElements();) { + String key = (String) e.nextElement(); + if (key.startsWith(prefix)) { + String finalKey = key.substring(prefix.length()); + String value = (String) metaData.get(key); + matching.put(finalKey, value); + } + } + return matching; + } + + public Properties getHttpHeaders() { + return filterMeta(HTTP_HEADER_PREFIX); + } + + public Properties getARCMetadata() { + return filterMeta(ARC_META_PREFIX); + } + + /* (non-Javadoc) + * @see org.archive.io.arc.ARCRecord#getStatusCode() + */ + public int getStatusCode() { + return arcRecord.getStatusCode(); + } + /** * @return the ARCRecord underlying this Resource. *************** *** 64,66 **** --- 137,167 ---- } + /* (non-Javadoc) + * @see org.archive.io.arc.ARCRecord#read() + */ + public int read() throws IOException { + return arcRecord.read(); + } + + /* (non-Javadoc) + * @see org.archive.io.arc.ARCRecord#read(byte[], int, int) + */ + public int read(byte[] arg0, int arg1, int arg2) throws IOException { + return arcRecord.read(arg0, arg1, arg2); + } + + /* (non-Javadoc) + * @see java.io.InputStream#read(byte[]) + */ + public int read(byte[] b) throws IOException { + return arcRecord.read(b); + } + + /* (non-Javadoc) + * @see org.archive.io.arc.ARCRecord#skip(long) + */ + public long skip(long arg0) throws IOException { + return arcRecord.skip(arg0); + } + } Index: Timestamp.java =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/wayback/src/java/org/archive/wayback/core/Timestamp.java,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** Timestamp.java 20 Oct 2005 00:40:41 -0000 1.3 --- Timestamp.java 16 Nov 2005 03:11:29 -0000 1.4 
*************** *** 146,150 **** String last = LAST1_TIMESTAMP; if (input.length() == 0) { ! return LAST2_TIMESTAMP; } if (input.length() < 4) { --- 146,150 ---- String last = LAST1_TIMESTAMP; if (input.length() == 0) { ! return ArchiveUtils.get14DigitDate(new Date()); } if (input.length() < 4) { Index: WaybackLogic.java =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/wayback/src/java/org/archive/wayback/core/WaybackLogic.java,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** WaybackLogic.java 19 Oct 2005 01:22:36 -0000 1.2 --- WaybackLogic.java 16 Nov 2005 03:11:29 -0000 1.3 *************** *** 24,34 **** package org.archive.wayback.core; import java.util.Properties; import java.util.logging.Logger; ! import org.archive.wayback.QueryUI; ! import org.archive.wayback.ReplayUI; import org.archive.wayback.ResourceIndex; import org.archive.wayback.ResourceStore; /** --- 24,38 ---- package org.archive.wayback.core; + import java.util.Enumeration; import java.util.Properties; import java.util.logging.Logger; ! import org.archive.wayback.PropertyConfigurable; ! import org.archive.wayback.ReplayRenderer; ! import org.archive.wayback.QueryRenderer; ! import org.archive.wayback.ReplayResultURIConverter; import org.archive.wayback.ResourceIndex; import org.archive.wayback.ResourceStore; + import org.archive.wayback.exception.ConfigurationException; /** *************** *** 38,56 **** * @version $Date$, $Revision$ */ ! public class WaybackLogic { private static final Logger LOGGER = Logger.getLogger(WaybackLogic.class .getName()); ! private static final String REPLAY_UI_CLASS = "replayui.class"; ! private static final String QUERY_UI_CLASS = "queryui.class"; ! private static final String RESOURCE_STORE_CLASS = "resourcestore.class"; ! private static final String RESOURCE_INDEX_CLASS = "resourceindex.class"; ! private ReplayUI replayUI = null; ! 
private QueryUI queryUI = null; private ResourceIndex resourceIndex = null; --- 42,65 ---- * @version $Date$, $Revision$ */ ! public class WaybackLogic implements PropertyConfigurable { private static final Logger LOGGER = Logger.getLogger(WaybackLogic.class .getName()); ! private static final String REPLAY_URI_CONVERTER_PROPERTY = ! "replayuriconverter"; ! private static final String REPLAY_RENDERER_PROPERTY = "replayrenderer"; ! private static final String QUERY_RENDERER_PROPERTY = "queryrenderer"; ! private static final String RESOURCE_STORE_PROPERTY = "resourcestore"; ! private static final String RESOURCE_INDEX_PROPERTY = "resourceindex"; ! private ReplayResultURIConverter uriConverter = null; ! ! private ReplayRenderer replayRenderer = null; ! ! private QueryRenderer queryRenderer = null; private ResourceIndex resourceIndex = null; *************** *** 74,152 **** * @throws Exception */ ! public void init(Properties p) throws Exception { LOGGER.info("WaybackLogic constructing classes..."); ! replayUI = (ReplayUI) getInstance(p, REPLAY_UI_CLASS, "replayui"); ! queryUI = (QueryUI) getInstance(p, QUERY_UI_CLASS, "queryUI"); ! resourceStore = (ResourceStore) getInstance(p, RESOURCE_STORE_CLASS, ! "resourceStore"); ! resourceIndex = (ResourceIndex) getInstance(p, RESOURCE_INDEX_CLASS, ! "resourceIndex"); ! LOGGER.info("WaybackLogic initializing classes..."); ! try { ! replayUI.init(p); ! LOGGER.info("initialized replayUI"); ! queryUI.init(p); ! LOGGER.info("initialized queryUI"); ! resourceStore.init(p); ! LOGGER.info("initialized resourceStore"); ! resourceIndex.init(p); ! LOGGER.info("initialized resourceIndex"); - } catch (Exception e) { - throw new Exception(e.getMessage()); - } } ! protected Object getInstance(final Properties p, ! final String classProperty, final String pretty) throws Exception { ! Object result = null; ! String className = (String) p.get(classProperty); ! if ((className == null) || (className.length() <= 0)) { ! 
throw new Exception("No config (" + classProperty + " for " ! + pretty + ")"); } try { ! result = Class.forName(className).newInstance(); ! LOGGER.info("new " + className + " " + pretty + " created."); } catch (Exception e) { ! // Convert. Add info. ! throw new Exception("Failed making " + pretty + " with " ! + className + ": " + e.getMessage()); } return result; } /** ! * @return Returns the queryUI. */ ! public QueryUI getQueryUI() { ! return queryUI; } /** ! * @return Returns the replayUI. */ ! public ReplayUI getReplayUI() { ! return replayUI; } /** ! * @return Returns the resourceIndex. */ ! public ResourceIndex getResourceIndex() { ! return resourceIndex; } /** ! * @return Returns the resourceStore. */ ! public ResourceStore getResourceStore() { ! return resourceStore; } --- 83,185 ---- * @throws Exception */ ! public void init(Properties p) throws ConfigurationException { LOGGER.info("WaybackLogic constructing classes..."); ! uriConverter = (ReplayResultURIConverter) getInstance(p, ! REPLAY_URI_CONVERTER_PROPERTY); ! replayRenderer = (ReplayRenderer) getInstance(p, ! REPLAY_RENDERER_PROPERTY); ! queryRenderer = (QueryRenderer) getInstance(p, QUERY_RENDERER_PROPERTY); ! resourceStore = (ResourceStore) getInstance(p, RESOURCE_STORE_PROPERTY); ! resourceIndex = (ResourceIndex) getInstance(p, RESOURCE_INDEX_PROPERTY); ! ! LOGGER.info("WaybackLogic initialized classes..."); } ! protected PropertyConfigurable getInstance(final Properties p, ! final String classPrefix) throws ConfigurationException { ! PropertyConfigurable result = null; ! ! String classNameKey = classPrefix + ".classname"; ! String propertyPrefix = classPrefix + "."; ! String className = null; ! ! // build new class-specific Properties for class initialization: ! Properties classProperties = new Properties(); ! for (Enumeration e = p.keys(); e.hasMoreElements();) { ! String key = (String) e.nextElement(); ! ! if (key.equals(classNameKey)) { ! ! // special .classname value: ! 
className = (String) p.get(key); ! ! } else if (key.startsWith(propertyPrefix)) { ! ! String finalKey = key.substring(propertyPrefix.length()); ! String value = (String) p.get(key); ! classProperties.put(finalKey, value); ! ! } ! } ! ! // did we find the implementation class? ! if (className == null) { ! throw new ConfigurationException("No configuration for (" ! + classNameKey + ")"); } try { ! result = (PropertyConfigurable) Class.forName(className) ! .newInstance(); } catch (Exception e) { ! e.printStackTrace(); ! throw new ConfigurationException(e.getMessage()); } + LOGGER.info("new " + className + " created."); + result.init(p); + LOGGER.info("initialized " + className); + return result; } /** ! * @return Returns the resourceIndex. */ ! public ResourceIndex getResourceIndex() { ! return resourceIndex; } /** ! * @return Returns the resourceStore. */ ! public ResourceStore getResourceStore() { ! return resourceStore; } /** ! * @return Returns the uriConverter. */ ! public ReplayResultURIConverter getURIConverter() { ! return uriConverter; } /** ! * @return Returns the replayRenderer. */ ! public ReplayRenderer getReplayRenderer() { ! return replayRenderer; ! } ! ! /** ! * @return Returns the queryRenderer. ! */ ! public QueryRenderer getQueryRenderer() { ! return queryRenderer; } --- NEW FILE: WaybackRequest.java --- /* WMRequest * * Created on 2005/10/18 14:00:00 * * Copyright (C) 2005 Internet Archive. * * This file is part of the Wayback Machine (crawler.archive.org). * * Wayback Machine is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser Public License as published by * the Free Software Foundation; either version 2.1 of the License, or * any later version. * * Wayback Machine is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser Public License for more details. 
* * You should have received a copy of the GNU Lesser Public License * along with Wayback Machine; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ package org.archive.wayback.core; import java.util.Properties; /** * Abstraction of all the data associated with a users request to the Wayback * Machine. * * @author Brad Tofel * @version $Date: 2005/11/16 03:11:29 $, $Revision: 1.1 $ */ public class WaybackRequest { private int resultsPerPage = 1000; private int pageNum = 1; private Properties filters = new Properties(); /** * Constructor */ public WaybackRequest() { super(); } /** * @return Returns the pageNum. */ public int getPageNum() { return pageNum; } /** * @param pageNum The pageNum to set. */ public void setPageNum(int pageNum) { this.pageNum = pageNum; } /** * @return Returns the resultsPerPage. */ public int getResultsPerPage() { return resultsPerPage; } /** * @param resultsPerPage The resultsPerPage to set. */ public void setResultsPerPage(int resultsPerPage) { this.resultsPerPage = resultsPerPage; } public boolean containsKey(String key) { return filters.containsKey(key); } public String get(String key) { return (String) filters.get(key); } public void put(String key, String value) { filters.put(key, value); } } --- ResourceResults.java DELETED --- --- NEW FILE: RequestFilter.java --- /* RequestFilter * * $Id: RequestFilter.java,v 1.1 2005/11/16 03:11:29 bradtofel Exp $ * * Created on 1:17:08 PM Nov 8, 2005. * * Copyright (C) 2005 Internet Archive. * * This file is part of wayback. * * wayback is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser Public License as published by * the Free Software Foundation; either version 2.1 of the License, or * any later version. * * wayback is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the * GNU Lesser Public License for more details. * * You should have received a copy of the GNU Lesser Public License * along with wayback; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ package org.archive.wayback.core; import java.io.IOException; import javax.servlet.Filter; import javax.servlet.FilterChain; import javax.servlet.FilterConfig; import javax.servlet.RequestDispatcher; import javax.servlet.ServletException; import javax.servlet.ServletRequest; import javax.servlet.ServletResponse; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; /** * * * @author brad * @version $Date: 2005/11/16 03:11:29 $, $Revision: 1.1 $ */ public abstract class RequestFilter implements Filter { private static final String WMREQUEST_ATTRIBUTE = "wmrequest.attribute"; private static final String HANDLER_URL = "handler.url"; private String handlerUrl = null; /** * Constructor */ public RequestFilter() { super(); } public void init(FilterConfig c) throws ServletException { handlerUrl = c.getInitParameter(HANDLER_URL); if ((handlerUrl == null) || (handlerUrl.length() <= 0)) { throw new ServletException("No config (" + HANDLER_URL + ")"); } } public void doFilter(ServletRequest request, ServletResponse response, FilterChain chain) throws IOException, ServletException { if (!handle(request, response)) { chain.doFilter(request, response); } } protected boolean handle(final ServletRequest request, final ServletResponse response) throws IOException, ServletException { if (!(request instanceof HttpServletRequest)) { return false; } if (!(response instanceof HttpServletResponse)) { return false; } HttpServletRequest httpRequest = (HttpServletRequest) request; //HttpServletResponse httpResponse = (HttpServletResponse) response; WaybackRequest wbRequest = parseRequest(httpRequest); if (wbRequest == null) { return false; } request.setAttribute(WMREQUEST_ATTRIBUTE, wbRequest); 
RequestDispatcher dispatcher = request.getRequestDispatcher(handlerUrl); dispatcher.forward(request, response); return true; } protected abstract WaybackRequest parseRequest( HttpServletRequest httpRequest); public void destroy() { } } |