From: Brad <bra...@us...> - 2005-11-16 03:11:41
|
Update of /cvsroot/archive-access/archive-access/projects/wayback/src/java/org/archive/wayback In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv30992/src/java/org/archive/wayback Modified Files: ResourceStore.java ResourceIndex.java Added Files: WaybackConstants.java ReplayResultURIConverter.java QueryRenderer.java ReplayRenderer.java PropertyConfigurable.java Removed Files: ReplayUI.java RequestParser.java QueryUI.java Log Message: Massive overhaul decomposing into three main categories of changes: 1) All internal datatypes are now extensible (currently Properties, but should be Maps) including: a) WaybackRequest (was WBRequest) b) SearchResults (was ResourceResults) c) SearchResult (was ResourceResult) d) Resource so that there is no longer an assumption of Archival URL queries, or "CDX-style" index results. This will put more responsibility on the UI components to interrogate SearchResults to decide how to render, but should enable extension to data returned from Indexes, as well as allow far more flexibility in queries, predominantly geared towards free-text searching. This is still somewhat clunky, as there are no convenience accessor methods, so all users refer to constants when interacting with them. 2) Major cleanup of servlet and filter interaction with servlet container. ReplayUI and QueryUI are now just plain old servlets, and filters can be optionally added to allow non-CGI argument requests to be coerced into standard WaybackRequest objects. 3) Alternate "Proxy" Replay mode is now functional, and some work has been done towards an alternate Nutch ResourceIndex. Currently the web.xml contains example configurations for both Proxy and Archival Url replay modes, but the Proxy-related configurations are commented out. Proxy mode *requires* changing the servlet context to ROOT. ArchivalUrl replay mode works as ROOT context and as any (I think) other context. There are some cosmetic double-slash issues to work out. 
--- NEW FILE: PropertyConfigurable.java --- /* PropertyConfigurable * * $Id: PropertyConfigurable.java,v 1.1 2005/11/16 03:11:29 bradtofel Exp $ * * Created on 3:46:34 PM Nov 7, 2005. * * Copyright (C) 2005 Internet Archive. * * This file is part of wayback. * * wayback is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser Public License as published by * the Free Software Foundation; either version 2.1 of the License, or * any later version. * * wayback is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser Public License for more details. * * You should have received a copy of the GNU Lesser Public License * along with wayback; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ package org.archive.wayback; import java.util.Properties; import org.archive.wayback.exception.ConfigurationException; /** * Contract for components that are configured from a {@link Properties} * bag handed to {@link #init(Properties)}. The ReplayRenderer, * QueryRenderer, ResourceStore, ResourceIndex and ReplayResultURIConverter * interfaces all extend this interface. * * @author brad * @version $Date: 2005/11/16 03:11:29 $, $Revision: 1.1 $ */ public interface PropertyConfigurable { /** * Initialize this Object. Pass in the specific * configurations via Properties. * * @param p * Generic properties bag for configurations * @throws ConfigurationException * NOTE(review): presumably raised when the supplied * properties are missing or invalid -- confirm against * implementations */ public void init(final Properties p) throws ConfigurationException; } --- QueryUI.java DELETED --- --- NEW FILE: ReplayRenderer.java --- /* ReplayRenderer * * $Id: ReplayRenderer.java,v 1.1 2005/11/16 03:11:29 bradtofel Exp $ * * Created on 5:27:09 PM Nov 1, 2005. * * Copyright (C) 2005 Internet Archive. * * This file is part of wayback. * * wayback is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser Public License as published by * the Free Software Foundation; either version 2.1 of the License, or * any later version. 
* * wayback is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser Public License for more details. * * You should have received a copy of the GNU Lesser Public License * along with wayback; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ package org.archive.wayback; import java.io.IOException; import javax.servlet.ServletException; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import org.archive.wayback.core.Resource; import org.archive.wayback.core.SearchResult; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.exception.WaybackException; /** * Renders the outcome of a replay request: either the Resource that was * found, or the WaybackException that prevented replay. * NOTE(review): summary inferred from the method signatures -- confirm * against implementations. * * @author brad * @version $Date: 2005/11/16 03:11:29 $, $Revision: 1.1 $ */ public interface ReplayRenderer extends PropertyConfigurable { /** * Render a WaybackException raised while handling a replay request. * NOTE(review): presumably writes an error response to httpResponse -- * confirm against implementations. * * @param httpRequest the servlet request being handled * @param httpResponse the servlet response for this request * @param wbRequest the WaybackRequest for this request * @param exception the WaybackException to render * @throws ServletException * @throws IOException */ public void renderException(HttpServletRequest httpRequest, HttpServletResponse httpResponse, WaybackRequest wbRequest, WaybackException exception) throws ServletException, IOException; /** * Render a Resource in reply to a replay request. * NOTE(review): the parameter roles below are inferred from their names * and types -- confirm against implementations. * * @param httpRequest the servlet request being handled * @param httpResponse the servlet response for this request * @param wbRequest the WaybackRequest for this request * @param result the SearchResult associated with the resource * @param resource the Resource to render * @param uriConverter the ReplayResultURIConverter available for * building replay URIs * @throws ServletException * @throws IOException */ public void renderResource(HttpServletRequest httpRequest, HttpServletResponse httpResponse, WaybackRequest wbRequest, SearchResult result, Resource resource, ReplayResultURIConverter uriConverter) throws ServletException, IOException; } --- ReplayUI.java DELETED --- --- NEW FILE: QueryRenderer.java --- /* QueryRenderer * * $Id: QueryRenderer.java,v 1.1 2005/11/16 03:11:29 bradtofel Exp $ * * Created on 2:39:48 PM Nov 7, 2005. * * Copyright (C) 2005 Internet Archive. * * This file is part of wayback. * * wayback is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser Public License as published by * the Free Software Foundation; either version 2.1 of the License, or * any later version. 
* * wayback is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser Public License for more details. * * You should have received a copy of the GNU Lesser Public License * along with wayback; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ package org.archive.wayback; import java.io.IOException; import javax.servlet.ServletException; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import org.archive.wayback.core.SearchResults; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.exception.WaybackException; /** * Renders the outcome of a query request: either the SearchResults that * were found, or the WaybackException that prevented the query. * NOTE(review): summary inferred from the method signatures -- confirm * against implementations. * * @author brad * @version $Date: 2005/11/16 03:11:29 $, $Revision: 1.1 $ */ public interface QueryRenderer extends PropertyConfigurable { /** * Render a WaybackException raised while handling a query request. * NOTE(review): presumably writes an error response to httpResponse -- * confirm against implementations. * * @param httpRequest the servlet request being handled * @param httpResponse the servlet response for this request * @param wbRequest the WaybackRequest for this request * @param exception the WaybackException to render * @throws ServletException * @throws IOException */ public void renderException(HttpServletRequest httpRequest, HttpServletResponse httpResponse, WaybackRequest wbRequest, WaybackException exception) throws ServletException, IOException; /** * Render the SearchResults of a URL query. * NOTE(review): presumably used for requests of type "urlquery" (see * WaybackConstants) -- confirm against callers. * * @param httpRequest the servlet request being handled * @param response the servlet response for this request * @param wbRequest the WaybackRequest for this request * @param results the SearchResults to render * @param uriConverter the ReplayResultURIConverter available for * building replay URIs * @throws ServletException * @throws IOException */ public void renderUrlResults(HttpServletRequest httpRequest, HttpServletResponse response, WaybackRequest wbRequest, SearchResults results, ReplayResultURIConverter uriConverter) throws ServletException, IOException; /** * Render the SearchResults of a URL prefix query. * NOTE(review): presumably used for requests of type "urlprefixquery" * (see WaybackConstants) -- confirm against callers. * * @param httpRequest the servlet request being handled * @param response the servlet response for this request * @param wbRequest the WaybackRequest for this request * @param results the SearchResults to render * @param uriConverter the ReplayResultURIConverter available for * building replay URIs * @throws ServletException * @throws IOException */ public void renderUrlPrefixResults(HttpServletRequest httpRequest, HttpServletResponse response, WaybackRequest wbRequest, SearchResults results, ReplayResultURIConverter uriConverter) throws ServletException, IOException; } Index: ResourceStore.java =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/wayback/src/java/org/archive/wayback/ResourceStore.java,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** ResourceStore.java 19 Oct 2005 01:22:37 -0000 1.2 --- ResourceStore.java 16 Nov 2005 03:11:29 -0000 1.3 *************** *** 25,32 **** import 
java.io.IOException; - import java.util.Properties; - import org.archive.io.arc.ARCLocation; import org.archive.wayback.core.Resource; /** --- 25,31 ---- import java.io.IOException; import org.archive.wayback.core.Resource; + import org.archive.wayback.core.SearchResult; /** *************** *** 36,40 **** * @version $Date$, $Revision$ */ ! public interface ResourceStore { /** * Transform an ARCLocation into a Resource --- 35,39 ---- * @version $Date$, $Revision$ */ ! public interface ResourceStore extends PropertyConfigurable { /** * Transform an ARCLocation into a Resource *************** *** 44,57 **** * @throws IOException */ ! public Resource retrieveResource(ARCLocation location) throws IOException; - /** - * Initialize this ResourceStore. Pass in the specific configurations via - * Properties. - * - * @param p - * Generic properties bag for configurations - * @throws Exception - */ - public void init(Properties p) throws Exception; } --- 43,47 ---- * @throws IOException */ ! public Resource retrieveResource(SearchResult result) throws IOException; } --- RequestParser.java DELETED --- --- NEW FILE: WaybackConstants.java --- /* WaybackConstants * * $Id: WaybackConstants.java,v 1.1 2005/11/16 03:11:29 bradtofel Exp $ * * Created on 3:28:47 PM Nov 14, 2005. * * Copyright (C) 2005 Internet Archive. * * This file is part of wayback. * * wayback is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser Public License as published by * the Free Software Foundation; either version 2.1 of the License, or * any later version. * * wayback is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser Public License for more details. 
* * You should have received a copy of the GNU Lesser Public License * along with wayback; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ package org.archive.wayback; /** * String constants grouped by prefix: REQUEST_* for requests, RESULTS_* * for a set of results, and RESULT_* for a single result. * NOTE(review): presumably these are the property keys (and, for the * request types, the values) used with WaybackRequest, SearchResults and * SearchResult -- confirm against the core classes. * * @author brad * @version $Date: 2005/11/16 03:11:29 $, $Revision: 1.1 $ */ public interface WaybackConstants { /** * Request: filter results before this 14-digit timestamp. * NOTE(review): presumably results captured before this timestamp are * excluded -- confirm against the index implementations. */ public static final String REQUEST_START_DATE = "startdate"; /** * Request: filter results after this 14-digit timestamp. * NOTE(review): presumably results captured after this timestamp are * excluded -- confirm against the index implementations. */ public static final String REQUEST_END_DATE = "enddate"; /** * Request: (replay) find closest result to this 14-digit timestamp */ public static final String REQUEST_EXACT_DATE = "exactdate"; /** * Request: URL or URL prefix requested */ public static final String REQUEST_URL = "url"; /** * Request: URL of referrer, if supplied, or "" if not */ public static final String REQUEST_REFERER_URL = "refererurl"; /** * Request: defines type - one of REQUEST_URL_QUERY (urlquery), * REQUEST_URL_PREFIX_QUERY (urlprefixquery), or REQUEST_REPLAY_QUERY * (replay) */ public static final String REQUEST_TYPE = "type"; /** * Request: value of REQUEST_TYPE for a urlquery type request */ public static final String REQUEST_URL_QUERY = "urlquery"; /** * Request: value of REQUEST_TYPE for a urlprefixquery type request */ public static final String REQUEST_URL_PREFIX_QUERY = "urlprefixquery"; /** * Request: value of REQUEST_TYPE for a replay type request */ public static final String REQUEST_REPLAY_QUERY = "replay"; /** * Results: int first record of all matching returned, 1-based */ public static final String RESULTS_FIRST_RECORD = "firstrecord"; /** * Results: int first page of all matching pages to return, 1-based */ public static final String RESULTS_FIRST_PAGE = "firstpage"; /** * Results: boolean: "true"|"false" if there are more records matching * than those returned in the current SearchResults */ public static final String RESULTS_HAS_MORE = "hasmore"; /** * Result: URL of captured document */ public static final String RESULT_URL = "url"; /** * Result: 14-digit timestamp when document was captured */ public 
static final String RESULT_CAPTURE_DATE = "capturedate"; /** * Result: basename of ARC file containing this document. */ public static final String RESULT_ARC_FILE = "arcfile"; /** * Result: compressed byte offset within ARC file where this document's * gzip envelope begins. */ public static final String RESULT_OFFSET = "compressedoffset"; /** * Result: original exact host from which this document was captured. */ public static final String RESULT_ORIG_HOST = "originalhost"; /** * Result: best-guess at mime-type of this document. */ public static final String RESULT_MIME_TYPE = "mimetype"; /** * Result: 3-digit integer HTTP response code. May be '0' in some * fringe conditions, old ARCs, bug in crawler, etc. */ public static final String RESULT_HTTP_CODE = "httpresponsecode"; /** * Result: all or part of the 32-digit hexadecimal MD5 digest of this * document */ public static final String RESULT_MD5_DIGEST= "md5digest"; /** * Result: URL that this document redirected to, or '-' if it does * not redirect */ public static final String RESULT_REDIRECT_URL = "redirecturl"; } --- NEW FILE: ReplayResultURIConverter.java --- /* ReplayResultURIConverter * * $Id: ReplayResultURIConverter.java,v 1.1 2005/11/16 03:11:29 bradtofel Exp $ * * Created on 5:20:43 PM Nov 1, 2005. * * Copyright (C) 2005 Internet Archive. * * This file is part of wayback. * * wayback is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser Public License as published by * the Free Software Foundation; either version 2.1 of the License, or * any later version. * * wayback is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser Public License for more details. 
* * You should have received a copy of the GNU Lesser Public License * along with wayback; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ package org.archive.wayback; import org.archive.wayback.core.SearchResult; /** * Builds the String URIs under which a SearchResult can be replayed. * * @author brad * @version $Date: 2005/11/16 03:11:29 $, $Revision: 1.1 $ */ public interface ReplayResultURIConverter extends PropertyConfigurable { /** * @param result * the SearchResult to build a replay URI for * @return user-viewable String URL that will replay the SearchResult */ public String makeReplayURI(final SearchResult result); /** * NOTE(review): presumably builds a replay URI for url, a location that * result redirects to -- confirm against implementations. * * @param result * the SearchResult the URI is built relative to * @param url * the URL to build a replay URI for * @return String replay URI */ public String makeRedirectReplayURI(final SearchResult result, String url); /** * NOTE(review): presumably the common leading part of every URI this * converter produces -- confirm against implementations. * * @return String replay URI prefix */ public String getReplayUriPrefix (); } Index: ResourceIndex.java =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/wayback/src/java/org/archive/wayback/ResourceIndex.java,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** ResourceIndex.java 19 Oct 2005 01:22:37 -0000 1.2 --- ResourceIndex.java 16 Nov 2005 03:11:29 -0000 1.3 *************** *** 24,33 **** package org.archive.wayback; ! import java.io.IOException; ! import java.util.Properties; ! ! import org.archive.wayback.core.ResourceResults; ! import org.archive.wayback.core.WMRequest; ! import org.archive.wayback.exception.WaybackException; /** --- 24,32 ---- package org.archive.wayback; ! import org.archive.wayback.core.SearchResults; ! import org.archive.wayback.core.WaybackRequest; ! import org.archive.wayback.exception.BadQueryException; ! import org.archive.wayback.exception.ResourceIndexNotAvailableException; ! import org.archive.wayback.exception.ResourceNotInArchiveException; /** *************** *** 37,41 **** * @version $Date$, $Revision$ */ ! 
public interface ResourceIndex extends PropertyConfigurable { /** * Transform a WMRequest into a ResourceResults. *************** *** 45,62 **** * WMRequest * ! * @throws IOException ! * @throws WaybackException ! */ ! public ResourceResults query(final WMRequest request) throws IOException, ! WaybackException; ! ! /** ! * Initialize this ResourceIndex. Pass in the specific configurations via ! * Properties. ! * ! * @param p ! * Generic properties bag for configurations ! * @throws Exception */ ! public void init(Properties p) throws Exception; } --- 44,52 ---- * WMRequest * ! * @throws ResourceIndexNotAvailableException ! * @throws ResourceNotInArchiveException */ ! public SearchResults query(final WaybackRequest request) ! throws ResourceIndexNotAvailableException, ! ResourceNotInArchiveException, BadQueryException; } |