You can subscribe to this list here.
2005 | Jan | Feb | Mar | Apr | May | Jun | Jul (1) | Aug (10) | Sep (36) | Oct (339) | Nov (103) | Dec (152) |
---|---|---|---|---|---|---|---|---|---|---|---|---|
2006 | Jan (141) | Feb (102) | Mar (125) | Apr (203) | May (57) | Jun (30) | Jul (139) | Aug (46) | Sep (64) | Oct (105) | Nov (34) | Dec (162) |
2007 | Jan (81) | Feb (57) | Mar (141) | Apr (72) | May (9) | Jun (1) | Jul (144) | Aug (88) | Sep (40) | Oct (43) | Nov (34) | Dec (20) |
2008 | Jan (44) | Feb (45) | Mar (16) | Apr (36) | May (8) | Jun (77) | Jul (177) | Aug (66) | Sep (8) | Oct (33) | Nov (13) | Dec (37) |
2009 | Jan (2) | Feb (5) | Mar (8) | Apr | May (36) | Jun (19) | Jul (46) | Aug (8) | Sep (1) | Oct (66) | Nov (61) | Dec (10) |
2010 | Jan (13) | Feb (16) | Mar (38) | Apr (76) | May (47) | Jun (32) | Jul (35) | Aug (45) | Sep (20) | Oct (61) | Nov (24) | Dec (16) |
2011 | Jan (22) | Feb (34) | Mar (11) | Apr (8) | May (24) | Jun (23) | Jul (11) | Aug (42) | Sep (81) | Oct (48) | Nov (21) | Dec (20) |
2012 | Jan (30) | Feb (25) | Mar (4) | Apr (6) | May (1) | Jun (5) | Jul (5) | Aug (8) | Sep (6) | Oct (6) | Nov | Dec |
From: <bra...@us...> - 2007-10-02 03:32:57
|
Revision: 2030 http://archive-access.svn.sourceforge.net/archive-access/?rev=2030&view=rev Author: bradtofel Date: 2007-10-01 20:33:00 -0700 (Mon, 01 Oct 2007) Log Message: ----------- INITIAL REV: some useful templates and examples for wayback Spring configuration. Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/wayback-templates.xml Added: trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/wayback-templates.xml =================================================================== --- trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/wayback-templates.xml (rev 0) +++ trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/wayback-templates.xml 2007-10-02 03:33:00 UTC (rev 2030) @@ -0,0 +1,143 @@ + +<!-- SearchResultSource templates --> + <bean id="bdbsearchresultsource" + class="org.archive.wayback.resourceindex.bdb.BDBIndex" + init-method="init"> + <property name="bdbName" value="DB1" /> + <property name="bdbPath" value="/tmp/wayback/index/" /> + </bean> + + <bean id="cdxsearchresultsource" class="org.archive.wayback.resourceindex.cdx.CDXIndex"> + <property name="path" value="/tmp/wayback/cdx-index/index.cdx" /> + </bean> + + <bean id="compositecdxresultsource" class="org.archive.wayback.resourceindex.CompositeSearchResultSource"> + <property name="CDXSources"> + <list> + <value>/tmp/wayback/cdx-index/index.cdx.1</value> + <value>/tmp/wayback/cdx-index/index.cdx.2</value> + </list> + </property> + </bean> + +<!-- ResourceIndex templates --> + + <bean id="remoteindex" class="org.archive.wayback.resourceindex.RemoteResourceIndex" init-method="init"> + <property name="searchUrlBase" value="http://webdata010.us.archive.org:8080/wayback/xmlquery" /> + </bean> + + <bean id="localbdbindex" class="org.archive.wayback.resourceindex.LocalResourceIndex"> + <property name="source" ref="bdbsearchresultsource" /> + <property name="maxRecords" 
value="10000" /> + </bean> + <bean id="localcdxindex" class="org.archive.wayback.resourceindex.LocalResourceIndex"> + <property name="source" ref="cdxsearchresultsource" /> + <property name="maxRecords" value="10000" /> + </bean> + + <property name="remotenutchindex"> + <bean class="org.archive.wayback.resourceindex.NutchResourceIndex" init-method="init"> + <property name="searchUrlBase" value="http://webteam-ws.us.archive.org:8080/katrina/opensearch" /> + <property name="maxRecords" value="100" /> + </bean> + </property> + +<!-- ResourceStore templates --> + + <bean id="localstore" class="org.archive.wayback.resourcestore.LocalARCResourceStore"> + <property name="arcDir" value="/tmp/wayback/arcs/" /> + </bean> + + <bean id="remotestore" class="org.archive.wayback.resourcestore.HttpARCResourceStore"> + <property name="urlPrefix" value="http://localhost:8080/arcproxy/" /> + </bean> + +<!-- WaybackCollection templates --> + + <bean id="localcollection" class="org.archive.wayback.webapp.WaybackCollection"> + <property name="index" ref="localbdbindex" /> + <property name="store" ref="localstore" /> + </bean> + +<!-- QueryUI templates --> + <bean id="standardquery" class="org.archive.wayback.query.Renderer"> + <property name="captureJsp" value="/jsp/HTMLResults.jsp" /> + </bean> + <bean id="calendarquery" class="org.archive.wayback.query.Renderer"> + <property name="captureJsp" value="/jsp/CalendarResults.jsp" /> + </bean> + +<!-- ArchivalURL ReplayUI templates --> + <bean id="archivalurlreplay" class="org.archive.wayback.archivalurl.ArchivalUrlReplayDispatcher"> + <property name="jsInserts"> + <list> + <value>http://localhost:8080/wm.js</value> + </list> + </property> + </bean> + <bean id="archivalurluriconverter" + class="org.archive.wayback.archivalurl.ArchivalUrlResultURIConverter"> + <property name="replayURIPrefix" value="http://localhost:8080/wayback/" /> + </bean> + <bean id="archivalurlparser" class="org.archive.wayback.archivalurl.ArchivalUrlRequestParser" 
init-method="init"> + <property name="maxRecords" value="1000" /> + </bean> + +<!-- Proxy ReplayUI templates --> + <bean id="proxyreplay" class="org.archive.wayback.proxy.ProxyReplayDispatcher"> + </bean> + <bean id="proxyuriconverter" class="org.archive.wayback.proxy.RedirectResultURIConverter"> + <property name="redirectURI" value="http://localhost:8090/jsp/Redirect.jsp" /> + </bean> + <bean id="proxyparser" class="org.archive.wayback.proxy.ProxyRequestParser" init-method="init"> + <property name="localhostNames"> + <list> + <value>foo.archive.org</value> + </list> + </property> + <property name="maxRecords" value="1000" /> + </bean> + +<!--IP-base authentication template --> + + <bean class="org.archive.wayback.authenticationcontrol.IPMatchesBooleanOperator"> + <property name="allowedRanges"> + <list> + <value>192.168.1.16/24</value> + </list> + </property> + </bean> + + +<!-- AccessPoint templates --> + + <bean name="8080:wayback" class="org.archive.wayback.webapp.AccessPoint"> + <property name="collection" ref="localcollection" /> + <property name="query" ref="standardquery" /> + <property name="replay" ref="archivalurlreplay" /> + <property name="parser" ref="archivalurlparser" /> + <property name="uriConverter" ref="archivalurluriconverter" /> + </bean> + + <bean name="8081" parent="8080:wayback"> + <property name="useServerName" value="true" /> + <property name="replay"> + <bean class="org.archive.wayback.domainprefix.DomainPrefixReplayDispatcher" /> + </property> + + <property name="parser"> + <bean class="org.archive.wayback.domainprefix.DomainPrefixCompositeRequestParser" + init-method="init"> + <property name="hostPort" value="localhost.archive.org:8081" /> + <property name="maxRecords" value="1000" /> + <property name="earliestTimestamp" value="1996" /> + </bean> + </property> + + <property name="uriConverter"> + <bean class="org.archive.wayback.domainprefix.DomainPrefixResultURIConverter"> + <property name="hostPort" value="localhost.archive.org:8081" 
/> + </bean> + </property> + </bean> + \ No newline at end of file This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2007-10-02 03:29:37
|
Revision: 2029 http://archive-access.svn.sourceforge.net/archive-access/?rev=2029&view=rev Author: bradtofel Date: 2007-10-01 20:29:41 -0700 (Mon, 01 Oct 2007) Log Message: ----------- BUGFIX: was missing a contextroot for one of the timeline images. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/replay/Timeline.jsp Modified: trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/replay/Timeline.jsp =================================================================== --- trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/replay/Timeline.jsp 2007-10-02 03:28:19 UTC (rev 2028) +++ trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/replay/Timeline.jsp 2007-10-02 03:29:41 UTC (rev 2029) @@ -312,7 +312,7 @@ </form> </td> <td> - <img wmSpecial="1" alt='' height='1' src='images/1px.gif' width='5'> + <img wmSpecial="1" alt='' height='1' src='<%= contextRoot %>/images/1px.gif' width='5'> </td> </tr> </table> This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2007-10-02 03:28:15
|
Revision: 2028 http://archive-access.svn.sourceforge.net/archive-access/?rev=2028&view=rev Author: bradtofel Date: 2007-10-01 20:28:19 -0700 (Mon, 01 Oct 2007) Log Message: ----------- TWEAK: removed site plugin configuration -- sticking with the defaults for the moment. Modified Paths: -------------- trunk/archive-access/projects/wayback/dist/pom.xml Modified: trunk/archive-access/projects/wayback/dist/pom.xml =================================================================== --- trunk/archive-access/projects/wayback/dist/pom.xml 2007-10-02 03:26:38 UTC (rev 2027) +++ trunk/archive-access/projects/wayback/dist/pom.xml 2007-10-02 03:28:19 UTC (rev 2028) @@ -66,17 +66,8 @@ <build> <plugins> - + <plugin> - <artifactId>maven-site-plugin</artifactId> - <configuration> - <xdocDirectory> - ${basedir}/xdocs - </xdocDirectory> - </configuration> - </plugin> - - <plugin> <!-- NOTE: We don't need a groupId specification because the group is org.apache.maven.plugins ...which is assumed by default. --> This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2007-10-02 03:26:34
|
Revision: 2027 http://archive-access.svn.sourceforge.net/archive-access/?rev=2027&view=rev Author: bradtofel Date: 2007-10-01 20:26:38 -0700 (Mon, 01 Oct 2007) Log Message: ----------- INITIAL REV: place holder page. Added Paths: ----------- trunk/archive-access/projects/wayback/dist/src/site/xdoc/developer_manual.xml Added: trunk/archive-access/projects/wayback/dist/src/site/xdoc/developer_manual.xml =================================================================== --- trunk/archive-access/projects/wayback/dist/src/site/xdoc/developer_manual.xml (rev 0) +++ trunk/archive-access/projects/wayback/dist/src/site/xdoc/developer_manual.xml 2007-10-02 03:26:38 UTC (rev 2027) @@ -0,0 +1,29 @@ +<?xml version="1.0" encoding="ISO-8859-1"?> +<document> + <properties> + <author email="brad.AT.archive.DOT.org">Brad Tofel</author> + <title>Developer Manual</title> + </properties> + <meta name="keyword" content="wayback machine, heritrix, java"/> + <body> + <section name="Introduction"> + <p> + The Wayback Machine is a pure Java application that allows + web browsers to access and search content stored in a set of ARC + files. + </p> + </section> + <section name="Setting up the developement Environment"> + <p> + Please see <a href="developer_environment.html">this page</a>. + </p> + </section> + <section name="Query .jsp customizations"> + <p> + Please see the reference implementation .jsp files for examples, and + the API docs (and source) for + org.archive.wayback.query.UIQueryResults.java. + </p> + </section> + </body> +</document> This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2007-10-02 02:50:26
|
Revision: 2026 http://archive-access.svn.sourceforge.net/archive-access/?rev=2026&view=rev Author: bradtofel Date: 2007-10-01 19:50:31 -0700 (Mon, 01 Oct 2007) Log Message: ----------- UPDATED: removed pretty much everything, as the previous content has moved to administrator_manual Modified Paths: -------------- trunk/archive-access/projects/wayback/dist/src/site/xdoc/user_manual.xml Modified: trunk/archive-access/projects/wayback/dist/src/site/xdoc/user_manual.xml =================================================================== --- trunk/archive-access/projects/wayback/dist/src/site/xdoc/user_manual.xml 2007-10-02 02:49:00 UTC (rev 2025) +++ trunk/archive-access/projects/wayback/dist/src/site/xdoc/user_manual.xml 2007-10-02 02:50:31 UTC (rev 2026) @@ -13,1694 +13,8 @@ files. </p> <p> - This manual provides basic information about downloading, - installing, configuring and using the Wayback Machine. + Remainder TBD. </p> - <p> - Setting up an instance of the Wayback Machine software will - require several steps: - <ol> - <li> - Preflight checklist: Do you have everything you need? - </li> - <li> - Download the software. - </li> - <li> - Determine how you will use the software: Replay, - ResourceStore, ResourceIndex - </li> - <li> - Install and Configure the software. - </li> - <li> - Preparing your index data. - </li> - <li> - Use the software. - </li> - </ol> - </p> </section> - <section name="Preflight Checklist"> - <p> - Make sure you've satisfied all the - <a href="requirements.html"> - Requirements - </a> - first. - </p> - </section> - <section name="Downloading"> - <p> - Stable point releases of this software are available on - sourceforge.net. You can download the binary distribution of - the Wayback Machine via - <a href="http://sourceforge.net/project/showfiles.php?group_id=118427"> - this page - </a> - . 
- </p> - <p> - You can find the latest version available from our continuous - build box - <a href="http://builds.archive.org:8080/cruisecontrol/buildresults/HEAD-archive-access"> - here - </a> - . Builds obtained here are not guaranteed stable. Use - with caution. - </p> - <p> - Once you have downloaded the .tar.gz or .zip file from - sourceforge, you will need to unpack the file to access the - webapp file, <b>wayback.war</b>. - </p> - </section> - <section name="Determining how you will use the software: Replay Mode"> - <p> - The Wayback Machine provides two types of web browser-based - access to the content in ARC files: Query access, and - Replay access. - </p> - <p> - Query access allows users to locate resources stored in ARC - files within the ResourceStore matching a particular query. - At present, the only criteria that can be specified in - queries to filter returned documents are URL based filters, - and capture date. It is expected that in future releases of - the Wayback Machine, users will be able to locate documents - via full text search. See the - <a href="http://archive-access.sourceforge.net/projects/nutch/wayback.html"> - NutchWAX project - </a> - for more information on full text search and the Wayback - Machine. - </p> - <p> - Replay access allows users to actually view resources in the - ResourceStore within their web browser. There are three Replay - modes currently available with the Wayback Machine: - <b>Archival URL</b>, <b>Proxy</b>, and <b>Timeline</b>. - </p> - <p> - Before installing the Wayback Machine application, you will - need to choose which Replay mode you want to use. The three - replay modes are described below. - </p> - <subsection name="Archival URL Replay mode"> - <p> - Archival URL Replay mode uses a modified URL to designate - documents stored in ARC files. 
The general form of an - Archival URL is: - <p> - <code> - http://HOSTNAME:PORT/CONTEXT/TIMESTAMP/URL - </code> - </p> - where - <ul> - <li> - HOSTNAME is the host where the Wayback Machine is - running. - </li> - <li> - PORT is the port where Tomcat is listening for - incoming HTTP requests. - </li> - <li> - CONTEXT is the context where the Wayback Machine - webapp has been deployed. - </li> - <li> - TIMESTAMP is 0 to 14 digits of a date, possibly - followed by an asterisk ('*'). The format of a - TIMESTAMP is: - <p> - <code> - YYYYMMDDHHmmss - </code> - </p> - where - <ul> - <li> - YYYY represents a 4-digit year - </li> - <li> - MM represents a 2-digit, 1-based month - (Jan = 1 - Dec = 12) - </li> - <li> - DD represents a 2-digit day of the month - (01-31) - </li> - <li> - HH represents a 2-digit hour (01-24) - </li> - <li> - mm represents a 2-digit minute (00-59) - </li> - <li> - ss represents a 2-digit second (00-59) - </li> - </ul> - The following are example dates expressed as - 14-digit Timestamps: - <p> - Jan 13, 1999 03:34:35 (am GMT) - 19990113033435 - </p> - <p> - Dec 31, 2004 23:01:00 (pm GMT) - 20041231230100 - </p> - </li> - <li> - URL represents the actual URL that should be - replayed. - </li> - </ul> - <p> - Here is an example Archival URL (on an assumed host - wayback-ng.archive.org) for www.yahoo.com on Dec 31, - 1999 at 12:00:00 - <p> - <code> - http://wayback-ng.archive.org/19991231120000/http://www.yahoo.com/ - </code> - </p> - </p> - <p> - Archival URL mode allows replay of all versions captured - of a particul URL, by modifying the Timestamp. When an - Archival URL Replay request is recieved for a URL, the - Wayback Machine will replay the closest version in time - to the Timestamp requested of the particular URL. - </p> - <p> - HTML documents returned in Archival URL Replay mode are - modified from the original version to provide a replay - experience more consistant to viewing the original - content. 
This is accomplished by the insertion of - Javascript, which executes in the client browser after - the page has loaded. This Javascript modifies most URLs - within the HTML page, both Anchors (links) as well as - embedded content (images, applets, etc) so that they - become appropriate requests back to the Wayback Machine. - </p> - <p> - This Javascript is imperfect: sometimes requests - "leak" to the live web temporarily, before the - Javascript has executed. Also, not all URLs are - rewritten correctly, especially URLs that are created - by Javascript that was in the original page, and - specialized file types containing links like Flash and - PDF documents. - </p> - </p> - </subsection> - <subsection name="Proxy Replay mode"> - <p> - Proxy Replay mode works by configuring a client browser to - proxy all HTTP requests through the Wayback Machine - application. Instead of retrieving documents from the live - web, the Wayback Machine will retrieve documents from the - local repository of ARC files. - </p> - <p> - Proxy Replay mode does not suffer from the shortcomings of - the inserted Javascript that the Archival URL mode uses, - but it has one major drawback: there is no way to - specify which version of a captured document should - be replayed. Only the URL to be replayed is sent from the - client browser to the Wayback Machine - no date information - is sent with the request. - </p> - <p> - In Proxy Replay mode, the Wayback Machine will return the - most recent version captured of any requested page. This - behavior can be changed by using the Firefox-specific plugin - developed by Oskar Grenholm. You can find out more about - this plugin and download it - <a href="http://archive-access.sourceforge.net/projects/waxtoolbar/"> - here - </a>. - </p> - <p> - Thanks Oskar! 
- </p> - </subsection> - <subsection name="Timeline Replay mode"> - <p> - Timeline Replay mode provides an interface similar to the - <a href="http://archive-access.sourceforge.net/projects/wera/"> - WERA - </a> - access tool, where a timeline is visible at the top of every - page viewed in the Wayback. This timeline illustrates which - versions are available, and provides the capability to - access other versions of the same document, including the - ability to scroll forwards and backwards in time. - </p> - <p> - Timeline Replay mode is still under testing, and sites with - frames may not work well. It currently uses Javascript to - rewrite links within pages, so this mode suffers from the - same set of problems found in Archival Url mode. If you find - problems with this mode, please provide feedback via the - archive-access discussion list. - </p> - </subsection> - </section> - <section name="Determining how you will use the software: ResourceStore"> - <p> - The Wayback Machine provides access to resources, which must - (currently) be stored in a set of ARC files. The module that - provides access for the Wayback Machine to this set of - resources is called the <b>ResourceStore</b>. - </p> - <p> - There are two ResourceStore implementations you can - choose from, depending on how many ARC files you have and - where they are stored. If all your ARC files fit in a single - directory, you can use the <b>LocalARCResourceStore</b> - implementation, otherwise you will need to use the - <b>Http11ResourceStore</b> implementation. - </p> - <p> - If you use the Http11ResourceStore, you will need to: - <ol> - <li> - Set up a singleton ArcProxy webapp. This webapp - maintains a BDB that maps ARC filenames to their - actual absolute URL, and creates an indirection, so - all ARC files are accessible within a single HTTP - exported directory. - </li> - <li> - Export your ARC files via HTTP 1.1, on all hosts - that hold them, to the node running the ArcProxy - webapp. 
Some examples of HTTP 1.1 webservers you can - use to export your ARC files are <b>Apache</b>, - <b>Tomcat</b>, and <b>thttpd</b>. Any other - webserver that supports HTTP 1.1 will also work. - </li> - <li> - Populate the ArcProxy BDB with the locations of all - ARC files in your repository. See instructions for - the using <b>location-client</b> command-line tool, - within this document, to populate the ArcProxy BDB. - </li> - </ol> - </p> - </section> - <section name="Determining how you will use the software: ResourceIndex"> - <p> - In order to quickly search for documents requested by - users, the Wayback Machine needs a list of all the resources - stored in the ResourceStore. The module that provides access - for the Wayback Machine to this list of resources is - called the <b>ResourceIndex</b>. - </p> - <p> - <b>ResourceIndex</b> Options: - <ul> - <li> - <b>Local-BDB</b>: This ResourceIndex is good for - smaller scale installations, up to 10's of millions - of documents, and allows for fast incremental - updates to the index. It also allows for automated - index updating. - </li> - <li> - <b>Local-CDX</b>: This ResourceIndex is good for - larger scale installations, bounded mostly by the - size of the index you can (first create, and later) - store on a single machine. Using the command line - tool <b>index-client</b>, and the standard UNIX - <b>sort</b> tool(see note below on LC_ALL), you create - one or more sorted flat text files that are searched on - each request. Building these sorted files, and updating - the ResourceIndex are manual operations presently. - </li> - <li> - <b>Remote-Nutch</b>: This ResourceIndex option - allows you to query a Nutch full-text search engine. - This ResourceIndex option is highly experimental. - For help setting up a Remote-Nutch ResourceIndex, - please see - <a href="http://archive-access.sourceforge.net/projects/nutch/wayback.html"> - this page. 
- </a> - </li> - <li> - <b>Remote-BDB/CDX</b>: This ResourceIndex option - allows hosting of a ResourceIndex on a machine other - than the machine hosting the Wayback webapp. It also - allows a single ResourceIndex to be shared by - multiple Wayback webapps. For example, you can set - up one Wayback webapp for each Replay Mode, and have - all of those installations access a single - ResourceIndex using this option. - </li> - </ul> - </p> - </section> - <section name="Installing and Configuring"> - <p> - Installation and configuration of this software involves the - following steps: - <ol> - <li> - Placing .war file in appropriate location. - </li> - <li> - Waiting for Tomcat to unpack the .war file. - </li> - <li> - Customizing base web.xml file. - </li> - <li> - Restarting tomcat. - </li> - </ol> - </p> - <subsection name="Archival URL mode installation"> - <p> - By default, the wayback.war file is configured to operate - in Archival URL mode. It can be installed by simply placing - the wayback.war file into Tomcat's <b>webapps/</b> - directory. - </p> - <p> - If you need to run the Wayback Machine application in a - context path besides <b>wayback</b>, you will need to rename - the wayback.war file to <i>CONTEXT</i>.war before placing - it in the <b>webapps/</b> directory. When Tomcat deploys - the webapp, it will be accessible via <i>CONTEXT/</i>. - </p> - </subsection> - <subsection name="Timeline mode installation"> - <p> - This mode has the same procedure as Archival URL mode - installation. Please follow the instructions found there, - but you will need to modify the default web.xml file before - using this mode. - </p> - </subsection> - <subsection name="Proxy mode installation"> - <p> - Running the Wayback Machine in proxy mode requires the - webapp to run as the ROOT context, so you will need to - rename wayback.war to ROOT.war before placing it in the - <b>webapps/</b> directory. 
If you have another ROOT webapp - installed, you might want to move it out of the way before - putting the ROOT.war (which was originally wayback.war) - into place, but this is not required. If you do not move the - old ROOT/ (and possibly the <i>old</i> ROOT.war) out of the - way, they may be overwritten when Tomcat deploys the new - ROOT webapp. - <ol> - <li> - shutdown Tomcat - </li> - <li> - <i>(optionally)</i> move old ROOT/ and/or ROOT.war elsewhere. - </li> - <li> - rename the wayback.war to ROOT.war in the webapps - directory (where TOMCAT_HOME is the path where you - installed Tomcat): - <p> - <code> - mv wayback.war TOMCAT_HOME/webapps/ROOT.war - </code> - </p> - </li> - <li> - start Tomcat. You will shutdown Tomcat again in - a moment to configure the web.xml file, but for - now, we need Tomcat to unpack the .war file, - which requires a startup. - </li> - </ol> - </p> - </subsection> - <subsection name="Modifying the base web.xml file"> - <p> - Once you have installed the .war file under the Tomcat - webapps directory, and the webapp has been deployed, you - may need to modify the web.xml file to customize your - Wayback installation. - </p> - <p> - There are 4 main categories of customizations: - <ol> - <li> - User Interface(Archival URL, Timeline, or Proxy) - </li> - <li> - Resource Store (Local ARCs, or HTTP 1.1 ARC access) - </li> - <li> - Resource Index (Local BDB, Local CDX, Remote Nutch, - Remote BDB/CDX) - </li> - <li> - Resource Index Exclusions (None, Robots.txt, Manual - Exclusions and Robots.txt) - </li> - </ol> - Within the web.xml file, there are comments indicating each - main configuration section. Within each section, all options - are present, but non-default options are commented out. - </p> - <p> - You can alter the active options by commenting out the - default option for a section, and uncommenting another - option. Within each option, there are further customizable - configurations. 
For information about these other options, - please refer to the table below. After making any changes to - the web.xml file, you will need to restart Tomcat. - </p> - </subsection> - - <subsection name="User Interface Customizations"> - - <p> - Archival URL UI options: - <table> - <tr> - <td> - Option - </td> - <td> - Default value - </td> - <td> - Description - </td> - </tr> - <tr> - <td> - jsuri - </td> - <td> - http://localhost:8080/wayback/wm.js - </td> - <td> - Absolute URL of page rewriting Javascript. This - value will be written into all returned HTML pages, - so will need to be changed to the absolute location - where your Wayback Machine is running. - </td> - </tr> - <tr> - <td> - replayuriprefix - </td> - <td> - http://localhost:8080/wayback - </td> - <td> - HTTP URI prefix for the replay UI. This should be - the name and port of the Tomcat hosting this webapp, - plus the name of the context where this webapp is - installed. - </td> - </tr> - </table> - </p> - <p> - Timeline UI options: - <table> - <tr> - <td> - Option - </td> - <td> - Default value - </td> - <td> - Description - </td> - </tr> - <tr> - <td> - jsuri - </td> - <td> - http://localhost:8080/wayback/jsp/TimelineUI/wm-timeline.js - </td> - <td> - Absolute URL of page rewriting Javascript. This - value will be written into all returned HTML pages, - so will need to be changed to the absolute location - where your Wayback Machine is running. - </td> - </tr> - <tr> - <td> - replayuriprefix - </td> - <td> - http://localhost:8080/wayback/replay - </td> - <td> - HTTP URI prefix for the replay UI. This should be - the name and port of the Tomcat hosting this webapp, - plus the name of the context where this webapp is - installed, plus the path "replay". - </td> - </tr> - <tr> - <td> - metauriprefix - </td> - <td> - http://localhost:8080/wayback/meta - </td> - <td> - HTTP URI prefix for the replay UI. 
This should be - the name and port of the Tomcat hosting this webapp, - plus the name of the context where this webapp is - installed, plus the path "meta". - </td> - </tr> - <tr> - <td> - timelineuriprefix - </td> - <td> - http://localhost:8080/wayback/timeline - </td> - <td> - HTTP URI prefix for the replay UI. This should be - the name and port of the Tomcat hosting this webapp, - plus the name of the context where this webapp is - installed, plus the path "timeline". - </td> - </tr> - <tr> - <td> - frameseturiprefix - </td> - <td> - http://localhost:8080/wayback/frameset - </td> - <td> - HTTP URI prefix for the replay UI. This should be - the name and port of the Tomcat hosting this webapp, - plus the name of the context where this webapp is - installed, plus the path "frameset". - </td> - </tr> - </table> - </p> - <p> - Proxy UI options: - <table> - <tr> - <td> - Option - </td> - <td> - Default value - </td> - <td> - Description - </td> - </tr> - <tr> - <td> - query.localhostname - </td> - <td> - - </td> - <td> - extra hostname that should be considered "local" - when discriminating between Replay and Query - requests. If you use DNS aliases to refer to - this host, then the fully qualified alias - (ex: wayback.archive.org) used to access the - Wayback proxy service should be used here. - </td> - </tr> - <tr> - <td> - proxy.redirectpath - </td> - <td> - http://localhost:8080/jsp/QueryUI/Redirect.jsp - </td> - <td> - absolute URL to jsp where requests are bounced - through to pick up timestamps - </td> - </tr> - </table> - </p> - </subsection> - <subsection name="Resource Store Customizations"> - - <p> - Local ARC options: - <table> - <tr> - <td> - Option - </td> - <td> - Default value - </td> - <td> - Description - </td> - </tr> - <tr> - <td> - resourcestore.arcpath - </td> - <td> - /tmp/wayback/arcs - </td> - <td> - Directory where ARC files are found (possibly - where Heritrix writes them.) This directory must - exist. 
- </td> - </tr> - <tr> - <td> - resourcestore.autoindex - </td> - <td> - 1 - </td> - <td> - If this is set to '1', then a background thread - is launched that detects new ARC files appearing - in <b>resourcestore.arcpath</b>. New ARCs are - indexed, and a CDX flat file, with one line per - ARC Record is created, one CDX file per ARC. - These CDX files are then handed off to the - ResourceIndex for incorporation into the index. - </td> - </tr> - <tr> - <td> - resourcestore.tmppath - </td> - <td> - /tmp/wayback/arc-indexer/tmp - </td> - <td> - Directory where CDX files are created - temporarily. This is a scratch space directory, - which must exist. - </td> - </tr> - <tr> - <td> - resourcestore.workpath - </td> - <td> - /tmp/wayback/arc-indexer/work - </td> - <td> - Directory which holds empty flag files - indicating that ARC files are waiting to be - indexed. This directory must exist. - </td> - </tr> - <tr> - <td> - resourcestore.queuedpath - </td> - <td> - /tmp/wayback/arc-indexer/queued - </td> - <td> - Directory which holds empty flag files - indicating that ARC files have already been seen - and queued for indexing. This directory must - exist. - </td> - </tr> - <tr> - <td> - resourcestore.indextarget - </td> - <td> - /tmp/wayback/index-data/incoming - </td> - <td> - Directory or URL where CDX files are sent after - they are created. If the value of this parameter - begins with http://, then the value is assumed - to be a URL where CDX files are PUT, on a - possibly remote resourceindex node. If the value - does not begin with http://, then the value is - assumed to be a local directory, which must - exist, where completed CDX files are moved for - incorporation into the index. - </td> - </tr> - <tr> - <td> - resourcestore.indexinterval - </td> - <td> - 10000 - </td> - <td> - Millisecond interval between checks for new ARCs - that need to be processed. This is only the - initial time slept when first starting up, and - after any new files are found. 
Each interval - that no new ARCs are detected, the duration - slept increases by this amount. - </td> - </tr> - </table> - </p> - <p> - Remote-HTTP1.1 options: - <table> - <tr> - <td> - Option - </td> - <td> - Default value - </td> - <td> - Description - </td> - </tr> - <tr> - <td> - resourcestore.arcurlprefix - </td> - <td> - http://localhost:8080/arc-proxy/arcs - </td> - <td> - Absolute URL of the ArcProxy webapp which - reverse proxies HTTP 1.1 requests to the actual - location of ARC files. - </td> - </tr> - </table> - </p> - <p> - ArcProxy/LocationDB options: - <table> - <tr> - <td> - Option - </td> - <td> - Default value - </td> - <td> - Description - </td> - </tr> - <tr> - <td> - filelocationdb.path - </td> - <td> - /tmp/wayback/arc-db - </td> - <td> - Directory where the filelocation BDB, which maps - ARC filenames to their absolute HTTP path(s), is - stored. Must exist. - </td> - </tr> - <tr> - <td> - filelocationdb.logpath - </td> - <td> - /tmp/wayback/arc-db.log - </td> - <td> - Path where log of new ARCs inserted into the - filelocation db are stored. Containing directory - must exist. - </td> - </tr> - </table> - </p> - </subsection> - - <subsection name="Resource Index Customizations"> - - <p> - Local BDB options: - <table> - <tr> - <td> - Option - </td> - <td> - Default value - </td> - <td> - Description - </td> - </tr> - <tr> - <td> - resourceindex.indexpath - </td> - <td> - /tmp/wayback/index - </td> - <td> - Directory to where BDB files are stored. This - directory must exist. - </td> - </tr> - <tr> - <td> - resourceindex.incomingpath - </td> - <td> - /tmp/wayback/index-data/incoming - </td> - <td> - BDB index-specific configuration that indicates - new CDX format flat files will appear in the - directory named in the value of this param. If - this configuration is present and non-empty, a - background thread will be started that monitors - this directory, and adds CDX records in files - found in this directory to the index. 
- </td> - </tr> - <tr> - <td> - resourceindex.mergedpath - </td> - <td> - /tmp/wayback/index-data/merged - </td> - <td> - If this value is present and non-empty, then CDX - files that are successfully processed from - incoming are moved to this directory after - merging. If this option is missing or blank, CDX - files are deleted after merging. - </td> - </tr> - <tr> - <td> - resourceindex.failedpath - </td> - <td> - /tmp/wayback/index-data/failed - </td> - <td> - If this value is present and non-empty, then CDX - files that fail to parse successfully are moved - to this directory after a single attempt. If - this option is missing or blank, malformed CDX - files are left in the incoming directory and - repeatedly re-attempted until some other process - moves them out of the way or fixes them. - </td> - </tr> - <tr> - <td> - resourceindex.mergeinterval - </td> - <td> - 10000 - </td> - <td> - Millisecond interval between checks for new - files in the incoming directory. This is only - the starting number, when no new files are - found in the directory. Each subsequent interval - will increase by this number of ms, until a file - is found, at which point the interval will - revert to the initial level. - </td> - </tr> - <tr> - <td> - maxresults - </td> - <td> - 1000 - </td> - <td> - Maximum number of results to return from the - ResourceIndex. - </td> - </tr> - </table> - </p> - <p> - Local CDX options: - <table> - <tr> - <td> - Option - </td> - <td> - Default value - </td> - <td> - Description - </td> - </tr> - <tr> - <td> - resourceindex.cdxpaths - </td> - <td> - /tmp/wayback/index/index.cdx - </td> - <td> - One or more comma-separated paths pointing to - sorted CDX files that contain index information - for this Wayback installation. - </td> - </tr> - <tr> - <td> - maxresults - </td> - <td> - 1000 - </td> - <td> - Maximum number of results to return from the - ResourceIndex. 
- </td> - </tr> - </table> - </p> - <p> - Remote Nutch Index options: - <table> - <tr> - <td> - Option - </td> - <td> - Default value - </td> - <td> - Description - </td> - </tr> - <tr> - <td> - resourceindex.baseurl - </td> - <td> - http://localhost:8081/xmlquery - </td> - <td> - Absolute URL to Nutch server. - </td> - </tr> - <tr> - <td> - maxresults - </td> - <td> - 1000 - </td> - <td> - Maximum number of results to return from the - ResourceIndex. - </td> - </tr> - </table> - </p> - <p> - Remote BDB/CDX Resource Index options: - <table> - <tr> - <td> - Option - </td> - <td> - Default value - </td> - <td> - Description - </td> - </tr> - <tr> - <td> - resourceindex.baseurl - </td> - <td> - http://localhost:8080/xmlquery - </td> - <td> - Absolute URL to the Wayback Machine Resource - Index service. - </td> - </tr> - <tr> - <td> - maxresults - </td> - <td> - 1000 - </td> - <td> - Maximum number of results to return from the - ResourceIndex. - </td> - </tr> - </table> - </p> - </subsection> - <subsection name="Resource Index Exclusion Customizations"> - <p> - Administrative Plus Robots.txt Exclusions options: - <table> - <tr> - <td> - Option - </td> - <td> - Default value - </td> - <td> - Description - </td> - </tr> - <tr> - <td> - resourceindex.exclusionua - </td> - <td> - ia_archiver - </td> - <td> - UserAgent to have exclusion service check - access with. - </td> - </tr> - <tr> - <td> - adminexclusion.dbpath - </td> - <td> - /tmp/wayback/admin - </td> - <td> - Directory where BDB files that store manual - exclusion data are stored. - </td> - </tr> - <tr> - <td> - robotdbpath - </td> - <td> - /tmp/wayback/robotsdb - </td> - <td> - Directory where BDB files that store cached - robots.txt document information are stored. - </td> - </tr> - <tr> - <td> - robotarcdir - </td> - <td> - /tmp/wayback/robot-arcs - </td> - <td> - Directory where ARC files containing cached - robots.txt documents are stored. 
- </td> - </tr> - <tr> - <td> - robotarcprefix - </td> - <td> - live-robots - </td> - <td> - Filename prefix for generated ARC files holding - robots.txt documents. - </td> - </tr> - <tr> - <td> - tempdir - </td> - <td> - /tmp/wayback/robot-temp - </td> - <td> - Directory where robots.txt documents are stored - temporarily before they are written to ARC - files. - </td> - </tr> - </table> - </p> - <p> - Robots.txt Exclusions options: - <table> - <tr> - <td> - Option - </td> - <td> - Default value - </td> - <td> - Description - </td> - </tr> - <tr> - <td> - resourceindex.exclusionua - </td> - <td> - ia_archiver - </td> - <td> - UserAgent to have exclusion service check - access with. - </td> - </tr> - <tr> - <td> - robotdbpath - </td> - <td> - /tmp/wayback/robotsdb - </td> - <td> - Directory where BDB files that store cached - robots.txt document information are stored. - </td> - </tr> - <tr> - <td> - robotarcdir - </td> - <td> - /tmp/wayback/robot-arcs - </td> - <td> - Directory where ARC files containing cached - robots.txt documents are stored. - </td> - </tr> - <tr> - <td> - robotarcprefix - </td> - <td> - live-robots - </td> - <td> - Filename prefix for generated ARC files holding - robots.txt documents. - </td> - </tr> - <tr> - <td> - tempdir - </td> - <td> - /tmp/wayback/robot-temp - </td> - <td> - Directory where robots.txt documents are stored - temporarily before they are written to ARC - files. - </td> - </tr> - </table> - </p> - - </subsection> - </section> - <section name="Preparing the ResourceIndex"> - <subsection name="Local BDB ResourceIndex Preparation"> - <p> - How to set up your ResourceIndex depends on whether you are - using the LocalARC ResourceStore or the Http11 - ResourceStore. In both cases, the actual index database - files will be stored on the machine running the Wayback - webapp. If you did not set a value for the - <b>resourceindex.incomingpath</b> configuration, then no - automatic updating of the ResourceIndex will occur.
In this - case, you can still manually update the BDB ResourceIndex - using the <b>bdb-client</b> command-line tool. The rest of - this section assumes that the - <b>resourceindex.incomingpath</b> has been set. - </p> - <p> - Any file placed in the <b>resourceindex.incomingpath</b> - will be interpreted as a CDX file containing index - information about ARC files in the collection, and will be - merged into the BDB automatically. There are 3 ways to get - CDX format files into this directory: - <ol> - <li> - A LocalARC ResourceStore with the - <b>resourcestore.autoindex</b> configuration set to - '1' will automatically notice new ARC files in - the directory named by <b>resourcestore.arcpath</b>. - Each new ARC will have a CDX file generated and - placed in the <b>resourceindex.incomingpath</b> - directory. This combination should not require human - interaction to automatically index new ARC content - to be viewed with the Wayback as it appears in the - <b>resourcestore.arcpath</b> directory. - </li> - <li> - If you are not using a LocalARC ResourceStore, then - you will need to manually use the - <b>index-client</b> command-line tool to index your - ARC files. The <b>index-client</b> has the - capability to HTTP PUT CDX data directly into the - <b>resourceindex.incomingpath</b> directory. - </li> - <li> - You can manually place files in the - <b>resourceindex.incomingpath</b> directory. - </li> - </ol> - </p> - </subsection> - <subsection name="Local CDX ResourceIndex Preparation"> - <p> - Using a CDX ResourceIndex should allow you to generate and - search indexes of a much larger size than a BDB index, but - there are tradeoffs. Generating and updating the CDX files - will be more difficult. - </p> - <p> - To generate a CDX index, run the <b>index-client</b> tool on - each ARC file in your collection, and send all the output to - the GNU <b>sort</b> utility (with the -u option). 
The output - of the <b>sort</b> utility is the final CDX file that can be - used with the Wayback. Be sure that the environment variable - <b><i>LC_ALL</i></b> is set to <b><i>C</i></b> before - running the <b>sort</b> tool. - </p> - </subsection> - </section> - <section name="Command Line Tools"> - <p> - The wayback distribution includes several command-line tools - that assist in creating and testing index files, and populating - the ArcProxy location db. - </p> - <p> - All the command line tools can be found - underneath the directory where you unpacked your distribution - at:<b>bin/*</b> (example: <i>bin/location-client</i>). You will - need to change permissions on the tools to allow them to be - executed: - </p> - <p> - <code> - chmod a+x bin/* - </code> - </p> - <subsection name="bdb-client"> - <p> - This tool allows several maintenance operations to be - performed on BDB files. There are two primary modes, read - and write. - <ol> - <li> - <code> - bin/bdb-client -r BDB_DIR BDB_NAME [PREFIX] - </code> - <p> - Output records from a BDB database on STDOUT. - </p> - <p> - where: - <ul> - <li> - <i>BDB_DIR</i> Open BDB in this - directory. - </li> - <li> - <i>BDB_NAME</i> Open BDB with this name. - </li> - <li> - <i>PREFIX</i> (optional) if present, - only output records whose KEY begins - with PREFIX. If this option is omitted, - all records will be output from the - BDB. Records are always output in sorted - order. - </li> - </ul> - </p> - </li> - <li> - <code> - bin/bdb-client -w BDB_DIR BDB_NAME - </code> - <p> - Read CDX format lines from STDIN, and insert - into a BDB, creating the BDB if needed. - </p> - <p> - where: - <ul> - <li> - <i>BDB_DIR</i> Open BDB in this - directory. - </li> - <li> - <i>BDB_NAME</i> Open BDB with this name. - </li> - </ul> - </p> - </li> - </ol> - </p> - </subsection> - <subsection name="bin-search"> - <p> - This tool allows binary searching against large sorted text - files.
It will output lines prefixed with a particular - <i>key</i> on STDOUT. - </p> - <p> - <code> - bin/bin-search KEY FILE [FILE2 ...] - </code> - <ul> - <li> - <i>KEY</i> String prefix for lines that should be - output. - </li> - <li> - <i>FILE [FILE2 ...]</i> Sequentially search through - each file specified, outputting the lines prefixed - with KEY for each file. Note that the complete - output of bin-search will be sorted when used with - a single file, but when multiple files are searched, - the results may not be sorted completely. - </li> - </ul> - </p> - </subsection> - <subsection name="index-client"> - <p> - This tool has two usages: - <ol> - <li> - <code> - bin/index-client ARC_PATH - </code> - <p> - Generation of a CDX format index data for a - single ARC file named by ARC_PATH. The CDX - format data is sent to STDOUT, and can be saved - to a file, sorted, etc. This is needed to - generate sorted CDX format indexes. - </p> - </li> - <li> - <code> - bin/index-client TMP_DIR INCOMING_URL LOCATION_URL ARC_DIR ARC_URL_PREFIX - </code> - <p> - where: - <ul> - <li> - <i> - TMP_DIR - </i> - Temporary working directory where - ex. - <b> - /tmp/ - </b> - </li> - <li> - <i> - INCOMING_URL - </i> - HTTP path to the RemoteSubmitFilter - which allows remote submission of index - data in CDX format for automatic merging - with a BDB ResourceIndex. - ex. - <b> - http://wayback-webapp.your-archive.org/wayback/index-incoming/ - </b> - </li> - <li> - <i> - LOCATION_URL - </i> - is the absolute URL where the ArcProxy can be - accessed. ex. - <b> - http://wayback-webapp.your-archive.org/arc-proxy/locationDB - </b> - </li> - <li> - <i> - ARC_DIR - </i> - is the absolute path to the directory on the local - machine which holds ARC files ex. - <b> - /2/arc-collection-1 - </b> - </li> - <li> - <i> - ARC_URL_PREFIX - </i> - is the absolute URL where the directory ARC_DIR can - be accessed. ex. 
- <b> - http://arc-storage-node-1.your-archive.org/2/arc-collection-1/ - </b> - </li> - </ul> - </p> - <p> - If you chose the Http11 ResourceStore, and are - using the BDB ResourceIndex implementation then - you will need to run this script with these - arguments once for each directory containing ARC - files (on each machine containing ARC files.) - For each ARC file found, this script will: - <ol> - <li> - generate the plain-text index file for - the ARC file - </li> - <li> - push that plain-text file onto the - machine running the Wayback webapp, - where the ResourceIndex database is - stored. The plain-text index files will - arrive in the IndexPipeline directory - structure so they are merged into the - ResourceIndex. - </li> - <li> - notify the ArcProxy LocationDB of the - URL where the ARC file can be accessed, - for later Replay requests which require - access to documents in the ARC file. - </li> - </ol> - </p> - </li> - </ol> - </p> - </subsection> - <subsection name="location-client"> - <p> - If you have already populated your ResourceIndex, and just - need to inform the ArcProxy LocationDB of where ARC files - are located. This script will allow you to synchronize the - ArcProxy LocationDB with the directories holding your ARC - files. - </p> - <p> - Execute the script once for each directory containing - ARC files (on each machine containing ARC files.) Again, - this script will <b>not</b> index the content of the ARC - files, but will only populate the ArcProxy LocationDB with - the locations of ARC files. - </p> - <p> - <code> - bin/location-client sync LOCATION_URL ARC_DIR ARC_URL_PREFIX - </code> - </p> - <p> - where: - <ul> - <li> - <i> - LOCATION_URL - </i> - is the absolute URL where the ArcProxy can be - accessed. ex. - <b> - http://wayback-webapp.your-archive.org/arc-proxy/locationDB - </b> - </li> - <li> - <i> - ARC_DIR - </i> - is the absolute path to the directory on the local - machine which holds ARC files ex. 
- <b> - /2/arc-collection-1 - </b> - </li> - <li> - <i> - ARC_URL_PREFIX - </i> - is the absolute URL where the directory ARC_DIR can - be accessed. ex. - <b> - http://arc-storage-node-1.your-archive.org/2/arc-collection-1/ - </b> - </li> - </ul> - </p> - </subsection> - <subsection name="url-client"> - <p> - URLs stored in BDB and CDX format ResourceIndexes are - <i>canonicalized</i> to a more generic form. Before - performing a lookup operation on the ResourceIndex, the same - canonicalization function is applied to requested URLs. This - tool will read space(" ") delimited lines from STDIN, and - output the same lines on STDOUT, but with one column - altered. The column that is changed is assumed to be a URL, - and the version output is the canonicalized form of the - input URL. - </p> - <p> - This tool is mostly useful for debugging the - canonicalization function, but can also be used, if the - canonicalization function is altered, to update an existing - CDX index, without recreating CDX files from original ARCs. - </p> - <p> - <code> - bin/url-client [-cdx] [-f FIELD] - </code> - <ul> - <li> - <i>-cdx</i> Pass thru lines prefixed with " CDX " - unchanged. - </li> - <li> - <i>-f FIELD</i> alter column FIELD of each line, - instead of the default column 1. - </li> - </ul> - </p> - </subsection> - </section> - <section name="Using"> - <subsection name="Archival URL mode usage"> - <p> - Once you have customized the web.xml file, restarted - Tomcat, and prepared your ResourceIndex, the Wayback Machine - installation will be ready for use. - </p> - <p> - In any case, you can immediately visit the context path - where you deployed the Wayback Machine webapp, at: - <b>http://HOSTNAME:PORT/CONTEXT</b>, ex: - <i>http://yourhost.yourdomain.org:8080/wayback</i>. - - </p> - <p> - You can search for pages in the index by typing the URL in - the box 'Enter Web Address' and clicking "Take Me Back".
To - search for all pages within a website or website - sub-directory, or to search for pages between particular - dates, click the "Adv. Search" link. - </p> - </subsection> - <subsection name="Proxy mode usage"> - <p> - Before accessing the Wayback Machine using Proxy mode, you - must configure the web browser that will access the service - to proxy all HTTP requests through the webapp, using the - proxy URL which is the context path where you deployed the - Wayback Machine webapp, at: <b>http://HOSTNAME:PORT/</b>, - ex: <i>http://yourhost.yourdomain.org:8080/</i>. For help on - how to configure your browser to proxy HTTP requests through - a proxy server, please consult the documentation for your - web browser. - </p> - <p> - You can access the Wayback Machine Query UI, which allows - you to view the status of the server and query the index - for documents, by pointing the browser at the same URL that - you configured as the proxy server. For information on - status, configurations and queries, please consult the - documentation for Using the Wayback Machine in Archival URL - mode. A common problem in Firefox Proxy configuration is - including the "http://": you do not include the "http://" - in the proxy server name. - </p> - </subsection> - <subsection name="Timeline mode usage"> - <p> - Once you have replaced and possibly customized the web.xml - file, restarted Tomcat, and prepared your ResourceIndex, - the Wayback Machine installation will be ready for use. - </p> - <p> - You can start by visiting the front page of your Wayback - service (see replayuriprefix in your web.xml file). Once you - are at this page, you can type a URL into the search box - near the top of the page, or use the Advanced Search page to - perform more complex searches. - </p> - <p> - Once you have located documents you would like to view with - your searches, you can start using the Timeline mode by - clicking a link in the search results. 
This will load the - timeline view for the specific page and time that you - selected. You can view different versions of any page by - clicking the timeline, and you can navigate to other pages - in your collection by clicking links directly in the page - below the timeline. - </p> - <p> - By checking the "Metadata" box, you can view metadata - available for the particular document you are viewing. - Unchecking the box will return to normal view mode. - </p> - </subsection> - - </section> </body> </document> This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2007-10-02 02:48:56
|
Revision: 2025 http://archive-access.svn.sourceforge.net/archive-access/?rev=2025&view=rev Author: bradtofel Date: 2007-10-01 19:49:00 -0700 (Mon, 01 Oct 2007) Log Message: ----------- TWEAK: now needs Java 1.5 Modified Paths: -------------- trunk/archive-access/projects/wayback/dist/src/site/xdoc/requirements.xml Modified: trunk/archive-access/projects/wayback/dist/src/site/xdoc/requirements.xml =================================================================== --- trunk/archive-access/projects/wayback/dist/src/site/xdoc/requirements.xml 2007-10-02 02:48:33 UTC (rev 2024) +++ trunk/archive-access/projects/wayback/dist/src/site/xdoc/requirements.xml 2007-10-02 02:49:00 UTC (rev 2025) @@ -10,7 +10,7 @@ <section name="Runtime Requirements"> <subsection name="JAVA"> <p> - Tested working with SUN v1.5.0_01 and 1.4.2_03. + Tested working with SUN v1.5.0_01. </p> </subsection> <subsection name="Tomcat"> This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2007-10-02 02:48:30
|
Revision: 2024 http://archive-access.svn.sourceforge.net/archive-access/?rev=2024&view=rev Author: bradtofel Date: 2007-10-01 19:48:33 -0700 (Mon, 01 Oct 2007) Log Message: ----------- UPDATED: no more references to 'ant' upped maven to 2.0 Modified Paths: -------------- trunk/archive-access/projects/wayback/dist/src/site/xdoc/requirements.xml Modified: trunk/archive-access/projects/wayback/dist/src/site/xdoc/requirements.xml =================================================================== --- trunk/archive-access/projects/wayback/dist/src/site/xdoc/requirements.xml 2007-10-02 02:47:53 UTC (rev 2023) +++ trunk/archive-access/projects/wayback/dist/src/site/xdoc/requirements.xml 2007-10-02 02:48:33 UTC (rev 2024) @@ -25,25 +25,12 @@ </subsection> </section> <section name="Build from src Requirements" > - <subsection name="Ant"> - <p> - Tested working with version 1.6.2. - </p> - </subsection> <subsection name="Maven"> <p> If you want to build distributions and the website, you'll - need Maven 1.0.2. + need Maven 2. </p> </subsection> </section> - <section name="Building HEAD from src Requirements" > - <subsection name="Maven"> - <p> - The HEAD is an in-progress migration from Maven 1.0.2 to - Maven 2.0. - </p> - </subsection> - </section> </body> </document> This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2007-10-02 02:47:49
|
Revision: 2023 http://archive-access.svn.sourceforge.net/archive-access/?rev=2023&view=rev Author: bradtofel Date: 2007-10-01 19:47:53 -0700 (Mon, 01 Oct 2007) Log Message: ----------- INITIAL REV: installation and configuration instructions. Added Paths: ----------- trunk/archive-access/projects/wayback/dist/src/site/xdoc/administrator_manual.xml Added: trunk/archive-access/projects/wayback/dist/src/site/xdoc/administrator_manual.xml =================================================================== --- trunk/archive-access/projects/wayback/dist/src/site/xdoc/administrator_manual.xml (rev 0) +++ trunk/archive-access/projects/wayback/dist/src/site/xdoc/administrator_manual.xml 2007-10-02 02:47:53 UTC (rev 2023) @@ -0,0 +1,1559 @@ +<?xml version="1.0" encoding="ISO-8859-1"?> +<document> + <properties> + <title>Administrators Manual</title> + <author email="brad at archive dot org">Brad Tofel</author> + <revision>$$Id$$</revision> + </properties> + + <body> + + + + <section name="Requirements"> + + + <subsection name="Third Party Packages"> + <p> + Please see the + <a href="requirements.html"> + System Requirements + </a> + . + </p> + </subsection> + + + <subsection name="Wayback Software"> + <p> + Please see the + <a href="downloads.html"> + Software Downloads page + </a> + . + </p> + </subsection> + + + </section> + + + + <section name="Installing"> + + + <subsection name="Installing Tomcat"> + <p> + Please refer to the README file included with your Tomcat distribution. + </p> + </subsection> + + + <subsection name="Installing Wayback"> + <p> + Once you have downloaded the .tar.gz file from + sourceforge, you will need to unpack the file to access the + webapp file, <b>wayback.war</b>. + </p> + <p> + Installation and configuration of this software involves the + following steps: + <ol> + <li> + Placing .war file in appropriate location. + </li> + <li> + Waiting for Tomcat to unpack the .war file. + </li> + <li> + Customizing base wayback.xml file. 
+ </li> + <li> + Restarting tomcat. + </li> + </ol> + </p> + </subsection> + </section> + + + + <section name="Wayback Configuration Overview"> + <p> + The wayback software provides Search and Replay access to documents + contained in a WaybackCollection. Search access allows users to + query a collection to locate documents, and is presently limited + to URL based queries. Replay access allows users to view archived + content in collections within a web browser. A WaybackCollection is + a combination of a ResourceStore, which contains the actual archived + documents, and a ResourceIndex, which provides URL based search of the + documents in the ResourceStore. + </p> + <p> + The Wayback machine is configured using Spring IOC, to specify and + configure concrete implementations of several basic modules. For + information about using Spring, please see + <a href="http://www.springframework.org/docs/reference/beans.html"> + this page + </a>. + </p> + </section> + + + + <section name="Defining WaybackCollections"> + <p> + The XML configuration template for a Wayback collection follows: + <pre> + +<bean id="localbdbcollection" + class="org.archive.wayback.webapp.WaybackCollection"> + <property name="resourceStore" ... /> + <property name="resourceIndex" ... /> +</bean> + + </pre> + </p> + <p> + The resourceStore property refers to a bean implementing org.archive.wayback.ResourceStore. + </p> + <p> + The resourceIndex property refers to a bean implementing org.archive.wayback.ResourceIndex. + </p> + </section> + + + + <section name="org.archive.wayback.ResourceStore implementations"> + + + <subsection name="LocalARCResourceStore"> + <p> + This implementation works well for small + collections, where all the ARC files can be placed in a single + directory on the same computer running the wayback application. 
+ Using NFS or another network filesystem technology and symbolic + links can allow this implementation to deal with ARC files in + multiple directories, or across multiple storage nodes. This + implementation also includes the capability to run a background + thread to automatically notice new ARC files appearing, index + those ARC files, and hand off the index data for merging with + a BDBResourceIndex. + </p> + <p> + The XML configuration template for a LocalARCResourceStore follows: + <pre> + +<property name="resourceStore"> + <bean class="org.archive.wayback.resourcestore.LocalARCResourceStore" + init-method="init"> + <property name="arcDir" value="/tmp/wayback/arcs/" /> + <property name="queuedDir" value="/tmp/wayback/arc-indexer/queued" /> + <property name="workDir" value="/tmp/wayback/arc-indexer/work" /> + <property name="runInterval" value="10000" /> + <property name="indexClient"> + <bean class="org.archive.wayback.resourceindex.indexer.IndexClient"> + <property name="tmpDir" value="/tmp/wayback/arc-indexer/tmp" /> + <property name="target" value="/tmp/wayback/index-data/incoming" /> + </bean> + </property> + </bean> +</property> + + </pre> + </p> + <p> + Required configuration: + <ul> + <li> + <b> + arcDir + </b> + is the local directory where ARC files will be + located. + </li> + </ul> + </p> + <p> + Optional configuration (only needed for automatic indexing) + <ul> + <li> + <b> + queuedDir + </b> + names a local directory where the indexer will maintain state + about ARC files that have already been indexed. + </li> + <li> + <b> + workDir + </b> + names a local directory where the indexer will maintain state + about ARC files that are about to be indexed. + </li> + <li> + <b> + runInterval + </b> + indicates the number of milliseconds between polling arcDir + for newly created ARC files. Default is 10000. + </li> + <li> + <b> + tmpDir + </b> + names a local directory where index data will be stored + temporarily before handing off to <b>target</b>. 
+ </li> + <li> + <b> + target + </b> + names: + <ol> + <li> + a local directory where a BDBIndexUpdater is configured to + look for new index data to be merged with a BDBIndex. + </li> + <li> + a remote http:// URL where index data should be PUT, for + merging with a remote BDBIndex. + </li> + </ol> + </li> + </ul> + </p> + </subsection> + + + <subsection name="HttpARCResourceStore"> + <p> + This implementation allows the wayback + application to access documents in remote ARC files via HTTP 1.1, + and scales to millions of ARC files. + </p> + <p> + The XML configuration template for an HttpARCResourceStore follows: + <pre> + +<property name="resourceStore"> + <bean class="org.archive.wayback.resourcestore.HttpARCResourceStore"> + <property name="urlPrefix" value="http://localhost:8080/arcproxy/" /> + </bean> +</property> + + </pre> + </p> + <p> + Required configuration: + <ul> + <li> + <b> + urlPrefix + </b> + this is the http:// prefix where ARC files are exported with an + ArcProxy installation. See elsewhere in this document for + information about setting up an ArcProxy. + </li> + </ul> + </p> + </subsection> + + + </section> + + + + <section name="org.archive.wayback.ResourceIndex implementations"> + + + <subsection name="LocalResourceIndex"> + <p> + This ResourceIndex implementation allows wayback to search one of + several index formats hosted on the same machine as the wayback + application. See below for details on which specific index formats + are available. + </p> + <p> + The XML configuration template for a LocalResourceIndex follows: + <pre> + +<property name="resourceIndex"> + <bean class="org.archive.wayback.resourceindex.LocalResourceIndex"> + <property name="source" ... /> + <property name="maxRecords" value="10000" /> + </bean> +</property> + + </pre> + </p> + <p> + <b> + maxRecords + </b> + specifies the maximum number of records to process, and thus that can + be returned, during a single query.
+ </p> + <br></br> + <p> + <b> + source + </b> + defines the format to be used for storing and searching records in + the ResourceIndex. There are several possible implementations + available: + <ul> + <li> + <b> + BDBIndex + </b> + This implementation is good for smaller scale installations, up + to 10's of millions of documents, and allows for fast incremental + updates to the index. It also allows for automated index updating. + <pre> + +<bean class="org.archive.wayback.resourceindex.bdb.BDBIndex" + init-method="init"> + <property name="bdbName" value="DB1" /> + <property name="bdbPath" value="/tmp/wayback/index/" /> + <property name="updater"> + <bean class="org.archive.wayback.resourceindex.bdb.BDBIndexUpdater"> + <property name="incoming" value="/tmp/wayback/index-data/incoming/" /> + <property name="failed" value="/tmp/wayback/index-data/failed/" /> + <property name="merged" value="/tmp/wayback/index-data/merged/" /> + <property name="runInterval" value="10000" /> + </bean> + </property> +</bean> + + </pre> + The <b>updater</b> property is optional. If used, a background + index merging thread will be started. Every <b>runInterval</b> + milliseconds, the thread will look for new files in the + <b>incoming</b> directory. Any files present are assumed to be + in CDX file format, and will be merged into the index and + immediately available for access. Files that are not successfully + merged with the index are left in place (or moved to the + <b>failed</b> directory, if it is specified.) Files that are + successfully merged are deleted (or moved to the <b>merged</b> + directory, if it is specified.) + <br></br> + </li> + <li> + <b> + CDXIndex + </b> + This implementation is good for larger scale installations, + bounded mostly by the size of the index you can (first create, + and later) store on a single machine. 
Using the command line tool + <b>index-client</b>, and the standard UNIX <b>sort</b> tool + (see note below on LC_ALL), you create a sorted flat text file + that is searched on each request. Building these sorted files, + and updating the index are manual operations presently. + <pre> + +<bean id="cdxsearchresultsource" class="org.archive.wayback.resourceindex.cdx.CDXIndex"> + <property name="path" value="/tmp/wayback/cdx-index/index.cdx" /> +</bean> + + </pre> + </li> + <li> + <b> + CompositeSearchResultSource + </b> + This implementation allows for searching multiple CDXIndex text + files for each request. For optimal search efficiency, multiple + index files should be merged (sort -mu) prior to production use, + but this implementation allows a trade-off in simplified index + management for a decrease in search performance. + <pre> + +<bean id="compositecdxresultsource" class="org.archive.wayback.resourceindex.CompositeSearchResultSource"> + <property name="CDXSources"> + <list> + <value>/tmp/wayback/cdx-index/index.cdx.1</value> + <value>/tmp/wayback/cdx-index/index.cdx.2</value> + </list> + </property> +</bean> + + </pre> + </li> + </ul> + </p> + + </subsection> + + + <subsection name="RemoteResourceIndex configuration"> + <p> + This ResourceIndex option allows hosting of a ResourceIndex on a + machine other than the machine hosting the Wayback webapp. + </p> + <p> + The XML configuration template for a RemoteResourceIndex follows: + <pre> + +<bean id="remoteindex" class="org.archive.wayback.resourceindex.RemoteResourceIndex" init-method="init"> + <property name="searchUrlBase" value="http://wayback-index.archive.org:8080/wayback/xmlquery" /> +</bean> + + </pre> + <b>searchUrlBase</b> indicates the URL prefix to which OpenSearchQuery + parameters are appended to access a Wayback AccessPoint running a + LocalResourceIndex on a remote host to the Wayback application. 
+ </p> + + </subsection> + + + <subsection name="NutchResourceIndex configuration"> + <p> + This ResourceIndex option allows wayback to query a Nutch + full-text search engine. This ResourceIndex option is highly + experimental. For help setting up a NutchResourceIndex, please see + <a href="http://archive-access.sourceforge.net/projects/nutch/wayback.html"> + this page. + </a> + </p> + <p> + The XML configuration template for a NutchResourceIndex follows: + <pre> + + <property name="remotenutchindex"> + <bean class="org.archive.wayback.resourceindex.NutchResourceIndex" init-method="init"> + <property name="searchUrlBase" value="http://webteam-ws.us.archive.org:8080/katrina/opensearch" /> + <property name="maxRecords" value="100" /> + </bean> + </property> + + </pre> + <b>searchUrlBase</b> indicates the URL prefix to which OpenSearchQuery + parameters are appended to access a Nutch server's XML query interface. + + </p> + </subsection> + </section> + + + + <section name="Defining AccessPoints for WaybackCollections"> + <p> + Once you have defined one or more WaybackCollections, you need to + specify how those collections are exposed to end users. Collections are + exposed by defining an AccessPoint for that collection. + </p> + <p> + An AccessPoint is a combination of a WaybackCollection, a Query User + Interface, a Replay User Interface, and a URL by which users interact + with that AccessPoint. AccessPoints can also describe mechanisms for + excluding documents, and for limiting what users are allowed to + interact with the AccessPoint. + </p> + <p> + AccessPoints can be used to provide different levels and types of + access to the same collection for different users. For example, you + can provide both Proxy and Archival URL mode access to a single + collection by defining 2 AccessPoints with different Replay User + Interfaces but the same WaybackCollection. Using AccessPoints, you can + also provide different levels of access to a collection.
For example, + users within a particular subnet may be able to access all documents + within a collection via one AccessPoint, but users outside that subnet + may be restricted to viewing only documents currently allowed by a + web site's current robots.txt file. + </p> + <p> + The XML configuration template for an AccessPoint follows: + <pre> + +<bean name="8080:wayback" class="org.archive.wayback.webapp.AccessPoint"> + <property name="collection" ... /> + <property name="query" ... /> + <property name="replay" ... /> + <property name="parser" ... /> + <property name="uriConverter" ... /> + <property name="exclusionFactory" ... /> + <property name="authentication" ... /> + <property name="configs" ... /> +</bean> + + </pre> + </p> + <p> + Required property configurations: + <ul> + <li> + <b> + collection + </b> + is a reference to the WaybackCollection for this AccessPoint. + </li> + <li> + <b> + query + </b> + defines what .jsp files to use to render results for queries to + this AccessPoint. See the section "Query .jsp configuration" for + more information. + </li> + <li> + <b> + replay + </b> + defines what Replay User Interface to use for this AccessPoint. See + the section "Setting up the Replay User Interface within an + AccessPoint" for more information. + </li> + <li> + <b> + parser + </b> + defines how incoming requests are parsed and subsequently processed, + and is usually dependent on the Replay User Interface being used + with this AccessPoint. See the section "Setting up the Replay User + Interface within an AccessPoint" for more information. + </li> + <li> + <b> + uriConverter + </b> + defines how public URLs are constructed to provide Replay access + to this AccessPoint. This is usually dependent on the Replay User + Interface used with this AccessPoint. See the section "Setting up + the Replay User Interface within an AccessPoint" for more + information.
+ </li> + </ul> + </p> + <p> + Optional property configurations: + <ul> + <li> + <b> + exclusionFactory + </b> + defines how documents are excluded within this AccessPoint. See the + section "Excluding Documents within an AccessPoint" for more + information. + </li> + <li> + <b> + authentication + </b> + defines who is allowed to interact with this AccessPoint. See the + section "Restricting who can interact with an AccessPoint" for more + information. + </li> + <li> + <b> + configs + </b> + Allows additional customizations within this AccessPoint. See the + section "Adding Additional Configurations to an AccessPoint" for + more information. + </li> + </ul> + </p> + </section> + + + <section name="Query .jsp configuration"> + <p> + Wayback provides query results to a .jsp handler page, which is + responsible for rendering final output to users. The actual .jsp file + invoked for the various response types can be configured as described + below. Included with the Wayback package are several reference .jsp + implementations, including one which outputs XML. This XML interface is + used by the Wayback software in distributed index configurations, but + can also be used as an extension point for further user interface + customizations. + </p> + <br></br> + <p> + The XML configuration template for the query Renderer follows below, + including the default configuration for each value. The values indicate + the path to the .jsp file that will be executed to generate the output + for each class of query.
+ <pre> + +<bean class="org.archive.wayback.query.Renderer"> + <property name="errorJsp" value="/jsp/HTMLError.jsp" /> + <property name="xmlErrorJsp" value="/jsp/XMLError.jsp" /> + <property name="captureJsp" value="/jsp/HTMLResults.jsp" /> + <property name="urlJsp" value="/jsp/HTMLResults.jsp" /> + <property name="xmlJsp" value="/jsp/XMLResults.jsp" /> +</bean> + + </pre> + The following list indicates when each .jsp is executed: + <ul> + <li> + <b> + errorJsp + </b> + will be executed when any type of expected error condition occurs + during handling of a request. + </li> + <li> + <b> + xmlErrorJsp + </b> + will be executed when any type of expected error condition occurs + during handling of a request indicating that xml response data is + desired. + </li> + <li> + <b> + captureJsp + </b> + will be executed when results listing captures for a specific, + single URL are requested in HTML format. + </li> + <li> + <b> + urlJsp + </b> + will be executed when results listing captures for multiple URLs, + each URL having one or more captures, are requested in HTML format. + </li> + <li> + <b> + xmlJsp + </b> + will be executed when results are requested in XML format. + </li> + </ul> + </p> + </section> + + <section name="Setting up the Replay User Interface within an AccessPoint"> + <p> + There are presently 2 Replay modes supported by the Wayback software, + Archival URL mode, and Proxy mode. + </p> + <subsection name="Archival URL"> + <p> + Archival URL Replay mode uses a modified URL to designate + documents stored in ARC files. The general form of an + Archival URL is: + <br></br> + <div> + <code> + http://HOSTNAME:PORT/CONTEXT/TIMESTAMP/URL + </code> + </div> + <br></br> + where + <ul> + <li> + <b>HOSTNAME</b> is the host where the Wayback Machine is + running. + </li> + <li> + <b>PORT</b> is the port where Tomcat is listening for + incoming HTTP requests, which also refers to part of the name of + the Access Point. See below for example CONTEXT mappings. 
+ </li> + <li> + <b>CONTEXT</b> is the context where the Wayback Machine + webapp has been deployed, plus the name of the Access Point. See + below for example CONTEXT mappings. + </li> + <li> + <b>TIMESTAMP</b> is 0 to 14 digits of a date, possibly + followed by an asterisk ('*'). The format of a + TIMESTAMP is: + <div> + <code> + YYYYMMDDHHmmss + </code> + </div> + where + <ul> + <li> + <b>YYYY</b> represents a 4-digit year + </li> + <li> + <b>MM</b> represents a 2-digit, 1-based month + (Jan = 01 - Dec = 12) + </li> + <li> + <b>DD</b> represents a 2-digit day of the month + (01-31) + </li> + <li> + <b>HH</b> represents a 2-digit hour (00-23) + </li> + <li> + <b>mm</b> represents a 2-digit minute (00-59) + </li> + <li> + <b>ss</b> represents a 2-digit second (00-59) + </li> + </ul> + The following are example dates expressed as + 14-digit Timestamps: + <br></br> + <div> + Jan 13, 1999 03:34:35 (am UTC) - 19990113033435 + </div> + <br></br> + <div> + Dec 31, 2004 23:01:00 (pm UTC) - 20041231230100 + </div> + <br></br> + </li> + <li> + <b>URL</b> represents the actual URL that should be + replayed. + </li> + </ul> + <br></br> + <div> + Here is an example Archival URL, on an assumed host + <b>wayback.somehost.org</b>, with a wayback webapp deployed as + <b>ROOT</b>, via the Access Point named <b>80:archive</b> for the + page <b>http://www.yahoo.com/</b> on Dec 31, 1999 at 12:00:00 UTC. + <br></br> + <div> + <code> + http://wayback.somehost.org/archive/19991231120000/http://www.yahoo.com/ + </code> + </div> + <br></br> + </div> + <br></br> + <div> + Archival URL mode allows replay of all versions captured + of a particular URL, by modifying the Timestamp. When an + Archival URL Replay request is received for a URL, the + Wayback Machine will replay the closest version in time + to the Timestamp requested of the particular URL.
+ </div> + <br></br> + <div> + HTML documents returned in Archival URL Replay mode are + modified from the original version to provide a replay + experience more consistent with viewing the original + content. This is accomplished by the insertion of + Javascript, which executes in the client browser after + the page has loaded. This Javascript modifies most URLs + within the HTML page, both Anchors (links) as well as + embedded content (images, applets, etc) so that they + become appropriate Archival URL requests back to the Wayback + application. + </div> + <br></br> + <div> + This Javascript is imperfect: sometimes requests + "leak" to the live web temporarily, before the + Javascript has executed. Also, not all URLs are + rewritten correctly, especially URLs that are created + by Javascript that was in the original page, and + specialized file types containing links like Flash and + PDF documents. + </div> + <br></br> + <div> + The <b>name</b> of the Access Point bean in the Spring configuration + file determines the CONTEXT and PORT used in Archival URLs within + that Access Point. The Servlet context name where the Wayback + application is deployed also factors into the CONTEXT used within + Archival URLs for each Access Point. + </div> + <br></br> + <div> + The following examples show the Archival URL prefix for the + following two Access Points depending on the Wayback webapp being + deployed in two different contexts, "ROOT" and "wb-webapp". + </div> + <br></br> + <div> + If the following Access Point definitions are present in the + wayback.xml: + <pre> + +<bean name="8080:wayback" class="org.archive.wayback.webapp.AccessPoint"> + <property name="collection" ref="localcollection" /> + ... +</bean> + +<bean name="8080:wayback2" class="org.archive.wayback.webapp.AccessPoint"> + <property name="collection" ref="localcollection" /> + ...
+</bean> + + </pre> + then the following table shows the Archival URL prefixes to access + each collection on the host "wayback.somehost.org" assuming a + Tomcat Connector listening on port 8080: + </div> + <table> + <tr> + <th> + webapp deployed at + </th> + <th> + Access Point bean name + </th> + <th> + Archival URL prefix + </th> + </tr> + <tr> + <td> + ROOT + </td> + <td> + 8080:wayback + </td> + <td> + http://wayback.somehost.org:8080/wayback/ + </td> + </tr> + <tr> + <td> + ROOT + </td> + <td> + 8080:wayback2 + </td> + <td> + http://wayback.somehost.org:8080/wayback2/ + </td> + </tr> + <tr> + <td> + wb-webapp + </td> + <td> + 8080:wayback + </td> + <td> + http://wayback.somehost.org:8080/wb-webapp/wayback/ + </td> + </tr> + <tr> + <td> + wb-webapp + </td> + <td> + 8080:wayback2 + </td> + <td> + http://wayback.somehost.org:8080/wb-webapp/wayback2/ + </td> + </tr> + </table> + </p> + <p> + The properties <b>replay</b>, <b>parser</b>, and <b>uriConverter</b> + for Archival URL Access Points must be set to the following + implementations: + <pre> + + <property name="replay"> + <bean class="org.archive.wayback.archivalurl.ArchivalUrlReplayDispatcher"> + <property name="jsInserts"> + <list> + <value>http://wayback.somehost.org:8080/wb-webapp/wm.js</value> + </list> + </property> + <property name="jspInserts"> + <list> + <value>/replay/Timeline.jsp</value> + </list> + </property> + </bean> + </property> + + <property name="parser"> + <bean class="org.archive.wayback.archivalurl.ArchivalUrlRequestParser" + init-method="init"> + <property name="maxRecords" value="1000" /> + <property name="earliestTimestamp" value="1996" /> + </bean> + </property> + + <property name="uriConverter"> + <bean class="org.archive.wayback.archivalurl.ArchivalUrlResultURIConverter"> + <property name="replayURIPrefix" value="http://wayback.somehost.org:8080/wb-webapp/wayback/" /> + </bean> + </property> + + </pre> + </p> + <table> + <tr> + <th> + configuration + </th> + <th> + 
optional/required + </th> + <th> + description + </th> + </tr> + <tr> + <td> + jsInserts + </td> + <td> + required + </td> + <td> + This list must include a reference to the wm.js javascript file, + but references to additional javascript files here will result in + a reference to those javascript URLs within all replayed HTML + pages. + </td> + </tr> + <tr> + <td> + jspInserts + </td> + <td> + optional + </td> + <td> + If any values are referenced here, then those .jsp files will be + invoked for every replayed document, and the resulting output + will be included in replayed HTML pages. The example included + here will result in a Timeline banner in-page presence being + included with each replayed HTML page, allowing navigation + between different versions of the current URL. + </td> + </tr> + <tr> + <td> + maxRecords + </td> + <td> + optional + </td> + <td> + Sets the default maximum requested records for Archival URL query + requests. + </td> + </tr> + <tr> + <td> + earliestTimestamp + </td> + <td> + optional + </td> + <td> + Set the default start date for requested records for Archival + URL query requests. + </td> + </tr> + <tr> + <td> + replayURIPrefix + </td> + <td> + required + </td> + <td> + Points to the Archival URL prefix of the Access Point as + illustrated in the preceding table. + </td> + </tr> + </table> + </subsection> + + <subsection name="Proxy"> + <p> + Wayback can be configured to act as an HTTP proxy server. To utilize + this mode, the wayback webapp must be deployed as the ROOT context, + and client browser must be configured to proxy all HTTP requests + through the Wayback Machine application. Instead of retrieving + documents from the live web, the Wayback Machine will retrieve + documents from the local repository of ARC files. 
+ </p> + <br></br> + <br></br> + <p> + Proxy Replay mode does not suffer from the shortcomings of + the inserted Javascript that the Archival URL mode uses, + but it has one major drawback: there is no way to + specify which version of a captured document should + be replayed. Only the URL to be replayed is sent from the + client browser to the Wayback Machine - no date information + is sent with the request. + </p> + <br></br> + <br></br> + <p> + In Proxy Replay mode, the Wayback Machine will return the + most recent version captured of any requested page. This + behavior can be changed by using the experimental Firefox-specific + plugin developed by Oskar Grenholm. You can find out more about + this plugin and download it + <a href="http://archive-access.sourceforge.net/projects/waxtoolbar/"> + here + </a>. + </p> + <br></br> + <br></br> + <p> + Thanks Oskar! + </p> + + <br></br> + <br></br> + <div> + The following is an example Proxy Replay Access Point definition. It + assumes to be running on a host <b>wayback.somehost.org</b>, that a + Tomcat Connector has been added for port <b>8090</b>, + that the Wayback webapp has been deployed at the ROOT context, and + that another Archival URL Access Point named "8080:wayback" has been + configured. 
+ <pre> + +<bean name="8090" parent="8080:wayback"> + <property name="replay"> + <bean class="org.archive.wayback.proxy.ProxyReplayDispatcher" /> + </property> + <property name="uriConverter"> + <bean class="org.archive.wayback.proxy.RedirectResultURIConverter"> + <property name="redirectURI" value="http://wayback.somehost.org:8090/jsp/Redirect.jsp" /> + </bean> + </property> + <property name="parser"> + <bean class="org.archive.wayback.proxy.ProxyRequestParser" init-method="init"> + <property name="localhostNames"> + <list> + <value>wayback.somehost.org</value> + </list> + </property> + <property name="maxRecords" value="1000" /> + </bean> + </property> +</bean> + + </pre> + </div> + <br></br> + <br></br> + <div> + <b>redirectURI</b> is required, and must be set to the name of the + host where the Wayback application is running. If this is not the + primary name of the machine running the Wayback application, then you + may need to also specify the hostname used for the Wayback application + in the <b>localhostNames</b> configuration list. + </div> + </subsection> + + </section> + + + + <section name="Excluding Documents within an AccessPoint"> + <subsection name="Excluding Documents with live Robots.txt"> + Documents may be excluded from access within an Access Point by + retroactively enforcing the policies in a web site's live robots.txt + document, by adding the following configuration in the Access Point. + <pre> + +<property name="exclusionFactory" ref="excluder-factory-robot" /> + + </pre> + + <br></br> + Please see the default wayback.xml packaged with this software for an + example bean definition for the referenced <b>excluder-factory-robot</b> + bean. + </subsection> + + <subsection name="Excluding Documents with an Administrative List"> + Documents may be excluded from access within an Access Point by + using a plain text file listing URL prefixes which should be blocked.
+ If this option is used with a non-zero value for <b>checkInterval</b>, + the Wayback software will monitor the external file, and will + automatically reload the file when it changes. + <br></br> + The following Spring configuration defines a static exclusion file that + causes URLs listed in the file <b>/tmp/exclude.txt</b> to be blocked, + with the file being checked for updates every 10 minutes. + <pre> + +<bean id="static-exclusion" class="org.archive.wayback.accesscontrol.staticmap.StaticMapExclusionFilterFactory"> + <property name="file" value="/tmp/exclude.txt" /> + <property name="checkInterval" value="600" /> +</bean> + + </pre> + <br></br> + Adding the following configuration to an Access Point will cause the + excluded URLs named in <b>/tmp/exclude.txt</b> to be inaccessible: + <pre> + +<property name="exclusionFactory" ref="static-exclusion" /> + + </pre> + </subsection> + + </section> + + <section name="Restricting who can interact with an AccessPoint"> + + <subsection name="Limiting Access based on IP Addresses"> + Access to a particular Access Point can be limited to a specific IP + address range by adding the following configuration to an Access Point + definition. + + <pre> + +<property name="authentication"> + <bean class="org.archive.wayback.authenticationcontrol.IPMatchesBooleanOperator"> + <property name="allowedRanges"> + <list> + <value>192.168.1.16/24</value> + </list> + </property> + </bean> +</property> + + </pre> + + which would have the effect of blocking users outside the + <b>192.168.1.16/24</b> network. + </subsection> + + <subsection name="Limiting Access based on HTTP BASIC Authentication"> + Access can be restricted to a particular Access Point using Tomcat's + built-in configuration options.
Add the following configuration to + the web.xml, which assumes an Access Point named "8080:secure" (or + really for any port): + <pre> + +<security-constraint> + <web-resource-collection> + <web-resource-name>Secured-Wayback</web-resource-name> + <url-pattern>/secure/*</url-pattern> + </web-resource-collection> + <auth-constraint> + <role-name>wayback</role-name> + </auth-constraint> +</security-constraint> + +<login-config> + <auth-method>BASIC</auth-method> + <realm-name>Secured-Wayback</realm-name> +</login-config> + + </pre> + <br></br> + <br></br> + And then adding user configuration to the tomcat-users.xml file: + <pre> + +<role rolename="wayback"/> +<user password="changeM3" roles="wayback" username="brad"/> + + </pre> + </subsection> + </section> + + <section name="Adding Additional Configurations to an AccessPoint"> + <p> + The following configuration can be added to an Access Point: + <pre> + +<property name="configs"> + <props> + <prop key="inst">Acrobatic Association</prop> + <prop key="logo">http://images.somehost.com/logos/acro.jpg</prop> + </props> +</property> + + </pre> + </p> + <p> + These configurations are then accessible in the common .jsp rendering + pages, allowing Collection or Access Point specific text to be relayed + to shared .jsp files, which can then retrieve the Access Point specific + configuration with the following code: + + <pre> + +UIResults results = UIResults.getFromRequest(request); +String instString = results.getContextConfig("inst"); +String logoString = results.getContextConfig("logo"); + + </pre> + </p> + </section> + + <section name="External Tools"> + + <p> + The wayback distribution includes several command-line tools + that assist in creating and testing index files, and populating + the ArcProxy location db. + </p> + <p> + All the command line tools can be found + underneath the directory where you unpacked your distribution + at: <b>bin/*</b> (example: <i>bin/location-client</i>).
You will + need to change permissions on the tools to allow them to be + executed: + </p> + <p> + <code> + chmod a+x bin/* + </code> + </p> + + <subsection name="bdb-client"> + <p> + This tool allows several maintenance operations to be + performed on BDB files. There are two primary modes, read + and write. + <ol> + <li> + <code> + bin/bdb-client -r BDB_DIR BDB_NAME [PREFIX] + </code> + <p> + Output records from a BDB database on STDOUT. + </p> + <p> + where: + <ul> + <li> + <i>BDB_DIR</i> Open BDB in this + directory. + </li> + <li> + <i>BDB_NAME</i> Open BDB with this name. + </li> + <li> + <i>PREFIX</i> (optional) if present, + only output records whose KEY begins + with PREFIX. If this option is omitted, + all records will be output from the + BDB. Records are always output in sorted + order. + </li> + </ul> + </p> + </li> + <li> + <code> + bin/bdb-client -w BDB_DIR BDB_NAME + </code> + <p> + Read CDX format lines from STDIN, and insert + into a BDB, creating the BDB if needed. + </p> + <p> + where: + <ul> + <li> + <i>BDB_DIR</i> Open BDB in this + directory. + </li> + <li> + <i>BDB_NAME</i> Open BDB with this name. + </li> + </ul> + </p> + </li> + </ol> + </p> + </subsection> + + <subsection name="bin-search"> + <p> + This tool allows binary searching against large sorted text + files. It will output lines prefixed with a particular + <i>key</i> on STDOUT. + </p> + <p> + <code> + bin/bin-search KEY FILE [FILE2 ...] + </code> + <ul> + <li> + <i>KEY</i> String prefix for lines that should be + output. + </li> + <li> + <i>FILE [FILE2 ...]</i> Sequentially search through + each file specified, outputting the lines prefixed + with KEY for each file. Note that the complete + output of bin-search will be sorted when used with + a single file, but when multiple files are searched, + the results may not be sorted completely. 
+ </li> + </ul> + </p> + </subsection> + + <subsection name="index-client"> + <p> + This tool has two usages: + <ol> + <li> + <code> + bin/index-client ARC_PATH + </code> + <p> + Generation of a CDX format index data for a + single ARC file named by ARC_PATH. The CDX + format data is sent to STDOUT, and can be saved + to a file, sorted, etc. This is needed to + generate sorted CDX format indexes. + </p> + </li> + <li> + <code> + bin/index-client TMP_DIR INCOMING_URL LOCATION_URL ARC_DIR ARC_URL_PREFIX + </code> + <p> + where: + <ul> + <li> + <i> + TMP_DIR + </i> + Temporary working directory where + ex. + <b> + /tmp/ + </b> + </li> + <li> + <i> + INCOMING_URL + </i> + HTTP path to the RemoteSubmitFilter + which allows remote submission of index + data in CDX format for automatic merging + with a BDB ResourceIndex. + ex. + <b> + http://wayback-webapp.your-archive.org/wayback/index-incoming/ + </b> + </li> + <li> + <i> + LOCATION_URL + </i> + is the absolute URL where the ArcProxy can be + accessed. ex. + <b> + http://wayback-webapp.your-archive.org:8080/locationdb/locationDB + </b> + </li> + <li> + <i> + ARC_DIR + </i> + is the absolute path to the directory on the local + machine which holds ARC files ex. + <b> + /2/arc-collection-1 + </b> + </li> + <li> + <i> + ARC_URL_PREFIX + </i> + is the absolute URL where the directory ARC_DIR can + be accessed. ex. + <b> + http://arc-storage-node-1.your-archive.org/2/arc-collection-1/ + </b> + </li> + </ul> + </p> + <p> + If you chose the Http11 ResourceStore, and are + using the BDB ResourceIndex implementation then + you will need to run this script with these + arguments once for each directory containing ARC + files (on each machine containing ARC files.) + For each ARC file found, this script will: + <ol> + <li> + generate the plain-text index file for + the ARC file + </li> + <li> + push that plain-text file onto the + machine running the Wayback webapp, + where the ResourceIndex database is + stored. 
The plain-text index files will + arrive in the IndexPipeline directory + structure so they are merged into the + ResourceIndex. + </li> + <li> + notify the ArcProxy LocationDB of the + URL where the ARC file can be accessed, + for later Replay requests which require + access to documents in the ARC file. + </li> + </ol> + </p> + </li> + </ol> + </p> + </subsection> + + <subsection name="location-client"> + <p> + If you have already populated your ResourceIndex, and just + need to inform the ArcProxy LocationDB of where ARC files + are located, this script will allow you to synchronize the + ArcProxy LocationDB with the directories holding your ARC + files. + </p> + <p> + Execute the script once for each directory containing + ARC files (on each machine containing ARC files.) Again, + this script will <b>not</b> index the content of the ARC + files, but will only populate the ArcProxy LocationDB with + the locations of ARC files. + </p> + <p> + <code> + bin/location-client sync LOCATION_URL ARC_DIR ARC_URL_PREFIX + </code> + </p> + <p> + where: + <ul> + <li> + <i> + LOCATION_URL + </i> + is the absolute URL where the ArcProxy can be + accessed. ex. + <b> + http://wayback-webapp.your-archive.org:8080/locationdb/locationDB + </b> + </li> + <li> + <i> + ARC_DIR + </i> + is the absolute path to the directory on the local + machine which holds ARC files ex. + <b> + /2/arc-collection-1 + </b> + </li> + <li> + <i> + ARC_URL_PREFIX + </i> + is the absolute URL where the directory ARC_DIR can + be accessed. ex. + <b> + http://arc-storage-node-1.your-archive.org/2/arc-collection-1/ + </b> + </li> + </ul> + </p> + </subsection> + + <subsection name="url-client"> + <p> + URLs stored in BDB and CDX format ResourceIndexes are + <i>canonicalized</i> to a more generic form. Before + performing a lookup operation on the ResourceIndex, the same + canonicalization function is applied to requested URLs.
This + tool will read space(" ") delimited lines from STDIN, and + output the same lines on STDOUT, but with one column + altered. The column that is changed is assumed to be a URL, + and the version output is the canonicalized form of the + input URL. + </p> + <p> + This tool is mostly useful for debugging the + canonicalization function, but can also be used, if the + canonicalization function is altered, to update an existing + CDX index, without recreating CDX files from original ARCs. + </p> + <p> + <code> + bin/url-client [-cdx] [-f FIELD] + </code> + <ul> + <li> + <i>-cdx</i> Pass thru lines prefixed with " CDX " + unchanged. + </li> + <li> + <i>-f FIELD</i> alter column FIELD of each line, + instead of the default column 1. + </li> + </ul> + </p> + </subsection> + + </section> + + + <section name="ArcProxy and LocationDB application"> + + <p> + + The Wayback software includes an additional application, the ArcProxy, + which can simplify some distributed ResourceStore implementations. The + ArcProxy application exposes two external services, one used to + configure the underlying database mapping ARC filenames to the actual, + fully qualified HTTP 1.1 URL, and a second service which reverse proxies + incoming HTTP 1.1 range requests to appropriate back-end storage nodes. + + </p> + + <p> + The <b>arcproxy</b> reverse proxy service allows one or more HttpARCResourceStore + instances to configure a single URL prefix where all ARC files are + assumed to be located. This reverse proxy then uses a BDB JE to find the + actual current location of the ARC file, and forward the request to the + actual host holding the ARC file. + </p> + + <p> + The <b>locationdb</b> service allows population and management of the + BDB JE database(the <i>locationDB</i>) used by the <b>arcproxy</b> + service. There is also a command line tool, <b>location-client</b> + described elsewhere in this document which provides command line access + to the management of the locationDB. 
+ </p> + + <p> + Adding the following configuration to wayback.xml will expose the + arcproxy and locationdb services: + </p> + <pre> + +<bean id="filelocationdb" class="org.archive.wayback.resourcestore.http.FileLocationDB" + init-method="init"> + <property name="bdbPath" value="/tmp/wayback/arc-db" /> + <property name="bdbName" value="DB1" /> + <property name="logPath" value="/tmp/wayback/arc-db.log" /> +</bean> + +<bean name="8080:arcproxy" class="org.archive.wayback.resourcestore.http.ArcProxyServlet"> + <property name="locationDB" ref="filelocationdb" /> +</bean> + +<bean name="8080:locationdb" class="org.archive.wayback.resourcestore.http.FileLocationDBServlet"> + <property name="locationDB" ref="filelocationdb" /> +</bean> + + </pre> + + </section> + + </body> +</document> This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2007-09-28 23:15:49
|
Revision: 2022 http://archive-access.svn.sourceforge.net/archive-access/?rev=2022&view=rev Author: bradtofel Date: 2007-09-28 16:15:53 -0700 (Fri, 28 Sep 2007) Log Message: ----------- TWEAK: changed link reference to administrator_manual.html for installation instructions Modified Paths: -------------- trunk/archive-access/projects/wayback/dist/src/site/fml/faq.fml Modified: trunk/archive-access/projects/wayback/dist/src/site/fml/faq.fml =================================================================== --- trunk/archive-access/projects/wayback/dist/src/site/fml/faq.fml 2007-09-28 23:15:09 UTC (rev 2021) +++ trunk/archive-access/projects/wayback/dist/src/site/fml/faq.fml 2007-09-28 23:15:53 UTC (rev 2022) @@ -30,7 +30,7 @@ </question> <answer> <p> - See the <a href="user_manual.html">User Manual</a> for information + See the <a href="administrator_manual.html">Administrator Manual</a> for information about installing and using this application. </p> </answer> This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2007-09-28 23:15:07
|
Revision: 2021 http://archive-access.svn.sourceforge.net/archive-access/?rev=2021&view=rev Author: bradtofel Date: 2007-09-28 16:15:09 -0700 (Fri, 28 Sep 2007) Log Message: ----------- TWEAK: prettified. Modified Paths: -------------- trunk/archive-access/projects/wayback/dist/src/site/apt/developer_environment.apt Modified: trunk/archive-access/projects/wayback/dist/src/site/apt/developer_environment.apt =================================================================== --- trunk/archive-access/projects/wayback/dist/src/site/apt/developer_environment.apt 2007-09-28 22:42:11 UTC (rev 2020) +++ trunk/archive-access/projects/wayback/dist/src/site/apt/developer_environment.apt 2007-09-28 23:15:09 UTC (rev 2021) @@ -7,11 +7,11 @@ Getting Eclipse Europa installed - [[1]] download and unpack eclipse-europa with "JEE" support + [[1]] download and unpack {{{http://www.eclipse.org/downloads/}eclipse-europa}} with "JEE" support [[2]] do latest software updates - [[3]] software update: find and install: + [[3]] "Help" \>\> "Software Updates" \>\> "Find and Install..." * Search for new features to install @@ -26,7 +26,7 @@ [[5]] Check box to install "Subclipse" features [[6]] on same dialog, choose "Select Required" button.\ - this will select all dependencies from the Europa Discovery Site. + This will select all dependencies from the Europa Discovery Site. [[7]] "Next", then accept terms of agreement, then "Next", then "Finish" This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2007-09-28 22:42:10
|
Revision: 2020 http://archive-access.svn.sourceforge.net/archive-access/?rev=2020&view=rev Author: bradtofel Date: 2007-09-28 15:42:11 -0700 (Fri, 28 Sep 2007) Log Message: ----------- REFACTOR: moved references to store and index to WaybackCollection. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java 2007-09-28 22:41:20 UTC (rev 2019) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java 2007-09-28 22:42:11 UTC (rev 2020) @@ -35,8 +35,6 @@ import org.archive.wayback.QueryRenderer; import org.archive.wayback.ReplayDispatcher; import org.archive.wayback.RequestParser; -import org.archive.wayback.ResourceIndex; -import org.archive.wayback.ResourceStore; import org.archive.wayback.ResultURIConverter; import org.archive.wayback.WaybackConstants; import org.archive.wayback.accesscontrol.ExclusionFilterFactory; @@ -71,8 +69,7 @@ private boolean useServerName = false; private int contextPort = 0; private String contextName = null; - private ResourceIndex index = null; - private ResourceStore store = null; + private WaybackCollection collection = null; private ReplayDispatcher replay = null; private QueryRenderer query = null; private RequestParser parser = null; @@ -295,7 +292,7 @@ throws IOException, ServletException { Resource resource = null; try { - SearchResults results = index.query(wbRequest); + SearchResults results = collection.getResourceIndex().query(wbRequest); if(!(results instanceof CaptureSearchResults)) { throw new ResourceNotAvailableException("Bad results..."); } @@ -303,7 +300,7 @@ // TODO: check which versions 
are actually accessible right now? SearchResult closest = captureResults.getClosest(wbRequest); - resource = store.retrieveResource(closest); + resource = collection.getResourceStore().retrieveResource(closest); replay.renderResource(httpRequest, httpResponse, wbRequest, closest, resource, uriConverter, captureResults); @@ -321,7 +318,7 @@ throws ServletException, IOException { try { - SearchResults results = index.query(wbRequest); + SearchResults results = collection.getResourceIndex().query(wbRequest); if(results.getResultsType().equals( WaybackConstants.RESULTS_TYPE_CAPTURE)) { @@ -352,20 +349,6 @@ } /** - * @param index the index to set - */ - public void setIndex(ResourceIndex index) { - this.index = index; - } - - /** - * @param store the store to set - */ - public void setStore(ResourceStore store) { - this.store = store; - } - - /** * @param replay the replay to set */ public void setReplay(ReplayDispatcher replay) { @@ -444,4 +427,12 @@ public void setAuthentication(BooleanOperator<WaybackRequest> authentication) { this.authentication = authentication; } + + public WaybackCollection getCollection() { + return collection; + } + + public void setCollection(WaybackCollection collection) { + this.collection = collection; + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2007-09-28 22:41:16
|
Revision: 2019 http://archive-access.svn.sourceforge.net/archive-access/?rev=2019&view=rev Author: bradtofel Date: 2007-09-28 15:41:20 -0700 (Fri, 28 Sep 2007) Log Message: ----------- BUGFIX: now throws ConfigurationException when no store or index have been configured Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/WaybackCollection.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/WaybackCollection.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/WaybackCollection.java 2007-09-28 22:40:12 UTC (rev 2018) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/WaybackCollection.java 2007-09-28 22:41:20 UTC (rev 2019) @@ -26,9 +26,11 @@ import org.archive.wayback.ResourceIndex; import org.archive.wayback.ResourceStore; +import org.archive.wayback.exception.ConfigurationException; /** - * Abstraction point for sharing document collection and index across multiple AccessPoints. + * Abstraction point for sharing document collection and index across multiple + * AccessPoints. 
* * @author brad * @version $Date$, $Revision$ @@ -36,13 +38,19 @@ public class WaybackCollection { private ResourceStore resourceStore = null; private ResourceIndex resourceIndex = null; - public ResourceStore getResourceStore() { + public ResourceStore getResourceStore() throws ConfigurationException { + if(resourceStore == null) { + throw new ConfigurationException("No resourceStore declared"); + } return resourceStore; } public void setResourceStore(ResourceStore resourceStore) { this.resourceStore = resourceStore; } - public ResourceIndex getResourceIndex() { + public ResourceIndex getResourceIndex() throws ConfigurationException { + if(resourceIndex == null) { + throw new ConfigurationException("No resourceIndex declared"); + } return resourceIndex; } public void setResourceIndex(ResourceIndex resourceIndex) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2007-09-28 22:40:12
|
Revision: 2018 http://archive-access.svn.sourceforge.net/archive-access/?rev=2018&view=rev Author: bradtofel Date: 2007-09-28 15:40:12 -0700 (Fri, 28 Sep 2007) Log Message: ----------- FEATURE: now attempts to create missing tmp directory. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLCacher.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLCacher.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLCacher.java 2007-09-28 22:39:36 UTC (rev 2017) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLCacher.java 2007-09-28 22:40:12 UTC (rev 2018) @@ -117,7 +117,7 @@ try { int code = http.executeMethod(method); os.close(); - // TODO: Contstant 200 + // TODO: Constant 200 if(code != 200) { throw new LiveDocumentNotAvailableException(urlString); } @@ -449,6 +449,9 @@ */ public void setTmpDir(String tmpDir) { this.tmpDir = new File(tmpDir); + if(!this.tmpDir.exists()) { + this.tmpDir.mkdirs(); + } } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 2017 http://archive-access.svn.sourceforge.net/archive-access/?rev=2017&view=rev Author: bradtofel Date: 2007-09-28 15:39:36 -0700 (Fri, 28 Sep 2007) Log Message: ----------- FEATURE: now attempts to create missing tmp and target directories. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/indexer/IndexClient.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/indexer/IndexClient.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/indexer/IndexClient.java 2007-09-28 22:38:08 UTC (rev 2016) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/indexer/IndexClient.java 2007-09-28 22:39:36 UTC (rev 2017) @@ -114,6 +114,9 @@ // assume a local directory: File toBeMergedDir = new File(target); if(!toBeMergedDir.exists()) { + toBeMergedDir.mkdirs(); + } + if(!toBeMergedDir.exists()) { throw new IOException("Target " + target + " does not exist"); } if(!toBeMergedDir.isDirectory()) { @@ -360,5 +363,8 @@ */ public void setTmpDir(String tmpDir) { this.tmpDir = new File(tmpDir); + if(!this.tmpDir.isDirectory()) { + this.tmpDir.mkdirs(); + } } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 2016 http://archive-access.svn.sourceforge.net/archive-access/?rev=2016&view=rev Author: bradtofel Date: 2007-09-28 15:38:08 -0700 (Fri, 28 Sep 2007) Log Message: ----------- TWEAK: removed unused static property name. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/RemoteResourceIndex.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/RemoteResourceIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/RemoteResourceIndex.java 2007-09-28 18:44:25 UTC (rev 2015) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/RemoteResourceIndex.java 2007-09-28 22:38:08 UTC (rev 2016) @@ -64,11 +64,6 @@ private static final Logger LOGGER = Logger.getLogger(RemoteResourceIndex .class.getName()); - /** - * name of the property value indicating the url prefix of the remote index. - */ - public final static String SEARCH_BASE_URL = "resourceindex.baseurl"; - private String searchUrlBase; private DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2007-09-28 18:44:21
|
Revision: 2015 http://archive-access.svn.sourceforge.net/archive-access/?rev=2015&view=rev Author: bradtofel Date: 2007-09-28 11:44:25 -0700 (Fri, 28 Sep 2007) Log Message: ----------- INITIAL REV: small container wrapping ResourceIndex and ResourceStore Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/WaybackCollection.java Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/WaybackCollection.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/WaybackCollection.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/WaybackCollection.java 2007-09-28 18:44:25 UTC (rev 2015) @@ -0,0 +1,51 @@ +/* WaybackCollection + * + * $Id$ + * + * Created on 11:28:52 AM Sep 28, 2007. + * + * Copyright (C) 2007 Internet Archive. + * + * This file is part of wayback. + * + * wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.webapp; + +import org.archive.wayback.ResourceIndex; +import org.archive.wayback.ResourceStore; + +/** + * Abstraction point for sharing document collection and index across multiple AccessPoints. 
+ * + * @author brad + * @version $Date$, $Revision$ + */ +public class WaybackCollection { + private ResourceStore resourceStore = null; + private ResourceIndex resourceIndex = null; + public ResourceStore getResourceStore() { + return resourceStore; + } + public void setResourceStore(ResourceStore resourceStore) { + this.resourceStore = resourceStore; + } + public ResourceIndex getResourceIndex() { + return resourceIndex; + } + public void setResourceIndex(ResourceIndex resourceIndex) { + this.resourceIndex = resourceIndex; + } +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2007-09-28 18:43:29
|
Revision: 2014 http://archive-access.svn.sourceforge.net/archive-access/?rev=2014&view=rev Author: bradtofel Date: 2007-09-28 11:43:31 -0700 (Fri, 28 Sep 2007) Log Message: ----------- REFACTOR: changed name of WaybackContext >> AccessPoint Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/RequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/UIResults.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/WaybackRequest.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/proxy/ProxyReplayRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/BaseRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/CompositeRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/FormRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/OpenSearchRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/PathRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/NutchResourceIndex.java Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java Removed Paths: ------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/WaybackContext.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/RequestParser.java 
=================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/RequestParser.java 2007-09-28 00:55:20 UTC (rev 2013) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/RequestParser.java 2007-09-28 18:43:31 UTC (rev 2014) @@ -28,7 +28,7 @@ import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.exception.BadQueryException; -import org.archive.wayback.webapp.WaybackContext; +import org.archive.wayback.webapp.AccessPoint; /** * @@ -47,7 +47,7 @@ * @throws BadQueryException */ public abstract WaybackRequest parse(HttpServletRequest httpRequest, - WaybackContext wbContext) throws BadQueryException; + AccessPoint wbContext) throws BadQueryException; /** * @param maxRecords */ Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/UIResults.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/UIResults.java 2007-09-28 00:55:20 UTC (rev 2013) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/UIResults.java 2007-09-28 18:43:31 UTC (rev 2014) @@ -29,7 +29,7 @@ import javax.servlet.http.HttpServletRequest; import org.archive.wayback.util.StringFormatter; -import org.archive.wayback.webapp.WaybackContext; +import org.archive.wayback.webapp.AccessPoint; /** * @@ -189,7 +189,7 @@ */ public String getContextConfig(final String configName) { String configValue = null; - WaybackContext context = getWbRequest().getContext(); + AccessPoint context = getWbRequest().getContext(); if(context != null) { Properties configs = context.getConfigs(); if(configs != null) { Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/WaybackRequest.java 
=================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/WaybackRequest.java 2007-09-28 00:55:20 UTC (rev 2013) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/WaybackRequest.java 2007-09-28 18:43:31 UTC (rev 2014) @@ -39,7 +39,7 @@ import org.archive.wayback.requestparser.OpenSearchRequestParser; import org.archive.wayback.util.ObjectFilter; import org.archive.wayback.util.StringFormatter; -import org.archive.wayback.webapp.WaybackContext; +import org.archive.wayback.webapp.AccessPoint; /** * Abstraction of all the data associated with a users request to the Wayback @@ -56,7 +56,7 @@ private String contextPrefix = null; private String serverPrefix = null; - private WaybackContext context = null; + private AccessPoint context = null; private ObjectFilter<SearchResult> exclusionFilter = null; private HashMap<String,String> filters = new HashMap<String,String>(); @@ -383,14 +383,14 @@ /** * @return the context */ - public WaybackContext getContext() { + public AccessPoint getContext() { return context; } /** * @param context the context to set */ - public void setContext(WaybackContext context) { + public void setContext(AccessPoint context) { this.context = context; } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixRequestParser.java 2007-09-28 00:55:20 UTC (rev 2013) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixRequestParser.java 2007-09-28 18:43:31 UTC (rev 2014) @@ -35,7 +35,7 @@ import org.archive.wayback.core.Timestamp; import org.archive.wayback.core.WaybackRequest; import 
org.archive.wayback.exception.BadQueryException; -import org.archive.wayback.webapp.WaybackContext; +import org.archive.wayback.webapp.AccessPoint; /** * @@ -75,7 +75,7 @@ * @see org.archive.wayback.RequestParser#parse(javax.servlet.http.HttpServletRequest, org.archive.wayback.webapp.WaybackContext) */ public WaybackRequest parse(HttpServletRequest httpRequest, - WaybackContext wbContext) throws BadQueryException { + AccessPoint wbContext) throws BadQueryException { WaybackRequest wbRequest = null; String server = httpRequest.getServerName() + Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/proxy/ProxyReplayRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/proxy/ProxyReplayRequestParser.java 2007-09-28 00:55:20 UTC (rev 2013) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/proxy/ProxyReplayRequestParser.java 2007-09-28 18:43:31 UTC (rev 2014) @@ -35,7 +35,7 @@ import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.exception.BadQueryException; import org.archive.wayback.requestparser.BaseRequestParser; -import org.archive.wayback.webapp.WaybackContext; +import org.archive.wayback.webapp.AccessPoint; /** * @@ -66,7 +66,7 @@ */ @Override public WaybackRequest parse(HttpServletRequest httpRequest, - WaybackContext wbContext) throws BadQueryException { + AccessPoint wbContext) throws BadQueryException { if (isLocalRequest(httpRequest)) { // local means query: let the following RequestParsers have a go Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/BaseRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/BaseRequestParser.java 2007-09-28 00:55:20 
UTC (rev 2013) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/BaseRequestParser.java 2007-09-28 18:43:31 UTC (rev 2014) @@ -33,7 +33,7 @@ import org.archive.wayback.core.Timestamp; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.exception.BadQueryException; -import org.archive.wayback.webapp.WaybackContext; +import org.archive.wayback.webapp.AccessPoint; /** * Class that implements the RequestParser interface, and also understands how @@ -133,7 +133,7 @@ * @see org.archive.wayback.RequestParser#parse(javax.servlet.http.HttpServletRequest) */ public abstract WaybackRequest parse(HttpServletRequest httpRequest, - WaybackContext wbContext) throws BadQueryException; + AccessPoint wbContext) throws BadQueryException; /** * @return the maxRecords Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/CompositeRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/CompositeRequestParser.java 2007-09-28 00:55:20 UTC (rev 2013) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/CompositeRequestParser.java 2007-09-28 18:43:31 UTC (rev 2014) @@ -29,7 +29,7 @@ import org.archive.wayback.RequestParser; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.exception.BadQueryException; -import org.archive.wayback.webapp.WaybackContext; +import org.archive.wayback.webapp.AccessPoint; /** * @@ -64,7 +64,7 @@ * @see org.archive.wayback.RequestParser#parse(javax.servlet.http.HttpServletRequest) */ public WaybackRequest parse(HttpServletRequest httpRequest, - WaybackContext wbContext) throws BadQueryException { + AccessPoint wbContext) throws BadQueryException { WaybackRequest wbRequest = null; Modified: 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/FormRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/FormRequestParser.java 2007-09-28 00:55:20 UTC (rev 2013) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/FormRequestParser.java 2007-09-28 18:43:31 UTC (rev 2014) @@ -32,7 +32,7 @@ import org.archive.wayback.WaybackConstants; import org.archive.wayback.core.WaybackRequest; -import org.archive.wayback.webapp.WaybackContext; +import org.archive.wayback.webapp.AccessPoint; /** * @@ -51,7 +51,7 @@ * WaybackRequest object, except the Submit button argument. */ public WaybackRequest parse(HttpServletRequest httpRequest, - WaybackContext wbContext) { + AccessPoint wbContext) { WaybackRequest wbRequest = null; @SuppressWarnings("unchecked") Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/OpenSearchRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/OpenSearchRequestParser.java 2007-09-28 00:55:20 UTC (rev 2013) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/OpenSearchRequestParser.java 2007-09-28 18:43:31 UTC (rev 2014) @@ -32,7 +32,7 @@ import org.archive.wayback.WaybackConstants; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.exception.BadQueryException; -import org.archive.wayback.webapp.WaybackContext; +import org.archive.wayback.webapp.AccessPoint; /** * @@ -74,7 +74,7 @@ * info from the httpRequest object. 
*/ public WaybackRequest parse(HttpServletRequest httpRequest, - WaybackContext wbContext) throws BadQueryException { + AccessPoint wbContext) throws BadQueryException { WaybackRequest wbRequest = null; @SuppressWarnings("unchecked") Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/PathRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/PathRequestParser.java 2007-09-28 00:55:20 UTC (rev 2013) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/PathRequestParser.java 2007-09-28 18:43:31 UTC (rev 2014) @@ -28,7 +28,7 @@ import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.exception.BadQueryException; -import org.archive.wayback.webapp.WaybackContext; +import org.archive.wayback.webapp.AccessPoint; /** * Subclass of RequestParser that acquires key request information from the @@ -51,7 +51,7 @@ */ @Override public WaybackRequest parse(HttpServletRequest httpRequest, - WaybackContext wbContext) throws BadQueryException { + AccessPoint wbContext) throws BadQueryException { String queryString = httpRequest.getQueryString(); String origRequestPath = httpRequest.getRequestURI(); Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/NutchResourceIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/NutchResourceIndex.java 2007-09-28 00:55:20 UTC (rev 2013) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/NutchResourceIndex.java 2007-09-28 18:43:31 UTC (rev 2014) @@ -36,10 +36,10 @@ import org.archive.wayback.ResourceIndex; import org.archive.wayback.WaybackConstants; +import 
org.archive.wayback.core.CaptureSearchResults; import org.archive.wayback.core.SearchResult; import org.archive.wayback.core.SearchResults; import org.archive.wayback.core.Timestamp; -import org.archive.wayback.core.UrlSearchResults; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.exception.AccessControlException; import org.archive.wayback.exception.BadQueryException; @@ -68,11 +68,12 @@ private static final String NUTCH_NS = "http://www.nutch.org/opensearchrss/1.0/"; private String searchUrlBase; - private DocumentBuilderFactory factory; + private DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); private DocumentBuilder builder; private static final String NUTCH_ARCNAME = "arcname"; private static final String NUTCH_ARCOFFSET = "arcoffset"; private static final String NUTCH_ARCDATE = "tstamp"; + private static final String NUTCH_ARCDATE_ALT = "arcdate"; private static final String NUTCH_DIGEST = "digest"; private static final String NUTCH_PRIMARY_TYPE = "primaryType"; private static final String NUTCH_SUB_TYPE = "subType"; @@ -95,7 +96,7 @@ LOGGER.info("initializing NutchResourceIndex..."); LOGGER.info("Using base search url " + this.searchUrlBase); - this.factory = DocumentBuilderFactory.newInstance(); +// this.factory = DocumentBuilderFactory.newInstance(); this.factory.setNamespaceAware(true); try { this.builder = this.factory.newDocumentBuilder(); @@ -129,7 +130,15 @@ e.getMessage()); } - SearchResults results = new UrlSearchResults(); + SearchResults results; + String type = wbRequest.get(WaybackConstants.REQUEST_TYPE); + if(type.equals(WaybackConstants.REQUEST_REPLAY_QUERY) || + type.equals(WaybackConstants.REQUEST_URL_QUERY)) { + results = new CaptureSearchResults(); + } else { + // TODO: this is wrong, but needs exploration into what NutchWax can actually do. 
+ throw new BadQueryException("Unable to perform path prefix requests with this index type"); + } NodeList channel = getSearchChannel(document); NodeList nodes = getSearchItems(document); @@ -174,7 +183,8 @@ return results; } - private SearchResult elementToSearchResult(Element e) { + private SearchResult elementToSearchResult(Element e) + throws ResourceIndexNotAvailableException { SearchResult result = new SearchResult(); @@ -184,6 +194,12 @@ // The date in nutchwax is now named 'tstamp' and its // 17 characters rather than 14. Pass first 14 only. String d = getNodeNutchContent(e,NUTCH_ARCDATE); + if(d == null) { + d = getNodeNutchContent(e,NUTCH_ARCDATE_ALT); + } + if(d == null) { + throw new ResourceIndexNotAvailableException("Missing arcdate field in search results"); + } if (d.length() == 17) { d = d.substring(0, 14); } @@ -289,8 +305,8 @@ } // when searching for exacturl, we are mostly // interested in the different versions over the time - ms.append("&sort=date"); - ms.append("&reverse=true"); +// ms.append("&sort=date"); +// ms.append("&reverse=true"); } ms.append("&hitsPerPage=").append(hitsPerPage); ms.append("&start=").append(start); Copied: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java (from rev 1996, trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/WaybackContext.java) =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java 2007-09-28 18:43:31 UTC (rev 2014) @@ -0,0 +1,447 @@ +/* WaybackContext + * + * $Id$ + * + * Created on 5:37:31 PM Apr 20, 2007. + * + * Copyright (C) 2007 Internet Archive. + * + * This file is part of wayback-webapp. 
+ * + * wayback-webapp is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback-webapp is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with wayback-webapp; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.webapp; + +import java.io.IOException; +import java.util.Properties; + +import javax.servlet.RequestDispatcher; +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.archive.wayback.QueryRenderer; +import org.archive.wayback.ReplayDispatcher; +import org.archive.wayback.RequestParser; +import org.archive.wayback.ResourceIndex; +import org.archive.wayback.ResourceStore; +import org.archive.wayback.ResultURIConverter; +import org.archive.wayback.WaybackConstants; +import org.archive.wayback.accesscontrol.ExclusionFilterFactory; +import org.archive.wayback.core.CaptureSearchResults; +import org.archive.wayback.core.Resource; +import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.SearchResults; +import org.archive.wayback.core.UIResults; +import org.archive.wayback.core.WaybackRequest; +import org.archive.wayback.exception.AuthenticationControlException; +import org.archive.wayback.exception.BadQueryException; +import org.archive.wayback.exception.ResourceNotAvailableException; +import org.archive.wayback.exception.WaybackException; +import org.archive.wayback.util.operator.BooleanOperator; +import 
org.springframework.beans.factory.BeanNameAware; + +/** + * Retains all information about a particular Wayback configuration + * within a ServletContext, including holding references to the + * implementation instances of the primary Wayback classes: + * + * ResourceIndex + * ResourceStore + * QueryUI + * ReplayUI + * + * @author brad + * @version $Date$, $Revision$ + */ +public class AccessPoint implements RequestContext, BeanNameAware { + + private boolean useServerName = false; + private int contextPort = 0; + private String contextName = null; + private ResourceIndex index = null; + private ResourceStore store = null; + private ReplayDispatcher replay = null; + private QueryRenderer query = null; + private RequestParser parser = null; + private ResultURIConverter uriConverter = null; + private Properties configs = null; + private ExclusionFilterFactory exclusionFactory = null; + private BooleanOperator<WaybackRequest> authentication = null; + + /** + * + */ + public AccessPoint() { + + } + + /* (non-Javadoc) + * @see org.springframework.beans.factory.BeanNameAware#setBeanName(java.lang.String) + */ + public void setBeanName(String beanName) { + // TODO Auto-generated method stub + this.contextName = ""; + int idx = beanName.indexOf(":"); + if(idx > -1) { + contextPort = Integer.valueOf(beanName.substring(0,idx)); + contextName = beanName.substring(idx + 1); + } else { + try { + this.contextPort = Integer.valueOf(beanName); + } catch(NumberFormatException e) { + e.printStackTrace(); + } + } + } + /** + * @param httpRequest + * @return the prefix of paths recieved by this server that are handled by + * this WaybackContext, including the trailing '/' + */ + public String getContextPath(HttpServletRequest httpRequest) { +// if(contextPort != 0) { +// return httpRequest.getContextPath(); +// } + String httpContextPath = httpRequest.getContextPath(); + if(contextName.length() == 0) { + return httpContextPath + "/"; + } + return httpContextPath + "/" + contextName + 
"/"; + } + + /** + * @param httpRequest + * @param includeQuery + * @return the portion of the request following the path to this context + * without leading '/' + */ + private String translateRequest(HttpServletRequest httpRequest, + boolean includeQuery) { + + String origRequestPath = httpRequest.getRequestURI(); + if(includeQuery) { + String queryString = httpRequest.getQueryString(); + if (queryString != null) { + origRequestPath += "?" + queryString; + } + } + String contextPath = getContextPath(httpRequest); + if (!origRequestPath.startsWith(contextPath)) { + return null; + } + return origRequestPath.substring(contextPath.length()); + } + + /** + * @param httpRequest + * @return the portion of the request following the path to this context, + * including any query information,without leading '/' + */ + public String translateRequestPathQuery(HttpServletRequest httpRequest) { + return translateRequest(httpRequest,true); + } + + /** + * @param httpRequest + * @return the portion of the request following the path to this context, + * excluding any query information, without leading '/' + */ + public String translateRequestPath(HttpServletRequest httpRequest) { + return translateRequest(httpRequest,false); + } + + /** + * Construct an absolute URL that points to the root of the context that + * recieved the request, including a trailing "/". + * + * @return String absolute URL pointing to the Context root where the + * request was revieved. 
+ */ + private String getAbsoluteContextPrefix(HttpServletRequest httpRequest, + boolean useRequestServer) { + + StringBuilder prefix = new StringBuilder(); + prefix.append(WaybackConstants.HTTP_URL_PREFIX); + String waybackPort = null; + if(useRequestServer) { + prefix.append(httpRequest.getLocalName()); + waybackPort = String.valueOf(httpRequest.getLocalPort()); + } else { + prefix.append(httpRequest.getServerName()); + waybackPort = String.valueOf(httpRequest.getServerPort()); + } + if (!waybackPort.equals(WaybackConstants.HTTP_DEFAULT_PORT)) { + prefix.append(":").append(waybackPort); + } + String contextPath = getContextPath(httpRequest); +// if(contextPath.length() > 1) { +// prefix.append(contextPath); +// } else { +// prefix.append(contextPath); +// } + prefix.append(contextPath); + return prefix.toString(); + } + + /** + * @param httpRequest + * @return absolute URL pointing to the base of this WaybackContext, using + * Server and port information from the HttpServletRequest argument. + */ + public String getAbsoluteServerPrefix(HttpServletRequest httpRequest) { + return getAbsoluteContextPrefix(httpRequest, true); + } + + /** + * @param httpRequest + * @return absolute URL pointing to the base of this WaybackContext, using + * Canonical server and port information. 
+ */ + public String getAbsoluteLocalPrefix(HttpServletRequest httpRequest) { + return getAbsoluteContextPrefix(httpRequest, useServerName); + } + + private boolean dispatchLocal(HttpServletRequest httpRequest, + HttpServletResponse httpResponse) + throws ServletException, IOException { + + WaybackRequest wbRequest = new WaybackRequest(); + wbRequest.setContextPrefix(getAbsoluteLocalPrefix(httpRequest)); + wbRequest.setContext(this); + UIResults uiResults = new UIResults(wbRequest); + String translated = "/" + translateRequestPathQuery(httpRequest); + uiResults.storeInRequest(httpRequest,translated); + RequestDispatcher dispatcher = null; +// // special case for the front '/' page: +// if(translated.length() == 0) { +// translated = "/"; +// } else { +// translated = "/" + translated; +// } + dispatcher = httpRequest.getRequestDispatcher(translated); + if(dispatcher != null) { + dispatcher.forward(httpRequest, httpResponse); + return true; + } + return false; + } + + /** + * @param httpRequest + * @param httpResponse + * @return true if the request was actually handled + * @throws ServletException + * @throws IOException + */ + public boolean handleRequest(HttpServletRequest httpRequest, + HttpServletResponse httpResponse) + throws ServletException, IOException { + + WaybackRequest wbRequest = null; + boolean handled = false; + + try { + wbRequest = parser.parse(httpRequest, this); + + if(wbRequest != null) { + wbRequest.setContext(this); + handled = true; + wbRequest.setContextPrefix(getAbsoluteLocalPrefix(httpRequest)); + if(authentication != null) { + if(!authentication.isTrue(wbRequest)) { + throw new AuthenticationControlException("Not authorized"); + } + } + + if(exclusionFactory != null) { + wbRequest.setExclusionFilter(exclusionFactory.get()); + } + if(wbRequest.isReplayRequest()) { + + handleReplay(wbRequest,httpRequest,httpResponse); + + } else { + + handleQuery(wbRequest,httpRequest,httpResponse); + } + } else { + handled = 
dispatchLocal(httpRequest,httpResponse); + } + + } catch (BadQueryException e) { + query.renderException(httpRequest, httpResponse, wbRequest, e); + } catch (AuthenticationControlException e) { + query.renderException(httpRequest, httpResponse, wbRequest, e); + } + + return handled; + } + + private void handleReplay(WaybackRequest wbRequest, + HttpServletRequest httpRequest, HttpServletResponse httpResponse) + throws IOException, ServletException { + Resource resource = null; + try { + SearchResults results = index.query(wbRequest); + if(!(results instanceof CaptureSearchResults)) { + throw new ResourceNotAvailableException("Bad results..."); + } + CaptureSearchResults captureResults = (CaptureSearchResults) results; + + // TODO: check which versions are actually accessible right now? + SearchResult closest = captureResults.getClosest(wbRequest); + resource = store.retrieveResource(closest); + + replay.renderResource(httpRequest, httpResponse, wbRequest, + closest, resource, uriConverter, captureResults); + } catch(WaybackException e) { + replay.renderException(httpRequest, httpResponse, wbRequest, e); + } finally { + if(resource != null) { + resource.close(); + } + } + } + + private void handleQuery(WaybackRequest wbRequest, + HttpServletRequest httpRequest, HttpServletResponse httpResponse) + throws ServletException, IOException { + + try { + SearchResults results = index.query(wbRequest); + if(results.getResultsType().equals( + WaybackConstants.RESULTS_TYPE_CAPTURE)) { + + query.renderUrlResults(httpRequest,httpResponse,wbRequest, + results,uriConverter); + + } else { + query.renderUrlPrefixResults(httpRequest,httpResponse,wbRequest, + results,uriConverter); + } + } catch(WaybackException e) { + query.renderException(httpRequest, httpResponse, wbRequest, e); + } + } + + /** + * @param contextPort the contextPort to set + */ + public void setContextPort(int contextPort) { + this.contextPort = contextPort; + } + + /** + * @param contextName the contextName to set 
+ */ + public void setContextName(String contextName) { + this.contextName = contextName; + } + + /** + * @param index the index to set + */ + public void setIndex(ResourceIndex index) { + this.index = index; + } + + /** + * @param store the store to set + */ + public void setStore(ResourceStore store) { + this.store = store; + } + + /** + * @param replay the replay to set + */ + public void setReplay(ReplayDispatcher replay) { + this.replay = replay; + } + + /** + * @param query the query to set + */ + public void setQuery(QueryRenderer query) { + this.query = query; + } + + /** + * @param parser the parser to set + */ + public void setParser(RequestParser parser) { + this.parser = parser; + } + + /** + * @param uriConverter the uriConverter to set + */ + public void setUriConverter(ResultURIConverter uriConverter) { + this.uriConverter = uriConverter; + } + + + /** + * @return the contextPort + */ + public int getContextPort() { + return contextPort; + } + + /** + * @return the configs + */ + public Properties getConfigs() { + return configs; + } + + /** + * @param configs the configs to set + */ + public void setConfigs(Properties configs) { + this.configs = configs; + } + + /** + * @return the useServerName + */ + public boolean isUseServerName() { + return useServerName; + } + + /** + * @param useServerName the useServerName to set + */ + public void setUseServerName(boolean useServerName) { + this.useServerName = useServerName; + } + + public ExclusionFilterFactory getExclusionFactory() { + return exclusionFactory; + } + + public void setExclusionFactory(ExclusionFilterFactory exclusionFactory) { + this.exclusionFactory = exclusionFactory; + } + + public BooleanOperator<WaybackRequest> getAuthentication() { + return authentication; + } + + public void setAuthentication(BooleanOperator<WaybackRequest> authentication) { + this.authentication = authentication; + } +} Deleted: 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/WaybackContext.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/WaybackContext.java 2007-09-28 00:55:20 UTC (rev 2013) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/WaybackContext.java 2007-09-28 18:43:31 UTC (rev 2014) @@ -1,447 +0,0 @@ -/* WaybackContext - * - * $Id$ - * - * Created on 5:37:31 PM Apr 20, 2007. - * - * Copyright (C) 2007 Internet Archive. - * - * This file is part of wayback-webapp. - * - * wayback-webapp is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * any later version. - * - * wayback-webapp is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. 
- * - * You should have received a copy of the GNU Lesser Public License - * along with wayback-webapp; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -package org.archive.wayback.webapp; - -import java.io.IOException; -import java.util.Properties; - -import javax.servlet.RequestDispatcher; -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import org.archive.wayback.QueryRenderer; -import org.archive.wayback.ReplayDispatcher; -import org.archive.wayback.RequestParser; -import org.archive.wayback.ResourceIndex; -import org.archive.wayback.ResourceStore; -import org.archive.wayback.ResultURIConverter; -import org.archive.wayback.WaybackConstants; -import org.archive.wayback.accesscontrol.ExclusionFilterFactory; -import org.archive.wayback.core.CaptureSearchResults; -import org.archive.wayback.core.Resource; -import org.archive.wayback.core.SearchResult; -import org.archive.wayback.core.SearchResults; -import org.archive.wayback.core.UIResults; -import org.archive.wayback.core.WaybackRequest; -import org.archive.wayback.exception.AuthenticationControlException; -import org.archive.wayback.exception.BadQueryException; -import org.archive.wayback.exception.ResourceNotAvailableException; -import org.archive.wayback.exception.WaybackException; -import org.archive.wayback.util.operator.BooleanOperator; -import org.springframework.beans.factory.BeanNameAware; - -/** - * Retains all information about a particular Wayback configuration - * within a ServletContext, including holding references to the - * implementation instances of the primary Wayback classes: - * - * ResourceIndex - * ResourceStore - * QueryUI - * ReplayUI - * - * @author brad - * @version $Date$, $Revision$ - */ -public class WaybackContext implements RequestContext, BeanNameAware { - - private boolean useServerName = false; - private int contextPort = 
0; - private String contextName = null; - private ResourceIndex index = null; - private ResourceStore store = null; - private ReplayDispatcher replay = null; - private QueryRenderer query = null; - private RequestParser parser = null; - private ResultURIConverter uriConverter = null; - private Properties configs = null; - private ExclusionFilterFactory exclusionFactory = null; - private BooleanOperator<WaybackRequest> authentication = null; - - /** - * - */ - public WaybackContext() { - - } - - /* (non-Javadoc) - * @see org.springframework.beans.factory.BeanNameAware#setBeanName(java.lang.String) - */ - public void setBeanName(String beanName) { - // TODO Auto-generated method stub - this.contextName = ""; - int idx = beanName.indexOf(":"); - if(idx > -1) { - contextPort = Integer.valueOf(beanName.substring(0,idx)); - contextName = beanName.substring(idx + 1); - } else { - try { - this.contextPort = Integer.valueOf(beanName); - } catch(NumberFormatException e) { - e.printStackTrace(); - } - } - } - /** - * @param httpRequest - * @return the prefix of paths recieved by this server that are handled by - * this WaybackContext, including the trailing '/' - */ - public String getContextPath(HttpServletRequest httpRequest) { -// if(contextPort != 0) { -// return httpRequest.getContextPath(); -// } - String httpContextPath = httpRequest.getContextPath(); - if(contextName.length() == 0) { - return httpContextPath + "/"; - } - return httpContextPath + "/" + contextName + "/"; - } - - /** - * @param httpRequest - * @param includeQuery - * @return the portion of the request following the path to this context - * without leading '/' - */ - private String translateRequest(HttpServletRequest httpRequest, - boolean includeQuery) { - - String origRequestPath = httpRequest.getRequestURI(); - if(includeQuery) { - String queryString = httpRequest.getQueryString(); - if (queryString != null) { - origRequestPath += "?" 
+ queryString; - } - } - String contextPath = getContextPath(httpRequest); - if (!origRequestPath.startsWith(contextPath)) { - return null; - } - return origRequestPath.substring(contextPath.length()); - } - - /** - * @param httpRequest - * @return the portion of the request following the path to this context, - * including any query information,without leading '/' - */ - public String translateRequestPathQuery(HttpServletRequest httpRequest) { - return translateRequest(httpRequest,true); - } - - /** - * @param httpRequest - * @return the portion of the request following the path to this context, - * excluding any query information, without leading '/' - */ - public String translateRequestPath(HttpServletRequest httpRequest) { - return translateRequest(httpRequest,false); - } - - /** - * Construct an absolute URL that points to the root of the context that - * recieved the request, including a trailing "/". - * - * @return String absolute URL pointing to the Context root where the - * request was revieved. 
- */ - private String getAbsoluteContextPrefix(HttpServletRequest httpRequest, - boolean useRequestServer) { - - StringBuilder prefix = new StringBuilder(); - prefix.append(WaybackConstants.HTTP_URL_PREFIX); - String waybackPort = null; - if(useRequestServer) { - prefix.append(httpRequest.getLocalName()); - waybackPort = String.valueOf(httpRequest.getLocalPort()); - } else { - prefix.append(httpRequest.getServerName()); - waybackPort = String.valueOf(httpRequest.getServerPort()); - } - if (!waybackPort.equals(WaybackConstants.HTTP_DEFAULT_PORT)) { - prefix.append(":").append(waybackPort); - } - String contextPath = getContextPath(httpRequest); -// if(contextPath.length() > 1) { -// prefix.append(contextPath); -// } else { -// prefix.append(contextPath); -// } - prefix.append(contextPath); - return prefix.toString(); - } - - /** - * @param httpRequest - * @return absolute URL pointing to the base of this WaybackContext, using - * Server and port information from the HttpServletRequest argument. - */ - public String getAbsoluteServerPrefix(HttpServletRequest httpRequest) { - return getAbsoluteContextPrefix(httpRequest, true); - } - - /** - * @param httpRequest - * @return absolute URL pointing to the base of this WaybackContext, using - * Canonical server and port information. 
- */ - public String getAbsoluteLocalPrefix(HttpServletRequest httpRequest) { - return getAbsoluteContextPrefix(httpRequest, useServerName); - } - - private boolean dispatchLocal(HttpServletRequest httpRequest, - HttpServletResponse httpResponse) - throws ServletException, IOException { - - WaybackRequest wbRequest = new WaybackRequest(); - wbRequest.setContextPrefix(getAbsoluteLocalPrefix(httpRequest)); - wbRequest.setContext(this); - UIResults uiResults = new UIResults(wbRequest); - String translated = "/" + translateRequestPathQuery(httpRequest); - uiResults.storeInRequest(httpRequest,translated); - RequestDispatcher dispatcher = null; -// // special case for the front '/' page: -// if(translated.length() == 0) { -// translated = "/"; -// } else { -// translated = "/" + translated; -// } - dispatcher = httpRequest.getRequestDispatcher(translated); - if(dispatcher != null) { - dispatcher.forward(httpRequest, httpResponse); - return true; - } - return false; - } - - /** - * @param httpRequest - * @param httpResponse - * @return true if the request was actually handled - * @throws ServletException - * @throws IOException - */ - public boolean handleRequest(HttpServletRequest httpRequest, - HttpServletResponse httpResponse) - throws ServletException, IOException { - - WaybackRequest wbRequest = null; - boolean handled = false; - - try { - wbRequest = parser.parse(httpRequest, this); - - if(wbRequest != null) { - wbRequest.setContext(this); - handled = true; - wbRequest.setContextPrefix(getAbsoluteLocalPrefix(httpRequest)); - if(authentication != null) { - if(!authentication.isTrue(wbRequest)) { - throw new AuthenticationControlException("Not authorized"); - } - } - - if(exclusionFactory != null) { - wbRequest.setExclusionFilter(exclusionFactory.get()); - } - if(wbRequest.isReplayRequest()) { - - handleReplay(wbRequest,httpRequest,httpResponse); - - } else { - - handleQuery(wbRequest,httpRequest,httpResponse); - } - } else { - handled = 
dispatchLocal(httpRequest,httpResponse); - } - - } catch (BadQueryException e) { - query.renderException(httpRequest, httpResponse, wbRequest, e); - } catch (AuthenticationControlException e) { - query.renderException(httpRequest, httpResponse, wbRequest, e); - } - - return handled; - } - - private void handleReplay(WaybackRequest wbRequest, - HttpServletRequest httpRequest, HttpServletResponse httpResponse) - throws IOException, ServletException { - Resource resource = null; - try { - SearchResults results = index.query(wbRequest); - if(!(results instanceof CaptureSearchResults)) { - throw new ResourceNotAvailableException("Bad results..."); - } - CaptureSearchResults captureResults = (CaptureSearchResults) results; - - // TODO: check which versions are actually accessible right now? - SearchResult closest = captureResults.getClosest(wbRequest); - resource = store.retrieveResource(closest); - - replay.renderResource(httpRequest, httpResponse, wbRequest, - closest, resource, uriConverter, captureResults); - } catch(WaybackException e) { - replay.renderException(httpRequest, httpResponse, wbRequest, e); - } finally { - if(resource != null) { - resource.close(); - } - } - } - - private void handleQuery(WaybackRequest wbRequest, - HttpServletRequest httpRequest, HttpServletResponse httpResponse) - throws ServletException, IOException { - - try { - SearchResults results = index.query(wbRequest); - if(results.getResultsType().equals( - WaybackConstants.RESULTS_TYPE_CAPTURE)) { - - query.renderUrlResults(httpRequest,httpResponse,wbRequest, - results,uriConverter); - - } else { - query.renderUrlPrefixResults(httpRequest,httpResponse,wbRequest, - results,uriConverter); - } - } catch(WaybackException e) { - query.renderException(httpRequest, httpResponse, wbRequest, e); - } - } - - /** - * @param contextPort the contextPort to set - */ - public void setContextPort(int contextPort) { - this.contextPort = contextPort; - } - - /** - * @param contextName the contextName to set 
- */ - public void setContextName(String contextName) { - this.contextName = contextName; - } - - /** - * @param index the index to set - */ - public void setIndex(ResourceIndex index) { - this.index = index; - } - - /** - * @param store the store to set - */ - public void setStore(ResourceStore store) { - this.store = store; - } - - /** - * @param replay the replay to set - */ - public void setReplay(ReplayDispatcher replay) { - this.replay = replay; - } - - /** - * @param query the query to set - */ - public void setQuery(QueryRenderer query) { - this.query = query; - } - - /** - * @param parser the parser to set - */ - public void setParser(RequestParser parser) { - this.parser = parser; - } - - /** - * @param uriConverter the uriConverter to set - */ - public void setUriConverter(ResultURIConverter uriConverter) { - this.uriConverter = uriConverter; - } - - - /** - * @return the contextPort - */ - public int getContextPort() { - return contextPort; - } - - /** - * @return the configs - */ - public Properties getConfigs() { - return configs; - } - - /** - * @param configs the configs to set - */ - public void setConfigs(Properties configs) { - this.configs = configs; - } - - /** - * @return the useServerName - */ - public boolean isUseServerName() { - return useServerName; - } - - /** - * @param useServerName the useServerName to set - */ - public void setUseServerName(boolean useServerName) { - this.useServerName = useServerName; - } - - public ExclusionFilterFactory getExclusionFactory() { - return exclusionFactory; - } - - public void setExclusionFactory(ExclusionFilterFactory exclusionFactory) { - this.exclusionFactory = exclusionFactory; - } - - public BooleanOperator<WaybackRequest> getAuthentication() { - return authentication; - } - - public void setAuthentication(BooleanOperator<WaybackRequest> authentication) { - this.authentication = authentication; - } -} This was sent by the SourceForge.net collaborative development platform, the world's largest 
Open Source development site. |
From: <bra...@us...> - 2007-09-28 00:55:20
|
Revision: 2013 http://archive-access.svn.sourceforge.net/archive-access/?rev=2013&view=rev Author: bradtofel Date: 2007-09-27 17:55:20 -0700 (Thu, 27 Sep 2007) Log Message: ----------- ADDED: new developer_environment.apt file Added Paths: ----------- trunk/archive-access/projects/wayback/dist/src/site/apt/ trunk/archive-access/projects/wayback/dist/src/site/apt/developer_environment.apt Added: trunk/archive-access/projects/wayback/dist/src/site/apt/developer_environment.apt =================================================================== --- trunk/archive-access/projects/wayback/dist/src/site/apt/developer_environment.apt (rev 0) +++ trunk/archive-access/projects/wayback/dist/src/site/apt/developer_environment.apt 2007-09-28 00:55:20 UTC (rev 2013) @@ -0,0 +1,134 @@ + --- + Setting up the Wayback Eclipse Development Environment + --- + Brad Tofel (brad at archive dot org) + --- + + +Getting Eclipse Europa installed + + [[1]] download and unpack eclipse-europa with "JEE" support + + [[2]] do latest software updates + + [[3]] software update: find and install: + + * Search for new features to install + + * New Remote Site + + * Name: <<<Subclipse>>> <(or whatever)> + + * Url: <<<http://subclipse.tigris.org/update_1.2.x>>> + + [[4]] select both Subclipse and Europa Discovery Site search sites + + [[5]] Check box to install "Subclipse" features + + [[6]] on same dialog, choose "Select Required" button.\ + this will select all dependencies from the Europa Discovery Site. + + [[7]] "Next", then accept terms of agreement, then "Next", then "Finish" + + [[8]] Choose "Install All" on Feature Verification dialog. 
+ + [[9]] choose "yes" to restart now + + [[10]] software update: find and install: + + * Search for new features to install + + * New Remote Site + + * Name: <<<Maven 2>>> <(or whatever)> + + * Url: <<<http://m2eclipse.codehaus.org/update/>>> + + [[11]] search Maven 2 plugin site with "Finish" + + [[12]] Check box to install "Maven 2" features + + [[13]] "Next", then accept terms of agreement, then "Next", then "Finish" + + [[14]] Choose "Install All" on Feature Verification dialog. + + [[15]] choose "yes" to restart now + + +Install Apache Tomcat + + From {{http://tomcat.apache.org/download-55.cgi}}. + +Adding a Tomcat server to Eclipse + + [[1]] Choose "File" \>\> "New" \>\> "Other..." + + [[2]] Choose "Server" \>\> "Server", and click "Next" + + [[3]] Fill out dialog "New Server:Define a New Server" + + * Server's host name: <<<localhost>>> + + * server type: "Apache" \>\> "Tomcat v5.5 Server", and click "Next" + + [[4]] Fill out dialog "New Server:Tomcat Server" + + * Name: <<<Apache Tomcat v5.5>>> + + * Tomcat installation directory: <(locate directory where you installed Tomcat 5.5)> + +Add WORKSPACE_ROOT classpath variable + + [[1]] Choose "Window" \>\> "Preferences..." + + [[2]] "General" \>\> "Workspace" \>\> "Linked Resources" \>\> "New..." + + [[3]] Fill in "New Variable" dialog: + + * Name: <<<WORKSPACE_ROOT>>> + + * Location: <(path to your workspace)> + + +Checking out source from SVN + + [[1]] Choose "File" \>\> "New" \>\> "Project..." + + [[2]] Choose "SVN" \>\> "Checkout Projects from SVN" + + [[3]] Choose "Create a new repository location", then "Next" + + [[4]] Fill in SVN repository Url: + + * Url: <<<https://archive-access.svn.sourceforge.net/svnroot/archive-access/trunk/archive-access/projects/wayback>>> + + [[5]] select top directory, and click "Finish" + + [[6]] wait for project to checkout and workspace to be rebuilt + +Running and Debugging webapp on local Tomcat server: + + [[1]] Choose "File" \>\> "Import..." 
+ + [[2]] Choose "General" \>\> "Existing Projects into Workspace", then "Next" + + [[3]] Choose "Select root directory" then "Browse..." + + [[4]] under <<<wayback>>> choose <<<wayback-webapp>>> directory, and click "OK" + + [[5]] "Finish" + +Configuring wayback-webapp to run on the Apache Tomcat 5.5 server + + [[1]] On the Servers tab, right-click then choose "Add and Remove Projects..." + + [[2]] add <<<wayback-webapp>>>, click "Finish" + + [[3]] exit and restart Eclipse + + [[4]] start server + +Accessing ARC file content: + + [[1]] place arc.gz files in <</tmp/wayback/arcs/>> <(or whatever directory you've changed the store:arcDir property to)> + \ No newline at end of file This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2007-09-28 00:14:42
|
Revision: 2012 http://archive-access.svn.sourceforge.net/archive-access/?rev=2012&view=rev Author: bradtofel Date: 2007-09-27 17:14:43 -0700 (Thu, 27 Sep 2007) Log Message: ----------- moved src/site/xdoc/faq.fml to src/site/fml/faq.fml Added Paths: ----------- trunk/archive-access/projects/wayback/dist/src/site/fml/ trunk/archive-access/projects/wayback/dist/src/site/fml/faq.fml Removed Paths: ------------- trunk/archive-access/projects/wayback/dist/src/site/xdoc/faq.fml Copied: trunk/archive-access/projects/wayback/dist/src/site/fml/faq.fml (from rev 2009, trunk/archive-access/projects/wayback/dist/src/site/xdoc/faq.fml) =================================================================== --- trunk/archive-access/projects/wayback/dist/src/site/fml/faq.fml (rev 0) +++ trunk/archive-access/projects/wayback/dist/src/site/fml/faq.fml 2007-09-28 00:14:43 UTC (rev 2012) @@ -0,0 +1,39 @@ +<?xml version="1.0" encoding="UTF-8"?> +<faqs title="Frequently Asked Questions"> + + <part id="general"> + <title>General</title> + + <faq id="about"> + <question> + What is this project all about? + </question> + <answer> + <p> + The project is designed to replace the current Wayback Machine with an + all Java solution that is flexible enough to provide an easy-to-use + solution for the single-machine at-home user, as well as scaling up + to hundreds of machines for a full historical collection. + </p> + <p> + Primarily it is a few easily replaceable interfaces, and some core + classes that utilize those interfaces to provide the Wayback + service. Presently only trivial implementations of those interfaces + have been developed, but we hope that these interfaces will allow a + high degree of flexibility and experimentation. + </p> + </answer> + </faq> + <faq id="install"> + <question> + How can I install and use this? + </question> + <answer> + <p> + See the <a href="user_manual.html">User Manual</a> for information + about installing and using this application. 
+ </p> + </answer> + </faq> + </part> +</faqs> Deleted: trunk/archive-access/projects/wayback/dist/src/site/xdoc/faq.fml =================================================================== --- trunk/archive-access/projects/wayback/dist/src/site/xdoc/faq.fml 2007-09-28 00:13:11 UTC (rev 2011) +++ trunk/archive-access/projects/wayback/dist/src/site/xdoc/faq.fml 2007-09-28 00:14:43 UTC (rev 2012) @@ -1,39 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<faqs title="Frequently Asked Questions"> - - <part id="general"> - <title>General</title> - - <faq id="about"> - <question> - What is this project all about? - </question> - <answer> - <p> - The project is designed to replace the current Wayback Machine with an - all Java solution that is flexible enough to provide an easy-to-use - solution for the single-machine at-home user, as well as scaling up - to hundreds of machines for a full historical collection. - </p> - <p> - Primarily it is a few easily replaceable interfaces, and some core - classes that utilize those interfaces to provide the Wayback - service. Presently only trivial implementations of those interfaces - have been developed, but we hope that these interfaces will allow a - high degree of flexibility and experimentation. - </p> - </answer> - </faq> - <faq id="install"> - <question> - How can I install and use this? - </question> - <answer> - <p> - See the <a href="user_manual.html">User Manual</a> for information - about installing and using this application. - </p> - </answer> - </faq> - </part> -</faqs> This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2007-09-28 00:13:10
|
Revision: 2011 http://archive-access.svn.sourceforge.net/archive-access/?rev=2011&view=rev Author: bradtofel Date: 2007-09-27 17:13:11 -0700 (Thu, 27 Sep 2007) Log Message: ----------- moved *.dia to src/site/resources/dia moved *.png to src/site/resources/images Added Paths: ----------- trunk/archive-access/projects/wayback/dist/src/site/resources/dia/ trunk/archive-access/projects/wayback/dist/src/site/resources/dia/ARCProxy.dia trunk/archive-access/projects/wayback/dist/src/site/resources/dia/AlphaRemoteResourceIndex.dia trunk/archive-access/projects/wayback/dist/src/site/resources/dia/DynamicCDXResourceIndex.dia trunk/archive-access/projects/wayback/dist/src/site/resources/dia/HTTP11ResourceStore.dia trunk/archive-access/projects/wayback/dist/src/site/resources/dia/RemoteResourceIndex.dia trunk/archive-access/projects/wayback/dist/src/site/resources/dia/WM-Shared-Small.dia trunk/archive-access/projects/wayback/dist/src/site/resources/dia/WM-Shared.dia trunk/archive-access/projects/wayback/dist/src/site/resources/dia/WM-Standard.dia trunk/archive-access/projects/wayback/dist/src/site/resources/images/ARCProxy.png trunk/archive-access/projects/wayback/dist/src/site/resources/images/AlphaRemoteResourceIndex.png trunk/archive-access/projects/wayback/dist/src/site/resources/images/DynamicCDXResourceIndex.png trunk/archive-access/projects/wayback/dist/src/site/resources/images/HTTP11ResourceStore.png trunk/archive-access/projects/wayback/dist/src/site/resources/images/RemoteResourceIndex.png trunk/archive-access/projects/wayback/dist/src/site/resources/images/WM-Component.png trunk/archive-access/projects/wayback/dist/src/site/resources/images/WM-Shared-Small.png trunk/archive-access/projects/wayback/dist/src/site/resources/images/WM-Shared.png trunk/archive-access/projects/wayback/dist/src/site/resources/images/WM-Standard.png Removed Paths: ------------- trunk/archive-access/projects/wayback/dist/src/site/xdoc/ARCProxy.dia 
trunk/archive-access/projects/wayback/dist/src/site/xdoc/ARCProxy.png trunk/archive-access/projects/wayback/dist/src/site/xdoc/AlphaRemoteResourceIndex.dia trunk/archive-access/projects/wayback/dist/src/site/xdoc/AlphaRemoteResourceIndex.png trunk/archive-access/projects/wayback/dist/src/site/xdoc/DynamicCDXResourceIndex.dia trunk/archive-access/projects/wayback/dist/src/site/xdoc/DynamicCDXResourceIndex.png trunk/archive-access/projects/wayback/dist/src/site/xdoc/HTTP11ResourceStore.dia trunk/archive-access/projects/wayback/dist/src/site/xdoc/HTTP11ResourceStore.png trunk/archive-access/projects/wayback/dist/src/site/xdoc/RemoteResourceIndex.dia trunk/archive-access/projects/wayback/dist/src/site/xdoc/RemoteResourceIndex.png trunk/archive-access/projects/wayback/dist/src/site/xdoc/WM-Component.png trunk/archive-access/projects/wayback/dist/src/site/xdoc/WM-Shared-Small.dia trunk/archive-access/projects/wayback/dist/src/site/xdoc/WM-Shared-Small.png trunk/archive-access/projects/wayback/dist/src/site/xdoc/WM-Shared.dia trunk/archive-access/projects/wayback/dist/src/site/xdoc/WM-Shared.png trunk/archive-access/projects/wayback/dist/src/site/xdoc/WM-Standard.dia trunk/archive-access/projects/wayback/dist/src/site/xdoc/WM-Standard.png Copied: trunk/archive-access/projects/wayback/dist/src/site/resources/dia/ARCProxy.dia (from rev 2009, trunk/archive-access/projects/wayback/dist/src/site/xdoc/ARCProxy.dia) =================================================================== (Binary files differ) Copied: trunk/archive-access/projects/wayback/dist/src/site/resources/dia/AlphaRemoteResourceIndex.dia (from rev 2009, trunk/archive-access/projects/wayback/dist/src/site/xdoc/AlphaRemoteResourceIndex.dia) =================================================================== (Binary files differ) Copied: trunk/archive-access/projects/wayback/dist/src/site/resources/dia/DynamicCDXResourceIndex.dia (from rev 2009, 
trunk/archive-access/projects/wayback/dist/src/site/xdoc/DynamicCDXResourceIndex.dia) =================================================================== (Binary files differ) Copied: trunk/archive-access/projects/wayback/dist/src/site/resources/dia/HTTP11ResourceStore.dia (from rev 2009, trunk/archive-access/projects/wayback/dist/src/site/xdoc/HTTP11ResourceStore.dia) =================================================================== (Binary files differ) Copied: trunk/archive-access/projects/wayback/dist/src/site/resources/dia/RemoteResourceIndex.dia (from rev 2009, trunk/archive-access/projects/wayback/dist/src/site/xdoc/RemoteResourceIndex.dia) =================================================================== (Binary files differ) Copied: trunk/archive-access/projects/wayback/dist/src/site/resources/dia/WM-Shared-Small.dia (from rev 2009, trunk/archive-access/projects/wayback/dist/src/site/xdoc/WM-Shared-Small.dia) =================================================================== (Binary files differ) Copied: trunk/archive-access/projects/wayback/dist/src/site/resources/dia/WM-Shared.dia (from rev 2009, trunk/archive-access/projects/wayback/dist/src/site/xdoc/WM-Shared.dia) =================================================================== (Binary files differ) Copied: trunk/archive-access/projects/wayback/dist/src/site/resources/dia/WM-Standard.dia (from rev 2009, trunk/archive-access/projects/wayback/dist/src/site/xdoc/WM-Standard.dia) =================================================================== (Binary files differ) Copied: trunk/archive-access/projects/wayback/dist/src/site/resources/images/ARCProxy.png (from rev 2009, trunk/archive-access/projects/wayback/dist/src/site/xdoc/ARCProxy.png) =================================================================== (Binary files differ) Copied: trunk/archive-access/projects/wayback/dist/src/site/resources/images/AlphaRemoteResourceIndex.png (from rev 2009, 
trunk/archive-access/projects/wayback/dist/src/site/xdoc/AlphaRemoteResourceIndex.png) =================================================================== (Binary files differ) Copied: trunk/archive-access/projects/wayback/dist/src/site/resources/images/DynamicCDXResourceIndex.png (from rev 2009, trunk/archive-access/projects/wayback/dist/src/site/xdoc/DynamicCDXResourceIndex.png) =================================================================== (Binary files differ) Copied: trunk/archive-access/projects/wayback/dist/src/site/resources/images/HTTP11ResourceStore.png (from rev 2009, trunk/archive-access/projects/wayback/dist/src/site/xdoc/HTTP11ResourceStore.png) =================================================================== (Binary files differ) Copied: trunk/archive-access/projects/wayback/dist/src/site/resources/images/RemoteResourceIndex.png (from rev 2009, trunk/archive-access/projects/wayback/dist/src/site/xdoc/RemoteResourceIndex.png) =================================================================== (Binary files differ) Copied: trunk/archive-access/projects/wayback/dist/src/site/resources/images/WM-Component.png (from rev 2009, trunk/archive-access/projects/wayback/dist/src/site/xdoc/WM-Component.png) =================================================================== (Binary files differ) Copied: trunk/archive-access/projects/wayback/dist/src/site/resources/images/WM-Shared-Small.png (from rev 2009, trunk/archive-access/projects/wayback/dist/src/site/xdoc/WM-Shared-Small.png) =================================================================== (Binary files differ) Copied: trunk/archive-access/projects/wayback/dist/src/site/resources/images/WM-Shared.png (from rev 2009, trunk/archive-access/projects/wayback/dist/src/site/xdoc/WM-Shared.png) =================================================================== (Binary files differ) Copied: trunk/archive-access/projects/wayback/dist/src/site/resources/images/WM-Standard.png (from rev 2009, 
trunk/archive-access/projects/wayback/dist/src/site/xdoc/WM-Standard.png) =================================================================== (Binary files differ) Deleted: trunk/archive-access/projects/wayback/dist/src/site/xdoc/ARCProxy.dia =================================================================== (Binary files differ) Deleted: trunk/archive-access/projects/wayback/dist/src/site/xdoc/ARCProxy.png =================================================================== (Binary files differ) Deleted: trunk/archive-access/projects/wayback/dist/src/site/xdoc/AlphaRemoteResourceIndex.dia =================================================================== (Binary files differ) Deleted: trunk/archive-access/projects/wayback/dist/src/site/xdoc/AlphaRemoteResourceIndex.png =================================================================== (Binary files differ) Deleted: trunk/archive-access/projects/wayback/dist/src/site/xdoc/DynamicCDXResourceIndex.dia =================================================================== (Binary files differ) Deleted: trunk/archive-access/projects/wayback/dist/src/site/xdoc/DynamicCDXResourceIndex.png =================================================================== (Binary files differ) Deleted: trunk/archive-access/projects/wayback/dist/src/site/xdoc/HTTP11ResourceStore.dia =================================================================== (Binary files differ) Deleted: trunk/archive-access/projects/wayback/dist/src/site/xdoc/HTTP11ResourceStore.png =================================================================== (Binary files differ) Deleted: trunk/archive-access/projects/wayback/dist/src/site/xdoc/RemoteResourceIndex.dia =================================================================== (Binary files differ) Deleted: trunk/archive-access/projects/wayback/dist/src/site/xdoc/RemoteResourceIndex.png =================================================================== (Binary files differ) Deleted: 
trunk/archive-access/projects/wayback/dist/src/site/xdoc/WM-Component.png =================================================================== (Binary files differ) Deleted: trunk/archive-access/projects/wayback/dist/src/site/xdoc/WM-Shared-Small.dia =================================================================== (Binary files differ) Deleted: trunk/archive-access/projects/wayback/dist/src/site/xdoc/WM-Shared-Small.png =================================================================== (Binary files differ) Deleted: trunk/archive-access/projects/wayback/dist/src/site/xdoc/WM-Shared.dia =================================================================== (Binary files differ) Deleted: trunk/archive-access/projects/wayback/dist/src/site/xdoc/WM-Shared.png =================================================================== (Binary files differ) Deleted: trunk/archive-access/projects/wayback/dist/src/site/xdoc/WM-Standard.dia =================================================================== (Binary files differ) Deleted: trunk/archive-access/projects/wayback/dist/src/site/xdoc/WM-Standard.png =================================================================== (Binary files differ) This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2007-09-28 00:10:43
|
Revision: 2010 http://archive-access.svn.sourceforge.net/archive-access/?rev=2010&view=rev Author: bradtofel Date: 2007-09-27 17:10:47 -0700 (Thu, 27 Sep 2007) Log Message: ----------- Added links for administrator and developer manuals Modified Paths: -------------- trunk/archive-access/projects/wayback/dist/src/site/site.xml Modified: trunk/archive-access/projects/wayback/dist/src/site/site.xml =================================================================== --- trunk/archive-access/projects/wayback/dist/src/site/site.xml 2007-09-28 00:09:04 UTC (rev 2009) +++ trunk/archive-access/projects/wayback/dist/src/site/site.xml 2007-09-28 00:10:47 UTC (rev 2010) @@ -29,12 +29,12 @@ <item name="Requirements" href="requirements.html"/> <item name="Downloads" href="downloads.html"/> <item name="User Manual" href="user_manual.html"/> + <item name="Administrator Manual" href="administrator_manual.html"/> + <item name="Developer Manual" href="developer_manual.html"/> <item name="FAQ" href="/faq.html"/> <item name="API" href="./apidocs"/> <item name="Browse/Submit a Bug" href="http://sourceforge.net/tracker/?group_id=118427&atid=681137"/> - <item name="Go Yahoo!" - href="http://www.yahoo.com/"/> </menu> <!--Its not possible to change the labels used in reports, not yet anyways. This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2007-09-28 00:09:00
|
Revision: 2009 http://archive-access.svn.sourceforge.net/archive-access/?rev=2009&view=rev Author: bradtofel Date: 2007-09-27 17:09:04 -0700 (Thu, 27 Sep 2007) Log Message: ----------- MOVED: src/site/xdocs => src/site/xdoc Modified Paths: -------------- trunk/archive-access/projects/wayback/dist/src/site/xdoc/navigation.xml Added Paths: ----------- trunk/archive-access/projects/wayback/dist/src/site/xdoc/ Removed Paths: ------------- trunk/archive-access/projects/wayback/dist/src/site/xdocs/ Copied: trunk/archive-access/projects/wayback/dist/src/site/xdoc (from rev 1983, trunk/archive-access/projects/wayback/dist/src/site/xdocs) Modified: trunk/archive-access/projects/wayback/dist/src/site/xdoc/navigation.xml =================================================================== --- trunk/archive-access/projects/wayback/dist/src/site/xdocs/navigation.xml 2007-08-31 20:24:58 UTC (rev 1983) +++ trunk/archive-access/projects/wayback/dist/src/site/xdoc/navigation.xml 2007-09-28 00:09:04 UTC (rev 2009) @@ -14,6 +14,7 @@ <item name="Requirements" href="requirements.html"/> <item name="Downloads" href="downloads.html"/> <item name="User Manual" href="user_manual.html"/> + <item name="Test" href="test.html"/> <item name="FAQ" href="/faq.html"/> <item name="API" href="./apidocs"/> <item name="Browse/Submit a Bug" This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2007-09-28 00:08:55
|
Revision: 2008 http://archive-access.svn.sourceforge.net/archive-access/?rev=2008&view=rev Author: bradtofel Date: 2007-09-27 17:08:45 -0700 (Thu, 27 Sep 2007) Log Message: ----------- MOVED: src/site/xdocs => src/site/xdoc Modified Paths: -------------- trunk/archive-access/projects/wayback/dist/src/site/site.xml Modified: trunk/archive-access/projects/wayback/dist/src/site/site.xml =================================================================== --- trunk/archive-access/projects/wayback/dist/src/site/site.xml 2007-09-27 01:31:14 UTC (rev 2007) +++ trunk/archive-access/projects/wayback/dist/src/site/site.xml 2007-09-28 00:08:45 UTC (rev 2008) @@ -33,6 +33,8 @@ <item name="API" href="./apidocs"/> <item name="Browse/Submit a Bug" href="http://sourceforge.net/tracker/?group_id=118427&atid=681137"/> + <item name="Go Yahoo!" + href="http://www.yahoo.com/"/> </menu> <!--Its not possible to change the labels used in reports, not yet anyways. This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2007-09-27 01:31:10
|
Revision: 2007 http://archive-access.svn.sourceforge.net/archive-access/?rev=2007&view=rev Author: bradtofel Date: 2007-09-26 18:31:14 -0700 (Wed, 26 Sep 2007) Log Message: ----------- TWEAK added default text for new authentication control exception. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/classes/WaybackUI.properties Modified: trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/classes/WaybackUI.properties =================================================================== --- trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/classes/WaybackUI.properties 2007-09-27 01:30:14 UTC (rev 2006) +++ trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/classes/WaybackUI.properties 2007-09-27 01:31:14 UTC (rev 2007) @@ -2,6 +2,8 @@ Exception.wayback.message=An unknown exception has occured. {0} Exception.accessControl.title=Access Control Exception Exception.accessControl.message=Access to this content has been blocked. {0} +Exception.authenticationControl.title=Authentication Control Exception +Exception.authenticationControl.message=This content is not accessible as the current user or from your current location. {0} Exception.badContent.title=Bad Content Exception Exception.badContent.message=The content that was archived is not replayable. Exception.badQuery.title=Bad Query Exception This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2007-09-27 01:30:10
|
Revision: 2006 http://archive-access.svn.sourceforge.net/archive-access/?rev=2006&view=rev Author: bradtofel Date: 2007-09-26 18:30:14 -0700 (Wed, 26 Sep 2007) Log Message: ----------- FEATURE: now allow WaybackException instance to set status and possibly add other HTTP headers, etc, to response. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/jsp/HTMLError.jsp Modified: trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/jsp/HTMLError.jsp =================================================================== --- trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/jsp/HTMLError.jsp 2007-09-27 01:28:52 UTC (rev 2005) +++ trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/jsp/HTMLError.jsp 2007-09-27 01:30:14 UTC (rev 2006) @@ -1,13 +1,15 @@ <%@ page import="org.archive.wayback.exception.WaybackException" %> <%@ page import="org.archive.wayback.core.UIResults" %> <%@ page import="org.archive.wayback.util.StringFormatter" %> +<% +WaybackException e = (WaybackException) request.getAttribute("exception"); +e.setupResponse(response); +%> <jsp:include page="/template/UI-header.jsp" flush="true" /> <% -WaybackException e = (WaybackException) request.getAttribute("exception"); UIResults results = UIResults.getFromRequest(request); StringFormatter fmt = results.getFormatter(); -response.setStatus(e.getStatus()); %> This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |