From: <bra...@us...> - 2007-07-19 21:26:03
|
Revision: 1835 http://archive-access.svn.sourceforge.net/archive-access/?rev=1835&view=rev Author: bradtofel Date: 2007-07-19 14:26:02 -0700 (Thu, 19 Jul 2007) Log Message: ----------- REFACTOR: complete configuration changeover to multiple WaybackContexts being handled by a single webapp, all requests handled by a single filter, and Spring configuration of WaybackContexts. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/web.xml Modified: trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/web.xml =================================================================== --- trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/web.xml 2007-07-19 21:12:46 UTC (rev 1834) +++ trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/web.xml 2007-07-19 21:26:02 UTC (rev 1835) @@ -5,1087 +5,16 @@ <!-- General Installation information --> - <context-param> - <param-name>installationname</param-name> - <param-value>General Configuration</param-value> - <description> - This text will appear on the Wayback Configuration and Status page - and may assist in determining which installation users are viewing - via their web browser in environments with multiple Wayback - installations. - </description> + <param-name>config-path</param-name> + <param-value>WEB-INF/wayback.xml</param-value> </context-param> - - <listener> - <listener-class>org.archive.wayback.core.WaybackContextListener</listener-class> - </listener> -<!-- - USER INTERFACE SECTION ---> -<!-- START OF Archival-Url UI OPTIONS -This section contains configuration for using Archival URL query and replay -mechanisms for retrieving content from the wayback machine. This is the default -access mode. 
---> - - <servlet> - <servlet-name>QueryServlet</servlet-name> - <servlet-class>org.archive.wayback.query.QueryServlet</servlet-class> - <init-param> - <param-name>queryui.jsppath</param-name> - <param-value>jsp/QueryUI</param-value> - </init-param> - </servlet> - <servlet-mapping> - <servlet-name>QueryServlet</servlet-name> - <url-pattern>/query</url-pattern> - </servlet-mapping> - - <servlet> - <servlet-name>XMLQueryServlet</servlet-name> - <servlet-class>org.archive.wayback.query.QueryServlet</servlet-class> - <init-param> - <param-name>queryui.jsppath</param-name> - <param-value>jsp/QueryXMLUI</param-value> - </init-param> - </servlet> - <servlet-mapping> - <servlet-name>XMLQueryServlet</servlet-name> - <url-pattern>/xmlquery</url-pattern> - </servlet-mapping> - - <servlet> - <servlet-name>ReplayServlet</servlet-name> - <servlet-class>org.archive.wayback.replay.ReplayServlet</servlet-class> - </servlet> - <servlet-mapping> - <servlet-name>ReplayServlet</servlet-name> - <url-pattern>/replay</url-pattern> - </servlet-mapping> - <filter> - <filter-name>QueryFilter</filter-name> - <filter-class>org.archive.wayback.archivalurl.QueryFilter</filter-class> - - <init-param> - <param-name>handler.url</param-name> - <param-value>/query</param-value> - </init-param> + <filter-name>RequestFilter</filter-name> + <filter-class>org.archive.wayback.webapp.RequestFilter</filter-class> </filter> <filter-mapping> - <filter-name>QueryFilter</filter-name> + <filter-name>RequestFilter</filter-name> <url-pattern>/*</url-pattern> </filter-mapping> - - <filter> - <filter-name>ReplayFilter</filter-name> - <filter-class>org.archive.wayback.archivalurl.ReplayFilter</filter-class> - - <init-param> - <param-name>handler.url</param-name> - <param-value>/replay</param-value> - </init-param> - </filter> - <filter-mapping> - <filter-name>ReplayFilter</filter-name> - <url-pattern>/*</url-pattern> - </filter-mapping> - - <context-param> - <param-name>queryrenderer.classname</param-name> - 
<param-value>org.archive.wayback.query.Renderer</param-value> - <description>Implementation responsible for drawing Index Query results</description> - </context-param> - - <context-param> - <param-name>replayrenderer.classname</param-name> - <param-value>org.archive.wayback.archivalurl.JSReplayRenderer</param-value> - <description>Implementation responsible for drawing replayed resources and replay error messages</description> - </context-param> - - <context-param> - <param-name>replayuriconverter.classname</param-name> - <param-value>org.archive.wayback.archivalurl.ArchivalUrlResultURIConverter</param-value> - <description>Class that implements translation of index results to Replayable URIs for this Wayback</description> - </context-param> - - <context-param> - <param-name>replayui.jsppath</param-name> - <param-value>jsp/ReplayUI</param-value> - <description>RawReplayUI specific path to jsp pages. relative to webapp/</description> - </context-param> - - <context-param> - <param-name>jsuri</param-name> - <param-value>http://localhost:8080/wayback/wm.js</param-value> - <description>absolute URL of page rewriting javascript </description> - </context-param> - - <context-param> - <param-name>replayuriprefix</param-name> - <param-value>http://localhost:8080/wayback/</param-value> - <description>HTTP URI prefix for the replay UI. This should be the name and port of the Tomcat hosting this webapp, plus the name of the context where this webapp is installed.</description> - </context-param> - - <context-param> - <param-name>resultsperpage</param-name> - <param-value>10</param-value> - <description>Default number of results to show per page</description> - </context-param> - <!-- END OF Archival-Url UI OPTIONS --> - -<!-- START OF Proxy UI OPTIONS -This section contains configuration for accessing content from the wayback -machine as a proxy server. 
Client browsers are configured to make all web -requests through the wayback machine, and it will serve content from ARC -files instead of from the original websites. This section also contains -configuration for serving XML index queries, which allows a firefox plugin -toolbar to retrieve a list of all versions of the current document being viewed. - -These options are not used by default. ---> -<!-- - - <servlet> - <servlet-name>QueryServlet</servlet-name> - <servlet-class>org.archive.wayback.query.QueryServlet</servlet-class> - <init-param> - <param-name>queryui.jsppath</param-name> - <param-value>jsp/QueryUI</param-value> - </init-param> - </servlet> - <servlet-mapping> - <servlet-name>QueryServlet</servlet-name> - <url-pattern>/query</url-pattern> - </servlet-mapping> - - <servlet> - <servlet-name>XMLQueryServlet</servlet-name> - <servlet-class>org.archive.wayback.query.QueryServlet</servlet-class> - <init-param> - <param-name>queryui.jsppath</param-name> - <param-value>jsp/QueryXMLUI</param-value> - </init-param> - </servlet> - <servlet-mapping> - <servlet-name>XMLQueryServlet</servlet-name> - <url-pattern>/xmlquery</url-pattern> - </servlet-mapping> - - <servlet> - <servlet-name>ReplayServlet</servlet-name> - <servlet-class>org.archive.wayback.replay.ReplayServlet</servlet-class> - </servlet> - <servlet-mapping> - <servlet-name>ReplayServlet</servlet-name> - <url-pattern>/replay</url-pattern> - </servlet-mapping> - - <filter> - <filter-name>ReplayFilter</filter-name> - <filter-class>org.archive.wayback.proxy.ReplayFilter</filter-class> - - <init-param> - <param-name>handler.url</param-name> - <param-value>/replay</param-value> - </init-param> - - <init-param> - <param-name>query.localhostname</param-name> - <param-value></param-value> - <description>extra hostname that should be considered "local" when - discriminating between Replay and Query requests - </description> - </init-param> - </filter> - <filter-mapping> - <filter-name>ReplayFilter</filter-name> - 
<url-pattern>/*</url-pattern> - </filter-mapping> - - - <context-param> - <param-name>queryrenderer.classname</param-name> - <param-value>org.archive.wayback.query.Renderer</param-value> - <description>Implementation responsible for drawing Index Query results</description> - </context-param> - - <context-param> - <param-name>replayrenderer.classname</param-name> - <param-value>org.archive.wayback.proxy.RawReplayRenderer</param-value> - <description>Implementation responsible for drawing replayed resources and replay error messages</description> - </context-param> - - <context-param> - <param-name>replayui.jsppath</param-name> - <param-value>jsp/ReplayUI</param-value> - <description>RawReplayUI specific path to jsp pages. relative to webapp/</description> - </context-param> - - <context-param> - <param-name>replayuriconverter.classname</param-name> - <param-value>org.archive.wayback.proxy.RedirectResultURIConverter</param-value> - <description>Class that implements translation of index results to Replayable URIs for this Wayback</description> - </context-param> - - <context-param> - <param-name>proxy.redirectpath</param-name> - <param-value>http://localhost:8080/jsp/QueryUI/Redirect.jsp</param-value> - <description>absolute URL to jsp where requests are bounced thru to pick up timestamps</description> - </context-param> ---> -<!-- END OF Proxy UI OPTIONS --> - -<!-- START OF Timeline UI OPTIONS -This section contains configuration for using the wayback machine in timeline -access mode, similar to the WERA application. - -These options are not used by default. 
---> -<!-- - - <servlet> - <servlet-name>XMLQueryServlet</servlet-name> - <servlet-class>org.archive.wayback.query.QueryServlet</servlet-class> - <init-param> - <param-name>queryui.jsppath</param-name> - <param-value>jsp/QueryXMLUI</param-value> - </init-param> - </servlet> - <servlet-mapping> - <servlet-name>XMLQueryServlet</servlet-name> - <url-pattern>/xmlquery</url-pattern> - </servlet-mapping> - - <servlet> - <servlet-name>QueryServlet</servlet-name> - <servlet-class>org.archive.wayback.query.QueryServlet</servlet-class> - <init-param> - <param-name>queryui.jsppath</param-name> - <param-value>jsp/QueryUI</param-value> - </init-param> - </servlet> - <servlet-mapping> - <servlet-name>QueryServlet</servlet-name> - <url-pattern>/query</url-pattern> - </servlet-mapping> - - <servlet> - <servlet-name>TimelineQueryServlet</servlet-name> - <servlet-class>org.archive.wayback.query.QueryServlet</servlet-class> - <init-param> - <param-name>queryui.jsppath</param-name> - <param-value>jsp/TimelineUI</param-value> - </init-param> - </servlet> - <servlet-mapping> - <servlet-name>TimelineQueryServlet</servlet-name> - <url-pattern>/timeline</url-pattern> - </servlet-mapping> - - <servlet> - <servlet-name>FramesetReplayServlet</servlet-name> - <servlet-class>org.archive.wayback.replay.ReplayServlet</servlet-class> - <init-param> - <param-name>replayrenderer.classname</param-name> - <param-value>org.archive.wayback.timeline.FramesetReplayRenderer</param-value> - <description>Implementation responsible for drawing replayed resources and replay error messages</description> - </init-param> - </servlet> - <servlet-mapping> - <servlet-name>FramesetReplayServlet</servlet-name> - <url-pattern>/frameset</url-pattern> - </servlet-mapping> - - <servlet> - <servlet-name>InlineReplayServlet</servlet-name> - <servlet-class>org.archive.wayback.replay.ReplayServlet</servlet-class> - <init-param> - <param-name>replayrenderer.classname</param-name> - 
<param-value>org.archive.wayback.timeline.TimelineReplayRenderer</param-value> - <description>Implementation responsible for drawing replayed resources and replay error messages</description> - </init-param> - </servlet> - <servlet-mapping> - <servlet-name>InlineReplayServlet</servlet-name> - <url-pattern>/replay</url-pattern> - </servlet-mapping> - - <servlet> - <servlet-name>MetaReplayServlet</servlet-name> - <servlet-class>org.archive.wayback.replay.ReplayServlet</servlet-class> - <init-param> - <param-name>replayrenderer.classname</param-name> - <param-value>org.archive.wayback.timeline.MetaReplayRenderer</param-value> - <description>Implementation responsible for drawing replayed resources and replay error messages</description> - </init-param> - </servlet> - <servlet-mapping> - <servlet-name>MetaReplayServlet</servlet-name> - <url-pattern>/meta</url-pattern> - </servlet-mapping> - - - <context-param> - <param-name>replayui.jsppath</param-name> - <param-value>jsp/ReplayUI</param-value> - <description>ReplayUI specific path to jsp pages. 
relative to webapp/</description> - </context-param> - - <context-param> - <param-name>queryrenderer.classname</param-name> - <param-value>org.archive.wayback.timeline.TimelineQueryRenderer</param-value> - <description>Implementation responsible for drawing Index Query results</description> - </context-param> - - <context-param> - <param-name>replayuriconverter.classname</param-name> - <param-value>org.archive.wayback.timeline.TimelineReplayResultURIConverter</param-value> - <description>Class that implements translation of index results to Replayable URIs for this Wayback</description> - </context-param> - - - - <context-param> - <param-name>jsuri</param-name> - <param-value>http://localhost:8080/wayback/jsp/TimelineUI/wm-timeline.js</param-value> - <description>HTTP URI to javascript files</description> - </context-param> - - - <context-param> - <param-name>replayuriprefix</param-name> - <param-value>http://localhost:8080/wayback/replay</param-value> - <description>HTTP URI prefix for the replay servlet</description> - </context-param> - - <context-param> - <param-name>metauriprefix</param-name> - <param-value>http://localhost:8080/wayback/meta</param-value> - <description>HTTP URI prefix for the meta replay servlet</description> - </context-param> - - <context-param> - <param-name>timelineuriprefix</param-name> - <param-value>http://localhost:8080/wayback/timeline</param-value> - <description>HTTP URI prefix for the timeline servlet</description> - </context-param> - - <context-param> - <param-name>frameseturiprefix</param-name> - <param-value>http://localhost:8080/wayback/frameset</param-value> - <description>HTTP URI prefix for the frameset servlet</description> - </context-param> ---> -<!-- END OF Timeline UI OPTIONS --> - -<!-- - END OF USER INTERFACE SECTION ---> - - -<!-- - RESOURCE STORE SECTION ---> -<!-- START OF Local-ARC ResourceStore OPTIONS -This section contains configuration for accessing ARC files from a single -directory on a local filesystem. 
If ARC files are spread across multiple -local directories, a single directory can be created, and populated with symbolic -links to the various locations of the ARC files. This configuration section also -contains specific configuration for an indexing thread, which can optionally -notice new ARC files, generate CDX flat files for new ARCs, and hand off these -CDX files to a BDB resource index for merging. ---> - - <context-param> - <param-name>resourcestore.classname</param-name> - <param-value>org.archive.wayback.resourcestore.LocalARCResourceStore</param-value> - <description>Class that implements ResourceStore for this Wayback</description> - </context-param> - - <context-param> - <param-name>resourcestore.arcpath</param-name> - <param-value>/tmp/wayback/arcs</param-value> - <description> - Directory where ARC files are found (possibly where Heritrix writes them.) - This directory must exist. - </description> - </context-param> - - <context-param> - <param-name>resourcestore.autoindex</param-name> - <param-value>1</param-value> - <description> - If this is set to '1', then a background thread is launched that - detects new ARC files appearing in arcpath. New ARCs are indexed, - and a CDX flat file, with one line per ARC Record is created, one - CDX file per ARC. These CDX files are then handed off to the index - for incorporation into the index. - </description> - </context-param> - - <context-param> - <param-name>resourcestore.tmppath</param-name> - <param-value>/tmp/wayback/arc-indexer/tmp</param-value> - <description> - Directory where CDX files are created temporarily. This is a - scratch space directory, which must exist. - </description> - </context-param> - - <context-param> - <param-name>resourcestore.workpath</param-name> - <param-value>/tmp/wayback/arc-indexer/work</param-value> - <description> - Directory which holds empty flag files indicating that ARC files - are waiting to be indexed. - This directory must exist. 
- </description> - </context-param> - - <context-param> - <param-name>resourcestore.queuedpath</param-name> - <param-value>/tmp/wayback/arc-indexer/queued</param-value> - <description> - Directory which holds empty flag files indicating that ARC files - have already been seen and queued for indexing. - This directory must exist. - </description> - </context-param> - - <context-param> - <param-name>resourcestore.indextarget</param-name> - <param-value>/tmp/wayback/index-data/incoming</param-value> - <description> - Directory or URL where CDX files are sent after they are created. If - the value of this parameter begins with http://, then the value is - assumed to be a URL where CDX files are PUT, on a possibly remote - resourceindex node. If the value does not begin with http://, then - the value is assumed to be a local directory, which must exist, - where completed CDX files are moved for incorporation into the - index. - </description> - </context-param> - - <context-param> - <param-name>resourcestore.indexinterval</param-name> - <param-value>10000</param-value> - <description> - Millisecond interval between checks for new ARCs that need to be - processed. This is only the initial time slept when first starting - up, and after any new files are found. Each interval that no new - ARCs are detected, the duration slept increases by this amount. - </description> - </context-param> - -<!-- END OF Local-ARC ResourceStore OPTIONS --> - -<!-- START OF ArcProxy/LocationDB Servlet Package OPTIONS -This section contains configuration for a standalone webapp that implements -a singleton FileLocationDB for multiple Wayback applications using the same -dataset. This has several purposes: -1) there is a single location DB to update -2) allows "reverse proxying" of all ARC files on a backend network, making them -appear to be in a single exported directory. - -This should be the only configuration active in a separate webapp, if used. - -These options are not used by default. 
---> -<!-- - - <servlet> - <servlet-name>ArcProxyServlet</servlet-name> - <servlet-class>org.archive.wayback.resourcestore.http.ArcProxyServlet</servlet-class> - </servlet> - <servlet-mapping> - <servlet-name>ArcProxyServlet</servlet-name> - <url-pattern>/arcs/*</url-pattern> - </servlet-mapping> - - <servlet> - <servlet-name>FileLocationDBServlet</servlet-name> - <servlet-class>org.archive.wayback.resourcestore.http.FileLocationDBServlet</servlet-class> - </servlet> - <servlet-mapping> - <servlet-name>FileLocationDBServlet</servlet-name> - <url-pattern>/locationDB</url-pattern> - </servlet-mapping> - - <context-param> - <param-name>filelocationdb.classname</param-name> - <param-value>org.archive.wayback.resourcestore.http.FileLocationDB</param-value> - <description>Class implementing FileLocationDB</description> - </context-param> - - <context-param> - <param-name>filelocationdb.path</param-name> - <param-value>/tmp/wayback/arc-db</param-value> - <description>Directory where the filelocation BDB, which maps ARC - filenames to their absolute HTTP path(s). Must exist.</description> - </context-param> - - <context-param> - <param-name>filelocationdb.logpath</param-name> - <param-value>/tmp/wayback/arc-db.log</param-value> - <description>Path where log of new ARCs inserted into the filelocation - db are stored. Containing directory must exist.</description> - </context-param> - - <context-param> - <param-name>filelocationdb.name</param-name> - <param-value>arc-db</param-value> - <description>name for BDBJE arc database</description> - </context-param> - - ---> -<!-- END OF ArcProxy/LocationDB Servlet Package OPTIONS --> - -<!-- START OF Remote-HTTP1.1 ResourceStore OPTIONS -This section contains configuration for accessing ARC file data via HTTP 1.1 -from remote servers. This configuration requires all ARCs to appear as if they -were in a single directory, on an HTTP 1.1 compliate web server. 
The ArcProxy -configuration can be used to reverse-proxy HTTP 1.1 range requests to backend -storage nodes using a BDB JE database to map ARC filenames to a specific URL. - -These options are not used by default. ---> -<!-- - - <context-param> - <param-name>resourcestore.classname</param-name> - <param-value>org.archive.wayback.resourcestore.HttpARCResourceStore</param-value> - <description>Class that implements ResourceStore for this Wayback</description> - </context-param> - - <context-param> - <param-name>resourcestore.arcurlprefix</param-name> - <param-value>http://localhost:8080/arc-proxy/arcs</param-value> - <description>Absolute URL of the ArcProxy webapp which reverse proxies HTTP 1.1 requests to the actual location of ARC files</description> - </context-param> - ---> -<!-- END OF Remote-HTTP1.1 ResourceStore OPTIONS --> - -<!-- - END OF RESOURCE STORE SECTION ---> - - -<!-- - RESOURCE INDEX SECTION ---> -<!-- START OF Local-BDB ResourceIndex OPTIONS -This section contains configuration for using a BDB JE to hold the document -index on the local filesystem. This section also contains configuration for -an optional index update thread, which will scan a directory for new index data, -in CDX format, and will automatically add new index records to the index. This -is the default index storage implementation. 
---> - - <filter> - <filter-name>RemoteSubmitFilter</filter-name> - <filter-class>org.archive.wayback.resourceindex.indexer.RemoteSubmitFilter</filter-class> - <init-param> - <param-name>pipeline.statusjsp</param-name> - <param-value>jsp/PipelineUI/PipelineStatus.jsp</param-value> - </init-param> - </filter> - <filter-mapping> - <filter-name>RemoteSubmitFilter</filter-name> - <url-pattern>/index-incoming/*</url-pattern> - </filter-mapping> - - <context-param> - <param-name>resourceindex.classname</param-name> - <param-value>org.archive.wayback.resourceindex.LocalResourceIndex</param-value> - <description>Class that implements ResourceIndex for this Wayback</description> - </context-param> - - <context-param> - <param-name>resourceindex.sourceclass</param-name> - <param-value>BDB</param-value> - <description>Class that implements ResultSource for this Wayback, - currently: BDB|CDX</description> - </context-param> - - <context-param> - <param-name>resourceindex.indexpath</param-name> - <param-value>/tmp/wayback/index</param-value> - <description> - LocalBDBResourceIndex specific directory to store the BDB files. - Directory must exists. - </description> - </context-param> - - <context-param> - <param-name>resourceindex.dbname</param-name> - <param-value>DB1</param-value> - <description> - LocalBDBResourceIndex specific name for BDB database - </description> - </context-param> - - <context-param> - <param-name>resourceindex.incomingpath</param-name> - <param-value>/tmp/wayback/index-data/incoming</param-value> - <description> - BDB index-specific configuration that indicates new CDX format flat - files will appear in the directory named in the value of this param. - If this configuration is present and non-empty, a background thread - will be started that monitors this directory, and adds CDX records - in files found in this directory to the index. 
- </description> - </context-param> - - <context-param> - <param-name>resourceindex.mergedpath</param-name> - <param-value>/tmp/wayback/index-data/merged</param-value> - <description> - If this value is present and non-empty, then CDX files that are - successfully processed from incoming are moved to this directory - after merging. If this option is missing or blank, CDX files are - deleted after merging. - </description> - </context-param> - - <context-param> - <param-name>resourceindex.failedpath</param-name> - <param-value>/tmp/wayback/index-data/failed</param-value> - <description> - If this value is present and non-empty, then CDX files that fail to - parse successfully are moved to this directory after a single - attempt. If this option is missing or blank, malformed CDX files are - left in the incoming directory and repeatedly re-attempted until - some other process moves them out of the way or fixes them. - </description> - </context-param> - - <context-param> - <param-name>resourceindex.mergeinterval</param-name> - <param-value>10000</param-value> - <description> - Millisecond interval between checks for new files in the incoming - directory. This is only the starting number, when no new files are - found in the directory. Each subsequent interval will increase by - this number of ms, until a file is found, at which point the - interval will revert to the initial level. - </description> - </context-param> - - <context-param> - <param-name>maxresults</param-name> - <param-value>1000</param-value> - <description> - Maximum number of results to return from the ResourceIndex. - </description> - </context-param> - -<!-- END OF Local-BDB ResourceIndex OPTIONS --> - -<!-- START OF Alphabetically-Partitioned ResourceIndex OPTIONS -This section contains configuration for using a distributed ResourceIndex -composed of groups of nodes, where each group of nodes is responsible for -handling queries within an alphabetic range of URLs. 
- -These options are not used by default. ---> -<!-- - - <context-param> - <param-name>resourceindex.classname</param-name> - <param-value>org.archive.wayback.resourceindex.distributed.AlphaPartitionedIndex</param-value> - <description>Class that implements ResourceIndex for this Wayback</description> - </context-param> - - <context-param> - <param-name>resourceindex.distributed.mappath</param-name> - <param-value>http://localhost:8080/wayback/dist/range-map.txt</param-value> - <description> - path to local file that maps url ranges to sets of nodes. - </description> - </context-param> - - <context-param> - <param-name>resourceindex.distributed.checkinterval</param-name> - <param-value>100</param-value> - <description> - Number of seconds between checks for changes in file pointed to by - resourceindex.distributed.mappath. - </description> - </context-param> - - <context-param> - <param-name>maxresults</param-name> - <param-value>1000</param-value> - <description> - Maximum number of results to return from the ResourceIndex. - </description> - </context-param> - --> -<!-- END OF Alphabetically-Partitioned ResourceIndex OPTIONS --> - -<!-- START OF Local-CDX ResourceIndex OPTIONS -This section contains configuration for using one or more sorted CDX format -flat files to store index information. - -These options are not used by default. 
---> -<!-- - - <context-param> - <param-name>resourceindex.classname</param-name> - <param-value>org.archive.wayback.resourceindex.LocalResourceIndex</param-value> - <description>Class that implements ResourceIndex for this Wayback</description> - </context-param> - - <context-param> - <param-name>resourceindex.sourceclass</param-name> - <param-value>CDX</param-value> - <description>Class that implements ResultSource for this Wayback, - currently: BDB|CDX</description> - </context-param> - - <context-param> - <param-name>resourceindex.cdxpaths</param-name> - <param-value>/tmp/wayback/index/index.cdx</param-value> - <description> - One or more comma-separated paths pointing to sorted CDX files - that contain index information for this Wayback installation. - </description> - </context-param> - - <context-param> - <param-name>maxresults</param-name> - <param-value>1000</param-value> - <description> - Maximum number of results to return from the ResourceIndex. - </description> - </context-param> - --> -<!-- END OF Local-CDX ResourceIndex OPTIONS --> - -<!-- START OF Local-Dynamic-CDX ResourceIndex OPTIONS -This section contains configuration for using a dynamically defined, -automatically updating ResourceIndex composed of one or more sorted CDX format -flat files, which are configured via HTTP accessible text files. - -These options are not used by default. 
---> -<!-- - - <context-param> - <param-name>resourceindex.classname</param-name> - <param-value>org.archive.wayback.resourceindex.LocalResourceIndex</param-value> - <description>Class that implements ResourceIndex for this Wayback</description> - </context-param> - - <context-param> - <param-name>resourceindex.sourceclass</param-name> - <param-value>DYNACDX</param-value> - <description>Class that implements ResultSource for this Wayback, - currently: BDB|CDX|DYNACDX</description> - </context-param> - - <context-param> - <param-name>resourceindex.cdxinterval</param-name> - <param-value>100000</param-value> - <description> - Number of milliseconds between checking for updates to local CDX - configuration and setup. - </description> - </context-param> - - <context-param> - <param-name>resourceindex.cdxdir</param-name> - <param-value>/tmp/wayback/dynamic-cdx</param-value> - <description> - Path to directory managed by this Dynamic CDX index, where index - files are stored. - </description> - </context-param> - - <context-param> - <param-name>resourceindex.cdxrangeurl</param-name> - <param-value>http://localhost:8080/wayback/dyncdx/ranges.txt</param-value> - <description> - URL to file that maps nodes to CDX ranges. - </description> - </context-param> - - <context-param> - <param-name>resourceindex.cdxdefnurl</param-name> - <param-value>http://localhost:8080/wayback/dyncdx/cdx-def.txt</param-value> - <description> - URL to file that maps CDX ranges to MD5s. - </description> - </context-param> - - <context-param> - <param-name>resourceindex.cdxmd5url</param-name> - <param-value>http://localhost:8080/wayback/dyncdx/md5-loc.txt</param-value> - <description> - URL to file that maps MD5s to HTTP locations. - </description> - </context-param> - - <context-param> - <param-name>maxresults</param-name> - <param-value>1000</param-value> - <description> - Maximum number of results to return from the ResourceIndex. 
- </description> - </context-param> - --> -<!-- END OF Local-Dynamic-CDX ResourceIndex OPTIONS --> - -<!-- START OF Remote-Nutch ResourceIndex OPTIONS -This section contains configuration for using a Nutch full text index -to query for documents. - -These options are not used by default. ---> -<!-- - - <context-param> - <param-name>resourceindex.classname</param-name> - <param-value>org.archive.wayback.resourceindex.NutchResourceIndex</param-value> - <description>Class that implements ResourceIndex for this Wayback</description> - </context-param> - - <context-param> - <param-name>resourceindex.baseurl</param-name> - <param-value>http://localhost:8081/xmlquery</param-value> - <description>absolute URL to Nutch server</description> - </context-param> - - <context-param> - <param-name>maxresults</param-name> - <param-value>1000</param-value> - <description> - Maximum number of results to return from the ResourceIndex. - </description> - </context-param> - --> -<!-- END OF Remote-Nutch ResourceIndex OPTIONS --> - -<!-- START OF Remote-BDB/CDX ResourceIndex OPTIONS -This section contains configuration for using a BDB/CDX index hosted on a -remote server. The remote server holding the index must provide the XMLQuery -interface. - -These options are not used by default. ---> -<!-- - - <context-param> - <param-name>resourceindex.classname</param-name> - <param-value>org.archive.wayback.resourceindex.RemoteResourceIndex</param-value> - <description>Class that implements ResourceIndex for this Wayback</description> - </context-param> - - <context-param> - <param-name>resourceindex.baseurl</param-name> - <param-value>http://localhost:8080/xmlquery</param-value> - <description> - Absolute URL to the Wayback Machine Resource Index service. - </description> - </context-param> - - <context-param> - <param-name>maxresults</param-name> - <param-value>1000</param-value> - <description> - Maximum number of results to return from the ResourceIndex. 
- </description> - </context-param> - --> -<!-- END OF Remote-BDB/CDX ResourceIndex OPTIONS --> - -<!-- - END OF RESOURCE INDEX SECTION ---> - - -<!-- - RESOURCE INDEX EXCLUSION SECTION ---> -<!-- START OF No Exclusions OPTIONS -By default there is no masking of content from the index. ---> -<!-- END OF No Exclusions OPTIONS --> - -<!-- START OF Administrative plus robots.txt Exclusions OPTIONS -This section contains configuration for using both manual exclusions and -live web robots.txt to mask content from the index. - -These options are not used by default. ---> -<!-- - <context-param> - <param-name>resourceindex.exclusionurl</param-name> - <param-value>http://localhost:8080/wayback/exclusion</param-value> - <description> - HTTP base URL for remote access to exclusion services. - </description> - </context-param> - - <context-param> - <param-name>resourceindex.exclusionua</param-name> - <param-value>ia_archiver</param-value> - <description> - UserAgent to have exclusion service check access with. - </description> - </context-param> - - - <servlet> - <servlet-name>ExclusionServlet</servlet-name> - <servlet-class>org.archive.wayback.accesscontrol.ExclusionServlet</servlet-class> - </servlet> - <servlet-mapping> - <servlet-name>ExclusionServlet</servlet-name> - <url-pattern>/exclusion</url-pattern> - </servlet-mapping> - - <servlet> - <servlet-name>AdminExclusionServlet</servlet-name> - <servlet-class>org.archive.wayback.accesscontrol.AdministrativeExclusionServlet</servlet-class> - </servlet> - <servlet-mapping> - <servlet-name>AdminExclusionServlet</servlet-name> - <url-pattern>/admin-exclusion</url-pattern> - </servlet-mapping> - - <context-param> - <param-name>exclusionauthority.classname</param-name> - <param-value>org.archive.wayback.accesscontrol.AdministrativeExclusionAuthority</param-value> - <description> - Class that handles exclusions. 
- </description> - </context-param> - - <context-param> - <param-name>adminexclusion.dbpath</param-name> - <param-value>/tmp/wayback/admin</param-value> - <description>Directory where BDBJE places manual exclusion data</description> - </context-param> - - <context-param> - <param-name>adminexclusion.dbname</param-name> - <param-value>admin</param-value> - <description>name of data within BDBJE in adminexclusion.dbpath</description> - </context-param> - - <context-param> - <param-name>liveweb.cachedbpath</param-name> - <param-value>/tmp/wayback/cachedb</param-value> - <description>directory where database of cached robots.txt document information is kept</description> - </context-param> - - <context-param> - <param-name>liveweb.cachedbname</param-name> - <param-value>cachedb</param-value> - <description>name of database where cached robots.txt document information is kept</description> - </context-param> - - <context-param> - <param-name>liveweb.arcdir</param-name> - <param-value>/tmp/wayback/liveweb-arcs</param-value> - <description>directory where ARC files containing cached robots.txt documents are stored</description> - </context-param> - - <context-param> - <param-name>liveweb.arcprefix</param-name> - <param-value>live-robots</param-value> - <description>ARC filename prefix for robot.txt document storage</description> - </context-param> - - <context-param> - <param-name>liveweb.tempdir</param-name> - <param-value>/tmp/wayback/liveweb-temp</param-value> - <description>Directory where robots.txt documents are store temporarily</description> - </context-param> - ---> -<!-- END OF Administrative plus robots.txt Exclusions OPTIONS --> - -<!-- START OF robots.txt Exclusions OPTIONS -This section contains configuration for using live web robots.txt to mask -content from the index. - -These options are not used by default. 
---> -<!-- - <context-param> - <param-name>resourceindex.exclusionurl</param-name> - <param-value>http://localhost:8080/wayback/exclusion</param-value> - <description> - HTTP base URL for remote access to exclusion services. - </description> - </context-param> - - <context-param> - <param-name>resourceindex.exclusionua</param-name> - <param-value>ia_archiver</param-value> - <description> - UserAgent to have exclusion service check access with. - </description> - </context-param> - - <servlet> - <servlet-name>ExclusionServlet</servlet-name> - <servlet-class>org.archive.wayback.accesscontrol.ExclusionServlet</servlet-class> - </servlet> - <servlet-mapping> - <servlet-name>ExclusionServlet</servlet-name> - <url-pattern>/exclusion</url-pattern> - </servlet-mapping> - - <context-param> - <param-name>exclusionauthority.classname</param-name> - <param-value>org.archive.wayback.accesscontrol.RoboCache</param-value> - <description>Class that handles exclusions.</description> - </context-param> - - <context-param> - <param-name>liveweb.cachedbpath</param-name> - <param-value>/tmp/wayback/cachedb</param-value> - <description>directory where database of cached robots.txt document information is kept</description> - </context-param> - - <context-param> - <param-name>liveweb.cachedbname</param-name> - <param-value>cachedb</param-value> - <description>name of database where cached robots.txt document information is kept</description> - </context-param> - - <context-param> - <param-name>liveweb.arcdir</param-name> - <param-value>/tmp/wayback/liveweb-arcs</param-value> - <description>directory where ARC files containing cached robots.txt documents are stored</description> - </context-param> - - <context-param> - <param-name>liveweb.arcprefix</param-name> - <param-value>live-robots</param-value> - <description>ARC filename prefix for robot.txt document storage</description> - </context-param> - - <context-param> - <param-name>liveweb.tempdir</param-name> - 
<param-value>/tmp/wayback/liveweb-temp</param-value> - <description>Directory where robots.txt documents are stored temporarily</description> - </context-param> ---> -<!-- END OF robots.txt Exclusions OPTIONS --> - -<!-- - END OF RESOURCE INDEX EXCLUSION SECTION ---> - - - </web-app> This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |