From: <bra...@us...> - 2008-08-13 00:05:56
|
Revision: 2541 http://archive-access.svn.sourceforge.net/archive-access/?rev=2541&view=rev Author: bradtofel Date: 2008-08-13 00:06:02 +0000 (Wed, 13 Aug 2008) Log Message: ----------- TWEAK: unset DebugBanner, added Disclaimer.jsp as default Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/ArchivalUrlReplay.xml trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/DomainPrefixReplay.xml trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/wayback.xml Modified: trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/ArchivalUrlReplay.xml =================================================================== --- trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/ArchivalUrlReplay.xml 2008-08-12 23:03:30 UTC (rev 2540) +++ trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/ArchivalUrlReplay.xml 2008-08-13 00:06:02 UTC (rev 2541) @@ -6,7 +6,7 @@ <bean id="archivalurlhttpheaderprocessor" class="org.archive.wayback.replay.RedirectRewritingHttpHeaderProcessor" /> - <bean id="archivaldateredirectingreplayrenderer" class="org.archive.wayback.replay.DateRedirectReplayRenderer" /> + <bean id="archivaldateredirectingreplayrenderer" class="org.archive.wayback.replay.DateRedirectReplayRenderer" /> <bean id="archivalcssreplayrenderer" class="org.archive.wayback.archivalurl.ArchivalUrlCSSReplayRenderer"> <constructor-arg><ref bean="archivalurlhttpheaderprocessor"/></constructor-arg> </bean> @@ -33,25 +33,25 @@ <constructor-arg><ref bean="archivalurlhttpheaderprocessor"/></constructor-arg> <property name="jspInserts"> <list> - <value>/WEB-INF/replay/ArchiveComment.jsp</value> - <value>/WEB-INF/replay/ClientSideJSInsert.jsp</value> + <value>/WEB-INF/replay/ArchiveComment.jsp</value> + <value>/WEB-INF/replay/ClientSideJSInsert.jsp</value> + <value>/WEB-INF/replay/Disclaimer.jsp</value> +<!-- 
<value>/WEB-INF/replay/DebugBanner.jsp</value> -<!-- - <value>/WEB-INF/replay/Disclaimer.jsp</value> <value>/WEB-INF/replay/Timeline.jsp</value> --> </list> </property> </bean> - + <bean id="archivalurlreplay" class="org.archive.wayback.replay.SelectorReplayDispatcher"> <property name="selectors"> <list> <!-- REDIRECT IF NOT EXACT DATE --> - <bean class="org.archive.wayback.replay.selector.DateMismatchSelector"> - <property name="renderer" ref="archivaldateredirectingreplayrenderer"/> - </bean> + <bean class="org.archive.wayback.replay.selector.DateMismatchSelector"> + <property name="renderer" ref="archivaldateredirectingreplayrenderer"/> + </bean> <!-- HTML REPLAY --> <bean class="org.archive.wayback.replay.selector.MimeTypeSelector"> Modified: trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/DomainPrefixReplay.xml =================================================================== --- trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/DomainPrefixReplay.xml 2008-08-12 23:03:30 UTC (rev 2540) +++ trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/DomainPrefixReplay.xml 2008-08-13 00:06:02 UTC (rev 2541) @@ -16,6 +16,7 @@ <property name="jspInserts"> <list> <value>/WEB-INF/replay/ArchiveComment.jsp</value> + <value>/WEB-INF/replay/Disclaimer.jsp</value> <!-- <value>/WEB-INF/replay/DebugBanner.jsp</value> <value>/WEB-INF/replay/JSLessTimeline.jsp</value> Modified: trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/wayback.xml =================================================================== --- trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/wayback.xml 2008-08-12 23:03:30 UTC (rev 2540) +++ trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/wayback.xml 2008-08-13 00:06:02 UTC (rev 2541) @@ -2,319 +2,104 @@ <beans xmlns="http://www.springframework.org/schema/beans" 
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.springframework.org/schema/beans - http://www.springframework.org/schema/beans/spring-beans-2.5.xsd"> + http://www.springframework.org/schema/beans/spring-beans-2.5.xsd" + default-init-method="init"> <!-- - The following 3 beans are required when using the ArcProxy for providing - HTTP 1.1 remote access to ARC files distributed across multiple computers - or directories. + Macro-like substitutions for the overall file: + wayback.basedir: default top level directory for all index, state, + locationdb storage. --> - - <bean id="resourcefilelocationdb" class="org.archive.wayback.resourcestore.locationdb.BDBResourceFileLocationDB" - init-method="init"> - <property name="bdbPath" value="/tmp/wayback/file-db/db/" /> - <property name="bdbName" value="DB1" /> - <property name="logPath" value="/tmp/wayback/file-db/db.log" /> - </bean> - <bean name="8080:locationdb" class="org.archive.wayback.resourcestore.locationdb.ResourceFileLocationDBServlet"> - <property name="locationDB" ref="resourcefilelocationdb" /> - </bean> - <bean name="8080:fileproxy" class="org.archive.wayback.resourcestore.locationdb.FileProxyServlet"> - <property name="locationDB" ref="resourcefilelocationdb" /> - </bean> - <bean id="localbdbresourceindex" class="org.archive.wayback.resourceindex.LocalResourceIndex"> - <property name="source"> - <bean class="org.archive.wayback.resourceindex.bdb.BDBIndex" - init-method="init"> - <property name="bdbName" value="DB1" /> - <property name="bdbPath" value="/tmp/wayback/index/" /> - </bean> + <bean class="org.springframework.beans.factory.config.PropertyPlaceholderConfigurer"> + <property name="properties"> + <value> + wayback.basedir=/tmp/wayback + </value> </property> - <property name="maxRecords" value="10000" /> </bean> - <bean id="indexqueue" class="org.archive.wayback.resourcestore.indexer.DirectoryIndexQueue"> - <property name="path" value="/tmp/wayback/index-data/queue" /> 
- </bean> - <bean id="localbdbcollection" class="org.archive.wayback.webapp.WaybackCollection"> - <property name="resourceStore"> - <bean id="localresourcestore" class="org.archive.wayback.resourcestore.LocalResourceFileResourceStore"> - <property name="db" ref="resourcefilelocationdb" /> - </bean> - </property> - - <property name="resourceIndex" ref="localbdbresourceindex"/> - - <property name="shutdownables"> - <list> - <!-- This thread notices new files appearing in your resourcefilesources --> - <bean id="resourcefilesourceupdater" class="org.archive.wayback.resourcestore.resourcefile.ResourceFileSourceUpdater" - init-method="init"> - <property name="target" value="/tmp/wayback/file-db/incoming" /> - <property name="interval" value="100000" /> - <property name="sources"> - <list> - <bean class="org.archive.wayback.resourcestore.resourcefile.DirectoryResourceFileSource"> - <property name="name" value="files1" /> - <property name="prefix" value="/tmp/wayback/files1/" /> - </bean> - <bean class="org.archive.wayback.resourcestore.resourcefile.DirectoryResourceFileSource"> - <property name="name" value="files2" /> - <property name="prefix" value="/tmp/wayback/files2/" /> - </bean> - </list> - </property> - </bean> - - <!-- This thread updates the location db with updates from resourcefilesourceupdater --> - <bean id="resourcefilelocationdbupdater" class="org.archive.wayback.resourcestore.locationdb.ResourceFileLocationDBUpdater" - init-method="init"> - <property name="interval" value="100000" /> - <property name="db" ref="resourcefilelocationdb" /> - <property name="incomingDir" value="/tmp/wayback/file-db/incoming" /> - <property name="stateDir" value="/tmp/wayback/file-db/state" /> - </bean> - - <!-- This thread notices new files arriving in the filelocationdb, and queues them for indexing --> - <bean id="indexqueueupdater" class="org.archive.wayback.resourcestore.indexer.IndexQueueUpdater" - init-method="init"> - <property name="db" ref="resourcefilelocationdb" 
/> - <property name="queue" ref="indexqueue" /> - <property name="interval" value="1000" /> - <property name="lastMark" value="/tmp/wayback/index-data/queue.mark" /> - </bean> - - <!-- This thread checks the to-be-indexed queue for files needing indexing, indexes them, and hands off the results for merging with the ResourceIndex --> - <bean id="indexworker" class="org.archive.wayback.resourcestore.indexer.IndexWorker" - init-method="init"> - <property name="db" ref="resourcefilelocationdb" /> - <property name="queue" ref="indexqueue" /> - <property name="interval" value="1000" /> - <property name="target"> - <bean class="org.archive.wayback.resourceindex.updater.IndexClient"> - <property name="tmpDir" value="/tmp/wayback/index-data/tmp/" /> - <property name="target" value="/tmp/wayback/index-data/incoming/" /> - </bean> - </property> - </bean> - - <!-- This thread merges updates from the indexworker into the ResourceIndex --> - <bean class="org.archive.wayback.resourceindex.updater.LocalResourceIndexUpdater" - init-method="init"> - - <property name="index" ref="localbdbresourceindex" /> - <property name="incoming" value="/tmp/wayback/index-data/incoming/" /> - <property name="failed" value="/tmp/wayback/index-data/failed/" /> - <property name="merged" value="/tmp/wayback/index-data/merged/" /> - <property name="runInterval" value="10000" /> - </bean> - </list> - </property> - </bean> - - - <!-- - The following WaybackCollection bean template is required when using a - manually built local CDX index. + The ResourceFileLocationDB implementation to use for mapping ARC/WARC names + to absolute paths/URLs via a BDBJE database. 
--> -<!-- - <bean id="localcdxcollection" class="org.archive.wayback.webapp.WaybackCollection"> - <property name="resourceStore"> - <bean class="org.archive.wayback.resourcestore.LocalResourceStore" - init-method="init"> - <property name="dataDir" value="/tmp/wayback/arcs/" /> - </bean> - </property> - - <property name="resourceIndex"> - <bean class="org.archive.wayback.resourceindex.LocalResourceIndex"> - <property name="source"> - <bean id="cdxsearchresultsource" class="org.archive.wayback.resourceindex.cdx.CDXIndex"> - <property name="path" value="/tmp/wayback/cdx-index/index.cdx" /> - </bean> - </property> - <property name="maxRecords" value="10000" /> - </bean> - </property> + <bean id="resourcefilelocationdb" class="org.archive.wayback.resourcestore.locationdb.BDBResourceFileLocationDB"> + <property name="bdbPath" value="${wayback.basedir}/file-db/db/" /> + <property name="bdbName" value="DB1" /> + <property name="logPath" value="${wayback.basedir}/file-db/db.log" /> </bean> ---> - <bean id="localcdxcollection2" class="org.archive.wayback.webapp.WaybackCollection"> - - <property name="resourceStore"> - <bean class="org.archive.wayback.resourcestore.LocalResourceFileResourceStore"> - <property name="db"> - <bean class="org.archive.wayback.resourcestore.locationdb.FlatFileResourceFileLocationDB"> - <property name="path" value="/tmp/wayback/path-index.txt" /> - </bean> - </property> - </bean> - </property> - - <property name="resourceIndex"> - <bean class="org.archive.wayback.resourceindex.LocalResourceIndex"> - <property name="source"> - <bean id="cdxsearchresultsource" class="org.archive.wayback.resourceindex.cdx.CDXIndex"> - <property name="path" value="/tmp/wayback/cdx-index/index.1" /> - </bean> - </property> - <property name="maxRecords" value="10000" /> - </bean> - </property> - </bean> - <!-- - The following WaybackCollection bean template is required when using a - remote ResourceIndex and ResourceStore implementation. 
This will also - require setting up an arcproxy and locationdb on the host specified by - the resourceStore:urlPrefix configuration, and an additional AccessPoint - on the host specified by the resourceIndex:searchUrlBase configuration. + To enable manual management of, or remote access to the above locationDB, + uncomment the following bean. --> - <!-- - <bean id="remotecollection" class="org.archive.wayback.webapp.WaybackCollection"> - - <property name="resourceStore"> - <bean class="org.archive.wayback.resourcestore.Http11ResourceStore"> - <property name="urlPrefix" value="http://wayback.archive-it.org/fileproxy/" /> - </bean> - </property> - - <property name="resourceIndex"> - <bean class="org.archive.wayback.resourceindex.RemoteResourceIndex" - init-method="init"> - <property name="searchUrlBase" value="http://wayback.archive-it.org/1055/xmlquery" /> - </bean> - </property> + <bean name="8080:locationdb" class="org.archive.wayback.resourcestore.locationdb.ResourceFileLocationDBServlet"> + <property name="locationDB" ref="resourcefilelocationdb" /> </bean> --> <!-- - The following WaybackCollection bean template is an example for using a NutchWAX - full-text index with Wayback, using a RemoteResourceStore to access - replayed documents. You will need to change searchUrlBase to your local NutchWAX - installation. You may also need to ensure that the maxRecords on your RequestParser is - not greater than the maxRecords configured on the RemoteNutchResourceIndex. + The FileProxyServlet uses a ResourceFileLocationDB to make all ARC/WARC + files appear to reside within a single HTTP 1.1 exported directory. + Required when using the SimpleResourceStore to access distributed ARC/WARC + files over HTTP through a single reverse proxy. 
--> <!-- - <bean id="remotenutchcollection" class="org.archive.wayback.webapp.WaybackCollection"> - - <property name="resourceStore"> - <bean class="org.archive.wayback.resourcestore.Http11ResourceStore"> - <property name="urlPrefix" value="http://webapp101.us.archive.org/arcproxy/" /> - </bean> - </property> - - <property name="resourceIndex"> - <bean class="org.archive.wayback.resourceindex.NutchResourceIndex" init-method="init"> - <property name="searchUrlBase" value="http://webapp101.us.archive.org/e04/xmlquery" /> - <property name="maxRecords" value="100" /> - </bean> - </property> + <bean name="8080:fileproxy" class="org.archive.wayback.resourcestore.locationdb.FileProxyServlet"> + <property name="locationDB" ref="resourcefilelocationdb" /> </bean> --> -<!-- - The following bean is an example using the Access Control Oracle, thanks - Alex Osborne and NLA. Currently this is pretty undocumented, but here is a - place to get started: + <import resource="BDBCollection.xml"/> - http://webteam.archive.org/confluence/display/wayback/Exclusions+API +<!-- + The XML files indicated in the following import tags contain alternate + example implementations of WaybackCollections. --> + <import resource="CDXCollection.xml"/> + <import resource="RemoteCollection.xml"/> + <import resource="NutchCollection.xml"/> <!-- - <bean id="excluder-factory-oracle" class="org.archive.wayback.accesscontrol.oracleclient.OracleExclusionFilterFactory"> - <property name="oracleUrl" value="http://localhost:8180/oracle/" /> - <property name="accessGroup" value="ia_archiver" /> - </bean> --> + <!-- This is the only AccessPoint defined by default within this wayback.xml Spring configuration file, providing an ArchivalURL Replay UI to the - "localbdbcollection" by providing ArchivalURL-specific implementations - of the replay, parser, and uriConverter. 
- + "localbdbcollection", defined in "BDBCollection.xml" by providing + ArchivalURL-specific implementations of the replay, parser, and + uriConverter. + This AccessPoint currently will provide access only from the machine - running Tomcat. To provide external access, replace "localhost" with your - fully qualified hostname of the computer running Tomcat. + running Tomcat. To provide external access, replace "localhost.archive.org" + with your fully qualified hostname of the computer running Tomcat. --> <import resource="ArchivalUrlReplay.xml"/> <bean name="8080:wayback" class="org.archive.wayback.webapp.AccessPoint"> - <!-- - <property name="exclusionFactory" ref="excluder-factory-oracle" /> - --> - <property name="collection" ref="localbdbcollection" /> - <property name="configs"> - <props> - <prop key="inst">foo</prop> - <prop key="coll">supreme court</prop> - </props> - </property> - - <property name="uriConverter"> - <bean class="org.archive.wayback.archivalurl.ArchivalUrlResultURIConverter"> - <property name="replayURIPrefix" value="http://localhost:8080/wayback/"/> - </bean> - </property> - + <property name="collection" ref="remotecollection" /> + <property name="replay" ref="archivalurlreplay" /> <property name="query"> <bean class="org.archive.wayback.query.Renderer"> <property name="captureJsp" value="/WEB-INF/query/CalendarResults.jsp" /> </bean> </property> - <property name="replay" ref="archivalurlreplay" /> - - <property name="parser"> - <bean class="org.archive.wayback.archivalurl.ArchivalUrlRequestParser" - init-method="init"> - <property name="maxRecords" value="1000" /> - <property name="earliestTimestamp" value="1996" /> + <property name="uriConverter"> + <bean class="org.archive.wayback.archivalurl.ArchivalUrlResultURIConverter"> + <property name="replayURIPrefix" value="http://localhost.archive.org:8080/wayback/"/> </bean> </property> - </bean> - <bean name="8080:rwayback" parent="8080:wayback"> - <property name="collection" 
ref="localcdxcollection2" /> <property name="parser"> - <bean class="org.archive.wayback.archivalurl.ArchivalUrlRequestParser" - init-method="init"> - <property name="maxRecords" value="100" /> + <bean class="org.archive.wayback.archivalurl.ArchivalUrlRequestParser"> + <property name="maxRecords" value="1000" /> <property name="earliestTimestamp" value="1996" /> </bean> </property> - <property name="exception"> - <bean class="org.archive.wayback.exception.CustomNotInArchiveExceptionRenderer"> - <property name="hosts"> - <list> - <value>www.aladems.org</value> - </list> - </property> - <property name="jspHandler" value="/exception/GrayBlank.jsp"/> - </bean> - </property> - <property name="uriConverter"> - <bean class="org.archive.wayback.archivalurl.ArchivalUrlResultURIConverter"> - <property name="replayURIPrefix" value="http://localhost:8080/rwayback/" /> - </bean> - </property> - <!-- - <property name="collection"> - <bean class="org.archive.wayback.webapp.WaybackCollection"> - <property name="resourceStore" ref="fancyresourcestore" /> - <property name="resourceIndex"> - <bean class="org.archive.wayback.resourceindex.RemoteResourceIndex" - init-method="init"> - <property name="searchUrlBase" value="http://localhost:8080/wayback/xmlquery" /> - </bean> - </property> - </bean> - </property> - --> + </bean> <!-- @@ -325,52 +110,65 @@ <bean name="8080:netsecure" parent="8080:wayback"> <property name="authentication"> - <bean class="org.archive.wayback.authenticationcontrol.AccessControlSettingOperation"> - <property name="operator"> - <bean class="org.archive.wayback.util.operator.NotBooleanOperator"> - <property name="operand"> - <bean class="org.archive.wayback.authenticationcontrol.IPMatchesBooleanOperator"> - <property name="allowedRanges"> - <list> - <value>192.168.1.16/24</value> - </list> - </property> - </bean> - </property> - </bean> + <bean class="org.archive.wayback.authenticationcontrol.IPMatchesBooleanOperator"> + <property name="allowedRanges"> + <list> 
+ <value>192.168.1.16/24</value> + </list> </property> - <property name="factory" ref="excluder-factory-robot"/> </bean> </property> <property name="uriConverter"> <bean class="org.archive.wayback.archivalurl.ArchivalUrlResultURIConverter"> - <property name="replayURIPrefix" value="http://192.168.1.16:8080/netsecure/" /> + <property name="replayURIPrefix" value="http://localhost.archive.org:8080/netsecure/" /> </bean> </property> </bean> --> + <!-- The following AccessPoint inherits all configuration from the 8080:wayback - AccessPoint, but checks live web robots.txt documents to determine if - archived content should be accessible. - - Note: using this AccessPoint requires enabling the "livewebcache" and - "excluder-factory-robot" beans declared at the top of this file. + AccessPoint, but uses an Access Control Oracle to determine if archived + content should be accessible. + + The Access Control Oracle was developed by Alex Osborne of the NLA. + + Some documentation for this project is available at: + + http://webteam.archive.org/confluence/display/wayback/Exclusions+API --> + <!-- - <bean name="8080:robots" parent="8080:wayback"> - <property name="exclusionFactory" ref="excluder-factory-robot" /> + <bean name="8080:exclusion" parent="8080:wayback"> + <property name="exclusionFactory"> + <bean class="org.archive.wayback.accesscontrol.oracleclient.OracleExclusionFilterFactory"> + <property name="oracleUrl" value="http://localhost:8180/oracle/" /> + <property name="accessGroup" value="ia_archiver" /> + </bean> + </property> <property name="uriConverter"> <bean class="org.archive.wayback.archivalurl.ArchivalUrlResultURIConverter"> - <property name="replayURIPrefix" value="http://localhost:8080/robots/" /> + <property name="replayURIPrefix" value="http://localhost:8080/exclusion/" /> </bean> </property> </bean> --> +<!-- + The following AccessPoint inherits all configuration from the 8080:wayback + AccessPoint, but provides a DomainPrefix Replay UI to the same 
collection. + These two access points can be used simultaneously on the same Tomcat + installation. + + Note: using this AccessPoint requires adding a "Connector" on port 8081 + in your Tomcat's server.xml file. + + Note: the hostname suffix localhost.archive.org has a special DNS wildcard + entry, so all hostnames suffixed with this value resolve to 127.0.0.1 + --> <import resource="DomainPrefixReplay.xml"/> <bean name="8081" parent="8080:wayback"> - <property name="useServerName" value="true" /> + <property name="urlRoot" value="http://localhost.archive.org:8081/" /> <property name="replay" ref="domainprefixreplay" /> <property name="uriConverter"> <bean class="org.archive.wayback.domainprefix.DomainPrefixResultURIConverter"> @@ -378,7 +176,7 @@ </bean> </property> <property name="parser"> - <bean class="org.archive.wayback.domainprefix.DomainPrefixCompositeRequestParser" init-method="init"> + <bean class="org.archive.wayback.domainprefix.DomainPrefixCompositeRequestParser"> <property name="hostPort" value="localhost.archive.org:8081" /> <property name="maxRecords" value="1000" /> </bean> @@ -397,18 +195,18 @@ --> <import resource="ProxyReplay.xml"/> <bean name="8090" parent="8080:wayback"> - <property name="useServerName" value="true" /> + <property name="urlRoot" value="http://localhost.archive.org/" /> <property name="replay" ref="proxyreplay" /> <property name="uriConverter"> <bean class="org.archive.wayback.proxy.RedirectResultURIConverter"> - <property name="redirectURI" value="http://brad.archive.org/jsp/Redirect.jsp" /> + <property name="redirectURI" value="http://localhost.archive.org/jsp/Redirect.jsp" /> </bean> </property> <property name="parser"> - <bean class="org.archive.wayback.proxy.ProxyRequestParser" init-method="init"> + <bean class="org.archive.wayback.proxy.ProxyRequestParser"> <property name="localhostNames"> <list> - <value>brad.archive.org</value> + <value>localhost.archive.org</value> </list> </property> <property name="maxRecords" 
value="1000" /> This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |